def validate_builder(builder):
    '''
    Validate that a model builder seems to have a well-formed parameters list.

    builder must contain a non-empty list under 'parameters'; every entry of
    that list is then passed to h2o_util.assertKeysExist with the expected
    schema keys.

    Raises AssertionError, with a message naming the failed check and showing
    the builder, when 'parameters' is missing, is not a list, or is empty.
    Whatever h2o_util.assertKeysExist raises for a malformed entry propagates.
    '''
    # Separate checks so a failure says *which* condition was violated.
    assert 'parameters' in builder, \
        "FAIL: Failed to find parameters list in builder: " + repr(builder)
    assert isinstance(builder['parameters'], list), \
        "FAIL: 'parameters' element is not a list in builder: " + repr(builder)

    parameters = builder['parameters']
    assert len(parameters) > 0, \
        "FAIL: parameters list is empty: " + repr(builder)

    for parameter in parameters:
        h2o_util.assertKeysExist(parameter, '', [
            'name', 'label', 'help', 'required', 'type',
            'default_value', 'actual_value', 'level', 'values'])
def testCheckWithModelAPI(self):
    '''
    Cross-check model/frame compatibility through the single-model and
    single-frame calls on self.a_node.

    Reads self.a_node, self.kmeans_model_name, self.prostate_key and
    self.dl_prostate_model_name; fails via assert / assert_true when an
    expected entry is missing from a response.
    '''
    ######################################################################
    # Now look for kmeans_model_name using the one-model API and find_compatible_frames, and check it
    model = self.a_node.models(key=self.kmeans_model_name, find_compatible_frames=True)
    kmeans_model = model['models'][0]
    h2o_util.assertKeysExist(kmeans_model, '', ['compatible_frames'])
    assert self.prostate_key in kmeans_model['compatible_frames'], \
        "Failed to find " + self.prostate_key + " in compatible_frames list."

    ######################################################################
    # Now look for prostate_key using the one-frame API and find_compatible_models, and check it
    result = self.a_node.frames(key='prostate.hex', find_compatible_models=True)
    frames = result['frames']
    frames_dict = h2o_util.list_to_dict(frames, 'key/name')
    assert_true('prostate.hex' in frames_dict, "Failed to find prostate.hex in Frames list.")

    compatible_models = result['compatible_models']
    models_dict = h2o_util.list_to_dict(compatible_models, 'key')
    assert_true(self.dl_prostate_model_name in models_dict,
                "Failed to find " + self.dl_prostate_model_name + " in compatible models list.")

    # The frame entry itself must also list both models; give these checks
    # messages so a failure identifies which model was missing.
    frame_models = frames[0]['compatible_models']
    assert_true(self.dl_prostate_model_name in frame_models,
                "Failed to find " + self.dl_prostate_model_name +
                " in compatible_models of frame prostate.hex.")
    assert_true(self.kmeans_model_name in frame_models,
                "Failed to find " + self.kmeans_model_name +
                " in compatible_models of frame prostate.hex.")
def validate_builder(algo, builder):
    '''
    Validate that a model builder seems to have a well-formed parameters list.

    Checks, in order: 'parameters' is present, is a list, and is non-empty;
    each parameter has the expected schema keys (h2o_util.assertKeysExist);
    'can_build' is present, is a list, and is non-empty. Every assertion
    message names algo and includes repr(builder).
    '''
    def fail(prefix):
        # Only evaluated when an assertion actually fails.
        return prefix + algo + " (" + repr(builder) + ")"

    assert 'parameters' in builder, fail(
        "FAIL: Failed to find parameters list in builder: ")
    assert isinstance(builder['parameters'], list), fail(
        "FAIL: 'parameters' element is not a list in builder: ")

    parameters = builder['parameters']
    assert len(parameters) > 0, fail("FAIL: parameters list is empty: ")

    expected_keys = [
        'name', 'label', 'help', 'required', 'type',
        'default_value', 'actual_value', 'level', 'values',
    ]
    for parameter in parameters:
        h2o_util.assertKeysExist(parameter, '', expected_keys)

    assert 'can_build' in builder, fail(
        "FAIL: Failed to find can_build list in builder: ")
    assert isinstance(builder['can_build'], list), fail(
        "FAIL: 'can_build' element is not a list in builder: ")
    assert len(builder['can_build']) > 0, fail(
        "FAIL: 'can_build' list is empty in builder: ")
def validate_builder(builder):
    '''
    Validate that a model builder seems to have a well-formed parameters list.

    builder must contain a non-empty list under 'parameters'. Every entry
    (not just the first) is passed to h2o_util.assertKeysExist with the
    expected schema keys, so a malformed later parameter is also caught.

    Raises AssertionError, with a message naming the failed check, when
    'parameters' is missing, is not a list, or is empty. Whatever
    h2o_util.assertKeysExist raises for a malformed entry propagates.
    '''
    assert 'parameters' in builder, \
        "FAIL: Failed to find parameters list in builder: " + repr(builder)
    assert isinstance(builder['parameters'], list), \
        "FAIL: 'parameters' element is not a list in builder: " + repr(builder)

    parameters = builder['parameters']
    assert len(parameters) > 0, \
        "FAIL: parameters list is empty: " + repr(builder)

    # NOTE(review): assumes every parameter carries 'dependencies', not only
    # the first one -- confirm against the builder schema.
    for parameter in parameters:
        h2o_util.assertKeysExist(parameter, '', [
            'name', 'label', 'help', 'required', 'type', 'default_value',
            'actual_value', 'level', 'dependencies', 'values'])
def validate_builder(algo, builder):
    '''
    Validate that a model builder seems to have a well-formed parameters list.

    Both 'parameters' and 'can_build' must be present in builder, be lists,
    and be non-empty; each parameter must have the expected schema keys
    (checked with h2o_util.assertKeysExist). The parameters checks and the
    per-parameter key checks run before the can_build checks.
    '''
    def require_nonempty_list(field, missing_prefix, not_list_prefix, empty_prefix):
        # Run the three structural assertions for one field and hand back its value.
        assert field in builder, missing_prefix + algo + " (" + repr(builder) + ")"
        assert isinstance(builder[field], list), not_list_prefix + algo + " (" + repr(builder) + ")"
        value = builder[field]
        assert len(value) > 0, empty_prefix + algo + " (" + repr(builder) + ")"
        return value

    parameters = require_nonempty_list(
        'parameters',
        "FAIL: Failed to find parameters list in builder: ",
        "FAIL: 'parameters' element is not a list in builder: ",
        "FAIL: parameters list is empty: ")

    for parameter in parameters:
        h2o_util.assertKeysExist(parameter, '', ['name', 'label', 'help', 'required', 'type', 'default_value', 'actual_value', 'level', 'values'])

    require_nonempty_list(
        'can_build',
        "FAIL: Failed to find can_build list in builder: ",
        "FAIL: 'can_build' element is not a list in builder: ",
        "FAIL: 'can_build' list is empty in builder: ")
def testImportProstate(self):
    '''
    Import and parse prostate.csv through self.a_node, then check the frame
    listing, single-frame, columns, single-column and column-summary calls.

    Stores the parsed frame's key name in self.prostate_key. Fails with an
    AssertionError naming the missing or unexpected field.
    '''
    def close_to(actual, expected):
        # Exact == on floating-point statistics is brittle; compare within a
        # tight relative tolerance instead.
        return abs(actual - expected) <= 1e-9 * max(1.0, abs(expected))

    def check_age_bins_cleared(columns_dict):
        # Shared by the frame, columns and column calls: AGE must be present
        # and its bins field must exist but be None.
        assert 'AGE' in columns_dict, "Failed to find AGE in Frames/prostate.hex/columns."
        assert 'bins' in columns_dict['AGE'], "Failed to find bins in Frames/prostate.hex/columns/AGE."
        assert columns_dict['AGE']['bins'] is None, "Failed to clear bins field."  # should be cleared except for /summary

    cleanup(self.a_node)
    # NOTE(review): developer-specific absolute path; this only works on a
    # machine with this exact layout -- should come from configuration.
    import_result = self.a_node.import_files(path="/Users/radu/h2o-dev/smalldata/logreg/prostate.csv")
    parse_result = self.a_node.parse(key=import_result['keys'][0])  # TODO: handle multiple files
    self.prostate_key = parse_result['frames'][0]['key']['name']

    # Test /Frames for prostate.csv
    frames = self.a_node.frames()['frames']
    frames_dict = h2o_util.list_to_dict(frames, 'key/name')
    assert 'prostate.hex' in frames_dict, "Failed to find prostate.hex in Frames list."

    # Test /Frames/{key} for prostate.csv
    frames = self.a_node.frames(key='prostate.hex')['frames']
    frames_dict = h2o_util.list_to_dict(frames, 'key/name')
    assert 'prostate.hex' in frames_dict, "Failed to find prostate.hex in Frames list."
    columns_dict = h2o_util.list_to_dict(frames[0]['columns'], 'label')
    assert 'CAPSULE' in columns_dict, "Failed to find CAPSULE in Frames/prostate.hex."
    check_age_bins_cleared(columns_dict)

    # All columns of the frame
    frames = self.a_node.columns(key='prostate.hex')['frames']
    columns_dict = h2o_util.list_to_dict(frames[0]['columns'], 'label')
    assert 'ID' in columns_dict, "Failed to find ID in Frames/prostate.hex/columns."
    check_age_bins_cleared(columns_dict)

    # Single column
    frames = self.a_node.column(key='prostate.hex', column='AGE')['frames']
    columns_dict = h2o_util.list_to_dict(frames[0]['columns'], 'label')
    check_age_bins_cleared(columns_dict)

    # Column summary: here the statistics (including bins) must be populated.
    frames = self.a_node.summary(key='prostate.hex', column='AGE')['frames']
    columns_dict = h2o_util.list_to_dict(frames[0]['columns'], 'label')
    assert 'AGE' in columns_dict, "Failed to find AGE in Frames/prostate.hex/columns/AGE/summary."
    col = columns_dict['AGE']
    h2o_util.assertKeysExistAndNonNull(col, '', [
        'label', 'missing', 'zeros', 'pinfs', 'ninfs', 'mins', 'maxs', 'mean',
        'sigma', 'type', 'data', 'precision', 'bins', 'base', 'stride', 'pctiles'])
    h2o_util.assertKeysExist(col, '', ['domain', 'str_data'])

    assert col['mins'][0] == 43, 'Failed to find 43 as the first min for AGE.'
    assert col['maxs'][0] == 79, 'Failed to find 79 as the first max for AGE.'
    assert close_to(col['mean'], 66.03947368421052), 'Failed to find 66.03947368421052 as the mean for AGE.'
    assert close_to(col['sigma'], 6.527071269173308), 'Failed to find 6.527071269173308 as the sigma for AGE.'
    assert col['type'] == 'int', 'Failed to find int as the type for AGE.'
    assert col['data'][0] == 65, 'Failed to find 65 as the first data for AGE.'
    assert col['precision'] == -1, 'Failed to find -1 as the precision for AGE.'
    assert col['bins'][0] == 1, 'Failed to find 1 as the first bin for AGE.'
    assert col['base'] == 43, 'Failed to find 43 as the base for AGE.'
    assert col['stride'] == 1, 'Failed to find 1 as the stride for AGE.'
    assert close_to(col['pctiles'][0], 50.5), 'Failed to find 50.5 as the first pctile for AGE.'
def testCheckWithModelAPI(self):
    '''
    Cross-check model/frame compatibility through the single-model and
    single-frame calls on self.a_node.

    Reads self.a_node, self.kmeans_model_name, self.prostate_key and
    self.dl_prostate_model_name; raises AssertionError when an expected
    entry is missing from a response.
    '''
    ######################################################################
    # Now look for kmeans_model_name using the one-model API and find_compatible_frames, and check it
    model = self.a_node.models(key=self.kmeans_model_name, find_compatible_frames=True)
    kmeans_model = model['models'][0]
    h2o_util.assertKeysExist(kmeans_model, '', ['compatible_frames'])
    assert self.prostate_key in kmeans_model['compatible_frames'], \
        "Failed to find " + self.prostate_key + " in compatible_frames list."

    ######################################################################
    # Now look for prostate_key using the one-frame API and find_compatible_models, and check it
    result = self.a_node.frames(key='prostate.hex', find_compatible_models=True)
    frames = result['frames']
    frames_dict = h2o_util.list_to_dict(frames, 'key/name')
    assert 'prostate.hex' in frames_dict, "Failed to find prostate.hex in Frames list."

    compatible_models = result['compatible_models']
    models_dict = h2o_util.list_to_dict(compatible_models, 'key')
    assert self.dl_prostate_model_name in models_dict, \
        "Failed to find " + self.dl_prostate_model_name + " in compatible models list."

    # The frame entry itself must also list both models; give these checks
    # messages so a failure identifies which model was missing.
    frame_models = frames[0]['compatible_models']
    assert self.dl_prostate_model_name in frame_models, \
        "Failed to find " + self.dl_prostate_model_name + " in compatible_models of frame prostate.hex."
    assert self.kmeans_model_name in frame_models, \
        "Failed to find " + self.kmeans_model_name + " in compatible_models of frame prostate.hex."
# Scan the models list for the entry whose key is deep_learning_model_name,
# remembering the matching model for the parameter validation below.
found_dl = False
dl_model = None
for model in models['models']:
    if model['key'] == deep_learning_model_name:
        found_dl = True
        dl_model = model

assert found_dl, 'Did not find ' + deep_learning_model_name + ' in the models list.'
validate_actual_parameters(dl_parameters, dl_model['parameters'], prostate_key, None)

######################################################################
# Now look for kmeans_model_name using the one-model API, and check it
model = a_node.models(key=kmeans_model_name, find_compatible_frames=True)
found_kmeans = False
h2o_util.assertKeysExist(model['models'][0], '', ['compatible_frames'])
h2o_util.assertKeysExist(model['models'][0]['compatible_frames'], '', ['frames'])

# The prostate frame must appear among the model's compatible frames.
found = False
for frame in model['models'][0]['compatible_frames']['frames']:
    if frame['key']['name'] == prostate_key:
        found = True
assert found, "Failed to find " + prostate_key + " in compatible_frames list."

###################
# test delete_model
a_node.delete_model(kmeans_model_name)
models = a_node.models()
# Reset before the post-delete check (that check lies outside this excerpt).
found_kmeans = False
# Test /Frames/{key}/columns/{label} for prostate.csv frames = a_node.column(key='prostate_binomial', column='AGE')['frames'] columns_dict = h2o_util.list_to_dict(frames[0]['columns'], 'label') assert 'AGE' in columns_dict, "FAIL: Failed to find AGE in Frames/prostate.hex/columns." assert 'bins' in columns_dict['AGE'], "FAIL: Failed to find bins in Frames/prostate.hex/columns/AGE." h2o.H2O.verboseprint('bins: ' + repr(columns_dict['AGE']['bins'])) assert None is columns_dict['AGE']['bins'], "FAIL: Failed to clear bins field." # should be cleared except for /summary # Test /Frames/{key}/columns/{label}/summary for prostate.csv frames = a_node.summary(key='prostate_binomial', column='AGE')['frames'] columns_dict = h2o_util.list_to_dict(frames[0]['columns'], 'label') assert 'AGE' in columns_dict, "FAIL: Failed to find AGE in Frames/prostate.hex/columns/AGE/summary." col = columns_dict['AGE'] h2o_util.assertKeysExistAndNonNull(col, '', ['label', 'missing', 'zeros', 'pinfs', 'ninfs', 'mins', 'maxs', 'mean', 'sigma', 'type', 'data', 'precision', 'bins', 'base', 'stride', 'pctiles']) h2o_util.assertKeysExist(col, '', ['domain', 'str_data']) assert col['mins'][0] == 43, 'FAIL: Failed to find 43 as the first min for AGE.' assert col['maxs'][0] == 79, 'FAIL: Failed to find 79 as the first max for AGE.' assert col['mean'] == 66.03947368421052, 'FAIL: Failed to find 66.03947368421052 as the mean for AGE.' assert col['sigma'] == 6.527071269173308, 'FAIL: Failed to find 6.527071269173308 as the sigma for AGE.' assert col['type'] == 'int', 'FAIL: Failed to find int as the type for AGE.' assert col['data'][0] == 65, 'FAIL: Failed to find 65 as the first data for AGE.' assert col['precision'] == -1, 'FAIL: Failed to find -1 as the precision for AGE.' assert col['bins'][0] == 1, 'FAIL: Failed to find 1 as the first bin for AGE.' assert col['base'] == 43, 'FAIL: Failed to find 43 as the base for AGE.' assert col['stride'] == 1, 'FAIL: Failed to find 1 as the stride for AGE.' 
assert col['pctiles'][0] == 50.5, 'FAIL: Failed to find 50.5 as the first pctile for AGE.' #################################################################################################### # Build and do basic validation checks on models
def testImportProstate(self):
    '''
    Import and parse the prostate dataset named in self.cfg.data['prostate'],
    then check the frame listing, single-frame, columns, single-column and
    column-summary calls on self.a_node.

    Stores the parsed frame's key name in self.prostate_key.

    Raises RuntimeError when the configured source type is not "file", and
    AssertionError naming the missing or unexpected field otherwise.
    '''
    def close_to(actual, expected):
        # Exact == on floating-point statistics is brittle; compare within a
        # tight relative tolerance instead.
        return abs(actual - expected) <= 1e-9 * max(1.0, abs(expected))

    def check_age_bins_cleared(columns_dict):
        # Shared by the frame, columns and column calls: AGE must be present
        # and its bins field must exist but be None.
        assert 'AGE' in columns_dict, "Failed to find AGE in Frames/prostate.hex/columns."
        assert 'bins' in columns_dict['AGE'], "Failed to find bins in Frames/prostate.hex/columns/AGE."
        assert columns_dict['AGE']['bins'] is None, "Failed to clear bins field."  # should be cleared except for /summary

    cleanup(self.a_node)

    # The config entry is indexed as (source type, location); only "file" is handled.
    prostate_tuple = self.cfg.data['prostate']
    if prostate_tuple[0] != "file":
        raise RuntimeError("Unsupported file type specified")
    import_result = self.a_node.import_files(path=os.path.abspath(prostate_tuple[1]))

    parse_result = self.a_node.parse(key=import_result['keys'][0])  # TODO: handle multiple files
    self.prostate_key = parse_result['frames'][0]['key']['name']

    # Test /Frames for prostate.csv
    frames = self.a_node.frames()['frames']
    frames_dict = h2o_util.list_to_dict(frames, 'key/name')
    assert 'prostate.hex' in frames_dict, "Failed to find prostate.hex in Frames list."

    # Test /Frames/{key} for prostate.csv
    frames = self.a_node.frames(key='prostate.hex')['frames']
    frames_dict = h2o_util.list_to_dict(frames, 'key/name')
    assert 'prostate.hex' in frames_dict, "Failed to find prostate.hex in Frames list."
    columns_dict = h2o_util.list_to_dict(frames[0]['columns'], 'label')
    assert 'CAPSULE' in columns_dict, "Failed to find CAPSULE in Frames/prostate.hex."
    check_age_bins_cleared(columns_dict)

    # All columns of the frame
    frames = self.a_node.columns(key='prostate.hex')['frames']
    columns_dict = h2o_util.list_to_dict(frames[0]['columns'], 'label')
    assert 'ID' in columns_dict, "Failed to find ID in Frames/prostate.hex/columns."
    check_age_bins_cleared(columns_dict)

    # Single column
    frames = self.a_node.column(key='prostate.hex', column='AGE')['frames']
    columns_dict = h2o_util.list_to_dict(frames[0]['columns'], 'label')
    check_age_bins_cleared(columns_dict)

    # Column summary: here the statistics (including bins) must be populated.
    frames = self.a_node.summary(key='prostate.hex', column='AGE')['frames']
    columns_dict = h2o_util.list_to_dict(frames[0]['columns'], 'label')
    assert 'AGE' in columns_dict, "Failed to find AGE in Frames/prostate.hex/columns/AGE/summary."
    col = columns_dict['AGE']
    h2o_util.assertKeysExistAndNonNull(col, '', [
        'label', 'missing', 'zeros', 'pinfs', 'ninfs', 'mins', 'maxs', 'mean',
        'sigma', 'type', 'data', 'precision', 'bins', 'base', 'stride', 'pctiles'
    ])
    h2o_util.assertKeysExist(col, '', ['domain', 'str_data'])

    assert col['mins'][0] == 43, 'Failed to find 43 as the first min for AGE.'
    assert col['maxs'][0] == 79, 'Failed to find 79 as the first max for AGE.'
    assert close_to(col['mean'], 66.03947368421052), 'Failed to find 66.03947368421052 as the mean for AGE.'
    assert close_to(col['sigma'], 6.527071269173308), 'Failed to find 6.527071269173308 as the sigma for AGE.'
    assert col['type'] == 'int', 'Failed to find int as the type for AGE.'
    assert col['data'][0] == 65, 'Failed to find 65 as the first data for AGE.'
    assert col['precision'] == -1, 'Failed to find -1 as the precision for AGE.'
    assert col['bins'][0] == 1, 'Failed to find 1 as the first bin for AGE.'
    assert col['base'] == 43, 'Failed to find 43 as the base for AGE.'
    assert col['stride'] == 1, 'Failed to find 1 as the stride for AGE.'
    assert close_to(col['pctiles'][0], 50.5), 'Failed to find 50.5 as the first pctile for AGE.'
def validate_builder(builder):
    '''
    Validate that a model builder seems to have a well-formed parameters list.

    builder must contain a non-empty list under 'parameters'. Every entry
    (not just the first) is passed to h2o_util.assertKeysExist with the
    expected schema keys, so a malformed later parameter is also caught.

    Raises AssertionError, with a message naming the failed check, when
    'parameters' is missing, is not a list, or is empty. Whatever
    h2o_util.assertKeysExist raises for a malformed entry propagates.
    '''
    assert 'parameters' in builder, \
        "FAIL: Failed to find parameters list in builder: " + repr(builder)
    assert isinstance(builder['parameters'], list), \
        "FAIL: 'parameters' element is not a list in builder: " + repr(builder)

    parameters = builder['parameters']
    assert len(parameters) > 0, \
        "FAIL: parameters list is empty: " + repr(builder)

    for parameter in parameters:
        h2o_util.assertKeysExist(parameter, '', [
            'name', 'label', 'help', 'required', 'type',
            'default_value', 'actual_value', 'level', 'values'])
# Test /Frames/{key}/columns/{label} for prostate.csv
# NOTE(review): the frame key used here is 'prostate_binomial' while the
# failure messages still say prostate.hex -- confirm which name is intended.
frames = a_node.column(key='prostate_binomial', column='AGE')['frames']
columns_dict = h2o_util.list_to_dict(frames[0]['columns'], 'label')
assert 'AGE' in columns_dict, "FAIL: Failed to find AGE in Frames/prostate.hex/columns."
# The field checked is 'histogram_bins'; the message text still calls it "bins".
assert 'histogram_bins' in columns_dict['AGE'], "FAIL: Failed to find bins in Frames/prostate.hex/columns/AGE."
h2o.H2O.verboseprint('bins: ' + repr(columns_dict['AGE']['histogram_bins']))
assert None is columns_dict['AGE']['histogram_bins'], "FAIL: Failed to clear bins field."  # should be cleared except for /summary

# Test /Frames/{key}/columns/{label}/summary for prostate.csv
frames = a_node.summary(key='prostate_binomial', column='AGE')['frames']
columns_dict = h2o_util.list_to_dict(frames[0]['columns'], 'label')
assert 'AGE' in columns_dict, "FAIL: Failed to find AGE in Frames/prostate.hex/columns/AGE/summary."
col = columns_dict['AGE']
h2o_util.assertKeysExistAndNonNull(col, '', ['label', 'missing_count', 'zero_count', 'positive_infinity_count', 'negative_infinity_count', 'mins', 'maxs', 'mean', 'sigma', 'type', 'data', 'precision', 'histogram_bins', 'histogram_base', 'histogram_stride', 'percentiles'])
h2o_util.assertKeysExist(col, '', ['domain', 'string_data'])

# Expected summary statistics for the AGE column.
# NOTE(review): mean and sigma are compared with exact float equality,
# which is brittle if the computation changes slightly.
assert col['mins'][0] == 43, 'FAIL: Failed to find 43 as the first min for AGE.'
assert col['maxs'][0] == 79, 'FAIL: Failed to find 79 as the first max for AGE.'
assert col['mean'] == 66.03947368421052, 'FAIL: Failed to find 66.03947368421052 as the mean for AGE.'
assert col['sigma'] == 6.527071269173308, 'FAIL: Failed to find 6.527071269173308 as the sigma for AGE.'
assert col['type'] == 'int', 'FAIL: Failed to find int as the type for AGE.'
assert col['data'][0] == 65, 'FAIL: Failed to find 65 as the first data for AGE.'
assert col['precision'] == -1, 'FAIL: Failed to find -1 as the precision for AGE.'
assert col['histogram_bins'][0] == 1, 'FAIL: Failed to find 1 as the first bin for AGE.'
assert col['histogram_base'] == 43, 'FAIL: Failed to find 43 as the histogram_base for AGE.'
assert col['histogram_stride'] == 1, 'FAIL: Failed to find 1 as the histogram_stride for AGE.'
# The actual value is appended to the message to help diagnose a mismatch.
assert col['percentiles'][0] == 43, 'FAIL: Failed to find 43 as the first percentile for AGE. '+str(col['percentiles'][0])

# Test /SplitFrame for prostate.csv
if verbose: print 'Testing SplitFrame with named dest_keys. . .'
# One ratio with two destination keys.
# NOTE(review): presumably the remainder goes to the second key -- confirm against split_frame.
splits = a_node.split_frame(dataset='prostate_binomial', ratios=[0.8], dest_keys=['bigger', 'smaller'])
# Check dl_airlines_model_name found_dl = False; dl_model = None for model in models['models']: if model['key'] == dl_airlines_model_name: found_dl = True dl_model = model assert found_dl, 'Did not find ' + dl_airlines_model_name + ' in the models list.' validate_actual_parameters(dl_airline_1_parameters, dl_model['parameters'], airlines_key, None) ###################################################################### # Now look for kmeans_model_name using the one-model API, and check it model = a_node.models(key=kmeans_model_name, find_compatible_frames=True) found_kmeans = False; h2o_util.assertKeysExist(model['models'][0], '', ['compatible_frames']) h2o_util.assertKeysExist(model['models'][0]['compatible_frames'], '', ['frames']) found = False for frame in model['models'][0]['compatible_frames']['frames']: if frame['key']['name'] == prostate_key: found = True assert found, "Failed to find " + prostate_key + " in compatible_frames list." ################### # test delete_model a_node.delete_model(kmeans_model_name) models = a_node.models() found_kmeans = False;