def parse(self, key, dest_key=None,
          timeoutSecs=300, retryDelaySecs=0.2, initialDelaySecs=None, pollTimeoutSecs=180,
          noise=None, benchmarkLogging=None, noPoll=False, **kwargs):
    '''
    Parse a raw source key into a frame using the /3/ParseSetup and /3/Parse endpoints.

    First POSTs the (quoted) key to /3/ParseSetup, then copies the settings
    from that result into a /3/Parse POST and follows the resulting job.

    key              -- name of the source frame to parse (a single key; see TODO below).
    dest_key         -- destination frame name; when falsy, the 'destination_frame'
                        value from the ParseSetup result is used.
    timeoutSecs      -- used as the timeout of both requests and of the job poll.
    benchmarkLogging -- when truthy, cloudPerfH2O.get_log_save(initOnly=True) is called first.
    noPoll           -- when truthy, return self.jobs(job_key) right away instead of polling.
    kwargs           -- passed to h2o_util.check_params_update_kwargs together with the
                        parse parameters, and also forwarded to the /3/Parse request.

    retryDelaySecs, initialDelaySecs, pollTimeoutSecs and noise are accepted
    but not used by this method.

    Returns self.jobs(job_key) when noPoll is set; otherwise the result of
    self.frames() for the job's destination frame, or None when poll_job
    returns nothing.
    '''
    if benchmarkLogging:
        cloudPerfH2O.get_log_save(initOnly=True)

    #
    # Call ParseSetup?source_frames=[keys] . . .
    #
    # TODO: multiple keys
    parse_setup_params = {
        'source_frames': '["' + key + '"]'  # NOTE: quote key names
    }
    setup_result = self.__do_json_request(jsonRequest="/3/ParseSetup", cmd='post',
                                          timeout=timeoutSecs, postData=parse_setup_params)
    H2O.verboseprint("ParseSetup result:", h2o_util.dump_json(setup_result))

    #
    # . . . and then Parse?source_frames=<keys list> plus the params from the ParseSetup result
    #
    parse_params = {
        'source_frames': '["' + setup_result['source_frames'][0]['name'] + '"]',  # TODO: cons up the whole list
        'destination_frame': dest_key if dest_key else setup_result['destination_frame'],
        'parse_type': setup_result['parse_type'],
        'separator': setup_result['separator'],
        'single_quotes': setup_result['single_quotes'],
        'check_header': setup_result['check_header'],
        'number_columns': setup_result['number_columns'],
        'column_names': setup_result['column_names'],  # gets stringified inside __do_json_request()
        'column_types': setup_result['column_types'],  # gets stringified inside __do_json_request()
        'na_strings': setup_result['na_strings'],
        'chunk_size': setup_result['chunk_size'],
    }
    H2O.verboseprint("parse_params: " + repr(parse_params))
    # NOTE(review): presumably validates/merges kwargs against parse_params -- confirm in h2o_util.
    h2o_util.check_params_update_kwargs(parse_params, kwargs, 'parse', print_params=H2O.verbose)

    parse_result = self.__do_json_request(jsonRequest="/3/Parse", cmd='post',
                                          timeout=timeoutSecs, postData=parse_params, **kwargs)
    H2O.verboseprint("Parse result:", h2o_util.dump_json(parse_result))

    job_key = parse_result['job']['key']['name']

    # TODO: dislike having different shapes for noPoll and poll
    if noPoll:
        # Was `this.jobs(job_key)`: `this` is not defined in Python, so the
        # noPoll path always raised NameError.
        return self.jobs(job_key)

    job_json = self.poll_job(job_key, timeoutSecs=timeoutSecs)
    if job_json:
        dest_key = job_json['jobs'][0]['dest']['name']
        return self.frames(dest_key)

    return None
def split_frame(self, timeoutSecs=180, **kwargs):
    '''
    POST the given keyword arguments to /3/SplitFrame.

    The keyword arguments are sent as the request's postData and
    timeoutSecs as its timeout. The result of the request is logged
    through H2O.verboseprint and returned unchanged.
    '''
    response = self.__do_json_request(
        '/3/SplitFrame',
        cmd="post",
        timeout=timeoutSecs,
        postData=kwargs,
    )
    H2O.verboseprint("\nsplit_frame result:", h2o_util.dump_json(response))
    return response
def create_frame(self, timeoutSecs=180, **kwargs):
    '''
    POST the given keyword arguments to 3/CreateFrame.

    The keyword arguments are passed as the request's `params` and
    timeoutSecs as its timeout. The result of the request is logged
    through H2O.verboseprint and returned unchanged.
    '''
    # NOTE(review): unlike the sibling wrappers, this path has no leading
    # slash and the arguments go in `params` rather than `postData` --
    # confirm this is intended for a POST.
    response = self.__do_json_request(
        '3/CreateFrame',
        cmd="post",
        timeout=timeoutSecs,
        params=kwargs,
    )
    H2O.verboseprint("\ncreate_frame result:", h2o_util.dump_json(response))
    return response
def interaction(self, timeoutSecs=180, **kwargs):
    '''
    POST the given keyword arguments to /3/Interaction.

    The keyword arguments are sent as the request's postData and
    timeoutSecs as its timeout. The result of the request is logged
    through H2O.verboseprint and returned unchanged.
    '''
    response = self.__do_json_request(
        '/3/Interaction',
        cmd="post",
        timeout=timeoutSecs,
        postData=kwargs,
    )
    H2O.verboseprint("\ninteraction result:", h2o_util.dump_json(response))
    return response
def interaction(self, timeoutSecs=180, **kwargs):
    '''
    Send an /3/Interaction POST request built from the keyword arguments.

    kwargs becomes the postData of the request and timeoutSecs its
    timeout; the request result is verbose-logged and handed back as is.
    '''
    request_args = dict(cmd="post", timeout=timeoutSecs, postData=kwargs)
    result = self.__do_json_request('/3/Interaction', **request_args)
    H2O.verboseprint("\ninteraction result:", h2o_util.dump_json(result))
    return result
def split_frame(self, timeoutSecs=180, **kwargs):
    '''
    Send a /3/SplitFrame POST request built from the keyword arguments.

    kwargs becomes the postData of the request and timeoutSecs its
    timeout; the request result is verbose-logged and handed back as is.
    '''
    request_args = dict(cmd="post", timeout=timeoutSecs, postData=kwargs)
    result = self.__do_json_request('/3/SplitFrame', **request_args)
    H2O.verboseprint("\nsplit_frame result:", h2o_util.dump_json(result))
    return result
def import_files(self, path, timeoutSecs=180):
    '''
    Request /3/ImportFiles for the given path.

    path is sent as the "path" request parameter and timeoutSecs as the
    request timeout. The result of the request is logged through
    H2O.verboseprint and returned unchanged.
    '''
    query = {"path": path}
    response = self.__do_json_request(
        '/3/ImportFiles',
        timeout=timeoutSecs,
        params=query,
    )
    H2O.verboseprint("\nimport_files result:", h2o_util.dump_json(response))
    return response
def import_files(self, path, timeoutSecs=180):
    '''
    Request 2/ImportFiles.json for the given path.

    path is sent as the "path" request parameter and timeoutSecs as the
    request timeout. The result of the request is logged through
    H2O.verboseprint and returned unchanged.
    '''
    query = {"path": path}
    response = self.__do_json_request(
        '2/ImportFiles.json',
        timeout=timeoutSecs,
        params=query,
    )
    H2O.verboseprint("\nimport_files result:", h2o_util.dump_json(response))
    return response
def create_frame(self, timeoutSecs=180, **kwargs):
    '''
    Send a 3/CreateFrame POST request built from the keyword arguments.

    kwargs is passed as the `params` of the request and timeoutSecs as
    its timeout; the request result is verbose-logged and handed back as is.
    '''
    # NOTE(review): path has no leading slash and the arguments travel in
    # `params`, not `postData`, unlike the other POST wrappers -- confirm.
    request_args = dict(cmd="post", timeout=timeoutSecs, params=kwargs)
    result = self.__do_json_request('3/CreateFrame', **request_args)
    H2O.verboseprint("\ncreate_frame result:", h2o_util.dump_json(result))
    return result
def cleanup(a_node, models=None, frames=None):
    '''
    Delete models and frames through a_node and assert that they are gone.

    a_node -- object providing delete_models(), delete_model(key), models(),
              delete_frame(key) and frames().
    models -- iterable of model keys to delete; when None, delete_models()
              is called and the models list is asserted to be empty.
    frames -- iterable of frame keys to delete; when None, no frame is touched.

    Raises AssertionError when a deleted model or frame still shows up in
    the corresponding listing. Returns None.
    '''
    ###################
    # test delete_model
    if models is None:
        a_node.delete_models()
    else:
        for model in models:
            a_node.delete_model(model)

    ms = a_node.models()
    if models is None:
        assert 'models' in ms and 0 == len(ms['models']), \
            "Called delete_models and the models list isn't empty: " + h2o_util.dump_json(ms)
    else:
        for model in models:
            for m in ms['models']:
                assert m['key'] != model, \
                    'Found model that we tried to delete in the models list: ' + model

    ###################
    # test delete_frame
    if frames is not None:
        for frame in frames:
            a_node.delete_frame(frame)
            # Re-list after every delete so each frame is checked against a
            # fresh listing. (The unused `found` flag that used to be set
            # here has been dropped; it was never read.)
            ms = a_node.frames()
            for m in ms['frames']:
                assert m['key'] != frame, \
                    'Found frame that we tried to delete in the frames list: ' + frame
def validate_predictions(result, model_name, frame_key, expected_rows, destination_key=None):
    '''
    Validate a /Predictions result.

    result          -- the predict result to check (a dict-like object).
    model_name      -- model name, used in the failure messages.
    frame_key       -- frame name, used in the failure messages.
    expected_rows   -- value that result's predictions 'rows' entry must equal.
    destination_key -- when not None, must equal result['destination_key']['name'].

    Raises AssertionError on the first check that fails. Returns None.

    NOTE: the predictions frame lookup goes through `a_node`, which is not a
    parameter of this function and has to be available at module level.
    '''
    def fail(detail):
        # Built lazily: an assert message is evaluated only when the check
        # fails, so passing checks never pay for the concatenation.
        return "FAIL: Predictions for scoring: " + model_name + " on: " + frame_key + detail

    # The checks below used to read an undefined name `p` instead of the
    # `result` parameter, which only worked when a global `p` existed.
    assert result is not None, "FAIL: Got a null result for scoring: " + model_name + " on: " + frame_key
    assert 'model_metrics' in result, fail(" does not contain a model_metrics object.")
    mm = result['model_metrics'][0]
    h2o.H2O.verboseprint('mm: ', repr(mm))

    #assert 'auc' in mm, fail(" does not contain an AUC.")
    #assert 'cm' in mm, fail(" does not contain a CM.")
    assert 'predictions' in mm, fail(" does not contain an predictions section.")
    assert 'key' in mm['predictions'], fail(" does not contain a key.")
    assert 'name' in mm['predictions']['key'], fail(" does not contain a key name.")

    # The predictions frame must be listed by the node.
    predictions_key = mm['predictions']['key']['name']
    f = a_node.frames(key=predictions_key, find_compatible_models=True, row_count=5)
    frames = f['frames']
    frames_dict = h2o_util.list_to_dict(frames, 'key/name')
    assert predictions_key in frames_dict, "FAIL: Failed to find predictions key" + predictions_key + " in Frames list."

    predictions = mm['predictions']
    h2o.H2O.verboseprint('p: ', repr(result))
    assert 'columns' in predictions, fail(" does not contain an columns section.")
    assert len(predictions['columns']) > 0, fail(" does not contain any columns.")
    assert 'label' in predictions['columns'][0], fail(" column 0 has no label element.")
    assert 'predict' == predictions['columns'][0]['label'], fail(" column 0 is not 'predict'.")
    assert expected_rows == predictions['rows'], fail(" has an unexpected number of rows.")

    assert 'destination_key' in result, "FAIL: failed to find 'destination_key' in predict result:" + h2o_util.dump_json(result)
    assert 'name' in result['destination_key'], "FAIL: failed to find name in 'destination_key' in predict result:" + h2o_util.dump_json(result)

    if destination_key is not None:
        assert destination_key == result['destination_key']['name'], "FAIL: bad value for 'destination_key' in predict result; expected: " + destination_key + ", got: " + result['destination_key']['name']
if raiseIfNon200 and not r: raise Exception("Maybe bad url? no r in __do_json_request in %s:" % inspect.stack()[1][3]) # this is used to open a browser on results, or to redo the operation in the browser # we don't' have that may urls flying around, so let's keep them all H2O.json_url_history.append(r.url) # if r.json(): # raise Exception("Maybe bad url? no r.json in __do_json_request in %s:" % inspect.stack()[1][3]) rjson = None if returnFast: return try: rjson = r.json() except: print h2o_util.dump_json(r.text) if not isinstance(r, (list, dict)): raise Exception("h2o json responses should always be lists or dicts, see previous for text") raise Exception("Could not decode any json from the request.") # TODO # TODO # TODO # TODO: we should really only look in the response object. This check # prevents us from having a field called "error" (e.g., for a scoring result). for e in ['error', 'Error', 'errors', 'Errors']: # error can be null (python None). This happens in exec2 if e in rjson and rjson[e]: H2O.verboseprint("rjson:" + h2o_util.dump_json(rjson)) emsg = 'rjson %s in %s: %s' % (e, inspect.stack()[1][3], rjson[e])
for frame in model['models'][0]['compatible_frames']['frames']: if frame['key']['name'] == prostate_key: found = True assert found, "Failed to find " + prostate_key + " in compatible_frames list." ################### # test delete_model a_node.delete_model(kmeans_model_name) models = a_node.models() found_kmeans = False for model in models['models']: if model['key'] == 'KMeansModel': found_kmeans = True assert not found_kmeans, 'Found KMeansModel in the models list: ' + h2o_util.dump_json( models) #################### # test delete_models jobs = a_node.build_model(algo='kmeans', training_frame=prostate_key, parameters={'K': 2}, timeoutSecs=240) # synchronous a_node.delete_models() models = a_node.models() assert 'models' in models and 0 == len( models['models'] ), "Called delete_models and the models list isn't empty: " + h2o_util.dump_json( models)
def cleanup(a_node, models=None, frames=None):
    '''
    DELETE the specified models and frames from H2O.

    a_node -- object providing delete_models(), delete_model(key), models(),
              delete_frame(key) and frames(len=...).
    models -- iterable of model keys to delete; when None, delete_models()
              is called and the models list is asserted to be empty.
    frames -- iterable of frame keys to delete; when None, no frame is touched.

    Raises AssertionError when a deleted model or frame still shows up in
    the corresponding listing. Returns None.
    '''
    ###################
    # test delete_model
    if models is None:
        a_node.delete_models()
    else:
        for model in models:
            a_node.delete_model(model)

    ms = a_node.models()
    if models is None:
        assert 'models' in ms and 0 == len(ms['models']), \
            "FAIL: Called delete_models and the models list isn't empty: " + h2o_util.dump_json(ms)
    else:
        for model in models:
            for m in ms['models']:
                assert m['key'] != model, \
                    'FAIL: Found model that we tried to delete in the models list: ' + model

    ###################
    # test delete_frame
    if frames is not None:
        for frame in frames:
            a_node.delete_frame(frame)
            # Re-list after every delete so each frame is checked against a
            # fresh listing. (The unused `found` flag and its stray
            # semicolon have been dropped; the flag was never read.)
            ms = a_node.frames(len=5)
            for m in ms['frames']:
                assert m['key'] != frame, \
                    'FAIL: Found frame that we tried to delete in the frames list: ' + frame
found_kmeans = False; h2o_util.assertKeysExist(model['models'][0], '', ['compatible_frames']) h2o_util.assertKeysExist(model['models'][0]['compatible_frames'], '', ['frames']) found = False for frame in model['models'][0]['compatible_frames']['frames']: if frame['key']['name'] == prostate_key: found = True assert found, "Failed to find " + prostate_key + " in compatible_frames list." ################### # test delete_model a_node.delete_model(kmeans_model_name) models = a_node.models() found_kmeans = False; for model in models['models']: if model['key'] == 'KMeansModel': found_kmeans = True assert not found_kmeans, 'Found KMeansModel in the models list: ' + h2o_util.dump_json(models) #################### # test delete_models jobs = a_node.build_model(algo='kmeans', training_frame=prostate_key, parameters={'K': 2 }, timeoutSecs=240) # synchronous a_node.delete_models() models = a_node.models() assert 'models' in models and 0 == len(models['models']), "Called delete_models and the models list isn't empty: " + h2o_util.dump_json(models)
dl_prostate_bad_parameters = {'response_column': 'CAPSULE', 'hidden': "[10, 20, 10]", 'input_dropout_ratio': 27 } parameters_validation = a_node.build_model(algo='deeplearning', destination_key='deeplearning_prostate_binomial_bad', training_frame='prostate_binomial', parameters=dl_prostate_bad_parameters, timeoutSecs=240) # synchronous validate_validation_messages(parameters_validation, ['input_dropout_ratio']) assert parameters_validation['__http_response']['status_code'] == requests.codes.precondition_failed, "FAIL: expected 412 Precondition Failed from a bad build request, got: " + str(parameters_validation['__http_response']['status_code']) if verbose: print 'Done trying to build DeepLearning model with bad parameters.' print("WARNING: Terminating test before the end because we don't have as.factor yet. . .") # TODO: Remove after deeplearning_prostate_binomial is updated sys.exit(0) ################################### # Compute and check ModelMetrics for 'deeplearning_prostate_binomial' mm = a_node.compute_model_metrics(model='deeplearning_prostate_binomial', frame='prostate_binomial') assert mm is not None, "FAIL: Got a null result for scoring: " + 'deeplearning_prostate_binomial' + " on: " + 'prostate_binomial' assert 'model_category' in mm, "FAIL: ModelMetrics for scoring: " + 'deeplearning_prostate_binomial' + " on: " + 'prostate_binomial' + " does not contain a model_category." 
assert 'Binomial' == mm['model_category'], "FAIL: ModelMetrics for scoring: " + 'deeplearning_prostate_binomial' + " on: " + 'prostate_binomial' + " model_category is not Binomial, it is: " + str(mm['model_category']) assert 'AUC' in mm, "FAIL: ModelMetrics for scoring: " + 'deeplearning_prostate_binomial' + " on: " + 'prostate_binomial' + " does not contain an AUC element: " + h2o_util.dump_json(mm) assert type(mm['AUC']) is float, "FAIL: ModelMetrics for scoring: " + 'deeplearning_prostate_binomial' + " on: " + 'prostate_binomial' + " AUC element is not a float: " + h2o_util.dump_json(mm) assert 'confusion_matrices' in mm, "FAIL: ModelMetrics for scoring: " + 'deeplearning_prostate_binomial' + " on: " + 'prostate_binomial' + " does not contain a confusion_matrices element: " + h2o_util.dump_json(mm) assert type(mm['confusion_matrices']) is list, "FAIL: ModelMetrics for scoring: " + 'deeplearning_prostate_binomial' + " on: " + 'prostate_binomial' + " confusion_matrices element is not a list: " + h2o_util.dump_json(mm) # print h2o_util.dump_json(mm) h2o.H2O.verboseprint("ModelMetrics for scoring: ", 'deeplearning_prostate_binomial', " on: ", 'prostate_binomial', ": ", repr(mm)) ################################### # Check for ModelMetrics for 'deeplearning_prostate_binomial' in full list mms = a_node.model_metrics() # fetch all assert 'model_metrics' in mms, 'FAIL: Failed to find model_metrics in result of /3/ModelMetrics.' found_mm = False for mm in mms['model_metrics']: assert 'model' in mm, "FAIL: mm does not contain a model element: " + repr(mm)
raise Exception("Maybe bad url? no r in __do_json_request in %s:" % inspect.stack()[1][3]) # this is used to open a browser on results, or to redo the operation in the browser # we don't' have that may urls flying around, so let's keep them all H2O.json_url_history.append(r.url) # if r.json(): # raise Exception("Maybe bad url? no r.json in __do_json_request in %s:" % inspect.stack()[1][3]) rjson = None if returnFast: return try: rjson = r.json() except: print h2o_util.dump_json(r.text) if not isinstance(r, (list, dict)): raise Exception( "h2o json responses should always be lists or dicts, see previous for text" ) raise Exception("Could not decode any json from the request.") # TODO # TODO # TODO # TODO: we should really only look in the response object. This check # prevents us from having a field called "error" (e.g., for a scoring result). for e in ['error', 'Error', 'errors', 'Errors']: # error can be null (python None). This happens in exec2 if e in rjson and rjson[e]:
def parse(self, key, key2=None,
          timeoutSecs=300, retryDelaySecs=0.2, initialDelaySecs=None, pollTimeoutSecs=180,
          noise=None, benchmarkLogging=None, noPoll=False, **kwargs):
    '''
    Parse a raw source key into a frame using ParseSetup.json and Parse.json.

    First requests ParseSetup.json for the key, then copies the settings
    from that result into a Parse.json request and follows the resulting job.

    key              -- name of the source key to parse (a single key; see TODO below).
    timeoutSecs      -- used as the timeout of both requests and of the job poll.
    benchmarkLogging -- when truthy, cloudPerfH2O.get_log_save(initOnly=True) is called first.
    noPoll           -- when truthy, return self.jobs(job_key) right away instead of polling.
    kwargs           -- passed to h2o_util.check_params_update_kwargs together with the
                        parse parameters, and also forwarded to the Parse.json request.

    key2, retryDelaySecs, initialDelaySecs, pollTimeoutSecs and noise are
    accepted but not used by this method.

    Returns self.jobs(job_key) when noPoll is set; otherwise the result of
    self.frames() for the job's destination frame, or None when poll_job
    returns nothing.
    '''
    if benchmarkLogging:
        cloudPerfH2O.get_log_save(initOnly=True)

    #
    # Call ParseSetup?srcs=[keys] . . .
    #
    # TODO: multiple keys
    parse_setup_params = {
        'srcs': "[" + key + "]"
    }
    setup_result = self.__do_json_request(jsonRequest="ParseSetup.json",
                                          timeout=timeoutSecs, params=parse_setup_params)
    H2O.verboseprint("ParseSetup result:", h2o_util.dump_json(setup_result))

    #
    # . . . and then Parse?srcs=<keys list> plus the params from the ParseSetup result
    #
    # Column names are sent as a bracketed, ", "-separated list: [ID, CAPSULE, ...]
    ascii_column_names = '[' + ', '.join(str(s) for s in setup_result['columnNames']) + ']'

    parse_params = {
        'srcs': "[" + setup_result['srcs'][0]['name'] + "]",  # TODO: cons up the whole list
        'hex': setup_result['hexName'],
        'pType': setup_result['pType'],
        'sep': setup_result['sep'],
        'ncols': setup_result['ncols'],
        'checkHeader': setup_result['checkHeader'],
        'singleQuotes': setup_result['singleQuotes'],
        'columnNames': ascii_column_names,
    }
    # Single-argument print: same text as the old `print "parse_params: ", parse_params`
    # statement (label, separator space, dict) but also valid call syntax.
    print("parse_params:  %s" % (parse_params,))
    # NOTE(review): presumably validates/merges kwargs against parse_params -- confirm in h2o_util.
    h2o_util.check_params_update_kwargs(parse_params, kwargs, 'parse', print_params=True)

    parse_result = self.__do_json_request(jsonRequest="Parse.json",
                                          timeout=timeoutSecs, params=parse_params, **kwargs)
    H2O.verboseprint("Parse result:", h2o_util.dump_json(parse_result))

    job_key = parse_result['job']['name']

    # TODO: dislike having different shapes for noPoll and poll
    if noPoll:
        # Was `this.jobs(job_key)`: `this` is not defined in Python, so the
        # noPoll path always raised NameError.
        return self.jobs(job_key)

    job_json = self.poll_job(job_key, timeoutSecs=timeoutSecs)
    if job_json:
        dest_key = job_json['jobs'][0]['dest']['name']
        return self.frames(dest_key)

    return None
validate_predictions(p, 'deeplearning_airlines_binomial', 'airlines_binomial', 43978, destination_key='deeplearning_airlines_binomial_predictions') validate_frame_exists('deeplearning_airlines_binomial_predictions') h2o.H2O.verboseprint("Predictions for scoring: ", 'deeplearning_airlines_binomial', " on: ", 'airlines_binomial', ": ", repr(p)) # print h2o_util.dump_json(p) print("WARNING: Terminating test before the end because we don't have as.factor yet. . .") # TODO: Remove after deeplearning_prostate_binomial is updated sys.exit(0) ################################### # Compute and check ModelMetrics for 'deeplearning_prostate_binomial' mm = a_node.compute_model_metrics(model='deeplearning_prostate_binomial', frame='prostate_binomial') assert mm is not None, "FAIL: Got a null result for scoring: " + 'deeplearning_prostate_binomial' + " on: " + 'prostate_binomial' assert 'model_category' in mm, "FAIL: ModelMetrics for scoring: " + 'deeplearning_prostate_binomial' + " on: " + 'prostate_binomial' + " does not contain a model_category." 
assert 'Binomial' == mm['model_category'], "FAIL: ModelMetrics for scoring: " + 'deeplearning_prostate_binomial' + " on: " + 'prostate_binomial' + " model_category is not Binomial, it is: " + str(mm['model_category']) assert 'AUC' in mm, "FAIL: ModelMetrics for scoring: " + 'deeplearning_prostate_binomial' + " on: " + 'prostate_binomial' + " does not contain an AUC element: " + h2o_util.dump_json(mm) assert type(mm['AUC']) is float, "FAIL: ModelMetrics for scoring: " + 'deeplearning_prostate_binomial' + " on: " + 'prostate_binomial' + " AUC element is not a float: " + h2o_util.dump_json(mm) assert 'confusion_matrices' in mm, "FAIL: ModelMetrics for scoring: " + 'deeplearning_prostate_binomial' + " on: " + 'prostate_binomial' + " does not contain a confusion_matrices element: " + h2o_util.dump_json(mm) assert type(mm['confusion_matrices']) is list, "FAIL: ModelMetrics for scoring: " + 'deeplearning_prostate_binomial' + " on: " + 'prostate_binomial' + " confusion_matrices element is not a list: " + h2o_util.dump_json(mm) # print h2o_util.dump_json(mm) h2o.H2O.verboseprint("ModelMetrics for scoring: ", 'deeplearning_prostate_binomial', " on: ", 'prostate_binomial', ": ", repr(mm)) ################################### # Check for ModelMetrics for 'deeplearning_prostate_binomial' in full list mms = a_node.model_metrics() # fetch all assert 'model_metrics' in mms, 'FAIL: Failed to find model_metrics in result of /3/ModelMetrics.' found_mm = False for mm in mms['model_metrics']: assert 'model' in mm, "FAIL: mm does not contain a model element: " + repr(mm)