def jobs(self, job_key=None, timeoutSecs=10, **kwargs):
    """Query the '/3/Jobs' endpoint.

    job_key is sent as the 'job_key' request parameter (None by default).
    Any extra keyword arguments are handed, together with the default
    parameters, to h2o_util.check_params_update_kwargs before the request
    is issued. timeoutSecs is passed as the request timeout.

    Returns whatever __do_json_request returns for the call.
    """
    query = dict(job_key=job_key)
    h2o_util.check_params_update_kwargs(query, kwargs, 'jobs', H2O.verbose)
    return self.__do_json_request('/3/Jobs', timeout=timeoutSecs, params=query)
def jobs(self, job_key=None, timeoutSecs=10, **kwargs):
    """Request job information from '/3/Jobs'.

    The request parameters start out as {'job_key': job_key}; they are
    passed with **kwargs through h2o_util.check_params_update_kwargs
    (tagged 'jobs') before being sent. The response of
    __do_json_request is returned unchanged.
    """
    request_params = {'job_key': job_key}
    h2o_util.check_params_update_kwargs(
        request_params, kwargs, 'jobs', H2O.verbose)
    response = self.__do_json_request(
        '/3/Jobs', timeout=timeoutSecs, params=request_params)
    return response
def columns(self, key, timeoutSecs=10, **kwargs):
    """Request '/3/Frames/<key>/columns' for the frame named by key.

    Default request parameters are row_offset=0 and row_count=100; they
    are passed with **kwargs through h2o_util.check_params_update_kwargs
    before the request. Returns the __do_json_request result.
    """
    query = dict(row_offset=0, row_count=100)
    h2o_util.check_params_update_kwargs(query, kwargs, 'columns', H2O.verbose)
    endpoint = '/3/Frames/' + key + '/columns'
    return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
def column(self, key, column, timeoutSecs=10, **kwargs):
    """Request '3/Frames.json/<key>/columns/<column>'.

    Default request parameters are offset=0 and len=100; they are passed
    with **kwargs through h2o_util.check_params_update_kwargs before the
    request. Returns the __do_json_request result.
    """
    query = {'offset': 0, 'len': 100}
    h2o_util.check_params_update_kwargs(query, kwargs, 'column', H2O.verbose)
    endpoint = '3/Frames.json/' + key + '/columns/' + column
    return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
def summary(self, key, column, timeoutSecs=10, **kwargs):
    """Request '/3/Frames/<key>/columns/<column>/summary'.

    Default request parameters are row_offset=0 and row_count=100; they
    are passed with **kwargs through h2o_util.check_params_update_kwargs
    before the request. Returns the __do_json_request result.
    """
    query = dict(row_offset=0, row_count=100)
    h2o_util.check_params_update_kwargs(query, kwargs, 'summary', H2O.verbose)
    endpoint = '/3/Frames/' + key + '/columns/' + column + '/summary'
    return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
def parse(self, key, dest_key=None,
          timeoutSecs=300, retryDelaySecs=0.2, initialDelaySecs=None, pollTimeoutSecs=180,
          noise=None, benchmarkLogging=None, noPoll=False, **kwargs):
    """Parse the source key into a frame in two steps: ParseSetup, then Parse.

    First POSTs to '/3/ParseSetup' with the (quoted) source key, then POSTs
    to '/3/Parse' using the fields of the setup result. The destination
    frame is dest_key when given, otherwise the setup result's
    'destination_frame'.

    kwargs are passed with the Parse parameters through
    h2o_util.check_params_update_kwargs and are also forwarded to
    __do_json_request.

    retryDelaySecs, initialDelaySecs, pollTimeoutSecs and noise are accepted
    but not used in this method.

    Returns:
        self.jobs(job_key) when noPoll is true; otherwise
        self.frames(<dest name>) once poll_job returns a result, or None
        when poll_job returns a falsy value.
    """
    #
    # Call ParseSetup?source_frames=[keys] . . .
    #
    if benchmarkLogging:
        cloudPerfH2O.get_log_save(initOnly=True)

    # TODO: multiple keys
    parse_setup_params = {
        'source_frames': '["' + key + '"]'  # NOTE: quote key names
    }
    # h2o_util.check_params_update_kwargs(params_dict, kwargs, 'parse_setup', print_params=H2O.verbose)
    setup_result = self.__do_json_request(jsonRequest="/3/ParseSetup", cmd='post',
                                          timeout=timeoutSecs, postData=parse_setup_params)
    H2O.verboseprint("ParseSetup result:", h2o_util.dump_json(setup_result))

    #
    # and then Parse?source_frames=<keys list> and params from the ParseSetup result
    # Parse?source_frames=[nfs://Users/rpeck/Source/h2o2/smalldata/logreg/prostate.csv]&destination_frame=prostate.hex&parse_type=CSV&separator=44&number_columns=9&check_header=0&single_quotes=false&column_names=['ID',CAPSULE','AGE','RACE','DPROS','DCAPS','PSA','VOL','GLEASON]
    #
    parse_params = {
        'source_frames': '["' + setup_result['source_frames'][0]['name'] + '"]',  # TODO: cons up the whole list
        'destination_frame': dest_key if dest_key else setup_result['destination_frame'],
        'parse_type': setup_result['parse_type'],
        'separator': setup_result['separator'],
        'single_quotes': setup_result['single_quotes'],
        'check_header': setup_result['check_header'],
        'number_columns': setup_result['number_columns'],
        'column_names': setup_result['column_names'],  # gets stringified inside __do_json_request()
        'column_types': setup_result['column_types'],  # gets stringified inside __do_json_request()
        'na_strings': setup_result['na_strings'],
        'chunk_size': setup_result['chunk_size'],
    }
    H2O.verboseprint("parse_params: " + repr(parse_params))
    h2o_util.check_params_update_kwargs(parse_params, kwargs, 'parse', print_params=H2O.verbose)

    # NOTE(review): kwargs go to check_params_update_kwargs above and are
    # forwarded again here -- confirm that both are intended.
    parse_result = self.__do_json_request(jsonRequest="/3/Parse", cmd='post', timeout=timeoutSecs,
                                          postData=parse_params, **kwargs)
    H2O.verboseprint("Parse result:", h2o_util.dump_json(parse_result))

    # print("Parse result:", repr(parse_result))
    job_key = parse_result['job']['key']['name']

    # TODO: dislike having different shapes for noPoll and poll
    if noPoll:
        # Was `this.jobs(...)`: `this` is not defined in Python and raised
        # NameError whenever noPoll was requested.
        return self.jobs(job_key)

    job_json = self.poll_job(job_key, timeoutSecs=timeoutSecs)

    if job_json:
        dest_key = job_json['jobs'][0]['dest']['name']
        return self.frames(dest_key)

    return None
def summary(self, key, column, timeoutSecs=10, **kwargs):
    """Request '3/Frames.json/<key>/columns/<column>/summary'.

    Default request parameters are offset=0 and len=100; they are passed
    with **kwargs through h2o_util.check_params_update_kwargs, whose last
    argument is fixed to True here. Returns the __do_json_request result.
    """
    query = dict(offset=0, len=100)
    h2o_util.check_params_update_kwargs(query, kwargs, 'summary', True)
    endpoint = '3/Frames.json/' + key + '/columns/' + column + '/summary'
    return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
def model_builders(self, algo=None, timeoutSecs=10, **kwargs):
    """Request '3/ModelBuilders', or '3/ModelBuilders/<algo>' when algo is truthy.

    There are no default request parameters; **kwargs are passed with the
    empty parameter dict through h2o_util.check_params_update_kwargs
    before the request. Returns the __do_json_request result.
    """
    query = {}
    h2o_util.check_params_update_kwargs(query, kwargs, 'model_builders', H2O.verbose)
    endpoint = '3/ModelBuilders/' + algo if algo else '3/ModelBuilders'
    return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
def model_builders(self, algo=None, timeoutSecs=10, **kwargs):
    """Request '2/ModelBuilders.json', or '2/ModelBuilders.json/<algo>' when algo is truthy.

    There are no default request parameters; **kwargs are passed with the
    empty parameter dict through h2o_util.check_params_update_kwargs,
    whose last argument is fixed to True here. Returns the
    __do_json_request result.
    """
    query = {}
    h2o_util.check_params_update_kwargs(query, kwargs, 'model_builders', True)
    endpoint = '2/ModelBuilders.json/' + algo if algo else '2/ModelBuilders.json'
    return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
def models(self, api_version=3, key=None, timeoutSecs=20, **kwargs):
    """Request '<api_version>/Models', or '<api_version>/Models/<key>' when key is truthy.

    The default request parameter is find_compatible_frames=False; it is
    passed with **kwargs through h2o_util.check_params_update_kwargs
    before the request. Returns the __do_json_request result.
    """
    query = {'find_compatible_frames': False}
    h2o_util.check_params_update_kwargs(query, kwargs, 'models', H2O.verbose)
    collection = str(api_version) + '/Models'
    endpoint = collection + '/' + key if key else collection
    return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
def models(self, key=None, timeoutSecs=10, **kwargs):
    """Request '3/Models.json', or '3/Models.json/<key>' when key is truthy.

    The default request parameter is find_compatible_frames=False; it is
    passed with **kwargs through h2o_util.check_params_update_kwargs,
    whose last argument is fixed to True here. Returns the
    __do_json_request result.
    """
    query = {'find_compatible_frames': False}
    h2o_util.check_params_update_kwargs(query, kwargs, 'models', True)
    endpoint = '3/Models.json/' + key if key else '3/Models.json'
    return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
def frames(self, key=None, timeoutSecs=10, **kwargs):
    """Request '/3/Frames', or '/3/Frames/<key>' when key is truthy.

    Default request parameters are find_compatible_models=0, row_offset=0
    and row_count=100; they are passed with **kwargs through
    h2o_util.check_params_update_kwargs before the request. Returns the
    __do_json_request result.
    """
    query = dict(find_compatible_models=0, row_offset=0, row_count=100)
    h2o_util.check_params_update_kwargs(query, kwargs, 'frames', H2O.verbose)
    endpoint = '/3/Frames/' + key if key else '/3/Frames'
    return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
def frames(self, key=None, timeoutSecs=10, **kwargs):
    """Request '3/Frames.json', or '3/Frames.json/<key>' when key is truthy.

    Default request parameters are find_compatible_models=0, offset=0 and
    len=100; they are passed with **kwargs through
    h2o_util.check_params_update_kwargs before the request. Returns the
    __do_json_request result.
    """
    # TODO: len and offset are not working yet
    query = dict(find_compatible_models=0, offset=0, len=100)
    h2o_util.check_params_update_kwargs(query, kwargs, 'frames', H2O.verbose)
    endpoint = '3/Frames.json/' + key if key else '3/Frames.json'
    return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
def frames(self, key=None, timeoutSecs=10, **kwargs):
    """Request '3/Frames.json', or '3/Frames.json/<key>' when key is truthy.

    Default request parameters are find_compatible_models=0, offset=0 and
    len=100; they are passed with **kwargs through
    h2o_util.check_params_update_kwargs, whose last argument is fixed to
    True here. Returns the __do_json_request result.
    """
    # TODO: len and offset are not working yet
    query = dict(find_compatible_models=0, offset=0, len=100)
    h2o_util.check_params_update_kwargs(query, kwargs, 'frames', True)
    endpoint = '3/Frames.json/' + key if key else '3/Frames.json'
    return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
def model_builders(self, algo=None, timeoutSecs=10, **kwargs):
    """Request the ModelBuilders endpoint, optionally for a single algo.

    Without algo the endpoint is '3/ModelBuilders'. With algo it is
    '<version>/ModelBuilders/<algo>', where version is 99 when algo is in
    H2O.experimental_algos and 3 otherwise.

    There are no default request parameters; **kwargs are passed with the
    empty parameter dict through h2o_util.check_params_update_kwargs
    before the request. Returns the __do_json_request result.
    """
    query = {}
    h2o_util.check_params_update_kwargs(query, kwargs, 'model_builders', H2O.verbose)

    if not algo:
        return self.__do_json_request('3/ModelBuilders', timeout=timeoutSecs, params=query)

    version = 99 if algo in H2O.experimental_algos else 3
    endpoint = str(version) + '/ModelBuilders/' + algo
    return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
def poll_job(self, job_key, timeoutSecs=10, retryDelaySecs=0.5, **kwargs):
    """Poll '2/Jobs.json/<job_key>' until the job reaches a final status.

    The job is re-requested every retryDelaySecs seconds. timeoutSecs is
    used both as the per-request timeout and as the overall polling
    budget, which is checked after each request.

    Returns:
        The response of the request whose first job has status 'DONE',
        'CANCELLED' or 'FAILED', or None when the polling budget is
        exceeded first.
    """
    params_dict = {}
    h2o_util.check_params_update_kwargs(params_dict, kwargs, 'poll_job', True)

    final_statuses = ('DONE', 'CANCELLED', 'FAILED')
    start_time = time.time()
    while True:
        H2O.verboseprint('Polling for job: ' + job_key + '. . .')
        result = self.__do_json_request('2/Jobs.json/' + job_key, timeout=timeoutSecs, params=params_dict)

        status = result['jobs'][0]['status']
        if status in final_statuses:
            H2O.verboseprint('Job ' + status + ': ' + job_key + '.')
            return result

        if time.time() - start_time > timeoutSecs:
            # timeoutSecs is numeric; concatenating it without str() raised
            # TypeError exactly when the timeout fired.
            H2O.verboseprint('Job: ' + job_key + ' timed out in: ' + str(timeoutSecs) + '.')
            return None

        time.sleep(retryDelaySecs)
def poll_job(self, job_key, timeoutSecs=10, retryDelaySecs=0.5, **kwargs):
    """Poll '/3/Jobs/<job_key>' until the job reaches a final status.

    The job is re-requested every retryDelaySecs seconds. timeoutSecs is
    used both as the per-request timeout and as the overall polling
    budget, which is checked after each request.

    Returns:
        The response of the request whose first job has status 'DONE',
        'CANCELLED' or 'FAILED', or None when the polling budget is
        exceeded first (a message is printed in that case).
    """
    params_dict = {}
    h2o_util.check_params_update_kwargs(params_dict, kwargs, 'poll_job', H2O.verbose)

    final_statuses = ('DONE', 'CANCELLED', 'FAILED')
    start_time = time.time()
    while True:
        H2O.verboseprint('Polling for job: ' + job_key + '. . .')
        result = self.__do_json_request('/3/Jobs/' + job_key, timeout=timeoutSecs, params=params_dict)

        status = result['jobs'][0]['status']
        if status in final_statuses:
            H2O.verboseprint('Job ' + status + ': ' + job_key + '.')
            return result

        if time.time() - start_time > timeoutSecs:
            # Single-argument print(...) emits the same text under Python 2
            # and is valid syntax under Python 3, unlike the print statement.
            print('Job: ' + job_key + ' timed out in: ' + str(timeoutSecs) + '.')
            # downstream checkers should tolerate None. Print msg in case it's overlooked.
            return None

        time.sleep(retryDelaySecs)
def summary(self, key, column, timeoutSecs=10, **kwargs):
    """Request '3/Frames.json/<key>/columns/<column>/summary'.

    There are no default request parameters; **kwargs are passed with the
    empty parameter dict through h2o_util.check_params_update_kwargs,
    whose last argument is fixed to True here. Returns the
    __do_json_request result.
    """
    query = {}
    h2o_util.check_params_update_kwargs(query, kwargs, 'summary', True)
    endpoint = '3/Frames.json/' + key + '/columns/' + column + '/summary'
    return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
def parse(self, key, key2=None,
          timeoutSecs=300, retryDelaySecs=0.2, initialDelaySecs=None, pollTimeoutSecs=180,
          noise=None, benchmarkLogging=None, noPoll=False, **kwargs):
    """Parse the source key into a frame in two steps: ParseSetup, then Parse.

    First requests 'ParseSetup.json' with the source key, then requests
    'Parse.json' using the fields of the setup result. The column names
    from the setup result are flattened into a '[a, b, c]' string.

    kwargs are passed with the Parse parameters through
    h2o_util.check_params_update_kwargs and are also forwarded to
    __do_json_request.

    key2, retryDelaySecs, initialDelaySecs, pollTimeoutSecs and noise are
    accepted but not used in this method.

    Returns:
        self.jobs(job_key) when noPoll is true; otherwise
        self.frames(<dest name>) once poll_job returns a result, or None
        when poll_job returns a falsy value.
    """
    #
    # Call ParseSetup?srcs=[keys] . . .
    #
    if benchmarkLogging:
        cloudPerfH2O.get_log_save(initOnly=True)

    # TODO: multiple keys
    parse_setup_params = {
        'srcs': "[" + key + "]"
    }
    # h2o_util.check_params_update_kwargs(params_dict, kwargs, 'parse_setup', print_params=True)
    setup_result = self.__do_json_request(jsonRequest="ParseSetup.json", timeout=timeoutSecs,
                                          params=parse_setup_params)
    H2O.verboseprint("ParseSetup result:", h2o_util.dump_json(setup_result))

    #
    # and then Parse?srcs=<keys list> and params from the ParseSetup result
    # Parse?srcs=[nfs://Users/rpeck/Source/h2o2/smalldata/logreg/prostate.csv]&hex=prostate.hex&pType=CSV&sep=44&ncols=9&checkHeader=0&singleQuotes=false&columnNames=[ID,%20CAPSULE,%20AGE,%20RACE,%20DPROS,%20DCAPS,%20PSA,%20VOL,%20GLEASON]
    #
    # Same '[a, b, c]' text the former manual loop produced.
    ascii_column_names = '[' + ', '.join(str(s) for s in setup_result['columnNames']) + ']'

    parse_params = {
        'srcs': "[" + setup_result['srcs'][0]['name'] + "]",  # TODO: cons up the whole list
        'hex': setup_result['hexName'],
        'pType': setup_result['pType'],
        'sep': setup_result['sep'],
        'ncols': setup_result['ncols'],
        'checkHeader': setup_result['checkHeader'],
        'singleQuotes': setup_result['singleQuotes'],
        'columnNames': ascii_column_names,
    }
    # Single-argument print(...) is valid on Python 2 and 3; the two spaces
    # reproduce the output of the former `print "parse_params: ", x` statement.
    print("parse_params:  " + str(parse_params))
    h2o_util.check_params_update_kwargs(parse_params, kwargs, 'parse', print_params=True)

    # NOTE(review): kwargs go to check_params_update_kwargs above and are
    # forwarded again here -- confirm that both are intended.
    parse_result = self.__do_json_request(jsonRequest="Parse.json", timeout=timeoutSecs,
                                          params=parse_params, **kwargs)
    H2O.verboseprint("Parse result:", h2o_util.dump_json(parse_result))

    job_key = parse_result['job']['name']

    # TODO: dislike having different shapes for noPoll and poll
    if noPoll:
        # Was `this.jobs(...)`: `this` is not defined in Python and raised
        # NameError whenever noPoll was requested.
        return self.jobs(job_key)

    job_json = self.poll_job(job_key, timeoutSecs=timeoutSecs)

    if job_json:
        dest_key = job_json['jobs'][0]['dest']['name']
        return self.frames(dest_key)

    return None