def _cluster_info():
    """
    Display a table describing the cluster behind the global H2O connection.

    The table is built from the cloud status cached on the connection
    (``__H2OCONN__._cld``). Per-node CPU and memory figures are summed over all
    nodes, and the cluster counts as healthy only if every node reports healthy.
    A connection ip of "localhost" is shown as "127.0.0.1".

    The cached cloud status is refreshed from the "Cloud" endpoint after the
    table has been built and before it is displayed, so the table reflects the
    previously cached status.

    :return: None
    """
    global __H2OCONN__
    cloud = __H2OCONN__._cld
    nodes = cloud['nodes']

    def _node_total(field):
        # Sum one numeric per-node field across the whole cluster.
        return sum(node[field] for node in nodes)

    total_cpus = _node_total('num_cpus')
    total_allowed_cpus = _node_total('cpus_allowed')
    total_max_mem = _node_total('max_mem')
    # Built as a list (not a generator) so every node's flag is read.
    is_healthy = all([node['healthy'] for node in nodes])

    if __H2OCONN__._ip == "localhost":
        ip = "127.0.0.1"
    else:
        ip = __H2OCONN__._ip

    rows = []
    rows.append(["H2O cluster uptime: ", get_human_readable_time(cloud["cloud_uptime_millis"])])
    rows.append(["H2O cluster version: ", cloud["version"]])
    rows.append(["H2O cluster name: ", cloud["cloud_name"]])
    rows.append(["H2O cluster total nodes: ", cloud["cloud_size"]])
    rows.append(["H2O cluster total memory: ", get_human_readable_size(total_max_mem)])
    rows.append(["H2O cluster total cores: ", str(total_cpus)])
    rows.append(["H2O cluster allowed cores: ", str(total_allowed_cpus)])
    rows.append(["H2O cluster healthy: ", str(is_healthy)])
    rows.append(["H2O Connection ip: ", ip])
    rows.append(["H2O Connection port: ", __H2OCONN__._port])

    # Update the cached version of the cloud status for later callers.
    __H2OCONN__._cld = H2OConnection.get_json(url_suffix="Cloud")
    h2o.H2ODisplay(rows)
def show(self, header=True): #if h2o.can_use_pandas(): # import pandas # pandas.options.display.max_rows = 20 # print pandas.DataFrame(self.cell_values,columns=self.col_header) # return print if header: print self.table_header + ":", if self.table_description: print self.table_description print table = copy.deepcopy(self.cell_values) nr = 0 if _is_list_of_lists(table): nr = len( table ) # only set if we truly have multiple rows... not just one long row :) if nr > 20: # create a truncated view of the table, first/last 5 rows trunc_table = [] trunc_table += [v for v in table[:5]] trunc_table.append(["---"] * len(table[0])) trunc_table += [v for v in table[(nr - 5):]] table = trunc_table h2o.H2ODisplay(table, self.col_header, numalign="left", stralign="left")
def show(self, header=True): print if header: print self.table_header + ":" print table = copy.deepcopy(self.cell_values) nr=0 if _is_list_of_lists(table): nr = len(table) # only set if we truly have multiple rows... not just one long row :) if nr > 20: # create a truncated view of the table, first/last 5 rows trunc_table =[] trunc_table += [ v for v in table[:5]] trunc_table.append(["---"]*len(table[0])) trunc_table += [v for v in table[(nr-5):]] table = trunc_table h2o.H2ODisplay(table, self.col_header, numalign="left", stralign="left")
def show(self, noprint=False): """ Evaluate and print. :return: None """ self.eager() if noprint: if isinstance(self._data, unicode): j = h2o.frame(self._data) data = [ c['data'] if c['type'] != "string" else c["string_data"] for c in j['frames'][0]['columns'][:] ] domains = [c['domain'] for c in j['frames'][0]['columns']] for i in range(len(data)): if domains[i] is not None: for j in range(len(data[i])): if data[i][j] == "NaN": continue data[i][j] = domains[i][int(data[i][j])] data = map(list, zip(*data)) return data[0:min(10, len(data))] return self._data else: if isinstance(self._data, unicode): j = h2o.frame(self._data) data = [c['data'] for c in j['frames'][0]['columns'][:]] elif isinstance(self._data, (int, float, str, list)): print self._data print return else: data = [self._data] t_data = map(list, zip(*data)) t_data = t_data[0:min(10, len(t_data))] for didx, d in enumerate(t_data): t_data[didx].insert(0, didx) headers = ["Row ID"] for i in range(len(t_data[0])): headers.append('') print "Displaying first " + str(len(t_data)) + " row(s)" h2o.H2ODisplay(t_data, headers)
def summary(self, header=True):
    """
    Print a detailed summary of the explored models.

    One row is shown per model in ``self.models``: the first row of that
    model's ``model_summary`` table, with its first cell replaced by the model
    id. Column names come from the last model's summary, with the first column
    renamed to "Model Id".

    If there are no models, a short notice is printed instead of a table.

    :param header: if True, print the "Grid Summary:" title above the table.
    :return: None
    """
    table = []
    last_summary = None
    for model in self.models:
        last_summary = model._model_json["output"]["model_summary"]
        row = list(last_summary.cell_values[0])
        row[0] = model.model_id
        table.append(row)

    # Single-argument parenthesised prints parse the same way under both the
    # print statement and the print function.
    print("")
    if header:
        print('Grid Summary:')
    print("")

    if last_summary is None:
        # No model means no summary to take column names from; previously this
        # fell through and failed on an unbound local.
        print("No models to summarize.")
        return

    h2o.H2ODisplay(table, ['Model Id'] + last_summary.col_header[1:],
                   numalign="left", stralign="left")
y="flsa_repeat_violator", training_frame=train, validation_frame=test) mod_elapsed = time.time() - s # In[ ]: # Model performance comparison train_ll_orig = dl_orig.model_performance(train).logloss() test_ll_orig = dl_orig.model_performance(test).logloss() train_ll_mod = dl_mod.model_performance(train).logloss() test_ll_mod = dl_mod.model_performance(test).logloss() # Print results in pretty HTML table header = ["Metric", "Original", "Reduced"] table = [ ["Runtime", orig_elapsed, mod_elapsed], ["Train LogLoss", train_ll_orig, train_ll_mod], ["Test LogLoss", test_ll_orig, test_ll_mod], ] h2o.H2ODisplay(table, header) # ### Shut down the cluster # # Shut down the cluster now that we are done using it. # In[ ]: h2o.shutdown(prompt=False)