def test_table():
    """Exercise construction, display and column indexing of H2OTwoDimTable."""
    # A small table with column headers only.
    small = H2OTwoDimTable(cell_values=[[1, 2, 3], [10, 20, 30]], col_header=list("ABC"))
    small.show()
    print()

    # Ten identical rows, so every column is a constant list of length 10.
    tall = H2OTwoDimTable(
        cell_values=[[1, 2, 4]] * 10,
        col_header=["q1", "q2", "q3"],
        row_header=range(10),
        table_header="Table 2",
    )
    tall.show()

    # Indexing by column name returns that column's values.
    for column_name, constant in (("q1", 1), ("q2", 2), ("q3", 4)):
        assert tall[column_name] == [constant] * 10

    # Indexing by (possibly negative) column position.
    assert tall[0] == [1] * 10
    assert tall[-1] == [4] * 10

    # Indexing by a list of positions / names returns a list of columns.
    assert tall[[0, 1]] == [[1] * 10, [2] * 10]
    assert tall[["q3"]] == [[4] * 10]

    # Rows of unequal length: an H2OTypeError is tolerated here.
    # NOTE: the test also passes if no exception is raised at all.
    try:
        H2OTwoDimTable(cell_values=[[1, 2, 3, 4], [1, 2, 3]])
    except H2OTypeError:
        pass
def glrm_catagorical_bug_fix():
    """
    Check that GLRM archetypes for the categorical prostate data match stored reference values.

    A GLRM model (k=5, recover_svd=True, seed=1234) is trained on
    ``smalldata/prostate/prostate_cat.csv`` and the cell values of its "archetypes"
    output table are compared against the reference table below with a tolerance of 1e-4.
    """
    print("Importing prostate.csv data...")

    # H2OTwoDimTable containing the correct archetype values run before Wendy optimized memory for GLRM
    cell_values = [
        ['Arch1', 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0,
         58.295918367346935, 8.810102040816325, 11.344897959183678, 6.285714285714286],
        ['Arch2', 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0,
         69.35514018691589, 7.538224299065424, 10.087757009345797, 5.6168224299065415],
        ['Arch3', 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0,
         64.68, 75.892, 10.812000000000001, 7.44],
        ['Arch4', 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0,
         68.77083333333333, 13.368750000000002, 49.44583333333334, 5.9375],
        ['Arch5', 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0,
         69.04901960784314, 16.140196078431373, 11.510000000000005, 7.235294117647059],
    ]
    col_header = [
        'dprosboth', 'dprosleft', 'dprosnone', 'dprosright', 'raceblack', 'racena', 'racewhite',
        'capsuleno', 'capsuleyes', 'dcapsno', 'dcapsyes', 'age', 'psa', 'vol', 'gleason',
    ]
    row_header = ['Arch1', 'Arch2', 'Arch3', 'Arch4', 'Arch5']
    table_header = "archetypes"
    correct_archetype = H2OTwoDimTable(cell_values=cell_values, col_header=col_header,
                                       row_header=row_header, table_header=table_header)

    prostateF = h2o.upload_file(pyunit_utils.locate("smalldata/prostate/prostate_cat.csv"))

    # Fixed seed so that the trained archetypes are comparable with the stored reference.
    glrm_h2o = H2OGeneralizedLowRankEstimator(k=5, recover_svd=True, seed=1234)
    glrm_h2o.train(x=prostateF.names, training_frame=prostateF)
    glrm_h2o.show()

    assert pyunit_utils.equal_2d_tables(glrm_h2o._model_json["output"]["archetypes"]._cell_values,
                                        correct_archetype._cell_values, tolerance=1e-4), \
        "GLRM model archetypes generated from current model are not correct."
def __new__(cls, keyvals):
    # Per the original note, this is used as the JSON decoder's ``object_pairs_hook``.
    # `keyvals` is a list of (key, value) tuples describing one decoded object, e.g.
    #     [("schema_version", 3), ("schema_name", "InitIDV3"), ("schema_type", "Iced")]

    # Step 1: find the schema name. The first matching "__meta" dict or "__schema"
    # string entry wins.
    schema = None
    for key, value in keyvals:
        if key == "__meta" and isinstance(value, dict):
            schema = value["schema_name"]
            break
        if key == "__schema" and is_type(value, str):
            schema = value
            break

    # Step 2: schemas with a dedicated class are built through that class' `make()`.
    specialised = (
        ("MetadataV3", H2OMetadataV3),
        ("CloudV3", H2OCluster),
        ("H2OErrorV3", H2OErrorV3),
        ("H2OModelBuilderErrorV3", H2OModelBuilderErrorV3),
        ("TwoDimTableV3", H2OTwoDimTable),
        ("ModelMetricsRegressionV3", H2ORegressionModelMetrics),
        ("ModelMetricsClusteringV3", H2OClusteringModelMetrics),
        ("ModelMetricsBinomialV3", H2OBinomialModelMetrics),
        ("ModelMetricsBinomialUpliftV3", H2OBinomialUpliftModelMetrics),
        ("ModelMetricsMultinomialV3", H2OMultinomialModelMetrics),
        ("ModelMetricsOrdinalV3", H2OOrdinalModelMetrics),
        ("ModelMetricsAutoEncoderV3", H2OAutoEncoderModelMetrics),
    )
    for schema_name, target_class in specialised:
        if schema == schema_name:
            return target_class.make(keyvals)

    # Step 3: anything else (including "no schema found") becomes a plain H2OResponse.
    return super(H2OResponse, cls).__new__(cls, keyvals)
def sort_by(self, metric, increasing=True):
    """
    Rank the models of the grid by the value of a metric.

    Parameters
    ----------
    metric: str
      Name of a metric method of this object (e.g. 'logloss', 'auc', 'r2'). A call
      expression with arguments is accepted too, for example 'logloss(valid=True)';
      a bare name gets '()' appended.
    increasing: boolean, optional
      True (default) lists the smallest metric value first, False the largest.

    Returns
    -------
      An H2OTwoDimTable with one row per model holding the model id, its
      hyperparameters and the metric value.

    Examples
    --------
    >>> grid_search_results = gs.sort_by('F1', False)
    >>> best_model_id = grid_search_results['Model Id'][0]
    >>> best_model = h2o.get_model(best_model_id)
    >>> best_model.predict(test_data)
    """
    if metric[-1] != ')':
        metric += '()'

    # WARNING: `metric` is evaluated as Python code; never pass untrusted strings here.
    scores = eval('self.' + metric + '.items()')
    ranked = sorted(scores, key=lambda id_and_score: id_and_score[1])

    # Transpose the (model id, score) pairs into two columns, then slot the
    # hyperparameter column in between them.
    columns = [list(column) for column in zip(*ranked)]
    hyperparams = [self.get_hyperparams(model_id, display=False) for model_id in columns[0]]
    columns.insert(1, hyperparams)

    if not increasing:
        for column in columns:
            column.reverse()

    # Show a bare metric name in the header when no arguments were supplied.
    if metric[-2] == '(':
        metric = metric[:-2]

    hyper_title = 'Hyperparameters: [' + ', '.join(list(self.hyper_params.keys())) + ']'
    rows = [list(row) for row in zip(*columns)]
    return H2OTwoDimTable(
        col_header=['Model Id', hyper_title, metric],
        table_header='Grid Search Results for ' + self.model.__class__.__name__,
        cell_values=rows)
def __init__(self, cm, domains=None, table_header=None):
    """
    Build the confusion-matrix table from raw counts and store it in ``self.table``.

    :param cm: non-empty list of count sequences. A list of length 2 is transposed before use.
    :param domains: optional list of class labels, used for both the row and the column headers.
        When omitted, the labels are the stringified indices ``0 .. nclass-1``.
    :param table_header: title of the table; defaults to "Confusion Matrix (Act/Pred)".
    :raises ValueError: if `cm` is None / empty, or is not a list.
    """
    if not cm:
        raise ValueError("Missing data, `cm` is None or empty")
    if not isinstance(cm, list):
        # str() is required: concatenating the type object itself raises TypeError,
        # which would mask the intended ValueError.
        raise ValueError("`cm` is not a list. Got: " + str(type(cm)))

    if len(cm) == 2:
        cm = list(zip(*cm))  # transpose if 2x2

    nclass = len(cm)
    class_errs = [0] * nclass
    class_sums = [0] * nclass
    class_err_strings = [0] * nclass
    # One independent row per class plus the "totals" row (no aliased inner lists).
    cell_values = [[0] * (1 + nclass) for _ in range(1 + nclass)]
    totals = [sum(c) for c in cm]
    total_errs = 0
    for i in range(nclass):
        # Errors for class i: the i-th entries of every element of `cm` except cm[i].
        class_errs[i] = sum(v[i] for v in cm[:i] + cm[(i + 1):])
        total_errs += class_errs[i]
        class_sums[i] = sum(v[i] for v in cm)
        class_err_strings[i] = " (" + str(class_errs[i]) + "/" + str(class_sums[i]) + ")"
        # From here on class_errs[i] holds the error *rate* (NaN when there are no counts).
        class_errs[i] = float("nan") if class_sums[i] == 0 else round(
            class_errs[i] / class_sums[i], self.ROUND)
        # and the cell_values are
        cell_values[i] = [v[i] for v in cm] + [str(class_errs[i])] + [class_err_strings[i]]

    # tally up the totals
    class_errs += [sum(class_errs)]  # placeholder slot, overwritten with the overall rate below
    totals += [sum(class_sums)]
    class_err_strings += [" (" + str(total_errs) + "/" + str(totals[-1]) + ")"]
    class_errs[-1] = float("nan") if totals[-1] == 0 else round(total_errs / totals[-1], self.ROUND)

    # do the last row of cell_values ... the "totals" row
    cell_values[-1] = totals[0:-1] + [str(class_errs[-1])] + [class_err_strings[-1]]

    if table_header is None:
        table_header = "Confusion Matrix (Act/Pred)"
    col_header = [""]  # no column label for the "rows" column
    if domains is not None:
        import copy
        # Deep copies so that the caller's `domains` list is never mutated below.
        row_header = copy.deepcopy(domains)
        col_header += copy.deepcopy(domains)
    else:
        row_header = [str(i) for i in range(nclass)]
        col_header += [str(i) for i in range(nclass)]
    row_header += ["Total"]
    col_header += ["Error", "Rate"]

    # Prefix every row with its label.
    for i, label in enumerate(row_header):
        cell_values[i].insert(0, label)

    self.table = H2OTwoDimTable(row_header=row_header, col_header=col_header,
                                table_header=table_header, cell_values=cell_values)
def get_status(self):
    """
    Return an H2OTwoDimTable holding the current cluster status information.

    The table has a single row; its columns are labelled by `_cluster_status_info_keys`.
    """
    status_row = self._get_cluster_status_info_values()
    return H2OTwoDimTable(col_header=_cluster_status_info_keys, cell_values=[status_row])
def show(self):
    """Print the models sorted by metric, or the hyperparameter combinations if there are no models."""
    combos = itertools.product(*self.hyper_params.values())

    if self.models:
        print(self.sorted_metric_table())
        return

    # No models yet: list every hyperparameter combination, numbered from 1.
    rows = []
    for number, combo in enumerate(combos, 1):
        rows.append([number, list(combo)])

    hyper_title = 'Hyperparameters: [' + ', '.join(self.hyper_params.keys()) + ']'
    title = 'Grid Search of Model ' + self.model.__class__.__name__
    print(H2OTwoDimTable(col_header=['Model', hyper_title], table_header=title, cell_values=rows))
def sort_by(self, metric, increasing=True):
    """Deprecated since 2016-12-12, use grid.get_grid() instead."""
    if metric[-1] != ')':
        metric += '()'

    # WARNING: `metric` is evaluated as Python code; never pass untrusted strings here.
    scores = eval('self.' + metric + '.items()')
    ranked = sorted(scores, key=lambda id_and_score: id_and_score[1])

    # Transpose the (model id, score) pairs into two columns, then slot the
    # hyperparameter column in between them.
    columns = [list(column) for column in zip(*ranked)]
    hyperparams = [self.get_hyperparams(model_id, display=False) for model_id in columns[0]]
    columns.insert(1, hyperparams)

    if not increasing:
        for column in columns:
            column.reverse()

    # Show a bare metric name in the header when no arguments were supplied.
    if metric[-2] == '(':
        metric = metric[:-2]

    hyper_title = 'Hyperparameters: [' + ', '.join(list(self.hyper_params.keys())) + ']'
    rows = [list(row) for row in zip(*columns)]
    return H2OTwoDimTable(
        col_header=['Model Id', hyper_title, metric],
        table_header='Grid Search Results for ' + self.model.__class__.__name__,
        cell_values=rows)
def get_status_details(self):
    """
    Return an H2OTwoDimTable with detailed current status information about each node.

    The cached cluster info is refreshed from "GET /3/Cloud" first when it is older
    than `REFRESH_INTERVAL`.
    """
    if time.time() > self._retrieved_at + self.REFRESH_INTERVAL:
        # Info is stale, need to refresh
        self._fill_from_h2ocluster(h2o.api("GET /3/Cloud"))

    detail_keys = list(_cluster_status_detailed_info_keys)

    # One row per node: a "Node <n>" label (1-based) followed by the node's detail values.
    rows = []
    for number, node in enumerate(self.nodes, 1):
        rows.append(["Node %d" % number] + [node[key] for key in detail_keys])

    headers = ["node"] + detail_keys
    # NOTE(review): the column headers are also passed as `row_header`; this mirrors the
    # existing behaviour -- confirm whether per-node labels were intended instead.
    return H2OTwoDimTable(cell_values=rows, col_header=headers, row_header=headers)
def __new__(cls, keyvals):
    # Per the original note, this is used as the JSON decoder's ``object_pairs_hook``.
    # `keyvals` is a list of (key, value) tuples describing one decoded object, e.g.
    #     [("schema_version", 3), ("schema_name", "InitIDV3"), ("schema_type", "Iced")]

    # Step 1: find the schema name. The first matching "__meta" dict or "__schema"
    # string entry wins.
    schema = None
    for key, value in keyvals:
        if key == "__meta" and isinstance(value, dict):
            schema = value["schema_name"]
            break
        if key == "__schema" and is_str(value):
            schema = value
            break

    # Step 2: schemas with a dedicated class are built by that class' constructor
    # or by its `make()` factory.
    builders = (
        ("CloudV3", H2OCluster),
        ("H2OErrorV3", H2OErrorV3),
        ("H2OModelBuilderErrorV3", H2OModelBuilderErrorV3),
        ("TwoDimTableV3", H2OTwoDimTable.make),
        ("ModelMetricsRegressionV3", H2ORegressionModelMetrics.make),
        ("ModelMetricsClusteringV3", H2OClusteringModelMetrics.make),
        ("ModelMetricsBinomialV3", H2OBinomialModelMetrics.make),
        ("ModelMetricsMultinomialV3", H2OMultinomialModelMetrics.make),
        ("ModelMetricsAutoEncoderV3", H2OAutoEncoderModelMetrics.make),
    )
    for schema_name, build in builders:
        if schema == schema_name:
            return build(keyvals)

    # Step 3: anything else (including "no schema found") becomes a plain H2OResponse.
    return super(H2OResponse, cls).__new__(cls, keyvals)