def confusion_matrix(self, metrics=None, thresholds=None):
    """
    Get the confusion matrix for the specified metrics/thresholds.

    :param metrics: A string (or list of strings) in {"min_per_class_accuracy", "absolute_MCC", "precision",
        "accuracy", "f0point5", "f2", "f1"}
    :param thresholds: A value (or list of values) between 0 and 1.
    :return: a list of ConfusionMatrix objects (if there is more than one to return), or a single
        ConfusionMatrix (if there is only one).
    """
    # make lists out of metrics and thresholds arguments
    if metrics is None and thresholds is None:
        metrics = ["f1"]

    if isinstance(metrics, list):
        metrics_list = metrics
    elif metrics is None:
        metrics_list = []
    else:
        metrics_list = [metrics]

    if isinstance(thresholds, list):
        thresholds_list = thresholds
    elif thresholds is None:
        thresholds_list = []
    else:
        thresholds_list = [thresholds]

    # error check the metrics_list and thresholds_list
    if not all(isinstance(t, (int, float, long)) for t in thresholds_list) or \
            not all(0 <= t <= 1 for t in thresholds_list):
        raise ValueError("All thresholds must be numbers between 0 and 1 (inclusive).")
    allowed_metrics = ["min_per_class_accuracy", "absolute_MCC", "precision", "accuracy", "f0point5", "f2", "f1"]
    if not all(m in allowed_metrics for m in metrics_list):
        raise ValueError("The only allowable metrics are " + ", ".join(allowed_metrics))

    # make one big list that combines the thresholds and metric-thresholds
    metrics_thresholds = [self.find_threshold_by_max_metric(m) for m in metrics_list]
    for mt in metrics_thresholds:
        thresholds_list.append(mt)

    thresh2d = self._metric_json['thresholds_and_metric_scores']
    actual_thresholds = [float(e[0]) for e in thresh2d.cell_values]
    cms = []
    for t in thresholds_list:
        idx = self.find_idx_by_threshold(t)
        row = thresh2d.cell_values[idx]
        tns = row[8]
        fns = row[9]
        fps = row[10]
        tps = row[11]
        p = tps + fns
        n = tns + fps
        c0 = n - fps
        c1 = p - tps
        if t in metrics_thresholds:
            m = metrics_list[metrics_thresholds.index(t)]
            table_header = "Confusion Matrix (Act/Pred) for max " + m + " @ threshold = " + str(actual_thresholds[idx])
        else:
            table_header = "Confusion Matrix (Act/Pred) @ threshold = " + str(actual_thresholds[idx])
        cms.append(ConfusionMatrix(cm=[[c0, fps], [c1, tps]], domains=self._metric_json['domain'],
                                   table_header=table_header))

    if len(cms) == 1:
        return cms[0]
    else:
        return cms
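# Usage sketch (not part of the original source): assuming a binomial model performance
# object `perf` (hypothetical name) obtained from something like
# `model.model_performance(test_data)`, the method above could be exercised as:
#
#     cm_default = perf.confusion_matrix()                         # max-F1 threshold (default)
#     cm_at_half = perf.confusion_matrix(thresholds=0.5)           # single threshold -> single ConfusionMatrix
#     cm_list = perf.confusion_matrix(thresholds=[0.2, 0.5, 0.8])  # list -> list of ConfusionMatrix objects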
def show(self):
    """
    Display a short summary of the metrics.

    :return: None
    """
    metric_type = self._metric_json['__meta']['schema_type']
    types_w_glm = ['ModelMetricsRegressionGLM', 'ModelMetricsBinomialGLM']
    types_w_clustering = ['ModelMetricsClustering']
    types_w_mult = ['ModelMetricsMultinomial']
    types_w_bin = ['ModelMetricsBinomial', 'ModelMetricsBinomialGLM']
    types_w_r2 = ['ModelMetricsBinomial', 'ModelMetricsRegression'] + types_w_glm + types_w_mult
    types_w_logloss = types_w_bin + types_w_mult

    print
    print metric_type + ": " + self._algo
    reported_on = "** Reported on {} data. **"
    if self._on_train:
        print reported_on.format("train")
    elif self._on_valid:
        print reported_on.format("validation")
    else:
        print reported_on.format("test")
    print

    print "MSE: " + str(self.mse())
    if metric_type in types_w_r2:
        print "R^2: " + str(self.r2())
    if metric_type in types_w_logloss:
        print "LogLoss: " + str(self.logloss())
    if metric_type in types_w_glm:
        print "Null degrees of freedom: " + str(self.null_degrees_of_freedom())
        print "Residual degrees of freedom: " + str(self.residual_degrees_of_freedom())
        print "Null deviance: " + str(self.null_deviance())
        print "Residual deviance: " + str(self.residual_deviance())
        print "AIC: " + str(self.aic())
    if metric_type in types_w_bin:
        print "AUC: " + str(self.auc())
        print "Gini: " + str(self.giniCoef())
        ConfusionMatrix(cm=self.confusion_matrices()[0], domains=self._metric_json['domain']).show()
        self._metric_json["max_criteria_and_metric_scores"].show()
    if metric_type in types_w_mult:
        self._metric_json['cm']['table'].show()
        self._metric_json['hit_ratio_table'].show()
    if metric_type in types_w_clustering:
        print "Total Within Cluster Sum of Square Error: " + str(self.tot_withinss())
        print "Total Sum of Square Error to Grand Mean: " + str(self.totss())
        print "Between Cluster Sum of Square Error: " + str(self.betweenss())
        self._metric_json['centroid_stats'].show()
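# Usage sketch (not part of the original source): `show()` is normally reached through a
# metrics object rather than called directly, e.g. (hypothetical names):
#
#     perf = model.model_performance(test_data)
#     perf.show()   # prints MSE, then AUC/Gini and a confusion matrix for binomial models,
#                   # GLM deviances/AIC, multinomial hit ratios, or clustering SSE, as applicable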
def confusion_matrix(self, metrics=None, thresholds=None):
    """
    Get the confusion matrix for the specified metrics/thresholds.

    :param metrics: A string (or list of strings) among the metrics listed in :const:`max_metrics`.
        Defaults to 'f1'.
    :param thresholds: A value (or list of values) between 0 and 1.
    :returns: a list of ConfusionMatrix objects (if there is more than one to return), or a single
        ConfusionMatrix (if there is only one).
    """
    # make lists out of metrics and thresholds arguments
    if metrics is None and thresholds is None:
        metrics = ['f1']

    if isinstance(metrics, list):
        metrics_list = metrics
    elif metrics is None:
        metrics_list = []
    else:
        metrics_list = [metrics]

    if isinstance(thresholds, list):
        thresholds_list = thresholds
    elif thresholds is None:
        thresholds_list = []
    else:
        thresholds_list = [thresholds]

    # error check the metrics_list and thresholds_list
    assert_is_type(thresholds_list, [numeric])
    assert_satisfies(thresholds_list, all(0 <= t <= 1 for t in thresholds_list))

    if not all(m.lower() in H2OBinomialModelMetrics.max_metrics for m in metrics_list):
        raise ValueError("The only allowable metrics are {}".format(', '.join(H2OBinomialModelMetrics.max_metrics)))

    # make one big list that combines the thresholds and metric-thresholds
    metrics_thresholds = [self.find_threshold_by_max_metric(m) for m in metrics_list]
    for mt in metrics_thresholds:
        thresholds_list.append(mt)
    first_metrics_thresholds_offset = len(thresholds_list) - len(metrics_thresholds)

    thresh2d = self._metric_json['thresholds_and_metric_scores']
    actual_thresholds = [float(e[0]) for e in thresh2d.cell_values]
    cms = []
    for i, t in enumerate(thresholds_list):
        idx = self.find_idx_by_threshold(t)
        row = thresh2d.cell_values[idx]
        tns = row[11]
        fns = row[12]
        fps = row[13]
        tps = row[14]
        p = tps + fns
        n = tns + fps
        c0 = n - fps
        c1 = p - tps
        if t in metrics_thresholds:
            m = metrics_list[i - first_metrics_thresholds_offset]
            table_header = "Confusion Matrix (Act/Pred) for max {} @ threshold = {}".format(m, actual_thresholds[idx])
        else:
            table_header = "Confusion Matrix (Act/Pred) @ threshold = {}".format(actual_thresholds[idx])
        cms.append(ConfusionMatrix(cm=[[c0, fps], [c1, tps]], domains=self._metric_json['domain'],
                                   table_header=table_header))

    if len(cms) == 1:
        return cms[0]
    else:
        return cms
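# Usage sketch (not part of the original source): this newer variant validates its arguments
# with `assert_is_type` / `assert_satisfies` / `numeric`, which in h2o-py are provided by
# `h2o.utils.typechecks`. Assuming a binomial performance object `perf` (hypothetical name):
#
#     cm_f1_f2 = perf.confusion_matrix(metrics=["f1", "f2"])               # list of two ConfusionMatrix objects
#     mixed = perf.confusion_matrix(metrics="accuracy", thresholds=[0.4])  # metric-derived and user thresholds combined
#     # a single requested matrix is returned directly; multiple requests come back as a list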