Example #1
  def confusion_matrix(self, metrics=None, thresholds=None):
    """
    Get the confusion matrix for the specified metrics and/or thresholds.

    :param metrics: A string (or list of strings) in {"min_per_class_accuracy", "absolute_MCC", "precision", "accuracy", "f0point5", "f2", "f1"}
    :param thresholds: A value (or list of values) between 0 and 1
    :return: a list of ConfusionMatrix objects (if there are more than one to return), or a single ConfusionMatrix (if there is only one)
    """
    # make lists out of metrics and thresholds arguments
    if metrics is None and thresholds is None: metrics = ["f1"]

    if isinstance(metrics, list): metrics_list = metrics
    elif metrics is None: metrics_list = []
    else: metrics_list = [metrics]

    if isinstance(thresholds, list): thresholds_list = thresholds
    elif thresholds is None: thresholds_list = []
    else: thresholds_list = [thresholds]

    # error check the metrics_list and thresholds_list
    if not all(isinstance(t, (int, float)) for t in thresholds_list) or \
            not all(0 <= t <= 1 for t in thresholds_list):
      raise ValueError("All thresholds must be numbers between 0 and 1 (inclusive).")

    if not all(m in ["min_per_class_accuracy", "absolute_MCC", "precision", "accuracy", "f0point5", "f2", "f1"] for m in metrics_list):
      raise ValueError("The only allowable metrics are min_per_class_accuracy, absolute_MCC, precision, accuracy, f0point5, f2, f1")

    # make one big list that combines the thresholds and metric-thresholds
    metrics_thresholds = [self.find_threshold_by_max_metric(m) for m in metrics_list]
    for mt in metrics_thresholds:
      thresholds_list.append(mt)

    thresh2d = self._metric_json['thresholds_and_metric_scores']
    actual_thresholds = [float(e[0]) for e in thresh2d.cell_values]
    cms = []
    for t in thresholds_list:
      idx = self.find_idx_by_threshold(t)
      row = thresh2d.cell_values[idx]
      # true/false negative and positive counts from the metrics table row
      tns = row[8]
      fns = row[9]
      fps = row[10]
      tps = row[11]
      p = tps + fns   # actual positives
      n = tns + fps   # actual negatives
      c0  = n - fps   # predicted 0, actual 0 (true negatives)
      c1  = p - tps   # predicted 0, actual 1 (false negatives)
      if t in metrics_thresholds:
        m = metrics_list[metrics_thresholds.index(t)]
        table_header = "Confusion Matrix (Act/Pred) for max " + m + " @ threshold = " + str(actual_thresholds[idx])
      else: table_header = "Confusion Matrix (Act/Pred) @ threshold = " + str(actual_thresholds[idx])
      cms.append(ConfusionMatrix(cm=[[c0,fps],[c1,tps]], domains=self._metric_json['domain'],
                                 table_header=table_header))

    if len(cms) == 1: return cms[0]
    else: return cms
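
A minimal usage sketch for this method (the file path and column names are hypothetical; assumes a running H2O cluster and a factor-encoded binary response):

import h2o
from h2o.estimators.gbm import H2OGradientBoostingEstimator

h2o.init()
df = h2o.import_file("path/to/binary_data.csv")   # hypothetical dataset
df["response"] = df["response"].asfactor()        # hypothetical binary response column

model = H2OGradientBoostingEstimator()
model.train(x=[c for c in df.columns if c != "response"], y="response", training_frame=df)

perf = model.model_performance(df)
cm = perf.confusion_matrix(metrics="f1")                 # a single ConfusionMatrix
cms = perf.confusion_matrix(metrics=["f1", "accuracy"],  # a list of ConfusionMatrix objects
                            thresholds=[0.3, 0.5])

Passing only metrics picks, for each metric, the threshold that maximizes it; passing thresholds snaps each value to the nearest threshold actually present in the scoring table.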
Example #2
    def show(self):
        """
    Display a short summary of the metrics.
    :return: None
    """
        metric_type = self._metric_json['__meta']['schema_type']
        types_w_glm = ['ModelMetricsRegressionGLM', 'ModelMetricsBinomialGLM']
        types_w_clustering = ['ModelMetricsClustering']
        types_w_mult = ['ModelMetricsMultinomial']
        types_w_bin = ['ModelMetricsBinomial', 'ModelMetricsBinomialGLM']
        types_w_r2 = ['ModelMetricsBinomial', 'ModelMetricsRegression'] + types_w_glm + types_w_mult
        types_w_logloss = types_w_bin + types_w_mult

        print()
        print(metric_type + ": " + self._algo)
        reported_on = "** Reported on {} data. **"
        if self._on_train:
            print(reported_on.format("train"))
        elif self._on_valid:
            print(reported_on.format("validation"))
        else:
            print(reported_on.format("test"))
        print()
        print("MSE: " + str(self.mse()))
        if metric_type in types_w_r2:
            print("R^2: " + str(self.r2()))
        if metric_type in types_w_logloss:
            print("LogLoss: " + str(self.logloss()))
        if metric_type in types_w_glm:
            print("Null degrees of freedom: " + str(self.null_degrees_of_freedom()))
            print("Residual degrees of freedom: " + str(self.residual_degrees_of_freedom()))
            print("Null deviance: " + str(self.null_deviance()))
            print("Residual deviance: " + str(self.residual_deviance()))
            print("AIC: " + str(self.aic()))
        if metric_type in types_w_bin:
            print("AUC: " + str(self.auc()))
            print("Gini: " + str(self.giniCoef()))
            ConfusionMatrix(cm=self.confusion_matrices()[0],
                            domains=self._metric_json['domain']).show()
            self._metric_json["max_criteria_and_metric_scores"].show()
        if metric_type in types_w_mult:
            self._metric_json['cm']['table'].show()
            self._metric_json['hit_ratio_table'].show()
        if metric_type in types_w_clustering:
            print("Total Within Cluster Sum of Square Error: " + str(self.tot_withinss()))
            print("Total Sum of Square Error to Grand Mean: " + str(self.totss()))
            print("Between Cluster Sum of Square Error: " + str(self.betweenss()))
            self._metric_json['centroid_stats'].show()
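
A usage sketch (reusing the hypothetical model and df from the sketch after Example #1): show() is the method behind the printed summary of a performance object.

perf = model.model_performance(df)
perf.show()   # for a binomial model: MSE, R^2, LogLoss, AUC, Gini, a confusion matrix, and the max-criteria table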
Example #3
    def confusion_matrix(self, metrics=None, thresholds=None):
        """
        Get the confusion matrix for the specified metrics and/or thresholds.

        :param metrics: A string (or list of strings) among metrics listed in :const:`max_metrics`. Defaults to 'f1'.
        :param thresholds: A value (or list of values) between 0 and 1.
        :returns: a list of ConfusionMatrix objects (if there are more than one to return), or a single ConfusionMatrix
            (if there is only one).
        """
        # make lists out of metrics and thresholds arguments
        if metrics is None and thresholds is None:
            metrics = ['f1']

        if isinstance(metrics, list):
            metrics_list = metrics
        elif metrics is None:
            metrics_list = []
        else:
            metrics_list = [metrics]

        if isinstance(thresholds, list):
            thresholds_list = thresholds
        elif thresholds is None:
            thresholds_list = []
        else:
            thresholds_list = [thresholds]

        # error check the metrics_list and thresholds_list
        assert_is_type(thresholds_list, [numeric])
        assert_satisfies(thresholds_list, all(0 <= t <= 1 for t in thresholds_list))

        if not all(m.lower() in H2OBinomialModelMetrics.max_metrics for m in metrics_list):
            raise ValueError("The only allowable metrics are {}".format(', '.join(H2OBinomialModelMetrics.max_metrics)))

        # make one big list that combines the thresholds and metric-thresholds
        metrics_thresholds = [self.find_threshold_by_max_metric(m) for m in metrics_list]
        for mt in metrics_thresholds:
            thresholds_list.append(mt)
        first_metrics_thresholds_offset = len(thresholds_list) - len(metrics_thresholds)

        thresh2d = self._metric_json['thresholds_and_metric_scores']
        actual_thresholds = [float(e[0]) for e in thresh2d.cell_values]
        cms = []
        for i, t in enumerate(thresholds_list):
            idx = self.find_idx_by_threshold(t)
            row = thresh2d.cell_values[idx]
            # true/false negative and positive counts from the metrics table row
            tns = row[11]
            fns = row[12]
            fps = row[13]
            tps = row[14]
            p = tps + fns   # actual positives
            n = tns + fps   # actual negatives
            c0 = n - fps    # predicted 0, actual 0 (true negatives)
            c1 = p - tps    # predicted 0, actual 1 (false negatives)
            if t in metrics_thresholds:
                m = metrics_list[i - first_metrics_thresholds_offset]
                table_header = "Confusion Matrix (Act/Pred) for max {} @ threshold = {}".format(m, actual_thresholds[idx])
            else:
                table_header = "Confusion Matrix (Act/Pred) @ threshold = {}".format(actual_thresholds[idx])
            cms.append(ConfusionMatrix(cm=[[c0, fps], [c1, tps]], domains=self._metric_json['domain'],
                                       table_header=table_header))

        if len(cms) == 1:
            return cms[0]
        else:
            return cms
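
Both versions resolve each requested threshold to a row of the thresholds_and_metric_scores table via find_idx_by_threshold; Example #3 additionally reads the counts from shifted column indices (11-14 instead of 8-11) and fixes the table-header lookup when several metrics share the same threshold. A standalone sketch of the nearest-threshold lookup (a hypothetical helper; H2O's actual implementation may differ in tie-breaking and error handling):

def nearest_threshold_idx(thresholds, t):
    # index of the table threshold closest in absolute distance to t
    return min(range(len(thresholds)), key=lambda i: abs(thresholds[i] - t))

actual = [0.9, 0.7, 0.5, 0.3, 0.1]
print(nearest_threshold_idx(actual, 0.42))  # -> 2, i.e. threshold 0.5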