コード例 #1
0
ファイル: expr.py プロジェクト: Mistobaan/h2o-3
 def _arg_to_expr(arg):
     if arg is not None and isinstance(arg, range): arg = list(arg)
     if arg is None:
         return "[]"  # empty list
     elif isinstance(arg, ExprNode):
         return arg._do_it(False)
     elif isinstance(arg, ASTId):
         return str(arg)
     elif isinstance(arg, bool):
         return "{}".format("TRUE" if arg else "FALSE")
     elif is_numeric(arg):
         return "{}".format("NaN" if math.isnan(arg) else arg)
     elif is_str(arg):
         return '"' + arg + '"'
     elif isinstance(arg, slice):
         return "[{}:{}]".format(0 if arg.start is None else arg.start,
                                 "NaN" if (arg.stop is None or math.isnan(arg.stop)) else (
                                 arg.stop) if arg.start is None else (arg.stop - arg.start))
     elif isinstance(arg, list):
         allstrs = all(is_str(elem) for elem in arg)
         if allstrs:
             return "[%s]" % " ".join('"%s"' % elem for elem in arg)
         else:
             return "[%s]" % " ".join("NaN" if i == 'NaN' or math.isnan(i) else str(i) for i in arg)
     raise ValueError("Unexpected arg type: " + str(type(arg)) + " " + str(arg.__class__) + " " + arg.__repr__())
コード例 #2
0
 def _arg_to_expr(arg):
     if arg is not None and isinstance(arg, range): arg = list(arg)
     if arg is None:
         return "[]"  # empty list
     elif isinstance(arg, ExprNode):
         return arg._do_it(False)
     elif isinstance(arg, ASTId):
         return str(arg)
     elif isinstance(arg, bool):
         return "{}".format("TRUE" if arg else "FALSE")
     elif is_numeric(arg):
         return "{}".format("NaN" if math.isnan(arg) else arg)
     elif is_str(arg):
         return '"' + arg + '"'
     elif isinstance(arg, slice):
         return "[{}:{}]".format(
             0 if arg.start is None else arg.start, "NaN" if
             (arg.stop is None or math.isnan(arg.stop)) else
             (arg.stop) if arg.start is None else (arg.stop - arg.start))
     elif isinstance(arg, list):
         allstrs = all(is_str(elem) for elem in arg)
         if allstrs:
             return "[%s]" % " ".join('"%s"' % elem for elem in arg)
         else:
             return "[%s]" % " ".join(
                 "NaN" if i == 'NaN' or math.isnan(i) else str(i)
                 for i in arg)
     raise ValueError("Unexpected arg type: " + str(type(arg)) + " " +
                      str(arg.__class__) + " " + arg.__repr__())
コード例 #3
0
def _handle_python_dicts(python_obj):
    header = list(python_obj.keys())
    is_valid = all([
        re.match(r'^[a-zA-Z_][a-zA-Z0-9_.]*$', col) for col in header
    ])  # is this a valid header?
    if not is_valid:
        raise ValueError(
            "Did not get a valid set of column names! Must match the regular expression: ^[a-zA-Z_][a-zA-Z0-9_.]*$ "
        )
    for k in python_obj:  # check that each value entry is a flat list/tuple or single int, float, or string
        v = python_obj[k]
        if isinstance(
                v,
            (tuple, list)):  # if value is a tuple/list, then it must be flat
            if _is_list_of_lists(v):
                raise ValueError("Values in the dictionary must be flattened!")
        elif is_numeric(v) or is_str(v):
            python_obj[k] = [v]
        else:
            raise ValueError(
                "Encountered invalid dictionary value when constructing H2OFrame. Got: {0}"
                .format(v))

    rows = list(map(list, itertools.zip_longest(*list(python_obj.values()))))
    data_to_write = [dict(list(zip(header, row))) for row in rows]
    return header, data_to_write
コード例 #4
0
ファイル: shared_utils.py プロジェクト: digideskio/h2o-3
def _handle_python_dicts(python_obj):
    header = list(python_obj.keys())
    is_valid = all([re.match(r'^[a-zA-Z_][a-zA-Z0-9_.]*$', col) for col in header])  # is this a valid header?
    if not is_valid:
        raise ValueError(
            "Did not get a valid set of column names! Must match the regular expression: ^[a-zA-Z_][a-zA-Z0-9_.]*$ ")
    for k in python_obj:  # check that each value entry is a flat list/tuple or single int, float, or string
        v = python_obj[k]
        if isinstance(v, (tuple, list)):  # if value is a tuple/list, then it must be flat
            if _is_list_of_lists(v):
                raise ValueError("Values in the dictionary must be flattened!")
        elif is_numeric(v) or is_str(v):
            python_obj[k] = [v]
        else:
            raise ValueError("Encountered invalid dictionary value when constructing H2OFrame. Got: {0}".format(v))

    rows = list(map(list, itertools.zip_longest(*list(python_obj.values()))))
    data_to_write = [dict(list(zip(header, row))) for row in rows]
    return header, data_to_write
コード例 #5
0
    def confusion_matrix(self, metrics=None, thresholds=None):
        """
        Get the confusion matrix for the specified metrics and/or thresholds.

        When neither argument is given, the matrix for the max-"f1" threshold is returned.
        Matrices for explicit ``thresholds`` come first, followed by one matrix per entry
        of ``metrics`` (taken at the threshold reported by
        ``find_threshold_by_max_metric``). The caller's lists are not modified.

        :param metrics: A string (or list of strings) in {"min_per_class_accuracy", "absolute_mcc", "precision", "recall", "specificity", "accuracy", "f0point5", "f2", "f1", "mean_per_class_accuracy"}
        :param thresholds: A value (or list of values) between 0 and 1 (inclusive)
        :return: a list of ConfusionMatrix objects (if there are more than one to return), or a single ConfusionMatrix (if there is only one)
        :raises ValueError: if a threshold is not a number in [0, 1] or a metric is not one of the allowed names
        """
        # Kept as a tuple (not a set) so an unhashable `metrics` entry still
        # ends in the ValueError below rather than a TypeError.
        allowed_metrics = ("min_per_class_accuracy", "absolute_mcc",
                           "precision", "recall", "specificity", "accuracy",
                           "f0point5", "f2", "f1", "mean_per_class_accuracy")

        # make lists out of metrics and thresholds arguments
        if metrics is None and thresholds is None: metrics = ["f1"]

        if isinstance(metrics, list):
            metrics_list = list(metrics)
        elif metrics is None:
            metrics_list = []
        else:
            metrics_list = [metrics]

        if isinstance(thresholds, list):
            # Copy so the caller's list is never mutated.
            thresholds_list = list(thresholds)
        elif thresholds is None:
            thresholds_list = []
        else:
            thresholds_list = [thresholds]

        # error check the metrics_list and thresholds_list
        # (range test must be a conjunction: `t >= 0 or t <= 1` holds for every number)
        if not all(is_numeric(t) for t in thresholds_list) or \
                not all(0 <= t <= 1 for t in thresholds_list):
            raise ValueError(
                "All thresholds must be numbers between 0 and 1 (inclusive).")

        if not all(m in allowed_metrics for m in metrics_list):
            raise ValueError("The only allowable metrics are " +
                             ", ".join(allowed_metrics))

        # One (threshold, metric) request per matrix; metric is None for an explicit
        # threshold. Pairing them avoids mislabelling an explicit threshold that
        # happens to equal a metric's threshold, or two metrics sharing a threshold.
        requests = [(t, None) for t in thresholds_list]
        requests.extend(
            (self.find_threshold_by_max_metric(m), m) for m in metrics_list)

        thresh2d = self._metric_json['thresholds_and_metric_scores']
        cms = []
        for t, m in requests:
            idx = self.find_idx_by_threshold(t)
            row = thresh2d.cell_values[idx]
            # Column positions follow the original code: first cell is the threshold,
            # cells 11-14 hold the tn/fn/fp/tp counts (table layout not visible here).
            actual_threshold = float(row[0])
            tns = row[11]
            fns = row[12]
            fps = row[13]
            tps = row[14]
            if m is not None:
                table_header = "Confusion Matrix (Act/Pred) for max " + m + " @ threshold = " + str(
                    actual_threshold)
            else:
                table_header = "Confusion Matrix (Act/Pred) @ threshold = " + str(
                    actual_threshold)
            cms.append(
                ConfusionMatrix(cm=[[tns, fps], [fns, tps]],
                                domains=self._metric_json['domain'],
                                table_header=table_header))

        if len(cms) == 1:
            return cms[0]
        else:
            return cms
コード例 #6
0
def _is_num_list(l):
    return isinstance(l, (tuple, list)) and all(is_numeric(i) for i in l)
コード例 #7
0
ファイル: metrics_base.py プロジェクト: Ansonparkour/h2o-3
    def confusion_matrix(self, metrics=None, thresholds=None):
        """
        Get the confusion matrix for the specified metrics and/or thresholds.

        When neither argument is given, the matrix for the max-"f1" threshold is returned.
        Matrices for explicit ``thresholds`` come first, followed by one matrix per entry
        of ``metrics`` (taken at the threshold reported by
        ``find_threshold_by_max_metric``). The caller's lists are not modified.

        :param metrics: A string (or list of strings) in {"min_per_class_accuracy", "absolute_mcc", "precision", "recall", "specificity", "accuracy", "f0point5", "f2", "f1", "mean_per_class_accuracy"}
        :param thresholds: A value (or list of values) between 0 and 1 (inclusive)
        :return: a list of ConfusionMatrix objects (if there are more than one to return), or a single ConfusionMatrix (if there is only one)
        :raises ValueError: if a threshold is not a number in [0, 1] or a metric is not one of the allowed names
        """
        # Kept as a tuple (not a set) so an unhashable `metrics` entry still
        # ends in the ValueError below rather than a TypeError.
        allowed_metrics = ("min_per_class_accuracy", "absolute_mcc", "precision", "recall", "specificity", "accuracy",
                           "f0point5", "f2", "f1", "mean_per_class_accuracy")

        # make lists out of metrics and thresholds arguments
        if metrics is None and thresholds is None: metrics = ["f1"]

        if isinstance(metrics, list):
            metrics_list = list(metrics)
        elif metrics is None:
            metrics_list = []
        else:
            metrics_list = [metrics]

        if isinstance(thresholds, list):
            # Copy so the caller's list is never mutated.
            thresholds_list = list(thresholds)
        elif thresholds is None:
            thresholds_list = []
        else:
            thresholds_list = [thresholds]

        # error check the metrics_list and thresholds_list
        # (range test must be a conjunction: `t >= 0 or t <= 1` holds for every number)
        if not all(is_numeric(t) for t in thresholds_list) or \
                not all(0 <= t <= 1 for t in thresholds_list):
            raise ValueError("All thresholds must be numbers between 0 and 1 (inclusive).")

        if not all(m in allowed_metrics for m in metrics_list):
            raise ValueError("The only allowable metrics are " + ", ".join(allowed_metrics))

        # One (threshold, metric) request per matrix; metric is None for an explicit
        # threshold. Pairing them avoids mislabelling an explicit threshold that
        # happens to equal a metric's threshold, or two metrics sharing a threshold.
        requests = [(t, None) for t in thresholds_list]
        requests.extend((self.find_threshold_by_max_metric(m), m) for m in metrics_list)

        thresh2d = self._metric_json['thresholds_and_metric_scores']
        cms = []
        for t, m in requests:
            idx = self.find_idx_by_threshold(t)
            row = thresh2d.cell_values[idx]
            # Column positions follow the original code: first cell is the threshold,
            # cells 11-14 hold the tn/fn/fp/tp counts (table layout not visible here).
            actual_threshold = float(row[0])
            tns = row[11]
            fns = row[12]
            fps = row[13]
            tps = row[14]
            if m is not None:
                table_header = "Confusion Matrix (Act/Pred) for max " + m + " @ threshold = " + str(
                    actual_threshold)
            else:
                table_header = "Confusion Matrix (Act/Pred) @ threshold = " + str(actual_threshold)
            cms.append(ConfusionMatrix(cm=[[tns, fps], [fns, tps]], domains=self._metric_json['domain'],
                                       table_header=table_header))

        if len(cms) == 1:
            return cms[0]
        else:
            return cms
コード例 #8
0
ファイル: shared_utils.py プロジェクト: digideskio/h2o-3
def _is_num_list(l):
    return isinstance(l, (tuple, list)) and all(is_numeric(i) for i in l)