コード例 #1
0
    def mean_per_class_error(self,
                             thresholds=None,
                             train=False,
                             valid=False,
                             xval=False):
        """
        Get the mean per class error for a set of thresholds.

        The error is computed as ``1 - mean_per_class_accuracy`` for every entry returned by the
        underlying metrics object.

        The metrics objects to query are selected by ``ModelBase._get_metrics`` from the ``train``,
        ``valid`` and ``xval`` flags. If all flags are False (default), the training metric value is
        documented to be returned. When the selection holds exactly one entry its value is returned
        directly; otherwise a dictionary keyed like the selection ("train", "valid", "xval") is returned.

        :param thresholds: If None, then the thresholds in this set of metrics will be used.
        :param bool train: If True, return the mean per class error value for the training data.
        :param bool valid: If True, return the mean per class error value for the validation data.
        :param bool xval: If True, return the mean per class error value for each of the cross-validated splits.

        :returns: The mean per class error values for the specified key(s); ``None`` for a key whose
            metrics object is missing.
        """
        metrics_by_split = ModelBase._get_metrics(self, train, valid, xval)
        errors = {}
        for split, scores in viewitems(metrics_by_split):
            if scores is not None:
                accuracies = scores.metric("mean_per_class_accuracy", thresholds=thresholds)
                # Keep the first element of each pair (presumably the threshold) and flip
                # the accuracy in the second element into an error.
                errors[split] = [[pair[0], 1 - pair[1]] for pair in accuracies]
            else:
                errors[split] = None
        if len(errors) == 1:
            return next(iter(errors.values()))
        return errors
コード例 #2
0
    def metric(self,
               metric,
               thresholds=None,
               train=False,
               valid=False,
               xval=False):
        """
        Get the value of the named metric for a set of thresholds.

        The metrics objects to query are selected by ``ModelBase._get_metrics`` from the ``train``,
        ``valid`` and ``xval`` flags. If all flags are False (default), the training metric value is
        documented to be returned. When the selection holds exactly one entry its value is returned
        directly; otherwise a dictionary keyed like the selection ("train", "valid", "xval") is returned.

        :param str metric: name of the metric to retrieve.
        :param thresholds: If None, then the thresholds in this set of metrics will be used.
        :param bool train: If True, return the metric value for the training data.
        :param bool valid: If True, return the metric value for the validation data.
        :param bool xval: If True, return the metric value for each of the cross-validated splits.

        :returns: The metric values for the specified key(s); ``None`` for a key whose metrics
            object is missing.
        """
        metrics_by_split = ModelBase._get_metrics(self, train, valid, xval)
        values = {
            split: (scores.metric(metric, thresholds) if scores is not None else None)
            for split, scores in viewitems(metrics_by_split)
        }
        if len(values) == 1:
            return next(iter(values.values()))
        return values
コード例 #3
0
ファイル: binomial.py プロジェクト: StevenLOL/h2o-3
    def F1(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the F1 value for a set of thresholds.

        The metrics objects to query are selected by ``ModelBase._get_metrics`` from the ``train``,
        ``valid`` and ``xval`` flags. If all flags are False (default), the training metric value is
        documented to be returned. When the selection holds exactly one entry its value is returned
        directly; otherwise a dictionary keyed like the selection ("train", "valid", "xval") is returned.

        :param thresholds: If None, then the thresholds in this set of metrics will be used.
        :param bool train: If True, return the F1 value for the training data.
        :param bool valid: If True, return the F1 value for the validation data.
        :param bool xval: If True, return the F1 value for each of the cross-validated splits.

        :returns: The F1 values for the specified key(s); ``None`` for a key whose metrics object is missing.

        :examples:
            >>> import h2o as ml
            >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
            >>> ml.init()
            >>> rows=[[1,2,3,4,0],[2,1,2,4,1],[2,1,4,2,1],[0,1,2,34,1],[2,3,4,1,0]]*50
            >>> fr = ml.H2OFrame(rows)
            >>> fr[4] = fr[4].asfactor()
            >>> model = H2OGradientBoostingEstimator(ntrees=10, max_depth=10, nfolds=4)
            >>> model.train(x=range(4), y=4, training_frame=fr)
            >>> model.F1(train=True)
        """
        metrics_by_split = ModelBase._get_metrics(self, train, valid, xval)
        f1_values = {}
        for split, scores in viewitems(metrics_by_split):
            if scores is None:
                f1_values[split] = None
            else:
                f1_values[split] = scores.metric("f1", thresholds=thresholds)
        # A single requested split is unwrapped; several are returned as a dict.
        if len(f1_values) == 1:
            return next(iter(f1_values.values()))
        return f1_values
コード例 #4
0
    def F1(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the F1 value for a set of thresholds.

        ``ModelBase._get_metrics`` picks the metrics objects to query based on the ``train``, ``valid``
        and ``xval`` flags. If all flags are False (default), the training metric value is documented
        to be returned. A selection with exactly one entry yields that entry's value directly; any
        other selection yields a dictionary keyed like the selection ("train", "valid", "xval").

        :param thresholds: If None, then the thresholds in this set of metrics will be used.
        :param bool train: If True, return the F1 value for the training data.
        :param bool valid: If True, return the F1 value for the validation data.
        :param bool xval: If True, return the F1 value for each of the cross-validated splits.

        :returns: The F1 values for the specified key(s); ``None`` for a key whose metrics object is missing.

        :examples:
            >>> import h2o as ml
            >>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
            >>> ml.init()
            >>> rows=[[1,2,3,4,0],[2,1,2,4,1],[2,1,4,2,1],[0,1,2,34,1],[2,3,4,1,0]]*50
            >>> fr = ml.H2OFrame(rows)
            >>> fr[4] = fr[4].asfactor()
            >>> model = H2OGradientBoostingEstimator(ntrees=10, max_depth=10, nfolds=4)
            >>> model.train(x=range(4), y=4, training_frame=fr)
            >>> model.F1(train=True)
        """
        selected = ModelBase._get_metrics(self, train, valid, xval)
        per_split = {
            key: (None if scores is None else scores.metric("f1", thresholds=thresholds))
            for key, scores in viewitems(selected)
        }
        if len(per_split) != 1:
            return per_split
        return next(iter(per_split.values()))
コード例 #5
0
ファイル: model_base.py プロジェクト: dataplayr/h2o-3
    def rmsle(self, train=False, valid=False, xval=False):
        """
        Get the rmsle.

        The metrics objects to query are selected by ``ModelBase._get_metrics`` from the ``train``,
        ``valid`` and ``xval`` flags. If all flags are False (default), the training metric value is
        documented to be returned. When the selection holds exactly one entry its value is returned
        directly; otherwise a dictionary keyed like the selection ("train", "valid", "xval") is returned.

        Parameters
        ----------
        train : bool, default=False
          If train is True, then return the rmsle value for the training data.
        valid : bool, default=False
          If valid is True, then return the rmsle value for the validation data.
        xval : bool, default=False
          If xval is True, then return the rmsle value for the cross validation data.

        Returns
        -------
          The rmsle for this regression model; ``None`` for a key whose metrics object is missing.
        """
        metrics_by_split = ModelBase._get_metrics(self, train, valid, xval)
        rmsle_values = {
            split: (scores.rmsle() if scores is not None else None)
            for split, scores in viewitems(metrics_by_split)
        }
        if len(rmsle_values) == 1:
            return next(iter(rmsle_values.values()))
        return rmsle_values
コード例 #6
0
 def _tabulate(self, tablefmt="simple", rollups=False):
     """
     Return a pretty tabulated string of all the cached data, with column names as headers.

     :param tablefmt: table format name forwarded to ``tabulate.tabulate``.
     :param rollups: if True, prepend the per-column rollup statistics (type, mins, mean, maxs, sigma,
         zeros, missing) to every column and add a leading row-label column.
     :returns: the string produced by ``tabulate.tabulate``.
     """
     # Make sure the cache is populated before reading ``self._data``.
     if not self.is_valid():
         self.fill()
     table = collections.OrderedDict()
     if rollups:
         # Use any one column to learn how many cached rows are displayed, then build the
         # row-header column: rollup labels first, followed by the row indices.
         sample = next(iter(viewvalues(self._data)))
         row_labels = [str(i) for i in range(len(sample['data']))]
         table[""] = ["type", "mins", "mean", "maxs", "sigma", "zeros", "missing"] + row_labels
     for name, column in viewitems(self._data):
         cells = column['data']
         kind = column["type"]
         if kind == "enum":
             # Values index into the column's domain; NaN is shown as an empty cell.
             levels = column['domain']
             cells = ["" if math.isnan(code) else levels[int(code)] for code in cells]
         elif kind == "time":
             # Values are divided by 1000 before ``time.gmtime`` (presumably epoch milliseconds).
             cells = ["" if math.isnan(stamp) else time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(stamp / 1000))
                      for stamp in cells]
         if rollups:
             lowest = None
             if column['mins']:
                 lowest = column['mins'][0]
             highest = None
             if column['maxs']:
                 highest = column['maxs'][0]
             stats = [column['type'], lowest, column['mean'], highest, column['sigma'],
                      column['zero_count'], column['missing_count']]
             cells = stats + cells
         table[name] = cells
     return tabulate.tabulate(table, headers="keys", tablefmt=tablefmt)
コード例 #7
0
    def confusion_matrix(self,
                         metrics=None,
                         thresholds=None,
                         train=False,
                         valid=False,
                         xval=False):
        """
        Get the confusion matrix for the specified metrics/thresholds.

        The metrics objects to query are selected by ``ModelBase._get_metrics`` from the ``train``,
        ``valid`` and ``xval`` flags. If all flags are False (default), the training metric value is
        documented to be returned. When the selection holds exactly one entry its value is returned
        directly; otherwise a dictionary keyed like the selection ("train", "valid", "xval") is returned.

        :param metrics: One or more of ``"min_per_class_accuracy"``, ``"absolute_mcc"``, ``"tnr"``, ``"fnr"``,
            ``"fpr"``, ``"tpr"``, ``"precision"``, ``"accuracy"``, ``"f0point5"``, ``"f2"``, ``"f1"``.
        :param thresholds: If None, then the thresholds in this set of metrics will be used.
        :param bool train: If True, return the confusion matrix value for the training data.
        :param bool valid: If True, return the confusion matrix value for the validation data.
        :param bool xval: If True, return the confusion matrix value for each of the cross-validated splits.

        :returns: The confusion matrix values for the specified key(s); ``None`` for a key whose
            metrics object is missing.
        """
        metrics_by_split = ModelBase._get_metrics(self, train, valid, xval)
        matrices = {}
        for split, scores in viewitems(metrics_by_split):
            if scores is None:
                matrices[split] = None
            else:
                matrices[split] = scores.confusion_matrix(metrics=metrics, thresholds=thresholds)
        if len(matrices) == 1:
            return next(iter(matrices.values()))
        return matrices
コード例 #8
0
ファイル: expr.py プロジェクト: michalkurka/h2o-3
 def _tabulate(self, tablefmt="simple", rollups=False, rows=10):
     """
     Return a pretty tabulated string of all the cached data, with column names as headers.

     :param tablefmt: table format name forwarded to ``tabulate.tabulate``.
     :param rollups: if True, prepend the per-column rollup statistics (type, mins, mean, maxs, sigma,
         zeros, missing) to every column and add a leading row-label column.
     :param rows: forwarded to ``self.fill(rows=...)`` when the cache is not valid.
     :returns: the string produced by ``tabulate.tabulate``.
     """
     # Make sure the cache is populated before reading ``self._data``.
     if not self.is_valid():
         self.fill(rows=rows)
     table = collections.OrderedDict()
     if rollups:
         # Use any one column to learn how many cached rows are displayed, then build the
         # row-header column: rollup labels first, followed by the row indices.
         sample = next(iter(viewvalues(self._data)))
         row_labels = [str(i) for i in range(len(sample['data']))]
         table[""] = ["type", "mins", "mean", "maxs", "sigma", "zeros", "missing"] + row_labels
     for name, column in viewitems(self._data):
         cells = column['data']
         kind = column["type"]
         if kind == "enum":
             # Values index into the column's domain; NaN is shown as an empty cell.
             levels = column['domain']
             cells = ["" if math.isnan(code) else levels[int(code)] for code in cells]
         elif kind == "time":
             # Values are divided by 1000 before ``time.gmtime`` (presumably epoch milliseconds).
             cells = ["" if math.isnan(stamp) else time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(stamp / 1000))
                      for stamp in cells]
         if rollups:
             # Numeric rollups are blanked out for enum columns.
             lowest = None
             if column['mins'] and column["type"] != "enum":
                 lowest = column['mins'][0]
             highest = None
             if column['maxs'] and column["type"] != "enum":
                 highest = column['maxs'][0]
             if column['type'] == "enum":
                 # NOTE: this overwrites the cached rollup entries of the column in place.
                 column['mean'] = column['sigma'] = column['zero_count'] = None
             stats = [column['type'], lowest, column['mean'], highest, column['sigma'],
                      column['zero_count'], column['missing_count']]
             cells = stats + cells
         table[name] = cells
     return tabulate.tabulate(table, headers="keys", tablefmt=tablefmt)
コード例 #9
0
ファイル: typechecks.py プロジェクト: StevenLOL/h2o-3
 def check(self, var):
     """
     Return True if the variable matches this type, and False otherwise.

     ``var`` matches when it is a dict, every one of its keys is declared in ``self._types``, and
     the value stored under each declared key passes ``_check_type`` for that key's type. A declared
     key absent from ``var`` is checked as ``None``.
     """
     if not isinstance(var, dict):
         return False
     # Keys that are not declared in the type make the dict invalid.
     if not all(key in self._types for key in var):
         return False
     return all(_check_type(var.get(key, None), expected)
                for key, expected in viewitems(self._types))
コード例 #10
0
ファイル: typechecks.py プロジェクト: slang1998/h2o-3
 def check(self, var):
     """
     Return True if the variable matches this type, and False otherwise.

     Only dicts can match. Every key of ``var`` must be declared in ``self._types``, and for each
     declared key the corresponding value (``None`` when the key is absent from ``var``) must pass
     ``_check_type`` against the declared type.
     """
     if not isinstance(var, dict):
         return False
     declared = self._types
     for key in var:
         # An undeclared key disqualifies the whole dict.
         if key not in declared:
             return False
     return all(_check_type(var.get(key, None), ktype) for key, ktype in viewitems(declared))
コード例 #11
0
ファイル: typechecks.py プロジェクト: slang1998/h2o-3
def _check_type(var, vtype):
    """
    Return True if the variable is of the specified type, and False otherwise.

    :param var: variable to check
    :param vtype: expected variable's type
    """
    if vtype is None:
        return var is None
    if isinstance(vtype, _primitive_type):
        return var == vtype
    if vtype is str:
        return isinstance(var, _str_type)
    if vtype is int:
        return isinstance(var, _int_type)
    if vtype is numeric:
        return isinstance(var, _num_type)
    if isinstance(vtype, MagicType):
        return vtype.check(var)
    if isinstance(vtype, type):
        # ``vtype`` is a name of the class, or a built-in type such as "list", "tuple", etc
        return isinstance(var, vtype)
    if isinstance(vtype, list):
        # ``vtype`` is a list literal
        elem_type = U(*vtype)
        return isinstance(var, list) and all(
            _check_type(item, elem_type) for item in var)
    if isinstance(vtype, set):
        # ``vtype`` is a set literal
        elem_type = U(*vtype)
        return isinstance(var, set) and all(
            _check_type(item, elem_type) for item in var)
    if isinstance(vtype, tuple):
        # ``vtype`` is a tuple literal
        return (isinstance(var, tuple) and len(vtype) == len(var) and all(
            _check_type(var[i], vtype[i]) for i in range(len(vtype))))
    if isinstance(vtype, dict):
        # ``vtype`` is a dict literal
        ttkv = U(*viewitems(vtype))
        return isinstance(var, dict) and all(
            _check_type(kv, ttkv) for kv in viewitems(var))
    if isinstance(vtype, (FunctionType, BuiltinFunctionType)):
        return vtype(var)
    raise RuntimeError("Ivalid type %r in _check_type()" % vtype)
コード例 #12
0
ファイル: typechecks.py プロジェクト: h2oai/h2o-3
def _check_type(var, vtype):
    """
    Return True if the variable is of the specified type, and False otherwise.

    :param var: variable to check
    :param vtype: expected variable's type
    """
    if vtype is None:
        return var is None
    if isinstance(vtype, _primitive_type):
        return var == vtype
    if vtype is str:
        return isinstance(var, _str_type)
    if vtype is int:
        return isinstance(var, _int_type)
    if vtype is numeric:
        return isinstance(var, _num_type)
    if isinstance(vtype, MagicType):
        return vtype.check(var)
    if isinstance(vtype, type):
        # ``vtype`` is a name of the class, or a built-in type such as "list", "tuple", etc
        return isinstance(var, vtype)
    if isinstance(vtype, list):
        # ``vtype`` is a list literal
        elem_type = U(*vtype)
        return isinstance(var, list) and all(_check_type(item, elem_type) for item in var)
    if isinstance(vtype, set):
        # ``vtype`` is a set literal
        elem_type = U(*vtype)
        return isinstance(var, set) and all(_check_type(item, elem_type) for item in var)
    if isinstance(vtype, tuple):
        # ``vtype`` is a tuple literal
        return (
            isinstance(var, tuple)
            and len(vtype) == len(var)
            and all(_check_type(var[i], vtype[i]) for i in range(len(vtype)))
        )
    if isinstance(vtype, dict):
        # ``vtype`` is a dict literal
        ttkv = U(*viewitems(vtype))
        return isinstance(var, dict) and all(_check_type(kv, ttkv) for kv in viewitems(var))
    if isinstance(vtype, (FunctionType, BuiltinFunctionType)):
        return vtype(var)
    raise RuntimeError("Ivalid type %r in _check_type()" % vtype)
コード例 #13
0
 def _delegate_to_metrics(self, method, train=False, valid=False, xval=False, **kwargs):
     """
     Call the named method on each selected metrics object and collect the results.

     The metrics objects are selected by ``ModelBase._get_metrics`` from the ``train``, ``valid`` and
     ``xval`` flags. When the selection holds exactly one entry its result is returned directly;
     otherwise a dictionary keyed like the selection is returned.

     :param method: name of the method to invoke on every metrics object.
     :param train: forwarded to ``ModelBase._get_metrics``.
     :param valid: forwarded to ``ModelBase._get_metrics``.
     :param xval: forwarded to ``ModelBase._get_metrics``.
     :param kwargs: keyword arguments passed through to the invoked method.

     :returns: the method result(s); ``None`` for a key whose metrics object is missing.
     :raises ValueError: if a metrics object has no callable attribute named ``method``.
     """
     metrics_by_split = ModelBase._get_metrics(self, train, valid, xval)
     results = {}
     for split, scores in viewitems(metrics_by_split):
         if scores is None:
             results[split] = None
             continue
         if not (hasattr(scores, method) and callable(getattr(scores, method))):
             raise ValueError('no method {} in {}'.format(method, type(scores)))
         results[split] = getattr(scores, method)(**kwargs)
     if len(results) == 1:
         return next(iter(results.values()))
     return results
コード例 #14
0
ファイル: model_base.py プロジェクト: dataplayr/h2o-3
    def gini(self, train=False, valid=False, xval=False):
        """
        Get the Gini coefficient.

        The metrics objects to query are selected by ``ModelBase._get_metrics`` from the ``train``,
        ``valid`` and ``xval`` flags. If all flags are False (default), the training metric value is
        documented to be returned. When the selection holds exactly one entry its value is returned
        directly; otherwise a dictionary keyed like the selection ("train", "valid", "xval") is returned.

        :param train: If train is True, then return the Gini Coefficient value for the training data.
        :param valid: If valid is True, then return the Gini Coefficient value for the validation data.
        :param xval:  If xval is True, then return the Gini Coefficient value for the cross validation data.

        :returns: The Gini Coefficient for this binomial model; ``None`` for a key whose metrics
            object is missing.
        """
        metrics_by_split = ModelBase._get_metrics(self, train, valid, xval)
        coefficients = {
            split: (scores.gini() if scores is not None else None)
            for split, scores in viewitems(metrics_by_split)
        }
        if len(coefficients) == 1:
            return next(iter(coefficients.values()))
        return coefficients
コード例 #15
0
ファイル: model_base.py プロジェクト: dataplayr/h2o-3
    def mean_residual_deviance(self, train=False, valid=False, xval=False):
        """
        Get the Mean Residual Deviances.

        The metrics objects to query are selected by ``ModelBase._get_metrics`` from the ``train``,
        ``valid`` and ``xval`` flags. If all flags are False (default), the training metric value is
        documented to be returned. When the selection holds exactly one entry its value is returned
        directly; otherwise a dictionary keyed like the selection ("train", "valid", "xval") is returned.

        :param train: If train is True, then return the Mean Residual Deviance value for the training data.
        :param valid: If valid is True, then return the Mean Residual Deviance value for the validation data.
        :param xval:  If xval is True, then return the Mean Residual Deviance value for the cross validation data.

        :returns: The Mean Residual Deviance for this regression model; ``None`` for a key whose
            metrics object is missing.
        """
        metrics_by_split = ModelBase._get_metrics(self, train, valid, xval)
        deviances = {}
        for split, scores in viewitems(metrics_by_split):
            if scores is None:
                deviances[split] = None
            else:
                deviances[split] = scores.mean_residual_deviance()
        if len(deviances) == 1:
            return next(iter(deviances.values()))
        return deviances
コード例 #16
0
ファイル: binomial.py プロジェクト: StevenLOL/h2o-3
    def find_threshold_by_max_metric(self, metric, train=False, valid=False, xval=False):
        """
        Call ``find_threshold_by_max_metric(metric)`` on each selected metrics object.

        Presumably this yields the threshold at which ``metric`` is maximal; the computation itself
        lives in the metrics object.

        The metrics objects to query are selected by ``ModelBase._get_metrics`` from the ``train``,
        ``valid`` and ``xval`` flags. If all flags are False (default), the training metric value is
        documented to be returned. When the selection holds exactly one entry its value is returned
        directly; otherwise a dictionary keyed like the selection ("train", "valid", "xval") is returned.

        :param str metric: The metric to search for.
        :param bool train: If True, return the find threshold by max metric value for the training data.
        :param bool valid: If True, return the find threshold by max metric value for the validation data.
        :param bool xval: If True, return the find threshold by max metric value for each of the cross-validated splits.

        :returns: The find threshold by max metric values for the specified key(s); ``None`` for a key
            whose metrics object is missing.
        """
        metrics_by_split = ModelBase._get_metrics(self, train, valid, xval)
        thresholds_found = {
            split: (scores.find_threshold_by_max_metric(metric) if scores is not None else None)
            for split, scores in viewitems(metrics_by_split)
        }
        if len(thresholds_found) == 1:
            return next(iter(thresholds_found.values()))
        return thresholds_found
コード例 #17
0
ファイル: binomial.py プロジェクト: StevenLOL/h2o-3
    def gains_lift(self, train=False, valid=False, xval=False):
        """
        Get the Gains/Lift table for the specified metrics.

        The metrics objects to query are selected by ``ModelBase._get_metrics`` from the ``train``,
        ``valid`` and ``xval`` flags. If all flags are False (default), the training metric Gains/Lift
        table is documented to be returned. When the selection holds exactly one entry its value is
        returned directly; otherwise a dictionary keyed like the selection ("train", "valid", "xval")
        is returned.

        :param bool train: If True, return the gains lift value for the training data.
        :param bool valid: If True, return the gains lift value for the validation data.
        :param bool xval: If True, return the gains lift value for each of the cross-validated splits.

        :returns: The gains lift values for the specified key(s); ``None`` for a key whose metrics
            object is missing.
        """
        metrics_by_split = ModelBase._get_metrics(self, train, valid, xval)
        tables = {}
        for split, scores in viewitems(metrics_by_split):
            if scores is None:
                tables[split] = None
            else:
                tables[split] = scores.gains_lift()
        if len(tables) == 1:
            return next(iter(tables.values()))
        return tables
コード例 #18
0
ファイル: typechecks.py プロジェクト: slang1998/h2o-3
def _get_type_name(vtype, dump=None):
    """
    Return the name of the provided type.

        _get_type_name(int) == "integer"
        _get_type_name(str) == "string"
        _get_type_name(tuple) == "tuple"
        _get_type_name(Exception) == "Exception"
        _get_type_name(U(int, float, bool)) == "integer|float|bool"
        _get_type_name(U(H2OFrame, None)) == "?H2OFrame"
    """
    if vtype is None:
        return "None"
    if vtype is str:
        return "string"
    if vtype is int:
        return "integer"
    if vtype is numeric:
        return "numeric"
    if is_type(vtype, str):
        return '"%s"' % repr(vtype)[1:-1]
    if is_type(vtype, int):
        return str(vtype)
    if isinstance(vtype, MagicType):
        return vtype.name(dump)
    if isinstance(vtype, type):
        return vtype.__name__
    if isinstance(vtype, list):
        return "list(%s)" % _get_type_name(U(*vtype), dump)
    if isinstance(vtype, set):
        return "set(%s)" % _get_type_name(U(*vtype), dump)
    if isinstance(vtype, tuple):
        return "(%s)" % ", ".join(_get_type_name(item, dump) for item in vtype)
    if isinstance(vtype, dict):
        return "dict(%s)" % ", ".join(
            "%s: %s" % (_get_type_name(tk, dump), _get_type_name(tv, dump))
            for tk, tv in viewitems(vtype))
    if isinstance(vtype, (FunctionType, BuiltinFunctionType)):
        if vtype.__name__ == "<lambda>":
            return _get_lambda_source_code(vtype, dump)
        else:
            return vtype.__name__
    raise RuntimeError("Unexpected `vtype`: %r" % vtype)
コード例 #19
0
    def gains_lift(self, train=False, valid=False, xval=False):
        """
        Get the Gains/Lift table for the specified metrics.

        ``ModelBase._get_metrics`` picks the metrics objects to query based on the ``train``, ``valid``
        and ``xval`` flags. If all flags are False (default), the training metric Gains/Lift table is
        documented to be returned. A selection with exactly one entry yields that entry's value
        directly; any other selection yields a dictionary keyed like the selection ("train", "valid",
        "xval").

        :param bool train: If True, return the gains lift value for the training data.
        :param bool valid: If True, return the gains lift value for the validation data.
        :param bool xval: If True, return the gains lift value for each of the cross-validated splits.

        :returns: The gains lift values for the specified key(s); ``None`` for a key whose metrics
            object is missing.
        """
        selected = ModelBase._get_metrics(self, train, valid, xval)
        per_split = {
            key: (None if scores is None else scores.gains_lift())
            for key, scores in viewitems(selected)
        }
        if len(per_split) != 1:
            return per_split
        return next(iter(per_split.values()))
コード例 #20
0
ファイル: typechecks.py プロジェクト: StevenLOL/h2o-3
def _get_type_name(vtype, dump=None):
    """
    Return the name of the provided type.

        _get_type_name(int) == "integer"
        _get_type_name(str) == "string"
        _get_type_name(tuple) == "tuple"
        _get_type_name(Exception) == "Exception"
        _get_type_name(U(int, float, bool)) == "integer|float|bool"
        _get_type_name(U(H2OFrame, None)) == "?H2OFrame"
    """
    if vtype is None:
        return "None"
    if vtype is str:
        return "string"
    if vtype is int:
        return "integer"
    if vtype is numeric:
        return "numeric"
    if is_type(vtype, str):
        return '"%s"' % repr(vtype)[1:-1]
    if is_type(vtype, int):
        return str(vtype)
    if isinstance(vtype, MagicType):
        return vtype.name(dump)
    if isinstance(vtype, type):
        return vtype.__name__
    if isinstance(vtype, list):
        return "list(%s)" % _get_type_name(U(*vtype), dump)
    if isinstance(vtype, set):
        return "set(%s)" % _get_type_name(U(*vtype), dump)
    if isinstance(vtype, tuple):
        return "(%s)" % ", ".join(_get_type_name(item, dump) for item in vtype)
    if isinstance(vtype, dict):
        return "dict(%s)" % ", ".join("%s: %s" % (_get_type_name(tk, dump), _get_type_name(tv, dump))
                                      for tk, tv in viewitems(vtype))
    if isinstance(vtype, (FunctionType, BuiltinFunctionType)):
        if vtype.__name__ == "<lambda>":
            return _get_lambda_source_code(vtype, dump)
        else:
            return vtype.__name__
    raise RuntimeError("Unexpected `vtype`: %r" % vtype)
コード例 #21
0
ファイル: binomial.py プロジェクト: StevenLOL/h2o-3
    def find_idx_by_threshold(self, threshold, train=False, valid=False, xval=False):
        """
        Retrieve the index in this metric's threshold list at which the given threshold is located.

        The lookup itself is performed by ``find_idx_by_threshold`` of each selected metrics object.

        The metrics objects to query are selected by ``ModelBase._get_metrics`` from the ``train``,
        ``valid`` and ``xval`` flags. If all flags are False (default), the training metric value is
        documented to be returned. When the selection holds exactly one entry its value is returned
        directly; otherwise a dictionary keyed like the selection ("train", "valid", "xval") is returned.

        :param float threshold: Threshold value to search for in the threshold list.
        :param bool train: If True, return the find idx by threshold value for the training data.
        :param bool valid: If True, return the find idx by threshold value for the validation data.
        :param bool xval: If True, return the find idx by threshold value for each of the cross-validated splits.

        :returns: The find idx by threshold values for the specified key(s); ``None`` for a key whose
            metrics object is missing.
        """
        metrics_by_split = ModelBase._get_metrics(self, train, valid, xval)
        indices = {
            split: (scores.find_idx_by_threshold(threshold) if scores is not None else None)
            for split, scores in viewitems(metrics_by_split)
        }
        if len(indices) == 1:
            return next(iter(indices.values()))
        return indices
コード例 #22
0
ファイル: binomial.py プロジェクト: StevenLOL/h2o-3
    def accuracy(self, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the accuracy for a set of thresholds.

        The metrics objects to query are selected by ``ModelBase._get_metrics`` from the ``train``,
        ``valid`` and ``xval`` flags. If all flags are False (default), the training metric value is
        documented to be returned. When the selection holds exactly one entry its value is returned
        directly; otherwise a dictionary keyed like the selection ("train", "valid", "xval") is returned.

        :param thresholds: If None, then the thresholds in this set of metrics will be used.
        :param bool train: If True, return the accuracy value for the training data.
        :param bool valid: If True, return the accuracy value for the validation data.
        :param bool xval: If True, return the accuracy value for each of the cross-validated splits.

        :returns: The accuracy values for the specified key(s); ``None`` for a key whose metrics
            object is missing.
        """
        metrics_by_split = ModelBase._get_metrics(self, train, valid, xval)
        accuracies = {}
        for split, scores in viewitems(metrics_by_split):
            if scores is None:
                accuracies[split] = None
            else:
                accuracies[split] = scores.metric("accuracy", thresholds=thresholds)
        if len(accuracies) == 1:
            return next(iter(accuracies.values()))
        return accuracies
コード例 #23
0
ファイル: binomial.py プロジェクト: StevenLOL/h2o-3
    def roc(self, train=False, valid=False, xval=False):
        """
        Return the coordinates of the ROC curve for a given set of data.

        The coordinates are two-tuples ``(fprs, tprs)`` taken from the ``fprs`` and ``tprs`` attributes
        of each selected metrics object (false positive rates and true positive rates).
        The metrics objects are selected by ``ModelBase._get_metrics``; if all flags are False (default),
        the training data is documented to be used. If exactly one curve is available it is returned
        directly, otherwise the curves are returned as a dictionary of two-tuples. Selected keys whose
        metrics object is missing are left out of the result.

        :param bool train: If True, return the ROC value for the training data.
        :param bool valid: If True, return the ROC value for the validation data.
        :param bool xval: If True, return the ROC value for each of the cross-validated splits.

        :returns: The ROC values for the specified key(s).
        """
        metrics_by_split = ModelBase._get_metrics(self, train, valid, xval)
        curves = {
            split: (scores.fprs, scores.tprs)
            for split, scores in viewitems(metrics_by_split)
            if scores is not None
        }
        if len(curves) == 1:
            return next(iter(curves.values()))
        return curves
コード例 #24
0
    def roc(self, train=False, valid=False, xval=False):
        """
        Return the coordinates of the ROC curve for a given set of data.

        Each curve is a two-tuple ``(fprs, tprs)`` read from the selected metrics object: the false
        positive rates and the true positive rates. ``ModelBase._get_metrics`` selects the metrics
        objects; if all flags are False (default), the training data is documented to be used.
        A single available curve is returned directly; otherwise a dictionary of two-tuples is
        returned. Keys whose metrics object is missing do not appear in the result.

        :param bool train: If True, return the ROC value for the training data.
        :param bool valid: If True, return the ROC value for the validation data.
        :param bool xval: If True, return the ROC value for each of the cross-validated splits.

        :returns: The ROC values for the specified key(s).
        """
        selected = ModelBase._get_metrics(self, train, valid, xval)
        curves = {}
        for key, scores in viewitems(selected):
            if scores is None:
                # Unlike the other metric accessors, missing metrics are skipped, not stored as None.
                continue
            curves[key] = (scores.fprs, scores.tprs)
        if len(curves) != 1:
            return curves
        return next(iter(curves.values()))
コード例 #25
0
ファイル: binomial.py プロジェクト: StevenLOL/h2o-3
    def confusion_matrix(self, metrics=None, thresholds=None, train=False, valid=False, xval=False):
        """
        Get the confusion matrix for the specified metrics/thresholds.

        ``ModelBase._get_metrics`` picks the metrics objects to query based on the ``train``, ``valid``
        and ``xval`` flags. If all flags are False (default), the training metric value is documented
        to be returned. A selection with exactly one entry yields that entry's value directly; any
        other selection yields a dictionary keyed like the selection ("train", "valid", "xval").

        :param metrics: One or more of ``"min_per_class_accuracy"``, ``"absolute_mcc"``, ``"tnr"``, ``"fnr"``,
            ``"fpr"``, ``"tpr"``, ``"precision"``, ``"accuracy"``, ``"f0point5"``, ``"f2"``, ``"f1"``.
        :param thresholds: If None, then the thresholds in this set of metrics will be used.
        :param bool train: If True, return the confusion matrix value for the training data.
        :param bool valid: If True, return the confusion matrix value for the validation data.
        :param bool xval: If True, return the confusion matrix value for each of the cross-validated splits.

        :returns: The confusion matrix values for the specified key(s); ``None`` for a key whose
            metrics object is missing.
        """
        selected = ModelBase._get_metrics(self, train, valid, xval)
        per_split = {
            key: (None if scores is None else scores.confusion_matrix(metrics=metrics, thresholds=thresholds))
            for key, scores in viewitems(selected)
        }
        if len(per_split) != 1:
            return per_split
        return next(iter(per_split.values()))
コード例 #26
0
ファイル: model_base.py プロジェクト: dataplayr/h2o-3
    def r2(self, train=False, valid=False, xval=False):
        """
        Return the R^2 for this regression model.

        The value is obtained from ``r2()`` of each selected metrics object. Per the existing
        documentation, R^2 is returned for GLM models and NaN otherwise, and is defined as
        1 - MSE/var, where var is computed as sigma*sigma (NOTE(review): not verifiable from this
        method; confirm against the metrics implementation).

        The metrics objects to query are selected by ``ModelBase._get_metrics`` from the ``train``,
        ``valid`` and ``xval`` flags. If all flags are False (default), the training metric value is
        documented to be returned. When the selection holds exactly one entry its value is returned
        directly; otherwise a dictionary keyed like the selection ("train", "valid", "xval") is returned.

        :param train: If train is True, then return the R^2 value for the training data.
        :param valid: If valid is True, then return the R^2 value for the validation data.
        :param xval:  If xval is True, then return the R^2 value for the cross validation data.

        :returns: The R^2 for this regression model; ``None`` for a key whose metrics object is missing.
        """
        metrics_by_split = ModelBase._get_metrics(self, train, valid, xval)
        r2_values = {
            split: (scores.r2() if scores is not None else None)
            for split, scores in viewitems(metrics_by_split)
        }
        if len(r2_values) == 1:
            return next(iter(r2_values.values()))
        return r2_values
コード例 #27
0
    def find_threshold_by_max_metric(self,
                                     metric,
                                     train=False,
                                     valid=False,
                                     xval=False):
        """
        Call ``find_threshold_by_max_metric(metric)`` on each selected metrics object.

        Presumably this yields the threshold at which ``metric`` is maximal; the computation itself
        lives in the metrics object.

        ``ModelBase._get_metrics`` picks the metrics objects to query based on the ``train``, ``valid``
        and ``xval`` flags. If all flags are False (default), the training metric value is documented
        to be returned. A selection with exactly one entry yields that entry's value directly; any
        other selection yields a dictionary keyed like the selection ("train", "valid", "xval").

        :param str metric: The metric to search for.
        :param bool train: If True, return the find threshold by max metric value for the training data.
        :param bool valid: If True, return the find threshold by max metric value for the validation data.
        :param bool xval: If True, return the find threshold by max metric value for each of the cross-validated splits.

        :returns: The find threshold by max metric values for the specified key(s); ``None`` for a key
            whose metrics object is missing.
        """
        selected = ModelBase._get_metrics(self, train, valid, xval)
        found = {}
        for key, scores in viewitems(selected):
            if scores is None:
                found[key] = None
            else:
                found[key] = scores.find_threshold_by_max_metric(metric)
        if len(found) != 1:
            return found
        return next(iter(found.values()))
コード例 #28
0
    def find_idx_by_threshold(self,
                              threshold,
                              train=False,
                              valid=False,
                              xval=False):
        """
        Retrieve the index in this metric's threshold list at which the given threshold is located.

        The lookup itself is performed by ``find_idx_by_threshold`` of each selected metrics object.

        ``ModelBase._get_metrics`` picks the metrics objects to query based on the ``train``, ``valid``
        and ``xval`` flags. If all flags are False (default), the training metric value is documented
        to be returned. A selection with exactly one entry yields that entry's value directly; any
        other selection yields a dictionary keyed like the selection ("train", "valid", "xval").

        :param float threshold: Threshold value to search for in the threshold list.
        :param bool train: If True, return the find idx by threshold value for the training data.
        :param bool valid: If True, return the find idx by threshold value for the validation data.
        :param bool xval: If True, return the find idx by threshold value for each of the cross-validated splits.

        :returns: The find idx by threshold values for the specified key(s); ``None`` for a key whose
            metrics object is missing.
        """
        selected = ModelBase._get_metrics(self, train, valid, xval)
        positions = {}
        for key, scores in viewitems(selected):
            if scores is None:
                positions[key] = None
            else:
                positions[key] = scores.find_idx_by_threshold(threshold)
        if len(positions) != 1:
            return positions
        return next(iter(positions.values()))
コード例 #29
0
ファイル: typechecks.py プロジェクト: h2oai/h2o-3
 def name(self, src=None):
     """
     Return string representing the name of this type.

     The result lists every ``key: type-name`` pair of ``self._types`` inside curly braces, with the
     type names produced by ``_get_type_name``.

     :param src: passed through to ``_get_type_name``.
     """
     entries = ["%s: %s" % (key, _get_type_name(ktype, src)) for key, ktype in viewitems(self._types)]
     return "{%s}" % ", ".join(entries)
コード例 #30
0
ファイル: typechecks.py プロジェクト: slang1998/h2o-3
 def name(self, src=None):
     """
     Return string representing the name of this type.

     Every entry of ``self._types`` is rendered as ``key: type-name`` (type names come from
     ``_get_type_name``); the entries are comma-separated and wrapped in curly braces.

     :param src: passed through to ``_get_type_name``.
     """
     rendered = []
     for key, ktype in viewitems(self._types):
         rendered.append("%s: %s" % (key, _get_type_name(ktype, src)))
     body = ", ".join(rendered)
     return "{%s}" % body