def precision(y_true, y_score, k=None, return_bounds=False):
    """
    Precision on the labeled examples in the top k.

    Unlabeled examples are those whose y_true is NaN.

    If return_bounds is False, returns only the precision.
    If return_bounds is True, returns a tuple containing:
        - precision on the labeled examples in the top k
        - number of labeled examples in the top k
        - lower bound of precision in the top k, assuming all
          unlabeled examples are False
        - upper bound of precision in the top k, assuming all
          unlabeled examples are True
    When k is 0 both bounds are NaN; when k is None, len(y_true) is used
    as the denominator for the bounds.
    """
    y_true, y_score = to_float(y_true, y_score)
    top_labels = y_true[_argtop(y_score, k)]

    # missing labels contribute 0 to the positive count
    positives = np.nan_to_num(top_labels).sum()
    # number of examples that actually carry a label
    n_labeled = (~np.isnan(top_labels)).sum()
    prec = positives / n_labeled

    if not return_bounds:
        return prec

    if k is None:
        k = len(y_true)
    if k != 0:
        lower = positives / k
        upper = (positives + k - n_labeled) / k
    else:
        lower, upper = np.nan, np.nan
    return prec, n_labeled, lower, upper
def precision(y_true, y_score, k=None, return_bounds=False):
    """
    Compute precision over the labeled examples in the top k.

    An example counts as labeled when its y_true is not NaN.

    With return_bounds=False the precision alone is returned.
    With return_bounds=True a 4-tuple is returned:
        (precision, number of labeled examples in the top k,
         lower bound, upper bound)
    The lower bound treats every unlabeled example in the top k as False,
    the upper bound treats every one of them as True. Both bounds are NaN
    when k is 0. If k is None the bounds are taken over len(y_true).
    """
    y_true, y_score = to_float(y_true, y_score)
    selected = y_true[_argtop(y_score, k)]
    has_label = ~np.isnan(selected)

    n = np.nan_to_num(selected).sum()  # unlabeled entries count as 0
    d = has_label.sum()  # how many of the selected entries are labeled
    p = n / d

    if return_bounds:
        total = len(y_true) if k is None else k
        if total == 0:
            return p, d, np.nan, np.nan
        return p, d, n / total, (n + total - d) / total
    return p
def count(y_true, y_score=None, countna=False):
    """
    Count the number of examples.

    Args:
        y_true: the labels; NaN marks an unlabeled example
        y_score: not used by this function
        countna: if True every example is counted (len(y_true));
            if False only labeled examples, i.e. those whose y_true
            is not NaN, are counted
    """
    if countna:
        return len(y_true)

    is_labeled = ~np.isnan(to_float(y_true))
    return is_labeled.sum()
def precision_series(y_true, y_score, k=None):
    """
    Returns a series whose i-th entry is the precision in the top i,
    with the examples ordered by _argsort(y_score, k).

    The series is indexed 1..len and precision is taken over labeled
    examples only (y_true not NaN).

    TODO: extrapolate here
    """
    y_true, y_score = to_float(y_true, y_score)
    ranked = y_true[_argsort(y_score, k)]

    # running number of positives; missing labels count as 0
    positives = np.nan_to_num(ranked).cumsum()
    # running number of labeled examples
    labeled = (~np.isnan(ranked)).cumsum()

    positions = np.arange(1, len(positives) + 1)
    return pd.Series(positives / labeled, index=positions)
def precision_series(y_true, y_score, k=None):
    """
    Cumulative precision along the ordering given by _argsort(y_score, k).

    Returns a pandas Series indexed from 1 whose i-th entry is the
    precision in the top i: the cumulative sum of labels (NaN treated
    as 0) divided by the cumulative number of non-NaN labels.

    TODO: extrapolate here
    """
    y_true, y_score = to_float(y_true, y_score)
    order = _argsort(y_score, k)
    ordered_labels = y_true[order]
    is_labeled = ~np.isnan(ordered_labels)

    cum_true = np.nan_to_num(ordered_labels).cumsum()
    cum_labeled = is_labeled.cumsum()
    ratio = cum_true / cum_labeled
    return pd.Series(ratio, index=np.arange(1, len(cum_true) + 1))
def recall_series(y_true, y_score, k=None, value=True):
    """
    Returns a series whose i-th entry is the recall in the top i,
    with the examples ordered by _argsort(y_score, k).

    Recall here is a running count, not a proportion: the cumulative sum
    of the labels with NaN treated as 0. If value is False the labels are
    flipped (1 - y_true) first, so negatives are counted instead.
    """
    y_true, y_score = to_float(y_true, y_score)
    order = _argsort(y_score, k)

    target = y_true if value else 1 - y_true
    running = np.nan_to_num(target[order]).cumsum()

    positions = np.arange(1, len(running) + 1)
    return pd.Series(running, index=positions)
def recall_series(y_true, y_score, k=None, value=True):
    """
    Cumulative recall count along the ordering from _argsort(y_score, k).

    The result is a pandas Series indexed from 1; entry i is the sum of
    the first i labels in that ordering, where unlabeled (NaN) examples
    contribute 0. With value=False the labels are replaced by 1 - y_true
    before summing.
    """
    y_true, y_score = to_float(y_true, y_score)
    ranking = _argsort(y_score, k)

    if value:
        labels = y_true
    else:
        labels = 1 - y_true

    filled = np.nan_to_num(labels[ranking])
    cumulative = filled.cumsum()
    return pd.Series(cumulative, index=np.arange(1, len(cumulative) + 1))
def count_series(y_true, y_score, countna=False):
    """
    Returns a series whose i-th entry is the number of examples in the
    top i, with the examples ordered by _argsort(y_score).

    If countna is False only labeled examples (y_true not NaN) are
    counted; if countna is True every example is counted, so the
    series is simply 1, 2, ..., len(y_true).
    """
    y_true, y_score = to_float(y_true, y_score)
    order = _argsort(y_score)

    if countna:
        counts = range(1, len(y_true) + 1)
    else:
        counts = (~np.isnan(y_true[order])).cumsum()

    positions = range(1, len(counts) + 1)
    return pd.Series(counts, index=positions)
def count_series(y_true, y_score, countna=False):
    """
    Returns series whose i-th entry is the number of examples in the top i,
    with the examples ordered by _argsort(y_score).

    Args:
        y_true: the labels; NaN marks an unlabeled example
        y_score: the scores used to order the examples
        countna: if False only labeled examples (y_true not NaN) are
            counted; if True every example is counted, giving
            1, 2, ..., len(y_true)

    Returns:
        pandas Series indexed 1..len with the running count
    """
    y_true, y_score = to_float(y_true, y_score)
    top = _argsort(y_score)

    if not countna:
        a = (~np.isnan(y_true[top])).cumsum()
    else:
        # was len(y): `y` is not defined in this scope, so countna=True
        # raised NameError instead of counting every example
        a = range(1, len(y_true) + 1)

    return pd.Series(a, index=range(1, len(a) + 1))
def recall(y_true, y_score, k=None, value=True):
    """
    Returns recall (number of positive examples) in the top k, where the
    top k is selected by _argtop(y_score, k).

    Unlabeled examples (y_true NaN) contribute 0.
    If value is False the labels are flipped (1 - y_true) first, so the
    number of negative examples is counted instead.

    TODO: add prop argument to return recall proportion instead of count
    """
    y_true, y_score = to_float(y_true, y_score)
    top = _argtop(y_score, k)

    labels = y_true if value else 1 - y_true
    return np.nan_to_num(labels[top]).sum()
def recall(y_true, y_score, k=None, value=True):
    """
    Count the positive examples among those selected by _argtop(y_score, k).

    The result is a count, not a proportion. NaN labels are treated as 0.
    With value=False the labels are replaced by 1 - y_true, so negative
    examples are counted instead of positive ones.

    TODO: add prop argument to return recall proportion instead of count
    """
    y_true, y_score = to_float(y_true, y_score)
    selected = _argtop(y_score, k)

    if value:
        target = y_true
    else:
        target = 1 - y_true

    filled = np.nan_to_num(target[selected])  # unlabeled -> 0
    return filled.sum()
def true_score(y, outcome='true', score='score', **subset_args):
    """
    Subset y with y_subset and return the outcome and score entries
    converted by util.to_float.

    Args:
        y: the object passed to y_subset (presumably a DataFrame -- confirm
            against y_subset)
        outcome: key of the outcome entry in the subset
        score: key of the score entry in the subset
        **subset_args: forwarded unchanged to y_subset

    Returns:
        whatever util.to_float returns for (subset[outcome], subset[score])
    """
    subset = y_subset(y, outcome=outcome, score=score, **subset_args)
    outcomes, scores = subset[outcome], subset[score]
    return util.to_float(outcomes, scores)