コード例 #1
0
def precision(y_true, y_score, k=None, return_bounds=False):
    """
    Precision over the labeled examples selected by ``_argtop(y_score, k)``.

    Labels equal to NaN are treated as "unlabeled": they contribute 0 to the
    count of positives and are excluded from the count of labeled examples.

    If return_bounds is False, returns only the precision.
    If return_bounds is True, returns a tuple containing:
        - precision on the labeled examples in the selection
        - number of labeled examples in the selection
        - lower bound of precision, assuming all unlabeled examples
            are False
        - upper bound of precision, assuming all unlabeled examples
            are True
    Both bounds are NaN when k is 0. When k is None the bounds are
    computed with k = len(y_true).

    NOTE(review): to_float and _argtop are defined elsewhere; _argtop
    presumably returns the indices of the k highest scores -- confirm.
    """
    y_true, y_score = to_float(y_true, y_score)
    selected = y_true[_argtop(y_score, k)]

    positives = np.nan_to_num(selected).sum()  # unlabeled entries count as 0
    labeled = (~np.isnan(selected)).sum()  # how many entries carry a label
    value = positives / labeled

    if not return_bounds:
        return value

    if k is None:
        k = len(y_true)

    if k != 0:
        lower = positives / k
        upper = (positives + k - labeled) / k
    else:
        lower, upper = np.nan, np.nan
    return value, labeled, lower, upper
コード例 #2
0
ファイル: metrics.py プロジェクト: dssg/drain
def precision(y_true, y_score, k=None, return_bounds=False):
    """
    Compute precision among the labeled examples picked by
    ``_argtop(y_score, k)``.

    NaN labels mark unlabeled examples; they add nothing to the number of
    positives and are left out of the number of labeled examples.

    With return_bounds False the result is the precision alone.
    With return_bounds True the result is the 4-tuple:
        - precision on the labeled examples in the selection
        - number of labeled examples in the selection
        - lower bound of precision (all unlabeled examples assumed False)
        - upper bound of precision (all unlabeled examples assumed True)
    The bounds use k = len(y_true) when k is None and are both NaN when
    k is 0.

    NOTE(review): to_float and _argtop live elsewhere in the module;
    _argtop is presumed to return indices of the k largest scores.
    """
    y_true, y_score = to_float(y_true, y_score)
    top_labels = y_true[_argtop(y_score, k)]

    has_label = ~np.isnan(top_labels)
    num_true = np.nan_to_num(top_labels).sum()  # NaN labels become 0
    num_labeled = has_label.sum()
    result = num_true / num_labeled

    if return_bounds:
        total = len(y_true) if k is None else k
        if total == 0:
            return result, num_labeled, np.nan, np.nan
        unlabeled = total - num_labeled
        return (result, num_labeled,
                num_true / total, (num_true + unlabeled) / total)

    return result
コード例 #3
0
ファイル: metrics.py プロジェクト: dssg/drain
def count(y_true, y_score=None, countna=False):
    """
    Count examples.

    When countna is true every example is counted (len(y_true)).
    Otherwise only labeled examples are counted, i.e. those whose y_true,
    after conversion by to_float, is not NaN. y_score is accepted but unused.
    """
    if countna:
        return len(y_true)

    labeled = ~np.isnan(to_float(y_true))
    return labeled.sum()
コード例 #4
0
def count(y_true, y_score=None, countna=False):
    """
    Return the number of examples in y_true.

    With countna set, unlabeled examples are included and the result is
    simply len(y_true). Without it, y_true is passed through to_float and
    only the non-NaN (labeled) entries are counted. y_score is ignored.
    """
    if not countna:
        is_missing = np.isnan(to_float(y_true))
        return (~is_missing).sum()
    return len(y_true)
コード例 #5
0
def precision_series(y_true, y_score, k=None):
    """
    Running precision along the ordering given by ``_argsort(y_score, k)``.

    The i-th entry (index starts at 1) is the cumulative number of positive
    labels divided by the cumulative number of non-NaN labels among the
    first i examples of that ordering. NaN labels count as 0 positives.
    TODO: extrapolate here

    NOTE(review): _argsort is defined elsewhere; presumably it orders by
    descending score and truncates to k -- confirm.
    """
    y_true, y_score = to_float(y_true, y_score)
    ordered = y_true[_argsort(y_score, k)]

    cum_positive = np.nan_to_num(ordered).cumsum()  # unlabeled count as 0
    cum_labeled = (~np.isnan(ordered)).cumsum()  # running number of labels
    values = cum_positive / cum_labeled
    return pd.Series(values, index=np.arange(1, len(cum_positive) + 1))
コード例 #6
0
ファイル: metrics.py プロジェクト: dssg/drain
def precision_series(y_true, y_score, k=None):
    """
    Return a pandas Series, indexed from 1, whose i-th entry is the
    precision over the first i examples in the order produced by
    ``_argsort(y_score, k)``.

    Each entry is (cumulative positives) / (cumulative labeled examples);
    NaN labels are unlabeled and contribute to neither count.
    TODO: extrapolate here

    NOTE(review): the ordering semantics of _argsort are not visible
    here; presumably highest score first, limited to k entries.
    """
    y_true, y_score = to_float(y_true, y_score)
    order = _argsort(y_score, k)
    labels = y_true[order]

    is_labeled = ~np.isnan(labels)
    running_hits = np.nan_to_num(labels).cumsum()
    running_labeled = is_labeled.cumsum()

    ratio = running_hits / running_labeled
    rank = np.arange(1, len(running_hits) + 1)
    return pd.Series(ratio, index=rank)
コード例 #7
0
def recall_series(y_true, y_score, k=None, value=True):
    """
    Running recall count along the ordering given by
    ``_argsort(y_score, k)``.

    The i-th entry (index starts at 1) is the cumulative sum of the labels
    among the first i examples, with NaN labels counted as 0. When value is
    false the labels are flipped (1 - y_true) first, so negatives are
    counted instead.
    """
    y_true, y_score = to_float(y_true, y_score)
    order = _argsort(y_score, k)

    target = y_true if value else 1 - y_true

    running = np.nan_to_num(target[order]).cumsum()
    return pd.Series(running, index=np.arange(1, len(running) + 1))
コード例 #8
0
ファイル: metrics.py プロジェクト: dssg/drain
def recall_series(y_true, y_score, k=None, value=True):
    """
    Return a pandas Series, indexed from 1, whose i-th entry is the recall
    (cumulative label sum) over the first i examples in the order produced
    by ``_argsort(y_score, k)``.

    NaN labels are treated as 0. Passing value=False replaces the labels
    with 1 - y_true beforehand, which counts negative examples instead.
    """
    y_true, y_score = to_float(y_true, y_score)
    ranking = _argsort(y_score, k)

    if value:
        labels = y_true
    else:
        labels = 1 - y_true

    ranked = labels[ranking]
    cumulative = np.nan_to_num(ranked).cumsum()
    positions = np.arange(1, len(cumulative) + 1)
    return pd.Series(cumulative, index=positions)
コード例 #9
0
def count_series(y_true, y_score, countna=False):
    """
    Running example count along the ordering given by ``_argsort(y_score)``.

    The i-th entry (index starts at 1) is the number of examples among the
    first i. With countna false only labeled (non-NaN) examples are
    counted; with countna true every example counts, so the series is
    simply 1..len(y_true).
    """
    y_true, y_score = to_float(y_true, y_score)
    order = _argsort(y_score)

    if countna:
        counts = range(1, len(y_true) + 1)
    else:
        counts = (~np.isnan(y_true[order])).cumsum()

    return pd.Series(counts, index=range(1, len(counts) + 1))
コード例 #10
0
ファイル: metrics.py プロジェクト: dssg/drain
def count_series(y_true, y_score, countna=False):
    """
    Returns series whose i-th entry is the number of examples in the top i.

    The ordering comes from ``_argsort(y_score)``. If countna is False only
    labeled examples (y_true not NaN) are counted; if countna is True every
    example is counted, so the series is simply 1..len(y_true).
    The returned pandas Series is indexed from 1.
    """
    y_true, y_score = to_float(y_true, y_score)
    top = _argsort(y_score)

    if not countna:
        a = (~np.isnan(y_true[top])).cumsum()
    else:
        # Was ``len(y)``: ``y`` is not defined in this function, so the
        # countna=True branch raised NameError.
        a = range(1, len(y_true) + 1)

    return pd.Series(a, index=range(1, len(a) + 1))
コード例 #11
0
def recall(y_true, y_score, k=None, value=True):
    """
    Count the positive examples among those selected by
    ``_argtop(y_score, k)``.

    NaN labels are counted as 0. If value is False the labels are flipped
    (1 - y_true) first, so the number of negative examples is returned.
    TODO: add prop argument to return recall proportion instead of count
    """
    y_true, y_score = to_float(y_true, y_score)
    selected = _argtop(y_score, k)

    labels = y_true if value else 1 - y_true

    return np.nan_to_num(labels[selected]).sum()
コード例 #12
0
ファイル: metrics.py プロジェクト: dssg/drain
def recall(y_true, y_score, k=None, value=True):
    """
    Return the recall, expressed as a count, over the examples picked by
    ``_argtop(y_score, k)``: the sum of their labels with NaN treated as 0.

    With value=False the labels are replaced by 1 - y_true beforehand, so
    the result is the number of negative examples instead.
    TODO: add prop argument to return recall proportion instead of count
    """
    y_true, y_score = to_float(y_true, y_score)
    top_idx = _argtop(y_score, k)

    if value:
        target = y_true
    else:
        target = 1 - y_true

    filled = np.nan_to_num(target[top_idx])  # unlabeled entries become 0
    return filled.sum()
コード例 #13
0
def true_score(y, outcome='true', score='score', **subset_args):
    """
    Subset y with y_subset, then return its outcome and score entries
    converted by util.to_float.

    outcome and score name the entries to pull out of the subset; any extra
    keyword arguments are forwarded to y_subset unchanged.
    """
    subset = y_subset(y, outcome=outcome, score=score, **subset_args)
    outcomes = subset[outcome]
    scores = subset[score]
    return util.to_float(outcomes, scores)
コード例 #14
0
ファイル: model.py プロジェクト: dssg/drain
def true_score(y, outcome='true', score='score', **subset_args):
    """
    Return util.to_float applied to the outcome and score entries of the
    subset of y selected by y_subset.

    Extra keyword arguments are passed straight through to y_subset along
    with the outcome and score names.
    """
    selected = y_subset(y, outcome=outcome, score=score, **subset_args)
    columns = (selected[outcome], selected[score])
    return util.to_float(*columns)