def cindex(Y, P): """Concordance, aka pairwise ranking accuracy. Computes the relative fraction of concordant pairs, that is, Y[i] > Y[j] and P[i] > P[j] (ties with P[i]=P[j] are assumed to be broken randomly). Equivalent to area under ROC curve, if Y[i] belong to {-1, 1}. An O(n*log(n)) implementation, based on order statistic tree computations. Parameters ---------- Y : {array-like}, shape = [n_samples] or [n_samples, n_labels] Correct labels, can be any real numbers. P : {array-like}, shape = [n_samples] or [n_samples, n_labels] Predicted labels, can be any real numbers. Returns ------- concordance index : float number between 0 and 1, around 0.5 means random performance """ Y = array_tools.as_2d_array(Y) P = array_tools.as_2d_array(P) if not Y.shape == P.shape: raise UndefinedPerformance("Y and P must be of same shape") perfs = cindex_multitask(Y, P) perfs = np.array(perfs) perfs = perfs[np.invert(np.isnan(perfs))] if len(perfs) == 0: raise UndefinedPerformance( "No pairs, all the instances have the same output") return np.mean(perfs)
class NfoldCV(object):

    def __init__(self, learner, measure, folds):
        self.rls = learner
        if measure is None:
            self.measure = sqerror
        else:
            self.measure = measure
        self.folds = folds

    def cv(self, regparam):
        rls = self.rls
        folds = self.folds
        measure = self.measure
        rls.solve(regparam)
        Y = rls.Y
        performances = []
        P_all = []
        for fold in folds:
            P = rls.holdout(fold)
            P_all.append(P)
            try:
                performance = measure(Y[fold], P)
                performances.append(performance)
            except UndefinedPerformance:
                #folds with undefined performance are skipped
                pass
        #performance = measure_utilities.aggregate(performances)
        if len(performances) > 0:
            performance = np.mean(performances)
        else:
            raise UndefinedPerformance("Performance undefined for all folds")
        return performance, P_all
def accuracy(Y, P):
    """Binary classification accuracy.

    A performance measure for binary classification problems.
    Returns the fraction of correct class predictions. P[i] > 0 is
    considered a positive class prediction and P[i] < 0 a negative one.
    P[i] == 0 is interpreted as the classifier abstaining from making a
    decision, which incurs 0.5 errors (in contrast to 0 errors for a
    correct and 1 error for an incorrect prediction).

    If 2-dimensional arrays are supplied as arguments, then accuracy
    is separately computed for each column, after which the accuracies
    are averaged.

    Parameters
    ----------
    Y : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Correct labels, must belong to set {-1,1}
    P : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Predicted labels, can be any real numbers.

    Returns
    -------
    accuracy : float
        number between 0 and 1
    """
    Y = array_tools.as_2d_array(Y)
    P = array_tools.as_2d_array(P)
    if not Y.shape == P.shape:
        raise UndefinedPerformance("Y and P must be of same shape")
    return np.mean(accuracy_multitask(Y, P))
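#A minimal usage sketch (hypothetical, assumes numpy as np):
#
#    >>> Y = np.array([1, 1, -1, -1])
#    >>> P = np.array([0.8, -0.2, -0.5, 0.0])
#    >>> accuracy(Y, P)  #correct, wrong, correct, abstained (0.5 credit)
#    0.625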
def fscore(Y, P):
    """F1-score.

    A performance measure for binary classification problems.
    F1 = 2*(Precision*Recall)/(Precision+Recall)

    If 2-dimensional arrays are supplied as arguments, then the
    macro-averaged F-score is computed over the columns.

    Parameters
    ----------
    Y : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Correct labels, must belong to set {-1,1}
    P : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Predicted labels, can be any real numbers. P[i] > 0 is treated
        as a positive, and P[i] <= 0 as a negative class prediction.

    Returns
    -------
    fscore : float
        number between 0 and 1
    """
    Y = array_tools.as_2d_array(Y)
    P = array_tools.as_2d_array(P)
    if not Y.shape == P.shape:
        raise UndefinedPerformance("Y and P must be of same shape")
    return np.mean(fscore_multitask(Y, P))
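#A minimal usage sketch (hypothetical, assumes numpy as np):
#
#    >>> Y = np.array([1, 1, 1, -1, -1])
#    >>> P = np.array([0.9, -0.1, 0.4, 0.2, -0.3])
#    >>> fscore(Y, P)  #TP=2, FN=1, FP=1 -> precision = recall = 2/3
#    0.6666...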
class LQOCV(object):

    def __init__(self, learner, measure):
        self.rls = learner
        self.measure = measure

    def cv(self, regparam):
        rls = self.rls
        measure = self.measure
        rls.solve(regparam)
        Y = rls.Y
        performances = []
        folds = rls.qids
        for fold in folds:
            P = rls.computeHO(fold)
            try:
                performance = measure(Y[fold], P)
                performances.append(performance)
            except UndefinedPerformance:
                #queries with undefined performance are skipped
                pass
        #performance = measure_utilities.aggregate(performances)
        if len(performances) > 0:
            performance = np.mean(performances)
        else:
            raise UndefinedPerformance("Performance undefined for all folds")
        return performance
def sqmprank(Y, P):
    """Squared magnitude preserving ranking error.

    A performance measure for ranking problems. Computes the sum of
    (Y[i]-Y[j]-P[i]+P[j])**2 over all index pairs, normalized by the
    number of pairs. For query-structured data, one would typically
    want to compute the error separately for each query, and average.

    If 2-dimensional arrays are supplied as arguments, then the error
    is separately computed for each column, after which the errors are
    averaged.

    Parameters
    ----------
    Y : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Correct utility values, can be any real numbers
    P : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Predicted utility values, can be any real numbers.

    Returns
    -------
    error : float
    """
    Y = array_tools.as_2d_array(Y)
    P = array_tools.as_2d_array(P)
    if not Y.shape == P.shape:
        raise UndefinedPerformance("Y and P must be of same shape")
    return np.mean(sqmprank_multitask(Y, P))
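#A minimal usage sketch (hypothetical, assumes numpy as np). The measure
#penalizes pairwise differences Y[i]-Y[j] that the predictions fail to
#preserve:
#
#    >>> Y = np.array([1.0, 2.0, 3.0])
#    >>> P = np.array([1.0, 1.0, 3.0])
#    >>> sqmprank(Y, P)  #only the pairs involving index 1 are penalized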
def cindex_singletask(Y, P):
    Y = np.array(Y).T[0]
    P = np.array(P).T[0]
    correct = Y.astype(np.float64)
    predictions = P.astype(np.float64)
    assert len(correct) == len(predictions)
    C = np.array(correct).reshape(len(correct),)
    C.sort()
    pairs = 0
    c_ties = 0
    for i in range(1, len(C)):
        if C[i] != C[i - 1]:
            c_ties = 0
        else:
            c_ties += 1
        #this example forms a pair with each previous example that has a lower value
        pairs += i - c_ties
    if pairs == 0:
        raise UndefinedPerformance("No pairs, all the instances have the same output")
    correct = np.array(correct).reshape(correct.shape[0],)
    predictions = np.array(predictions).reshape(predictions.shape[0],)
    s = swapped.count_swapped(correct, predictions)
    disagreement = float(s) / float(pairs)
    return 1. - disagreement
def sqerror(Y, P):
    """Mean squared error.

    A performance measure for regression problems. Computes the sum of
    (Y[i]-P[i])**2 over all indices i, normalized by the number of
    instances.

    If 2-dimensional arrays are supplied as arguments, then the error
    is separately computed for each column, after which the errors are
    averaged.

    Parameters
    ----------
    Y : {array-like}, shape = [n_samples] or [n_samples, n_tasks]
        Correct utility values, can be any real numbers
    P : {array-like}, shape = [n_samples] or [n_samples, n_tasks]
        Predicted utility values, can be any real numbers.

    Returns
    -------
    error : float
    """
    Y = array_tools.as_2d_array(Y)
    P = array_tools.as_2d_array(P)
    if not Y.shape == P.shape:
        raise UndefinedPerformance("Y and P must be of same shape")
    return np.mean(sqerror_multitask(Y, P))
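#A minimal usage sketch (hypothetical, assumes numpy as np):
#
#    >>> Y = np.array([1.0, 2.0, 3.0])
#    >>> P = np.array([1.5, 2.0, 2.0])
#    >>> sqerror(Y, P)  #(0.25 + 0.0 + 1.0) / 3
#    0.4166...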
def ova_accuracy(Y, P):
    """One-vs-all classification accuracy for multi-class problems.

    Computes the accuracy for a one-versus-all decomposed classification
    problem. Each column in Y and P corresponds to one possible class
    label. On each row, exactly one column in Y is 1, all the rest must
    be -1. The prediction for the i'th example is computed by taking the
    argmax over the indices of row i in P.

    Parameters
    ----------
    Y : {array-like}, shape = [n_samples] or [n_samples, n_classes]
        Correct labels, must belong to set {-1,1}, with exactly
        one 1 on each row.
    P : {array-like}, shape = [n_samples] or [n_samples, n_classes]
        Predicted labels, can be any real numbers.

    Returns
    -------
    accuracy : float
        number between 0 and 1
    """
    Y = np.array(Y)
    P = np.array(P)
    if not Y.shape == P.shape:
        raise UndefinedPerformance("Y and P must be of same shape")
    Y = np.argmax(Y, axis=1)
    P = np.argmax(P, axis=1)
    return np.mean(Y == P)
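#A minimal usage sketch (hypothetical, assumes numpy as np):
#
#    >>> Y = np.array([[1, -1, -1], [-1, 1, -1], [-1, -1, 1]])
#    >>> P = np.array([[0.9, 0.1, -0.2], [0.3, 0.8, -0.1], [0.5, 0.2, 0.1]])
#    >>> ova_accuracy(Y, P)  #row 2 is misclassified as class 0
#    0.6666...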
def cv_old(self, regparam):
    rls = self.rls
    rls.solve(regparam)
    Y = rls.Y
    aucs = []
    for k in range(Y.shape[1]):
        pairs_start_inds, pairs_end_inds = [], []
        for i in range(Y.shape[0] - 1):
            for j in range(i + 1, Y.shape[0]):
                if Y[i, k] > Y[j, k]:
                    pairs_start_inds.append(i)
                    pairs_end_inds.append(j)
                elif Y[i, k] < Y[j, k]:
                    pairs_start_inds.append(j)
                    pairs_end_inds.append(i)
        if len(pairs_start_inds) == 0:
            raise UndefinedPerformance("Leave-pair-out undefined, all labels same for output %d" % k)
        pred_start, pred_end = rls.leave_pair_out(np.array(pairs_start_inds), np.array(pairs_end_inds))
        pred_start = array_tools.as_2d_array(pred_start)
        pred_end = array_tools.as_2d_array(pred_end)
        auc = 0.
        for h in range(len(pred_start)):
            if pred_start[h, k] > pred_end[h, k]:
                auc += 1.
            elif pred_start[h, k] == pred_end[h, k]:
                auc += 0.5
        auc /= len(pairs_start_inds)
        aucs.append(auc)
    auc = np.mean(aucs)
    return auc, None
def auc(Y, P):
    """Area under the ROC curve (AUC).

    A performance measure for binary classification problems. Can be
    interpreted as an estimate of the probability that the classifier
    is able to discriminate between a randomly drawn positive and a
    randomly drawn negative training example. An O(n*log(n)) time
    implementation, with correction for tied predictions.

    If 2-dimensional arrays are supplied as arguments, then AUC is
    separately computed for each column, after which the AUCs are
    averaged.

    Parameters
    ----------
    Y : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Correct labels, must belong to set {-1,1}
    P : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Predicted labels, can be any real numbers.

    Returns
    -------
    auc : float
        number between 0 and 1
    """
    Y = array_tools.as_2d_array(Y)
    P = array_tools.as_2d_array(P)
    if not Y.shape == P.shape:
        raise UndefinedPerformance("Y and P must be of same shape")
    return np.mean(auc_multitask(Y, P))
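#A minimal usage sketch (hypothetical, assumes numpy as np):
#
#    >>> Y = np.array([1, 1, -1, -1])
#    >>> P = np.array([0.9, 0.3, 0.5, 0.1])
#    >>> auc(Y, P)  #3 of the 4 positive-negative pairs ranked correctly
#    0.75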
def accuracy_singletask(Y, P):
    assert Y.shape[0] == P.shape[0]
    if not np.all((Y == 1) + (Y == -1)):
        raise UndefinedPerformance("binary classification accuracy accepts as Y-values only 1 and -1")
    vlen = float(Y.shape[0])
    #sign(Y*P) is 1 for a correct prediction, -1 for an incorrect one,
    #and 0 for an abstained (P == 0) prediction, so shifting by 1 and
    #halving yields credits of 1, 0 and 0.5 respectively
    perf = np.sum(np.sign(np.multiply(Y, P)) + 1.) / (2 * vlen)
    return perf
def cindex(Y, P):
    Y = array_tools.as_labelmatrix(Y)
    P = array_tools.as_labelmatrix(P)
    perfs = cindex_multitask(Y, P)
    perfs = np.array(perfs)
    perfs = perfs[np.invert(np.isnan(perfs))]
    if len(perfs) == 0:
        raise UndefinedPerformance("No pairs, all the instances have the same output")
    return np.mean(perfs)
def accuracy_multitask(Y, P):
    Y = np.mat(Y)
    P = np.mat(P)
    if not np.all((Y == 1) + (Y == -1)):
        raise UndefinedPerformance("binary classification accuracy accepts as Y-values only 1 and -1")
    vlen = float(Y.shape[0])
    performances = np.sum(np.sign(np.multiply(Y, P)) + 1., axis=0) / (2 * vlen)
    performances = np.array(performances)[0]
    return performances
def cv(self, regparam):
    rls = self.rls
    rls.solve(regparam)
    Y = rls.Y
    #Union of all pairs for which predictions are needed
    all_pairs = set([])
    for k in range(Y.shape[1]):
        pairs = []
        for i in range(Y.shape[0] - 1):
            for j in range(i + 1, Y.shape[0]):
                if Y[i, k] != Y[j, k]:
                    pairs.append((i, j))
        #If all labels for some column are the same, ranking accuracy is undefined
        if len(pairs) == 0:
            raise UndefinedPerformance("Leave-pair-out undefined, all labels same for output %d" % k)
        all_pairs.update(pairs)
    all_start_inds = [x[0] for x in all_pairs]
    all_end_inds = [x[1] for x in all_pairs]
    #Compute leave-pair-out predictions for all pairs
    all_start_inds = np.array(all_start_inds)
    all_end_inds = np.array(all_end_inds)
    pred_start, pred_end = rls.leave_pair_out(all_start_inds, all_end_inds)
    pred_start = array_tools.as_2d_array(pred_start)
    pred_end = array_tools.as_2d_array(pred_end)
    pair_dict = dict(zip(all_pairs, range(pred_start.shape[0])))
    aucs = []
    #compute auc / ranking accuracy for each column of Y separately
    for k in range(Y.shape[1]):
        comparisons = []
        #1 if the true and predicted orders agree, 0 if they disagree,
        #0.5 if the predictions are tied
        for i in range(Y.shape[0] - 1):
            for j in range(i + 1, Y.shape[0]):
                if Y[i, k] > Y[j, k]:
                    ind = pair_dict[(i, j)]
                    if pred_start[ind, k] > pred_end[ind, k]:
                        comparisons.append(1.)
                    elif pred_start[ind, k] == pred_end[ind, k]:
                        comparisons.append(0.5)
                    else:
                        comparisons.append(0.)
                elif Y[i, k] < Y[j, k]:
                    ind = pair_dict[(i, j)]
                    if pred_start[ind, k] < pred_end[ind, k]:
                        comparisons.append(1.)
                    elif pred_start[ind, k] == pred_end[ind, k]:
                        comparisons.append(0.5)
                    else:
                        comparisons.append(0.)
        auc = np.mean(comparisons)
        aucs.append(auc)
    #Take the mean of all columnwise aucs
    auc = np.mean(aucs)
    return auc, None
def auc_singletask(Y, P):
    #the implementation has O(n*log(n)) time complexity
    #P: predicted labels
    #Y: true labels, y_i \in {-1,1} for each y_i \in Y
    #
    if not np.all((Y == 1) + (Y == -1)):
        raise UndefinedPerformance("auc accepts as Y-values only 1 and -1")
    size = len(P)
    #form a list of prediction-label pairs
    I = np.argsort(P)
    Y = Y[I]
    P = P[I]
    poscount = 0.
    #The number of positive labels that have the same prediction
    #as the current P[i] value
    posties = 0.
    #Number of pairwise mistakes this far
    errors = 0.
    j = 0
    for i in range(size):
        #j points always to the next entry in P for which
        #P[j] > P[i]. In the end j will point outside of P
        if j == i:
            poscount += posties
            posties = 0.
            while j < size and P[i] == P[j]:
                if Y[j] == 1:
                    posties += 1
                j += 1
        if Y[i] == -1:
            #every pairwise inversion of a positive-negative pair
            #incurs one error, except for ties, which incur 0.5
            #errors
            errors += poscount + 0.5 * posties
    poscount += posties
    #the number of positive-negative pairs
    paircount = poscount * (size - poscount)
    #AUC is 1 - the fraction of pairwise errors
    if paircount == 0:
        raise UndefinedPerformance("AUC undefined if both classes not present")
    AUC = 1. - errors / paircount
    return AUC
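#Sanity-check sketch (hypothetical, assumes numpy as np): the result
#should agree with a naive O(n^2) count over positive-negative pairs.
#
#    >>> Y = np.array([1, 1, -1, -1])
#    >>> P = np.array([0.9, 0.3, 0.5, 0.1])
#    >>> auc_singletask(Y, P)
#    0.75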
def cv(self, regparam):
    rls = self.rls
    rls.solve(regparam)
    Y = rls.Y
    perfs = []
    #special handling for concordance index / AUC
    if self.measure.__name__ in ["cindex", "auc"]:
        for index in range(Y.shape[1]):
            pairs = []
            for i in range(Y.shape[0] - 1):
                for j in range(i + 1, Y.shape[0]):
                    if Y[i, index] > Y[j, index]:
                        pairs.append((i, j))
                    elif Y[i, index] < Y[j, index]:
                        pairs.append((j, i))
            if len(pairs) > 0:
                pred = rls.computePairwiseCV(pairs, index)
                auc = 0.
                for pair in pred:
                    if pair[0] > pair[1]:
                        auc += 1.
                    elif pair[0] == pair[1]:
                        auc += 0.5
                auc /= len(pred)
                perfs.append(auc)
        if len(perfs) > 0:
            performance = np.mean(perfs)
        else:
            raise UndefinedPerformance("Performance undefined for all folds")
        return performance
    else:
        #Horribly inefficient, but maybe OK for small data sets
        pairs = []
        for i in range(Y.shape[0]):
            for j in range(Y.shape[0]):
                pairs.append((i, j))
        for index in range(Y.shape[1]):
            preds = rls.computePairwiseCV(pairs, index)
            for i in range(len(pairs)):
                pair = pairs[i]
                pred = preds[i]
                perfs.append(self.measure(
                    np.array([Y[pair[0], index], Y[pair[1], index]]),
                    np.array(pred)))
        perf = np.mean(perfs)
        return perf
def ova_accuracy(Y, P):
    """One-vs-all classification accuracy for multi-class problems.

    Computes the accuracy for a one-versus-all decomposed classification
    problem. Each column in Y and P corresponds to one possible class
    label. On each row, exactly one column in Y is 1, all the rest must
    be -1. The prediction for the i'th example is computed by taking the
    argmax over the indices of row i in P.

    Parameters
    ----------
    Y : {array-like}, shape = [n_samples] or [n_samples, n_classes]
        Correct labels, must belong to set {-1,1}, with exactly
        one 1 on each row.
    P : {array-like}, shape = [n_samples] or [n_samples, n_classes]
        Predicted labels, can be any real numbers.

    Returns
    -------
    accuracy : float
        number between 0 and 1
    """
    Y = np.array(Y)
    P = np.array(P)
    if not Y.shape == P.shape:
        raise UndefinedPerformance("Y and P must be of same shape")
    correct = 0
    for i in range(Y.shape[0]):
        largest_pred = None
        predicted = None
        true = None
        for j in range(Y.shape[1]):
            if Y[i, j] == 1:
                true = j
            #test against None explicitly: "not largest_pred" would
            #misbehave when the largest prediction so far is 0.0
            if (largest_pred is None) or (P[i, j] > largest_pred):
                largest_pred = P[i, j]
                predicted = j
        if true == predicted:
            correct += 1
    perf = float(correct) / float(Y.shape[0])
    return perf
def cv(self, regparam):
    rls = self.rls
    measure = self.measure
    rls.solve(regparam)
    Y = rls.Y
    performances = []
    predictions = []
    folds = rls.qidlist
    for fold in folds:
        P = rls.holdout(fold)
        predictions.append(P)
        try:
            performance = measure(Y[fold], P)
            performances.append(performance)
        except UndefinedPerformance:
            pass
    if len(performances) > 0:
        performance = np.mean(performances)
    else:
        raise UndefinedPerformance("Performance undefined for all folds")
    return performance, predictions
def spearman(Y, P):
    """Spearman correlation.

    Parameters
    ----------
    Y : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Correct labels
    P : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Predicted labels

    Returns
    -------
    correlation : float
        number between -1 and 1
    """
    Y = array_tools.as_2d_array(Y)
    P = array_tools.as_2d_array(P)
    if not Y.shape == P.shape:
        raise UndefinedPerformance("Y and P must be of same shape")
    return np.mean(spearman_multitask(Y, P))
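#A minimal usage sketch (hypothetical, assumes numpy as np):
#
#    >>> Y = np.array([1.0, 2.0, 3.0, 4.0])
#    >>> P = np.array([1.2, 1.9, 4.1, 3.5])
#    >>> spearman(Y, P)  #the predicted ranking swaps the last two items
#    0.8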
def cindex_singletask_SLOW(Y, P):
    correct = Y
    predictions = P
    assert len(correct) == len(predictions)
    disagreement = 0.
    decisions = 0.
    for i in range(len(correct)):
        for j in range(len(correct)):
            if correct[i] > correct[j]:
                decisions += 1.
                if predictions[i] < predictions[j]:
                    disagreement += 1.
                elif predictions[i] == predictions[j]:
                    disagreement += 0.5
    #Disagreement error is not defined when all the true labels are
    #the same, since then there are no pairs to compare
    if decisions == 0:
        raise UndefinedPerformance("No pairs, all the instances have the same output")
    else:
        disagreement /= decisions
    return 1. - disagreement
def cv(self, regparam):
    rls = self.rls
    folds = self.folds
    measure = self.measure
    rls.solve(regparam)
    Y = rls.Y
    performances = []
    P_all = []
    for fold in folds:
        P = rls.holdout(fold)
        P_all.append(P)
        try:
            performance = measure(Y[fold], P)
            performances.append(performance)
        except UndefinedPerformance:
            #No warning printed, the fold is simply skipped
            pass
    #performance = measure_utilities.aggregate(performances)
    if len(performances) > 0:
        performance = np.mean(performances)
    else:
        raise UndefinedPerformance("Performance undefined for all folds")
    return performance, P_all
def disagreement(Y, P):
    """Disagreement error, also known as the pairwise ranking error.

    A performance measure for ranking problems. Computes the fraction
    of pairwise disagreements between the correct and predicted
    rankings. An O(n^2)-time implementation, which can be slow for
    large problems (a loglinear time implementation would be possible
    using search trees). For query-structured data, one would typically
    want to compute the disagreement separately for each query, and
    average.

    If 2-dimensional arrays are supplied as arguments, then the
    disagreement is separately computed for each column, after which
    the disagreements are averaged.

    Parameters
    ----------
    Y : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Correct utility values, can be any real numbers
    P : {array-like}, shape = [n_samples] or [n_samples, n_labels]
        Predicted utility values, can be any real numbers.

    Returns
    -------
    disagreement : float
        number between 0 and 1
    """
    Y = array_tools.as_labelmatrix(Y)
    P = array_tools.as_labelmatrix(P)
    perfs = disagreement_multitask(Y, P)
    perfs = np.array(perfs)
    perfs = perfs[np.invert(np.isnan(perfs))]
    if len(perfs) == 0:
        raise UndefinedPerformance("No pairs, all the instances have the same label")
    perf = np.mean(perfs)
    return perf
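#A minimal usage sketch (hypothetical, assumes numpy as np). This
#measure mirrors cindex: the two scores should sum to one for the
#same inputs:
#
#    >>> Y = np.array([1.0, 2.0, 3.0, 4.0])
#    >>> P = np.array([1.5, 1.0, 3.5, 4.0])
#    >>> disagreement(Y, P)  #one discordant pair out of six
#    0.1666...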
def fscore_singletask(Y, P):
    correct = Y
    predictions = P
    if not np.all((Y == 1) + (Y == -1)):
        raise UndefinedPerformance("fscore accepts as Y-values only 1 and -1")
    assert len(correct) == len(predictions)
    TP = 0
    FP = 0
    FN = 0
    for i in range(len(correct)):
        if correct[i] == 1:
            if predictions[i] > 0.:
                TP += 1
            else:
                FN += 1
        elif correct[i] == -1:
            if predictions[i] > 0.:
                FP += 1
            #true negatives are not needed for computing F1
        else:
            #unreachable, the labels were validated above
            assert False
    P = float(TP) / (TP + FP)
    R = float(TP) / (TP + FN)
    F = 2. * (P * R) / (P + R)
    return F