Esempio n. 1
0
def recall(y_true, y_pred, contingency=None, PBV=None):
    """Return the pair-counting recall, ``tp / (tp + fn)``.

    Args:
        y_true: Reference labels; only used to build ``contingency``
            when it is not supplied.
        y_pred: Predicted labels; only used to build ``contingency``
            when it is not supplied.
        contingency: Optional precomputed contingency table.
        PBV: Optional precomputed pair-based values, a 4-item sequence
            unpacked as ``(tp, fp, fn, tn)``.

    Returns:
        The ratio of true-positive pairs to true-positive plus
        false-negative pairs.
    """
    contingency = (utility.compute_contingency(y_true, y_pred)
                   if contingency is None else contingency)
    pair_counts = (utility.pair_based_values(contingency)
                   if PBV is None else PBV)
    true_pos, _, false_neg, _ = pair_counts
    relevant_pairs = true_pos + false_neg
    return true_pos / relevant_pairs
Esempio n. 2
0
def fowlkes_mallows(y_true, y_pred, contingency=None, PBV=None):
    """Return the Fowlkes-Mallows score computed from pair counts.

    The value is ``sqrt(tp^2 / ((tp + fp) * (tp + fn)))``.

    Args:
        y_true: Reference labels; only used to build ``contingency``
            when it is not supplied.
        y_pred: Predicted labels; only used to build ``contingency``
            when it is not supplied.
        contingency: Optional precomputed contingency table.
        PBV: Optional precomputed pair-based values, a 4-item sequence
            unpacked as ``(tp, fp, fn, tn)``.

    Returns:
        The Fowlkes-Mallows score.
    """
    contingency = (utility.compute_contingency(y_true, y_pred)
                   if contingency is None else contingency)
    pair_counts = (utility.pair_based_values(contingency)
                   if PBV is None else PBV)
    # The true-negative count does not enter the formula.
    true_pos, false_pos, false_neg, _ = pair_counts
    squared_hits = true_pos * true_pos
    scale = (true_pos + false_pos) * (true_pos + false_neg)
    return np.sqrt(squared_hits / scale)
Esempio n. 3
0
def jaccard_index(y_true, y_pred, contingency=None, PBV=None):
    """Return the pair-counting Jaccard index, ``tp / (tp + fp + fn)``.

    Args:
        y_true: Reference labels; only used to build ``contingency``
            when it is not supplied.
        y_pred: Predicted labels; only used to build ``contingency``
            when it is not supplied.
        contingency: Optional precomputed contingency table.
        PBV: Optional precomputed pair-based values, a 4-item sequence
            unpacked as ``(tp, fp, fn, tn)``.

    Returns:
        The Jaccard index of the pair counts.
    """
    contingency = (utility.compute_contingency(y_true, y_pred)
                   if contingency is None else contingency)
    pair_counts = (utility.pair_based_values(contingency)
                   if PBV is None else PBV)
    # True negatives are ignored by the Jaccard index.
    true_pos, false_pos, false_neg, _ = pair_counts
    union_size = true_pos + false_pos + false_neg
    return true_pos / union_size
Esempio n. 4
0
def accuracy(y_true, y_pred, contingency=None, PBV=None):
    """Return the pair-counting accuracy, ``(tp + tn) / (tp + tn + fp + fn)``.

    Note: in the pair-counting context this is identical to the Rand
    Index criterion.

    Args:
        y_true: Reference labels; only used to build ``contingency``
            when it is not supplied.
        y_pred: Predicted labels; only used to build ``contingency``
            when it is not supplied.
        contingency: Optional precomputed contingency table.
        PBV: Optional precomputed pair-based values, a 4-item sequence
            unpacked as ``(tp, fp, fn, tn)``.

    Returns:
        The fraction of pairs on which both labelings agree.
    """
    contingency = (utility.compute_contingency(y_true, y_pred)
                   if contingency is None else contingency)
    pair_counts = (utility.pair_based_values(contingency)
                   if PBV is None else PBV)
    true_pos, false_pos, false_neg, true_neg = pair_counts
    agreements = true_pos + true_neg
    all_pairs = agreements + false_pos + false_neg
    return agreements / all_pairs
Esempio n. 5
0
def balanced_accuracy(y_true, y_pred, contingency=None, PBV=None):
    """Return the pair-counting balanced accuracy.

    The value is the equally weighted mean of ``tp / (tp + fn)`` and
    ``tn / (tn + fp)``.

    Args:
        y_true: Reference labels; only used to build ``contingency``
            when it is not supplied.
        y_pred: Predicted labels; only used to build ``contingency``
            when it is not supplied.
        contingency: Optional precomputed contingency table.
        PBV: Optional precomputed pair-based values, a 4-item sequence
            unpacked as ``(tp, fp, fn, tn)``.

    Returns:
        ``0.5 * tp / (tp + fn) + 0.5 * tn / (tn + fp)``.
    """
    contingency = (utility.compute_contingency(y_true, y_pred)
                   if contingency is None else contingency)
    pair_counts = (utility.pair_based_values(contingency)
                   if PBV is None else PBV)
    true_pos, false_pos, false_neg, true_neg = pair_counts
    positive_rate = true_pos / (true_pos + false_neg)
    negative_rate = true_neg / (true_neg + false_pos)
    return 0.5 * positive_rate + 0.5 * negative_rate
Esempio n. 6
0
def false_alarm_rate(y_true, y_pred, contingency=None, PBV=None):
    """Return the pair-counting false alarm rate, ``fp / (fp + fn)``.

    Args:
        y_true: Reference labels; only used to build ``contingency``
            when it is not supplied.
        y_pred: Predicted labels; only used to build ``contingency``
            when it is not supplied.
        contingency: Optional precomputed contingency table.
        PBV: Optional precomputed pair-based values, a 4-item sequence
            unpacked as ``(tp, fp, fn, tn)``.

    Returns:
        ``fp / (fp + fn)``, or ``0`` when there are neither false
        positives nor false negatives.
    """
    if contingency is None:
        contingency = utility.compute_contingency(y_true, y_pred)
    # Identity test, not ``==``: comparing a NumPy array to None with ``==``
    # is element-wise and makes the ``if`` raise on multi-element arrays.
    if PBV is None:
        PBV = utility.pair_based_values(contingency)
    _, fp, fn, _ = PBV
    # NOTE(review): the usual false alarm rate is fp / (fp + tn); this
    # function divides by (fp + fn) instead -- confirm this is intended.
    errors = fp + fn
    if errors == 0:
        # No erroneous pairs at all: avoid dividing by zero.
        return 0
    return fp / errors
Esempio n. 7
0
def clustering_error(y_true, y_pred, contingency=None, PBV=None):
    """Print a notice that Clustering Error is redundant with Accuracy.

    Clustering Error and Accuracy can differ in general (for instance
    with micro/macro/weighted averaging), but in the data-pairs context
    Clustering Error is always ``1 - Accuracy``. No score is computed
    here; the function only prints a message and implicitly returns
    ``None``.

    Args:
        y_true: Reference labels; only used to build ``contingency``
            when it is not supplied.
        y_pred: Predicted labels; only used to build ``contingency``
            when it is not supplied.
        contingency: Optional precomputed contingency table.
        PBV: Optional precomputed pair-based values.
    """
    contingency = (utility.compute_contingency(y_true, y_pred)
                   if contingency is None else contingency)
    # Still derived when missing, although the result is not used below.
    PBV = utility.pair_based_values(contingency) if PBV is None else PBV
    notice = "'Clustering Error' is redundant criterion ; Please use Accuracy criterion instead, and compute  1 - Accuracy"
    print(notice)
Esempio n. 8
0
def goodness(y_true, y_pred, contingency=None, PBV=None, pre=None, rec=None):
    """Return the goodness criterion: the mean of precision and recall.

    Args:
        y_true: Reference labels, forwarded to the helpers that fill in
            any missing intermediate value.
        y_pred: Predicted labels, forwarded likewise.
        contingency: Optional precomputed contingency table.
        PBV: Optional precomputed pair-based values.
        pre: Optional precomputed precision.
        rec: Optional precomputed recall.

    Returns:
        ``0.5 * (pre + rec)``.
    """
    contingency = (utility.compute_contingency(y_true, y_pred)
                   if contingency is None else contingency)
    PBV = utility.pair_based_values(contingency) if PBV is None else PBV
    # Missing scores are derived from the shared intermediate values.
    pre = precision(y_true, y_pred, contingency, PBV) if pre is None else pre
    rec = recall(y_true, y_pred, contingency, PBV) if rec is None else rec
    total = pre + rec
    return 0.5 * total
Esempio n. 9
0
def f_beta_score(y_true,
                 y_pred,
                 beta,
                 contingency=None,
                 PBV=None,
                 pre=None,
                 rec=None):
    """Return the F-beta score built from precision and recall.

    The value is ``(1 + beta^2) * pre * rec / (beta^2 * pre + rec)``.

    Args:
        y_true: Reference labels, forwarded to the helpers that fill in
            any missing intermediate value.
        y_pred: Predicted labels, forwarded likewise.
        beta: Weight applied (squared) to precision in the denominator.
        contingency: Optional precomputed contingency table.
        PBV: Optional precomputed pair-based values.
        pre: Optional precomputed precision.
        rec: Optional precomputed recall.

    Returns:
        The F-beta score, or ``0`` when ``pre * rec`` equals zero.
    """
    contingency = (utility.compute_contingency(y_true, y_pred)
                   if contingency is None else contingency)
    PBV = utility.pair_based_values(contingency) if PBV is None else PBV
    pre = precision(y_true, y_pred, contingency, PBV) if pre is None else pre
    rec = recall(y_true, y_pred, contingency, PBV) if rec is None else rec

    beta_squared = np.power(beta, 2)
    if pre * rec == 0:
        # A zero product would zero the numerator (and possibly the
        # denominator too); report 0 instead of dividing.
        return 0
    weighted_product = (1 + beta_squared) * pre * rec
    weighted_sum = beta_squared * pre + rec
    return weighted_product / weighted_sum
Esempio n. 10
0
def adjusted_rand_index(y_true,
                        y_pred,
                        contingency=None,
                        PBV=None,
                        a_i=None,
                        b_j=None):
    """Return the Adjusted Rand Index computed from a contingency table.

    The score is ``(index - expected) / (max_index - expected)`` where all
    three terms are pair counts (the common division by ``comb(N, 2)``
    cancels out and is therefore omitted).

    Args:
        y_true: Reference labels; only used to build ``contingency``
            when it is not supplied.
        y_pred: Predicted labels; only used to build ``contingency``
            when it is not supplied.
        contingency: Optional precomputed contingency table.
        PBV: Accepted for signature compatibility with the other
            criteria; the formula below does not use it.
        a_i: Optional precomputed ``utility.compute_a_i(contingency)``
            (presumably the marginal sums along one axis -- confirm).
        b_j: Optional precomputed ``utility.compute_b_j(contingency)``
            (presumably the marginal sums along the other axis -- confirm).

    Returns:
        The Adjusted Rand Index. ``1.0`` is returned when the normalising
        denominator is zero (fewer than two samples, or both partitions
        trivial), since both partitions are then identical.
    """
    if contingency is None:
        # Qualified like every other criterion; the bare name is not
        # guaranteed to be in scope.
        contingency = utility.compute_contingency(y_true, y_pred)
    if a_i is None:
        a_i = utility.compute_a_i(contingency)
    if b_j is None:
        b_j = utility.compute_b_j(contingency)

    n_samples = np.sum(a_i)
    total_pairs = comb(n_samples, 2)
    if total_pairs == 0:
        # No pair of samples exists, so there is nothing to disagree on.
        return 1.0

    pairs_a = np.sum(comb(a_i, 2))
    pairs_b = np.sum(comb(b_j, 2))
    # The three quantities below are un-normalised: the division by
    # comb(N, 2) is omitted because it cancels in the final ratio.
    index = np.sum(comb(contingency, 2))
    expected_index = (pairs_a * pairs_b) / total_pairs
    max_index = 0.5 * (pairs_a + pairs_b)

    denominator = max_index - expected_index
    if denominator == 0:
        # Only reachable when both partitions are a single cluster or both
        # are all singletons, i.e. a perfect (degenerate) match.
        return 1.0
    return (index - expected_index) / denominator
Esempio n. 11
0
def compute_prediction_metrics(y_true, y_pred):
    """Compute every criterion listed in ``CRITERION_LIST`` for one prediction.

    Intermediate values shared by several formulas (contingency table,
    ``a_i``/``b_j``, entropies, pair-based values) are computed once and
    passed to each criterion function.

    Args:
        y_true: Reference labels.
        y_pred: Predicted labels.

    Returns:
        A dict mapping each criterion's short name to its value. An error
        message is printed when the set of keys differs from
        ``CRITERION_LIST``; the dict is returned either way.
    """
    # Values that appear in the formulas of several criteria.
    contingency = utility.compute_contingency(y_true, y_pred)
    a_i = utility.compute_a_i(contingency)
    b_j = utility.compute_b_j(contingency)
    true_clustering_entropy = sklearn.metrics.cluster.entropy(y_true)
    predicted_clustering_entropy = sklearn.metrics.cluster.entropy(y_pred)
    PBV = utility.pair_based_values(contingency)

    # Individual criterion values.
    mi = converted_metrics.mutual_information(y_true, y_pred, contingency, a_i,
                                              b_j)
    ari = converted_metrics.adjusted_rand_index(y_true, y_pred, contingency,
                                                PBV, a_i, b_j)
    ami = converted_metrics.adjusted_mutual_information(
        y_true, y_pred, contingency, a_i, b_j, mi, true_clustering_entropy,
        predicted_clustering_entropy)
    # NOTE(review): ``compl`` is filled by ``homogeneity`` and ``homog`` by
    # ``completness``; the entropy arguments are crossed accordingly. Confirm
    # against the helper signatures that this naming is intentional.
    compl = converted_metrics.homogeneity(y_true, y_pred, contingency, mi,
                                          predicted_clustering_entropy)
    homog = converted_metrics.completness(y_true, y_pred, contingency, mi,
                                          true_clustering_entropy)
    vmeasure = converted_metrics.v_measure(y_true, y_pred, 1, mi, contingency,
                                           predicted_clustering_entropy,
                                           true_clustering_entropy, homog,
                                           compl)
    entropy = new_metrics.cond_entropy(y_true, y_pred, contingency, a_i)
    # Accuracy is not computed separately: in the pair-counting context it is
    # identical to the Rand Index ("ri" below).
    precision = converted_metrics.precision(y_true, y_pred, contingency, PBV)
    recall = converted_metrics.recall(y_true, y_pred, contingency, PBV)
    falsealarm = converted_metrics.false_alarm_rate(y_true, y_pred,
                                                    contingency, PBV)
    fm = converted_metrics.fowlkes_mallows(y_true, y_pred, contingency, PBV)
    f1 = converted_metrics.f_beta_score(y_true, y_pred, 1, contingency, PBV,
                                        precision, recall)
    purity = new_metrics.purity(y_true, y_pred, contingency)
    inversed_purity = new_metrics.inversed_purity(y_true, y_pred, contingency)
    epratio = new_metrics.ep_ratio(y_true, y_pred, contingency, a_i, entropy,
                                   purity)
    jaccard = converted_metrics.jaccard_index(y_true, y_pred, contingency, PBV)
    nmi = converted_metrics.normalized_mutual_information(
        y_true, y_pred, contingency, a_i, b_j, mi, true_clustering_entropy,
        predicted_clustering_entropy)
    ri = new_metrics.rand_index(y_true, y_pred, contingency, PBV)
    vi = new_metrics.variation_of_information(y_true, y_pred, contingency, a_i,
                                              b_j)
    # Clustering error is not calculated: it is always 1 - accuracy.
    goodness = converted_metrics.goodness(y_true, y_pred, contingency, PBV)
    bal_accuracy = converted_metrics.balanced_accuracy(y_true, y_pred,
                                                       contingency, PBV)
    q2 = new_metrics.q2(y_true, y_pred, contingency, entropy, a_i, b_j)

    metrics_dictionnary = {
        "mi": mi,
        "ari": ari,
        "ami": ami,
        "compl": compl,
        "homog": homog,
        "vmeasure": vmeasure,
        "entropy": entropy,
        "precision": precision,
        "recall": recall,
        "falsealarm": falsealarm,
        "fm": fm,
        "f1": f1,
        "purity": purity,
        "inv_purity": inversed_purity,
        "epratio": epratio,
        "jaccard": jaccard,
        "nmi": nmi,
        "ri": ri,
        "vi": vi,
        "goodness": goodness,
        "balacc": bal_accuracy,
        "q2": q2,
    }

    # Compare as sets on both sides: a set never equals a list or tuple, so
    # without the conversion a sequence-typed CRITERION_LIST would always
    # trigger the error message.
    if set(metrics_dictionnary) != set(CRITERION_LIST):
        print(
            "ERROR : One or several criterion are not computed in main.compute_prediction_metrics"
        )

    return metrics_dictionnary