Example #1
def select_confusion_pairs(true_label, predicted_label, ratio_cutoff=0.001):
    """
    Select cluster pairs that are confused between true and predicted labels, using ratio_cutoff to define confusion

    Parameters
    ----------
    true_label : true cell labels
    predicted_label : predicted cell labels
    ratio_cutoff : confusion-ratio cutoff above which a cluster pair is counted as confused

    Returns
    -------
    confused_pairs :
        list of cluster pair tuples
    """
    labels = pd.DataFrame({"true": true_label, "pred": predicted_label})
    confusion_matrix = (labels.groupby(
        "true")["pred"].value_counts().unstack().fillna(0).astype(int))

    row_sum = confusion_matrix.sum(axis=1)
    row_norm = (confusion_matrix / row_sum.values[:, None]).unstack()
    row_pairs = row_norm[row_norm > ratio_cutoff].reset_index().iloc[:, :2]
    col_sum = confusion_matrix.sum(axis=0)
    col_norm = (confusion_matrix / col_sum.values[None, :]).unstack()
    col_pairs = col_norm[col_norm > ratio_cutoff].reset_index().iloc[:, :2]

    include_pairs = set()
    for _, s in pd.concat([row_pairs, col_pairs]).iterrows():
        a, b = s.sort_values()
        if a == b:
            continue
        include_pairs.add((a, b))
    return list(include_pairs)
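
A minimal usage sketch for this helper (the labels are hypothetical; the snippet assumes pandas is imported as pd, which the function requires):

import pandas as pd  # required by select_confusion_pairs

true_label = ["A", "A", "A", "B", "B", "B", "C", "C"]
predicted_label = ["A", "A", "B", "B", "B", "A", "C", "C"]

# Pairs whose mutual confusion exceeds 10% of a cluster's cells
pairs = select_confusion_pairs(true_label, predicted_label, ratio_cutoff=0.1)
print(pairs)  # e.g. [('A', 'B')] -- order of pairs is not guaranteed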
Example #2
def evaluate_metrics(confusion_matrix):
    # https://stackoverflow.com/questions/31324218/scikit-learn-how-to-obtain-true-positive-true-negative-false-positive-and-fal
    FP = confusion_matrix.sum(axis=0) - np.diag(confusion_matrix)
    FN = confusion_matrix.sum(axis=1) - np.diag(confusion_matrix)
    TP = np.diag(confusion_matrix)
    TN = confusion_matrix.sum() - (FP + FN + TP)
    # Sensitivity, hit rate, recall, or true positive rate
    TPR = TP / (TP + FN)
    # Specificity or true negative rate
    TNR = TN / (TN + FP)
    # Precision or positive predictive value
    PPV = TP / (TP + FP)
    # Negative predictive value
    NPV = TN / (TN + FN)
    # Fall out or false positive rate
    FPR = FP / (FP + TN)
    # False negative rate
    FNR = FN / (TP + FN)
    # False discovery rate
    FDR = FP / (TP + FP)

    # Overall accuracy
    ACC = (TP + TN) / (TP + FP + FN + TN)
    # ACC_micro = (sum(TP) + sum(TN)) / (sum(TP) + sum(FP) + sum(FN) + sum(TN))
    # Macro-average the per-class accuracy to get a sense of effectiveness on the small classes
    ACC_macro = np.mean(ACC)

    return ACC_macro, ACC, TPR, TNR, PPV
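
A short usage sketch (hypothetical labels; assumes numpy is imported as np in the module that defines evaluate_metrics):

import numpy as np
from sklearn.metrics import confusion_matrix

y_true = [0, 0, 1, 1, 2, 2, 2]
y_pred = [0, 1, 1, 1, 2, 2, 0]
cm = confusion_matrix(y_true, y_pred)

ACC_macro, ACC, TPR, TNR, PPV = evaluate_metrics(cm)
print("macro accuracy:", ACC_macro)
print("per-class recall:", TPR)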
Example #3
    def confusion_derivations(self, confusion_matrix, multi=True):
        """ Get derivations of confusion matrix """

        # Basic derivations
        if confusion_matrix.shape == (2, 2) and multi is False:
            # Binary
            TN, FP, FN, TP = confusion_matrix.ravel()
        else:
            # Multiclass
            FP = (confusion_matrix.sum(axis=0) -
                  np.diag(confusion_matrix)).astype(float)
            FN = (confusion_matrix.sum(axis=1) -
                  np.diag(confusion_matrix)).astype(float)
            TP = (np.diag(confusion_matrix)).astype(float)
            TN = (confusion_matrix.sum() - (FP + FN + TP)).astype(float)

        P = (TP + FN).astype(float)
        N = (TN + FP).astype(float)

        # Add everything to a dictionary
        metrics = {'P': P.astype(int), 'N': N.astype(int),
                   'TP': TP.astype(int), 'FP': FP.astype(int),
                   'TN': TN.astype(int), 'FN': FN.astype(int)}
        # Recall
        metrics['TPR'] = TP / P
        # Specificity
        metrics['TNR'] = TN / N
        # Precision
        metrics['PPV'] = TP / (TP + FP)
        # Negative predictive value
        metrics['NPV'] = TN / (TN + FN)
        # False negative rate
        metrics['FNR'] = 1 - metrics['TPR']
        # False positive rate
        metrics['FPR'] = 1 - metrics['TNR']
        # False discovery rate
        metrics['FDR'] = 1 - metrics['PPV']
        # False Omission rate
        metrics['FOR'] = 1 - metrics['NPV']
        # Critical Success Index
        metrics['TS'] = TP / (TP + FN + FP)
        # Accuracy
        metrics['ACC'] = (TP + TN) / (P + N)
        # Balanced Accuracy
        metrics['BACC'] = (metrics['TPR'] + metrics['TNR']) / 2
        # Predicted positive condition rate
        metrics['PPCR'] = (TP + FP) / (TP + FP + TN + FN)
        # F1-score
        metrics['F1'] = 2 * (metrics['PPV'] * metrics['TPR']) / (
            metrics['PPV'] + metrics['TPR'])
        # Matthews correlation coefficient
        metrics['MCC'] = ((TP * TN) - (FP * FN)) / (np.sqrt(
            ((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))))
        # Fowlkes-Mallows index
        metrics['FM'] = np.sqrt(metrics['PPV'] * metrics['TPR'])

        # Return metrics
        return metrics
Example #4
def multi_class_tn_fp(confusion_matrix):
    """
    :param confusion_matrix: a multi-class (>2 classes) confusion matrix
    :return: tn, fp, fn, tp arrays, one entry per class
    """
    fp = confusion_matrix.sum(axis=0) - np.diag(confusion_matrix)
    fn = confusion_matrix.sum(axis=1) - np.diag(confusion_matrix)
    tp = np.diag(confusion_matrix)
    tn = confusion_matrix.sum() - (fp + fn + tp)
    return tn, fp, fn, tp
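
A quick sanity check of this decomposition (hypothetical 3-class matrix): for every class the four counts partition all samples, so they sum to the matrix total.

import numpy as np

cm = np.array([[5, 1, 0],
               [2, 7, 1],
               [0, 1, 9]])
tn, fp, fn, tp = multi_class_tn_fp(cm)
assert np.all(tn + fp + fn + tp == cm.sum())
print(tp, fp, fn, tn)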
Example #5
def stats_from_confusion_matrix_sum_macro_micro(confusion_matrix, classes,
                                                flow_timeout,
                                                activity_timemout, classifier,
                                                n_features):
    columns_stat_classes = [
        "APP", "FLOW_TIMEOUT", "ACTIVITY_TIMEOUT", "CLASSIFIER", "N_FEATURES",
        "TRUE_POSITIVE", "FALSE_POSITIVE", "TRUE_NEGATIVE", "FALSE_NEGATIVE",
        "PRECISION", "RECALL", "F1", "ACCURACY"
    ]
    stat_classes = pandas.DataFrame(columns=columns_stat_classes)

    columns_per_classifier = ["CLASSIFIER", "N_FEATURES", "FLOW_TIMEOUT", "ACTIVITY_TIMEOUT", \
                            "AVERAGE_ACCURACY", "ERROR_RATE", "MICRO_PRECISION", "MICRO_RECALL", \
                            "MICRO_F1_SCORE", "MACRO_PRECISION", "MACRO_RECALL", "MACRO_F1_SCORE"]
    stat_classifier = pandas.DataFrame(columns=columns_per_classifier)
    # rows are appended below via DataFrame.loc[len(df)] = [...]
    True_Positive = np.diag(confusion_matrix)
    False_Positive = confusion_matrix.sum(axis=0) - True_Positive
    False_Negative = confusion_matrix.sum(axis=1) - True_Positive
    True_Negative = confusion_matrix.sum() - (True_Positive + False_Negative +
                                              False_Positive)
    Precision = True_Positive / (True_Positive + False_Positive)
    Recall = True_Positive / (True_Positive + False_Negative)
    F1_score = 2 * (Precision * Recall) / (Precision + Recall)
    Accuracy = (True_Positive + True_Negative) / (
        True_Positive + True_Negative + False_Positive + False_Negative)
    for i in range(len(classes)):
        stat_classes.loc[len(stat_classes)] = [
            classes[i], flow_timeout, activity_timemout, classifier,
            n_features, True_Positive[i], False_Positive[i], True_Negative[i],
            False_Negative[i], Precision[i], Recall[i], F1_score[i],
            Accuracy[i]
        ]
    # return stat_classes
    Average_Accuracy = (Accuracy.sum()) / len(classes)
    Error_Rate_per_class = (False_Positive + False_Negative) / (
        True_Positive + True_Negative + False_Positive + False_Negative)
    Error_Rate = (Error_Rate_per_class.sum()) / len(classes)
    Micro_Precision = ((True_Positive.sum())) / ((True_Positive.sum()) +
                                                 (False_Positive.sum()))
    Micro_Recall = ((True_Positive.sum())) / ((True_Positive.sum()) +
                                              (False_Negative.sum()))
    Micro_F1_score = 2 * (Micro_Precision * Micro_Recall) / (Micro_Precision +
                                                             Micro_Recall)
    Macro_Precision = (Precision.sum()) / len(classes)
    Macro_Recall = (Recall.sum()) / len(classes)
    Macro_F1_score = 2 * (Macro_Precision * Macro_Recall) / (Macro_Precision +
                                                             Macro_Recall)
    stat_classifier.loc[(len(stat_classifier))] = [
        classifier, n_features, flow_timeout, activity_timemout,
        Average_Accuracy, Error_Rate, Micro_Precision, Micro_Recall,
        Micro_F1_score, Macro_Precision, Macro_Recall, Macro_F1_score
    ]
    return stat_classes, stat_classifier
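
A minimal call sketch with placeholder values (the class names, timeouts, classifier string and feature count below are made up for illustration; the snippet assumes numpy and pandas are imported as in the example):

import numpy as np
import pandas

cm = np.array([[40, 5],
               [3, 52]])
stat_classes, stat_classifier = stats_from_confusion_matrix_sum_macro_micro(
    cm, ["app_a", "app_b"], 120, 5, "random_forest", 20)
print(stat_classifier[["MICRO_F1_SCORE", "MACRO_F1_SCORE"]])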
Example #6
def _get_scores(confusion_matrix):
    """
    Use the confusion matrix to get precision, recall, F1 score.
    :param confusion_matrix: ndarray
    :return: None, prints scores to the console
    """
    d = np.diag(confusion_matrix)  # diagonal entries are the per-class TP
    # Note: these ratios assume rows are predictions and columns are true labels;
    # swap the axes for matrices built with the scikit-learn convention (rows = true labels)
    precision = d / confusion_matrix.sum(axis=1)
    recall = d / confusion_matrix.sum(axis=0)
    F1 = 2 / ((1 / precision) + (1 / recall))
    for i in range(confusion_matrix.shape[0]):
        print("Class {} | Precision = {:.3f} | Recall = {:.3f} | F1 = {:.3f}"
              .format(i, precision[i], recall[i], F1[i]))
Example #7
def IoU(confusion_matrix):
    '''
    Calculate the intersection over union (IoU) for each class.
    '''
    intersection = np.diag(confusion_matrix)
    union_part = confusion_matrix.sum(axis=0) + confusion_matrix.sum(
        axis=1) - np.diag(confusion_matrix)
    # Alternatively, set empty unions to 1 to avoid the division-by-zero problem:
    # union_part[union_part == 0] = 1
    with np.errstate(divide='ignore', invalid='ignore'):
        IoU = intersection / union_part
    return IoU
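
A small sketch of how this might be used, with a hypothetical 3-class segmentation confusion matrix; the mean IoU here is just np.nanmean over the per-class values:

import numpy as np

cm = np.array([[50, 2, 3],
               [4, 40, 1],
               [0, 5, 45]])
per_class_iou = IoU(cm)
mean_iou = np.nanmean(per_class_iou)  # NaN entries mark classes absent from both prediction and ground truth
print(per_class_iou, mean_iou)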
Example #8
def metrics(confusion_matrix):
    FP = confusion_matrix.sum(axis=0) - np.diag(confusion_matrix)
    FN = confusion_matrix.sum(axis=1) - np.diag(confusion_matrix)
    TP = np.diag(confusion_matrix)
    TN = confusion_matrix.sum() - (FP + FN + TP)

    # Sensitivity, hit rate, recall, or true positive rate
    TPR = TP / (TP + FN)
    # Specificity or true negative rate
    TNR = TN / (TN + FP)


    return TPR, TNR
Example #9
def evaluate_metrics(confusion_matrix,
                     y_test,
                     y_pred,
                     print_result=False,
                     f1_avg='macro'):
    # https://stackoverflow.com/questions/31324218/scikit-learn-how-to-obtain-true-positive-true-negative-false-positive-and-fal

    TP = np.diag(confusion_matrix)
    FP = confusion_matrix.sum(axis=0) - TP
    FN = confusion_matrix.sum(axis=1) - TP
    TN = confusion_matrix.sum() - (FP + FN + TP)
    # Sensitivity, hit rate, recall, or true positive rate
    TPR = TP / (TP + FN)
    # Specificity or true negative rate
    TNR = TN / (TN + FP)
    # Precision or positive predictive value
    PPV = TP / (TP + FP)
    # Negative predictive value
    NPV = TN / (TN + FN)
    # Fall out or false positive rate
    FPR = FP / (FP + TN)
    # False negative rate
    FNR = FN / (TP + FN)
    # False discovery rate
    FDR = FP / (TP + FP)

    # Overall accuracy
    ACC = (TP + TN) / (TP + FP + FN + TN)
    # ACC_micro = (sum(TP) + sum(TN)) / (sum(TP) + sum(FP) + sum(FN) + sum(TN))
    # Macro-average the per-class accuracy to get a sense of effectiveness on the small classes
    ACC_macro = np.mean(ACC)

    f1 = f1_score(y_test, y_pred, average=f1_avg)
    kappa = cohen_kappa_score(y_test, y_pred)

    if (print_result):
        print("\n")
        print("\n")
        print("============ METRICS ============")
        print(confusion_matrix)
        print("Accuracy (macro) : ", ACC_macro)
        print("F1 score         : ", f1)
        print("Cohen Kappa score: ", kappa)
        print("======= Per class metrics =======")
        print("Accuracy         : ", ACC)
        print("Sensitivity (TPR): ", TPR)
        print("Specificity (TNR): ", TNR)
        print("Precision (+P)   : ", PPV)

    return ACC_macro, ACC, TPR, TNR, PPV, f1, kappa
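
A usage sketch for this variant (hypothetical labels; assumes numpy, f1_score and cohen_kappa_score are imported where the function is defined):

import numpy as np
from sklearn.metrics import confusion_matrix, f1_score, cohen_kappa_score

y_test = [0, 0, 1, 1, 2, 2]
y_pred = [0, 1, 1, 1, 2, 0]
cm = confusion_matrix(y_test, y_pred)
results = evaluate_metrics(cm, y_test, y_pred, print_result=True, f1_avg='macro')
ACC_macro = results[0]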
Example #10
def weighted_pixel_accuracy_class(confusion_matrix):
    '''
    Calculate the frequency-weighted per-class pixel accuracy from the confusion matrix.
    '''
    freq = confusion_matrix.sum(axis=1) / confusion_matrix.sum()
    num_each_class = confusion_matrix.sum(axis=1)
    # Alternatively, set empty classes to 1 to avoid the division-by-zero problem:
    # num_each_class[num_each_class == 0] = 1
    with np.errstate(divide='ignore', invalid='ignore'):
        cls_PA = np.diag(confusion_matrix) / num_each_class
        w_cls_PA = np.multiply(freq[cls_PA >= 0], cls_PA[cls_PA >= 0]).sum()
    return w_cls_PA
Example #11
    def plot_per_class(self, conf_matrix):
        '''plot and save per class (per aa) accuracy'''
        cm = conf_matrix

        # delete start/stop/O,U in both row and columns
        cm = np.delete(cm, [0, 12, 22, 23, 24], 0)
        cm = np.delete(cm, [0, 12, 22, 23, 24], 1)
        #swap L and F so L close to V,I
        cm[:, [19, 16]] = cm[:, [16, 19]]
        cm[[19, 16], :] = cm[[16, 19], :]
        #swap F and W
        cm[:, [19, 18]] = cm[:, [18, 19]]
        cm[[19, 18], :] = cm[[18, 19], :]
        #swap G and P
        cm[:, [11, 12]] = cm[:, [12, 11]]
        cm[[11, 12], :] = cm[[12, 11], :]

        # normalise per row (per true amino acid) - adapted from scikit-learn
        per_class_acc = cm.diagonal() / cm.sum(axis=1)

        # define labels
        classes = [self.int_to_aa[i] for i in np.arange(25)]
        classes = np.delete(classes, [0, 12, 22, 23, 24], 0)
        classes[[19, 16]] = classes[[16, 19]]  #swap L and F so L close to V,I
        classes[[19, 18]] = classes[[18, 19]]  #swap F and W
        classes[[11, 12]] = classes[[12, 11]]  #swap G and P

        # plot
        fig = plt.figure(figsize=[9, 9])
        plt.bar(classes, height=per_class_acc)

        # plot settings
        tick_marks = np.arange(20)
        plt.xticks(tick_marks, classes, fontsize=14, rotation=45)
        plt.yticks(fontsize=14)
        plt.title('Per amino acid accuracy', fontsize=20)

        # total count of each aa written on top of each bar
        for i in np.arange(20):
            plt.text(i - 0.8,
                     y=per_class_acc[i] + 0.01,
                     s='{0:.0f}'.format(cm.sum(axis=1)[i]),
                     size=12,
                     rotation=45)

        plt.savefig(self.working_dir +
                    '/plot_acc_per_aa_{}.png'.format(self.ds_type))

        plt.close('all')
Example #12
def metrics(confusion_matrix):
    x = [i for i in range(25) if i != 9]
    FP = confusion_matrix.sum(axis=0) - np.diag(confusion_matrix)
    print('False Positive', FP)
    FN = confusion_matrix.sum(axis=1) - np.diag(confusion_matrix)
    print('False Negative', FN)
    TP = np.diag(confusion_matrix)
    print('True Positive', TP)
    TN = confusion_matrix.sum() - (FP + FN + TP)
    print('True Negative', TN)

    # Sensitivity, hit rate, recall, or true positive rate
    TPR = TP / (TP + FN)
    print('True Positive Rate:', TPR)
    # Specificity or true negative rate
    TNR = TN / (TN + FP)
    print('True Negative Rate:', TNR)
    # Precision or positive predictive value
    PPV = TP / (TP + FP)
    print('Positive Predictive Value:', PPV)
    # Negative predictive value
    NPV = TN / (TN + FN)
    print('Negative Predictive Value:', NPV)
    # Fall out or false positive rate
    FPR = FP / (FP + TN)
    print('False Positive Rate:', FPR)
    # False negative rate
    FNR = FN / (TP + FN)
    print('False Negative Rate:', FNR)
    # False discovery rate
    FDR = FP / (TP + FP)
    print('False Discovery Rate:', FDR)
    # Overall accuracy
    ACC = (TP + TN) / (TP + FP + FN + TN)
    print('Overall Accuracy:', ACC)

    plt.subplot(221)
    plt.bar(x, FP)
    plt.title('False Positive')
    plt.subplot(222)
    plt.bar(x, FN)
    plt.title('False Negative')
    plt.subplot(223)
    plt.bar(x, TP)
    plt.title('True Positive')
    plt.subplot(224)
    plt.bar(x, TN)
    plt.title('True Negative')
    plt.show()
Example #13
def mr_metrics(confusion_matrix, level):

    confusion_matrix = confusion_matrix.astype(float)
    # sum over axis 0 gives predicted-class totals; sum over axis 1 gives ground-truth totals

    total = np.sum(confusion_matrix)
    n_classes, _ = confusion_matrix.shape
    overall_accuracy = np.sum(np.diag(confusion_matrix)) / total

    # calculate Cohen Kappa (https://en.wikipedia.org/wiki/Cohen%27s_kappa)
    N = total
    p0 = np.sum(np.diag(confusion_matrix)) / N
    pc = np.sum(
        np.sum(confusion_matrix, axis=0) *
        np.sum(confusion_matrix, axis=1)) / N**2
    kappa = (p0 - pc) / (1 - pc)

    recall = np.diag(confusion_matrix) / (np.sum(confusion_matrix, axis=1) +
                                          1e-12)
    precision = np.diag(confusion_matrix) / (np.sum(confusion_matrix, axis=0) +
                                             1e-12)
    f1 = (2 * precision * recall) / ((precision + recall) + 1e-12)

    if level == 'global':
        return overall_accuracy, kappa, np.mean(precision), np.mean(
            recall), np.mean(f1)
    elif level == 'perclass':
        # Per class accuracy
        cl_acc = np.diag(confusion_matrix) / (confusion_matrix.sum(1) + 1e-12)

        return overall_accuracy, kappa, precision, recall, f1, cl_acc
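
A short sketch (hypothetical matrix) showing both reporting levels:

import numpy as np

cm = np.array([[30, 2, 1],
               [3, 25, 4],
               [0, 2, 33]])
oa, kappa, mean_prec, mean_rec, mean_f1 = mr_metrics(cm, level='global')
oa, kappa, prec, rec, f1, per_class_acc = mr_metrics(cm, level='perclass')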
Example #14
def validate(X_test, y_test, pipe, title, fileName):
    
    print('Test Accuracy: %.3f' % pipe.score(X_test, y_test))

    y_predict = pipe.predict(X_test)

    confusion_matrix = np.zeros((9,9))

    for p,r in zip(y_predict, y_test):
        confusion_matrix[p-1,r-1] = confusion_matrix[p-1,r-1] + 1

    print (confusion_matrix) 

    confusion_normalized = confusion_matrix.astype('float') / confusion_matrix.sum(axis=1)[:, np.newaxis]
    print (confusion_normalized)

    pylab.clf()
    pylab.matshow(confusion_normalized, fignum=False, cmap='Blues', vmin=0.0, vmax=1.0)
    ax = pylab.axes()
    ax.set_xticks(range(len(families)))
    ax.set_xticklabels(families,  fontsize=4)
    ax.xaxis.set_label_position('top') 
    ax.xaxis.set_ticks_position("top")
    ax.set_yticks(range(len(families)))
    ax.set_yticklabels(families, fontsize=4)
    pylab.title(title)
    pylab.colorbar()
    pylab.grid(False)
    pylab.savefig(fileName, dpi=900)
Example #15
    def get_confusion_classes(self):

        # id classes
        dict_int_to_string = dict((i, c) for i, c in enumerate(self.Y_labels))

        # confusion matrix
        confusion_matrix = self.get_confusion_matrix()

        # dataframe with classes and their biggest confusion class
        cm = confusion_matrix.astype('float') / confusion_matrix.sum(axis=1)[:,np.newaxis]
        cm[np.isnan(cm)] = 0

        # excluding the right classification
        np.fill_diagonal(a=cm, val=0)

        # get biggest confusion values
        cm_max_class = np.argmax(a=cm, axis=1)
        cm_max_value = np.max(a=cm, axis=1)

        target_int = list(range(0, cm.shape[0], 1))
        target_class = [dict_int_to_string[i] for i in target_int]
        confusion_class = [dict_int_to_string[i] for i in cm_max_class]

        df_report = pd.DataFrame()
        df_report['target'] = target_class
        df_report['confusion'] = confusion_class
        df_report['value'] = cm_max_value

        # Zero values mean the target class never appears in the predictions or has no confusion with other classes
        df_report = df_report[df_report['value'] != 0]

        return df_report
Example #16
    def plot_cnf_matrix(self,
                        cm,
                        classes,
                        normalize=False,
                        title='Confusion matrix',
                        cmap=plt.cm.Blues):
        """
        This function prints and plots the confusion matrix.
        Normalization can be applied by setting `normalize=True`.
        """
        import itertools
        if normalize:
            cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
            print("Normalized confusion matrix")
        else:
            print('Confusion matrix, without normalization')

        plt.imshow(cm, interpolation='nearest', cmap=cmap)
        plt.title(title)
        tick_marks = np.arange(len(classes))
        plt.xticks(tick_marks, classes, rotation=45)
        plt.yticks(tick_marks, classes)

        fmt = '.2f' if normalize else 'd'
        thresh = cm.max() / 2.
        for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
            plt.text(j,
                     i,
                     format(cm[i, j], fmt),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")

        plt.ylabel('True label')
        plt.xlabel('Predicted label')
        plt.show()
Example #17
def plot_confusion_matrix(confusion_matrix,
                          class_labels,
                          normalize=False,
                          title='Confusion Matrix',
                          cmap=plt.cm.Blues):
    """ Code courtesy of Abinav Sagar: https://towardsdatascience.com/convolutional-neural-network-for-breast-cancer-classification-52f1213dcc9 """

    if normalize:
        confusion_matrix = confusion_matrix.astype(
            'float') / confusion_matrix.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(confusion_matrix)

    plt.imshow(confusion_matrix, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(class_labels))
    plt.xticks(tick_marks, class_labels, rotation=55)
    plt.yticks(tick_marks, class_labels)
    fmt = '.2f' if normalize else 'd'
    thresh = confusion_matrix.max() / 2.
    for i, j in itertools.product(range(confusion_matrix.shape[0]),
                                  range(confusion_matrix.shape[1])):
        plt.text(j,
                 i,
                 format(confusion_matrix[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if confusion_matrix[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
Example #18
def val():
    myresnet_model.eval()

    val_acc = 0
    test_loss = 0
    with torch.no_grad():
        confusion_matrix = torch.zeros(nb_classes, nb_classes)
        for inputs, labels, _ in valloader:
            inputs, labels = inputs.to(device), labels.to(device)
            logps = myresnet_model.forward(inputs)
            batch_loss = criterion(logps, labels)
            test_loss += batch_loss.item()
            ps = torch.exp(logps)
            top_p, top_class = ps.topk(1, dim=1)
            equals = top_class == labels.view(*top_class.shape)
            val_acc += torch.mean(equals.type(torch.FloatTensor)).item()
            top_p = top_p.view(-1)
            labels = labels.view(-1)
            top_class = top_class.view(-1)
            for t, p in zip(labels, top_class):
                # np.long was removed from NumPy; plain int() is enough for indexing
                t = int(t)
                p = int(p)
                confusion_matrix[t, p] += 1
        print('confusion_matrix: ', confusion_matrix)
        per_class_acc = confusion_matrix.diag() / confusion_matrix.sum(1)
        print('per_class_acc: ', per_class_acc)
        #per_class_acc = per_class_acc.detach().cpu().numpy()
        #per_class_acc = np.reshape(per_class_acc, (1, 2))
        #per_class_acc = np.append(per_class_acc, np.array(per_class_acc), axis=0)

    return val_acc / len(valloader), per_class_acc
Example #19
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
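
A usage sketch (hypothetical labels; assumes numpy, itertools and matplotlib.pyplot are imported as in the snippet):

import itertools
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

y_true = ["cat", "dog", "dog", "cat", "bird", "bird"]
y_pred = ["cat", "dog", "cat", "cat", "bird", "dog"]
classes = ["bird", "cat", "dog"]
cm = confusion_matrix(y_true, y_pred, labels=classes)

plot_confusion_matrix(cm, classes, normalize=True, title='Pet classifier')
plt.show()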
Example #20
def plot_confusion_matrix(confusion_matrix, classes, title, normalize=False):
    plt.clf()

    # Normalize before plotting so the heat map, threshold and cell text all use the same values
    if normalize:
        confusion_matrix = confusion_matrix.astype(
            'float') / confusion_matrix.sum(axis=1)[:, np.newaxis]

    plt.imshow(confusion_matrix, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    thresh = confusion_matrix.max() / 2.
    for i, j in itertools.product(range(confusion_matrix.shape[0]),
                                  range(confusion_matrix.shape[1])):
        plt.text(j,
                 i,
                 round(confusion_matrix[i, j], 3),
                 horizontalalignment="center",
                 color="white" if confusion_matrix[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('Actual classes')
    plt.xlabel('Predicted classes')
Example #21
def weighted_MIoU(confusion_matrix):
    '''
    Calculate the frequency-weighted mean intersection over union (mIoU) across classes.
    '''
    freq = confusion_matrix.sum(axis=1) / confusion_matrix.sum()

    intersection = np.diag(confusion_matrix)
    union_part = confusion_matrix.sum(axis=0) + confusion_matrix.sum(
        axis=1) - np.diag(confusion_matrix)
    # Alternatively, set empty unions to 1 to avoid the division-by-zero problem:
    # union_part[union_part == 0] = 1
    with np.errstate(divide='ignore', invalid='ignore'):
        IoU = intersection / union_part
        w_MIoU = np.multiply(freq[IoU >= 0], IoU[IoU >= 0]).sum()
    return w_MIoU
Example #22
def plot_confusion_matrix(path,
                          confusion_matrix,
                          filename,
                          class_names=['background', 'weed', 'sugar_beet'],
                          normalize=False,
                          color_map=plt.cm.Blues,
                          eps=1e-6):
    """Reference: https://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
    """

    accuracy = np.trace(confusion_matrix) / (
        np.sum(confusion_matrix).astype('float') + eps)
    misclass = 1 - accuracy

    if normalize:
        confusion_matrix = confusion_matrix.astype('float') / (
            confusion_matrix.sum(axis=1)[:, np.newaxis] + 1e-10)

    fig, ax = plt.subplots()
    im = ax.imshow(confusion_matrix, interpolation='nearest', cmap=color_map)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(
        xticks=np.arange(confusion_matrix.shape[1]),
        yticks=np.arange(confusion_matrix.shape[0]),
        # ... and label them with the respective list entries
        xticklabels=class_names,
        yticklabels=class_names,
        title='Confusion Matrix: ' + Path(filename).stem,
        ylabel='Actual label',
        xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(),
             rotation=45,
             ha="right",
             va="center",
             rotation_mode="anchor")

    # Loop over data dimensions and create text annotations.
    fmt = '.2f' if normalize else 'd'
    thresh = confusion_matrix.max() / 2.
    for i in range(confusion_matrix.shape[0]):
        for j in range(confusion_matrix.shape[1]):
            ax.text(
                j,
                i,
                format(confusion_matrix[i, j], fmt),
                ha="center",
                va="center",
                color="white" if confusion_matrix[i, j] > thresh else "black")

    confusion_matrix_dir_path = path / 'confusion_matrix'
    if not confusion_matrix_dir_path.exists():
        confusion_matrix_dir_path.mkdir()
    confusion_matrix_path = confusion_matrix_dir_path / filename

    ax.set_ylim(len(confusion_matrix) - 0.5, -0.5)
    fig.savefig(str(confusion_matrix_path), bbox_inches='tight')
    plt.close('all')
Example #23
def kappa_stats(confusion_matrix):
    # Calculate Cohen's kappa for a binary classification problem
    # Input is a 2x2 confusion matrix
    tp, fp, fn, tn = confusion_matrix[0, 0], confusion_matrix[0, 1], confusion_matrix[1, 0], confusion_matrix[1, 1]
    po = (tp + tn) / (tp + fp + tn + fn)
    # Expected agreement sums row-total * column-total over both classes
    pe = ((tp + fp) * (tp + fn) + (fn + tn) * (fp + tn)) / ((confusion_matrix.sum())**2)
    kappa = (po - pe) / (1 - pe)
    return kappa
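
A quick cross-check against scikit-learn with hypothetical labels; with the expected-agreement term computed over both classes as above, the two values should match up to floating-point error:

from sklearn.metrics import confusion_matrix, cohen_kappa_score

y_true = [1, 0, 1, 1, 0, 0, 1, 0]
y_pred = [1, 0, 0, 1, 0, 1, 1, 0]
cm = confusion_matrix(y_true, y_pred)
print(kappa_stats(cm), cohen_kappa_score(y_true, y_pred))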
Example #24
def cramers_corrected_stat(confusion_matrix):
    chi2 = stats.chi2_contingency(confusion_matrix)[0]
    n = confusion_matrix.sum().sum()
    phi2 = chi2 / n
    r, k = confusion_matrix.shape
    phi2corr = max(0, phi2 - ((k - 1) * (r - 1)) / (n - 1))
    rcorr = r - ((r - 1)**2) / (n - 1)
    kcorr = k - ((k - 1)**2) / (n - 1)
    return np.sqrt(phi2corr / min((kcorr - 1), (rcorr - 1)))
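
A minimal sketch of calling this on a pandas crosstab (hypothetical categorical data; assumes numpy is imported as np and scipy.stats as stats, as the function expects):

import numpy as np
import pandas as pd
from scipy import stats

x = pd.Series(["red", "red", "blue", "blue", "red", "blue", "red", "blue"])
y = pd.Series(["yes", "yes", "no", "no", "yes", "no", "no", "yes"])
cm = pd.crosstab(x, y)
print(cramers_corrected_stat(cm))  # bias-corrected Cramér's V, between 0 and 1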
Example #25
def get_tp_fp(confusion_matrix):
    """ 
    Calculates the true positive and false positive rate 
    Based on code from https://stackoverflow.com/questions/31324218/scikit-learn-how-to-obtain-true-positive-true-negative-false-positive-and-fal
    :param confusion_matrix: The confusion matrix for the given dataset
    :return: dictionary with the true positive rate and false positive rate
    """
    FP = confusion_matrix.sum(axis=0) - np.diag(confusion_matrix)
    FN = confusion_matrix.sum(axis=1) - np.diag(confusion_matrix)
    TP = np.diag(confusion_matrix)
    TN = confusion_matrix.sum() - (FP + FN + TP)

    # True positive rate
    true_positive_rate = TP / (TP + FN)
    # True negative rate
    false_positive_rate = FP / (FP + TN)

    results = {"tp_rate": true_positive_rate, "fp_rate": false_positive_rate}
    return results
Example #26
def cramers_phi(x, y):
    confusion_matrix = pd.crosstab(x,y)
    chi2 = ss.chi2_contingency(confusion_matrix)[0]
    n = confusion_matrix.sum().sum()
    phi2 = chi2/n
    r,k = confusion_matrix.shape
    phi2corr = max(0, phi2-((k-1)*(r-1))/(n-1))
    rcorr = r-((r-1)**2)/(n-1)
    kcorr = k-((k-1)**2)/(n-1)
    return np.sqrt(phi2corr/min((kcorr-1),(rcorr-1)))
Example #27
def print_confusion_matrix(confusion_matrix,
                           class_names,
                           figsize=(10, 7),
                           fontsize=14,
                           normalize=True):
    """Prints a confusion matrix, as returned by sklearn.metrics.confusion_matrix, as a heatmap.

    Arguments
    ---------
    confusion_matrix: numpy.ndarray
        The numpy.ndarray object returned from a call to sklearn.metrics.confusion_matrix.
        Similarly constructed ndarrays can also be used.
    class_names: list
        An ordered list of class names, in the order they index the given confusion matrix.
    figsize: tuple
        A 2-long tuple, the first value determining the horizontal size of the output figure,
        the second determining the vertical size. Defaults to (10,7).
    fontsize: int
        Font size for axes labels. Defaults to 14.
    normalize: bool
        If True, normalize each row of the confusion matrix before plotting. Defaults to True.

    Returns
    -------
    matplotlib.figure.Figure
        The resulting confusion matrix figure
    """
    if normalize:
        confusion_matrix = (confusion_matrix.astype("float") /
                            confusion_matrix.sum(axis=1)[:, np.newaxis])
        print("Normalized confusion matrix")
    else:
        print("Confusion matrix, without normalization")
    df_cm = pd.DataFrame(
        confusion_matrix,
        index=class_names,
        columns=class_names,
    )
    fig = plt.figure(figsize=figsize)
    fmt = ".2f" if normalize else "d"
    try:
        heatmap = sns.heatmap(df_cm, annot=True, fmt=fmt)
    except ValueError:
        raise ValueError("Confusion matrix values must be integers.")
    heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(),
                                 rotation=0,
                                 ha="right",
                                 fontsize=fontsize)
    heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(),
                                 rotation=45,
                                 ha="right",
                                 fontsize=fontsize)
    plt.ylabel("True label")
    plt.xlabel("Predicted label")
    return fig
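
A usage sketch (hypothetical labels; assumes numpy, pandas, seaborn and matplotlib.pyplot are imported as np, pd, sns and plt in the module):

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

y_true = ["ham", "ham", "spam", "spam", "ham", "spam"]
y_pred = ["ham", "spam", "spam", "spam", "ham", "ham"]
cm = confusion_matrix(y_true, y_pred, labels=["ham", "spam"])
fig = print_confusion_matrix(cm, ["ham", "spam"], normalize=True)
fig.savefig("confusion_matrix.png")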
Example #28
    def forward_ni(self, filepath):
        # data processing
        df = pd.read_csv(filepath, header=None)  # no column names!!!

        df_x = df.iloc[:, :9]
        df_x = df_x.div(df_x.sum(axis=1), axis=0)  # normalize

        X = df_x
        X_scaling = StandardScaler().fit_transform(X)  # numpy.array
        input_data = torch.tensor(X_scaling, requires_grad=True)
        input_data = input_data.view(-1, self.sequence_length, self.input_size)

        y_new = df.iloc[:, -1]

        input_data = input_data.float().to(device)

        ##############
        self.model.eval()
        result = self.model(input_data)
        _, predict = torch.max(result, 1)
        predict = predict.cpu()
        predict = predict.numpy()
        i = 0
        print(predict)
        print(y_new.head(10))
        count = 0
        for i in range(len(predict)):
            # print(predict)
            if predict[i] == y_new[i]:
                count += 1

        acc = float(count / len(predict))
        # print('Accuracy: {}%'.format(acc*100))
        from sklearn.metrics import confusion_matrix

        confusion_matrix = confusion_matrix(y_true=y_new, y_pred=predict)

        # Normalize the confusion matrix by row (true label counts)
        confusion_matrix = confusion_matrix.astype(
            'float') / confusion_matrix.sum(axis=1)[:, np.newaxis]
        df_cm = pd.DataFrame(confusion_matrix)

        # plot confusion matrix
        fig, ax = plt.subplots()
        sns.heatmap(df_cm, cmap="coolwarm", annot=False)
        fig.set_size_inches(8, 6)
        ax.set_title("Confusion Matrix of RNN, Data: {}".format(filepath))
        ax.set_xlabel('Predicted Label', fontsize=12)
        ax.set_ylabel('Actual Label', fontsize=12)

        plt.show()

        return predict, acc
Example #29
def calculate_f1(confusion_matrix):
    # Expects a square pandas DataFrame confusion matrix (rows = true labels, columns = predictions)
    sum_r = confusion_matrix.sum(0)   # column sums: predicted counts per class
    sum_c = confusion_matrix.sum(1)   # row sums: true counts per class

    evaluation_matrix = dict()
    keys = sorted(confusion_matrix.keys())
    for key in keys:
        tp = confusion_matrix[key][key]
        if sum_r[key] == 0:
            p = 0
        else:
            p = tp / sum_r[key]
        if sum_c[key] == 0:
            r = 0
        else:
            r = tp / sum_c[key]
        if (p + r) == 0:
            f1 = 0
        else:
            f1 = (2 * p * r) / (p + r)
        evaluation_matrix[key] = f1
    # Macro-averaged F1 across all classes
    return sum(evaluation_matrix.values()) / len(evaluation_matrix)
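
A short sketch, assuming the confusion matrix is passed in as a square pandas DataFrame (which is what the .keys() / column indexing in the snippet implies); with the per-class collection above, the result is the macro-averaged F1:

import pandas as pd

cm = pd.DataFrame([[8, 2, 0],
                   [1, 9, 1],
                   [0, 3, 6]],
                  index=[0, 1, 2], columns=[0, 1, 2])
print(calculate_f1(cm))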
Example #30
def calculate_confusion_matrix_stats_predictions(labels, predictions):
    confusion_matrix = calculate_confusion_matrix_predictions(
        labels, predictions)
    FP = confusion_matrix.sum(axis=0) - np.diag(confusion_matrix)
    FN = confusion_matrix.sum(axis=1) - np.diag(confusion_matrix)
    TP = np.diag(confusion_matrix)
    TN = confusion_matrix.sum() - (FP + FN + TP)

    # Sensitivity, hit rate, recall, or true positive rate
    TPR = TP / (TP + FN)
    # Specificity or true negative rate
    TNR = TN / (TN + FP)
    # Precision or positive predictive value
    PPV = TP / (TP + FP)
    # Negative predictive value
    NPV = TN / (TN + FN)
    # Fall out or false positive rate
    FPR = FP / (FP + TN)
    # False negative rate
    FNR = FN / (TP + FN)
    # False discovery rate
    FDR = FP / (TP + FP)
    Acc = (TN + TP) / (TN + TP + FN + FP)
    return {
        "Acc": Acc,
        "TP": TP,
        "TN": TN,
        "FP": FP,
        "FN": FN,
        "TPR": TPR,
        "TNR": TNR,
        "PPV": PPV,
        "NPV": NPV,
        "FPR": FPR,
        "FNR": FNR,
        "FDR": FDR,
        "AM": (TPR + TNR) / 2,
        "GM": np.sqrt(TPR * TNR),
    }
Example #31
def plot_me_nice(confusion_matrix, labels, style='gist_heat', title='unknown'):
    confusion_matrix = confusion_matrix.astype('float') / confusion_matrix.sum(
        axis=1)[:, np.newaxis]
    fig, ax = plt.subplots()
    hm = ax.pcolor(confusion_matrix, cmap=style)
    ax.set_xticks(np.arange(len(labels)) + 0.5, minor=False)
    ax.set_yticks(np.arange(len(labels)) + 0.5, minor=False)
    ax.set_xticklabels(labels, minor=False)
    ax.set_yticklabels(labels, minor=False)
    plt.xticks(rotation=90)
    plt.colorbar(hm)
    plt.title(title)
    return plt.show()