Example #1
from bisect import bisect_left

import pandas as pd
from sklearn.metrics import precision_recall_curve as prc


def cal_rec_prec(label, predict_pos, stride=0.05):
    ''' Recall-precision curve, tabulated at evenly spaced thresholds '''
    thresholds = [round(i * stride, 2) for i in range(round(1 / stride) + 1)]
    prec_pos, rec_pos, thresh_pos = prc(label, predict_pos, pos_label=1)
    predict_neg = [1 - p for p in predict_pos]
    prec_neg, rec_neg, thresh_neg = prc(label, predict_neg, pos_label=0)
    df = pd.DataFrame(columns=[
        'threshold', 'recall_pos', 'precision_pos', 'recall_neg',
        'precision_neg'
    ])
    for i, threshold in enumerate(thresholds):
        # index of the first curve threshold >= the grid threshold
        idx_pos = bisect_left(thresh_pos, threshold)
        idx_neg = bisect_left(thresh_neg, threshold)
        df.loc[str(i)] = [
            threshold, rec_pos[idx_pos], prec_pos[idx_pos], rec_neg[idx_neg],
            prec_neg[idx_neg]
        ]
    return df
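A minimal usage sketch for the helper above; the labels and scores here are random and purely illustrative.

# Hypothetical smoke test for cal_rec_prec with synthetic data.
import numpy as np

rng = np.random.default_rng(0)
labels = rng.integers(0, 2, size=200).tolist()     # binary ground truth
scores = rng.uniform(0.0, 1.0, size=200).tolist()  # predicted P(y = 1)

table = cal_rec_prec(labels, scores, stride=0.1)
print(table[['threshold', 'recall_pos', 'precision_pos']])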
Example #2
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve as prc


def plot_precision_recall(labels, label_probs):
    # Precision-recall curve => best for imbalanced data
    precision, recall, thresholds = prc(labels, label_probs)
    plt.plot(recall, precision)
    plt.plot([0, 1], [0.5, 0.5], linestyle='--')
    plt.title('Precision-Recall Curve')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.grid(True)
    plt.show()
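A quick, hypothetical call to plot_precision_recall; the data is random and only meant to exercise the function (assumes the imports above).

# Hypothetical smoke test for plot_precision_recall.
import numpy as np

rng = np.random.default_rng(1)
y_true = rng.integers(0, 2, size=100)
y_prob = rng.uniform(size=100)
plot_precision_recall(y_true, y_prob)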
def gwas_roc(weights, causal_snps, positions=None, top=1000, nearby=1000):
    weights = limitPrediction(weights, top)
    score = np.array(weights)
    label = np.zeros(len(weights))
    if positions is None:
        positions = getPositions(len(score))
    for k in causal_snps:
        # mark the highest-scoring SNP within the window around each causal SNP as positive
        mini, maxi = getNearbyIndex(k, positions, nearby)
        i = np.argmax(score[mini:maxi])
        label[mini + i] = 1
    # fpr, tpr, t = roc_curve(label, score)
    p, r, t = prc(label, score)
    # return fpr, tpr
    return r, p
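limitPrediction, getPositions and getNearbyIndex are project-specific helpers not shown here; a self-contained sketch of the final step (turning per-SNP scores and derived labels into a precision-recall curve and its area) could look like this, with entirely made-up data.

import numpy as np
from sklearn.metrics import auc
from sklearn.metrics import precision_recall_curve as prc

rng = np.random.default_rng(0)
score = rng.random(500)                             # per-SNP association weights (made up)
label = np.zeros(500)
label[rng.choice(500, size=10, replace=False)] = 1  # SNPs flagged as causal (made up)

precision, recall, _ = prc(label, score)
print('area under the precision-recall curve:', auc(recall, precision))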
Example #5
def extra_analysis(cls, tdata, tclss, labels, n_folds=10):
    clss = sorted(labels.keys())
    lbs = [labels[cl] for cl in clss]
    cv = StratifiedKFold(n_splits=n_folds)
    keys = (
        "fprs",
        "tprs",
        "roc_scores",
        "pr_scores",
        "precisions",
        "recalls",
        "thresholds",
    )
    train_errors, test_errors, scores, cms = [], [], [], []
    lk = {l: {k: [] for k in keys} for l in clss}
    clf = cls["classifier"](**cls["kwargs"])
    for train, test in cv.split(tdata, tclss):
        X_train, y_train = tdata[train], tclss[train]
        X_test, y_test = tdata[test], tclss[test]
        # fit train data
        clf.fit(X_train, y_train)

        train_score = clf.score(X_train, y_train)
        test_score = clf.score(X_test, y_test)
        scores.append(test_score)

        train_errors.append(1 - train_score)
        test_errors.append(1 - test_score)

        y_pred = clf.predict(X_test)
        cms.append(confusion_matrix(y_test, y_pred))
        # get probability
        proba = clf.predict_proba(X_test)
        # compute score for each class VS rest
        for idx, label in enumerate(clss):
            fpr, tpr, roc_thr = roc_curve(y_test, proba[:, idx], pos_label=label)
            precision, recall, pr_thr = prc(y_test == label, proba[:, idx])
            lk[label]["fprs"].append(fpr)
            lk[label]["tprs"].append(tpr)
            lk[label]["roc_scores"].append(auc(fpr, tpr))

            lk[label]["precisions"].append(precision)
            lk[label]["recalls"].append(recall)
            lk[label]["thresholds"].append(pr_thr)
            lk[label]["pr_scores"].append(auc(recall, precision))
    cls["label scores"] = lk
    cls["train errors"] = np.array(train_errors)
    cls["test errors"] = np.array(test_errors)
    cls["confusion matrix"] = cms
Example #6
    def computeAUROC(dataGT, dataPRED, classCount):
        # per-class ROC AUC, PR AUC and average precision for multi-label outputs
        outAUROC = []
        outAUPRC = []
        outAP = []
        
        datanpGT = dataGT.cpu().numpy()
        datanpPRED = dataPRED.cpu().numpy()
        
        for i in range(classCount):
            outAUROC.append(roc_auc_score(datanpGT[:, i], datanpPRED[:, i], average='weighted'))
            outP, outR, _ = prc(datanpGT[:, i], datanpPRED[:, i])
            outAUPRC.append(auc(outR, outP))
            outAP.append(ap(datanpGT[:, i], datanpPRED[:, i]))
                
        return outAUROC, outAUPRC, outAP
Example #7
def evaluate_model(h5_file, pred_file):
    """
    Evaluate the trained model: plot the ROC and precision-recall curves
    and compute AUC / average precision.

    inputs:
    path to the trained model (.h5 file), path to the HDF5 file with the
    test images and labels.

    outputs:
    ROC and PR plots saved next to the model file.
    """
    batch_size = 32
    model = load_model(h5_file)
    file_path = os.path.dirname(h5_file)
    filename_base = os.path.basename(h5_file).split('.')[0]

    # open the HDF5 file before the try block so the finally clause cannot
    # hit an unbound name if loading the model fails
    hdf5_file = tables.open_file(pred_file, mode='r')
    try:
        m_pred = hdf5_file.root.test_img.shape[0]
        steps = int(ceil(m_pred / batch_size))
        generator = read_hdf5(hdf5_file, dataset="test", batch_size=32)

        preds = model.predict_generator(generator, steps=steps, verbose=1)
        preds = np.array(preds)[:, 1]
        logging.debug(f'preds: {preds}')
        true_values = hdf5_file.root.test_labels
        fpr, tpr, _ = roc_curve(list(true_values), list(preds))
        precision, recall, thresholds = prc(list(true_values), list(preds))
        average_precision = average_precision_score(
            list(true_values), list(preds))
        roc_auc = auc(fpr, tpr)

        roc_name = os.path.join(file_path, filename_base + "_roc.png")
        prc_name = os.path.join(file_path, filename_base + "_prc.png")
        clear_plot()
        plot_roc(fpr, tpr, roc_auc, roc_name)
        clear_plot()
        plot_prc(recall, precision, average_precision, prc_name)
        clear_plot()

    finally:
        hdf5_file.close()
Example #8
def analyze(classifier, X_val, y_val, prc_ax, roc_ax, **params):

    # y_predict = classifier.predict(X_val)
    if params['model'] in ('svm', 'logistic'):
        y_predict = classifier.decision_function(X_val)
    else:
        y_predict = classifier.predict_proba(X_val)[:, 1]

    # Accuracy
    accuracy = classifier.score(X_val, y_val)

    # Precision-Recall
    auprc = prc_score(y_val, y_predict)
    precision, recall, thresholds = prc(y_val, y_predict)
    prc_ax.plot(recall, precision, label='AUC={}'.format(auprc))

    # Receiver Operating Characteristics
    fpr, tpr, thr = roc(y_val, y_predict, pos_label=1)
    auroc = roc_score(fpr, tpr)
    roc_ax.plot(fpr, tpr, label='AUC={}'.format(auroc))

    return accuracy, auprc, auroc
Example #9
import matplotlib.pyplot as plt
import numpy as np
import torch


def precision_recall_curve(output, target):
    try:
        from sklearn.metrics import precision_recall_curve as prc
    except ImportError:
        raise RuntimeError(
            "Precision Recall Curve requires scikit-learn to be installed.")

    with torch.no_grad():
        pred = torch.argmax(output, dim=1)
        assert pred.shape[0] == len(target)
        # prc returns (precision, recall, thresholds); plot recall on x, precision on y
        precision, recall, _ = prc(target.cpu().numpy(), output[:, 1].cpu().numpy())

    fig = plt.figure()
    plt.plot(recall, precision)
    fig.canvas.draw()

    buf = np.asarray(fig.canvas.buffer_rgba(), dtype=np.uint8)[:, :, :3]
    image = torch.from_numpy(buf).permute(2, 0, 1)

    plt.close(fig)

    return image
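A hypothetical call to the function above, rendering the PR curve of random two-class scores into an image tensor (random data, illustration only).

# Hypothetical usage: random 2-class probabilities -> PR-curve image tensor.
import torch

probs = torch.softmax(torch.randn(100, 2), dim=1)
targets = torch.randint(0, 2, (100,))
image = precision_recall_curve(probs, targets)
print(image.shape)  # (3, height, width)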
Example #10
with open('predictions.csv', 'r') as f:
    lines = list(map(lambda l: l.strip(), f.readlines()))

from sklearn.metrics import precision_recall_curve as prc
from sklearn.metrics import auc

ys = []
probas = []
for l in lines:
    y, proba = l.split(',')
    y = 0 if y == '-' else 1
    proba = float(proba)
    ys.append(y)
    probas.append(proba)

P, R, T = prc(ys, probas)
print(auc(R, P))
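average_precision_score gives a closely related, step-wise summary of the same curve; a small comparison with made-up scores (illustrative values only).

import numpy as np
from sklearn.metrics import auc, average_precision_score
from sklearn.metrics import precision_recall_curve as prc

y = np.array([0, 0, 1, 1, 1, 0, 1, 0])
scores = np.array([0.1, 0.4, 0.35, 0.8, 0.65, 0.2, 0.9, 0.5])

P, R, T = prc(y, scores)
print('trapezoidal PR AUC :', auc(R, P))
print('average precision  :', average_precision_score(y, scores))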
Example #11
            # classify the feature vector and store the output
            p = clf_linear.predict(feat)
            pred_lin.append(p)
            p = clf_rbf.predict(feat)
            pred_rbf.append(p)
    # Now let's calculate the accuracy and the confusion matrix for each.
    acc_lin = accuracy_score(eval_labels, pred_lin)
    print('Accuracy of the linear SVM based BoVW is: {:0.04f}'.format(acc_lin))
    print(confusion_matrix(eval_labels, pred_lin))

    acc_rbf = accuracy_score(eval_labels, pred_rbf)
    print('Accuracy of the rbf SVM based BoVW is: {:0.04f}'.format(acc_rbf))
    print(confusion_matrix(eval_labels, pred_rbf))

    # now the f1score stuff
    p, r, t = prc(eval_labels, pred_rbf)
    # print( 't', len( t ) )
    f1 = 2 * p * r / (p + r + 0.0000001)
    am = np.argmax(f1)
    plt.figure()
    plt.plot()
    plt.plot(r, p)
    plt.plot(r[am], p[am], 'r*')
    plt.title('RBF Precision Recall: F1-score of {}'.format(f1[am]))
    plt.show()
"""
  ####### 2. LBP-MLP
"""
if mlp:
    train_labels = []
    firstfile = True
Example #12
import pandas as pd
import numpy as np
import sklearn
import sklearn.metrics
from sklearn.metrics import roc_auc_score as ras
from sklearn.metrics import precision_recall_curve as prc

df = pd.read_csv('scores.csv')
df0 = df.drop(df.columns[[1, 2, 3, 4]], axis=1)
df1 = df.drop(df.columns[[0, 2, 3, 4]], axis=1)
df2 = df.drop(df.columns[[0, 1, 3, 4]], axis=1)
df3 = df.drop(df.columns[[0, 1, 2, 4]], axis=1)
df4 = df.drop(df.columns[[0, 1, 2, 3]], axis=1)
#print(  ras(df0,df1),ras(df0,df2),ras(df0,df3),ras(df0,df4)   )

precision, recall, thresholds = prc(df0, df4)
#print(precision,'\n\n', recall,'\n\n', thresholds)
p = []
for i in range(len(recall)):  # scan every point on the curve
    if recall[i] >= 0.7:
        p.append(precision[i])
print(max(p))
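The loop can also be written with a boolean mask over the arrays returned by prc (same precision and recall as above).

# Equivalent, vectorized selection: best precision among points with recall >= 0.7.
import numpy as np

print(np.max(precision[recall >= 0.7]))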
Example #13
X = cancer.data
Y = cancer.target

#split data
x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, Y, test_size=0.2)

classes = ['malignant', 'benign']

#SVM classification
clf = svm.SVC(kernel="linear", C=2)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
#print accuracy
acc = metrics.accuracy_score(y_test, y_pred)
print("accuracy: \n")
print(acc)
#print confusion matrix
confusion_matrix = metrics.confusion_matrix(y_test, y_pred)
print("confusion matrix: \n")
print(confusion_matrix)
#classification_report
classification_report = cr(y_test, y_pred)
print(classification_report)
#precision recall
precision, recall, threshold = prc(y_test, y_pred)
print("Precision: ", precision)
print("Recall: ", recall)
print("threshold: ", threshold)
Example #14
  for i, th in enumerate( thrs ):
    o = classification_report( labels, sc>=th, output_dict=True, zero_division=0 )
    f1val[i] = o['True']['f1-score']
    precision[i] = o['True']['precision']
    recall[i] = o['True']['recall']
  # Next we plot precision (y-axis) against recall (x-axis) based on these arrays,
  # and mark the best F1-score (found with np.argmax).
  am = np.argmax( f1val )
  plt.figure()
  plt.plot()
  plt.plot( recall, precision )
  plt.plot( recall[am], precision[am], 'r*' )
  plt.title( 'Classification Report - Precision Recall: F1-score of {}'.format( f1val[am] ) )
  plt.show()

  # 2. precision_recall_curve - recommended for your assignment
  # This is a much simpler version and the one I would recommend you use from now on.
  # First import precision_recall_curve from sklearn.metrics; I have imported it as prc.
  # It looks like: precision, recall, thresholds = precision_recall_curve( labels, scores )
  p, r, t = prc( labels, sc )
  # Now you need to calculate the f1-score in the same way as above.
  f1 = 2*p*r/(p+r+0.0000001)
  # plot the precision recall curve and the point where the F1-score is at its maximum.
  am = np.argmax( f1 )
  plt.figure()
  plt.plot()
  plt.plot( r, p )
  plt.plot( r[am], p[am], 'r*' )
  plt.title( 'Precision recall curve - Precision Recall: F1-score of {}'.format( f1[am] ) )
  plt.show()
Example #15
                              kmeans_all,
                              red_cluster=kmeans_all.red,
                              colourspace=colourspace,
                              yellow_cluster=kmeans_all.yellow,
                              verbose=verbose)
        mask_name = "msk_" + img
        os.makedirs(output_path, exist_ok=True)
        imsave(os.path.join(output_path, mask_name), mask)

    # Create labels based on prediction on the evaluation dataset for background and red
    pred_eval_bg_red = kmeans_bg_red.prediction_labels(evaluation_data_bg_red)

    pred_eval_all = kmeans_all.prediction_labels(evaluation_data_all)

    # now the f1score stuff.
    p, r, t = prc(eval_labels_bg_red, pred_eval_bg_red)
    # print( 't', len( t ) )
    f1 = 2 * p * r / (p + r + 0.0000001)
    am = np.argmax(f1)
    plt.figure()
    plt.plot()
    plt.plot(r, p)
    plt.plot(r[am], p[am], 'r*')
    plt.title('Background and red data Precision Recall: F1-score of {:0.04f}'.
              format(f1[am]))
    #plt.show()

    # calculate the two accuracy scores. and confusion matrices
    acc_lin = accuracy_score(eval_labels_bg_red, pred_eval_bg_red)
    print('Accuracy of the bg and red data is: {:0.04f}'.format(acc_lin))
    print(confusion_matrix(eval_labels_bg_red, pred_eval_bg_red))
Example #16
            p = clf_rbf.predict(feat)
            s = clf_rbf.decision_function(feat)
            pred_rbf.append(p)
            scr_rbf.append(s)

    # calculate the two accuracy scores. and confusion matrices
    acc_lin = accuracy_score(eval_labels, pred_lin)
    print('Accuracy of the linear SVM based BoVW is: {:0.04f}'.format(acc_lin))
    print(confusion_matrix(eval_labels, pred_lin))

    acc_rbf = accuracy_score(eval_labels, pred_rbf)
    print('Accuracy of the rbf SVM based BoVW is: {:0.04f}'.format(acc_rbf))
    print(confusion_matrix(eval_labels, pred_rbf))

    # now the f1score stuff.
    p, r, t = prc(eval_labels, scr_lin)
    # print( 't', len( t ) )
    f1 = 2 * p * r / (p + r + 0.0000001)
    am = np.argmax(f1)
    plt.figure()
    plt.plot()
    plt.plot(r, p)
    plt.plot(r[am], p[am], 'r*')
    plt.title('Linear Precision Recall: F1-score of {}'.format(f1[am]))
    plt.show()

    p, r, t = prc(eval_labels, scr_rbf)
    # print( 't', len( t ) )
    f1 = 2 * p * r / (p + r + 0.0000001)
    am = np.argmax(f1)
    plt.figure()
            title='Confusion Matrix',
            normalize=True)
plt.show()

probs = best_clf.predict_proba(X_test)  #default threshold is 0.5
probs = probs[:, 1]
auc = roc_auc_score(y_test, probs)
f1_value = f1_score(y_test, best_clf.predict(X_test))
avg_precision_score = aps(y_test, probs)

#ROC-AUC Curve => Best for balanced data
fpr, tpr, thresholds = roc_curve(y_test, probs)
plt.plot(fpr, tpr)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.title('ROC curve for diabetes classifier')
plt.xlabel('False Positive Rate (1 - Specificity)')
plt.ylabel('True Positive Rate (Sensitivity)')
plt.grid(True)
plt.show()

#Recall-precision curve => Best for less balanced data
precision, recall, thresholds = prc(y_test, probs)
plt.plot(recall, precision)
plt.plot([0, 1], [0.5, 0.5], linestyle='--')
plt.title('Precision-Recall Curve')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.grid(True)
plt.show()
def precision_recall():
    y_true = np.random.randint(0, 2, 50)  # randomly generate labels (0 or 1) for 50 samples
    y_scores = np.random.uniform(0, 1, 50)  # randomly generate a confidence score for each sample
    precision, recall, thresholds = prc(y_true, y_scores)  # call precision_recall_curve
    return y_true, y_scores, precision, recall, thresholds
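A quick check of the helper above (assumes numpy as np and the prc alias are imported); note that precision_recall_curve returns one more precision/recall value than thresholds.

# Hypothetical smoke test of precision_recall().
y_true, y_scores, precision, recall, thresholds = precision_recall()
print(len(precision), len(recall), len(thresholds))  # len(thresholds) == len(precision) - 1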
Example #19
    for f in v:
        label.append(i)
        feat = extract_hog_matrix(f, orient, ppc, cpb)
        p, s = tmatch.predict(feat)
        pred.append(p)
        if start:
            scores = s
            start = False
        else:
            scores = np.vstack((scores, s))

# accuracy
acc = accuracy_score(label, pred)
print("Accuracy of KL-Divergance is", acc)
conf = confusion_matrix(label, pred)
print("Confusion matrix\n", conf)
# So we do roughly three times better than random guessing; not a great classifier, but okay.
# What happens when you play with the HOG and number of cluster parameters? Can you get it better?

# if we are only using two textures we can do f1-score! This is important for your assignment.
# You can really only do p-r curves for 2 classes. If you do more than that you need to
# consider other metrics.
if len(ftest.keys()) == 2:
    p, r, t = prc(np.array(label), scores[:, 0])
    f1 = 2 * p * r / (p + r + 0.000001)
    ai = np.argmax(f1)
    plt.figure()
    plt.plot(r, p)
    plt.plot(r[ai], p[ai], 'r*')
    plt.title('Precision recall curve - F1 = {:0.03f}'.format(f1[ai]))
    plt.show()
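The comment above notes that PR curves only really apply to two classes; one common workaround for more classes is a one-vs-rest curve per class. A minimal sketch with made-up scores:

import numpy as np
from sklearn.metrics import precision_recall_curve as prc

rng = np.random.default_rng(0)
n_samples, n_classes = 120, 3
y = rng.integers(0, n_classes, size=n_samples)
scores = rng.random((n_samples, n_classes))     # made-up per-class scores

for c in range(n_classes):
    p, r, _ = prc(y == c, scores[:, c])         # one-vs-rest curve for class c
    print('class', c, 'has', len(p), 'PR points')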
Example #20
            out_score = torch.FloatTensor().to(device)

            for batch_id, (input, target) in enumerate(validate_loader):
                var_input = torch.autograd.Variable(input).to(device)
                var_target = torch.autograd.Variable(target).to(device)

                var_output = model(var_input).to(device)

                loss_value = criterion(var_output, var_target).to(device)
                loss_validate += loss_value.data.item()

                out_true = torch.cat((out_true, var_target[:, 0]), 0)
                out_score = torch.cat((out_score, var_output[:, 0]), 0)

            # move tensors to CPU numpy arrays before handing them to sklearn
            auroc_mean = roc_auc_score(out_true.detach().cpu().numpy(),
                                       out_score.detach().cpu().numpy(),
                                       average='weighted')
            out_p, out_r, _ = prc(out_true.detach().cpu().numpy(),
                                  out_score.detach().cpu().numpy())
            loss_validate = loss_validate / len(validate_loader)
            acc = ((out_score >
                    0.5) == out_true.byte()).float().mean().data.item()
            print('epoch', epoch_id, 'validate loss:', loss_validate, 'auroc',
                  auroc_mean, 'acc', acc)

        if loss_validate < loss_min:
            scheduler.step(loss_validate)
            loss_min = loss_validate
            data_model = {
                'epoch': epoch_id + 1,
                'labels': labels,
                'state_dict': model.state_dict(),
                'best_loss': loss_min,
                'optimizer': optimizer.state_dict()