from bisect import bisect_left

import pandas as pd
from sklearn.metrics import precision_recall_curve as prc


def cal_rec_prec(label, predict_pos, stride=0.05):
    '''Tabulate the recall-precision curve at evenly spaced thresholds.'''
    thresholds = [round(i * stride, 2) for i in range(round(1 / stride) + 1)]
    prec_pos, rec_pos, thresh_pos = prc(label, predict_pos, pos_label=1)
    predict_neg = [1 - p for p in predict_pos]
    prec_neg, rec_neg, thresh_neg = prc(label, predict_neg, pos_label=0)
    df = pd.DataFrame(columns=[
        'threshold', 'recall_pos', 'precision_pos',
        'recall_neg', 'precision_neg'
    ])
    for i, threshold in enumerate(thresholds):
        # the thresholds returned by prc are sorted, so bisect finds the
        # first operating point at or above the requested threshold
        idx_pos = bisect_left(thresh_pos, threshold)
        idx_neg = bisect_left(thresh_neg, threshold)
        df.loc[str(i)] = [
            threshold,
            rec_pos[idx_pos], prec_pos[idx_pos],
            rec_neg[idx_neg], prec_neg[idx_neg]
        ]
    return df
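# A minimal usage sketch for cal_rec_prec: the toy labels and scores below
# are invented for illustration only.
toy_labels = [0, 1, 1, 0, 1, 0, 1, 1]
toy_scores = [0.1, 0.8, 0.7, 0.3, 0.9, 0.4, 0.6, 0.55]
print(cal_rec_prec(toy_labels, toy_scores, stride=0.25))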
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve as prc


def plot_precision_recall(labels, label_probs):
    # Recall-precision curve => best suited to imbalanced data
    precision, recall, thresholds = prc(labels, label_probs)
    plt.plot(recall, precision)
    plt.plot([0, 1], [0.5, 0.5], linestyle='--')
    plt.title('Precision-Recall Curve')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.grid(True)
    plt.show()
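# Illustrative call with synthetic data (the names below are placeholders,
# not from the original source):
import numpy as np
rng = np.random.default_rng(0)
plot_precision_recall(rng.integers(0, 2, size=100),
                      rng.uniform(0, 1, size=100))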
def gwas_roc(weights, causal_snps, positions=None, top=1000, nearby=1000):
    weights = limitPrediction(weights, top)
    score = np.array(weights)
    label = np.zeros(len(weights))
    if positions is None:
        positions = getPositions(len(score))
    for k in causal_snps:
        mini, maxi = getNearbyIndex(k, positions, nearby)
        i = np.argmax(score[mini:maxi])
        label[mini + i] = 1
    # fpr, tpr, t = roc_curve(label, score)
    p, r, t = prc(label, score)
    # return fpr, tpr
    return r, p
def extra_analysis(cls, tdata, tclss, labels, n_folds=10):
    clss = sorted(labels.keys())
    lbs = [labels[cl] for cl in clss]
    # modern sklearn API: n_splits plus an explicit split() call replaces the
    # old StratifiedKFold(y, n_folds=...) constructor
    cv = StratifiedKFold(n_splits=n_folds)
    keys = (
        "fprs", "tprs", "roc_scores",
        "pr_scores", "precisions", "recalls", "thresholds",
    )
    train_errors, test_errors, scores, cms = [], [], [], []
    lk = {l: {k: [] for k in keys} for l in clss}
    clf = cls["classifier"](**cls["kwargs"])
    for train, test in cv.split(tdata, tclss):
        X_train, y_train = tdata[train], tclss[train]
        X_test, y_test = tdata[test], tclss[test]
        # fit train data
        clf.fit(X_train, y_train)
        train_score = clf.score(X_train, y_train)
        test_score = clf.score(X_test, y_test)
        scores.append(test_score)
        train_errors.append(1 - train_score)
        test_errors.append(1 - test_score)
        y_pred = clf.predict(X_test)
        cms.append(confusion_matrix(y_test, y_pred))
        # get probability
        proba = clf.predict_proba(X_test)
        # compute score for each class vs. rest
        for idx, label in enumerate(clss):
            fpr, tpr, roc_thr = roc_curve(y_test, proba[:, idx],
                                          pos_label=label)
            precision, recall, pr_thr = prc(y_test == label, proba[:, idx])
            lk[label]["fprs"].append(fpr)
            lk[label]["tprs"].append(tpr)
            lk[label]["roc_scores"].append(auc(fpr, tpr))
            lk[label]["precisions"].append(precision)
            lk[label]["recalls"].append(recall)
            lk[label]["thresholds"].append(pr_thr)
            lk[label]["pr_scores"].append(auc(recall, precision))
    cls["label scores"] = lk
    cls["train errors"] = np.array(train_errors)
    cls["test errors"] = np.array(test_errors)
    cls["confusion matrix"] = cms
# imports inferred from the aliases used below (ap is assumed to be
# sklearn's average_precision_score)
from sklearn.metrics import roc_auc_score, auc
from sklearn.metrics import precision_recall_curve as prc
from sklearn.metrics import average_precision_score as ap


def computeAUROC(dataGT, dataPRED, classCount):
    outAUROC = []
    outAUPRC = []
    outAP = []
    datanpGT = dataGT.cpu().numpy()
    datanpPRED = dataPRED.cpu().numpy()
    for i in range(classCount):
        # per-class (one-vs-rest) AUROC, AUPRC and average precision
        outAUROC.append(roc_auc_score(datanpGT[:, i], datanpPRED[:, i],
                                      average='weighted'))
        outP, outR, _ = prc(datanpGT[:, i], datanpPRED[:, i])
        # the old positional reorder=False argument was removed from auc
        outAUPRC.append(auc(outR, outP))
        outAP.append(ap(datanpGT[:, i], datanpPRED[:, i]))
    return outAUROC, outAUPRC, outAP
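# Assumed usage of computeAUROC: dataGT / dataPRED are torch tensors of
# shape (n_samples, classCount); the random tensors here are placeholders.
import torch
gt = (torch.rand(64, 3) > 0.5).float()
pred = torch.rand(64, 3)
print(computeAUROC(gt, pred, classCount=3))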
def evaluate_model(h5_file, pred_file):
    """
    Evaluate the trained model: plot the ROC and precision-recall curves
    and compute their AUCs.
    inputs: path to the model .h5 file, path to the prediction HDF5 file.
    outputs: the plot files are written next to the model file.
    """
    hdf5_file = None
    try:
        batch_size = 32
        model = load_model(h5_file)
        file_path = os.path.dirname(h5_file)
        filename_base = os.path.basename(h5_file).split('.')[0]
        hdf5_file = tables.open_file(pred_file, mode='r')
        m_pred = hdf5_file.root.test_img.shape[0]
        steps = int(ceil(m_pred / batch_size))
        generator = read_hdf5(hdf5_file, dataset="test",
                              batch_size=batch_size)
        preds = model.predict_generator(generator, steps=steps, verbose=1)
        preds = np.array(preds)[:, 1]  # probability of the positive class
        logging.debug(f'preds: {preds}')
        true_values = hdf5_file.root.test_labels
        fpr, tpr, _ = roc_curve(list(true_values), list(preds))
        precision, recall, thresholds = prc(list(true_values), list(preds))
        average_precision = average_precision_score(
            list(true_values), list(preds))
        roc_auc = auc(fpr, tpr)
        roc_name = os.path.join(file_path, filename_base + "_roc.png")
        prc_name = os.path.join(file_path, filename_base + "_prc.png")
        clear_plot()
        plot_roc(fpr, tpr, roc_auc, roc_name)
        clear_plot()
        plot_prc(recall, precision, average_precision, prc_name)
        clear_plot()
    finally:
        # guard against load_model/open_file failing before assignment
        if hdf5_file is not None:
            hdf5_file.close()
def analyze(classifier, X_val, y_val, prc_ax, roc_ax, **params):
    # y_predict = classifier.predict(X_val)
    # use margins for margin-based models, probabilities otherwise
    # (== instead of `is`: identity comparison with string literals is a bug)
    if params['model'] == 'svm' or params['model'] == 'logistic':
        y_predict = classifier.decision_function(X_val)
    else:
        y_predict = classifier.predict_proba(X_val)[:, 1]
    # Accuracy
    accuracy = classifier.score(X_val, y_val)
    # Precision-Recall
    auprc = prc_score(y_val, y_predict)
    precision, recall, thresholds = prc(y_val, y_predict)
    prc_ax.plot(recall, precision, label='AUC={}'.format(auprc))
    # Receiver Operating Characteristic
    fpr, tpr, thr = roc(y_val, y_predict, pos_label=1)
    auroc = roc_score(fpr, tpr)
    roc_ax.plot(fpr, tpr, label='AUC={}'.format(auroc))
    return accuracy, auprc, auroc
import matplotlib.pyplot as plt
import numpy as np
import torch


def precision_recall_curve(output, target):
    try:
        from sklearn.metrics import precision_recall_curve as prc
    except ImportError:
        raise RuntimeError(
            "Precision Recall Curve requires scikit-learn to be installed.")
    with torch.no_grad():
        pred = torch.argmax(output, dim=1)
        assert pred.shape[0] == len(target)
        # prc returns (precision, recall, thresholds); the original bound
        # these to fpr/tpr, which was misleading, and plotted them in the
        # wrong order for a PR curve
        precision, recall, _ = prc(target.cpu().numpy(),
                                   output[:, 1].cpu().numpy())
        fig = plt.figure()
        plt.plot(recall, precision)
        fig.canvas.draw()
        # grab the rendered figure as an RGB tensor in CHW layout
        buf = np.asarray(fig.canvas.buffer_rgba(), dtype=np.uint8)[:, :, :3]
        image = torch.from_numpy(buf).permute(2, 0, 1)
        plt.close(fig)
        return image
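# Illustrative call (placeholders, not from the original): `output` is an
# (N, 2) score tensor and `target` an (N,) tensor of 0/1 labels.
logits = torch.randn(32, 2)
labels = torch.randint(0, 2, (32,))
img = precision_recall_curve(logits, labels)
print(img.shape)  # CHW image tensor of the rendered curve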
from sklearn.metrics import precision_recall_curve as prc
from sklearn.metrics import auc

with open('predictions.csv', 'r') as f:
    lines = [l.strip() for l in f.readlines()]

ys = []
probas = []
for l in lines:
    y, proba = l.split(',')
    ys.append(0 if y == '-' else 1)
    probas.append(float(proba))

P, R, T = prc(ys, probas)
print(auc(R, P))
    # classify the feature vector and store the output
    p = clf_linear.predict(feat)
    pred_lin.append(p)
    p = clf_rbf.predict(feat)
    pred_rbf.append(p)

# Now let's calculate the accuracy and the confusion matrix for each.
acc_lin = accuracy_score(eval_labels, pred_lin)
print('Accuracy of the linear SVM based BoVW is: {:0.04f}'.format(acc_lin))
print(confusion_matrix(eval_labels, pred_lin))
acc_rbf = accuracy_score(eval_labels, pred_rbf)
print('Accuracy of the rbf SVM based BoVW is: {:0.04f}'.format(acc_rbf))
print(confusion_matrix(eval_labels, pred_rbf))

# now the f1-score: note prc is fed hard predictions here, so the "curve"
# only has a handful of points
p, r, t = prc(eval_labels, pred_rbf)
# print( 't', len( t ) )
f1 = 2 * p * r / (p + r + 0.0000001)
am = np.argmax(f1)
plt.figure()
plt.plot(r, p)
plt.plot(r[am], p[am], 'r*')
plt.title('RBF Precision Recall: F1-score of {}'.format(f1[am]))
plt.show()

"""
####### 2. LBP-MLP
"""
if mlp:
    train_labels = []
    firstfile = True
import pandas as pd
import numpy as np
import sklearn
import sklearn.metrics
from sklearn.metrics import roc_auc_score as ras
from sklearn.metrics import precision_recall_curve as prc

df = pd.read_csv('scores.csv')
# keep one column at a time: df0 is used as the labels below, df1..df4 as
# the four score columns
df0 = df.drop(df.columns[[1, 2, 3, 4]], axis=1)
df1 = df.drop(df.columns[[0, 2, 3, 4]], axis=1)
df2 = df.drop(df.columns[[0, 1, 3, 4]], axis=1)
df3 = df.drop(df.columns[[0, 1, 2, 4]], axis=1)
df4 = df.drop(df.columns[[0, 1, 2, 3]], axis=1)
#print( ras(df0,df1),ras(df0,df2),ras(df0,df3),ras(df0,df4) )

# squeeze the single-column frames to 1-d before handing them to sklearn
precision, recall, thresholds = prc(df0.squeeze(), df4.squeeze())
#print(precision,'\n\n', recall,'\n\n', thresholds)

# best precision among operating points with recall >= 0.7; iterate over the
# whole curve instead of the fragile hard-coded range(13)
p = [precision[i] for i in range(len(recall)) if recall[i] >= 0.7]
print(max(p))
X = cancer.data
Y = cancer.target

# split data
x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, Y, test_size=0.2)
classes = ['malignant', 'benign']

# SVM classification
clf = svm.SVC(kernel="linear", C=2)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)

# print accuracy
acc = metrics.accuracy_score(y_test, y_pred)
print("accuracy: \n")
print(acc)

# print confusion matrix
confusion_matrix = metrics.confusion_matrix(y_test, y_pred)
print("confusion matrix: \n")
print(confusion_matrix)

# classification report
classification_report = cr(y_test, y_pred)
print(classification_report)

# precision / recall (note: y_pred holds hard 0/1 labels, so the curve has
# only a few points)
precision, recall, threshold = prc(y_test, y_pred)
print("Precision: ", precision)
print("Recall: ", recall)
print("threshold: ", threshold)
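# A hedged alternative to the last step above: prc is more informative with
# continuous scores, e.g. the SVC decision margin (sketch, same clf as above).
y_score = clf.decision_function(x_test)
precision, recall, threshold = prc(y_test, y_score)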
for i, th in enumerate(thrs):
    o = classification_report(labels, sc >= th, output_dict=True,
                              zero_division=0)
    f1val[i] = o['True']['f1-score']
    precision[i] = o['True']['precision']
    recall[i] = o['True']['recall']

# Next we will plot the precision (y-axis) and recall (x-axis) based on your
# arrays, and display the best F1-score (using np.argmax).
am = np.argmax(f1val)
plt.figure()
plt.plot(recall, precision)
plt.plot(recall[am], precision[am], 'r*')
plt.title('Classification Report - Precision Recall: F1-score of {}'.format(
    f1val[am]))
plt.show()

# 2. precision_recall_curve - recommended for your assignment
# This is a much simpler version and the one I would recommend you use from
# now on. First import precision_recall_curve from sklearn.metrics; I have
# imported it as prc. It looks like:
#   precision, recall, thresholds = precision_recall_curve(labels, scores)
p, r, t = prc(labels, sc)
# Now you need to calculate the f1-score in the same way as above.
f1 = 2 * p * r / (p + r + 0.0000001)
# plot the precision recall curve and the point where the F1-score is at its
# maximum.
am = np.argmax(f1)
plt.figure()
plt.plot(r, p)
plt.plot(r[am], p[am], 'r*')
plt.title('Precision recall curve - Precision Recall: F1-score of {}'.format(
    f1[am]))
plt.show()
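# A small helper distilled from the approach above: pick the threshold with
# the best F1 (a sketch; assumes binary 0/1 labels and that prc/np are
# imported as in the snippet above; eps avoids division by zero as before).
def best_f1_threshold(labels, scores, eps=1e-7):
    p, r, t = prc(labels, scores)
    f1 = 2 * p * r / (p + r + eps)
    am = np.argmax(f1)
    # precision/recall are one entry longer than thresholds; clamp the index
    return t[min(am, len(t) - 1)], f1[am]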
        # (continuation of a mask-generation call whose opening is elided)
        kmeans_all, red_cluster=kmeans_all.red, colourspace=colourspace,
        yellow_cluster=kmeans_all.yellow, verbose=verbose)
    mask_name = "msk_" + img
    os.makedirs(output_path, exist_ok=True)
    imsave(os.path.join(output_path, mask_name), mask)

# Create labels based on prediction on the evaluation dataset for
# background and red
pred_eval_bg_red = kmeans_bg_red.prediction_labels(evaluation_data_bg_red)
pred_eval_all = kmeans_all.prediction_labels(evaluation_data_all)

# now the f1-score stuff
p, r, t = prc(eval_labels_bg_red, pred_eval_bg_red)
# print( 't', len( t ) )
f1 = 2 * p * r / (p + r + 0.0000001)
am = np.argmax(f1)
plt.figure()
plt.plot(r, p)
plt.plot(r[am], p[am], 'r*')
plt.title('Background and red data Precision Recall: F1-score of {:0.04f}'.
          format(f1[am]))
#plt.show()

# calculate the two accuracy scores and confusion matrices
acc_lin = accuracy_score(eval_labels_bg_red, pred_eval_bg_red)
print('Accuracy of the bg and red data is: {:0.04f}'.format(acc_lin))
print(confusion_matrix(eval_labels_bg_red, pred_eval_bg_red))
    p = clf_rbf.predict(feat)
    s = clf_rbf.decision_function(feat)
    pred_rbf.append(p)
    scr_rbf.append(s)

# calculate the two accuracy scores and confusion matrices
acc_lin = accuracy_score(eval_labels, pred_lin)
print('Accuracy of the linear SVM based BoVW is: {:0.04f}'.format(acc_lin))
print(confusion_matrix(eval_labels, pred_lin))
acc_rbf = accuracy_score(eval_labels, pred_rbf)
print('Accuracy of the rbf SVM based BoVW is: {:0.04f}'.format(acc_rbf))
print(confusion_matrix(eval_labels, pred_rbf))

# now the f1-score stuff, using the decision-function scores collected above
p, r, t = prc(eval_labels, scr_lin)
# print( 't', len( t ) )
f1 = 2 * p * r / (p + r + 0.0000001)
am = np.argmax(f1)
plt.figure()
plt.plot(r, p)
plt.plot(r[am], p[am], 'r*')
plt.title('Linear Precision Recall: F1-score of {}'.format(f1[am]))
plt.show()

p, r, t = prc(eval_labels, scr_rbf)
# print( 't', len( t ) )
f1 = 2 * p * r / (p + r + 0.0000001)
am = np.argmax(f1)
plt.figure()
                      # (continuation of an elided plotting call above)
                      title='Confusion Matrix', normalize=True)
plt.show()

probs = best_clf.predict_proba(X_test)  # default threshold is 0.5
probs = probs[:, 1]
roc_auc = roc_auc_score(y_test, probs)  # renamed so it no longer shadows auc
f1_ratio = f1_score(y_test, best_clf.predict(X_test))
avg_precision_score = aps(y_test, probs)

# ROC-AUC curve => best for balanced data
fpr, tpr, thresholds = roc_curve(y_test, probs)
plt.plot(fpr, tpr)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.title('ROC curve for diabetes classifier')
plt.xlabel('False Positive Rate (1 - Specificity)')
plt.ylabel('True Positive Rate (Sensitivity)')
plt.grid(True)
plt.show()

# Recall-precision curve => best for imbalanced data
precision, recall, thresholds = prc(y_test, probs)
plt.plot(recall, precision)
plt.plot([0, 1], [0.5, 0.5], linestyle='--')
plt.title('Precision-Recall Curve')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.grid(True)
plt.show()
import numpy as np
from sklearn.metrics import precision_recall_curve as prc


def precision_recall():
    y_true = np.random.randint(0, 2, 50)  # random 0/1 labels for 50 samples
    y_scores = np.random.uniform(0, 1, 50)  # random confidence per sample
    # call precision_recall_curve
    precision, recall, thresholds = prc(y_true, y_scores)
    return y_true, y_scores, precision, recall, thresholds
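# Illustrative check (not from the original): by sklearn's construction the
# returned precision array ends at 1.0 and recall ends at 0.0, and both are
# one entry longer than thresholds.
y_true, y_scores, precision, recall, thresholds = precision_recall()
assert precision[-1] == 1.0 and recall[-1] == 0.0
assert len(precision) == len(thresholds) + 1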
for f in v:
    label.append(i)
    feat = extract_hog_matrix(f, orient, ppc, cpb)
    p, s = tmatch.predict(feat)
    pred.append(p)
    if start:
        scores = s
        start = False
    else:
        scores = np.vstack((scores, s))

# accuracy
acc = accuracy_score(label, pred)
print("Accuracy of KL-Divergence is", acc)
conf = confusion_matrix(label, pred)
print("Confusion matrix\n", conf)
# So we get about three times chance level; not a great classifier, but
# okay. What happens when you play with the HOG and number-of-clusters
# parameters? Can you get it better?

# If we are only using two textures we can do an f1-score! This is important
# for your assignment. You can really only do p-r curves for 2 classes; if
# you do more than that you need to consider other metrics.
if len(ftest.keys()) == 2:
    p, r, t = prc(np.array(label), scores[:, 0])
    f1 = 2 * p * r / (p + r + 0.000001)
    ai = np.argmax(f1)
    plt.figure()
    plt.plot(r, p)
    plt.plot(r[ai], p[ai], 'r*')
    plt.title('Precision recall curve - F1 = {:0.03f}'.format(f1[ai]))
    plt.show()
out_score = torch.FloatTensor().to(device)
for batch_id, (input, target) in enumerate(validate_loader):
    var_input = torch.autograd.Variable(input).to(device)
    var_target = torch.autograd.Variable(target).to(device)
    var_output = model(var_input).to(device)
    loss_value = criterion(var_output, var_target).to(device)
    loss_validate += loss_value.data.item()
    out_true = torch.cat((out_true, var_target[:, 0]), 0)
    out_score = torch.cat((out_score, var_output[:, 0]), 0)

# detach and move the collected tensors off the GPU before handing them to
# sklearn, which expects plain numpy arrays
out_true_np = out_true.detach().cpu().numpy()
out_score_np = out_score.detach().cpu().numpy()
auroc_mean = roc_auc_score(out_true_np, out_score_np, average='weighted')
out_p, out_r, _ = prc(out_true_np, out_score_np)
loss_validate = loss_validate / len(validate_loader)
acc = ((out_score > 0.5) == out_true.byte()).float().mean().data.item()
print('epoch', epoch_id, 'validate loss:', loss_validate,
      'auroc', auroc_mean, 'acc', acc)
if loss_validate < loss_min:
    scheduler.step(loss_validate)
    loss_min = loss_validate
    data_model = {
        'epoch': epoch_id + 1,
        'labels': labels,
        'state_dict': model.state_dict(),
        'best_loss': loss_min,
        'optimizer': optimizer.state_dict()