import numpy as np
from sklearn.metrics import auc, roc_curve


def _binary_roc_auc_score(y_true, y_score, sample_weight=None, max_fpr=None):
    # NOTE: max_fpr was referenced but missing from the original signature.
    if len(np.unique(y_true)) != 2:
        raise ValueError("Only one class present in y_true. ROC AUC score "
                         "is not defined in that case.")

    fpr, tpr, _ = roc_curve(y_true, y_score, sample_weight=sample_weight)
    if max_fpr is None or max_fpr == 1:
        return auc(fpr, tpr)
    if max_fpr <= 0 or max_fpr > 1:
        raise ValueError("Expected max_fpr in range ]0, 1], got: %r" % max_fpr)

    # Add a single point at max_fpr by linear interpolation
    stop = np.searchsorted(fpr, max_fpr, 'right')
    x_interp = [fpr[stop - 1], fpr[stop]]
    y_interp = [tpr[stop - 1], tpr[stop]]
    tpr = np.append(tpr[:stop], np.interp(max_fpr, x_interp, y_interp))
    fpr = np.append(fpr[:stop], max_fpr)
    partial_auc = auc(fpr, tpr)

    # McClish correction: standardize result to be 0.5 if non-discriminant
    # and 1 if maximal
    min_area = 0.5 * max_fpr ** 2
    max_area = max_fpr
    return 0.5 * (1 + (partial_auc - min_area) / (max_area - min_area))
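# Usage sketch (not part of the original source): sanity-check the partial-AUC
# helper against scikit-learn's public roc_auc_score, which accepts the same
# max_fpr argument (scikit-learn >= 0.20). The data below is made up for the demo.
from sklearn.metrics import roc_auc_score

y_true_demo = np.array([0, 0, 1, 1, 0, 1])
y_score_demo = np.array([0.1, 0.4, 0.35, 0.8, 0.2, 0.9])
print(_binary_roc_auc_score(y_true_demo, y_score_demo, max_fpr=0.5))
print(roc_auc_score(y_true_demo, y_score_demo, max_fpr=0.5))  # should match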
def Predict(self, inp, labels, classifier, folds, name, paramdesc):
    X = inp
    y = labels
    X, y = X[y != 2], y[y != 2]
    n_samples, n_features = X.shape

    ###########################################################################
    # Classification and ROC analysis

    # Run classifier with cross-validation and plot ROC curves.
    # Uses the scikit-learn >= 0.18 API: n_splits plus cv.split(X, y).
    cv = StratifiedKFold(n_splits=folds)
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)

    _precision = 0.0
    _recall = 0.0
    _accuracy = 0.0
    _f1 = 0.0

    for i, (train, test) in enumerate(cv.split(X, y)):
        probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
        pred_ = classifier.predict(X[test])
        _precision += precision_score(y[test], pred_)
        _recall += recall_score(y[test], pred_)
        _accuracy += accuracy_score(y[test], pred_)
        _f1 += f1_score(y[test], pred_)
        # Compute ROC curve and area under the curve
        fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
        mean_tpr += np.interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, lw=1,
                 label='ROC fold %d (area = %0.2f)' % (i, roc_auc))

    _precision /= folds
    _recall /= folds
    _accuracy /= folds
    _f1 /= folds

    plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')

    mean_tpr /= folds
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    plt.plot(mean_fpr, mean_tpr, 'k--',
             label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)

    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic - {0}'.format(name))
    plt.legend(loc="lower right")
    plt.savefig(self.configObject['outputdir'] + '/' + name + '.png')
    plt.close()

    result = self.OutputResult(name, paramdesc, len(inp),
                               floor(labels.size / folds),
                               _precision, _recall, _accuracy, _f1)
    Announce(result)
def evalModel(predictor, test_data, test_labels, train_data, train_labels, name, evalresults):
    predictor.fit(train_data, train_labels)
    evalresults.setdefault(name + " Accuracy raw \t\t", []).append(
        accuracy_score(test_labels, predictor.predict(test_data)))
    # predictor.fit(preprocessing.scale(train_data), train_labels)
    # evalresults.setdefault(name + " Accuracy std \t\t", []).append(
    #     metrics.accuracy_score(test_labels, predictor.predict(preprocessing.scale(test_data))))
    # predictor.fit(preprocessing.normalize(train_data, norm='l2'), train_labels)
    # evalresults.setdefault(name + " Accuracy nml \t\t", []).append(
    #     metrics.accuracy_score(test_labels, predictor.predict(preprocessing.normalize(test_data, norm='l2'))))
    return  # NOTE: early return -- the AUC evaluation below is unreachable as written

    if len(set(train_labels)) != 2:
        return

    predictor.fit(train_data, train_labels)
    fpr, tpr, _ = roc_curve(test_labels, predictor.decision_function(test_data))
    evalresults.setdefault(name + " AUC raw \t\t", []).append(auc(fpr, tpr))

    predictor.fit(preprocessing.scale(train_data), train_labels)
    fpr, tpr, _ = roc_curve(test_labels,
                            predictor.decision_function(preprocessing.scale(test_data)))
    evalresults.setdefault(name + " AUC std \t\t", []).append(auc(fpr, tpr))

    predictor.fit(preprocessing.normalize(train_data, norm='l2'), train_labels)
    fpr, tpr, _ = roc_curve(test_labels,
                            predictor.decision_function(preprocessing.normalize(test_data, norm='l2')))
    evalresults.setdefault(name + " AUC nml \t\t", []).append(auc(fpr, tpr))
def plot_ROC(y_pred, y_test, name):
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    n_classes = 3

    # Per-class ROC curves and AUCs
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_pred[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Micro-average ROC curve and AUC
    fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_pred.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    # Macro-average: aggregate all FPRs, then interpolate each TPR at those points
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
    mean_tpr /= n_classes

    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

    target_names = ['Reading', 'Speaking', 'Watching']

    plt.figure()
    plt.plot(fpr["micro"], tpr["micro"],
             label='micro-average ROC (AUC = {0:0.2f})'.format(roc_auc["micro"]),
             color='deeppink', linestyle=':', linewidth=4)
    plt.plot(fpr["macro"], tpr["macro"],
             label='macro-average ROC (AUC = {0:0.2f})'.format(roc_auc["macro"]),
             color='navy', linestyle=':', linewidth=4)

    lw = 2
    colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
    for i, color in zip(range(n_classes), colors):
        plt.plot(fpr[i], tpr[i], color=color, lw=lw,
                 label='ROC - {0} (AUC = {1:0.2f})'.format(target_names[i], roc_auc[i]))

    plt.plot([0, 1], [0, 1], 'k--', lw=lw)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc="lower right")
    plt.savefig("res/{}-ROC-AUC".format(name))
def recall_curve(rank, index_set=None, min_events=None):
    """
    Calculate x and y of a recall curve.

    :param rank: pandas.Series of rankable values (e.g. fold-changes)
    :param index_set: pandas.Series of indices expected in rank
    :param min_events: int or None, optional
        Minimum number of index_set hits required to calculate the curve
    :return: tuple (x, y, xy_auc), or None if there are fewer than min_events hits
    """
    x = rank.sort_values().dropna()

    # Observed cumulative sum of hits
    if index_set is None:
        index_set = Utils.get_essential_genes(return_series=False)

    y = x.index.isin(index_set)

    if (min_events is not None) and (sum(y) < min_events):
        return None

    y = np.cumsum(y) / sum(y)

    # Rank fold-changes
    x = st.rankdata(x) / x.shape[0]

    # Calculate AUC
    xy_auc = auc(x, y)

    return x, y, xy_auc
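# Usage sketch (synthetic data, not from the original source): `index_set` is
# passed explicitly so the project-specific Utils.get_essential_genes helper
# is not needed. Random ranks should give an AUC near 0.5.
import numpy as np
import pandas as pd

rank_demo = pd.Series(np.random.randn(100),
                      index=[f"gene{i}" for i in range(100)])
hits_demo = pd.Series([f"gene{i}" for i in range(0, 100, 5)])  # 20 "hits"
x_demo, y_demo, auc_demo = recall_curve(rank_demo, index_set=hits_demo)
print(f"recall-curve AUC: {auc_demo:.3f}")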
def auc_xscaled(xs, ys):
    """AUC score with the x values rescaled to fill the unit interval."""
    xmin, xmax = minmaxr(xs)
    denom = float(xmax - xmin)
    xs_corr = [(x - xmin) / denom for x in xs]
    return auc(xs_corr, ys)
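# Illustrative check (assumes `minmaxr` returns the (min, max) of a sequence,
# which is how it is used above). A curve on x in [2, 4] is rescaled to [0, 1],
# so a flat curve at y = 1 yields an AUC of exactly 1.0.
xs_demo = [2.0, 3.0, 4.0]
ys_demo = [1.0, 1.0, 1.0]
print(auc_xscaled(xs_demo, ys_demo))  # -> 1.0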
def auc_score(self):
    """Replacement for scikit-learn's method.

    If the number of Y classes is other than two, a warning is triggered
    but no exception is thrown (the return value will be NaN). We also
    don't reorder the arrays during the ROC calculation, since they are
    assumed to already be in order.
    """
    return auc(self.fprs, self.tprs, reorder=False)
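# Note (not in the original source): the `reorder` keyword was deprecated in
# scikit-learn 0.20 and later removed, so the call above fails on recent
# versions. A hypothetical drop-in with the same "no reordering" behavior is
# a plain trapezoidal rule (np.trapezoid on NumPy >= 2.0):
import numpy as np

def auc_score_trapz(fprs, tprs):
    # Trapezoidal area under (fprs, tprs), taken in the given order.
    return np.trapz(tprs, fprs)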
def refine_with_unexpectedness(data_set, classes_dict, preY, Ytrue, unexpected_rules):
    print('Refine with unexpected rules...')
    y_pred = np.copy(preY)
    for i in range(data_set.size()):
        x = data_set.get_transaction(i)
        for r in unexpected_rules:
            if r.satisfy_rule(x, is_lhs=True):
                label = r.right_items[0]
                y_pred[i] = classes_dict[label]
    print(f1_score(Ytrue, y_pred, average=None))
    if data_set.number_of_classes() <= 2:
        fpr, tpr, _ = roc_curve(Ytrue, y_pred.flatten())
        print(auc(fpr, tpr))
def plot_roc(true_labels, pred_probs, fig_title='', savepath=''):
    false_positive_rate, true_positive_rate, _ = roc_curve(
        true_labels, pred_probs[:, 1], pos_label=1)
    roc_auc = auc(false_positive_rate, true_positive_rate)

    plt.figure()
    plt.title(fig_title)
    plt.plot(false_positive_rate, true_positive_rate, 'b',
             label='AUC = %0.4f' % roc_auc)
    plt.plot([0, 1], [0, 1], 'r--')
    plt.legend(loc='lower right')
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    if savepath != '':
        plt.savefig(savepath)
    plt.show()
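# Usage sketch (illustrative, not from the original source): `pred_probs` is
# expected to be the (n, 2) output of a classifier's predict_proba, with
# column 1 holding the positive-class probability.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X_demo, y_demo = make_classification(n_samples=200, random_state=0)
probs_demo = LogisticRegression().fit(X_demo, y_demo).predict_proba(X_demo)
plot_roc(y_demo, probs_demo, fig_title='LogReg (train set)')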
def handle(self, *args, **options):
    filepath = options['file']
    print(filepath)

    x = []
    y = []
    with open(filepath, newline='') as csvfile:
        data = csv.reader(csvfile, delimiter=',')
        for row in data:
            x.append(float(row[0]))
            y.append(int(row[1]))
    print(x)
    print(y)

    fpr, tpr, thresholds = roc_curve(y, x)
    roc_auc = auc(fpr, tpr)

    plt.figure()
    lw = 2
    plt.plot(fpr, tpr, color=COLOR_4, lw=lw,
             label='ROC curve (area = %0.2f)' % roc_auc)
    for i in range(len(tpr)):
        print("tpr: %s fpr: %s thres: %s" % (tpr[i], fpr[i], thresholds[i]))
    plt.plot([0, 1], [0, 1], color=COLOR_6, lw=lw, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc="lower right")

    base = os.path.basename(filepath)
    name = os.path.splitext(base)[0]  # strip the extension before saving
    plt.savefig('%s-roc.png' % name)
Ytest = Ytest.flatten()
class_count = train_data_set.number_of_classes()
unexpected_rules = IOHelper.load_json_object(config.get_value('rules'))
refined_unexpected_rules = filter_association_rules(unexpected_rules)

print('svm testing...')
svc_model = SVC(kernel='poly', degree=3, coef0=0.1, random_state=1)
svc_model.fit(X_train.relation_matrix, Y_train.values.flatten())
svc_y_pred = svc_model.predict(Xtest)
print(f1_score(Ytest, svc_y_pred, average=None))
if class_count <= 2:
    fpr, tpr, _ = roc_curve(Ytest, svc_y_pred.flatten())
    print(auc(fpr, tpr))
refine_with_unexpectedness(test_data_set, Y_train.item_dict, svc_y_pred,
                           Ytest, refined_unexpected_rules)

print('Random forest testing...')
rf_model = RandomForestClassifier(n_estimators=20, random_state=1)
rf_model.fit(X_train.relation_matrix, Y_train.values.flatten())
rf_y_pred = rf_model.predict(Xtest)
print(f1_score(Ytest, rf_y_pred, average=None))
if class_count <= 2:
    fpr, tpr, _ = roc_curve(Ytest, rf_y_pred.flatten())
    print(auc(fpr, tpr))
refine_with_unexpectedness(test_data_set, Y_train.item_dict, rf_y_pred,
                           Ytest, refined_unexpected_rules)
from sklearn.metrics import auc, roc_curve


def area_under_the_roc_curve(yTrue, yPred):
    fpr, tpr, _ = roc_curve(yTrue, yPred)
    AUC = auc(fpr, tpr)
    return AUC, fpr, tpr
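# Usage sketch (toy data, not from the original source): scores can be
# probabilities or any continuous ranking; the returned fpr/tpr arrays can
# be plotted directly.
AUC, fpr, tpr = area_under_the_roc_curve([0, 0, 1, 1], [0.1, 0.4, 0.35, 0.8])
print(AUC)  # 0.75 for this toy example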
                       False)
# build testing inputs and labels
X_test, y_test, test_labels = build_inputs(training_files, activity_labels,
                                           training_dict, True, False, False)

random_state = np.random.RandomState(0)
classifier = OneVsRestClassifier(
    svm.SVC(kernel='linear', probability=True, random_state=random_state))
y_score = classifier.fit(X_train, y_train).decision_function(X_test)

fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(3):
    fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Compute micro-average ROC curve and ROC area
fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

plt.figure()
lw = 2
plt.plot(fpr[2], tpr[2], color='darkorange', lw=lw,
         label='ROC curve (area = %0.2f)' % roc_auc[2])
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
def showROC(prediction, target):
    nGestures = target.shape[1]
    n_classes = nGestures
    y_test = target
    y_score = prediction

    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    ###########################################################################
    # Plot ROC curves for the multiclass problem

    # Compute macro-average ROC curve and ROC area

    # First aggregate all false positive rates
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

    # Then interpolate all ROC curves at these points
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])

    # Finally average it and compute AUC
    mean_tpr /= n_classes

    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

    # Plot all ROC curves
    plt.figure()
    plt.plot(fpr["micro"], tpr["micro"],
             label='micro-average ROC curve (area = {0:0.2f})'.format(roc_auc["micro"]),
             linewidth=2)
    plt.plot(fpr["macro"], tpr["macro"],
             label='macro-average ROC curve (area = {0:0.2f})'.format(roc_auc["macro"]),
             linewidth=2)
    for i in range(n_classes):
        plt.plot(fpr[i], tpr[i],
                 label='ROC curve of class {0} (area = {1:0.2f})'.format(i, roc_auc[i]))

    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    # plt.title('Some extension of Receiver operating characteristic to multi-class')
    plt.legend(loc="lower right")
    plt.show()
# Export
ppis = df_corr[(df_corr["prot_corr"].abs() > .5) |
               (df_corr["gexp_corr"].abs() > .5) |
               (df_corr["crispr_corr"].abs() > .5)]
ppis.round(4).to_csv(f"{RPATH}/PPInteractions_filtered.csv", index=False)

rc_dict = dict()
for y in ["corum", "biogrid", "string", "huri"]:
    rc_dict[y] = dict()
    for x in ["prot", "gexp", "crispr", "merged"]:
        rc_df = df_corr.sort_values(f"{x}_pvalue")[y].reset_index(drop=True).copy()
        rc_df_y = np.cumsum(rc_df) / np.sum(rc_df)
        rc_df_x = np.array(rc_df.index) / rc_df.shape[0]
        rc_df_auc = auc(rc_df_x, rc_df_y)
        rc_dict[y][x] = dict(x=list(rc_df_x), y=list(rc_df_y), auc=rc_df_auc)

rc_pal = dict(
    biogrid=sns.color_palette("tab20c").as_hex()[0:4],
    corum=sns.color_palette("tab20c").as_hex()[4:8],
    string=sns.color_palette("tab20c").as_hex()[8:12],
    huri=sns.color_palette("tab20c").as_hex()[12:16],
)

# Recall curves
_, ax = plt.subplots(1, 1, figsize=(3, 3), dpi=600)