def metric_info(self, preds, trues):
    fps, tps = _binary_clf_curve(trues, preds)
    if len(fps) > 2:
        optimal_idxs = np.where(np.r_[True, np.logical_or(np.diff(fps, 2), np.diff(tps, 2)), True])[0]
        fps = fps[optimal_idxs]
        tps = tps[optimal_idxs]

    tps = np.r_[0, tps]
    fps = np.r_[0, fps]

    if fps[-1] <= 0:
        logger = getLogger()
        logger.warning("No negative samples in y_true, "
                       "false positive value should be meaningless")
        fpr = np.repeat(np.nan, fps.shape)
    else:
        fpr = fps / fps[-1]

    if tps[-1] <= 0:
        logger = getLogger()
        logger.warning("No positive samples in y_true, "
                       "true positive value should be meaningless")
        tpr = np.repeat(np.nan, tps.shape)
    else:
        tpr = tps / tps[-1]

    result = sk_auc(fpr, tpr)
    return result
def auc_(trues, preds):
    r"""AUC_ (also known as Area Under Curve) is used to evaluate a two-class model,
    and refers to the area under the ROC curve.

    .. _AUC: https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve

    Note:
        This metric does not calculate group-based AUC, which averages the AUC
        scores across users. It is also not limited to k. Instead, it calculates
        the score on the entire prediction results regardless of users.

    .. math::
        \mathrm{AUC} = \frac{\sum\limits_{i=1}^{M} rank_{i} - \frac{M \times (M+1)}{2}}{M \times N}

    :math:`M` is the number of positive samples.
    :math:`N` is the number of negative samples.
    :math:`rank_i` is the ascending rank of the i-th positive sample.
    """
    fps, tps = _binary_clf_curve(trues, preds)
    if len(fps) > 2:
        # Drop suboptimal (collinear) points, as sklearn's roc_curve does.
        optimal_idxs = np.where(
            np.r_[True, np.logical_or(np.diff(fps, 2), np.diff(tps, 2)), True])[0]
        fps = fps[optimal_idxs]
        tps = tps[optimal_idxs]

    tps = np.r_[0, tps]
    fps = np.r_[0, fps]

    if fps[-1] <= 0:
        logger = getLogger()
        logger.warning("No negative samples in y_true, "
                       "false positive value should be meaningless")
        fpr = np.repeat(np.nan, fps.shape)
    else:
        fpr = fps / fps[-1]

    if tps[-1] <= 0:
        logger = getLogger()
        logger.warning("No positive samples in y_true, "
                       "true positive value should be meaningless")
        tpr = np.repeat(np.nan, tps.shape)
    else:
        tpr = tps / tps[-1]

    return sk_auc(fpr, tpr)
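# --- Usage sketch (hypothetical): sanity-checking auc_ against
# sklearn.metrics.roc_auc_score. The snippet above assumes a project-local
# _binary_clf_curve(trues, preds) returning cumulative (fps, tps); a minimal
# stand-in is given here for illustration only (it does not merge tied
# scores, so agreement is exact only when scores are distinct).
import numpy as np
from logging import getLogger
from sklearn.metrics import auc as sk_auc, roc_auc_score

def _binary_clf_curve(trues, preds):
    # Sort by descending score and accumulate false/true positive counts.
    trues, preds = np.asarray(trues), np.asarray(preds)
    order = np.argsort(preds)[::-1]
    trues = trues[order]
    tps = np.cumsum(trues)
    fps = np.cumsum(1 - trues)
    return fps, tps

trues = np.array([1, 0, 1, 1, 0, 0])
preds = np.array([0.9, 0.4, 0.8, 0.35, 0.6, 0.1])
print(auc_(trues, preds), roc_auc_score(trues, preds))  # both ~0.7778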
def plot_roc_score(label_test, score, name):
    x, y, _ = roc_curve(label_test, score)
    roc_auc = sk_auc(x, y)

    plt.figure()
    lw = 2
    plt.plot(x, y, color='darkorange', lw=lw,
             label='ROC curve (area = %0.2f)' % roc_auc)
    plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    plt.legend(loc="lower right")
    # name = re.sub('.tsv', '_roc.png', name)
    name = name + '/roc.png'
    plt.savefig(name)
    return
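# --- Usage sketch (hypothetical): `name` is an output *directory*; the
# function appends '/roc.png' itself. Assumes matplotlib.pyplot as plt,
# sklearn.metrics.roc_curve, and sk_auc (sklearn.metrics.auc) are imported
# as in the snippet above.
import os

os.makedirs("figures", exist_ok=True)
plot_roc_score([0, 1, 1, 0, 1], [0.2, 0.8, 0.7, 0.4, 0.9], "figures")  # -> figures/roc.png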
def get_auc(fp, tp, adjusted=False):
    """Calculate AUC from the FP and TP arrays of an ROC curve."""
    auc_val = sk_auc(fp, tp)
    if adjusted:
        # Re-center so that a random classifier scores 0 instead of 0.5.
        auc_val -= 0.5
    return auc_val
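# --- Usage sketch (hypothetical): feeding get_auc with fpr/tpr from
# sklearn.metrics.roc_curve; the adjusted variant re-centers chance at 0.
# Assumes sk_auc (sklearn.metrics.auc) is imported as in the snippet above.
from sklearn.metrics import roc_curve

fpr, tpr, _ = roc_curve([0, 0, 1, 1], [0.1, 0.4, 0.35, 0.8])
print(get_auc(fpr, tpr))                 # plain AUC in [0, 1] -> 0.75
print(get_auc(fpr, tpr, adjusted=True))  # chance-adjusted AUC in [-0.5, 0.5] -> 0.25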
# ax = plt.gca()
# ax.axes.spines['right'].set_visible(False)
# ax.axes.spines['top'].set_visible(False)
# fig.set_figwidth(8)
# fig.set_figheight(6)
# plt.savefig("figure/igg_native_roc.pdf", bbox_inches='tight')

print("Drawing PR")
fig = plt.figure()
for group in datasets:
    prec, rec, _ = precision_recall_curve(true_positive_mask, datasets[group])
    plt.step(rec, prec, alpha=0.8,
             label="%s (%0.3f AUC)" % (' '.join(group.split("_")).title(), sk_auc(rec, prec)))
plt.xlim(-0.01, 1)
plt.ylim(0, 1.01)
plt.legend()
plt.xlabel("Recall", fontsize=16)
plt.ylabel("Precision", fontsize=16)
plt.title("Precision-Recall Curve", fontsize=18)
ax = plt.gca()
ax.axes.spines['right'].set_visible(False)
ax.axes.spines['top'].set_visible(False)
fig.set_figwidth(8)
fig.set_figheight(6)
plt.savefig("figure/igg_native_prec_rec.pdf", bbox_inches='tight')

native_path = "analysis/results/igg-native-fit-prior.db"
def auc(expected, found):
    fpr, tpr, thresholds = roc_curve(expected, found)
    return sk_auc(fpr, tpr)
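# --- Usage sketch (hypothetical): ROC-AUC of scores `found` against binary
# labels `expected`; assumes the same roc_curve / sk_auc imports as the
# snippet above.
from sklearn.metrics import auc as sk_auc, roc_curve

print(auc([0, 1, 1, 0], [0.1, 0.9, 0.4, 0.35]))  # -> 1.0 (perfect ranking)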
def auc(labels, preds, n_bins=100):
    # Header reconstructed from the __main__ call below; the n_bins default
    # is an assumption. Histogram-based O(n) approximation of AUC.
    positive_cnt = sum(labels)
    negative_cnt = len(labels) - positive_cnt
    total_case = positive_cnt * negative_cnt

    pos_histogram = [0 for _ in range(n_bins)]
    neg_histogram = [0 for _ in range(n_bins)]
    bin_width = 1.0 / n_bins
    for i in range(len(labels)):
        # Clamp so a prediction of exactly 1.0 stays in the last bin.
        nth_bin = min(int(preds[i] / bin_width), n_bins - 1)
        if labels[i] == 1:
            pos_histogram[nth_bin] += 1
        else:
            neg_histogram[nth_bin] += 1

    accumulated_neg = 0
    satisfied_pair = 0
    for i in range(n_bins):
        # Each positive beats every negative in a lower bin; ties (same bin)
        # count for half.
        satisfied_pair += (pos_histogram[i] * accumulated_neg
                           + pos_histogram[i] * neg_histogram[i] * 0.5)
        accumulated_neg += neg_histogram[i]

    return satisfied_pair / float(total_case)


if __name__ == '__main__':
    y = np.array([1, 0, 0, 0, 1, 0, 1, 0])
    pred = np.array([0.9, 0.8, 0.3, 0.1, 0.4, 0.9, 0.66, 0.7])
    fpr, tpr, thresholds = roc_curve(y, pred, pos_label=1)
    print("sklearn:", sk_auc(fpr, tpr))
    print("reimplementation:", auc(y, pred))
plt.ylabel("TPR", fontsize=16) plt.xlim(-0.01, 1) plt.ylim(0, 1.01) plt.title("ROC Curve", fontsize=18) ax = plt.gca() plt.legend(loc=4) ax.axes.spines['right'].set_visible(False) ax.axes.spines['top'].set_visible(False) plt.savefig("figure/serum_roc.pdf", bbox_inches='tight') print("Drawing PR") plt.figure() for group in datasets: prec, rec, _ = precision_recall_curve(true_positive_mask, datasets[group]) plt.step(rec, prec, alpha=0.8, label="%s (%0.3f AUC)" % (' '.join(group.split("_")).title(), sk_auc( rec, prec))) plt.legend() plt.xlabel("Recall", fontsize=16) plt.ylabel("Precision", fontsize=16) plt.title("Precision-Recall Curve", fontsize=18) ax = plt.gca() ax.axes.spines['right'].set_visible(False) ax.axes.spines['top'].set_visible(False) plt.savefig("figure/serum_prec_rec.pdf", bbox_inches='tight') print "Generating Tetra-antennary Plot" labeler = plotting.NGlycanLabelProducer( glypy.GlycanComposition.parse("{Fuc^Me:1; Hex^Me:7; HexNAc^Me:6; Neu5NAc^Me:1}$C1H4")) color_cycle = iter(("red", "green", "blue", "orange")).next
def get_F1_event(label, pred):
    '''
    Get F1-Event.

    :param label: binary ground-truth label
    :param pred: prediction
    :return: met, a structure holding the F1E metric
    '''
    # n_gt_segs is the number of true events in the whole video sequence
    gt_segs, n_gt_segs = get_segs(label)

    bin_pred = pred > 0
    bin_pred = bin_pred.astype(np.int32)
    # n_pred_segs is the number of detected events in the whole video sequence
    pred_segs, n_pred_segs = get_segs(bin_pred)

    gt_frames = np.where(label > 0)[0]
    pred_frames = np.where(pred > 0)[0]

    ths = np.arange(0, 1.01, 0.01)

    # init
    n_th = ths.size
    TPP = np.zeros(n_th)  # TP for precision
    TPR = np.zeros(n_th)  # TP for recall

    # Compute the overlap score for each GT segment
    olScoreGt = np.zeros(n_gt_segs)
    for i in range(0, n_gt_segs):
        seg = gt_segs[i]
        olScoreGt[i] = np.intersect1d(seg, pred_frames).size / seg.size

    # Compute the overlap score for each predicted segment
    olScorePr = np.zeros(n_pred_segs)
    for i in range(n_pred_segs):
        seg = pred_segs[i]
        olScorePr[i] = np.intersect1d(seg, gt_frames).size / seg.size

    # Count TPs at each overlap threshold
    for iOl in range(n_th):
        TPR[iOl] = np.sum(olScoreGt >= ths[iOl])
        TPP[iOl] = np.sum(olScorePr >= ths[iOl])

    ER = TPR / n_gt_segs    # ratio of correctly detected events over the true events
    EP = TPP / n_pred_segs  # ratio of correctly detected events over the detected events

    # Compute the F1-Event curve
    F1E_curve = 2 * ER * EP / (ER + EP)

    # Compute AUC under the F1E curve
    x = np.concatenate((np.array([0]), ths, np.array([1])), axis=0)
    y = np.concatenate((np.array([1]), F1E_curve, np.array([0])))
    # auc = AUC(x, y)
    auc = sk_auc(x, y)  # reorder=False was the default; the keyword was removed in recent sklearn
    if np.isnan(auc):
        raise ValueError("AUC is NaN (no true event exists)")

    # Build the output structure
    met = Map()
    met.f1EventCurve = F1E_curve
    met.thresholds = ths
    met.TP_recall = TPR
    met.TP_precision = TPP
    met.olScoreGt = olScoreGt
    met.olScorePr = olScorePr
    met.nGtSeg = n_gt_segs
    met.nPrSeg = n_pred_segs
    met.auc = auc
    return met
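# --- Usage sketch (hypothetical): minimal stand-ins for the helpers the
# snippet above assumes -- get_segs(binary_array) returning (segments, count)
# where each segment is an array of consecutive frame indices, and Map, an
# attribute-style dict. Both are illustrations, not the original helpers.
import numpy as np
from sklearn.metrics import auc as sk_auc

def get_segs(binary_array):
    # Split the indices of nonzero frames into runs of consecutive frames.
    frames = np.where(binary_array > 0)[0]
    segs = np.split(frames, np.where(np.diff(frames) > 1)[0] + 1) if frames.size else []
    return segs, len(segs)

class Map(dict):
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__

label = np.array([0, 0, 1, 1, 1, 0, 0, 1, 1, 0])
pred = np.array([0, 1, 1, 1, 0, 0, 0, 1, 0, 0])
met = get_F1_event(label, pred)
print(met.auc)  # area under the F1-Event curve over overlap thresholds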