def single_experiment_false_fraction(data, gamma, nu):
    C = 1. / len(data) / nu
    model = SVDD(kernel='rbf', C=C, gamma=gamma)
    normal_data, anomaly_data = split_anomaly_normal_data(data)
    # Subsample anomalies so that they make up a `nu` fraction of the data
    anomaly_elements_count = int(len(normal_data) * nu / (1. - nu))
    rows = sample(list(anomaly_data.index), anomaly_elements_count)
    anomaly_data = anomaly_data.loc[rows]
    normal_train, normal_validate, normal_test = split_data_set(normal_data, 3)
    anomaly_train, anomaly_validate, anomaly_test = split_data_set(anomaly_data, 3)
    anomaly_train = concatenate([anomaly_train, anomaly_validate])
    normal_train = concatenate([normal_train, normal_validate])
    model.fit(np.concatenate([anomaly_train, normal_train]))
    anomaly_prediction = model.decision_function(anomaly_test)
    normal_prediction = model.decision_function(normal_test)
    false_anomaly = mean(normal_prediction < 0)
    false_normal = mean(anomaly_prediction > 0)
    prediction = concatenate([anomaly_prediction, normal_prediction])
    true_labels = array([1] * len(anomaly_prediction) + [-1] * len(normal_prediction))
    # Negate the decision function so that higher scores mean "more anomalous"
    auc_score = average_precision_score(true_labels, -1 * prediction)
    train_data = concatenate([anomaly_train, normal_train])
    slice_score = slice_probability_metric(model, train_data)
    support_score = support_vectors_metric(model, train_data, nu)
    smote_score = validate_classifier_by_random_points(model, train_data, (1. - nu) / nu)
    vc_score = combinatorial_dimension_metric(model, train_data)
    kernel_score = kernel_metric(model, train_data)
    return false_anomaly, false_normal, auc_score, \
        slice_score, smote_score, vc_score, support_score, kernel_score
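# A minimal, hedged sketch of the same scoring recipe with a stock estimator.
# `SVDD` above is project-specific; scikit-learn's OneClassSVM (an assumption,
# not the author's model) exposes the same fit/decision_function interface.
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.metrics import average_precision_score

def one_class_ap_sketch(train, normal_test, anomaly_test, gamma, nu):
    model = OneClassSVM(kernel='rbf', gamma=gamma, nu=nu).fit(train)
    scores = np.concatenate([model.decision_function(anomaly_test),
                             model.decision_function(normal_test)])
    labels = np.array([1] * len(anomaly_test) + [-1] * len(normal_test))
    # Negate: decision_function is larger for inliers, but the positive
    # label (1) marks anomalies, so higher score must mean "more anomalous".
    return average_precision_score(labels, -scores)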
def main(argv):
    label_file = argv[0]
    score_file = argv[1]
    output_file = argv[2]
    # Load labels: one space-separated row per line, label in the first column
    labels = []
    with open(label_file) as csvfile:
        for line in csvfile:
            row = line.split(' ')
            labels.append(int(row[0]))
    # Load one score per line
    lines = [line.rstrip('\n') for line in open(score_file)]
    scores = []
    for line in lines:
        scores.append(float(line))
    # Compute fpr, tpr, thresholds and ROC AUC
    fpr, tpr, thresholds = roc_curve(labels, scores, pos_label=1)
    roc_auc = auc(fpr, tpr)
    average_precision_macro = average_precision_score(labels, scores)
    average_precision_micro = average_precision_score(labels, scores, average="micro")
    log_loss_ffm = log_loss(labels, scores)
    print("log loss : {}".format(log_loss_ffm))
    print("AUC : {}".format(roc_auc))
    print("PR_AUC_micro: {}, PR_AUC_macro: {}".format(average_precision_micro,
                                                      average_precision_macro))
def compute_results(query_results, query_relevant): y_scores = [] # list of scores from the search engine for each (query, document) couple y_true = [] # 1 if the document is relevant for the query 0 otherwise nb_queries = 0 for result in query_results: if result[0] in query_relevant.keys(): nb_queries += 1 y_scores_temp = [] y_true_temp = [] pred_list = result[2] true_list = query_relevant[result[0]] for doc in pred_list: y_scores.append(doc[1]) y_scores_temp.append(doc[1]) if doc[0] in true_list: y_true.append(1) y_true_temp.append(1) else: y_true.append(0) y_true_temp.append(0) # We print the AP score for each single query print(result[0], average_precision_score(y_true_temp,y_scores_temp)) precision, recall, thresholds = precision_recall_curve(y_true, y_scores) AP = average_precision_score(y_true, y_scores) return nb_queries, precision, recall, AP
def evaluate_multiple(ground_truths, prediction_scores, compute_micro_macro_avg=False):
    """
    :param ground_truths: 1-d array of class labels starting from 0, e.g. gt: [0, 0, 1, 3, 2, 1, 0]
    :param prediction_scores: 2-d array of the corresponding probability scores for each class
    :param compute_micro_macro_avg: switch on if the micro-averaged curve is also needed
    :return: dicts keyed by class label: precisions, recalls, thresholds, avg_precisions
    """
    # Check dimension
    if len(prediction_scores.shape) != 2:
        print 'The dimension of \'prediction_scores\' should be 2.'
        return
    N = prediction_scores.shape[0]
    M = prediction_scores.shape[1]
    precisions = {}
    recalls = {}
    thresholds = {}
    avg_precisions = {}
    if compute_micro_macro_avg:
        gt_label_array = []
        prediction_score_array = []
    for class_label in range(0, M):
        # Binarize: 1 for the current class, 0 for the rest
        ground_truth_label = np.zeros(N, dtype=int)
        idx = (ground_truths == class_label)
        ground_truth_label[idx] = 1
        # Extract positive scores
        prediction_score = prediction_scores[:, class_label]
        # Compute the one-vs-rest precision-recall curve
        precision, recall, threshold = precision_recall_curve(ground_truth_label, prediction_score)
        avg_precision = average_precision_score(ground_truth_label, prediction_score)
        precisions[class_label] = precision
        recalls[class_label] = recall
        thresholds[class_label] = threshold
        avg_precisions[class_label] = avg_precision
        if compute_micro_macro_avg:
            gt_label_array.append(ground_truth_label)
            prediction_score_array.append(prediction_score)
    if compute_micro_macro_avg:
        gt_label_array = np.asarray(gt_label_array)
        prediction_score_array = np.asarray(prediction_score_array)
        # Compute the micro average (the macro curve is not computed here)
        precisions["micro"], recalls["micro"], _ = precision_recall_curve(
            gt_label_array.ravel(), prediction_score_array.ravel())
        avg_precisions["micro"] = average_precision_score(gt_label_array, prediction_score_array,
                                                          average="micro")
    return precisions, recalls, thresholds, avg_precisions
def calc_precision_recall_fmeasure(self): """ Computes Precision, Recall, F-measure and Support """ # precision, recall, F-measure and support for each class for a given thresholds for threshold in [10, 30, 50]: result = precision_recall_fscore_support(self.y_true, prediction_to_binary(self.y_pred, threshold)) self.scores['Precision ' + str(threshold) + '%'] = result[0] self.scores['Recall ' + str(threshold) + '%'] = result[1] self.scores['F-score ' + str(threshold) + '%'] = result[2] self.scores['Support'] = result[3] # Computes precision-recall pairs for different probability thresholds self.precision, self.recall, self.thresholds = precision_recall_curve(self.y_true, self.y_pred) #print "precision = " + str(precision) #print "recall = " + str(recall) #print "thresholds = " + str(thresholds) # Compute the area under the precision-recall curve (average precision from prediction scores) self.scores['Precision-Recall AUC'] = average_precision_score(self.y_true, self.y_pred) self.scores['Weighted Precision'] = average_precision_score(self.y_true, self.y_pred, average='weighted') # weighted average precision by support (the number of true instances for each label). self.scores['Average Recall'] = np.average(self.recall) self.scores['Average Threshold'] = np.average(self.thresholds) return
def plotPrc(clfName, folds, outdir): y_tests = [] y_scores = [] plt.clf() for i, (clf, X_test, y_test, _, _, _, _,_,_,_) in enumerate(folds): try: y_score = clf.decision_function(X_test) except AttributeError: y_score = clf.predict_proba(X_test)[:, 0] precision, recall, _ = precision_recall_curve(y_test, y_score, pos_label=POSTIVE_LABEL) y_tests.extend(y_test) y_scores.extend(y_score) try: area = average_precision_score(y_test, y_score) except ValueError: area = 0.0 clf.prc_auc = area plt.plot(recall, precision, label='Fold %d, AUC = %0.2f' % (i, area), lw=1) precision, recall, _ = precision_recall_curve(y_tests, y_scores, pos_label=POSTIVE_LABEL) try: area = average_precision_score(y_tests, y_scores) except ValueError: area = 0.0 plt.plot(recall, precision, 'k--', label='Mean, AUC = %0.2f' % (area), lw=2) plt.title('Precision-Recall: %s\n%s'%(clfName,outdir.name.replace("_"," "))) plt.xlabel('Recall') plt.ylabel('Precision') plt.xlim([-0.05, 1.05]) plt.ylim([-0.05, 1.05]) plt.legend(loc="lower center", prop=legendprop) plt.savefig(str(outdir/(clfName.replace(" ","_")+'_precision-recall.png')))
def run(tweets, classifications, classifier=None):
    if len(tweets) != len(classifications):
        raise ValueError('Error: Tweet population size and classifications size not matching.')
    population = utils.prepare_entr_tweets(tweets, classifications, 2)
    vectorizer = FeatureUnion([('tfidf', TfidfVectorizer(stop_words='english',
                                                         tokenizer=EnglishTokenizer(),
                                                         ngram_range=(1, 3),
                                                         use_idf=False)),
                               ('sent', Vectorizer(stop_words='english',
                                                   tokenizer=EnglishTokenizer(),
                                                   ngram_range=(1, 3)))])
    if classifier is None:
        clf = MultinomialNB()
    else:
        clf = classifier
    pipeline = Pipeline([('vect', vectorizer), ('clf', clf)])
    pipeline.fit(population['train_tweets'], y=population['train_classif'])
    predicted = pipeline.predict(population['val_tweets'])
    metrics = precision_recall_fscore_support(population['val_classif'], predicted,
                                              average='macro', pos_label=None)
    print("Accuracy:{0}\nPrecision:{1}\nRecall:{2}\nF1:{3}".format(
        accuracy_score(population['val_classif'], predicted),
        metrics[0], metrics[1], metrics[2]))
    score = pipeline.predict_proba(population['val_tweets'])[:, 0]
    print("AUC:{0}".format(average_precision_score(population['val_classif_bin'], score,
                                                   average="micro")))
    precision = dict()
    recall = dict()
    average_precision = dict()
    # Compute the micro-average precision-recall curve and its area
    precision["micro"], recall["micro"], _ = precision_recall_curve(
        population['val_classif_bin'], score)
    average_precision["micro"] = average_precision_score(population['val_classif_bin'], score,
                                                         average="micro")
    # Plot the precision-recall curve
    plt.clf()
    plt.plot(recall["micro"], precision["micro"],
             label='Precision-recall curve (area = {0:0.2f})'.format(average_precision["micro"]))
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall curve')
    plt.legend(loc="lower right")
    plt.show()
def Evaluate_Ranking(Direct_llista, train_or_val):
    # Pick the annotation file that matches the split ('train' or 'val')
    if train_or_val == "train":
        annotation_path = ruta_abs + "/TerrassaBuildings900/train/Annotation_train.txt"
    else:
        annotation_path = ruta_abs + "/TerrassaBuildings900/val/Annotation_val.txt"
    # Output files: one AP per query, plus the overall mean (MAP)
    ap_file = open(ruta_abs + "/files/Average_precision_" + train_or_val + ".txt", "w")
    map_file = open(ruta_abs + "/files/Mean_average_precision_" + train_or_val + ".txt", "w")
    # Ground-truth class per image id, read from the annotation file
    # (assumes one "image_id class_label" pair per line)
    annotations = {}
    with open(annotation_path) as f:
        for annotation_line in f:
            fields = annotation_line.split()
            if len(fields) >= 2:
                annotations[fields[0]] = fields[1]
    aps = []
    for line in Direct_llista:
        # Each ranking line is assumed to be "query_id img_1 img_2 ..."
        fields = line.split()
        query_id, ranking = fields[0], fields[1:]
        # Relevance: 1 when the retrieved image shares the query's class
        y_true = [1 if annotations.get(img) == annotations.get(query_id) else 0
                  for img in ranking]
        if sum(y_true) == 0:
            continue  # AP is undefined for queries with no relevant result
        # Scores decrease with rank position, preserving the ranking order
        y_score = list(range(len(ranking), 0, -1))
        ap = average_precision_score(y_true, y_score)
        aps.append(ap)
        ap_file.write("For query " + query_id + ":\t" + str(ap) + "\n")
    # MAP is the mean of the per-query average precisions
    map_value = sum(aps) / len(aps) if aps else 0.0
    map_file.write("Mean_Average_Precision = " + str(map_value) + "\n")
    ap_file.close()
    map_file.close()
def fscore(y_test, y_score): """ :param y_test: output vector - predictions on the test set :param y_score: output vector which contains probabilities for each contained estimator :return: plot object """ # binarize output vector y_test = binarize(y_test) print('y_test binarized shape = ', np.shape(y_test)) n_classes = np.shape(y_test)[1] # Compute Precision-Recall and plot curve precision = dict() recall = dict() average_precision = dict() for i in range(n_classes): precision[i], recall[i], _ = precision_recall_curve(y_test[:, i], y_score[:, i]) average_precision[i] = average_precision_score(y_test[:, i], y_score[:, i]) # Compute micro-average ROC curve and ROC area precision["micro"], recall["micro"], _ = precision_recall_curve(y_test.ravel(), y_score.ravel()) average_precision["micro"] = average_precision_score(y_test, y_score, average="micro") # Plot Precision-Recall curve plt.clf() plt.plot(recall[0], precision[0], label='Precision-Recall curve') plt.xlabel('Recall') plt.ylabel('Precision') plt.ylim([0.0, 1.05]) plt.xlim([0.0, 1.0]) plt.title('Precision-Recall example: AUC={0:0.2f}'.format(average_precision[0])) plt.legend(loc="lower left") plt.show() # Plot Precision-Recall curve for each class plt.clf() plt.plot(recall["micro"], precision["micro"], label='micro-average Precision-recall curve (area = {0:0.2f})' ''.format(average_precision["micro"])) for i in range(n_classes): plt.plot(recall[i], precision[i], label='Precision-recall curve of class {0} (area = {1:0.2f})' ''.format(i, average_precision[i])) plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('Recall') plt.ylabel('Precision') plt.title('Extension of Precision-Recall curve to multi-class') plt.legend(loc="lower right") plt.show() return plt
def perfomance(y_test, y_pred, sample_weight=None, n=10):
    print("P@" + str(n), Pat10(y_test, y_pred, n))
    fig, axes = plt.subplots(nrows=3, figsize=(6, 15))
    ax = axes[0]
    ax.grid(True)
    precision, recall, _ = precision_recall_curve(y_test, y_pred)
    # Recall cutoffs at which the best achievable precision is reported
    rec_values = np.arange(0.1, 1.0, 0.1)
    print("recalls_values", rec_values)
    prec_values = []
    for v in rec_values:
        prec_values.append(max(precision[recall > v]))
    print("precision_values", prec_values)
    print("average_precision_score",
          average_precision_score(y_test, y_pred, sample_weight=sample_weight))
    print("roc_auc_score", roc_auc_score(y_test, y_pred))
    ax.step(recall, precision, color='b', alpha=0.2, where='post')
    ax.fill_between(recall, precision, step='post', alpha=0.2, color='b')
    ax.set_xlabel('Recall', fontsize=10)
    ax.set_ylabel('Precision', fontsize=10)
    ax.set_ylim([0.0, 1.05])
    ax.set_xlim([0.0, 1.0])
    ax.tick_params(axis='x', labelsize=15)
    ax.tick_params(axis='y', labelsize=15)
    ax.set_title('2-class Precision-Recall curve: AP={0:0.3f}'.format(
        average_precision_score(y_test, y_pred, sample_weight=sample_weight)), fontsize=25)
    ax = axes[1]
    ax.grid(True)
    fpr, tpr, _ = roc_curve(y_test, y_pred, sample_weight=sample_weight)
    ax.plot(fpr, tpr)
    ax.set_title('ROC curve: roc_auc ={0:0.3f}'.format(roc_auc_score(y_test, y_pred)),
                 fontsize=25)
    ax.tick_params(axis='x', labelsize=15)
    ax.tick_params(axis='y', labelsize=15)
    ax.set_xlabel('FPR', fontsize=10)
    ax.set_ylabel('TPR', fontsize=10)
    ax = axes[2]
    bad_test = np.sum(y_test)
    good_test = len(y_test) - np.sum(y_test)
    ax.plot(sorted(y_pred[np.where(y_test == 0.)[0]], reverse=True),
            np.arange(good_test) / good_test * 100, label="good")
    ax.plot(sorted(y_pred[np.where(y_test == 1.)[0]]),
            np.arange(bad_test) / bad_test * 100, label="bad")
    ax.set_title('Predicted proba', fontsize=25)
    ax.tick_params(axis='x', labelsize=15)
    ax.tick_params(axis='y', labelsize=15)
    fig.subplots_adjust(hspace=0.5)
    plt.legend()
    plt.grid(True)
    plt.show()
    return precision, recall
def plotCurve(arr): X = arr[:, :-1] y = arr[:, -1] # Binarize the output y = label_binarize(y, classes=[0,1]) n_classes = y.shape[1] # Add noisy features random_state = np.random.RandomState(0) n_samples, n_features = X.shape X = np.c_[X, random_state.randn(n_samples, 150 * n_features)] # Split into training and test X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5, random_state=random_state) # Run classifier classifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True, random_state=random_state)) y_score = classifier.fit(X_train, y_train).decision_function(X_test) # Compute Precision-Recall and plot curve precision = dict() recall = dict() average_precision = dict() for i in range(n_classes): precision[i], recall[i], _ = precision_recall_curve(y_test[:, i],y_score[:, i]) average_precision[i] = average_precision_score(y_test[:, i], y_score[:, i]) # Compute micro-average ROC curve and ROC area precision["micro"], recall["micro"], _ = precision_recall_curve(y_test.ravel(), y_score.ravel()) average_precision["micro"] = average_precision_score(y_test, y_score, average="micro") # Plot Precision-Recall curve plt.clf() plt.plot(recall[0], precision[0], label='Precision-Recall curve') print(recall) print(precision) plt.xlabel('Recall') plt.ylabel('Precision') plt.ylim([0.0, 1.05]) plt.xlim([0.0, 1.05]) plt.title('Precision-Recall example: AUC={0:0.2f}'.format(average_precision[0])) plt.legend(loc="lower left") plt.show() # Plot Precision-Recall curve for each class plt.clf() plt.plot(recall["micro"], precision["micro"], label='micro-average Precision-recall curve (area = {0:0.2f})'''.format(average_precision["micro"])) for i in range(n_classes): plt.plot(recall[i], precision[i], label='Precision-recall curve of class {0} (area = {1:0.2f})'''.format(i, average_precision[i])) plt.xlim([0.0, 1.05]) plt.ylim([0.0, 1.05]) plt.xlabel('Recall') plt.ylabel('Precision') plt.title('Extension of Precision-Recall curve to multi-class') plt.legend(loc="lower right") plt.show()
def train_multilabel(features, targets, classes, train_split, test_split, C=1.0,
                     ignore_hard_examples=True, after_ReLU=False, normalize_L2=False):
    print('\nHyperparameters:\n - C: {}\n - after_ReLU: {}\n - normL2: {}'.format(
        C, after_ReLU, normalize_L2))
    train_APs = []
    test_APs = []
    for class_id in range(len(classes)):
        classifier = SVC(C=C, kernel='linear')  # http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html
        if ignore_hard_examples:
            train_masks = (targets[train_split][:, class_id] != 0).view(-1, 1)
            train_features = torch.masked_select(features[train_split],
                                                 train_masks.expand_as(features[train_split])).view(-1, features[train_split].size(1))
            train_targets = torch.masked_select(targets[train_split],
                                                train_masks.expand_as(targets[train_split])).view(-1, targets[train_split].size(1))
            test_masks = (targets[test_split][:, class_id] != 0).view(-1, 1)
            test_features = torch.masked_select(features[test_split],
                                                test_masks.expand_as(features[test_split])).view(-1, features[test_split].size(1))
            test_targets = torch.masked_select(targets[test_split],
                                               test_masks.expand_as(targets[test_split])).view(-1, targets[test_split].size(1))
        else:
            train_features = features[train_split]
            train_targets = targets[train_split]
            test_features = features[test_split]
            test_targets = targets[test_split]
        if after_ReLU:
            train_features[train_features < 0] = 0
            test_features[test_features < 0] = 0
        if normalize_L2:
            train_norm = torch.norm(train_features, p=2, dim=1).unsqueeze(1)
            train_features = train_features.div(train_norm.expand_as(train_features))
            test_norm = torch.norm(test_features, p=2, dim=1).unsqueeze(1)
            test_features = test_features.div(test_norm.expand_as(test_features))
        train_X = train_features.numpy()
        train_y = (train_targets[:, class_id] != -1).numpy()  # uses hard examples if not ignored
        test_X = test_features.numpy()
        test_y = (test_targets[:, class_id] != -1).numpy()
        classifier.fit(train_X, train_y)  # train parameters of the classifier
        train_preds = classifier.predict(train_X)
        train_acc = accuracy_score(train_y, train_preds) * 100
        train_AP = average_precision_score(train_y, train_preds) * 100
        train_APs.append(train_AP)
        test_preds = classifier.predict(test_X)
        test_acc = accuracy_score(test_y, test_preds) * 100
        test_AP = average_precision_score(test_y, test_preds) * 100
        test_APs.append(test_AP)
        print('class "{}" ({}/{}):'.format(classes[class_id], test_y.sum(), test_y.shape[0]))
        print(' - {:8}: acc {:.2f}, AP {:.2f}'.format(train_split, train_acc, train_AP))
        print(' - {:8}: acc {:.2f}, AP {:.2f}'.format(test_split, test_acc, test_AP))
    print('all classes:')
    print(' - {:8}: mAP {:.4f}'.format(train_split, sum(train_APs) / len(classes)))
    print(' - {:8}: mAP {:.4f}'.format(test_split, sum(test_APs) / len(classes)))
def fscore_plot(classifier, X_test, y_test): # Binarize the output n_classes = max(y_test) - min(y_test) + 1 y_test = label_binarize(y_test, classes=list(range(0,n_classes))) y_score = classifier.predict_proba(X_test) # Compute Precision-Recall and plot curve precision = dict() recall = dict() average_precision = dict() for i in range(n_classes): precision[i], recall[i], _ = precision_recall_curve(y_test[:, i], y_score[:, i]) average_precision[i] = average_precision_score(y_test[:, i], y_score[:, i]) # Compute micro-average ROC curve and ROC area precision["micro"], recall["micro"], _ = precision_recall_curve(y_test.ravel(), y_score.ravel()) average_precision["micro"] = average_precision_score(y_test, y_score, average="micro") # Plot Precision-Recall curve plt.clf() plt.plot(recall[0], precision[0], label='Precision-Recall curve') plt.xlabel('Recall') plt.ylabel('Precision') plt.ylim([0.0, 1.05]) plt.xlim([0.0, 1.0]) plt.title('Precision-Recall example: AUC={0:0.2f}'.format(average_precision[0])) plt.legend(loc="lower left") plt.show() # Plot Precision-Recall curve for each class plt.clf() plt.plot(recall["micro"], precision["micro"], label='micro-average Precision-recall curve (area = {0:0.2f})' ''.format(average_precision["micro"])) for i in range(n_classes): plt.plot(recall[i], precision[i], label='Precision-recall curve of class {0} (area = {1:0.2f})' ''.format(i, average_precision[i])) plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('Recall') plt.ylabel('Precision') plt.title('Extension of Precision-Recall curve to multi-class') plt.legend(loc="lower right") plt.show() return plt
def test_average_precision_score(self): result = self.df.metrics.average_precision_score(average='weighted') expected = metrics.average_precision_score(self.target, self.decision, average='weighted') self.assertAlmostEqual(result, expected) # curve, _, _ = self.df.metrics.precision_recall_curve() # self.assertEqual(result, curve.mean()) result = self.df.metrics.average_precision_score(average=None) expected = metrics.average_precision_score(self.target, self.decision, average=None) self.assertTrue(isinstance(result, pdml.ModelSeries)) self.assert_numpy_array_almost_equal(result.values, expected)
def test_score_scale_invariance():
    # Test that average_precision_score and auc_score are invariant to
    # scaling or shifting of the probabilities
    y_true, _, probas_pred = make_prediction(binary=True)
    roc_auc = auc_score(y_true, probas_pred)
    roc_auc_scaled = auc_score(y_true, 100 * probas_pred)
    roc_auc_shifted = auc_score(y_true, probas_pred - 10)
    assert_equal(roc_auc, roc_auc_scaled)
    assert_equal(roc_auc, roc_auc_shifted)
    pr_auc = average_precision_score(y_true, probas_pred)
    pr_auc_scaled = average_precision_score(y_true, 100 * probas_pred)
    pr_auc_shifted = average_precision_score(y_true, probas_pred - 10)
    assert_equal(pr_auc, pr_auc_scaled)
    assert_equal(pr_auc, pr_auc_shifted)
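# Illustrative check (new, not part of the test suite above): average precision
# depends only on the ranking of the scores, so any strictly monotonic
# rescaling leaves it unchanged.
import numpy as np
from sklearn.metrics import average_precision_score

y_demo = np.array([0, 1, 1, 0, 1])
s_demo = np.array([0.1, 0.4, 0.35, 0.8, 0.65])
# 3*s + 7 preserves the ordering of s exactly, hence the same AP
assert average_precision_score(y_demo, s_demo) == average_precision_score(y_demo, 3 * s_demo + 7)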
def PR_multi_class(data_train, data_train_vectors, data_test, data_test_vectors):
    # Binarize the output
    y_train_label = label_binarize(data_train.target, classes=[0, 1, 2])
    n_classes = y_train_label.shape[1]
    random_state = np.random.RandomState(0)
    # Shuffle and split training and test sets
    X_train, X_test, y_train, y_test = train_test_split(data_train_vectors, y_train_label,
                                                        test_size=.5, random_state=random_state)
    # Learn to predict each class against the others
    classifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True,
                                             random_state=random_state))
    classifier.fit(X_train, y_train)
    y_pred_score = classifier.decision_function(data_test_vectors)
    y_test_label = label_binarize(data_test.target, classes=[0, 1, 2])
    # Compute the precision-recall curve and AP for each class
    precision = dict()
    recall = dict()
    average_precision = dict()
    for i in range(n_classes):
        precision[i], recall[i], _ = precision_recall_curve(y_test_label[:, i], y_pred_score[:, i])
        average_precision[i] = average_precision_score(y_test_label[:, i], y_pred_score[:, i])
    # Compute the micro-average precision-recall curve and its area
    precision["micro"], recall["micro"], _ = precision_recall_curve(y_test_label.ravel(),
                                                                    y_pred_score.ravel())
    average_precision["micro"] = average_precision_score(y_test_label, y_pred_score,
                                                         average="micro")
    # Plot the precision-recall curve for each class
    plt.clf()
    # plt.plot(recall["micro"], precision["micro"],
    #          label='micro-average PR curve (area = {0:0.2f})'
    #                ''.format(average_precision["micro"]))
    for i in range(n_classes):
        plt.plot(recall[i], precision[i],
                 label='PR curve of class {0} (area = {1:0.2f})'.format(i, average_precision[i]))
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall curve of multi-class')
    plt.legend(loc="lower right")
    plt.show()
    return 0
def get_roc_score(edges_pos, edges_neg, score_matrix, apply_sigmoid=False): # Edge case if len(edges_pos) == 0 or len(edges_neg) == 0: return (None, None, None) # Store positive edge predictions, actual values preds_pos = [] pos = [] for edge in edges_pos: if apply_sigmoid == True: preds_pos.append(sigmoid(score_matrix[edge[0], edge[1]])) else: preds_pos.append(score_matrix[edge[0], edge[1]]) pos.append(1) # actual value (1 for positive) # Store negative edge predictions, actual values preds_neg = [] neg = [] for edge in edges_neg: if apply_sigmoid == True: preds_neg.append(sigmoid(score_matrix[edge[0], edge[1]])) else: preds_neg.append(score_matrix[edge[0], edge[1]]) neg.append(0) # actual value (0 for negative) # Calculate scores preds_all = np.hstack([preds_pos, preds_neg]) labels_all = np.hstack([np.ones(len(preds_pos)), np.zeros(len(preds_neg))]) roc_score = roc_auc_score(labels_all, preds_all) # roc_curve_tuple = roc_curve(labels_all, preds_all) ap_score = average_precision_score(labels_all, preds_all) # return roc_score, roc_curve_tuple, ap_score return roc_score, ap_score
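# Hedged usage sketch for get_roc_score: a toy 3-node score matrix, with edge
# tuples indexing into it exactly as above (roc_auc_score and
# average_precision_score are assumed imported in the surrounding module).
import numpy as np

score_matrix_demo = np.array([[0.0, 0.9, 0.2],
                              [0.9, 0.0, 0.1],
                              [0.2, 0.1, 0.0]])
roc_demo, ap_demo = get_roc_score(edges_pos=[(0, 1)], edges_neg=[(1, 2), (0, 2)],
                                  score_matrix=score_matrix_demo)
print(roc_demo, ap_demo)  # 1.0 1.0 here: the positive edge outscores both negatives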
def GB_classifier_model_search(X, y, m_label):
    '''runs grid search for the gradient boosting classifier'''
    # split 80/20 train test
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    # create param grid for search
    param_grid = [{'learning_rate': [.01, .001],
                   'n_estimators': [1000],
                   'max_depth': [3, 5, 7]}]
    GB = GradientBoostingClassifier()
    print 'running GradientBoostingClassifier with grid search...'
    GBc = GridSearchCV(GB, param_grid, verbose=2, cv=2, n_jobs=-1)  # 2 k-folds
    GBc.fit(X_train, y_train)
    pred = GBc.predict_proba(X_test)  # get back probabilities
    pred2 = GBc.predict(X_test)  # get back predictions
    fpr, tpr, thresholds = roc_curve(y_test, pred[:, 1])
    # get the AUC
    AUC = roc_auc_score(y_test, pred[:, 1])
    # get the AUC for the precision-recall curve
    AUC2 = average_precision_score(y_test, pred[:, 1])
    recall = recall_score(y_test, pred2)
    precision = precision_score(y_test, pred2)
    # plot AUC
    plt.plot(fpr, tpr, label='{} AUC = {}'.format(m_label, round(AUC, 3)))
    v = np.linspace(0, 1)
    plt.plot(v, v, linestyle='--', color='k')
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title('ROC Curve')
    plt.xlim(-0.05, 1)
    plt.ylim(0, 1.05)
    plt.axhline(1, color='k', linestyle='--')
    plt.axvline(0, color='k', linestyle='--')
    plt.legend()
    return GBc, recall, AUC, precision, AUC2
def estimate_model(positive_data_matrix=None, negative_data_matrix=None, target=None, estimator=None, n_jobs=4): X, y = make_data_matrix(positive_data_matrix=positive_data_matrix, negative_data_matrix=negative_data_matrix, target=target) logger.info('Test set') logger.info(describe(X)) logger.info('-' * 80) logger.info('Test Estimate') predictions = estimator.predict(X) margins = estimator.decision_function(X) logger.info(classification_report(y, predictions)) apr = average_precision_score(y, margins) logger.info('APR: %.3f' % apr) roc = roc_auc_score(y, margins) logger.info('ROC: %.3f' % roc) logger.info('Cross-validated estimate') for scoring in ['accuracy', 'precision', 'recall', 'f1', 'average_precision', 'roc_auc']: scores = cross_validation.cross_val_score(estimator, X, y, cv=5, scoring=scoring, n_jobs=n_jobs) logger.info('%20s: %.3f +- %.3f' % (scoring, np.mean(scores), np.std(scores))) return roc, apr
def bio_classification_report(y_true, y_pred): lb = LabelBinarizer() y_true_combined = 1 - lb.fit_transform(list(chain.from_iterable(y_true))) y_pred_combined = list(chain.from_iterable(y_pred)) tagset = set(lb.classes_) - {'O'} tagset = sorted(tagset, key=lambda tag: tag.split('-', 1)[::-1]) class_indices = {cls: idx for idx, cls in enumerate(lb.classes_)} print 'True sum %d Pred sum %d Len %d' %(sum(y_true_combined), sum(y_pred_combined), len(y_pred_combined)) print "AUC\tP-R: %.4f\tROC: %.4f" % (average_precision_score(y_true_combined, y_pred_combined, average=None), roc_auc_score(y_true_combined, y_pred_combined, average=None)) #plt.figure() #fpr, tpr, thr = roc_curve(y_true_combined, y_pred_combined) #area = auc(fpr, tpr) #plt.plot(fpr, tpr, label='{area:.3f}'.format( area=area)) #plt.legend(loc=4) #plt.savefig('sub3.jpg') return classification_report( 1 - y_true_combined, [0 if v > 0.1 else 1 for v in y_pred_combined], labels=[class_indices[cls] for cls in tagset], target_names=tagset, )
def get_auprc(predictions_true, predictions_false):
    predictions = predictions_true + predictions_false
    # Positives first, then negatives, matching the concatenation order above
    labels = [1] * len(predictions_true) + [0] * len(predictions_false)
    y_scores = numpy.array(predictions)
    y_true = numpy.array(labels)
    auprc = average_precision_score(y_true, y_scores)
    return auprc
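# Hedged usage sketch: plain Python lists of scores, as get_auprc expects.
print(get_auprc(predictions_true=[0.9, 0.8, 0.25],
                predictions_false=[0.3, 0.2, 0.1]))
# Two positives rank above all negatives, one ranks below a negative,
# so AUPRC = (1/1 + 2/2 + 3/4) / 3 ~ 0.92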
def plot_precision_recall(self, X_test, y_test, infos="", outfile="precision_recall.png"):
    """plot precision-recall curve"""
    if self.trained:
        try:
            y_score = self.clf.decision_function(X_test)
        except AttributeError:
            y_score = self.clf.predict_proba(X_test)[:, 1]
        precision, recall, _ = precision_recall_curve(y_test, y_score)
        average_precision = average_precision_score(y_test, y_score, average="micro")
        # Plot the precision-recall curve
        plt.clf()
        plt.plot(recall, precision,
                 label='Average Precision-recall curve (area = {0:0.2f})'.format(average_precision))
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.title('Precision-Recall curve for %s (%s)' % (self.method, infos))
        plt.legend(loc="lower right")
        plt.savefig(outfile)
    else:
        raise ValueError("Classifier is not trained")
def plot_pr(y_true_s, y_score, thresholds):
    # Plot precision-recall curves for a range of threshold values
    plt.figure()
    for t in thresholds:
        # Binarize the targets at threshold t
        y_true = y_true_s.copy()
        y_true[y_true <= t] = 1
        y_true[y_true != 1] = 0
        recall = dict()
        precision = dict()
        pr_auc = dict()
        # Compute the micro-average precision-recall curve and its area
        precision["micro"], recall["micro"], _ = precision_recall_curve(
            y_true, y_score, pos_label=1)
        pr_auc["micro"] = average_precision_score(y_true, y_score, average="micro")
        plt.plot(recall['micro'], precision['micro'],
                 label='t = %0.2f A (area = %0.2f)' % (t, pr_auc['micro']))
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('Precision Recall Curve')
    plt.legend(loc="lower right")
    plt.show()
def main():
    x, y = readData("C:/Users/marro/Repo/CS584/Generative_Learning/Data/banknote/data_banknote_authentication.txt",
                    ",", scale=False)
    # shuffle
    p = np.random.permutation(len(x))
    x = x[p]
    y = y[p]
    # encode class labels
    classes, y = np.unique(y, return_inverse=True)
    print("Training accuracy: {}".format(getAccuracy(y, classifyAll(x, x, y), 1)))
    print("Kfold accuracy, recall, precision, tp, tn, fp, fn: {}".format(
        kfoldCrossValidation(x, y, 10, 1)))
    # precision-recall curve
    precision = dict()
    recall = dict()
    average_precision = dict()
    for i in range(0, 1):
        precision[i], recall[i], _ = precision_recall_curve(y, classifyAll(x, x, y))
        average_precision[i] = average_precision_score(y, classifyAll(x, x, y))
    # Plot the precision-recall curve
    plt.clf()
    plt.plot(recall[0], precision[0])  # , label='Precision-Recall curve')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    # plt.title('Precision-Recall Curve'.format(average_precision[0]))
    print(average_precision[0])
    plt.legend(loc="lower left")
    plt.show()
def explain_perf(self, X, y, name=None): if name is None: name = gen_name_from_class(self) X, y, self.feature_names, self.feature_types = unify_data( X, y, self.feature_names, self.feature_types ) predict_fn = unify_predict_fn(self.predict_fn, X) scores = predict_fn(X) precision, recall, thresh = precision_recall_curve(y, scores) ap = average_precision_score(y, scores) abs_residuals = np.abs(y - scores) counts, values = np.histogram(abs_residuals, bins="doane") overall_dict = { "type": "perf_curve", "density": {"names": values, "scores": counts}, "scores": scores, "x_values": recall, "y_values": precision, "threshold": thresh, "auc": ap, } internal_obj = {"overall": overall_dict, "specific": None} return PRExplanation( "perf", internal_obj, feature_names=self.feature_names, feature_types=self.feature_types, name=name, )
def plot_precision_recall_curves(target, feature, ax, sample_weight=None, color="blue", fn=""):
    pr, rc, thresholds = metrics.precision_recall_curve(target, feature, pos_label=1,
                                                        sample_weight=sample_weight)
    average_precision_score = metrics.average_precision_score(target, feature,
                                                              sample_weight=sample_weight)
    ax.plot(rc, pr, label="%s : %.3f" % (fn, average_precision_score), color=color)
    ax.set_xlabel("Recall")
    ax.set_ylabel("Precision")
    ax.legend(loc="best")
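# Hedged usage sketch: drawing one scored feature on a fresh axis, with
# synthetic targets/scores standing in for real model output.
import numpy as np
import matplotlib.pyplot as plt

rng = np.random.RandomState(0)
target_demo = rng.randint(0, 2, size=200)
feature_demo = target_demo * 0.5 + rng.rand(200)  # weakly informative score
fig, ax_demo = plt.subplots()
plot_precision_recall_curves(target_demo, feature_demo, ax_demo, fn="demo feature")
plt.show()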
def mean_ap(distmat, query_ids=None, gallery_ids=None, query_cams=None, gallery_cams=None): distmat = to_numpy(distmat) m, n = distmat.shape # Fill up default values if query_ids is None: query_ids = np.arange(m) if gallery_ids is None: gallery_ids = np.arange(n) if query_cams is None: query_cams = np.zeros(m).astype(np.int32) if gallery_cams is None: gallery_cams = np.ones(n).astype(np.int32) # Ensure numpy array query_ids = np.asarray(query_ids) gallery_ids = np.asarray(gallery_ids) query_cams = np.asarray(query_cams) gallery_cams = np.asarray(gallery_cams) # Sort and find correct matches indices = np.argsort(distmat, axis=1) matches = (gallery_ids[indices] == query_ids[:, np.newaxis]) # Compute AP for each query aps = [] for i in range(m): # Filter out the same id and same camera valid = ((gallery_ids[indices[i]] != query_ids[i]) | (gallery_cams[indices[i]] != query_cams[i])) y_true = matches[i, valid] y_score = -distmat[i][indices[i]][valid] if not np.any(y_true): continue aps.append(average_precision_score(y_true, y_score)) if len(aps) == 0: raise RuntimeError("No valid query") return np.mean(aps)
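# Hedged usage sketch for mean_ap: a toy 2-query x 3-gallery distance matrix
# (to_numpy is assumed to pass numpy arrays through unchanged). With the
# default ids, query i matches gallery item i, and the default camera ids
# differ, so every gallery item is a valid candidate.
import numpy as np

distmat_demo = np.array([[0.1, 0.9, 0.8],
                         [0.7, 0.2, 0.6]])
print(mean_ap(distmat_demo))  # both true matches ranked first -> mAP = 1.0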
def evalualte_base(dataset, DV, model):
    start = time.time()
    # Load data into pandas
    data = pd.read_csv(dataset, index_col=0)
    data.columns = [camel_to_snake(col) for col in data.columns]
    if model == 'logit':
        # DV
        y = data[str(DV)]
        X = data[data.columns.difference([str(DV)])]
        clf = logit_clf(dataset, DV, 'yes')
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
        # predict class labels for the test set
        predicted = clf.predict(X_test)
        # generate class probabilities
        y_score = clf.predict_proba(X_test)
        # generate evaluation metrics
        print "Model score, accuracy : %.3f" % (metrics.accuracy_score(y_test, predicted))
        print "Model score, roc_auc: %.3f" % (metrics.roc_auc_score(y_test, y_score[:, 1]))
        print "Model score, f1: %.3f" % metrics.f1_score(y_test, predicted)
        print "Model score, average-precision: %.3f" % (metrics.average_precision_score(y_test, predicted))
        print "Model score, precision: %.3f" % (metrics.precision_score(y_test, predicted))
        print "Model score, recall: %.3f" % (metrics.recall_score(y_test, predicted))
    end = time.time()
    print "Runtime, K-folds evaluation of base model: %.3f" % (end - start), "seconds."
def AUCprc(filename="y_y_pred.txt"):
    """
    Plots the AUC-PRC curve

    Args:
        filename: name of output file for model

    Returns:
        (nothing) the function plots the AUC-PRC curve with the AP value included
    """
    # MODEL
    df = pd.read_csv(filename, sep=' ', names=["y", "y_pred"])
    y_predarray = np.array(df.y_pred)
    y_truearray = np.array(df.y)
    # AUC-PRC
    precision, recall, threshold = metrics.precision_recall_curve(y_truearray, y_predarray)
    average_precision = metrics.average_precision_score(y_truearray, y_predarray)
    # Plot curve
    plt.clf()
    plt.plot(recall, precision, label='case A: AUC={0:0.2f}'.format(average_precision))
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('Precision-Recall')
    plt.legend(loc="lower left")
    plt.show()
    return
def check_ml_helper(drugs, disease_to_drugs, drug_to_index, list_M_similarity, pairs, classes, cv, knn, n_fold, n_proportion, n_subset, model_type, prediction_type, features, recalculate_similarity, disjoint_cv, split_both, output_f, model_fun, verbose, n_seed): # Get classification model clf = utilities.get_classification_model(model_type, model_fun, n_seed) all_auc = [] all_auprc = [] for i, (train, test) in enumerate(cv): file_name = None # for saving results pairs_train = pairs[train] classes_train = classes[train] pairs_test = pairs[test] classes_test = classes[test] if recalculate_similarity: drug_to_disease_to_scores = utilities.get_similarity_based_scores(drugs, disease_to_drugs, drug_to_index, list_M_similarity = list_M_similarity, knn = knn, pairs_train = pairs_train, pairs_test = None, approach = "train_vs_train", file_name = file_name) else: # Using similarity scores of all drugs, not only within the subset drug_to_disease_to_scores = utilities.get_similarity_based_scores(drugs, disease_to_drugs, drug_to_index, list_M_similarity = list_M_similarity, knn = knn, pairs_train = pairs_train, pairs_test = pairs_test, approach = "train_test_vs_train_test", file_name = file_name) # similar to all_vs_all above, but removes the test pair X, y = get_scores_and_labels(pairs_train, classes_train, drug_to_disease_to_scores) if recalculate_similarity: drug_to_disease_to_scores = utilities.get_similarity_based_scores(drugs, disease_to_drugs, drug_to_index, list_M_similarity = list_M_similarity, knn = knn, pairs_train = pairs_test, pairs_test = None, approach = "train_vs_train", file_name = file_name) X_new, y_new = get_scores_and_labels(pairs_test, classes_test, drug_to_disease_to_scores) probas_ = clf.fit(X, y).predict_proba(X_new) fpr, tpr, thresholds = roc_curve(y_new, probas_[:, 1]) roc_auc = 100*auc(fpr, tpr) all_auc.append(roc_auc) prc_auc = 100*average_precision_score(y_new, probas_[:, 1]) all_auprc.append(prc_auc) if verbose: print "Fold:", i+1, "# train:", len(pairs_train), "# test:", len(pairs_test), "AUC: %.1f" % roc_auc, "AUPRC: %.1f" % prc_auc #if verbose: # print "AUC: %.1f (+/-%.1f):" % (numpy.mean(all_auc), numpy.std(all_auc)), all_auc if output_f is not None: output_f.write("%d\t%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%f\t%f\t%f\t%f\n" % (n_fold, n_proportion, n_subset, model_type, prediction_type, "|".join(features), recalculate_similarity, disjoint_cv, split_both, "cv", numpy.mean(all_auc), numpy.std(all_auc), numpy.mean(all_auprc), numpy.std(all_auprc))) return numpy.mean(all_auc), numpy.mean(all_auprc)
def validation_step(_x_val_gov, _x_val_art, _y_val, writer=None):
    """Evaluates the model on a validation set"""
    print("_x_val_gov: ", len(_x_val_gov))
    print("_x_val_art: ", len(_x_val_art))
    batches_validation = feed.batch_iter(
        list(zip(_x_val_gov, _x_val_art, _y_val)),
        FLAGS.batch_size, num_epochs=1, shuffle=False)
    _eval_counter, _eval_loss = 0, 0.0
    _eval_pre_tk = [0.0] * FLAGS.top_num
    _eval_rec_tk = [0.0] * FLAGS.top_num
    _eval_F_tk = [0.0] * FLAGS.top_num
    true_onehot_labels = []
    predicted_onehot_scores = []
    predicted_onehot_labels_ts = []
    predicted_onehot_labels_tk = [[] for _ in range(FLAGS.top_num)]
    valid_count_correct_one = 0
    valid_count_label_one = 0
    valid_count_correct_zero = 0
    valid_count_label_zero = 0
    valid_step_count = 0
    for batch_validation in batches_validation:
        valid_step_count += 1
        x_batch_val_gov, x_batch_val_art, y_batch_val = zip(*batch_validation)
        feed_dict = {
            cnn.input_x_gov: x_batch_val_gov,
            cnn.input_x_art: x_batch_val_art,
            cnn.input_y: y_batch_val,
            cnn.dropout_keep_prob: 1.0,
            cnn.is_training: False
        }
        step, summaries, scores, cur_loss, input_y = sess.run(
            [cnn.global_step, validation_summary_op, cnn.scores, cnn.loss, cnn.input_y],
            feed_dict)
        count_label_one, count_label_zero, count_correct_one, count_correct_zero = \
            count_correct_pred(scores, input_y)
        valid_count_correct_one += count_correct_one
        valid_count_label_one += count_label_one
        valid_count_correct_zero += count_correct_zero
        valid_count_label_zero += count_label_zero
        print("[VALID] num_correct_answer is {} out of {}".format(
            count_correct_one, count_label_one))
        print("[VALID] num_correct_answer is {} out of {}".format(
            count_correct_zero, count_label_zero))
        # Prepare for calculating metrics
        for i in y_batch_val:
            true_onehot_labels.append(i)
        for j in scores:
            predicted_onehot_scores.append(j)
        # Predict by threshold
        batch_predicted_onehot_labels_ts = feed.get_onehot_label_threshold(
            scores=scores, threshold=FLAGS.threshold)
        for k in batch_predicted_onehot_labels_ts:
            predicted_onehot_labels_ts.append(k)
        # Predict by topK
        for _top_num in range(FLAGS.top_num):
            batch_predicted_onehot_labels_tk = feed.get_onehot_label_topk(
                scores=scores, top_num=_top_num + 1)
            for i in batch_predicted_onehot_labels_tk:
                predicted_onehot_labels_tk[_top_num].append(i)
        _eval_loss = _eval_loss + cur_loss
        _eval_counter = _eval_counter + 1
        if writer:
            writer.add_summary(summaries, step)
    logger.info("[VALID_FINAL] Total Correct One Answer is {} out of {}".format(
        valid_count_correct_one, valid_count_label_one))
    logger.info("[VALID_FINAL] Total Correct Zero Answer is {} out of {}".format(
        valid_count_correct_zero, valid_count_label_zero))
    _eval_loss = float(_eval_loss / _eval_counter)
    # Calculate Precision & Recall & F1 (threshold & topK)
    _eval_pre_ts = precision_score(y_true=np.array(true_onehot_labels),
                                   y_pred=np.array(predicted_onehot_labels_ts), average='micro')
    _eval_rec_ts = recall_score(y_true=np.array(true_onehot_labels),
                                y_pred=np.array(predicted_onehot_labels_ts), average='micro')
    _eval_F_ts = f1_score(y_true=np.array(true_onehot_labels),
                          y_pred=np.array(predicted_onehot_labels_ts), average='micro')
    for _top_num in range(FLAGS.top_num):
        _eval_pre_tk[_top_num] = precision_score(
            y_true=np.array(true_onehot_labels),
            y_pred=np.array(predicted_onehot_labels_tk[_top_num]), average='micro')
        _eval_rec_tk[_top_num] = recall_score(
            y_true=np.array(true_onehot_labels),
            y_pred=np.array(predicted_onehot_labels_tk[_top_num]), average='micro')
        _eval_F_tk[_top_num] = f1_score(
            y_true=np.array(true_onehot_labels),
            y_pred=np.array(predicted_onehot_labels_tk[_top_num]), average='micro')
    # Calculate the average AUC
    _eval_auc = roc_auc_score(y_true=np.array(true_onehot_labels),
                              y_score=np.array(predicted_onehot_scores), average='micro')
    # Calculate the average precision (PR AUC)
    _eval_prc = average_precision_score(y_true=np.array(true_onehot_labels),
                                        y_score=np.array(predicted_onehot_scores), average='micro')
    return _eval_loss, _eval_auc, _eval_prc, _eval_rec_ts, _eval_pre_ts, _eval_F_ts, \
        _eval_rec_tk, _eval_pre_tk, _eval_F_tk
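# feed.get_onehot_label_threshold and feed.get_onehot_label_topk are project
# helpers; a plausible numpy rendering of their semantics (an assumption, not
# the project's actual code) is:
import numpy as np

def onehot_by_threshold(scores, threshold=0.5):
    # 1 wherever the predicted probability clears the threshold
    return (np.asarray(scores) >= threshold).astype(int)

def onehot_by_topk(scores, top_num=1):
    scores = np.asarray(scores)
    out = np.zeros_like(scores, dtype=int)
    # Mark the top_num highest-scoring labels in each row
    top = np.argsort(-scores, axis=1)[:, :top_num]
    np.put_along_axis(out, top, 1, axis=1)
    return out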
def predict(args, model, tokenizer):
    """ Run prediction on test set. """
    # Make output dir if necessary
    if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(args.output_dir)

    # Get test data
    test_data = load_and_cache_dataset(args, tokenizer, 'test')
    test_queries = test_data["queries"]
    test_query_token_ids = test_data["query_token_ids"]
    candidate_token_ids = test_data["candidate_token_ids"]
    candidates = list(test_data["candidate2id"].keys())
    candidate_ids = list(test_data["candidate2id"].values())

    # Check if gold is available
    gold_available = ("gold_hypernym_candidate_ids" in test_data
                      and test_data["gold_hypernym_candidate_ids"] is not None)
    test_pos_candidate_ids = test_data["gold_hypernym_candidate_ids"] if gold_available else None

    # Write and log top k candidates for each test query.
    ranking_cutoff = 15
    logger.info("***** Running prediction *****")
    logger.info("  Nb queries: {}".format(len(test_queries)))
    logger.info("  Ranking cutoff: {}".format(ranking_cutoff))
    if gold_available:
        logger.info("  Evaluating ranking of candidates wrt gold hypernyms")
    else:
        logger.info("  NOT evaluating ranking of candidates (gold hypernyms not available)")

    # Accumulate average precision scores (if gold is available)
    ap_scores = []
    top_candidates_and_scores = []
    results = {}

    # Loop over queries
    total_test_loss = 0.0
    nb_queries = len(test_queries)
    for i in range(nb_queries):
        # Create a dataset for this query and all the candidates
        query_token_ids = test_query_token_ids[i]
        candidate_labels = [0] * len(candidate_ids)
        if gold_available:
            for candidate_id in test_pos_candidate_ids[i]:
                candidate_labels[candidate_id] = 1
        eval_dataset = make_dataset(tokenizer, [query_token_ids], candidate_token_ids,
                                    [candidate_ids], candidate_labels=[candidate_labels],
                                    max_length=args.max_seq_length, pad_on_left=False,
                                    pad_token=0, pad_token_segment_id=0,
                                    mask_padding_with_zero=True)
        logger.info("  *** Running prediction on query {} ('{}') ***".format(i, test_queries[i]))
        y_probs, y_true, test_loss = get_model_predictions(args, model, eval_dataset)
        total_test_loss += test_loss

        # Get top k candidates and their scores
        y_scores = y_probs[:, 1]
        top_k_candidate_ids = np.argsort(y_scores).tolist()[-ranking_cutoff:][::-1]
        top_k_scores = [y_scores[j] for j in top_k_candidate_ids]
        top_candidates_and_scores.append(list(zip(top_k_candidate_ids, top_k_scores)))

        # Evaluate ranking if gold hypernyms are available
        if gold_available:
            ap = average_precision_score(y_true=y_true, y_score=y_scores)
            ap_scores.append(ap)

        # FOR DEBUGGING
        logger.warning("  STOPPING FOR DEBUGGING PURPOSES")
        break

    # Compute average loss
    loss = total_test_loss / nb_queries
    results["loss"] = loss
    logger.info("***** Results *****")
    logger.info("  loss: {}".format(loss))

    # Compute mean average precision if gold hypernyms were available
    if gold_available:
        MAP = np.mean(ap_scores)
        results["MAP"] = MAP
        logger.info("  MAP: {}".format(MAP))

    # Write top k candidates and scores
    path_top_candidates = os.path.join(args.output_dir,
                                       "test_top_{}_candidates.tsv".format(ranking_cutoff))
    path_top_scores = os.path.join(args.output_dir,
                                   "test_top_{}_scores.tsv".format(ranking_cutoff))
    logger.info("Writing top {} candidates for each query to {}".format(
        ranking_cutoff, path_top_candidates))
    logger.info("Writing top {} scores for each query to {}".format(
        ranking_cutoff, path_top_scores))
    with open(path_top_candidates, 'w') as fc, open(path_top_scores, 'w') as fs:
        for i, topk in enumerate(top_candidates_and_scores):
            fc.write("{}\n".format("\t".join([str(c) for (c, s) in topk])))
            fs.write("{}\n".format("\t".join(["{:.5f}".format(s) for (c, s) in topk])))
            query = test_queries[i]
            topk_string = ', '.join(["('{}',{:.5f})".format(c, s) for (c, s) in topk])
            logger.info("{}. Top candidates for '{}': {}".format(i + 1, query, topk_string))

    # Write average precision of each query
    if gold_available:
        output_eval_file = os.path.join(args.output_dir, "test_average_precision.txt")
        logger.info("  Writing average precision scores in {}".format(output_eval_file))
        with open(output_eval_file, "w") as writer:
            for ap in ap_scores:
                writer.write("{:.5f}\n".format(ap))

    return results
def train(X, y, weight_classes=True, n_iter_search=500, score='roc_auc', random_state=123):
    '''
    Train a binary SGD classifier using a randomized grid search with
    given scoring metric.

    Parameters:
        X (list-like): list of normalized attachment texts
        y (list-like): list of validated targets (0 = red, 1 = green)
        weight_classes (bool): whether or not to use the "balanced" mode to
            adjust class weights.
        n_iter_search (int): number of parameter settings that are sampled.
            Trades off runtime vs quality of the solution.
        score (str): the scorer used to evaluate the predictions on the test
            set. `roc_auc` by default. Available options include: accuracy,
            roc_auc, precision, fbeta, recall. Note: for fbeta, beta is set
            to 1.5 to favor recall of the positive class.
        random_state (int): sets the random seed for reproducibility.

    Returns:
        results (dict): a dict of scoring metrics and their values
        best_score (float): mean cross-validated score of the best_estimator.
        best_estimator (sklearn estimator): estimator that was chosen by the search
        best_params (dict): parameter setting that gave the best results on the hold out data.
    '''
    if weight_classes:
        clf = SGDClassifier(class_weight='balanced')
    else:
        clf = SGDClassifier()
    scoring = {
        'accuracy': metrics.make_scorer(metrics.accuracy_score),
        'roc_auc': metrics.make_scorer(metrics.roc_auc_score, needs_threshold=True),
        # average_precision_score needs continuous scores, not hard predictions
        'precision': metrics.make_scorer(metrics.average_precision_score, needs_threshold=True),
        'fbeta': metrics.make_scorer(metrics.fbeta_score, beta=1.5),
        'recall': metrics.make_scorer(metrics.recall_score)
    }
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, stratify=y, test_size=0.2, random_state=random_state)
    pipe = Pipeline([('vectorizer', TfidfVectorizer(stop_words='english')),
                     ('select', SelectKBest(chi2)),
                     ('clf', clf)])
    param_dist = get_param_distribution()
    random_search = RandomizedSearchCV(pipe, param_distributions=param_dist, scoring=scoring,
                                       refit=score, n_iter=n_iter_search, cv=5, n_jobs=-1,
                                       verbose=1, random_state=random_state)
    try:
        random_search.fit(X_train, y_train)
    except Exception as e:
        logger.error(f"Exception occurred training a new model: {e}", exc_info=True)
    y_pred = random_search.predict(X_test)
    # get the col number of the positive class (i.e. green)
    positive_class_col = list(random_search.classes_).index(1)
    try:
        y_score = random_search.predict_proba(X_test)[:, positive_class_col]
    except AttributeError:
        y_score = random_search.decision_function(X_test)
    average_precision = metrics.average_precision_score(y_test, y_score)
    acc = metrics.accuracy_score(y_test, y_pred)
    try:
        roc_auc = metrics.roc_auc_score(y_test, y_pred)
    except ValueError:
        roc_auc = None
    precisions, recalls, _ = metrics.precision_recall_curve(y_test, y_score)
    try:
        auc = metrics.auc(recalls, precisions)
    except ValueError:
        auc = None
    fbeta = metrics.fbeta_score(y_test, y_pred, beta=1.5)
    recall = metrics.recall_score(y_test, y_pred)
    best_estimator = random_search.best_estimator_
    best_params = random_search.best_params_
    best_score = random_search.best_score_
    result_values = [y_pred, y_score, precisions, recall, average_precision, acc,
                     roc_auc, auc, fbeta, recalls, best_score, best_estimator, y_test]
    result_keys = ['y_pred', 'y_score', 'precisions', 'recall', 'average_precision',
                   'acc', 'roc_auc', 'auc', 'fbeta', 'recalls', 'best_score',
                   'best_estimator', 'y_test']
    results = {k: v for k, v in zip(result_keys, result_values)}
    return results, best_score, best_estimator, best_params
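# Hedged usage sketch for train(): the texts and labels below are fabricated
# placeholders, and the call assumes the surrounding module's helpers
# (get_param_distribution, logger) are importable.
if __name__ == '__main__':
    pos_texts = ["budget approved and delivery on schedule"] * 40
    neg_texts = ["critical delay and cost overrun reported"] * 40
    X_demo = pos_texts + neg_texts
    y_demo = [1] * 40 + [0] * 40
    results_demo, best_score_demo, best_est_demo, best_params_demo = train(
        X_demo, y_demo, n_iter_search=5)
    print('best CV score: {:.3f}'.format(best_score_demo))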
def get_prc(labels, y):
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        return metrics.average_precision_score(labels.cpu().numpy(),
                                               y.detach().cpu().numpy(),
                                               average='weighted')
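# Hedged usage sketch with synthetic tensors (a torch import is assumed, since
# get_prc expects tensor inputs; detach() strips the autograd graph before the
# numpy conversion).
import torch

labels_demo = torch.tensor([0, 1, 1, 0, 1])
scores_demo = torch.tensor([0.1, 0.8, 0.6, 0.3, 0.9], requires_grad=True)
print(get_prc(labels_demo, scores_demo))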
grad_images=return_gradients, load_and_evaluate_ckpt=sel_ckpts) if len(scores.keys()) > 1: raise RuntimeError scores = scores[0] labs = labs[0] # Create a mosaic plot_mosaic(images.astype(np.uint8), title='Images', rc=10, cc=10) plot_mosaic(labs, rc=10, cc=10, title='Labels', show_plot=True) # plot_mosaic(labels, rc=10, cc=10, title='Labels', show_plot=True) plot_mosaic(scores, rc=10, cc=10, title='Predictions', show_plot=True) # Evaluate performance map_score = metrics.average_precision_score(labs.reshape(batch_size, -1), scores.reshape(batch_size, -1)) p, r, thresh = metrics.precision_recall_curve(labs.reshape(batch_size, -1), scores.reshape(batch_size, -1), pos_label=1) plt.step(r, p, color='b', alpha=0.2, where='post') plt.fill_between(r, p, step='post', alpha=0.2, color='b') plt.xlabel('Recall') plt.ylabel('Precision') plt.ylim([0.0, 1.05]) plt.xlim([0.0, 1.0]) plt.show() # np.savez( # version, # preds=preds, # all_scores=all_scores, # bin_labs=bin_labs,
print('score, decision tree: {}'.format(tree.score(X_test, y_test))) Dummy = DummyClassifier().fit(X_train, y_train) print('Score, dummy: {}'.format(Dummy.score(X_test, y_test))) lr = LogisticRegression(C=0.1).fit(X_train, y_train) print('Score, logistic regression: {}'.format(lr.score(X_test, y_test))) # confusion matrix pred_lr = lr.predict(X_test) confusion = confusion_matrix(y_test, pred_lr) print('Confusion Matrix for LogisticRegression:\n{}'.format(confusion)) print('F1-score for LogisticRegression: {}'.format(f1_score(y_test, pred_lr))) # for multiclass f1-score can be used with average='micro','macro' or 'weighted' print('Report:\n{}'.format(classification_report(y_test, pred_lr))) # higher threshold higher precision, lower recall precision, recall, threshold = precision_recall_curve( y_test, lr.decision_function(X_test)) close_zero = np.argmin(np.abs(threshold)) plt.plot(precision[close_zero], recall[close_zero], 'o') plt.plot(precision, recall, '-') plt.show() print('Average precision score: {}'.format( average_precision_score(y_test, lr.decision_function(X_test)))) # ROC(Receiver Operating Characteristics) False Positive Rate (FPR) vs True Positive Rate(TPR: recall) fpr, tpr, threshold = roc_curve(y_test, lr.decision_function(X_test)) plt.plot(fpr, tpr, label='ROC curve') close_zero = np.argmin(np.abs(threshold)) plt.plot(fpr[close_zero], tpr[close_zero], 'o') plt.show() # AUC (area under curve) print('AUC: {}'.format(roc_auc_score(y_test, lr.decision_function(X_test)))) # AUC is very useful on imbalanced data, but we'll need to adjust threshold. We get information that # cannot be found from accuracy score only
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("pred_file", type=str, help="File path to list of predictions")
    parser.add_argument("-c", "--class_scores", type=str, default=None,
                        help="Path to write class-specific APs")
    parser.add_argument("-q", "--qual", type=str, default=None,
                        help="Path to write qualitative results")
    args = parser.parse_args()
    params = vars(args)

    # Load Attributes ------------------------------------------------------------------------
    attr_id_to_name, attr_id_to_idx = load_attributes()
    idx_to_attr_id = {v: k for k, v in attr_id_to_idx.iteritems()}
    n_attr = len(attr_id_to_idx)

    # Load predictions -----------------------------------------------------------------------
    # Construct a list of dicts containing: GT labels, prediction probabilities, image path
    pred_list = []
    with open(params['pred_file'], 'r') as f:
        for _line in f:
            line = _line.strip()
            dct = json.loads(line)
            pred_entry = dict()
            pred_entry['pred_probs'] = np.asarray(dct['pred_probs'], dtype=float)
            # Read image_path and gt_labels from annotation
            anno_path = dct['anno_path'] if osp.exists(dct['anno_path']) \
                else osp.join(DS_ROOT, dct['anno_path'])
            with open(anno_path) as jf:
                anno = json.load(jf)
            # Get the list of attributes this corresponds to
            attr_set = set(anno['labels'])
            attr_vec = labels_to_vec(attr_set, attr_id_to_idx)
            pred_entry['image_path'] = anno['image_path']
            pred_entry['gt_labels'] = attr_vec
            pred_entry['anno_path'] = dct['anno_path']
            pred_list.append(pred_entry)

    # Convert to matrix ----------------------------------------------------------------------
    # Create an NxM matrix: each row holds the class probabilities for the M classes.
    # In case of GT, they are 1-hot encoded
    gt_mat = np.array([d['gt_labels'] for d in pred_list])
    pred_probs_mat = np.array([d['pred_probs'] for d in pred_list])

    # Drop examples where gt contains no relevant attributes (when testing on a partial set)
    # non_empty_gt_idx = np.where(np.sum(gt_mat, axis=1) > 0)[0]
    # pred_probs_mat = pred_probs_mat[non_empty_gt_idx, :]
    # gt_mat = gt_mat[non_empty_gt_idx, :]

    # Evaluate Overall Attribute Prediction --------------------------------------------------
    n_examples, n_labels = gt_mat.shape
    print '# Examples = ', n_examples
    print '# Labels = ', n_labels
    print 'Macro MAP = {:.2f}'.format(
        100 * average_precision_score(gt_mat, pred_probs_mat, average='macro'))

    if params['class_scores'] is not None:
        cmap_stats = average_precision_score(gt_mat, pred_probs_mat, average=None)
        with open(params['class_scores'], 'w') as wf:
            wf.write('\t'.join(['attribute_id', 'attribute_name', 'num_occurrences', 'ap']) + '\n')
            for idx in range(n_labels):
                attr_id = idx_to_attr_id[idx]
                attr_name = attr_id_to_name[attr_id]
                attr_occurrences = np.sum(gt_mat, axis=0)[idx]
                ap = cmap_stats[idx]
                wf.write('{}\t{}\t{}\t{}\n'.format(attr_id, attr_name, attr_occurrences,
                                                   ap * 100.0))

    if params['qual'] is not None:
        if not osp.exists(params['qual']):
            print '{} does not exist. Creating it ...'.format(params['qual'])
            os.mkdir(params['qual'])
        for pred in pred_list:
            image_path = pred['image_path']
            im = Image.open(image_path)
            fig, [ax1, ax2] = plt.subplots(1, 2, figsize=(20, 15))
            ax = ax1
            ax.imshow(im)
            ax.axis('off')
            ax = ax2
            text_str = ''
            pred_probs = pred['pred_probs']
            top_10_inds = np.argsort(-pred_probs)[:10]
            for aidx in top_10_inds:
                text_str += '{:<30} {:.3f}\n'.format(idx_to_attr_id[aidx], pred_probs[aidx])
            ax.set_xlim(xmin=0, xmax=1)
            ax.set_ylim(ymin=0, ymax=1)
            ax.text(0.0, 0.5, text_str, fontsize='xx-large')
            ax.axis('off')
            plt.tight_layout()
            _, im_name = osp.split(image_path)
            out_path = osp.join(params['qual'], im_name)
            plt.savefig(out_path, bbox_inches='tight')
            plt.close()
print('---' * 45)
# How it should look
print('---' * 45)
print('How it should be:\n')
print("Accuracy Score: {:.2f}".format(np.mean(undersample_accuracy)))
print("Precision Score: {:.2f}".format(np.mean(undersample_precision)))
print("Recall Score: {:.2f}".format(np.mean(undersample_recall)))
print("F1 Score: {:.2f}".format(np.mean(undersample_f1)))
print('---' * 45)

undersample_y_score = log_reg.decision_function(original_Xtest)

from sklearn.metrics import average_precision_score

undersample_average_precision = average_precision_score(
    original_ytest, undersample_y_score)
print('Average precision-recall score: {0:0.2f}'.format(
    undersample_average_precision))

from sklearn.metrics import precision_recall_curve
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(12, 6))
precision, recall, _ = precision_recall_curve(original_ytest, undersample_y_score)
plt.step(recall, precision, color='#004a93', alpha=0.2, where='post')
plt.fill_between(recall, precision, step='post', alpha=0.2, color='#48a6ff')
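The figure above is left unlabeled; a plausible way to finish it, assuming the variables from the preceding block are still in scope (the title text is an assumption):

plt.xlabel('Recall')
plt.ylabel('Precision')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.title('UnderSampling Precision-Recall curve: AP = {0:0.2f}'.format(
    undersample_average_precision))
plt.show()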
def classifier_plots(clf_trained, X_test, y_test, target_names: list,
                     minority_idx: int = 0, ylog: bool = False):
    """ Get summary plots for a trained classifier

    Args:
        clf_trained: trained sklearn clf
        X_test (np.ndarray): withheld test data
        y_test (np.ndarray): withheld test data labels
        target_names (list): list of target labels/names
        minority_idx (int): index for the minority class (e.g. 0, 1)
        ylog (bool): toggle log-scaling on the y axis

    Returns:
        None
    """
    """ Probability Distribution """
    # get the probability distribution
    probas = clf_trained.predict_proba(X_test)

    # PLOT - count
    plt.figure(dpi=150)
    plt.hist(probas, bins=20)
    plt.title('Classification Probabilities')
    plt.xlabel('Probability')
    plt.ylabel('# of Instances')
    plt.xlim([0.5, 1.0])
    if ylog:
        plt.yscale('log')
    plt.legend(target_names)
    plt.show()

    # PLOT - density
    plt.figure(dpi=150)
    plt.hist(probas[:, minority_idx], bins=20, density=True)
    plt.title('Classification Density (Minority)')
    plt.xlabel('Probability')
    plt.ylabel('% of Total')
    if ylog:
        plt.yscale('log')
    plt.xlim([0, 1.0])
    plt.legend(target_names)
    plt.show()

    """ ROC curve """
    # get false and true positive rates
    fpr, tpr, _ = roc_curve(y_test, probas[:, 0], pos_label=0)
    # get area under the curve
    clf_auc = auc(fpr, tpr)

    # PLOT ROC curve
    plt.figure(dpi=150)
    plt.plot(fpr, tpr, lw=1, color='green', label=f'AUC = {clf_auc:.3f}')
    plt.plot([0, 1], [0, 1], '--k', lw=0.5, label='Random')
    plt.title('ROC')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate (Recall)')
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.legend()
    plt.show()

    """ Precision Recall Curve """
    # get precision and recall values
    precision, recall, _ = precision_recall_curve(y_test, probas[:, 0], pos_label=0)
    # average precision score (pos_label must match the class whose scores we pass)
    avg_precision = average_precision_score(y_test, probas[:, 0], pos_label=0)
    # precision auc
    pr_auc = auc(recall, precision)

    # plot
    plt.figure(dpi=150)
    plt.plot(recall, precision, lw=1, color='blue',
             label=f'AP={avg_precision:.3f}; AUC={pr_auc:.3f}')
    plt.fill_between(recall, precision, -1, facecolor='lightblue', alpha=0.5)
    plt.title('PR Curve')
    plt.xlabel('Recall (TPR)')
    plt.ylabel('Precision')
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.legend()
    plt.show()
def TSTR_eICU(identifier, epoch, generate=True, vali=True, CNN=False, do_OR=False, duplicate_synth=1, reverse=False): """ """ if vali: test_set = 'vali' else: test_set = 'test' data = np.load('./experiments/data/' + identifier + '.data.npy').item() samples = data['samples'] train_X = samples['train'] test_X = samples[test_set] labels = data['labels'] train_Y = labels['train'] test_Y = labels[test_set] if generate: # now sample from the model synth_Y = np.tile(train_Y, [duplicate_synth, 1]) synth_X = model.sample_trained_model(identifier, epoch, num_samples=synth_Y.shape[0], C_samples=synth_Y) # for use in TRTS synth_testX = model.sample_trained_model(identifier, epoch, num_samples=test_Y.shape[0], C_samples=test_Y) synth_data = {'samples': synth_X, 'labels': synth_Y, 'test_samples': synth_testX, 'test_labels': test_Y} np.save('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy', synth_data) else: print('Loading pre-generated data') print('WARNING: not implemented for TRTS') # get "train" data exp_data = np.load('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy').item() synth_X = exp_data['samples'] synth_Y = exp_data['labels'] n_synth = synth_X.shape[0] synth_X = synth_X.reshape(n_synth, -1) # pdb.set_trace() # # ALERT ALERT MODIFYING # synth_X = 2*(synth_X > 0) - 1 orig_data = np.load('/cluster/home/hyland/eICU_task_data.npy').item() if reverse: which_setting = 'trts' # visualise distribution of errors for train and test print('Swapping synthetic test set in for real, to do TRTS!') test_X = synth_testX else: print('Doing normal TSTR') which_setting = 'tstr' # # get test data # test_X = data['test_X'] # test_Y = data['test_Y'] if not CNN: model_choice = 'RF' # if multivariate, reshape if len(test_X.shape) == 3: test_X = test_X.reshape(test_X.shape[0], -1) if len(train_X.shape) == 3: train_X = train_X.reshape(train_X.shape[0], -1) if len(synth_X.shape) == 3: synth_X = synth_X.reshape(synth_X.shape[0], -1) else: raise ValueError(CNN) model_choice = 'CNN' # we will select the best validation set epoch based on F1 score, take average across all the tasks score_list = [] for label in range(synth_Y.shape[1]): task = orig_data['Y_columns'][label] if vali: if not task in ['low_sao2', 'high_heartrate', 'low_respiration']: print('Skipping task', task, 'because validation evaluation.') continue print('Evaluating on task:', task) #print('(', np.mean(synth_Y[:, label]), 'positive in train, ', np.mean(test_Y[:, label]), 'in test)') #m = RandomForestClassifier(n_estimators=50).fit(synth_X, synth_Y[:, label]) #m = SVC(gamma=0.001).fit(synth_X, synth_Y[:, label]) synth_classifier = RandomForestClassifier(n_estimators=100).fit(synth_X, synth_Y[:, label]) synth_predY = synth_classifier.predict(test_X) synth_predY_prob = synth_classifier.predict_proba(test_X)[:, 1] real_classifier = RandomForestClassifier(n_estimators=100).fit(train_X, train_Y[:, label]) real_predY = real_classifier.predict(test_X) real_predY_prob = real_classifier.predict_proba(test_X)[:, 1] #print('(predicted', np.mean(predict), 'positive labels)') synth_prec, synth_recall, synth_f1, synth_support = precision_recall_fscore_support(test_Y[:, label], synth_predY, average='weighted') synth_accuracy = accuracy_score(test_Y[:, label], synth_predY) synth_auprc = average_precision_score(test_Y[:, label], synth_predY_prob) synth_auroc = roc_auc_score(test_Y[:, label], synth_predY_prob) synth_scores = [synth_prec, synth_recall, synth_f1, synth_accuracy, synth_auprc, synth_auroc] real_prec, real_recall, real_f1, 
        real_support = precision_recall_fscore_support(test_Y[:, label], real_predY, average='weighted')
        real_accuracy = accuracy_score(test_Y[:, label], real_predY)
        real_auprc = average_precision_score(test_Y[:, label], real_predY_prob)
        real_auroc = roc_auc_score(test_Y[:, label], real_predY_prob)
        real_scores = [real_prec, real_recall, real_f1, real_accuracy, real_auprc, real_auroc]
        all_scores = synth_scores + real_scores
        if vali:
            report_file = open('./experiments/tstr/vali.' + which_setting + '_report.v3.csv', 'a')
            report_file.write('eICU_' + task + ',' + identifier + ',' + model_choice + ',' + str(epoch) + ',' + ','.join(map(str, all_scores)) + '\n')
            report_file.close()
        else:
            report_file = open('./experiments/tstr/' + which_setting + '_report.v3.csv', 'a')
            report_file.write('eICU_' + task + ',' + identifier + ',' + model_choice + ',' + str(epoch) + ',' + ','.join(map(str, all_scores)) + '\n')
            report_file.close()
        print(classification_report(test_Y[:, label], synth_predY))
        print(classification_report(test_Y[:, label], real_predY))
        if task in ['low_sao2', 'high_heartrate', 'low_respiration']:
            score_list.append(synth_auprc + synth_auroc)
    if do_OR:
        raise NotImplementedError
        # do the OR task (unreachable until implemented)
        extreme_heartrate_test = test_Y[:, 1] + test_Y[:, 4]
        extreme_respiration_test = test_Y[:, 2] + test_Y[:, 5]
        extreme_systemicmean_test = test_Y[:, 3] + test_Y[:, 6]
        Y_OR_test = np.vstack([extreme_heartrate_test, extreme_respiration_test, extreme_systemicmean_test]).T
        Y_OR_test = (Y_OR_test > 0) * 1
        extreme_heartrate_synth = synth_Y[:, 1] + synth_Y[:, 4]
        extreme_respiration_synth = synth_Y[:, 2] + synth_Y[:, 5]
        extreme_systemicmean_synth = synth_Y[:, 3] + synth_Y[:, 6]
        Y_OR_synth = np.vstack([extreme_heartrate_synth, extreme_respiration_synth, extreme_systemicmean_synth]).T
        Y_OR_synth = (Y_OR_synth > 0) * 1
        OR_names = ['extreme heartrate', 'extreme respiration', 'extreme MAP']
        OR_results = []
        for label in range(Y_OR_synth.shape[1]):
            print('task:', OR_names[label])
            print('(', np.mean(Y_OR_synth[:, label]), 'positive in train, ', np.mean(Y_OR_test[:, label]), 'in test)')
            m = RandomForestClassifier(n_estimators=500).fit(synth_X, Y_OR_synth[:, label])
            predict = m.predict(test_X)  # was X_test, which is undefined in this scope
            print('(predicted', np.mean(predict), 'positive labels)')
            accuracy = accuracy_score(Y_OR_test[:, label], predict)
            precision = sklearn.metrics.precision_score(Y_OR_test[:, label], predict)
            recall = sklearn.metrics.recall_score(Y_OR_test[:, label], predict)
            print(accuracy, precision, recall)
            OR_results.append([accuracy, precision, recall])
    else:
        OR_results = []
    score_across_tasks = np.mean(np.array(score_list))
    return score_across_tasks
def lp_test(self, pred, y): y, pred = y.detach().cpu().numpy(), pred.detach().cpu().numpy() return roc_auc_score(y, pred), average_precision_score(y, pred)
def average_precision(y_true, y_pred, sample_weight):
    # pass the weights through instead of silently ignoring them
    return average_precision_score(y_true, y_pred, sample_weight=sample_weight)
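A quick sanity check for the pass-through (toy numbers, purely illustrative): giving one sample a weight of 2 matches duplicating that sample.

import numpy as np
from sklearn.metrics import average_precision_score

y_true = np.array([0, 0, 1, 1])
y_pred = np.array([0.1, 0.4, 0.35, 0.8])
weights = np.array([1.0, 1.0, 2.0, 1.0])

weighted = average_precision(y_true, y_pred, weights)
repeated = average_precision_score(np.append(y_true, 1), np.append(y_pred, 0.35))
assert np.isclose(weighted, repeated)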
def aps(X, y, model): probs = model.predict_proba(X)[:, 1] return average_precision_score(y, probs)
def aps2(X, y, model): probs = model.decision_function(X) return average_precision_score(y, probs)
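aps and aps2 differ only in where the ranking scores come from. For an estimator exposing both, such as logistic regression, they return the same AP, since predict_proba is a monotone transform of decision_function; a quick check (dataset choice is illustrative):

from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
model = LogisticRegression(max_iter=5000).fit(X_train, y_train)
print(aps(X_test, y_test, model))   # from predict_proba
print(aps2(X_test, y_test, model))  # from decision_function; identical ranking, same AP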
def pr_auc(preds, data): y_true = data.get_label() score = average_precision_score(y_true, preds) return "pr_auc", score, True
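pr_auc follows LightGBM's custom-metric (feval) contract: it returns the metric name, its value, and True because higher is better. A sketch of wiring it into lgb.train on synthetic data (assumes lightgbm is installed; all names are illustrative):

import numpy as np
import lightgbm as lgb

rng = np.random.RandomState(0)
X = rng.rand(500, 5)
y = (rng.rand(500) < 0.2).astype(int)

train_set = lgb.Dataset(X, label=y)
booster = lgb.train(
    {'objective': 'binary', 'verbose': -1},
    train_set,
    num_boost_round=20,
    valid_sets=[train_set],
    feval=pr_auc,  # the custom metric defined above
)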
def buildModel(dataDict, numFeat, numberOfClasses, f_tp, f_fp, f_th, expName, iteration, model_dir, result_dir): trainData = dataDict['train'] trainLabel = dataDict['trainLabel'] validData = dataDict['valid'] validLabel = dataDict['validLabel'] testData = dataDict['test'] testLabel = dataDict['testLabel'] # building NN model model = Sequential() model.add( Dense(hl[0], activation=paramDict['activation1'], input_shape=(numFeat, ))) model.add(Dropout(paramDict['dropOut'])) for i in range(1, numHidden): if i < len(hl): model.add(Dense(hl[i], activation=paramDict['activation1'])) model.add(Dropout(paramDict['dropOut'])) else: model.add(Dense(1024, activation=paramDict['activation1'])) model.add(Dropout(paramDict['dropOut'])) model.add(Dense(numberOfClasses, activation=paramDict['activation2'])) model.compile(optimizer=optimizerDict['adam'], loss=paramDict['loss'], metrics=paramDict['metrics']) # saving best model by validation accuracy filePath = os.path.join(model_dir, expName + str(iteration) + '_weights.best.hdf5') checkpointer = ModelCheckpoint(filepath=filePath, verbose=0, monitor=paramDict['monitor'], save_best_only=True) earlystopper = EarlyStopping(paramDict['monitor'], patience=15, verbose=1) # fit the model to the training data and verify with validation data model.fit(trainData, trainLabel, epochs=paramDict['epoch'], callbacks=[checkpointer, earlystopper], batch_size=paramDict['batchSize'], shuffle=True, verbose=1, validation_data=(validData, validLabel), class_weight=class_weight) # load best model and compile model.load_weights(filePath) model.compile(optimizer=optimizerDict['adam'], loss=paramDict['loss'], metrics=paramDict['metrics']) # serialize model to JSON (save the model structure in order to use the saved weights) #one time save fn = os.path.join(model_dir, 'model3.json') if not os.path.isfile(fn): model_json = model.to_json() with open(fn, 'w') as json_file: json_file.write(model_json) #save model for later use (including model structure and weights) model_file = os.path.join(model_dir, expName + str(iteration) + '_model.h5') model.save(model_file) # evaluation scores roc_auc = metrics.roc_auc_score(testLabel, model.predict(testData)) #precision here is the auc of precision-recall curve precision = metrics.average_precision_score(testLabel, model.predict(testData)) # get predicted class label probs = model.predict_proba(testData) testPredLabel = model.predict(testData) true_y = list() for y_i in range(len(testLabel)): true_y.append(testLabel[y_i][1]) probs = probs[:, 1] fpr, tpr, threshold = metrics.roc_curve(true_y, probs) for i in range(len(fpr)): f_fp.write(str(fpr[i]) + '\t') f_fp.write('\n') for i in range(len(tpr)): f_tp.write(str(tpr[i]) + '\t') f_tp.write('\n') for i in range(len(threshold)): f_th.write(str(threshold[i]) + '\t') f_th.write('\n') #save precision, recall, and thresholds for PR curve plot p0, r0, t0 = metrics.precision_recall_curve(true_y, probs) fnp0 = os.path.join(result_dir, expName + '_precision.txt') fnr0 = os.path.join(result_dir, expName + '_recall.txt') fnt0 = os.path.join(result_dir, expName + '_PR_threshold.txt') with open(fnp0, 'a') as f0: for i in range(len(p0)): f0.write(str(p0[i]) + '\t') f0.write('\n') with open(fnr0, 'a') as f0: for i in range(len(r0)): f0.write(str(r0[i]) + '\t') f0.write('\n') with open(fnt0, 'a') as f0: for i in range(len(t0)): f0.write(str(t0[i]) + '\t') f0.write('\n') # convert back class label from categorical to integer label testLabelRev = np.argmax(testLabel, axis=1) testPredLabelRev = 
np.argmax(testPredLabel, axis=1) # get TP, TN, FP, FN to calculate sensitivity, specificity, PPV and accuracy TP, TN, FP, FN = getTPTNValues(testLabelRev, testPredLabelRev) sensitivity = float(TP) / float(TP + FN) specificity = float(TN) / float(TN + FP) PPV = float(TP) / float(TP + FP) accuracy = float(TP + TN) / float(TP + FP + FN + TN) # dictionary to store evaluation stat evaluationInfo = { 'roc_auc': roc_auc, 'precision': precision, 'sensitivity': sensitivity, 'specificity': specificity, 'PPV': PPV, 'accuracy': accuracy, 'batch_size': paramDict['batchSize'], 'activation': paramDict['activation2'], 'dropout': paramDict['dropOut'] } return evaluationInfo
def eval_approx(args, smean, sconc, device, test_loader, ood_loader, teacher_test_samples, teacher_ood_samples): smean.eval() sconc.eval() miscls_origin = [] miscls_approx = [] entros_origin_1 = [] fentros_approx_1 = [] entros_approx_1 = [] entros_origin_2 = [] fentros_approx_2 = [] entros_approx_2 = [] maxp_origin_1 = [] maxp_approx_1 = [] maxp_origin_2 = [] maxp_approx_2 = [] gvalue_approx_1 = [] gvalue_approx_2 = [] batch_idx = 0 with torch.no_grad(): for data, target in test_loader: data, target = data.to(device), target.to(device) data = data.view(data.shape[0], -1) g_out = F.softplus(sconc(data)) f_out = F.softmax(smean(data), dim=1) pi_q = f_out.mul(g_out) samples_p_pi = teacher_test_samples[ batch_idx * test_loader.batch_size:(batch_idx + 1) * test_loader.batch_size].to(device) avg_origin_output = torch.mean(samples_p_pi, dim=1) pi_p_avg_batch = avg_origin_output origin_result = torch.argmax(pi_p_avg_batch, dim=1) approx_result = torch.argmax(pi_q, dim=1) miscls_approx.append( (1 - (approx_result == target).float()).cpu().numpy()) miscls_origin.append( (1 - (origin_result == target).float()).cpu().numpy()) entro_origin = (-torch.bmm( pi_p_avg_batch.view(data.shape[0], 1, -1), torch.log(pi_p_avg_batch.view(data.shape[0], -1, 1)))).view(-1) fentro_approx = (-torch.bmm( f_out.view(data.shape[0], 1, -1), torch.log(f_out.view(data.shape[0], -1, 1)))).view(-1) alpha = pi_q alpha0 = alpha.sum(1) entro_approx = torch.lgamma(alpha).sum(1) \ - torch.lgamma(alpha0) \ + (alpha0 - 10).mul(torch.digamma(alpha0)) \ - ((alpha - 1 ).mul(torch.digamma(alpha))).sum(1) entros_origin_1.append(entro_origin.cpu().numpy()) fentros_approx_1.append(fentro_approx.cpu().numpy()) entros_approx_1.append(entro_approx.cpu().numpy()) maxp_origin = 1. / torch.max(pi_p_avg_batch, dim=1)[0] maxp_approx = 1. / torch.max(f_out, dim=1)[0] maxp_origin_1.append(maxp_origin.cpu().numpy()) maxp_approx_1.append(maxp_approx.cpu().numpy()) gvalue_approx_1.append(1. 
/ g_out.cpu().numpy()) batch_idx += 1 miscls_approx = np.concatenate(miscls_approx) miscls_origin = np.concatenate(miscls_origin) entros_origin_1 = np.concatenate(entros_origin_1) fentros_approx_1 = np.concatenate(fentros_approx_1) maxp_origin_1 = np.concatenate(maxp_origin_1) maxp_approx_1 = np.concatenate(maxp_approx_1) gvalue_approx_1 = np.concatenate(gvalue_approx_1) correct_approx = np.sum(1 - miscls_approx) correct_ensemble = np.sum(1 - miscls_origin) print("AUROC (entros_origin_1): ", roc_auc_score(miscls_origin, entros_origin_1)) print("AUROC (hentros_approx_1): ", roc_auc_score(miscls_approx, fentros_approx_1)) print("AUROC (maxp_approx_1): ", roc_auc_score(miscls_approx, maxp_approx_1)) print("AUROC (maxp_origin_1): ", roc_auc_score(miscls_origin, maxp_origin_1)) print("AUROC (gvalue_approx_1): ", roc_auc_score(miscls_approx, gvalue_approx_1)) print("AUPR (entros_origin_1): ", average_precision_score(miscls_origin, entros_origin_1)) print("AUPR (hentros_approx_1): ", average_precision_score(miscls_approx, fentros_approx_1)) print("AUPR (maxp_approx_1): ", average_precision_score(miscls_approx, maxp_approx_1)) print("AUPR (maxp_origin_1): ", average_precision_score(miscls_origin, maxp_origin_1)) print("AUPR (gvalue_approx_1): ", average_precision_score(miscls_approx, gvalue_approx_1)) print('approx ACC :', correct_approx / (len(test_loader.dataset))) print('ensemble ACC :', correct_ensemble / (len(test_loader.dataset))) with torch.no_grad(): for batch_idx, (data, target) in enumerate(ood_loader): data, target = data.to(device), target.to(device) data = data.view(data.shape[0], -1) g_out = F.softplus(sconc(data)) f_out = F.softmax(smean(data), dim=1) pi_q = f_out.mul(g_out) samples_p_pi = teacher_ood_samples[ batch_idx * ood_loader.batch_size:(batch_idx + 1) * ood_loader.batch_size].to(device) avg_origin_output = torch.mean(samples_p_pi, dim=1) pi_p_avg_batch = avg_origin_output entro_origin = (-torch.bmm( pi_p_avg_batch.view(data.shape[0], 1, -1), torch.log(pi_p_avg_batch.view(data.shape[0], -1, 1)))).view(-1) fentro_approx = (-torch.bmm( f_out.view(data.shape[0], 1, -1), torch.log(f_out.view(data.shape[0], -1, 1)))).view(-1) entros_origin_2.append(entro_origin.cpu().numpy()) fentros_approx_2.append(fentro_approx.cpu().numpy()) alpha = pi_q alpha0 = alpha.sum(1) entro_approx = torch.lgamma(alpha).sum(1) \ - torch.lgamma(alpha0) \ + (alpha0 - 10).mul(torch.digamma(alpha0)) \ - ((alpha - 1 ).mul(torch.digamma(alpha))).sum(1) entros_approx_2.append(entro_approx.cpu().numpy()) maxp_origin = 1. / torch.max(pi_p_avg_batch, dim=1)[0] maxp_approx = 1. / torch.max(f_out, dim=1)[0] maxp_origin_2.append(maxp_origin.cpu().numpy()) maxp_approx_2.append(maxp_approx.cpu().numpy()) gvalue_approx_2.append(1. 
/ g_out.cpu().numpy()) batch_idx += 1 entros_origin_2 = np.concatenate(entros_origin_2) fentros_approx_2 = np.concatenate(fentros_approx_2) maxp_origin_2 = np.concatenate(maxp_origin_2) maxp_approx_2 = np.concatenate(maxp_approx_2) gvalue_approx_2 = np.concatenate(gvalue_approx_2) fentros_approx = np.concatenate([fentros_approx_1, fentros_approx_2]) entros_origin = np.concatenate([entros_origin_1, entros_origin_2]) maxp_approx = np.concatenate([maxp_approx_1, maxp_approx_2]) maxp_origin = np.concatenate([maxp_origin_1, maxp_origin_2]) gvalue_approx = np.concatenate([gvalue_approx_1, gvalue_approx_2]) ood = np.concatenate([ np.zeros(test_loader.dataset.__len__()), np.ones(ood_loader.dataset.__len__()) ]) print("-----------------------") print("AUROC (entros_origin): ", roc_auc_score(ood, entros_origin)) print("AUROC (hentros_approx): ", roc_auc_score(ood, fentros_approx)) print("AUROC (maxp_approx): ", roc_auc_score(ood, maxp_approx)) print("AUROC (maxp_origin): ", roc_auc_score(ood, maxp_origin)) print("AUROC (gvalue_approx): ", roc_auc_score(ood, gvalue_approx)) print("AUPR (entros_origin): ", average_precision_score(ood, entros_origin)) print("AUPR (hentros_approx): ", average_precision_score(ood, fentros_approx)) print("AUPR (maxp_approx): ", average_precision_score(ood, maxp_approx)) print("AUPR (maxp_origin): ", average_precision_score(ood, maxp_origin)) print("AUPR (gvalue_approx): ", average_precision_score(ood, gvalue_approx))
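In the OOD block above, in-distribution samples are labeled 0 and OOD samples 1, so the AUROC/AUPR values measure how well each uncertainty score ranks OOD data first. A toy illustration of that convention:

import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score

ood = np.array([0, 0, 0, 1, 1])                 # 1 = out-of-distribution
entropy = np.array([0.1, 0.3, 0.2, 0.9, 0.8])   # higher = more uncertain
print('AUROC:', roc_auc_score(ood, entropy))
print('AUPR :', average_precision_score(ood, entropy))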
def plot_imagewise_classification_roc( results, truths, num_class, classes, ): """ :param results: [ [ pigment, soft_deposit, ] x number-of-images ] :param truths: [ { 'filename': 'a.jpg', 'width': 1280, 'height': 720, "pigment": int, "soft_deposit": int, 'ann': { 'bboxes': <np.ndarray> (n, 4 (xmin, ymin, xmax, ymax)), 'labels': <np.ndarray> (n, ), } } x number-of-images ] :return: """ assert len(results) == len(truths) predict = [[], []] for item in results: predict[0].append(item[0]) predict[1].append(item[1]) gt = [[], []] for item in truths: gt[0].append(item['pigment']) gt[1].append(item['soft_deposit']) all_auc = [] all_AP = [] for i in range(num_class): auc = metrics.roc_auc_score( y_true=gt[i], y_score=predict[i], ) fpr, tpr, thresholds = metrics.roc_curve( y_true=gt[i], y_score=predict[i], ) with open( dir_path + '/visualization/imagewise_classification_roc_{}.csv'.format(i), 'a') as csvFile: writer = csv.writer(csvFile) writer.writerow(thresholds) writer.writerow(fpr) writer.writerow(tpr) precision, recall, thresholds = metrics.precision_recall_curve( y_true=gt[i], probas_pred=predict[i], ) AP = metrics.average_precision_score( y_true=gt[i], y_score=predict[i], ) plt.figure() plt.title('ROC') plt.xlabel('False Positive rate') plt.ylabel('True Positive rate') plt.ylim(0, 1) plt.plot(fpr, tpr, label='AUC: ' + str(auc)) plt.legend() plt.savefig( dir_path + '/visualization/imagewise_classification_roc_{}.png'.format(i)) plt.figure() plt.title('Precision-Recall') plt.xlabel('Recall') plt.ylabel('Precision') plt.axis([0, 1, 0, 1]) plt.plot(recall, precision, label='mAP: ' + str(AP)) plt.savefig( dir_path + '/visualization/imagewise_classification_prc_{}.png'.format(i)) print('auc for class {} is: {}'.format(i, auc)) print('AP for class {} is: {}'.format(i, AP)) all_auc.append(auc) all_AP.append(AP) # class-average print('class-average auc is: {}'.format(np.mean(all_auc))) print('class-average AP is: {}'.format(np.mean(all_AP))) # pure-average flat_predict_list = [] for sublist in predict: for item in sublist: flat_predict_list.append(item) flat_gt_list = [] for sublist in gt: for item in sublist: flat_gt_list.append(item) auc = metrics.roc_auc_score( y_true=flat_gt_list, y_score=flat_predict_list, ) fpr, tpr, thresholds = metrics.roc_curve( y_true=flat_gt_list, y_score=flat_predict_list, ) precision, recall, thresholds = metrics.precision_recall_curve( y_true=flat_gt_list, probas_pred=flat_predict_list, ) AP = metrics.average_precision_score( y_true=flat_gt_list, y_score=flat_predict_list, ) plt.figure() plt.title('ROC') plt.xlabel('False Positive rate') plt.ylabel('True Positive rate') plt.ylim(0, 1) plt.plot(fpr, tpr, label='AUC: ' + str(auc)) plt.legend() plt.savefig(dir_path + '/visualization/imagewise_classification_roc_all.png') plt.figure() plt.title('Precision-Recall') plt.xlabel('Recall') plt.ylabel('Precision') plt.axis([0, 1, 0, 1]) plt.plot(recall, precision, label='mAP: ' + str(AP)) plt.savefig(dir_path + '/visualization/imagewise_classification_prc_all.png') print('pure average auc is: {}'.format(auc)) print('pure average AP is: {}'.format(AP))
    horizontal_flip=True, vertical_flip=True, rotation_range=30)
le = LabelEncoder().fit(labels)
trainY = np_utils.to_categorical(le.transform(trainY), 2)
# le = LabelEncoder().fit(testY)
testY = np_utils.to_categorical(le.transform(testY), 2)
print("[INFO] compiling model...")
model = ZFNET()
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=['accuracy'])
H = model.fit(train_datagen.flow(trainX, trainY, batch_size=32),
              validation_data=(testX, testY),
              steps_per_epoch=len(trainX) // 32, epochs=5, verbose=1)
# evaluate the network
print("[INFO] evaluating network...")
predictions = model.predict(testX, batch_size=20)
precision = precision_score(testY.argmax(axis=1), predictions.argmax(axis=1), average='weighted')
average_precision = average_precision_score(testY, predictions)
recall = recall_score(testY.argmax(axis=1), predictions.argmax(axis=1), average='weighted')
AUCC.append(average_precision)
REC.append(recall)
PR.append(precision)
print('AUPR', average_precision, 'precision', precision, 'recall', recall)
# print(classification_report(testY.argmax(axis=1), predictions.argmax(axis=1), target_names=le.classes_))
model.save('ZFNET.HDF5')
# Compute ROC curve and ROC area for each class
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(2):
    fpr[i], tpr[i], _ = metrics.roc_curve(testY[:, i], predictions[:, i])
    roc_auc[i] = metrics.auc(fpr[i], tpr[i])
# Compute micro-average ROC curve and ROC area
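The last comment announces a micro-average that the snippet never computes; the standard continuation, a sketch assuming the same fpr/tpr/roc_auc dicts:

fpr["micro"], tpr["micro"], _ = metrics.roc_curve(testY.ravel(), predictions.ravel())
roc_auc["micro"] = metrics.auc(fpr["micro"], tpr["micro"])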
    random_state=random_state)
classifier = OneVsRestClassifier(
    svm.SVC(kernel='linear', probability=True, random_state=random_state))
y_score = classifier.fit(X_train, y_train).decision_function(X_test)

# Compute Precision-Recall and plot curve
# The underscore below receives the returned thresholds: "_" is used as a
# throwaway name for a value that is assigned but never needed again.
precision = dict()
recall = dict()
average_precision = dict()
for i in range(n_classes):
    precision[i], recall[i], _ = precision_recall_curve(
        y_test[:, i], y_score[:, i]
    )  # The last precision and recall values are 1. and 0. respectively and do not have a corresponding threshold. This ensures that the graph starts on the x axis.
    average_precision[i] = average_precision_score(
        y_test[:, i], y_score[:, i])  # sliced per class: performance for the i-th classifier

# Compute micro-average curve and area; ravel() flattens the 2-d arrays to 1-d
precision["micro"], recall["micro"], _ = precision_recall_curve(
    y_test.ravel(), y_score.ravel())
average_precision["micro"] = average_precision_score(
    y_test, y_score, average="micro"
)  # This score corresponds to the area under the precision-recall curve.

# Plot Precision-Recall curve for each class
plt.clf()  # clf() clears the current figure window
plt.plot(recall["micro"], precision["micro"],
         label='micro-average Precision-recall curve (area = {0:0.2f})'.format(
             average_precision["micro"]))
for i in range(n_classes):
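As the ravel() comment suggests, micro-averaging treats every (sample, class) cell as one binary decision, so flattening and scoring once matches average='micro'. A tiny check:

import numpy as np
from sklearn.metrics import average_precision_score

y_bin = np.array([[1, 0], [0, 1], [1, 0]])
scores = np.array([[0.8, 0.2], [0.3, 0.7], [0.6, 0.4]])
micro = average_precision_score(y_bin, scores, average="micro")
flat = average_precision_score(y_bin.ravel(), scores.ravel())
assert np.isclose(micro, flat)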
def get_ROC_PR_data(data, clf, pos_label_=None, verbose=False):
    """
    source_ROC, source_PR, df_ROC, df_PR, clf = get_ROC_PR_data(data, clf, verbose=False)

    get_ROC_PR_data returns ColumnDataSources and dataframes with TPR/FPR and
    precision/recall for a particular dataset and an untrained classifier. The CDS
    can be used to plot a Bokeh plot while the dataframes can be used for additional
    exploration and plotting with other libs. Note that the dataframes are returned
    with metadata (e.g. AUC and the clf used).

    data: tuple of our data (X_train, X_test, y_train, y_test) where each item
        in the tuple is a numpy ndarray
    clf: an untrained classifier (e.g. rf = RandomForestClassifier())
    pos_label_: if targets are not binary (0, 1) then indicate integer for
        "positive" [default: None]
    verbose: print warnings [default: False]
    """
    # split data into training, testing
    (X_train, X_test, y_train, y_test) = data

    # train and retrieve probabilities of class per feature for the test data
    probas = clf.fit(X_train, y_train).predict_proba(X_test)

    # get false and true positive rates for positive labels
    # (and thresholds, which is not used but shown here for fyi)
    if not pos_label_:
        pos_label_ = np.max(y_train)
        if verbose:
            print(f"Warning: Maximum target value of '{pos_label_}' used as positive.")
            print("You can use 'pos_label_' to indicate your own.")

    """ ROC """
    fpr, tpr, roc_thresholds = roc_curve(y_test, probas[:, 1], pos_label=pos_label_)
    roc_thresholds[0] = np.nan
    # get area under the curve (AUC)
    roc_auc = auc(fpr, tpr)

    """ PR """
    # get precision and recall values
    precision, recall, pr_thresholds = precision_recall_curve(y_test, probas[:, 1], pos_label=pos_label_)
    pr_thresholds = np.insert(pr_thresholds, 0, 0)  # do this to correct lengths
    # average precision score
    avg_precision = average_precision_score(y_test, probas[:, 1])
    # precision auc
    pr_auc = auc(recall, precision)

    """ Create Sources """
    # create legend variables - we'll create an array with len(tpr)
    roc_auc_ = [f"AUC: {roc_auc:.3f}"] * len(tpr)
    pr_auc_ = [f"AUC: {pr_auc:.3f}"] * len(precision)
    clf_name = get_clf_name(clf)
    clf_roc = [f"{clf_name}, AUC: {roc_auc:.3f}"] * len(tpr)
    clf_pr = [f"{clf_name}, AUC: {pr_auc:.3f}"] * len(precision)

    # create bokeh column sources for plotting new ROC and PR curves
    source_ROC = ColumnDataSource(data=dict(x_fpr=fpr, y_tpr=tpr,
                                            thresh_roc=roc_thresholds,
                                            auc_legend=roc_auc_,
                                            clf_legend=clf_roc))
    source_PR = ColumnDataSource(data=dict(x_rec=recall, y_prec=precision,
                                           thresh_pr=pr_thresholds,
                                           auc_legend=pr_auc_,
                                           clf_legend=clf_pr))

    """ Dataframes """
    # create output dataframe with TPR and FPR, and metadata
    df_ROC = pd.DataFrame({'TPR': tpr, 'FPR': fpr, 'Thresholds': roc_thresholds})
    df_ROC.auc = roc_auc
    df_ROC.clf = get_clf_name(clf)
    df_ROC.score = clf.score(X_test, y_test)

    # create output dataframe with Recall and Precision, and metadata
    df_PR = pd.DataFrame({'Recall': recall, 'Precision': precision, 'Thresholds': pr_thresholds})
    df_PR.auc = pr_auc
    df_PR.clf = get_clf_name(clf)
    df_PR.score = clf.score(X_test, y_test)

    return source_ROC, source_PR, df_ROC, df_PR, clf
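A hypothetical usage sketch (dataset and classifier are illustrative; get_clf_name, ColumnDataSource and the other helpers come from the surrounding module):

from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
data = train_test_split(X, y, random_state=0)  # (X_train, X_test, y_train, y_test)
src_roc, src_pr, df_roc, df_pr, clf = get_ROC_PR_data(
    data, RandomForestClassifier(random_state=0))
print(df_roc.auc, df_pr.auc)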
                'class': key,
                'prob': new_probs[jk]
            }
            all_dets.append(det)

    print('Elapsed time = {}'.format(time.time() - st))
    t, p = get_map(all_dets, img_data['bboxes'], (fx, fy))
    for key in t.keys():
        if key not in T:
            T[key] = []
            P[key] = []
        T[key].extend(t[key])  # extend list
        P[key].extend(p[key])  # extend list
    all_aps = []
    for key in T.keys():
        ap = average_precision_score(T[key], P[key])
        print(len(T[key]), len(P[key]))
        print('{} AP: {}'.format(key, ap))
        all_aps.append(ap)
    if idx == 20:
        print(T)
        print(P)
    f_map = np.mean(np.array(all_aps))
    print('mAP = {}'.format(f_map))
    # print(T)
    # print(P)

print('final map = %f' % f_map)
K.clear_session()
def plot_precision_recall_curve(clf, X, y, title='Precision-Recall Curve', do_split=True, test_split_ratio=0.33, random_state=None, ax=None): """Generates the Precision-Recall curve for a given classifier and dataset. Args: clf: Classifier instance that implements "fit" and "predict_proba" methods. X (array-like, shape (n_samples, n_features)): Training vector, where n_samples is the number of samples and n_features is the number of features. y (array-like, shape (n_samples) or (n_samples, n_features)): Target relative to X for classification. title (string, optional): Title of the generated plot. Defaults to "Precision-Recall Curve". do_split (bool, optional): If True, the dataset is split into training and testing sets. The classifier is trained on the training set and the Precision-Recall curves are plotted using the performance of the classifier on the testing set. If False, the Precision-Recall curves are generated without splitting the dataset or training the classifier. This assumes that the classifier has already been called with its `fit` method beforehand. test_split_ratio (float, optional): Used when do_split is set to True. Determines the proportion of the entire dataset to use in the testing split. Default is set to 0.33. random_state (int :class:`RandomState`): Pseudo-random number generator state used for random sampling. ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to plot the learning curve. If None, the plot is drawn on a new set of axes. Returns: ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was drawn. Example: >>> nb = classifier_factory(GaussianNB()) >>> nb.plot_precision_recall_curve(X, y, random_state=1) <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490> >>> plt.show() .. image:: _static/examples/plot_precision_recall_curve.png :align: center :alt: Precision Recall Curve """ if not hasattr(clf, 'predict_proba'): raise TypeError('"predict_proba" method not in classifier. 
' 'Cannot calculate Precision-Recall Curve.') if not do_split: classes = clf.classes_ probas = clf.predict_proba(X) y_true = y else: X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=test_split_ratio, stratify=y, random_state=random_state) clf_clone = clone(clf) probas = clf_clone.fit(X_train, y_train).predict_proba(X_test) classes = clf_clone.classes_ y_true = y_test # Compute Precision-Recall curve and area for each class precision = dict() recall = dict() average_precision = dict() for i in range(len(classes)): precision[i], recall[i], _ = precision_recall_curve( y_true, probas[:, i], pos_label=classes[i]) y_true = label_binarize(y_true, classes=classes) if len(classes) == 2: y_true = np.hstack((1 - y_true, y_true)) for i in range(len(classes)): average_precision[i] = average_precision_score(y_true[:, i], probas[:, i]) # Compute micro-average ROC curve and ROC area micro_key = 'micro' i = 0 while micro_key in precision: i += 1 micro_key += str(i) precision[micro_key], recall[micro_key], _ = precision_recall_curve( y_true.ravel(), probas.ravel()) average_precision[micro_key] = average_precision_score(y_true, probas, average='micro') if ax is None: fig, ax = plt.subplots(1, 1) ax.set_title(title) for i in range(len(classes)): ax.plot(recall[i], precision[i], lw=2, label='Precision-recall curve of class {0} ' '(area = {1:0.3f})'.format(classes[i], average_precision[i])) ax.plot(recall[micro_key], precision[micro_key], lw=2, color='gold', label='micro-average Precision-recall curve ' '(area = {0:0.3f})'.format(average_precision[micro_key])) ax.set_xlim([0.0, 1.0]) ax.set_ylim([0.0, 1.05]) ax.set_xlabel('Recall') ax.set_ylabel('Precision') ax.legend(loc='best') return ax
# In[34]:

precision = dict()
recall = dict()
av_precision = dict()

# In[35]:

test_y_lb = label_binarize(test_y, classes=[1, 2, 3, 4, 5, 6, 7])
best_pred = label_binarize(best_pred, classes=[1, 2, 3, 4, 5, 6, 7])

# In[36]:

for i in range(7):
    precision[i], recall[i], _ = precision_recall_curve(
        test_y_lb[:, i], best_pred[:, i])
    av_precision[i] = average_precision_score(test_y_lb[:, i], best_pred[:, i])

# In[74]:

precision

# In[72]:

recall

# In[77]:

for i in range(7):
    plt.plot(recall[i], precision[i])
plt.xlabel('Recall')
plt.ylabel('Precision')
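Because best_pred holds binarized hard predictions, each curve above collapses to a few points. Scoring with class probabilities gives a proper curve; a self-contained sketch on synthetic 7-class data (all names illustrative):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import average_precision_score, precision_recall_curve
from sklearn.preprocessing import label_binarize

X, y = make_classification(n_samples=700, n_features=20, n_informative=10,
                           n_classes=7, random_state=0)
clf = LogisticRegression(max_iter=2000).fit(X, y)
probs = clf.predict_proba(X)  # continuous scores, one column per class
y_lb = label_binarize(y, classes=range(7))
for i in range(7):
    p, r, _ = precision_recall_curve(y_lb[:, i], probs[:, i])
    print(i, average_precision_score(y_lb[:, i], probs[:, i]))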
def test_rcnn(): """Test RCNN model.""" # Load data logger.info("✔︎ Loading data...") logger.info("Recommended padding Sequence length is: {0}".format( FLAGS.pad_seq_len)) logger.info("✔︎ Test data processing...") test_data = dh.load_data_and_labels(FLAGS.test_data_file, FLAGS.num_classes, FLAGS.embedding_dim, data_aug_flag=False) logger.info("✔︎ Test data padding...") x_test, y_test = dh.pad_data(test_data, FLAGS.pad_seq_len) y_test_labels = test_data.labels # Load rcnn model BEST_OR_LATEST = input("☛ Load Best or Latest Model?(B/L): ") while not (BEST_OR_LATEST.isalpha() and BEST_OR_LATEST.upper() in ['B', 'L']): BEST_OR_LATEST = input( "✘ The format of your input is illegal, please re-input: ") if BEST_OR_LATEST.upper() == 'B': logger.info("✔︎ Loading best model...") checkpoint_file = cm.get_best_checkpoint(FLAGS.best_checkpoint_dir, select_maximum_value=True) else: logger.info("✔︎ Loading latest model...") checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir) logger.info(checkpoint_file) graph = tf.Graph() with graph.as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth sess = tf.Session(config=session_conf) with sess.as_default(): # Load the saved meta graph and restore variables saver = tf.train.import_meta_graph( "{0}.meta".format(checkpoint_file)) saver.restore(sess, checkpoint_file) # Get the placeholders from the graph by name input_x = graph.get_operation_by_name("input_x").outputs[0] input_y = graph.get_operation_by_name("input_y").outputs[0] dropout_keep_prob = graph.get_operation_by_name( "dropout_keep_prob").outputs[0] is_training = graph.get_operation_by_name("is_training").outputs[0] # Tensors we want to evaluate scores = graph.get_operation_by_name("output/scores").outputs[0] loss = graph.get_operation_by_name("loss/loss").outputs[0] # Split the output nodes name by '|' if you have several output nodes output_node_names = "output/scores" # Save the .pb model file output_graph_def = tf.graph_util.convert_variables_to_constants( sess, sess.graph_def, output_node_names.split("|")) tf.train.write_graph(output_graph_def, "graph", "graph-rcnn-{0}.pb".format(MODEL), as_text=False) # Generate batches for one epoch batches = dh.batch_iter(list(zip(x_test, y_test, y_test_labels)), FLAGS.batch_size, 1, shuffle=False) test_counter, test_loss = 0, 0.0 test_pre_tk = [0.0] * FLAGS.top_num test_rec_tk = [0.0] * FLAGS.top_num test_F_tk = [0.0] * FLAGS.top_num # Collect the predictions here true_labels = [] predicted_labels = [] predicted_scores = [] # Collect for calculating metrics true_onehot_labels = [] predicted_onehot_scores = [] predicted_onehot_labels_ts = [] predicted_onehot_labels_tk = [[] for _ in range(FLAGS.top_num)] for batch_test in batches: x_batch_test, y_batch_test, y_batch_test_labels = zip( *batch_test) feed_dict = { input_x: x_batch_test, input_y: y_batch_test, dropout_keep_prob: 1.0, is_training: False } batch_scores, cur_loss = sess.run([scores, loss], feed_dict) # Prepare for calculating metrics for i in y_batch_test: true_onehot_labels.append(i) for j in batch_scores: predicted_onehot_scores.append(j) # Get the predicted labels by threshold batch_predicted_labels_ts, batch_predicted_scores_ts = \ dh.get_label_threshold(scores=batch_scores, threshold=FLAGS.threshold) # Add results to collection for i in y_batch_test_labels: true_labels.append(i) for j in batch_predicted_labels_ts: 
predicted_labels.append(j) for k in batch_predicted_scores_ts: predicted_scores.append(k) # Get onehot predictions by threshold batch_predicted_onehot_labels_ts = \ dh.get_onehot_label_threshold(scores=batch_scores, threshold=FLAGS.threshold) for i in batch_predicted_onehot_labels_ts: predicted_onehot_labels_ts.append(i) # Get onehot predictions by topK for top_num in range(FLAGS.top_num): batch_predicted_onehot_labels_tk = dh.get_onehot_label_topk( scores=batch_scores, top_num=top_num + 1) for i in batch_predicted_onehot_labels_tk: predicted_onehot_labels_tk[top_num].append(i) test_loss = test_loss + cur_loss test_counter = test_counter + 1 # Calculate Precision & Recall & F1 (threshold & topK) test_pre_ts = precision_score( y_true=np.array(true_onehot_labels), y_pred=np.array(predicted_onehot_labels_ts), average='micro') test_rec_ts = recall_score( y_true=np.array(true_onehot_labels), y_pred=np.array(predicted_onehot_labels_ts), average='micro') test_F_ts = f1_score(y_true=np.array(true_onehot_labels), y_pred=np.array(predicted_onehot_labels_ts), average='micro') for top_num in range(FLAGS.top_num): test_pre_tk[top_num] = precision_score( y_true=np.array(true_onehot_labels), y_pred=np.array(predicted_onehot_labels_tk[top_num]), average='micro') test_rec_tk[top_num] = recall_score( y_true=np.array(true_onehot_labels), y_pred=np.array(predicted_onehot_labels_tk[top_num]), average='micro') test_F_tk[top_num] = f1_score( y_true=np.array(true_onehot_labels), y_pred=np.array(predicted_onehot_labels_tk[top_num]), average='micro') # Calculate the average AUC test_auc = roc_auc_score(y_true=np.array(true_onehot_labels), y_score=np.array(predicted_onehot_scores), average='micro') # Calculate the average PR test_prc = average_precision_score( y_true=np.array(true_onehot_labels), y_score=np.array(predicted_onehot_scores), average="micro") test_loss = float(test_loss / test_counter) logger.info( "☛ All Test Dataset: Loss {0:g} | AUC {1:g} | AUPRC {2:g}". format(test_loss, test_auc, test_prc)) # Predict by threshold logger.info( "☛ Predict by threshold: Precision {0:g}, Recall {1:g}, F1 {2:g}" .format(test_pre_ts, test_rec_ts, test_F_ts)) # Predict by topK logger.info("☛ Predict by topK:") for top_num in range(FLAGS.top_num): logger.info( "Top{0}: Precision {1:g}, Recall {2:g}, F {3:g}".format( top_num + 1, test_pre_tk[top_num], test_rec_tk[top_num], test_F_tk[top_num])) # Save the prediction result if not os.path.exists(SAVE_DIR): os.makedirs(SAVE_DIR) dh.create_prediction_file(output_file=SAVE_DIR + "/predictions.json", data_id=test_data.testid, all_labels=true_labels, all_predict_labels=predicted_labels, all_predict_scores=predicted_scores) logger.info("✔︎ Done.")
def _compute_score(model, X, y, scoring_metric=None, scoring_params=None):
    '''Helper function that maps metric string names to their function calls.

    Parameters
    ----------
    model : class inheriting sklearn.base.BaseEstimator
        The classifier whose hyperparams you need to optimize with grid search.
        The model must have model.fit(X,y) and model.predict(X) defined. Although it
        can work without it, its best if you also define model.score(X,y) so you can
        decide the scoring function for deciding the best parameters. If you are
        using an sklearn model, everything will work out of the box. To use a model
        from a different library is no problem, but you need to wrap it in a class
        and inherit sklearn.base.BaseEstimator as seen in:
        https://github.com/cgnorthcutt/hyperopt

    X : np.array of shape (n, m)
        The training data.

    y : np.array of shape (n,) or (n, 1)
        Corresponding labels.

    scoring_metric : str
        See hypopt.GridSearch.fit() scoring parameter docstring
        for list of options.

    scoring_params : dict
        All other params you want passed to the scoring function.
        Params will be passed as scoring_func(**scoring_params).'''

    if scoring_params is None:
        scoring_params = {}

    if scoring_metric == 'accuracy':
        return metrics.accuracy_score(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'brier_score_loss':
        # brier score expects probabilities, not hard class predictions
        return metrics.brier_score_loss(y, model.predict_proba(X)[:, 1], **scoring_params)
    elif scoring_metric == 'average_precision':
        return metrics.average_precision_score(y, model.predict_proba(X)[:, 1], **scoring_params)
    elif scoring_metric == 'f1':
        return metrics.f1_score(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'f1_micro':
        return metrics.f1_score(y, model.predict(X), average='micro', **scoring_params)
    elif scoring_metric == 'f1_macro':
        return metrics.f1_score(y, model.predict(X), average='macro', **scoring_params)
    elif scoring_metric == 'f1_weighted':
        return metrics.f1_score(y, model.predict(X), average='weighted', **scoring_params)
    elif scoring_metric == 'neg_log_loss':
        return -1. * metrics.log_loss(y, model.predict_proba(X), **scoring_params)
    elif scoring_metric == 'precision':
        return metrics.precision_score(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'recall':
        return metrics.recall_score(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'roc_auc':
        return metrics.roc_auc_score(y, model.predict_proba(X)[:, 1], **scoring_params)
    elif scoring_metric == 'explained_variance':
        return metrics.explained_variance_score(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'neg_mean_absolute_error':
        return -1. * metrics.mean_absolute_error(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'neg_mean_squared_error':
        return -1. * metrics.mean_squared_error(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'neg_mean_squared_log_error':
        return -1. * metrics.mean_squared_log_error(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'neg_median_absolute_error':
        return -1. * metrics.median_absolute_error(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'r2':
        return metrics.r2_score(y, model.predict(X), **scoring_params)
    else:
        raise ValueError(scoring_metric + ' is not a supported metric.')
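A hypothetical call pattern for the helper above (model and data are illustrative):

from sklearn import metrics
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression

X, y = load_breast_cancer(return_X_y=True)
model = LogisticRegression(max_iter=5000).fit(X, y)
print(_compute_score(model, X, y, scoring_metric='average_precision'))
print(_compute_score(model, X, y, scoring_metric='f1'))
print(_compute_score(model, X, y, scoring_metric='neg_log_loss'))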
    random_state=0)
cdsw.track_metric("numTrees", param_numTrees)
cdsw.track_metric("maxDepth", param_maxDepth)
cdsw.track_metric("impurity", param_impurity)
randF.fit(pdTrain[features], pdTrain['label'])
predictions = randF.predict(pdTest[features])
pd.crosstab(pdTest['label'], predictions, rownames=['Actual'], colnames=['Prediction'])
list(zip(pdTrain[features], randF.feature_importances_))

y_true = pdTest['label']
# rank-based metrics need continuous scores, not hard 0/1 predictions
y_scores = randF.predict_proba(pdTest[features])[:, 1]
auroc = roc_auc_score(y_true, y_scores)
ap = average_precision_score(y_true, y_scores)
print(auroc, ap)
cdsw.track_metric("auroc", auroc)
cdsw.track_metric("ap", ap)

pickle.dump(randF, open("models/sklearn_rf.pkl", "wb"))
cdsw.track_file("models/sklearn_rf.pkl")
balanced_accuracy_np = np.zeros(len(random_seed_list)) log_loss_np = np.zeros(len(random_seed_list)) avg_precision_np = np.zeros(len(random_seed_list)) for k, seed in enumerate(random_seed_list): random.seed(int(seed)) rnd_inds = random.sample(range(x_test.shape[0]), int(0.8*x_test.shape[0])) curr_y_test = y_test[rnd_inds] curr_x_test = x_test[rnd_inds, :] y_pred = np.argmax(rnn.predict_proba(curr_x_test), -1) y_pred_proba = rnn.predict_proba(curr_x_test)[:, 1] y_score = y_pred_proba roc_auc_np[k] = roc_auc_score(curr_y_test, y_score) balanced_accuracy_np[k] = balanced_accuracy_score(curr_y_test, y_pred) log_loss_np[k] = log_loss(curr_y_test, y_pred_proba, normalize=True) / np.log(2) avg_precision_np[k] = average_precision_score(curr_y_test, y_score) print('tslice : %s, ROC-AUC : %.2f'%(tslice, np.percentile(roc_auc_np, 50))) for prctile in prctile_vals: row_dict = dict() row_dict['model'] = 'RNN' row_dict['percentile'] = prctile row_dict['tslice'] = tslice row_dict['roc_auc'] = np.percentile(roc_auc_np, prctile) row_dict['balanced_accuracy'] = np.percentile(balanced_accuracy_np, prctile) row_dict['log_loss'] = np.percentile(log_loss_np, prctile) row_dict['average_precision'] = np.percentile(avg_precision_np, prctile) perf_df = perf_df.append(row_dict, ignore_index=True)
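The loop above is a percentile bootstrap over test subsamples; the same idea in compact, self-contained form (synthetic labels and scores, purely illustrative):

import numpy as np
from sklearn.metrics import average_precision_score

rng = np.random.RandomState(0)
y = rng.binomial(1, 0.2, size=1000)              # imbalanced labels
scores = 0.5 * y + 0.7 * rng.rand(1000)          # noisy but informative scores
boot_aps = []
for _ in range(200):
    idx = rng.randint(0, len(y), size=len(y))    # resample with replacement
    if y[idx].sum() == 0:
        continue                                 # AP is undefined with no positives
    boot_aps.append(average_precision_score(y[idx], scores[idx]))
print(np.percentile(boot_aps, [2.5, 50, 97.5]))  # 95% interval and median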
def train(args): np.random.RandomState(46) # Load the data into MedGraph data structure graph_file = 'data/%s.npz' % args.dataset data_loader = DataLoader(graph_file) display_freq = 10 # Frequency of displaying the training results # Set user-defined settings in the data loader data_loader.embedding_dim = args.embedding_dim data_loader.vc_batch_size = args.vc_batch_size data_loader.K = args.K data_loader.learning_rate = args.learning_rate data_loader.is_gauss = args.gauss data_loader.distance = args.distance data_loader.is_time_dis = args.time_dis model = MedGraph(data_loader) # Number of training iterations in each epoch global_step = 0 num_iter = len(data_loader.vv_train) // args.vv_batch_size print('Number of iterations per epoch: {}'.format(num_iter)) with tf.Session() as sess: tf.global_variables_initializer().run() for epoch in range(args.num_epochs): start_time = time.time() tot_loss = 0 data = data_loader.sequential_randomize_vv_sequences(data_loader.vv_train_seq) for iteration in range(num_iter): global_step += 1 start = iteration * args.vv_batch_size end = (iteration + 1) * args.vv_batch_size if iteration < num_iter else data[0].shape[0] # Fetch vv sequences for the current batch (batch_vv_inputs, batch_time_train_in, batch_time_train_out, batch_out_mask, batch_vv_outputs) = data_loader.fetch_vv_batch(data, start, end) # Fetch vc edges for the current batch (vc_u_i, vc_u_j, vc_label) = data_loader.fetch_vc_batch(batch_size=args.vc_batch_size, K=args.K) # Run optimization operation (backprop) feed_dict_batch = { model.X_visits: sparse_feeder(data_loader.X_visits_train), model.vv_inputs: batch_vv_inputs, model.vv_outputs: batch_vv_outputs, model.vv_in_time: batch_time_train_in, model.vv_out_time: batch_time_train_out, model.vv_out_mask: batch_out_mask, model.vc_u_i: vc_u_i, model.vc_u_j: vc_u_j, model.vc_label: vc_label} loss, _ = sess.run([model.loss, model.train_op], feed_dict=feed_dict_batch) tot_loss += loss print("Epoch {:3d}:\t Training loss: {:.4f}\t Time taken: {:.4f}sec".format(epoch + 1, tot_loss / num_iter, time.time() - start_time)) # Run validation and test after every epoch # Predict for validation set feed_dict_valid = {model.X_visits: sparse_feeder(data_loader.X_visits_val), model.vv_inputs: data_loader.vv_valid_seq[0], model.vv_in_time: data_loader.vv_valid_seq[1], model.vv_out_time: data_loader.vv_valid_seq[2], model.vv_out_mask: data_loader.vv_valid_seq[3]} y_pred_valid = sess.run(model.y, feed_dict=feed_dict_valid) # Calculate validation set evaluation metrics val_auc = roc_auc_score(y_true=data_loader.vv_valid_seq[4], y_score=y_pred_valid) val_ap = average_precision_score(y_true=data_loader.vv_valid_seq[4], y_score=y_pred_valid) # Predict for test set feed_dict_test = {model.X_visits: sparse_feeder(data_loader.X_visits_test), model.vv_inputs: data_loader.vv_test_seq[0], model.vv_in_time: data_loader.vv_test_seq[1], model.vv_out_time: data_loader.vv_test_seq[2], model.vv_out_mask: data_loader.vv_test_seq[3]} y_pred_test = sess.run(model.y, feed_dict=feed_dict_test) # Calculate test set evaluation metrics test_auc = roc_auc_score(y_true=data_loader.vv_test_seq[4], y_score=y_pred_test) test_ap = average_precision_score(y_true=data_loader.vv_test_seq[4], y_score=y_pred_test) # Save visit and code embeddings for test data (we use the same mapping dictionary) if args.gauss: mu, sigma = sess.run([model.embedding, model.sigma], feed_dict=feed_dict_test) np.save('emb/%s_embedding.npy' % args.dataset, {'mu': data_loader.embedding_mapping(mu), 'sigma': 
data_loader.embedding_mapping(sigma)}) else: mu = sess.run(model.embedding, feed_dict=feed_dict_test) np.save('emb/%s_embedding.npy' % args.dataset, data_loader.embedding_mapping(mu)) print( "Validation AUC: {:.4f}\t Validation AP: {:.4f}\t Test AUC: {:.4f}\t Test AP: {:.4f}\t".format( val_auc, val_ap, test_auc, test_ap)) print('----------------------------------------------------------------------------------------------------------------------------------')
y_score = classifier.decision_function(X_test)

# %%
# The average precision score in multi-label settings
# ...................................................
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score

# For each class
precision = dict()
recall = dict()
average_precision = dict()
for i in range(n_classes):
    precision[i], recall[i], _ = precision_recall_curve(
        Y_test[:, i], y_score[:, i])
    average_precision[i] = average_precision_score(Y_test[:, i], y_score[:, i])

# A "micro-average": quantifying score on all classes jointly
precision["micro"], recall["micro"], _ = precision_recall_curve(
    Y_test.ravel(), y_score.ravel())
average_precision["micro"] = average_precision_score(Y_test, y_score, average="micro")

# %%
# Plot the micro-averaged Precision-Recall curve
# ..............................................
display = PrecisionRecallDisplay(
    recall=recall["micro"],
    precision=precision["micro"],
    average_precision=average_precision["micro"],
)
display.plot()