def single_experiment_false_fraction(data, gamma, nu):
    C = 1./ len(data) / nu
    model = SVDD(kernel='rbf', C=C, gamma=gamma)
    normal_data, anomaly_data = split_anomaly_normal_data(data)
    anomaly_elements_count = int(len(normal_data) * nu / (1. - nu))
    rows = sample(list(anomaly_data.index), anomaly_elements_count)
    anomaly_data = anomaly_data.loc[rows]
    normal_train, normal_validate, normal_test = split_data_set(normal_data, 3)
    anomaly_train, anomaly_validate, anomaly_test = split_data_set(anomaly_data, 3)
    anomaly_train = concatenate([anomaly_train, anomaly_validate])
    normal_train = concatenate([normal_train, normal_validate])
    model.fit(np.concatenate([anomaly_train, normal_train]))
    anomaly_prediction = model.decision_function(anomaly_test)
    normal_prediction = model.decision_function(normal_test)
    false_anomaly = mean(normal_prediction < 0)
    false_normal = mean(anomaly_prediction > 0)
    prediction = concatenate([anomaly_prediction, normal_prediction])
    true_labels = array([1] * len(anomaly_prediction) + [-1] * len(normal_prediction))
    auc_score = average_precision_score(true_labels, -1 * prediction)
    train_data = concatenate([anomaly_train, normal_train])
    slice_score = slice_probability_metric(model, train_data)
    support_score = support_vectors_metric(model, train_data, nu)
    smote_score = validate_classifier_by_random_points(model, train_data, (1. - nu)/nu)
    vc_score = combinatorial_dimension_metric(model, train_data)
    kernel_score = kernel_metric(model, train_data)
    return false_anomaly, false_normal, auc_score, \
           slice_score, smote_score, vc_score, support_score, kernel_score
Example #2
def main(argv):
    label_file = argv[0]
    score_file = argv[1]
    output_file = argv[2]
    # load csv file
    labels = []
    # with open('/storage/phuongdv/vc-data/te.csv') as csvfile:
    #     reader = csv.DictReader(csvfile, delimiter=' ')
    #     for row in reader:
    #         labels.append(int(row["Label"]))

    with open(label_file) as csvfile:
        for line in csvfile:
            row = line.split(' ')
            labels.append(int(row[0]))

    lines = [line.rstrip('\n') for line in open(score_file)]
    scores = []
    for line in lines:
        scores.append(float(line))

    # Compute fpr, tpr, thresholds and roc auc
    fpr, tpr, thresholds = roc_curve(labels, scores, pos_label=1)
    roc_auc = auc(fpr, tpr)

    # pre, rec, _ = precision_recall_curve(labels, scores, pos_label=1)
    average_precision_macro = average_precision_score(labels, scores)
    average_precision_micro = average_precision_score(labels, scores,
                                                      average="micro")

    log_loss_ffm = log_loss(labels, scores)

    print("log loss : {}".format(log_loss_ffm))
    print("AUC : {}".format(roc_auc))
    print("PR|AUC_micro: {}, and PR_AUC_macro:{}".format(average_precision_micro, average_precision_macro))
Example #3
File: measures.py Project: seitrec/RI
def compute_results(query_results, query_relevant):
    y_scores = []  # list of scores from the search engine for each (query, document) couple
    y_true = []  # 1 if the document is relevant for the query 0 otherwise
    nb_queries = 0
    for result in query_results:
        if result[0] in query_relevant.keys():
            nb_queries += 1
            y_scores_temp = []
            y_true_temp = []
            pred_list = result[2]
            true_list = query_relevant[result[0]]
            for doc in pred_list:
                y_scores.append(doc[1])
                y_scores_temp.append(doc[1])
                if doc[0] in true_list:
                    y_true.append(1)
                    y_true_temp.append(1)
                else:
                    y_true.append(0)
                    y_true_temp.append(0)
            # We print the AP score for each single query
            print(result[0], average_precision_score(y_true_temp,y_scores_temp))

    precision, recall, thresholds = precision_recall_curve(y_true, y_scores)
    AP = average_precision_score(y_true, y_scores)
    return nb_queries, precision, recall, AP
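A usage sketch, assuming compute_results above and the sklearn.metrics functions it relies on are in scope. The second element of each result tuple is not used by the function, so a None placeholder is passed here:

query_results = [
    ("q1", None, [("d1", 0.9), ("d2", 0.4), ("d3", 0.8)]),
    ("q2", None, [("d1", 0.7), ("d4", 0.2)]),
]
query_relevant = {"q1": ["d1", "d3"], "q2": ["d4"]}

nb_queries, precision, recall, AP = compute_results(query_results, query_relevant)
print(nb_queries, AP)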
def evaluate_multiple(ground_truths, prediction_scores, compute_micro_macro_avg=False):
    """

    :param ground_truths: 1-d array annotated with class labels start from 0, e.g. gt: [0, 0, 1, 3, 2, 1, 0]
    :param prediction_scores: 2-d array recorded the corresponding probability scores for each class
    :param compute_micro_macro_avg: switch if the micro and macro average roc are needed
    :return: Dictory with number of class: false_positive_rates, true_positive_rates, thresholds, roc_aucs

    """

    # Check dimension
    if len(prediction_scores.shape) != 2:
        print("The dimension of 'prediction_scores' should be 2.")
        return

    N = prediction_scores.shape[0]
    M = prediction_scores.shape[1]

    precisions = {}
    recalls = {}
    thresholds = {}
    avg_precisions = {}

    if compute_micro_macro_avg:
        gt_label_array = []
        prediction_score_array = []

    for class_label in range(0, M):

        # Generate Class Label
        ground_truth_label = np.zeros(N, dtype=int)
        idx = (ground_truths == class_label)
        ground_truth_label[idx] = 1

        # Extract positive scores
        prediction_score = prediction_scores[:, class_label]

        # Compute precision-recall curve
        precision, recall, threshold = precision_recall_curve(ground_truth_label, prediction_score)
        avg_precision = average_precision_score(ground_truth_label, prediction_score)

        precisions[class_label] = precision
        recalls[class_label] = recall
        thresholds[class_label] = threshold
        avg_precisions[class_label] = avg_precision

        if compute_micro_macro_avg:
            gt_label_array.append(ground_truth_label)
            prediction_score_array.append(prediction_score)

    if compute_micro_macro_avg:
        gt_label_array = np.asarray(gt_label_array)
        prediction_score_array = np.asarray(prediction_score_array)

        # Compute Micro Avg.
        precisions["micro"], recalls["micro"], _ = precision_recall_curve(gt_label_array.ravel(),
                                                                                   prediction_score_array.ravel())
        avg_precisions["micro"] = average_precision_score(gt_label_array, prediction_score_array, average="micro")

    return precisions, recalls, thresholds, avg_precisions
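A usage sketch on synthetic 3-class data, assuming evaluate_multiple above is in scope together with numpy and the sklearn.metrics imports it uses:

import numpy as np

rng = np.random.RandomState(0)
ground_truths = rng.randint(0, 3, size=60)       # integer class labels 0..2
prediction_scores = rng.rand(60, 3)              # per-class probability-like scores
prediction_scores /= prediction_scores.sum(axis=1, keepdims=True)

precisions, recalls, thresholds, avg_precisions = evaluate_multiple(
    ground_truths, prediction_scores, compute_micro_macro_avg=True)
print(avg_precisions)   # per-class average precision plus the "micro" average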
Example #5
 def calc_precision_recall_fmeasure(self):
     """ Computes Precision, Recall, F-measure and Support """
     
     #  precision, recall, F-measure and support for each class for a given thresholds
     for threshold in [10, 30, 50]:
         result = precision_recall_fscore_support(self.y_true, prediction_to_binary(self.y_pred, threshold))
         self.scores['Precision ' + str(threshold) + '%'] = result[0]
         self.scores['Recall ' + str(threshold) + '%'] = result[1]
         self.scores['F-score ' + str(threshold) + '%'] = result[2]
         self.scores['Support'] = result[3]
        
     # Computes precision-recall pairs for different probability thresholds
     self.precision, self.recall, self.thresholds = precision_recall_curve(self.y_true, self.y_pred)    
     #print "precision = " + str(precision)
     #print "recall = " + str(recall)
     #print "thresholds = " +  str(thresholds)
     
     # Compute the area under the precision-recall curve (average precision from prediction scores)
     self.scores['Precision-Recall AUC'] = average_precision_score(self.y_true, self.y_pred)    
     
     
     self.scores['Weighted Precision'] = average_precision_score(self.y_true, self.y_pred, average='weighted') # weighted average precision by support (the number of true instances for each label).
     self.scores['Average Recall'] = np.average(self.recall)
     self.scores['Average Threshold'] = np.average(self.thresholds)
     
     return
Example #6
def plotPrc(clfName, folds, outdir):
    y_tests = []
    y_scores = []
    plt.clf()
    for i, (clf, X_test, y_test, _, _, _, _,_,_,_) in enumerate(folds):
        try:
            y_score = clf.decision_function(X_test)
        except AttributeError:
            y_score = clf.predict_proba(X_test)[:, 0]
        precision, recall, _ = precision_recall_curve(y_test, y_score, pos_label=POSTIVE_LABEL)
        y_tests.extend(y_test)
        y_scores.extend(y_score)

        try:
            area = average_precision_score(y_test, y_score)
        except ValueError:
            area = 0.0
        clf.prc_auc = area
        plt.plot(recall, precision, label='Fold %d, AUC = %0.2f' % (i, area), lw=1)

    precision, recall, _ = precision_recall_curve(y_tests, y_scores, pos_label=POSTIVE_LABEL)
    try:
        area = average_precision_score(y_tests, y_scores)
    except ValueError:
        area = 0.0
    plt.plot(recall, precision, 'k--', label='Mean, AUC = %0.2f' % (area), lw=2)

    plt.title('Precision-Recall: %s\n%s'%(clfName,outdir.name.replace("_"," ")))
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.legend(loc="lower center", prop=legendprop)

    plt.savefig(str(outdir/(clfName.replace(" ","_")+'_precision-recall.png')))
Example #7
def run(tweets, classifications, classifier=None):
    if len(tweets)!=len(classifications):
        raise ValueError('Error: Tweet population size and classifications size not matching.')
    
    population = utils.prepare_entr_tweets(tweets, classifications, 2)
    
    vectorizer = FeatureUnion([('tfidf', TfidfVectorizer(
                                            stop_words='english',
                                            tokenizer=EnglishTokenizer(),
                                            ngram_range=(1, 3),
                                            use_idf=False)),
                               ('sent', Vectorizer(
                                           stop_words='english',
                                           tokenizer=EnglishTokenizer(),
                                           ngram_range=(1, 3),))])

    if classifier is None:
        clf = MultinomialNB()
    else:
        clf = classifier

    pipeline = Pipeline([('vect', vectorizer),
                         ('clf', clf)])

    pipeline.fit(population['train_tweets'], y=population['train_classif'])
    
    predicted = pipeline.predict(population['val_tweets'])
    
    metrics = precision_recall_fscore_support(population['val_classif'], predicted, average='macro', pos_label=None)
    
    print("Exactitud:{0}\nPrecision:{1}\nRecall:{2}\nF1:{3}".format(
        accuracy_score(population['val_classif'], predicted), metrics[0], metrics[1], metrics[2]))
    
    score = pipeline.predict_proba(population['val_tweets'])[:, 0]

    print("AUC:{0}".format(average_precision_score(population['val_classif_bin'], score, average="micro")))
    
    precision = dict()
    recall = dict()
    average_precision = dict()
    
    # Compute micro-average precision-recall curve and average precision
    precision["micro"], recall["micro"], _ = precision_recall_curve(
                                                population['val_classif_bin'], score)
    average_precision["micro"] = average_precision_score(population['val_classif_bin'], score,
                                                         average="micro")
    
    # Plot Precision-Recall curve for each class
    plt.clf()
    plt.plot(recall["micro"], precision["micro"],
             label='Precision-recall curve (area = {0:0.2f})'
                   ''.format(average_precision["micro"]))
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall curve')
    plt.legend(loc="lower right")
    plt.show()
def Evaluate_Ranking(Direct_llista,train_or_val): # Function taking the directory list and the split name as parameters

    fitxer_anot = open(ruta_abs+"/TerrassaBuildings900/train/Annotation_"+train_or_val+".txt", "r") # Open the file when train_or_val equals 'train'.
    fitxer_anot2 = open(ruta_abs+"/TerrassaBuildings900/val/Annotation_"+train_or_val+".txt", "r") # Open the file when train_or_val equals 'val'.
    Final_file = open(ruta_abs+"/files/Average_precision_"+train_or_val+".txt", "w") # Open the file where the AP for each query will be written
    Final_file2 = open(ruta_abs+"/files/Mean_average_precision_"+train_or_val+".txt", "w") # Open the file where the MAP for each query will be written
    
    # Franc: these are created so the code does not crash; they will be cleaned up later
    Final_file_train = open(ruta_abs+"/files/final_file_train.txt" ,'w')
    Final_file2_train = open(ruta_abs+"/files/final_file_train.txt" ,'w')
    Final_file_valid = open(ruta_abs+"/files/final_file_valid.txt" ,'w')
    Final_file2_valid = open(ruta_abs+"/files/final_file_valid.txt" ,'w')
    
    
    for line in Direct_llista:
        Final_file = np.random.rand(1,180) # Random vector that will hold the APs for each query
        final = line.index("\n") # Position of the end of line for each AP vector
        if train_or_val == "train":
            fitxer_anot = "Annotation_train.txt"
            APt = average_precision_score(Direct_llista,fitxer_anot)
            Final_file.append(APt)
            # Next, write to the file each line of APs for the training images
            Final_file_train.write(line[0:final] + "For Query "+line+":\t" + str(Final_file).replace("\n","").replace("[[","").replace("]]","") + "\n")
        else:
            fitxer_anot2 = "Annotation_valid.txt"
            APv = average_precision_score(Direct_llista,fitxer_anot2)
            Final_file.append(APv)
            # Next, write to the file each line of APs for the validation images
            Final_file_valid.write(line[0:final] + "For Query "+line+":\t"+ str(Final_file).replace("\n","").replace("[[","").replace("]]","") + "\n")
    Final_file_train.close() # Close the file for the training images
    Final_file_valid.close() # Close the file for the validation images

    for line in Final_file_train:
        for element in line:
            suma_train = (sum(line))
            sum_elems = (sum(element))
        print("Training APs summed!")
        Final_file2 = [] # Create the array that will hold the MAP value
        MAP_train = suma_train/sum_elems # Compute that value
        Final_file2.append(MAP_train) # Append the resulting value to the array
        # Next, write the resulting MAP value to the output file
        Final_file2_train.write(line[0:final] + "For Query "+line+":\t" + "Mean_Average_Precision = "+str(Final_file2).replace("\n","").replace("[[","").replace("]]","") + "\n")
    Final_file_train.close() # Close the file from which the per-query APs were read
    Final_file2_train.close() # Close the file to which the resulting MAP values were written for each line
    
    
    for line in Final_file_valid:
        for element in line:
            suma_valid = (sum(line))
            sum_elems = (sum(element))
        print("Validation APs summed!")
        Final_file2 = [] # Create the array that will hold the MAP value
        MAP_valid = suma_valid/sum_elems # Compute that value
        Final_file2.append(MAP_valid) # Append the resulting value to the array
        # Next, write the resulting MAP value to the output file
        Final_file2_valid.write(line[0:final] + "For Query "+line+":\t" + "Mean_Average_Precision = "+str(Final_file2).replace("\n","").replace("[[","").replace("]]","") + "\n")
    Final_file_valid.close() # Close the file from which the per-query APs were read
    Final_file2_valid.close() # Close the file to which the resulting MAP values were written for each line
Example #9
def fscore(y_test, y_score):
    """

    :param y_test: output vector - predictions on the test set
    :param y_score: output vector which contains probabilities for each contained estimator
    :return: plot object
    """

    # binarize output vector
    y_test = binarize(y_test)
    print('y_test binarized shape = ', np.shape(y_test))
    n_classes = np.shape(y_test)[1]

    # Compute Precision-Recall and plot curve
    precision = dict()
    recall = dict()
    average_precision = dict()
    for i in range(n_classes):
        precision[i], recall[i], _ = precision_recall_curve(y_test[:, i],
                                                            y_score[:, i])
        average_precision[i] = average_precision_score(y_test[:, i], y_score[:, i])

    # Compute micro-average precision-recall curve and average precision
    precision["micro"], recall["micro"], _ = precision_recall_curve(y_test.ravel(),
                                                                    y_score.ravel())
    average_precision["micro"] = average_precision_score(y_test, y_score,
                                                         average="micro")

    # Plot Precision-Recall curve
    plt.clf()
    plt.plot(recall[0], precision[0], label='Precision-Recall curve')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('Precision-Recall example: AUC={0:0.2f}'.format(average_precision[0]))
    plt.legend(loc="lower left")
    plt.show()

    # Plot Precision-Recall curve for each class
    plt.clf()
    plt.plot(recall["micro"], precision["micro"],
             label='micro-average Precision-recall curve (area = {0:0.2f})'
                   ''.format(average_precision["micro"]))
    for i in range(n_classes):
        plt.plot(recall[i], precision[i],
                 label='Precision-recall curve of class {0} (area = {1:0.2f})'
                       ''.format(i, average_precision[i]))

    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Extension of Precision-Recall curve to multi-class')
    plt.legend(loc="lower right")
    plt.show()

    return plt
Example #10
def perfomance(y_test, y_pred, sample_weight=None, n=10):
    print ("P@"+str(n), Pat10(y_test, y_pred, n))

    fig, axes = plt.subplots(nrows=3, figsize=(6, 15))
    
    ax = axes[0]
    ax.grid(True)
    precision, recall, _ = precision_recall_curve(y_test,y_pred)
    
    print ("recalls_values",rec_values)
    prec_values = []
    for v in rec_values:
        prec_values.append(max(precision[recall > v]))
    print ("precision_values", prec_values)
    
    print ("average_precision_score", average_precision_score(y_test, y_pred, sample_weight=sample_weight))
    print ("roc_auc_score", roc_auc_score(y_test, y_pred))
        

    ax.step(recall, precision, color='b', alpha=0.2,
             where='post')
    ax.fill_between(recall, precision, step='post', alpha=0.2,
                     color='b')
    ax.set_xlabel('Recall', fontsize=10)
    ax.set_ylabel('Precision', fontsize=10)
    ax.set_ylim([0.0, 1.05])
    ax.set_xlim([0.0, 1.0])
    ax.tick_params(axis='x', labelsize=15)
    ax.tick_params(axis='y', labelsize=15)
    ax.set_title('2-class Precision-Recall curve: AP={0:0.3f}'.format(
              average_precision_score(y_test, y_pred, sample_weight=sample_weight)), fontsize=25)
    
    ax = axes[1]
    ax.grid(True)
    fpr, tpr, _ = roc_curve(y_test, y_pred, sample_weight=sample_weight)
    ax.plot(fpr, tpr)
    ax.set_title('ROC curve: roc_auc ={0:0.3f}'.format(
                 roc_auc_score(y_test, y_pred)), fontsize=25)
    ax.tick_params(axis='x', labelsize=15)
    ax.tick_params(axis='y', labelsize=15)
    ax.set_xlabel('FPR', fontsize=10)
    ax.set_ylabel('TPR', fontsize=10)
    
    ax = axes[2]
    bad_test = np.sum(y_test)
    good_test = len(y_test)-np.sum(y_test)
    ax.plot(sorted(y_pred[np.where( y_test == 0.)[0]], reverse=True), np.arange(good_test)/good_test*100, label = "good")
    ax.plot(sorted(y_pred[np.where( y_test == 1.)[0]]), np.arange(bad_test)/bad_test*100, label = "bad")
    ax.set_title('Predicted proba', fontsize=25)
    ax.tick_params(axis='x', labelsize=15)
    ax.tick_params(axis='y', labelsize=15)
    
    fig.subplots_adjust(hspace=0.5)
    plt.legend()
    plt.grid(True)
    plt.show()
    
    return precision, recall
Example #11
File: ass2.py Project: XiaopeiZhang/iit
def plotCurve(arr):
    X = arr[:, :-1]
    y = arr[:, -1]
    # Binarize the output
    y = label_binarize(y, classes=[0,1])
    n_classes = y.shape[1]

    # Add noisy features
    random_state = np.random.RandomState(0)
    n_samples, n_features = X.shape

    X = np.c_[X, random_state.randn(n_samples, 150 * n_features)]

    # Split into training and test
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5, random_state=random_state)

    # Run classifier
    classifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True, random_state=random_state))
    y_score = classifier.fit(X_train, y_train).decision_function(X_test)

    # Compute Precision-Recall and plot curve
    precision = dict()
    recall = dict()
    average_precision = dict()
    for i in range(n_classes):
        precision[i], recall[i], _ = precision_recall_curve(y_test[:, i],y_score[:, i])
        average_precision[i] = average_precision_score(y_test[:, i], y_score[:, i])

    # Compute micro-average precision-recall curve and average precision
    precision["micro"], recall["micro"], _ = precision_recall_curve(y_test.ravel(), y_score.ravel())
    average_precision["micro"] = average_precision_score(y_test, y_score, average="micro")

    # Plot Precision-Recall curve
    plt.clf()
    plt.plot(recall[0], precision[0], label='Precision-Recall curve')
    print(recall)
    print(precision)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.05])
    plt.title('Precision-Recall example: AUC={0:0.2f}'.format(average_precision[0]))
    plt.legend(loc="lower left")
    plt.show()

    # Plot Precision-Recall curve for each class
    plt.clf()
    plt.plot(recall["micro"], precision["micro"], label='micro-average Precision-recall curve (area = {0:0.2f})'''.format(average_precision["micro"]))
    for i in range(n_classes):
        plt.plot(recall[i], precision[i], label='Precision-recall curve of class {0} (area = {1:0.2f})'''.format(i, average_precision[i]))
    plt.xlim([0.0, 1.05])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Extension of Precision-Recall curve to multi-class')
    plt.legend(loc="lower right")
    plt.show()
def train_multilabel(features, targets, classes, train_split, test_split, C=1.0, ignore_hard_examples=True, after_ReLU=False, normalize_L2=False):
    print('\nHyperparameters:\n - C: {}\n - after_ReLU: {}\n - normL2: {}'.format(C, after_ReLU, normalize_L2))
    train_APs = []
    test_APs = []
    for class_id in range(len(classes)):
        
        classifier = SVC(C=C, kernel='linear') # http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html
        
        if ignore_hard_examples:
            train_masks = (targets[train_split][:,class_id] != 0).view(-1, 1)
            train_features = torch.masked_select(features[train_split], train_masks.expand_as(features[train_split])).view(-1,features[train_split].size(1))
            train_targets = torch.masked_select(targets[train_split], train_masks.expand_as(targets[train_split])).view(-1,targets[train_split].size(1))
            test_masks = (targets[test_split][:,class_id] != 0).view(-1, 1)
            test_features = torch.masked_select(features[test_split], test_masks.expand_as(features[test_split])).view(-1,features[test_split].size(1))
            test_targets = torch.masked_select(targets[test_split], test_masks.expand_as(targets[test_split])).view(-1,targets[test_split].size(1))
        else:
            train_features = features[train_split]
            train_targets = targets[train_split]
            test_features = features[test_split]
            test_targets = targets[test_split]

        if after_ReLU:
            train_features[train_features < 0] = 0
            test_features[test_features < 0] = 0

        if normalize_L2:
            train_norm = torch.norm(train_features, p=2, dim=1).unsqueeze(1)
            train_features = train_features.div(train_norm.expand_as(train_features))
            test_norm = torch.norm(test_features, p=2, dim=1).unsqueeze(1)
            test_features = test_features.div(test_norm.expand_as(test_features))

        train_X = train_features.numpy()
        train_y = (train_targets[:,class_id] != -1).numpy() # uses hard examples if not ignored

        test_X = test_features.numpy()
        test_y = (test_targets[:,class_id] != -1).numpy()

        classifier.fit(train_X, train_y) # train parameters of the classifier

        train_preds = classifier.predict(train_X)
        train_acc = accuracy_score(train_y, train_preds) * 100
        train_AP = average_precision_score(train_y, train_preds) * 100
        train_APs.append(train_AP)

        test_preds = classifier.predict(test_X)
        test_acc = accuracy_score(test_y, test_preds) * 100
        test_AP = average_precision_score(test_y, test_preds) * 100
        test_APs.append(test_AP)

        print('class "{}" ({}/{}):'.format(classes[class_id], test_y.sum(), test_y.shape[0]))
        print('  - {:8}: acc {:.2f}, AP {:.2f}'.format(train_split, train_acc, train_AP))
        print('  - {:8}: acc {:.2f}, AP {:.2f}'.format(test_split, test_acc, test_AP))

    print('all classes:')
    print('  - {:8}: mAP {:.4f}'.format(train_split, sum(train_APs)/len(classes)))
    print('  - {:8}: mAP {:.4f}'.format(test_split, sum(test_APs)/len(classes)))
Example #13
def fscore_plot(classifier, X_test, y_test):


    # Binarize the output
    n_classes = max(y_test) - min(y_test) + 1

    y_test = label_binarize(y_test, classes=list(range(0,n_classes)))


    y_score = classifier.predict_proba(X_test)

    # Compute Precision-Recall and plot curve
    precision = dict()
    recall = dict()
    average_precision = dict()
    for i in range(n_classes):
        precision[i], recall[i], _ = precision_recall_curve(y_test[:, i],
                                                            y_score[:, i])
        average_precision[i] = average_precision_score(y_test[:, i], y_score[:, i])

    # Compute micro-average precision-recall curve and average precision
    precision["micro"], recall["micro"], _ = precision_recall_curve(y_test.ravel(),
        y_score.ravel())
    average_precision["micro"] = average_precision_score(y_test, y_score,
                                                         average="micro")

    # Plot Precision-Recall curve
    plt.clf()
    plt.plot(recall[0], precision[0], label='Precision-Recall curve')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('Precision-Recall example: AUC={0:0.2f}'.format(average_precision[0]))
    plt.legend(loc="lower left")
    plt.show()

    # Plot Precision-Recall curve for each class
    plt.clf()
    plt.plot(recall["micro"], precision["micro"],
             label='micro-average Precision-recall curve (area = {0:0.2f})'
                   ''.format(average_precision["micro"]))
    for i in range(n_classes):
        plt.plot(recall[i], precision[i],
                 label='Precision-recall curve of class {0} (area = {1:0.2f})'
                       ''.format(i, average_precision[i]))

    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Extension of Precision-Recall curve to multi-class')
    plt.legend(loc="lower right")
    plt.show()

    return plt
Example #14
    def test_average_precision_score(self):
        result = self.df.metrics.average_precision_score(average='weighted')
        expected = metrics.average_precision_score(self.target, self.decision,
                                                   average='weighted')
        self.assertAlmostEqual(result, expected)
        # curve, _, _ = self.df.metrics.precision_recall_curve()
        # self.assertEqual(result, curve.mean())

        result = self.df.metrics.average_precision_score(average=None)

        expected = metrics.average_precision_score(self.target, self.decision, average=None)
        self.assertTrue(isinstance(result, pdml.ModelSeries))
        self.assert_numpy_array_almost_equal(result.values, expected)
Example #15
def test_score_scale_invariance():
    # Test that average_precision_score and auc_score are invariant by
    # the scaling or shifting of probabilities
    y_true, _, probas_pred = make_prediction(binary=True)
    roc_auc = auc_score(y_true, probas_pred)
    roc_auc_scaled = auc_score(y_true, 100 * probas_pred)
    roc_auc_shifted = auc_score(y_true, probas_pred - 10)
    assert_equal(roc_auc, roc_auc_scaled)
    assert_equal(roc_auc, roc_auc_shifted)
    pr_auc = average_precision_score(y_true, probas_pred)
    pr_auc_scaled = average_precision_score(y_true, 100 * probas_pred)
    pr_auc_shifted = average_precision_score(y_true, probas_pred - 10)
    assert_equal(pr_auc, pr_auc_scaled)
    assert_equal(pr_auc, pr_auc_shifted)
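The same invariance can be checked directly on a small synthetic example, since average precision depends only on the ranking of the scores:

import numpy as np
from sklearn.metrics import average_precision_score

y_true = np.array([0, 1, 1, 0, 1, 0])
scores = np.array([0.1, 0.8, 0.6, 0.4, 0.9, 0.3])

ap = average_precision_score(y_true, scores)
# Positive scaling and shifting are monotone, so they leave the ranking (and the AP) unchanged.
assert np.isclose(ap, average_precision_score(y_true, 100 * scores))
assert np.isclose(ap, average_precision_score(y_true, scores - 10))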
def PR_multi_class(data_train, data_train_vectors, data_test, data_test_vectors):
    # Binarize the output
    y_train_label = label_binarize(data_train.target, classes=[0, 1, 2])
    n_classes = y_train_label.shape[1]
    
    random_state = np.random.RandomState(0)
    
    # shuffle and split training and test sets
    X_train, X_test, y_train, y_test = train_test_split(data_train_vectors, y_train_label, test_size=.5,
                                                        random_state=random_state)
    
    # Learn to predict each class against the other
    classifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True, random_state=random_state))
    classifier.fit(X_train, y_train)
    y_pred_score = classifier.decision_function(data_test_vectors)
    
    y_test_label = label_binarize(data_test.target, classes=[0, 1, 2])
    
    # Compute Precision-Recall and plot curve
    precision = dict()
    recall = dict()
    average_precision = dict()
    for i in range(n_classes):
        precision[i], recall[i], _ = precision_recall_curve(y_test_label[:, i], y_pred_score[:, i])
        average_precision[i] = average_precision_score(y_test_label[:, i], y_pred_score[:, i])
    
    # Compute micro-average precision-recall curve and average precision
    precision["micro"], recall["micro"], _ = precision_recall_curve(y_test_label.ravel(), y_pred_score.ravel())
    average_precision["micro"] = average_precision_score(y_test_label, y_pred_score, average="micro")
    
    # Plot Precision-Recall curve for each class
    plt.clf()
#    plt.plot(recall["micro"], precision["micro"],
#             label='micro-average PR curve (area = {0:0.2f})'
#                   ''.format(average_precision["micro"]))
    for i in range(n_classes):
        plt.plot(recall[i], precision[i],
                 label='PR curve of class {0} (area = {1:0.2f})'
                       ''.format(i, average_precision[i]))
    
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall curve of multi-class')
    plt.legend(loc="lower right")
    plt.show()
    return 0
def get_roc_score(edges_pos, edges_neg, score_matrix, apply_sigmoid=False):

    # Edge case
    if len(edges_pos) == 0 or len(edges_neg) == 0:
        return (None, None)

    # Store positive edge predictions, actual values
    preds_pos = []
    pos = []
    for edge in edges_pos:
        if apply_sigmoid == True:
            preds_pos.append(sigmoid(score_matrix[edge[0], edge[1]]))
        else:
            preds_pos.append(score_matrix[edge[0], edge[1]])
        pos.append(1) # actual value (1 for positive)
        
    # Store negative edge predictions, actual values
    preds_neg = []
    neg = []
    for edge in edges_neg:
        if apply_sigmoid == True:
            preds_neg.append(sigmoid(score_matrix[edge[0], edge[1]]))
        else:
            preds_neg.append(score_matrix[edge[0], edge[1]])
        neg.append(0) # actual value (0 for negative)
        
    # Calculate scores
    preds_all = np.hstack([preds_pos, preds_neg])
    labels_all = np.hstack([np.ones(len(preds_pos)), np.zeros(len(preds_neg))])
    roc_score = roc_auc_score(labels_all, preds_all)
    # roc_curve_tuple = roc_curve(labels_all, preds_all)
    ap_score = average_precision_score(labels_all, preds_all)
    
    # return roc_score, roc_curve_tuple, ap_score
    return roc_score, ap_score
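A usage sketch with a small dense score matrix, assuming get_roc_score above and its numpy/sklearn imports are in scope; apply_sigmoid is left at its default so the sigmoid helper is not needed:

import numpy as np

score_matrix = np.array([[0.0, 2.1, -0.5],
                         [2.1, 0.0,  0.3],
                         [-0.5, 0.3, 0.0]])
edges_pos = [(0, 1), (1, 2)]   # edges expected to exist
edges_neg = [(0, 2)]           # sampled non-edges

roc_score, ap_score = get_roc_score(edges_pos, edges_neg, score_matrix)
print(roc_score, ap_score)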
Example #18
def GB_classifier_model_search(X, y, m_label):
	'''runs grid search for the gradient boosting classifer'''
	#split 80/20 train test
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
	#create param grid for search
	param_grid = [{'learning_rate': [.01, .001], 'n_estimators': [1000], 'max_depth': [3,5,7]}]
	GB = GradientBoostingClassifier()
	print('running GradientBoostingClassifier with grid search...')
	GBc = GridSearchCV(GB, param_grid, verbose = 2, cv = 2, n_jobs = -1) #2 k-folds
	GBc.fit(X_train, y_train)
	pred = GBc.predict_proba(X_test) #get back probabilities
	pred2 = GBc.predict(X_test) #get back predictions
	fpr, tpr, thresholds = roc_curve(y_test, pred[:,1])
	
	#get the AUC
	AUC = roc_auc_score(y_test, pred[:,1])
	
	#get the AUC for precision and recall curve
	AUC2 = average_precision_score(y_test, pred[:,1])
	recall = recall_score(y_test, pred2)
	precision = precision_score(y_test, pred2)
	
	#plot AUC
	plt.plot(fpr, tpr, label = '{} AUC = {}'.format(m_label,round(AUC,3)))
	v = np.linspace(0,1)
	plt.plot(v,v, linestyle = '--', color = 'k')
	plt.xlabel("False Postive Rate")
	plt.ylabel("True Postive Rate")
	plt.title('ROC Curve')
	plt.xlim(-0.05,1)
	plt.ylim(0,1.05)
	plt.axhline(1, color = 'k', linestyle = '--')
	plt.axvline(0, color = 'k', linestyle = '--')
	plt.legend()
	return GBc, recall, AUC, precision, AUC2
Example #19
File: __init__.py Project: teresa-m/EDeN
def estimate_model(positive_data_matrix=None,
                   negative_data_matrix=None,
                   target=None,
                   estimator=None,
                   n_jobs=4):
    X, y = make_data_matrix(positive_data_matrix=positive_data_matrix,
                            negative_data_matrix=negative_data_matrix,
                            target=target)
    logger.info('Test set')
    logger.info(describe(X))
    logger.info('-' * 80)
    logger.info('Test Estimate')
    predictions = estimator.predict(X)
    margins = estimator.decision_function(X)
    logger.info(classification_report(y, predictions))
    apr = average_precision_score(y, margins)
    logger.info('APR: %.3f' % apr)
    roc = roc_auc_score(y, margins)
    logger.info('ROC: %.3f' % roc)

    logger.info('Cross-validated estimate')
    for scoring in ['accuracy', 'precision', 'recall', 'f1', 'average_precision', 'roc_auc']:
        scores = cross_validation.cross_val_score(estimator, X, y, cv=5,
                                                  scoring=scoring, n_jobs=n_jobs)
        logger.info('%20s: %.3f +- %.3f' % (scoring, np.mean(scores), np.std(scores)))

    return roc, apr
Example #20
File: base_learner.py Project: lmxl/hal
def bio_classification_report(y_true, y_pred):

    lb = LabelBinarizer()
    y_true_combined = 1 - lb.fit_transform(list(chain.from_iterable(y_true)))
    y_pred_combined = list(chain.from_iterable(y_pred))

    tagset = set(lb.classes_) - {'O'}
    tagset = sorted(tagset, key=lambda tag: tag.split('-', 1)[::-1])
    class_indices = {cls: idx for idx, cls in enumerate(lb.classes_)}
    print('True sum %d Pred sum %d Len %d' % (sum(y_true_combined), sum(y_pred_combined), len(y_pred_combined)))
    print("AUC\tP-R: %.4f\tROC: %.4f" % (average_precision_score(y_true_combined, y_pred_combined, average=None),
          roc_auc_score(y_true_combined, y_pred_combined, average=None)))
    #plt.figure()
    #fpr, tpr, thr = roc_curve(y_true_combined, y_pred_combined)
    #area = auc(fpr, tpr)
    #plt.plot(fpr, tpr, label='{area:.3f}'.format( area=area))
    #plt.legend(loc=4)
    #plt.savefig('sub3.jpg')

    return classification_report(
        1 - y_true_combined,
        [0 if v > 0.1 else 1 for v in y_pred_combined],
        labels=[class_indices[cls] for cls in tagset],
        target_names=tagset,
    )
Example #21
def get_auprc(predictions_true, predictions_false):
    predictions = predictions_true + predictions_false
    labels = [1] * len(predictions_true) + [0] * len(predictions_false)
    y_scores = numpy.array(predictions)
    y_true = numpy.array(labels)
    auprc = average_precision_score(y_true, y_scores)
    return auprc
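For example, assuming get_auprc above (and its numpy/sklearn imports) is in scope, it can be called with raw score lists for the positive and negative instances:

auprc = get_auprc(predictions_true=[0.9, 0.75, 0.6], predictions_false=[0.4, 0.3, 0.2, 0.1])
print(auprc)  # 1.0 here, since every positive is scored above every negative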
Example #22
 def plot_precision_recall(self, X_test, y_test, infos="", outfile="precision_recall.png"):
     """plot precicion-recall curve"""
     if self.trained:
         try:
             y_score = self.clf.decision_function(X_test)
         except AttributeError:
             y_score = self.clf.predict_proba(X_test)[:, 1]
         precision, recall, _ = precision_recall_curve(y_test, y_score)
         average_precision = average_precision_score(
             y_test, y_score, average="micro")
         # Plot Precision-Recall curve for each class
         plt.clf()
         plt.plot(recall, precision,
                  label='Average Precision-recall curve (area = {0:0.2f})'
                  ''.format(average_precision))
         plt.xlim([0.0, 1.0])
         plt.ylim([0.0, 1.05])
         plt.xlabel('Recall')
         plt.ylabel('Precision')
         plt.title('Precision-Recall curve for %s (%s)' %
                   (self.method, infos))
         plt.legend(loc="lower right")
         plt.savefig(outfile)
     else:
         raise ValueError("Classifier is not trained")
Example #23
def plot_pr(y_true_s, y_score, thresholds):
    # Plot of a ROC curve for a range of threshold values
    plt.figure()
    for t in thresholds:
        y_true = y_true_s.copy()
        y_true[y_true <= t] = 1
        y_true[y_true != 1] = 0

        # Compute precision-recall curve and average precision
        recall = dict()
        precision = dict()
        pr_auc = dict()
        
        # Compute micro-average precision-recall curve and average precision
        precision["micro"], recall["micro"], _ = precision_recall_curve(
                                    y_true, y_score, pos_label=1)
        pr_auc["micro"] = average_precision_score(y_true, y_score,
                                                     average="micro")
        plt.plot(recall['micro'], precision['micro'], label='t = %0.2f A (area = %0.2f)' % (t, pr_auc['micro']))
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.ylim([0.0, 1.05])
        plt.xlim([0.0, 1.0])        
        plt.title('Precision Recall Curve')
        plt.legend(loc="lower right")

    plt.show()
Example #24
def main():
    x, y = readData("C:/Users/marro/Repo/CS584/Generative_Learning/Data/banknote/data_banknote_authentication.txt",",",scale=False)

    #shuffle
    p = np.random.permutation(len(x))
    x = x[p]
    y = y[p]

    # encode class labels
    classes, y = np.unique(y, return_inverse=True)

    print("Training accuracy: {}".format(getAccuracy(y,classifyAll(x,x,y),1)))
    print("Kfold Accuracy, recall, precission,tp,tn,fp,fn: {}".format(kfoldCrossValidation(x,y, 10, 1)))

    # precision-recall curve
    precision = dict()
    recall = dict()
    average_precision = dict()
    for i in range(0,1):
        precision[i], recall[i], _ = precision_recall_curve(y,
                                                        classifyAll(x,x,y))
        average_precision[i] = average_precision_score(y, classifyAll(x,x,y))

    # Plot Precision-Recall curve
    plt.clf()
    plt.plot(recall[0], precision[0])#, label='Precision-Recall curve')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    #plt.title('Precision-Recall Curve'.format(average_precision[0]))
    print(average_precision[0])
    plt.legend(loc="lower left")
    plt.show()
Example #25
File: curve.py Project: caskeep/interpret
    def explain_perf(self, X, y, name=None):
        if name is None:
            name = gen_name_from_class(self)

        X, y, self.feature_names, self.feature_types = unify_data(
            X, y, self.feature_names, self.feature_types
        )
        predict_fn = unify_predict_fn(self.predict_fn, X)
        scores = predict_fn(X)

        precision, recall, thresh = precision_recall_curve(y, scores)
        ap = average_precision_score(y, scores)

        abs_residuals = np.abs(y - scores)
        counts, values = np.histogram(abs_residuals, bins="doane")

        overall_dict = {
            "type": "perf_curve",
            "density": {"names": values, "scores": counts},
            "scores": scores,
            "x_values": recall,
            "y_values": precision,
            "threshold": thresh,
            "auc": ap,
        }
        internal_obj = {"overall": overall_dict, "specific": None}

        return PRExplanation(
            "perf",
            internal_obj,
            feature_names=self.feature_names,
            feature_types=self.feature_types,
            name=name,
        )
Example #26
def plot_precision_recall_curves(target, feature, ax, sample_weight=None, color="blue", fn=""):
    pr, rc, thresholds = metrics.precision_recall_curve(target, feature, pos_label=1, sample_weight=sample_weight)
    average_precision_score = metrics.average_precision_score(target, feature, sample_weight=sample_weight)
    ax.plot(rc, pr, label="%s : %.3f" % (fn, average_precision_score), color=color)
    ax.set_xlabel("Recal")
    ax.set_ylabel("Precision")
    ax.legend(loc="best")
Example #27
def mean_ap(distmat, query_ids=None, gallery_ids=None,
            query_cams=None, gallery_cams=None):
    distmat = to_numpy(distmat)
    m, n = distmat.shape
    # Fill up default values
    if query_ids is None:
        query_ids = np.arange(m)
    if gallery_ids is None:
        gallery_ids = np.arange(n)
    if query_cams is None:
        query_cams = np.zeros(m).astype(np.int32)
    if gallery_cams is None:
        gallery_cams = np.ones(n).astype(np.int32)
    # Ensure numpy array
    query_ids = np.asarray(query_ids)
    gallery_ids = np.asarray(gallery_ids)
    query_cams = np.asarray(query_cams)
    gallery_cams = np.asarray(gallery_cams)
    # Sort and find correct matches
    indices = np.argsort(distmat, axis=1)
    matches = (gallery_ids[indices] == query_ids[:, np.newaxis])
    # Compute AP for each query
    aps = []
    for i in range(m):
        # Filter out the same id and same camera
        valid = ((gallery_ids[indices[i]] != query_ids[i]) |
                 (gallery_cams[indices[i]] != query_cams[i]))
        y_true = matches[i, valid]
        y_score = -distmat[i][indices[i]][valid]
        if not np.any(y_true): continue
        aps.append(average_precision_score(y_true, y_score))
    if len(aps) == 0:
        raise RuntimeError("No valid query")
    return np.mean(aps)
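A usage sketch with a toy distance matrix, assuming mean_ap above is in scope and that its to_numpy helper passes a NumPy array through unchanged:

import numpy as np

query_ids = np.array([0, 1])
gallery_ids = np.array([0, 1, 0, 1])
# Smaller distances should correspond to matching identities.
distmat = np.array([[0.1, 0.9, 0.2, 0.8],
                    [0.7, 0.2, 0.9, 0.3]])

# Both queries rank their true matches first, so this prints 1.0.
print(mean_ap(distmat, query_ids=query_ids, gallery_ids=gallery_ids))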
Example #28
def evalualte_base(dataset, DV, model):
	start = time.time()
	# Load Data to Pandas
	data = pd.read_csv(dataset, index_col=0)
	data.columns = [camel_to_snake(col) for col in data.columns]

	if model == 'logit':
		#DV
		y = data[str(DV)]
		X = data[data.columns.difference([str(DV)])]

		clf = logit_clf(dataset, DV, 'yes')


	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

	# predict class labels for the test set
	predicted = clf.predict(X_test)

	#print predicted

	# generate class probabilities
	y_score = clf.predict_proba(X_test)
	
	# generate evaluation metrics
	print "Model score, accuracy : %.3f" % (metrics.accuracy_score(y_test, predicted))
	print "Model score, roc_auc: %.3f" % (metrics.roc_auc_score(y_test, y_score[:, 1]))
	print "Model score, f1: %.3f" % metrics.f1_score(y_test, predicted)
	print "Model score, average-precision: %.3f" % (metrics.average_precision_score(y_test, predicted))
	print "Model score, precision: %.3f" % (metrics.precision_score(y_test, predicted))
	print "Model score, recall: %.3f" % (metrics.recall_score(y_test, predicted))

	end = time.time()
	print "Runtime, K-folds evaluation of base model: %.3f" % (end-start), "seconds."
Example #29
def AUCprc (filename="y_y_pred.txt"):
    
    """
    Compares AUC-PRC curve 

    Args:
        filename: name of output file for model
        

    Returns:
        (nothing)        
        the function plots a comparative AOC-PRC curve with value of AUC included
    """    
    #MODEL 
    df = pd.read_csv(filename, sep=' ', names=["y","y_pred"])
    y_predarray = np.array(df.y_pred)
    y_truearray = np.array(df.y)

    #AUCprc
    precision, recall, threshold = metrics.precision_recall_curve(y_truearray, y_predarray)
    average_precision = metrics.average_precision_score(y_truearray, y_predarray)
    
    # Plot curve
    plt.clf()
    plt.plot(recall, precision, label='case A: AUC={0:0.2f}'.format(average_precision))
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('Precision-Recall')
    plt.legend(loc="lower left")
    plt.show()
    return
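A usage sketch that writes a small space-separated y/y_pred file in the format AUCprc expects and then plots it, assuming AUCprc above is in scope with pandas, numpy, sklearn.metrics and matplotlib imported:

with open("y_y_pred.txt", "w") as f:
    for y, y_pred in [(1, 0.9), (0, 0.2), (1, 0.7), (0, 0.4), (1, 0.35)]:
        f.write("{} {}\n".format(y, y_pred))

AUCprc("y_y_pred.txt")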
Example #30
File: ml.py Project: emreg00/repurpose
def check_ml_helper(drugs, disease_to_drugs, drug_to_index, list_M_similarity, pairs, classes, cv, knn, n_fold, n_proportion, n_subset, model_type, prediction_type, features, recalculate_similarity, disjoint_cv, split_both, output_f, model_fun, verbose, n_seed):
    # Get classification model
    clf = utilities.get_classification_model(model_type, model_fun, n_seed)
    all_auc = []
    all_auprc = []
    for i, (train, test) in enumerate(cv):
        file_name = None # for saving results
        pairs_train = pairs[train]
        classes_train = classes[train] 
        pairs_test = pairs[test]
        classes_test = classes[test] 
        if recalculate_similarity:
            drug_to_disease_to_scores = utilities.get_similarity_based_scores(drugs, disease_to_drugs, drug_to_index, list_M_similarity = list_M_similarity, knn = knn, pairs_train = pairs_train, pairs_test = None, approach = "train_vs_train", file_name = file_name) 
        else:
            # Using similarity scores of all drugs, not only within the subset
            drug_to_disease_to_scores = utilities.get_similarity_based_scores(drugs, disease_to_drugs, drug_to_index, list_M_similarity = list_M_similarity, knn = knn, pairs_train = pairs_train, pairs_test = pairs_test, approach = "train_test_vs_train_test", file_name = file_name) # similar to all_vs_all above, but removes the test pair
        X, y = get_scores_and_labels(pairs_train, classes_train, drug_to_disease_to_scores)
        if recalculate_similarity:
            drug_to_disease_to_scores = utilities.get_similarity_based_scores(drugs, disease_to_drugs, drug_to_index, list_M_similarity = list_M_similarity, knn = knn, pairs_train = pairs_test, pairs_test = None, approach = "train_vs_train", file_name = file_name) 
        X_new, y_new = get_scores_and_labels(pairs_test, classes_test, drug_to_disease_to_scores)
        probas_ = clf.fit(X, y).predict_proba(X_new)
        fpr, tpr, thresholds = roc_curve(y_new, probas_[:, 1]) 
        roc_auc = 100*auc(fpr, tpr)
        all_auc.append(roc_auc)
        prc_auc = 100*average_precision_score(y_new, probas_[:, 1])
        all_auprc.append(prc_auc)
        if verbose:
            print "Fold:", i+1, "# train:", len(pairs_train), "# test:", len(pairs_test), "AUC: %.1f" % roc_auc, "AUPRC: %.1f" % prc_auc
    #if verbose:
    #        print "AUC: %.1f (+/-%.1f):" % (numpy.mean(all_auc), numpy.std(all_auc)), all_auc
    if output_f is not None:
        output_f.write("%d\t%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%f\t%f\t%f\t%f\n" % (n_fold, n_proportion, n_subset, model_type, prediction_type, "|".join(features), recalculate_similarity, disjoint_cv, split_both, "cv", numpy.mean(all_auc), numpy.std(all_auc), numpy.mean(all_auprc), numpy.std(all_auprc)))
    return numpy.mean(all_auc), numpy.mean(all_auprc)
Example #31
            def validation_step(_x_val_gov, _x_val_art, _y_val, writer=None):
                print("_x_val_gov: ", len(_x_val_gov))
                print("_x_val_art: ", len(_x_val_art))
                """Evaluates model on a validation set"""
                batches_validation = \
                    feed.batch_iter(
                    list(zip(_x_val_gov,
                             _x_val_art,
                             _y_val)),
                    FLAGS.batch_size,
                    num_epochs=1,
                    shuffle=False)

                _eval_counter, _eval_loss = 0, 0.0

                _eval_pre_tk = [0.0] * FLAGS.top_num
                _eval_rec_tk = [0.0] * FLAGS.top_num
                _eval_F_tk = [0.0] * FLAGS.top_num

                true_onehot_labels = []
                predicted_onehot_scores = []
                predicted_onehot_labels_ts = []
                predicted_onehot_labels_tk = [[] for _ in range(FLAGS.top_num)]

                valid_count_correct_one = 0
                valid_count_label_one = 0
                valid_count_correct_zero = 0
                valid_count_label_zero = 0

                valid_step_count = 0
                for batch_validation in batches_validation:
                    valid_step_count += 1
                    x_batch_val_gov, x_batch_val_art, y_batch_val = \
                        zip(*batch_validation)
                    feed_dict = {
                        cnn.input_x_gov: x_batch_val_gov,
                        cnn.input_x_art: x_batch_val_art,
                        cnn.input_y: y_batch_val,
                        cnn.dropout_keep_prob: 1.0,
                        cnn.is_training: False
                    }
                    step, \
                    summaries, \
                    scores, \
                    cur_loss, \
                    input_y = sess.run(
                        [cnn.global_step,
                         validation_summary_op,
                         cnn.scores,
                         cnn.loss,
                         cnn.input_y],
                        feed_dict)

                    count_label_one, \
                    count_label_zero, \
                    count_correct_one, \
                    count_correct_zero = count_correct_pred(scores,
                                                            input_y)
                    valid_count_correct_one += count_correct_one
                    valid_count_label_one += count_label_one

                    valid_count_correct_zero += count_correct_zero
                    valid_count_label_zero += count_label_zero

                    print("[VALID] num_correct_answer is {} out of {}".format(
                        count_correct_one, count_label_one))
                    print("[VALID] num_correct_answer is {} out of {}".format(
                        count_correct_zero, count_label_zero))

                    # Prepare for calculating metrics
                    for i in y_batch_val:
                        true_onehot_labels.append(i)
                    for j in scores:
                        predicted_onehot_scores.append(j)

                    # Predict by threshold
                    batch_predicted_onehot_labels_ts = \
                        feed.get_onehot_label_threshold(scores=scores,
                                                        threshold=FLAGS.
                                                        threshold)

                    for k in batch_predicted_onehot_labels_ts:
                        predicted_onehot_labels_ts.append(k)

                    # Predict by topK
                    for _top_num in range(FLAGS.top_num):
                        batch_predicted_onehot_labels_tk = feed.\
                            get_onehot_label_topk(scores=scores,
                                                  top_num=_top_num + 1)

                        for i in batch_predicted_onehot_labels_tk:
                            predicted_onehot_labels_tk[_top_num].append(i)

                    _eval_loss = _eval_loss + cur_loss
                    _eval_counter = _eval_counter + 1

                    if writer:
                        writer.add_summary(summaries, step)

                logger.info("[VALID_FINAL] Total Correct One Answer is {} out "
                            "of {}".format(valid_count_correct_one,
                                           valid_count_label_one))
                logger.info("[VALID_FINAL] Total Correct Zero Answer is {} "
                            "out of {}".format(valid_count_correct_zero,
                                               valid_count_label_zero))

                _eval_loss = float(_eval_loss / _eval_counter)

                # Calculate Precision & Recall & F1 (threshold & topK)
                _eval_pre_ts = precision_score(
                    y_true=np.array(true_onehot_labels),
                    y_pred=np.array(predicted_onehot_labels_ts),
                    average='micro')
                _eval_rec_ts = recall_score(
                    y_true=np.array(true_onehot_labels),
                    y_pred=np.array(predicted_onehot_labels_ts),
                    average='micro')
                _eval_F_ts = f1_score(
                    y_true=np.array(true_onehot_labels),
                    y_pred=np.array(predicted_onehot_labels_ts),
                    average='micro')

                for _top_num in range(FLAGS.top_num):
                    _eval_pre_tk[_top_num] = precision_score(
                        y_true=np.array(true_onehot_labels),
                        y_pred=np.array(predicted_onehot_labels_tk[_top_num]),
                        average='micro')
                    _eval_rec_tk[_top_num] = recall_score(
                        y_true=np.array(true_onehot_labels),
                        y_pred=np.array(predicted_onehot_labels_tk[_top_num]),
                        average='micro')
                    _eval_F_tk[_top_num] = f1_score(
                        y_true=np.array(true_onehot_labels),
                        y_pred=np.array(predicted_onehot_labels_tk[_top_num]),
                        average='micro')

                # Calculate the average AUC
                _eval_auc = roc_auc_score(
                    y_true=np.array(true_onehot_labels),
                    y_score=np.array(predicted_onehot_scores),
                    average='micro')
                # Calculate the average PR
                _eval_prc = average_precision_score(
                    y_true=np.array(true_onehot_labels),
                    y_score=np.array(predicted_onehot_scores),
                    average='micro')

                return _eval_loss, _eval_auc, _eval_prc, _eval_rec_ts, \
                       _eval_pre_ts, _eval_F_ts, _eval_rec_tk, _eval_pre_tk, \
                       _eval_F_tk
Example #32
def predict(args, model, tokenizer):
    """ Run prediction on test set. """

    # Make output dir if necessary
    if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(args.output_dir)

    # Get test data
    test_data = load_and_cache_dataset(args, tokenizer, 'test')
    test_queries = test_data["queries"]
    test_query_token_ids = test_data["query_token_ids"]
    candidate_token_ids = test_data["candidate_token_ids"]
    candidates = list(test_data["candidate2id"].keys())
    candidate_ids = list(test_data["candidate2id"].values())

    # Check if gold is available
    gold_available = "gold_hypernym_candidate_ids" in test_data and test_data[
        "gold_hypernym_candidate_ids"] is not None
    test_pos_candidate_ids = test_data[
        "gold_hypernym_candidate_ids"] if gold_available else None

    # Write and log top k candidates for each test query.
    ranking_cutoff = 15

    logger.info("***** Running prediction *****")
    logger.info("  Nb queries: {}".format(len(test_queries)))
    logger.info("  Ranking cutoff: {}".format(ranking_cutoff))
    if gold_available:
        logger.info("  Evaluating ranking of candidates wrt gold hypernyms")
    else:
        logger.info(
            "  NOT evaluating ranking of candidates (gold hypernyms not available)"
        )

    # Accumulate average precision scores (if gold is available), top-k results and overall results
    ap_scores = []
    top_candidates_and_scores = []
    results = {}

    # Loop over queries
    total_test_loss = 0.0
    nb_queries = len(test_queries)
    for i in range(nb_queries):
        # Create a dataset for this query and all the candidates
        query_token_ids = test_query_token_ids[i]
        candidate_labels = [0] * len(candidate_ids)
        if gold_available:
            for candidate_id in test_pos_candidate_ids[i]:
                candidate_labels[candidate_id] = 1
        eval_dataset = make_dataset(tokenizer, [query_token_ids],
                                    candidate_token_ids, [candidate_ids],
                                    candidate_labels=[candidate_labels],
                                    max_length=args.max_seq_length,
                                    pad_on_left=False,
                                    pad_token=0,
                                    pad_token_segment_id=0,
                                    mask_padding_with_zero=True)
        logger.info(" *** Running prediction on query {} ('{}') ***".format(
            i, test_queries[i]))
        y_probs, y_true, test_loss = get_model_predictions(
            args, model, eval_dataset)
        total_test_loss += test_loss

        # Get top k candidates and their scores
        y_scores = y_probs[:, 1]
        top_k_candidate_ids = np.argsort(
            y_scores).tolist()[-ranking_cutoff:][::-1]
        top_k_scores = [y_scores[j] for j in top_k_candidate_ids]
        top_candidates_and_scores.append(list(zip(top_k_candidate_ids,
                                                  top_k_scores)))

        # Evalute ranking if gold hypernyms are available
        if gold_available:
            y_score = y_probs[:, 1]
            ap = average_precision_score(y_true=y_true, y_score=y_score)
            ap_scores.append(ap)

        # FOR DEBUGGING
        logger.warning("  STOPPING FOR DEBUGGING PURPOSES")
        break

    # Compute average loss
    loss = total_test_loss / nb_queries
    results["loss"] = loss
    logger.info("***** Results *****")
    logger.info("  loss: {}".format(loss))

    # Compute mean average precision if gold hypernyms were available
    if gold_available:
        MAP = np.mean(ap_scores)
        results["MAP"] = MAP
        logger.info("  MAP: {}".format(MAP))

    # Write top k candidates and scores
    path_top_candidates = os.path.join(
        args.output_dir, "test_top_{}_candidates.tsv".format(ranking_cutoff))
    path_top_scores = os.path.join(
        args.output_dir, "test_top_{}_scores.tsv".format(ranking_cutoff))
    logger.info("Writing top {} candidates for each query to {}".format(
        ranking_cutoff, path_top_candidates))
    logger.info("Writing top {} scores for each query to {}".format(
        ranking_cutoff, path_top_scores))
    with open(path_top_candidates, 'w') as fc, open(path_top_scores,
                                                    'w') as fs:
        for i, topk in enumerate(top_candidates_and_scores):
            fc.write("{}\n".format("\t".join([str(c) for (c, s) in topk])))
            fs.write("{}\n".format("\t".join(
                ["{:.5f}".format(s) for (c, s) in topk])))
            query = test_queries[i]
            topk_string = ', '.join(
                ["('{}',{:.5f})".format(c, s) for (c, s) in topk])
            logger.info("{}. Top candidates for '{}': {}".format(
                i + 1, query, topk_string))

    # Write average precision of each query
    if gold_available:
        output_eval_file = os.path.join(args.output_dir,
                                        "test_average_precision.txt")
        logger.info("  Writing average precision scores in {}".format(
            output_eval_file))
        with open(output_eval_file, "w") as writer:
            for ap in ap_scores:
                writer.write("{:.5f}\n".format(ap))

    return results
Example #33
def train(X,
          y,
          weight_classes=True,
          n_iter_search=500,
          score='roc_auc',
          random_state=123):
    '''
    Train a binary SGD classifier using a randomized grid search with given scoring metric.

    Parameters:
        X (list-like): list of normalized attachment texts
        y (list-like): list of validated targets (0 = red, 1 = green)
        weight_classes (bool): whether or not to use the “balanced” mode to adjust class weights.
        n_iter_search (int):  number of parameter settings that are sampled. Trades off runtime vs quality
                              of the solution.
        score (str):  the scorer used to evaluate the predictions on the test set. `roc_auc` by
                      default. Available options include:  accuracy, roc_auc, precision, fbeta, recall.
                      Note: for fbeta, beta is set to 1.5 to favor recall of the positive class.
        random_state (int): sets the random seed for reproducibility.
    Returns:
        results (dict): a dict of scoring metrics and their values
        best_score (float): mean cross-validated score of the best_estimator.
        best_estimator (sklearn estimator): estimator that was chosen by the search
        best_params (dict): parameter setting that gave the best results on the hold out data.
    '''

    if weight_classes:
        clf = SGDClassifier(class_weight='balanced')
    else:
        clf = SGDClassifier()
    scoring = {
        'accuracy': metrics.make_scorer(metrics.accuracy_score),
        'roc_auc': metrics.make_scorer(metrics.roc_auc_score),
        'precision': metrics.make_scorer(metrics.average_precision_score),
        'fbeta': metrics.make_scorer(metrics.fbeta_score, beta=1.5),
        'recall': metrics.make_scorer(metrics.recall_score)
    }
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, stratify=y, test_size=0.2, random_state=random_state)
    pipe = Pipeline([('vectorizer', TfidfVectorizer(stop_words='english')),
                     ('select', SelectKBest(chi2)), ('clf', clf)])
    param_dist = get_param_distribution()
    random_search = RandomizedSearchCV(pipe,
                                       param_distributions=param_dist,
                                       scoring=scoring,
                                       refit=score,
                                       n_iter=n_iter_search,
                                       cv=5,
                                       n_jobs=-1,
                                       verbose=1,
                                       random_state=random_state)
    try:
        random_search.fit(X_train, y_train)
    except Exception as e:
        logger.error(f"Exception occurred training a new model: {e}",
                     exc_info=True)
    y_pred = random_search.predict(X_test)
    #get the col number of the positive class (i.e. green)
    positive_class_col = list(random_search.classes_).index(1)
    try:
        y_score = random_search.predict_proba(X_test)[:, positive_class_col]
    except AttributeError:
        y_score = random_search.decision_function(X_test)
    average_precision = metrics.average_precision_score(y_test, y_score)
    acc = metrics.accuracy_score(y_test, y_pred)
    try:
        roc_auc = metrics.roc_auc_score(y_test, y_pred)
    except ValueError:
        roc_auc = None
    precisions, recalls, _ = metrics.precision_recall_curve(y_test, y_score)
    try:
        auc = metrics.auc(recalls, precisions)
    except ValueError:
        auc = None
    fbeta = metrics.fbeta_score(y_test, y_pred, beta=1.5)
    recall = metrics.recall_score(y_test, y_pred)
    best_estimator = random_search.best_estimator_
    best_params = random_search.best_params_
    best_score = random_search.best_score_
    result_values = [
        y_pred, y_score, precisions, recall, average_precision, acc, roc_auc,
        auc, fbeta, recalls, best_score, best_estimator, y_test
    ]
    result_keys = [
        'y_pred', 'y_score', 'precisions', 'recall', 'average_precision',
        'acc', 'roc_auc', 'auc', 'fbeta', 'recalls', 'best_score',
        'best_estimator', 'y_test'
    ]
    results = {k: v for k, v in zip(result_keys, result_values)}

    return results, best_score, best_estimator, best_params
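# A minimal usage sketch for train() above, assuming the surrounding module
# already provides get_param_distribution(), logger and the sklearn imports it
# relies on (SGDClassifier, Pipeline, TfidfVectorizer, SelectKBest, chi2,
# RandomizedSearchCV, train_test_split, metrics). The texts and labels below
# are illustrative placeholders, not data from the original project.
texts = [
    "build passed and all checks are green",
    "deployment failed with a timeout error",
    "tests green, ready to merge",
    "pipeline red, unit tests failing",
] * 25                      # repeat so the stratified 80/20 split has enough rows
labels = [1, 0, 1, 0] * 25  # 1 = green, 0 = red, as in the docstring

results, best_score, best_estimator, best_params = train(
    texts, labels, n_iter_search=10, score='roc_auc')
print("best CV score:", best_score)
print("held-out average precision:", results['average_precision'])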
Example #34
def get_prc(labels, y):
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        return metrics.average_precision_score(labels.cpu().numpy(),
                                               y.detach().cpu().numpy(),
                                               average='weighted')
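# A small usage sketch for get_prc() above, assuming the surrounding module's
# imports (torch, and sklearn.metrics as metrics). The multi-label targets and
# scores below are synthetic placeholders.
import torch

labels = torch.tensor([[1, 0, 1],
                       [0, 1, 1],
                       [1, 1, 0]])
logits = torch.randn(3, 3, requires_grad=True)  # e.g. raw model outputs
print("weighted AP:", get_prc(labels, torch.sigmoid(logits)))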
Example #35
                             grad_images=return_gradients,
                             load_and_evaluate_ckpt=sel_ckpts)

if len(scores.keys()) > 1:
    raise RuntimeError
scores = scores[0]
labs = labs[0]

# Create a mosaic
plot_mosaic(images.astype(np.uint8), title='Images', rc=10, cc=10)
plot_mosaic(labs, rc=10, cc=10, title='Labels', show_plot=True)
# plot_mosaic(labels, rc=10, cc=10, title='Labels', show_plot=True)
plot_mosaic(scores, rc=10, cc=10, title='Predictions', show_plot=True)

# Evaluate performance
map_score = metrics.average_precision_score(labs.reshape(batch_size, -1),
                                            scores.reshape(batch_size, -1))
p, r, thresh = metrics.precision_recall_curve(labs.reshape(batch_size, -1),
                                              scores.reshape(batch_size, -1),
                                              pos_label=1)
plt.step(r, p, color='b', alpha=0.2, where='post')
plt.fill_between(r, p, step='post', alpha=0.2, color='b')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.show()
# np.savez(
#     version,
#     preds=preds,
#     all_scores=all_scores,
#     bin_labs=bin_labs,
Example #36
print('score, decision tree: {}'.format(tree.score(X_test, y_test)))
Dummy = DummyClassifier().fit(X_train, y_train)
print('Score, dummy: {}'.format(Dummy.score(X_test, y_test)))
lr = LogisticRegression(C=0.1).fit(X_train, y_train)
print('Score, logistic regression: {}'.format(lr.score(X_test, y_test)))
# confusion matrix
pred_lr = lr.predict(X_test)
confusion = confusion_matrix(y_test, pred_lr)
print('Confusion Matrix for LogisticRegression:\n{}'.format(confusion))
print('F1-score for LogisticRegression: {}'.format(f1_score(y_test, pred_lr)))
# for multiclass f1-score can be used with average='micro','macro' or 'weighted'
print('Report:\n{}'.format(classification_report(y_test, pred_lr)))
# higher threshold higher precision, lower recall
precision, recall, threshold = precision_recall_curve(
    y_test, lr.decision_function(X_test))
close_zero = np.argmin(np.abs(threshold))
plt.plot(precision[close_zero], recall[close_zero], 'o')
plt.plot(precision, recall, '-')
plt.show()
print('Average precision score: {}'.format(
    average_precision_score(y_test, lr.decision_function(X_test))))
# ROC(Receiver Operating Characteristics) False Positive Rate (FPR) vs True Positive Rate(TPR: recall)
fpr, tpr, threshold = roc_curve(y_test, lr.decision_function(X_test))
plt.plot(fpr, tpr, label='ROC curve')
close_zero = np.argmin(np.abs(threshold))
plt.plot(fpr[close_zero], tpr[close_zero], 'o')
plt.show()
# AUC (area under curve)
print('AUC: {}'.format(roc_auc_score(y_test, lr.decision_function(X_test))))
# AUC is very useful on imbalanced data, but we'll need to adjust threshold. We get information that
# cannot be found from accuracy score only
Example #37
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("pred_file",
                        type=str,
                        help="File path to list of predictions")
    parser.add_argument("-c",
                        "--class_scores",
                        type=str,
                        default=None,
                        help="Path to write class-specific APs")
    parser.add_argument("-q",
                        "--qual",
                        type=str,
                        default=None,
                        help="Path to write qualitative results")
    args = parser.parse_args()

    params = vars(args)

    # Load Attributes --------------------------------------------------------------------------------------------------
    attr_id_to_name, attr_id_to_idx = load_attributes()
    idx_to_attr_id = {v: k for k, v in attr_id_to_idx.iteritems()}
    n_attr = len(attr_id_to_idx)

    # Load predictions -------------------------------------------------------------------------------------------------
    # Construct a list of dicts containing: GT labels, Prediction probabilities, Image path
    pred_list = []
    with open(params['pred_file'], 'r') as f:
        for _line in f:
            line = _line.strip()
            dct = json.loads(line)

            pred_entry = dict()
            pred_entry['pred_probs'] = np.asarray(dct['pred_probs'],
                                                  dtype=float)

            # Read image_path and gt_labels from annotation
            anno_path = dct['anno_path'] if osp.exists(
                dct['anno_path']) else osp.join(DS_ROOT, dct['anno_path'])
            with open(anno_path) as jf:
                anno = json.load(jf)

                # Get the list of attributes this corresponds to
                attr_set = set(anno['labels'])
                attr_vec = labels_to_vec(attr_set, attr_id_to_idx)

                pred_entry['image_path'] = anno['image_path']
                pred_entry['gt_labels'] = attr_vec
                pred_entry['anno_path'] = dct['anno_path']

            pred_list.append(pred_entry)

    # Convert to matrix ------------------------------------------------------------------------------------------------
    # Create a NxM matrix. Each row represents the class-probabilities for the M classes.
    # In case of GT, they are 1-hot encoded
    gt_mat = np.array([d['gt_labels'] for d in pred_list])
    pred_probs_mat = np.array([d['pred_probs'] for d in pred_list])

    # Drop examples where gt contains no relevant attributes (when testing on a partial set)
    # non_empty_gt_idx = np.where(np.sum(gt_mat, axis=1) > 0)[0]
    # pred_probs_mat = pred_probs_mat[non_empty_gt_idx, :]
    # gt_mat = gt_mat[non_empty_gt_idx, :]

    # Evaluate Overall Attribute Prediction ----------------------------------------------------------------------------
    n_examples, n_labels = gt_mat.shape
    print '# Examples = ', n_examples
    print '# Labels = ', n_labels
    print 'Macro MAP = {:.2f}'.format(
        100 * average_precision_score(gt_mat, pred_probs_mat, average='macro'))

    if params['class_scores'] is not None:
        cmap_stats = average_precision_score(gt_mat,
                                             pred_probs_mat,
                                             average=None)
        with open(params['class_scores'], 'w') as wf:
            wf.write('\t'.join(
                ['attribute_id', 'attribute_name', 'num_occurrences', 'ap']) +
                     '\n')
            for idx in range(n_labels):
                attr_id = idx_to_attr_id[idx]
                attr_name = attr_id_to_name[attr_id]
                attr_occurrences = np.sum(gt_mat, axis=0)[idx]
                ap = cmap_stats[idx]

                wf.write('{}\t{}\t{}\t{}\n'.format(attr_id, attr_name,
                                                   attr_occurrences,
                                                   ap * 100.0))

    if params['qual'] is not None:
        if not osp.exists(params['qual']):
            print '{} does not exist. Creating it ...'.format(params['qual'])
            os.mkdir(params['qual'])
        for pred in pred_list:
            image_path = pred['image_path']
            im = Image.open(image_path)

            fig, [ax1, ax2] = plt.subplots(1, 2, figsize=(20, 15))

            ax = ax1
            ax.imshow(im)
            ax.axis('off')

            ax = ax2
            text_str = ''
            pred_probs = pred['pred_probs']
            top_10_inds = np.argsort(-pred_probs)[:10]
            for aidx in top_10_inds:
                text_str += '{:<30} {:.3f}\n'.format(idx_to_attr_id[aidx],
                                                     pred_probs[aidx])
            ax.set_xlim(xmin=0, xmax=1)
            ax.set_ylim(ymin=0, ymax=1)
            ax.text(0.0, 0.5, text_str, fontsize='xx-large')
            ax.axis('off')

            plt.tight_layout()

            _, im_name = osp.split(image_path)
            out_path = osp.join(params['qual'], im_name)
            plt.savefig(out_path, bbox_inches='tight')
            plt.close()
print('---' * 45)

# How it should look like
print('---' * 45)
print('How it should be:\n')
print("Accuracy Score: {:.2f}".format(np.mean(undersample_accuracy)))
print("Precision Score: {:.2f}".format(np.mean(undersample_precision)))
print("Recall Score: {:.2f}".format(np.mean(undersample_recall)))
print("F1 Score: {:.2f}".format(np.mean(undersample_f1)))
print('---' * 45)

undersample_y_score = log_reg.decision_function(original_Xtest)

from sklearn.metrics import average_precision_score

undersample_average_precision = average_precision_score(
    original_ytest, undersample_y_score)

print('Average precision-recall score: {0:0.2f}'.format(
    undersample_average_precision))

from sklearn.metrics import precision_recall_curve
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(12, 6))

precision, recall, _ = precision_recall_curve(original_ytest,
                                              undersample_y_score)

plt.step(recall, precision, color='#004a93', alpha=0.2, where='post')
plt.fill_between(recall, precision, step='post', alpha=0.2, color='#48a6ff')
Example #39
def classifier_plots(clf_trained, X_test, y_test, target_names:list, minority_idx:int=0, ylog:bool=False):
    """
    Get summary plots for a trained classifier
    
    Args:
        clf_trained: trained sklearn clf
        X_test (np.ndarray): withheld test data
        y_test (np.ndarray): withheld test data labels
        target_names (list): list of target labels/names
        minority_idx (int): index of the minority class (e.g. 0 or 1)
        ylog (bool): toggle log scaling on the y-axis
        
    Returns:
        None
    """
    
    """ Probabilty Dist """
    # get the probability distribution
    probas = clf_trained.predict_proba(X_test)

    # PLOT - count
    plt.figure(dpi=150)
    plt.hist(probas, bins=20)
    plt.title('Classification Probabilities')
    plt.xlabel('Probability')
    plt.ylabel('# of Instances')
    plt.xlim([0.5, 1.0])
    if ylog: plt.yscale('log')
    plt.legend(target_names)
    plt.show()
    
    
    # PLOT - density
    plt.figure(dpi=150)
    plt.hist(probas[:, minority_idx], bins=20, density=True)
    plt.title('Classification Density (Minority)')
    plt.xlabel('Probability')
    plt.ylabel('% of Total')
    if ylog: plt.yscale('log')
    plt.xlim([0, 1.0])
    plt.legend(target_names)
    plt.show()
    
    """ ROC curve """

    # get false and true positive rates
    fpr, tpr, _ = roc_curve(y_test, probas[:,0], pos_label=0)

    # get area under the curve
    clf_auc = auc(fpr, tpr)

    # PLOT ROC curve
    plt.figure(dpi=150)
    plt.plot(fpr, tpr, lw=1, color='green', label=f'AUC = {clf_auc:.3f}')
    plt.plot([0,1], [0,1], '--k', lw=0.5, label='Random')
    plt.title('ROC')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate (Recall)')
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.legend()
    plt.show()
    
    """ Precision Recall Curve """

    # get precision and recall values
    precision, recall, _ = precision_recall_curve(y_test, probas[:,0], pos_label=0)

    # average precision score (for the same positive class as the PR curve)
    avg_precision = average_precision_score(y_test, probas[:, 0], pos_label=0)

    # precision auc
    pr_auc = auc(recall, precision)

    # plot
    plt.figure(dpi=150)
    plt.plot(recall, precision, lw=1, color='blue', label=f'AP={avg_precision:.3f}; AUC={pr_auc:.3f}')
    plt.fill_between(recall, precision, -1, facecolor='lightblue', alpha=0.5)

    plt.title('PR Curve')
    plt.xlabel('Recall (TPR)')
    plt.ylabel('Precision')
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.legend()
    plt.show()
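# A minimal usage sketch for classifier_plots() above, assuming the surrounding
# module imports matplotlib.pyplot as plt and roc_curve, auc,
# precision_recall_curve, average_precision_score from sklearn.metrics. The
# imbalanced dataset and the random forest are illustrative choices, not part
# of the original snippet.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=2000, weights=[0.1, 0.9], random_state=0)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.3, random_state=0)
clf = RandomForestClassifier(n_estimators=100, random_state=0).fit(X_train, y_train)

# class 0 is the minority (10%) and is also the positive label used by the
# ROC/PR plots inside classifier_plots()
classifier_plots(clf, X_test, y_test,
                 target_names=['minority (0)', 'majority (1)'],
                 minority_idx=0, ylog=True)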
Example #40
File: eval.py Project: zanche/FIDGAN
def TSTR_eICU(identifier, epoch, generate=True, vali=True, CNN=False, do_OR=False, duplicate_synth=1, reverse=False):
    """
    """
    if vali:
        test_set = 'vali'
    else:
        test_set = 'test'
    data = np.load('./experiments/data/' + identifier + '.data.npy').item()
    samples = data['samples']
    train_X = samples['train']
    test_X = samples[test_set]
    labels = data['labels']
    train_Y = labels['train']
    test_Y = labels[test_set]
    if generate:
        # now sample from the model
        synth_Y = np.tile(train_Y, [duplicate_synth, 1])
        synth_X = model.sample_trained_model(identifier, epoch, num_samples=synth_Y.shape[0], C_samples=synth_Y)
        # for use in TRTS
        synth_testX = model.sample_trained_model(identifier, epoch, num_samples=test_Y.shape[0], C_samples=test_Y)
        synth_data = {'samples': synth_X, 'labels': synth_Y, 'test_samples': synth_testX, 'test_labels': test_Y}
        np.save('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy', synth_data)
    else:
        print('Loading pre-generated data')
        print('WARNING: not implemented for TRTS')
        # get "train" data
        exp_data = np.load('./experiments/tstr/' + identifier + '_' + str(epoch) + '.data.npy').item()
        synth_X = exp_data['samples']
        synth_Y = exp_data['labels']
        n_synth = synth_X.shape[0]
        synth_X = synth_X.reshape(n_synth, -1)
    #    pdb.set_trace()
    #    # ALERT ALERT MODIFYING
    #    synth_X = 2*(synth_X > 0) - 1
    orig_data = np.load('/cluster/home/hyland/eICU_task_data.npy').item()
    if reverse:
        which_setting = 'trts'
    # visualise distribution of errors for train and test
        print('Swapping synthetic test set in for real, to do TRTS!')
        test_X = synth_testX
    else:
        print('Doing normal TSTR')
        which_setting = 'tstr'
#    # get test data
#    test_X = data['test_X']
#    test_Y = data['test_Y']
    if not CNN:
        model_choice = 'RF'
         # if multivariate, reshape
        if len(test_X.shape) == 3:
            test_X = test_X.reshape(test_X.shape[0], -1)
        if len(train_X.shape) == 3:
            train_X = train_X.reshape(train_X.shape[0], -1)
        if len(synth_X.shape) == 3:
            synth_X = synth_X.reshape(synth_X.shape[0], -1)
    else:
        raise ValueError(CNN)
        model_choice = 'CNN'
    # we will select the best validation set epoch based on F1 score, take average across all the tasks
    score_list = []
    for label in range(synth_Y.shape[1]):
        task = orig_data['Y_columns'][label]
        if vali:
            if not task in ['low_sao2', 'high_heartrate', 'low_respiration']:
                print('Skipping task', task, 'because validation evaluation.')
                continue
        print('Evaluating on task:', task)
        #print('(', np.mean(synth_Y[:, label]), 'positive in train, ', np.mean(test_Y[:, label]), 'in test)')
        #m = RandomForestClassifier(n_estimators=50).fit(synth_X, synth_Y[:, label])
        #m = SVC(gamma=0.001).fit(synth_X, synth_Y[:, label])
        synth_classifier = RandomForestClassifier(n_estimators=100).fit(synth_X, synth_Y[:, label])
        synth_predY = synth_classifier.predict(test_X)
        synth_predY_prob = synth_classifier.predict_proba(test_X)[:, 1]
        real_classifier = RandomForestClassifier(n_estimators=100).fit(train_X, train_Y[:, label])
        real_predY = real_classifier.predict(test_X)
        real_predY_prob = real_classifier.predict_proba(test_X)[:, 1]
        #print('(predicted', np.mean(predict), 'positive labels)')
        
        synth_prec, synth_recall, synth_f1, synth_support = precision_recall_fscore_support(test_Y[:, label], synth_predY, average='weighted')
        synth_accuracy = accuracy_score(test_Y[:, label], synth_predY)
        synth_auprc = average_precision_score(test_Y[:, label], synth_predY_prob)
        synth_auroc = roc_auc_score(test_Y[:, label], synth_predY_prob)
        synth_scores = [synth_prec, synth_recall, synth_f1, synth_accuracy, synth_auprc, synth_auroc]

        real_prec, real_recall, real_f1, real_support = precision_recall_fscore_support(test_Y[:, label], real_predY, average='weighted')
        real_accuracy = accuracy_score(test_Y[:, label], real_predY)
        real_auprc = average_precision_score(test_Y[:, label], real_predY_prob)
        real_auroc = roc_auc_score(test_Y[:, label], real_predY_prob)
        real_scores = [real_prec, real_recall, real_f1, real_accuracy, real_auprc, real_auroc]
        
        all_scores = synth_scores + real_scores

        report_prefix = 'vali.' if vali else ''
        report_path = './experiments/tstr/' + report_prefix + which_setting + '_report.v3.csv'
        with open(report_path, 'a') as report_file:
            report_file.write('eICU_' + task + ',' + identifier + ',' + model_choice + ',' + str(epoch) + ',' + ','.join(map(str, all_scores)) + '\n')
        
        print(classification_report(test_Y[:, label], synth_predY))
        print(classification_report(test_Y[:, label], real_predY))
        if task in ['low_sao2', 'high_heartrate', 'low_respiration']:
            score_list.append(synth_auprc + synth_auroc)

    if do_OR:
        raise NotImplementedError
        # do the OR task
        extreme_heartrate_test = test_Y[:, 1] + test_Y[:, 4]
        extreme_respiration_test = test_Y[:, 2] + test_Y[:, 5]
        extreme_systemicmean_test = test_Y[:, 3] + test_Y[:, 6]
        Y_OR_test = np.vstack([extreme_heartrate_test, extreme_respiration_test, extreme_systemicmean_test]).T
        Y_OR_test = (Y_OR_test > 0)*1

        extreme_heartrate_synth = synth_Y[:, 1] + synth_Y[:, 4]
        extreme_respiration_synth = synth_Y[:, 2] + synth_Y[:, 5]
        extreme_systemicmean_synth = synth_Y[:, 3] + synth_Y[:, 6]
        Y_OR_synth = np.vstack([extreme_heartrate_synth, extreme_respiration_synth, extreme_systemicmean_synth]).T
        Y_OR_synth = (Y_OR_synth > 0)*1

        OR_names = ['extreme heartrate', 'extreme respiration', 'extreme MAP']
        OR_results = []
        for label in range(Y_OR_synth.shape[1]):
            print('task:', OR_names[label])
            print('(', np.mean(Y_OR_synth[:, label]), 'positive in train, ', np.mean(Y_OR_test[:, label]), 'in test)')
            m = RandomForestClassifier(n_estimators=500).fit(synth_X, Y_OR_synth[:, label])
            predict = m.predict(test_X)
            print('(predicted', np.mean(predict), 'positive labels)')
            accuracy = accuracy_score(Y_OR_test[:, label], predict)
            precision = sklearn.metrics.precision_score(Y_OR_test[:, label], predict)
            recall = sklearn.metrics.recall_score(Y_OR_test[:, label], predict)
            print(accuracy, precision, recall)
            OR_results.append([accuracy, precision, recall])
    else:
        OR_results = []

    score_across_tasks = np.mean(np.array(score_list))
    return score_across_tasks
Example #41
 def lp_test(self, pred, y):
     y, pred = y.detach().cpu().numpy(), pred.detach().cpu().numpy()
     return roc_auc_score(y, pred), average_precision_score(y, pred)
Example #42
def average_precision(y_true, y_pred, sample_weight):
    return average_precision_score(y_true, y_pred)


def aps(X, y, model):
    probs = model.predict_proba(X)[:, 1]
    return average_precision_score(y, probs)


def aps2(X, y, model):
    probs = model.decision_function(X)
    return average_precision_score(y, probs)
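# A small usage sketch for the helpers above: aps() expects an estimator with
# predict_proba(), aps2() one with decision_function(). The models and dataset
# are illustrative assumptions, not part of the original snippet.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=500, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)

lr = LogisticRegression(max_iter=1000).fit(X_tr, y_tr)
svc = LinearSVC().fit(X_tr, y_tr)

print("AP from predict_proba:     ", aps(X_te, y_te, lr))
print("AP from decision_function: ", aps2(X_te, y_te, svc))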
Example #45
def pr_auc(preds, data):
    y_true = data.get_label()
    score = average_precision_score(y_true, preds)
    return "pr_auc", score, True
Example #46
File: DNN.py Project: xzhang2016/DeepHE
def buildModel(dataDict, numFeat, numberOfClasses, f_tp, f_fp, f_th, expName,
               iteration, model_dir, result_dir):
    trainData = dataDict['train']
    trainLabel = dataDict['trainLabel']
    validData = dataDict['valid']
    validLabel = dataDict['validLabel']
    testData = dataDict['test']
    testLabel = dataDict['testLabel']

    # building NN model
    model = Sequential()
    model.add(
        Dense(hl[0],
              activation=paramDict['activation1'],
              input_shape=(numFeat, )))
    model.add(Dropout(paramDict['dropOut']))
    for i in range(1, numHidden):
        if i < len(hl):
            model.add(Dense(hl[i], activation=paramDict['activation1']))
            model.add(Dropout(paramDict['dropOut']))
        else:
            model.add(Dense(1024, activation=paramDict['activation1']))
            model.add(Dropout(paramDict['dropOut']))

    model.add(Dense(numberOfClasses, activation=paramDict['activation2']))

    model.compile(optimizer=optimizerDict['adam'],
                  loss=paramDict['loss'],
                  metrics=paramDict['metrics'])

    # saving best model by validation accuracy
    filePath = os.path.join(model_dir,
                            expName + str(iteration) + '_weights.best.hdf5')
    checkpointer = ModelCheckpoint(filepath=filePath,
                                   verbose=0,
                                   monitor=paramDict['monitor'],
                                   save_best_only=True)
    earlystopper = EarlyStopping(paramDict['monitor'], patience=15, verbose=1)

    # fit the model to the training data and verify with validation data
    model.fit(trainData,
              trainLabel,
              epochs=paramDict['epoch'],
              callbacks=[checkpointer, earlystopper],
              batch_size=paramDict['batchSize'],
              shuffle=True,
              verbose=1,
              validation_data=(validData, validLabel),
              class_weight=class_weight)

    # load best model and compile
    model.load_weights(filePath)
    model.compile(optimizer=optimizerDict['adam'],
                  loss=paramDict['loss'],
                  metrics=paramDict['metrics'])

    # serialize model to JSON (save the model structure in order to use the saved weights)
    #one time save
    fn = os.path.join(model_dir, 'model3.json')
    if not os.path.isfile(fn):
        model_json = model.to_json()
        with open(fn, 'w') as json_file:
            json_file.write(model_json)

    #save model for later use (including model structure and weights)
    model_file = os.path.join(model_dir,
                              expName + str(iteration) + '_model.h5')
    model.save(model_file)

    # evaluation scores
    roc_auc = metrics.roc_auc_score(testLabel, model.predict(testData))

    #precision here is the auc of precision-recall curve
    precision = metrics.average_precision_score(testLabel,
                                                model.predict(testData))

    # get predicted class label
    probs = model.predict_proba(testData)
    testPredLabel = model.predict(testData)
    true_y = list()
    for y_i in range(len(testLabel)):
        true_y.append(testLabel[y_i][1])
    probs = probs[:, 1]

    fpr, tpr, threshold = metrics.roc_curve(true_y, probs)

    for i in range(len(fpr)):
        f_fp.write(str(fpr[i]) + '\t')
    f_fp.write('\n')

    for i in range(len(tpr)):
        f_tp.write(str(tpr[i]) + '\t')
    f_tp.write('\n')

    for i in range(len(threshold)):
        f_th.write(str(threshold[i]) + '\t')
    f_th.write('\n')

    #save precision, recall, and thresholds for PR curve plot
    p0, r0, t0 = metrics.precision_recall_curve(true_y, probs)
    fnp0 = os.path.join(result_dir, expName + '_precision.txt')
    fnr0 = os.path.join(result_dir, expName + '_recall.txt')
    fnt0 = os.path.join(result_dir, expName + '_PR_threshold.txt')
    with open(fnp0, 'a') as f0:
        for i in range(len(p0)):
            f0.write(str(p0[i]) + '\t')
        f0.write('\n')

    with open(fnr0, 'a') as f0:
        for i in range(len(r0)):
            f0.write(str(r0[i]) + '\t')
        f0.write('\n')

    with open(fnt0, 'a') as f0:
        for i in range(len(t0)):
            f0.write(str(t0[i]) + '\t')
        f0.write('\n')

    # convert back class label from categorical to integer label
    testLabelRev = np.argmax(testLabel, axis=1)
    testPredLabelRev = np.argmax(testPredLabel, axis=1)

    # get TP, TN, FP, FN to calculate sensitivity, specificity, PPV and accuracy
    TP, TN, FP, FN = getTPTNValues(testLabelRev, testPredLabelRev)

    sensitivity = float(TP) / float(TP + FN)
    specificity = float(TN) / float(TN + FP)
    PPV = float(TP) / float(TP + FP)
    accuracy = float(TP + TN) / float(TP + FP + FN + TN)

    # dictionary to store evaluation stat
    evaluationInfo = {
        'roc_auc': roc_auc,
        'precision': precision,
        'sensitivity': sensitivity,
        'specificity': specificity,
        'PPV': PPV,
        'accuracy': accuracy,
        'batch_size': paramDict['batchSize'],
        'activation': paramDict['activation2'],
        'dropout': paramDict['dropOut']
    }

    return evaluationInfo
Example #47
def eval_approx(args, smean, sconc, device, test_loader, ood_loader,
                teacher_test_samples, teacher_ood_samples):
    smean.eval()
    sconc.eval()
    miscls_origin = []
    miscls_approx = []
    entros_origin_1 = []
    fentros_approx_1 = []
    entros_approx_1 = []
    entros_origin_2 = []
    fentros_approx_2 = []
    entros_approx_2 = []
    maxp_origin_1 = []
    maxp_approx_1 = []
    maxp_origin_2 = []
    maxp_approx_2 = []
    gvalue_approx_1 = []
    gvalue_approx_2 = []

    batch_idx = 0
    with torch.no_grad():
        for data, target in test_loader:

            data, target = data.to(device), target.to(device)
            data = data.view(data.shape[0], -1)

            g_out = F.softplus(sconc(data))
            f_out = F.softmax(smean(data), dim=1)
            pi_q = f_out.mul(g_out)

            samples_p_pi = teacher_test_samples[
                batch_idx * test_loader.batch_size:(batch_idx + 1) *
                test_loader.batch_size].to(device)
            avg_origin_output = torch.mean(samples_p_pi, dim=1)

            pi_p_avg_batch = avg_origin_output
            origin_result = torch.argmax(pi_p_avg_batch, dim=1)
            approx_result = torch.argmax(pi_q, dim=1)

            miscls_approx.append(
                (1 - (approx_result == target).float()).cpu().numpy())
            miscls_origin.append(
                (1 - (origin_result == target).float()).cpu().numpy())

            entro_origin = (-torch.bmm(
                pi_p_avg_batch.view(data.shape[0], 1, -1),
                torch.log(pi_p_avg_batch.view(data.shape[0], -1, 1)))).view(-1)

            fentro_approx = (-torch.bmm(
                f_out.view(data.shape[0], 1, -1),
                torch.log(f_out.view(data.shape[0], -1, 1)))).view(-1)

            alpha = pi_q
            alpha0 = alpha.sum(1)

            entro_approx = torch.lgamma(alpha).sum(1) \
                           - torch.lgamma(alpha0) \
                           + (alpha0 - 10).mul(torch.digamma(alpha0)) \
                           - ((alpha - 1 ).mul(torch.digamma(alpha))).sum(1)

            entros_origin_1.append(entro_origin.cpu().numpy())
            fentros_approx_1.append(fentro_approx.cpu().numpy())
            entros_approx_1.append(entro_approx.cpu().numpy())

            maxp_origin = 1. / torch.max(pi_p_avg_batch, dim=1)[0]
            maxp_approx = 1. / torch.max(f_out, dim=1)[0]

            maxp_origin_1.append(maxp_origin.cpu().numpy())
            maxp_approx_1.append(maxp_approx.cpu().numpy())
            gvalue_approx_1.append(1. / g_out.cpu().numpy())
            batch_idx += 1

    miscls_approx = np.concatenate(miscls_approx)
    miscls_origin = np.concatenate(miscls_origin)
    entros_origin_1 = np.concatenate(entros_origin_1)
    fentros_approx_1 = np.concatenate(fentros_approx_1)
    maxp_origin_1 = np.concatenate(maxp_origin_1)
    maxp_approx_1 = np.concatenate(maxp_approx_1)
    gvalue_approx_1 = np.concatenate(gvalue_approx_1)
    correct_approx = np.sum(1 - miscls_approx)
    correct_ensemble = np.sum(1 - miscls_origin)

    print("AUROC (entros_origin_1): ",
          roc_auc_score(miscls_origin, entros_origin_1))
    print("AUROC (hentros_approx_1): ",
          roc_auc_score(miscls_approx, fentros_approx_1))
    print("AUROC (maxp_approx_1):   ",
          roc_auc_score(miscls_approx, maxp_approx_1))
    print("AUROC (maxp_origin_1):   ",
          roc_auc_score(miscls_origin, maxp_origin_1))
    print("AUROC (gvalue_approx_1): ",
          roc_auc_score(miscls_approx, gvalue_approx_1))
    print("AUPR  (entros_origin_1): ",
          average_precision_score(miscls_origin, entros_origin_1))
    print("AUPR  (hentros_approx_1): ",
          average_precision_score(miscls_approx, fentros_approx_1))
    print("AUPR  (maxp_approx_1):   ",
          average_precision_score(miscls_approx, maxp_approx_1))
    print("AUPR  (maxp_origin_1):   ",
          average_precision_score(miscls_origin, maxp_origin_1))
    print("AUPR  (gvalue_approx_1): ",
          average_precision_score(miscls_approx, gvalue_approx_1))
    print('approx ACC :', correct_approx / (len(test_loader.dataset)))
    print('ensemble ACC :', correct_ensemble / (len(test_loader.dataset)))

    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(ood_loader):
            data, target = data.to(device), target.to(device)
            data = data.view(data.shape[0], -1)

            g_out = F.softplus(sconc(data))
            f_out = F.softmax(smean(data), dim=1)
            pi_q = f_out.mul(g_out)

            samples_p_pi = teacher_ood_samples[
                batch_idx * ood_loader.batch_size:(batch_idx + 1) *
                ood_loader.batch_size].to(device)

            avg_origin_output = torch.mean(samples_p_pi, dim=1)

            pi_p_avg_batch = avg_origin_output

            entro_origin = (-torch.bmm(
                pi_p_avg_batch.view(data.shape[0], 1, -1),
                torch.log(pi_p_avg_batch.view(data.shape[0], -1, 1)))).view(-1)

            fentro_approx = (-torch.bmm(
                f_out.view(data.shape[0], 1, -1),
                torch.log(f_out.view(data.shape[0], -1, 1)))).view(-1)

            entros_origin_2.append(entro_origin.cpu().numpy())
            fentros_approx_2.append(fentro_approx.cpu().numpy())

            alpha = pi_q
            alpha0 = alpha.sum(1)

            entro_approx = torch.lgamma(alpha).sum(1) \
                           - torch.lgamma(alpha0) \
                           + (alpha0 - 10).mul(torch.digamma(alpha0)) \
                           - ((alpha - 1 ).mul(torch.digamma(alpha))).sum(1)

            entros_approx_2.append(entro_approx.cpu().numpy())

            maxp_origin = 1. / torch.max(pi_p_avg_batch, dim=1)[0]
            maxp_approx = 1. / torch.max(f_out, dim=1)[0]

            maxp_origin_2.append(maxp_origin.cpu().numpy())
            maxp_approx_2.append(maxp_approx.cpu().numpy())
            gvalue_approx_2.append(1. / g_out.cpu().numpy())
            batch_idx += 1

        entros_origin_2 = np.concatenate(entros_origin_2)
        fentros_approx_2 = np.concatenate(fentros_approx_2)
        maxp_origin_2 = np.concatenate(maxp_origin_2)
        maxp_approx_2 = np.concatenate(maxp_approx_2)
        gvalue_approx_2 = np.concatenate(gvalue_approx_2)

        fentros_approx = np.concatenate([fentros_approx_1, fentros_approx_2])
        entros_origin = np.concatenate([entros_origin_1, entros_origin_2])
        maxp_approx = np.concatenate([maxp_approx_1, maxp_approx_2])
        maxp_origin = np.concatenate([maxp_origin_1, maxp_origin_2])
        gvalue_approx = np.concatenate([gvalue_approx_1, gvalue_approx_2])
        ood = np.concatenate([
            np.zeros(test_loader.dataset.__len__()),
            np.ones(ood_loader.dataset.__len__())
        ])

        print("-----------------------")
        print("AUROC (entros_origin): ", roc_auc_score(ood, entros_origin))
        print("AUROC (hentros_approx): ", roc_auc_score(ood, fentros_approx))
        print("AUROC (maxp_approx):   ", roc_auc_score(ood, maxp_approx))
        print("AUROC (maxp_origin):   ", roc_auc_score(ood, maxp_origin))
        print("AUROC (gvalue_approx): ", roc_auc_score(ood, gvalue_approx))
        print("AUPR  (entros_origin): ",
              average_precision_score(ood, entros_origin))
        print("AUPR  (hentros_approx): ",
              average_precision_score(ood, fentros_approx))
        print("AUPR  (maxp_approx):   ",
              average_precision_score(ood, maxp_approx))
        print("AUPR  (maxp_origin):   ",
              average_precision_score(ood, maxp_origin))
        print("AUPR  (gvalue_approx): ",
              average_precision_score(ood, gvalue_approx))
Example #48
def plot_imagewise_classification_roc(
    results,
    truths,
    num_class,
    classes,
):
    """
    :param results:
[
    [
        pigment, soft_deposit,
    ] x number-of-images
]
    :param truths:
[
    {
        'filename': 'a.jpg',
        'width': 1280,
        'height': 720,
        "pigment": int,
        "soft_deposit": int,
        'ann': {
            'bboxes': <np.ndarray> (n, 4 (xmin, ymin, xmax, ymax)),
            'labels': <np.ndarray> (n, ),
        }
    } x number-of-images
]
    :return:
    """

    assert len(results) == len(truths)

    predict = [[], []]
    for item in results:
        predict[0].append(item[0])
        predict[1].append(item[1])

    gt = [[], []]
    for item in truths:
        gt[0].append(item['pigment'])
        gt[1].append(item['soft_deposit'])

    all_auc = []
    all_AP = []
    for i in range(num_class):
        auc = metrics.roc_auc_score(
            y_true=gt[i],
            y_score=predict[i],
        )
        fpr, tpr, thresholds = metrics.roc_curve(
            y_true=gt[i],
            y_score=predict[i],
        )

        with open(
                dir_path +
                '/visualization/imagewise_classification_roc_{}.csv'.format(i),
                'a') as csvFile:
            writer = csv.writer(csvFile)
            writer.writerow(thresholds)
            writer.writerow(fpr)
            writer.writerow(tpr)

        precision, recall, thresholds = metrics.precision_recall_curve(
            y_true=gt[i],
            probas_pred=predict[i],
        )
        AP = metrics.average_precision_score(
            y_true=gt[i],
            y_score=predict[i],
        )

        plt.figure()
        plt.title('ROC')
        plt.xlabel('False Positive rate')
        plt.ylabel('True Positive rate')
        plt.ylim(0, 1)
        plt.plot(fpr, tpr, label='AUC: ' + str(auc))
        plt.legend()
        plt.savefig(
            dir_path +
            '/visualization/imagewise_classification_roc_{}.png'.format(i))

        plt.figure()
        plt.title('Precision-Recall')
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.axis([0, 1, 0, 1])
        plt.plot(recall, precision, label='AP: ' + str(AP))
        plt.legend()
        plt.savefig(
            dir_path +
            '/visualization/imagewise_classification_prc_{}.png'.format(i))
        print('auc for class {} is: {}'.format(i, auc))
        print('AP for class {} is: {}'.format(i, AP))

        all_auc.append(auc)
        all_AP.append(AP)

    # class-average
    print('class-average auc is: {}'.format(np.mean(all_auc)))
    print('class-average AP is: {}'.format(np.mean(all_AP)))

    # pure-average
    flat_predict_list = []
    for sublist in predict:
        for item in sublist:
            flat_predict_list.append(item)
    flat_gt_list = []
    for sublist in gt:
        for item in sublist:
            flat_gt_list.append(item)

    auc = metrics.roc_auc_score(
        y_true=flat_gt_list,
        y_score=flat_predict_list,
    )
    fpr, tpr, thresholds = metrics.roc_curve(
        y_true=flat_gt_list,
        y_score=flat_predict_list,
    )

    precision, recall, thresholds = metrics.precision_recall_curve(
        y_true=flat_gt_list,
        probas_pred=flat_predict_list,
    )
    AP = metrics.average_precision_score(
        y_true=flat_gt_list,
        y_score=flat_predict_list,
    )

    plt.figure()
    plt.title('ROC')
    plt.xlabel('False Positive rate')
    plt.ylabel('True Positive rate')
    plt.ylim(0, 1)
    plt.plot(fpr, tpr, label='AUC: ' + str(auc))
    plt.legend()
    plt.savefig(dir_path +
                '/visualization/imagewise_classification_roc_all.png')

    plt.figure()
    plt.title('Precision-Recall')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.axis([0, 1, 0, 1])
    plt.plot(recall, precision, label='AP: ' + str(AP))
    plt.legend()
    plt.savefig(dir_path +
                '/visualization/imagewise_classification_prc_all.png')
    print('pure average auc is: {}'.format(auc))
    print('pure average AP is: {}'.format(AP))
horizontal_flip = True,
vertical_flip = True,
rotation_range = 30)
le = LabelEncoder().fit(labels)
trainY = np_utils.to_categorical(le.transform(trainY), 2)
#    le = LabelEncoder().fit(testY)
testY = np_utils.to_categorical(le.transform(testY), 2)
print("[INFO] compiling model...")
model=ZFNET()
model.compile(loss="binary_crossentropy", optimizer="adam",metrics=['accuracy'])
H = model.fit(train_datagen.flow(trainX, trainY, batch_size = 32), validation_data=(testX, testY), steps_per_epoch=len(trainX) // 32,epochs=5, verbose=1)
# evaluate the network
print("[INFO] evaluating network...")
predictions = model.predict(testX, batch_size=20)
precision=precision_score(testY.argmax(axis=1), predictions.argmax(axis=1), average='weighted')
average_precision = average_precision_score(testY, predictions)
recall = recall_score(testY.argmax(axis=1), predictions.argmax(axis=1),average='weighted')
AUCC.append(average_precision)
REC.append(recall)
PR.append(precision)
print('AUPR',average_precision,'presi',precision,'recall',recall)
#    print(classification_report(testY.argmax(axis=1), predictions.argmax(axis=1), target_names=le.classes_))
model.save('ZFNET.HDF5')
# Compute ROC curve and ROC area for each class
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(2):
    fpr[i], tpr[i], _ = metrics.roc_curve(testY[:, i], predictions[:, i])
    roc_auc[i] = metrics.auc(fpr[i], tpr[i])
# Compute micro-average ROC curve and ROC area
Example #50
                                                    random_state=random_state)
classifier = OneVsRestClassifier(
    svm.SVC(kernel='linear', probability=True, random_state=random_state))
y_score = classifier.fit(X_train, y_train).decision_function(X_test)

# Compute Precision-Recall and plot curve
# The underscore below receives the returned thresholds. It is used as a
# throwaway name: a value is assigned to it, but it is never used again.
precision = dict()
recall = dict()
average_precision = dict()
for i in range(n_classes):
    precision[i], recall[i], _ = precision_recall_curve(
        y_test[:, i], y_score[:, i]
    )  # The last precision and recall values are 1. and 0. respectively and do not have a corresponding threshold. This ensures that the graph starts on the x axis.
    average_precision[i] = average_precision_score(
        y_test[:, i], y_score[:, i])  # slice: performance on the i-th class

# Compute micro-average curve and area. ravel() flattens the arrays to 1-D
precision["micro"], recall["micro"], _ = precision_recall_curve(
    y_test.ravel(), y_score.ravel())
average_precision["micro"] = average_precision_score(
    y_test, y_score, average="micro"
)  # This score corresponds to the area under the precision-recall curve.

# Plot Precision-Recall curve for each class
plt.clf()  # clf() clears the current figure window
plt.plot(recall["micro"],
         precision["micro"],
         label='micro-average Precision-recall curve (area = {0:0.2f})'.format(
             average_precision["micro"]))
for i in range(n_classes):
Example #51
def get_ROC_PR_data(data, clf, pos_label_=None, verbose=False):
    
        """
        source_ROC, source_PR, df_ROC, df_PR, clf = get_ROC_PR_data(data, clf)

        get_ROC_PR_data returns Bokeh ColumnDataSources and dataframes with the
        ROC (TPR/FPR) and precision-recall values for a particular dataset and
        an untrained classifier. The ColumnDataSources can be used to plot
        Bokeh figures, while the dataframes can be used for additional
        exploration and plotting with other libraries. Note that the dataframes
        are returned with metadata (e.g. AUC and the clf used).
        
        data:        tuple of our data (X_train, X_test, y_train, y_test)
                         where each item in the tuple is a numpy ndarray
        clf:         an untrained classifier (e.g. rf = RandomForestClassifier())
        pos_label_:  if targets are not binary (0, 1) then indicate integer for "positive" [default: None]
        verbose:     print warnings [default: False]
        """

        # split data into training, testing
        (X_train, X_test, y_train, y_test) = data

        # train and retrieve probabilities of class per feature for the test data
        probas = clf.fit(X_train, y_train).predict_proba(X_test)

        # get false and true positive rates for positive labels
        #    (and thresholds, which is not used but shown here for fyi)
        if pos_label_ is None:
            pos_label_ = np.max(y_train)
            if verbose:
                print(f"Warning: Maximum target value of '{pos_label_}' used as positive.")
                print("You can use 'pos_label_' to indicate your own.")

        """ ROC """
        fpr, tpr, roc_thresholds = roc_curve(y_test, probas[:,1], pos_label=pos_label_)
        roc_thresholds[0] = np.nan
                                                           
        # get area under the curve (AUC)
        roc_auc = auc(fpr, tpr)
            
        """ PR """
        # get precision and recall values
        precision, recall, pr_thresholds = precision_recall_curve(y_test, probas[:,1], pos_label=pos_label_)
        pr_thresholds = np.insert(pr_thresholds, 0, 0) # do this to correct lengths

        # average precision score
        avg_precision = average_precision_score(y_test, probas[:,1])

        # precision auc
        pr_auc = auc(recall, precision)

        
        """ Create Sources """
        # create legend variables - we'll create an array with len(tpr)
        roc_auc_ = [f"AUC: {roc_auc:.3f}"]*len(tpr)
        pr_auc_ = [f"AUC: {pr_auc:.3f}"]*len(precision)
        clf_name = get_clf_name(clf)
        clf_roc = [f"{clf_name}, AUC: {roc_auc:.3f}"]*len(tpr)
        clf_pr = [f"{clf_name}, AUC: {pr_auc:.3f}"]*len(precision)

        # create bokeh column source for plotting new ROC  
        source_ROC = ColumnDataSource(data=dict(x_fpr=fpr, 
                                                y_tpr=tpr,                                                
                                                thresh_roc=roc_thresholds,
                                                auc_legend=roc_auc_, 
                                                clf_legend=clf_roc))
        
        source_PR = ColumnDataSource(data=dict(x_rec=recall, 
                                               y_prec=precision,                                                
                                               thresh_pr=pr_thresholds,
                                               auc_legend=pr_auc_, 
                                               clf_legend=clf_pr))
        
        """ Dataframes """
        # create output dataframe with TPR and FPR, and metadata
        df_ROC = pd.DataFrame({'TPR':tpr, 'FPR':fpr, 'Thresholds':roc_thresholds})
        df_ROC.auc = roc_auc
        df_ROC.clf = get_clf_name(clf)
        df_ROC.score = clf.score(X_test, y_test)
        
        # create output dataframe with TPR and FPR, and metadata
        df_PR = pd.DataFrame({'Recall':recall, 'Precision':precision, 'Thresholds':pr_thresholds})
        df_PR.auc = pr_auc
        df_PR.clf = get_clf_name(clf)
        df_PR.score = clf.score(X_test, y_test)
        
        return source_ROC, source_PR, df_ROC, df_PR, clf
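# A minimal usage sketch for get_ROC_PR_data() above, assuming the surrounding
# module already imports train_test_split, roc_curve, precision_recall_curve,
# average_precision_score, auc, ColumnDataSource, pandas as pd, numpy as np,
# and defines get_clf_name(). The dataset, classifier and Bokeh plotting below
# are illustrative assumptions.
from bokeh.plotting import figure, show
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=1000, random_state=0)
data = train_test_split(X, y, random_state=0)  # (X_train, X_test, y_train, y_test)

source_ROC, source_PR, df_ROC, df_PR, clf = get_ROC_PR_data(
    data, GradientBoostingClassifier(), verbose=True)

p = figure(title="ROC", x_axis_label="FPR", y_axis_label="TPR")
p.line('x_fpr', 'y_tpr', source=source_ROC, legend_field='auc_legend')
show(p)
print(df_PR.head())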
Example #52
                'class': key,
                'prob': new_probs[jk]
            }
            all_dets.append(det)

    print('Elapsed time = {}'.format(time.time() - st))
    t, p = get_map(all_dets, img_data['bboxes'], (fx, fy))
    for key in t.keys():
        if key not in T:
            T[key] = []
            P[key] = []
        T[key].extend(t[key])  # extend list
        P[key].extend(p[key])  # extend list
    all_aps = []
    for key in T.keys():
        ap = average_precision_score(T[key], P[key])
        print(len(T[key]), len(P[key]))

        print('{} AP: {}'.format(key, ap))
        all_aps.append(ap)
        if idx == 20:
            print(T)
            print(P)
    f_map = np.mean(np.array(all_aps))
    print('mAP = {}'.format(f_map))

    #print(T)
    #print(P)
print('final map = %f' % f_map)
K.clear_session()
'''
Example #53
def plot_precision_recall_curve(clf,
                                X,
                                y,
                                title='Precision-Recall Curve',
                                do_split=True,
                                test_split_ratio=0.33,
                                random_state=None,
                                ax=None):
    """Generates the Precision-Recall curve for a given classifier and dataset.

    Args:
        clf: Classifier instance that implements "fit" and "predict_proba" methods.

        X (array-like, shape (n_samples, n_features)):
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y (array-like, shape (n_samples) or (n_samples, n_features)):
            Target relative to X for classification.

        title (string, optional): Title of the generated plot. Defaults to "Precision-Recall Curve".

        do_split (bool, optional): If True, the dataset is split into training and testing sets.
            The classifier is trained on the training set and the Precision-Recall curves are
            plotted using the performance of the classifier on the testing set. If False, the
            Precision-Recall curves are generated without splitting the dataset or training the
            classifier. This assumes that the classifier has already been called with its `fit`
            method beforehand.

        test_split_ratio (float, optional): Used when do_split is set to True. Determines the
            proportion of the entire dataset to use in the testing split. Default is set to 0.33.

        random_state (int :class:`RandomState`): Pseudo-random number generator state used
            for random sampling.

        ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to plot
            the learning curve. If None, the plot is drawn on a new set of axes.

    Returns:
        ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was drawn.

    Example:
            >>> nb = classifier_factory(GaussianNB())
            >>> nb.plot_precision_recall_curve(X, y, random_state=1)
            <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490>
            >>> plt.show()

        .. image:: _static/examples/plot_precision_recall_curve.png
           :align: center
           :alt: Precision Recall Curve
    """
    if not hasattr(clf, 'predict_proba'):
        raise TypeError('"predict_proba" method not in classifier. '
                        'Cannot calculate Precision-Recall Curve.')

    if not do_split:
        classes = clf.classes_
        probas = clf.predict_proba(X)
        y_true = y

    else:
        X_train, X_test, y_train, y_test = train_test_split(
            X,
            y,
            test_size=test_split_ratio,
            stratify=y,
            random_state=random_state)
        clf_clone = clone(clf)
        probas = clf_clone.fit(X_train, y_train).predict_proba(X_test)
        classes = clf_clone.classes_
        y_true = y_test

    # Compute Precision-Recall curve and area for each class
    precision = dict()
    recall = dict()
    average_precision = dict()
    for i in range(len(classes)):
        precision[i], recall[i], _ = precision_recall_curve(
            y_true, probas[:, i], pos_label=classes[i])

    y_true = label_binarize(y_true, classes=classes)
    if len(classes) == 2:
        y_true = np.hstack((1 - y_true, y_true))

    for i in range(len(classes)):
        average_precision[i] = average_precision_score(y_true[:, i], probas[:,
                                                                            i])

    # Compute micro-average ROC curve and ROC area
    micro_key = 'micro'
    i = 0
    while micro_key in precision:
        i += 1
        micro_key += str(i)

    precision[micro_key], recall[micro_key], _ = precision_recall_curve(
        y_true.ravel(), probas.ravel())
    average_precision[micro_key] = average_precision_score(y_true,
                                                           probas,
                                                           average='micro')

    if ax is None:
        fig, ax = plt.subplots(1, 1)

    ax.set_title(title)
    for i in range(len(classes)):
        ax.plot(recall[i],
                precision[i],
                lw=2,
                label='Precision-recall curve of class {0} '
                '(area = {1:0.3f})'.format(classes[i], average_precision[i]))
    ax.plot(recall[micro_key],
            precision[micro_key],
            lw=2,
            color='gold',
            label='micro-average Precision-recall curve '
            '(area = {0:0.3f})'.format(average_precision[micro_key]))

    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.legend(loc='best')
    return ax
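# Hedged usage sketch (not part of the original source). It assumes the function
# above takes the classifier as its first argument (`clf`, as used in its body)
# and is available in the current namespace; with do_split=False the estimator
# must already be fitted, as the docstring notes.
from sklearn.datasets import load_iris
from sklearn.naive_bayes import GaussianNB
import matplotlib.pyplot as plt

X_iris, y_iris = load_iris(return_X_y=True)
fitted_nb = GaussianNB().fit(X_iris, y_iris)
ax = plot_precision_recall_curve(fitted_nb, X_iris, y_iris, do_split=False)
plt.show()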
Example #54
# In[34]:

precision = dict()
recall = dict()
av_precision = dict()

# In[35]:

test_y_lb = label_binarize(test_y, classes=[1, 2, 3, 4, 5, 6, 7])
best_pred = label_binarize(best_pred, classes=[1, 2, 3, 4, 5, 6, 7])

# In[36]:

for i in range(7):
    precision[i], recall[i], _ = precision_recall_curve(
        test_y_lb[:, i], best_pred[:, i])
    av_precision[i] = average_precision_score(test_y_lb[:, i], best_pred[:, i])

# In[74]:

precision

# In[72]:

recall

# In[77]:

for i in range(7):
    plt.plot(recall[i], precision[i])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
Example #55
def test_rcnn():
    """Test RCNN model."""

    # Load data
    logger.info("✔︎ Loading data...")
    logger.info("Recommended padding Sequence length is: {0}".format(
        FLAGS.pad_seq_len))

    logger.info("✔︎ Test data processing...")
    test_data = dh.load_data_and_labels(FLAGS.test_data_file,
                                        FLAGS.num_classes,
                                        FLAGS.embedding_dim,
                                        data_aug_flag=False)

    logger.info("✔︎ Test data padding...")
    x_test, y_test = dh.pad_data(test_data, FLAGS.pad_seq_len)
    y_test_labels = test_data.labels

    # Load rcnn model
    BEST_OR_LATEST = input("☛ Load Best or Latest Model?(B/L): ")

    while not (BEST_OR_LATEST.isalpha()
               and BEST_OR_LATEST.upper() in ['B', 'L']):
        BEST_OR_LATEST = input(
            "✘ The format of your input is illegal, please re-input: ")
    if BEST_OR_LATEST.upper() == 'B':
        logger.info("✔︎ Loading best model...")
        checkpoint_file = cm.get_best_checkpoint(FLAGS.best_checkpoint_dir,
                                                 select_maximum_value=True)
    else:
        logger.info("✔︎ Loading latest model...")
        checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    logger.info(checkpoint_file)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph(
                "{0}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name(
                "dropout_keep_prob").outputs[0]
            is_training = graph.get_operation_by_name("is_training").outputs[0]

            # Tensors we want to evaluate
            scores = graph.get_operation_by_name("output/scores").outputs[0]
            loss = graph.get_operation_by_name("loss/loss").outputs[0]

            # Split the output node names by '|' if you have several output nodes
            output_node_names = "output/scores"

            # Save the .pb model file
            output_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, output_node_names.split("|"))
            tf.train.write_graph(output_graph_def,
                                 "graph",
                                 "graph-rcnn-{0}.pb".format(MODEL),
                                 as_text=False)

            # Generate batches for one epoch
            batches = dh.batch_iter(list(zip(x_test, y_test, y_test_labels)),
                                    FLAGS.batch_size,
                                    1,
                                    shuffle=False)

            test_counter, test_loss = 0, 0.0

            test_pre_tk = [0.0] * FLAGS.top_num
            test_rec_tk = [0.0] * FLAGS.top_num
            test_F_tk = [0.0] * FLAGS.top_num

            # Collect the predictions here
            true_labels = []
            predicted_labels = []
            predicted_scores = []

            # Collect for calculating metrics
            true_onehot_labels = []
            predicted_onehot_scores = []
            predicted_onehot_labels_ts = []
            predicted_onehot_labels_tk = [[] for _ in range(FLAGS.top_num)]

            for batch_test in batches:
                x_batch_test, y_batch_test, y_batch_test_labels = zip(
                    *batch_test)
                feed_dict = {
                    input_x: x_batch_test,
                    input_y: y_batch_test,
                    dropout_keep_prob: 1.0,
                    is_training: False
                }
                batch_scores, cur_loss = sess.run([scores, loss], feed_dict)

                # Prepare for calculating metrics
                for i in y_batch_test:
                    true_onehot_labels.append(i)
                for j in batch_scores:
                    predicted_onehot_scores.append(j)

                # Get the predicted labels by threshold
                batch_predicted_labels_ts, batch_predicted_scores_ts = \
                    dh.get_label_threshold(scores=batch_scores, threshold=FLAGS.threshold)

                # Add results to collection
                for i in y_batch_test_labels:
                    true_labels.append(i)
                for j in batch_predicted_labels_ts:
                    predicted_labels.append(j)
                for k in batch_predicted_scores_ts:
                    predicted_scores.append(k)

                # Get onehot predictions by threshold
                batch_predicted_onehot_labels_ts = \
                    dh.get_onehot_label_threshold(scores=batch_scores, threshold=FLAGS.threshold)
                for i in batch_predicted_onehot_labels_ts:
                    predicted_onehot_labels_ts.append(i)

                # Get onehot predictions by topK
                for top_num in range(FLAGS.top_num):
                    batch_predicted_onehot_labels_tk = dh.get_onehot_label_topk(
                        scores=batch_scores, top_num=top_num + 1)

                    for i in batch_predicted_onehot_labels_tk:
                        predicted_onehot_labels_tk[top_num].append(i)

                test_loss = test_loss + cur_loss
                test_counter = test_counter + 1

            # Calculate Precision & Recall & F1 (threshold & topK)
            test_pre_ts = precision_score(
                y_true=np.array(true_onehot_labels),
                y_pred=np.array(predicted_onehot_labels_ts),
                average='micro')
            test_rec_ts = recall_score(
                y_true=np.array(true_onehot_labels),
                y_pred=np.array(predicted_onehot_labels_ts),
                average='micro')
            test_F_ts = f1_score(y_true=np.array(true_onehot_labels),
                                 y_pred=np.array(predicted_onehot_labels_ts),
                                 average='micro')

            for top_num in range(FLAGS.top_num):
                test_pre_tk[top_num] = precision_score(
                    y_true=np.array(true_onehot_labels),
                    y_pred=np.array(predicted_onehot_labels_tk[top_num]),
                    average='micro')
                test_rec_tk[top_num] = recall_score(
                    y_true=np.array(true_onehot_labels),
                    y_pred=np.array(predicted_onehot_labels_tk[top_num]),
                    average='micro')
                test_F_tk[top_num] = f1_score(
                    y_true=np.array(true_onehot_labels),
                    y_pred=np.array(predicted_onehot_labels_tk[top_num]),
                    average='micro')

            # Calculate the average AUC
            test_auc = roc_auc_score(y_true=np.array(true_onehot_labels),
                                     y_score=np.array(predicted_onehot_scores),
                                     average='micro')

            # Calculate the average PR
            test_prc = average_precision_score(
                y_true=np.array(true_onehot_labels),
                y_score=np.array(predicted_onehot_scores),
                average="micro")
            test_loss = float(test_loss / test_counter)

            logger.info(
                "☛ All Test Dataset: Loss {0:g} | AUC {1:g} | AUPRC {2:g}".
                format(test_loss, test_auc, test_prc))

            # Predict by threshold
            logger.info(
                "☛ Predict by threshold: Precision {0:g}, Recall {1:g}, F1 {2:g}"
                .format(test_pre_ts, test_rec_ts, test_F_ts))

            # Predict by topK
            logger.info("☛ Predict by topK:")
            for top_num in range(FLAGS.top_num):
                logger.info(
                    "Top{0}: Precision {1:g}, Recall {2:g}, F {3:g}".format(
                        top_num + 1, test_pre_tk[top_num],
                        test_rec_tk[top_num], test_F_tk[top_num]))

            # Save the prediction result
            if not os.path.exists(SAVE_DIR):
                os.makedirs(SAVE_DIR)
            dh.create_prediction_file(output_file=SAVE_DIR +
                                      "/predictions.json",
                                      data_id=test_data.testid,
                                      all_labels=true_labels,
                                      all_predict_labels=predicted_labels,
                                      all_predict_scores=predicted_scores)

    logger.info("✔︎ Done.")
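# Hedged sketch (not from the original source): a compact NumPy version of the
# top-k one-hot scoring idea used above. The real pipeline relies on
# dh.get_onehot_label_topk, whose exact behavior may differ; this only mirrors
# the idea of keeping the k highest-scoring labels per sample and then
# micro-averaging precision and recall.
import numpy as np
from sklearn.metrics import precision_score, recall_score

def onehot_topk(score_matrix, k):
    """Return a 0/1 matrix with ones at the k highest scores of each row."""
    onehot = np.zeros_like(score_matrix, dtype=int)
    topk_idx = np.argsort(score_matrix, axis=1)[:, -k:]
    np.put_along_axis(onehot, topk_idx, 1, axis=1)
    return onehot

# Tiny worked example
toy_true = np.array([[1, 0, 1, 0],
                     [0, 1, 0, 0]])
toy_scores = np.array([[0.9, 0.1, 0.7, 0.2],
                       [0.3, 0.8, 0.6, 0.1]])
toy_pred_top2 = onehot_topk(toy_scores, k=2)
print(precision_score(toy_true, toy_pred_top2, average='micro'))  # 0.75
print(recall_score(toy_true, toy_pred_top2, average='micro'))     # 1.0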
Example #56
def _compute_score(model, X, y, scoring_metric=None, scoring_params=None):
    '''Helper function that maps metric string names to their function calls.
    
    Parameters
    ----------
    model : class inheriting sklearn.base.BaseEstimator
        The classifier whose hyperparameters you need to optimize with grid search.
        The model must have model.fit(X, y) and model.predict(X) defined. Although it
        can work without it, it's best if you also define model.score(X, y) so you can
        choose the scoring function used to select the best parameters. If you are
        using an sklearn model, everything will work out of the box. Models from other
        libraries also work, but you need to wrap them in a class that inherits from
        sklearn.base.BaseEstimator, as seen in:
        https://github.com/cgnorthcutt/hyperopt
        
    X : np.array of shape (n, m)
        The training data.

    y : np.array of shape (n,) or (n, 1)
        Corresponding labels.
        
    scoring_metric : str
        See hypopt.GridSearch.fit() scoring parameter docstring 
        for list of options.
        
    scoring_params : dict
        All other params you want passed to the scoring function.
        Params will be passed as scoring_func(**scoring_params).'''

    if scoring_params is None:
        scoring_params = {}

    if scoring_metric == 'accuracy':
        return metrics.accuracy_score(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'brier_score_loss':
        return metrics.brier_score_loss(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'average_precision':
        return metrics.average_precision_score(y,
                                               model.predict_proba(X)[:, 1],
                                               **scoring_params)
    elif scoring_metric == 'f1':
        return metrics.f1_score(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'f1_micro':
        return metrics.f1_score(y,
                                model.predict(X),
                                average='micro',
                                **scoring_params)
    elif scoring_metric == 'f1_macro':
        return metrics.f1_score(y,
                                model.predict(X),
                                average='macro',
                                **scoring_params)
    elif scoring_metric == 'f1_weighted':
        return metrics.f1_score(y,
                                model.predict(X),
                                average='weighted',
                                **scoring_params)
    elif scoring_metric == 'neg_log_loss':
        return -1. * metrics.log_loss(y, model.predict_proba(X), **
                                      scoring_params)
    elif scoring_metric == 'precision':
        return metrics.precision_score(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'recall':
        return metrics.recall_score(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'roc_auc':
        return metrics.roc_auc_score(y,
                                     model.predict_proba(X)[:, 1],
                                     **scoring_params)
    elif scoring_metric == 'explained_variance':
        return metrics.explained_variance_score(y, model.predict(X),
                                                **scoring_params)
    elif scoring_metric == 'neg_mean_absolute_error':
        return -1. * metrics.mean_absolute_error(y, model.predict(X), **
                                                 scoring_params)
    elif scoring_metric == 'neg_mean_squared_error':
        return -1. * metrics.mean_squared_error(y, model.predict(X), **
                                                scoring_params)
    elif scoring_metric == 'neg_mean_squared_log_error':
        return -1. * metrics.mean_squared_log_error(y, model.predict(X), **
                                                    scoring_params)
    elif scoring_metric == 'neg_median_absolute_error':
        return -1. * metrics.median_absolute_error(y, model.predict(X), **
                                                   scoring_params)
    elif scoring_metric == 'r2':
        return metrics.r2_score(y, model.predict(X), **scoring_params)
    else:
        raise ValueError(scoring_metric + ' is not a supported metric.')
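# Hedged usage sketch (not part of the original module): _compute_score simply
# dispatches on the metric name, so it can be exercised with any fitted sklearn
# estimator; the dataset and estimator below are illustrative choices.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X_demo, y_demo = make_classification(n_samples=200, random_state=0)
demo_model = LogisticRegression(max_iter=1000).fit(X_demo, y_demo)
print(_compute_score(demo_model, X_demo, y_demo, scoring_metric='roc_auc'))
print(_compute_score(demo_model, X_demo, y_demo, scoring_metric='f1'))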
Example #57
                               random_state=0)

cdsw.track_metric("numTrees", param_numTrees)
cdsw.track_metric("maxDepth", param_maxDepth)
cdsw.track_metric("impurity", param_impurity)

randF.fit(pdTrain[features], pdTrain['label'])

predictions = randF.predict(pdTest[features])

#temp = randF.predict_proba(pdTest[features])

pd.crosstab(pdTest['label'],
            predictions,
            rownames=['Actual'],
            colnames=['Prediction'])

list(zip(pdTrain[features], randF.feature_importances_))

y_true = pdTest['label']
y_scores = predictions
auroc = roc_auc_score(y_true, y_scores)
ap = average_precision_score(y_true, y_scores)
print(auroc, ap)

cdsw.track_metric("auroc", auroc)
cdsw.track_metric("ap", ap)

pickle.dump(randF, open("models/sklearn_rf.pkl", "wb"))

cdsw.track_file("models/sklearn_rf.pkl")
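# Hedged follow-up (not in the original script): AUROC and average precision are
# ranking metrics, so they are usually computed from class-1 probabilities rather
# than hard 0/1 predictions; this reuses the fitted randF and the same test split.
proba_scores = randF.predict_proba(pdTest[features])[:, 1]
auroc_proba = roc_auc_score(pdTest['label'], proba_scores)
ap_proba = average_precision_score(pdTest['label'], proba_scores)
print(auroc_proba, ap_proba)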
Example #58
        balanced_accuracy_np = np.zeros(len(random_seed_list))
        log_loss_np = np.zeros(len(random_seed_list))
        avg_precision_np = np.zeros(len(random_seed_list))
        for k, seed in enumerate(random_seed_list):
            random.seed(int(seed))
            rnd_inds = random.sample(range(x_test.shape[0]), int(0.8*x_test.shape[0])) 
            curr_y_test = y_test[rnd_inds]
            curr_x_test = x_test[rnd_inds, :]
            y_pred = np.argmax(rnn.predict_proba(curr_x_test), -1)
            y_pred_proba = rnn.predict_proba(curr_x_test)[:, 1]
            y_score = y_pred_proba

            roc_auc_np[k] = roc_auc_score(curr_y_test, y_score)
            balanced_accuracy_np[k] = balanced_accuracy_score(curr_y_test, y_pred)
            log_loss_np[k] = log_loss(curr_y_test, y_pred_proba, normalize=True) / np.log(2)
            avg_precision_np[k] = average_precision_score(curr_y_test, y_score)
        
        print('tslice : %s, ROC-AUC : %.2f'%(tslice, np.percentile(roc_auc_np, 50)))
        
        for prctile in prctile_vals:
            row_dict = dict()
            row_dict['model'] = 'RNN'
            row_dict['percentile'] = prctile
            row_dict['tslice'] = tslice
            row_dict['roc_auc'] = np.percentile(roc_auc_np, prctile)
            row_dict['balanced_accuracy'] = np.percentile(balanced_accuracy_np, prctile)
            row_dict['log_loss'] = np.percentile(log_loss_np, prctile)
            row_dict['average_precision'] = np.percentile(avg_precision_np, prctile)

            perf_df = perf_df.append(row_dict, ignore_index=True)      
    
Example #59
File: train.py  Project: yaniv256/MedGraph
def train(args):
    np.random.seed(46)  # seed the global NumPy RNG (RandomState(46) on its own had no effect)

    # Load the data into MedGraph data structure
    graph_file = 'data/%s.npz' % args.dataset
    data_loader = DataLoader(graph_file)

    display_freq = 10  # Frequency of displaying the training results

    # Set user-defined settings in the data loader
    data_loader.embedding_dim = args.embedding_dim
    data_loader.vc_batch_size = args.vc_batch_size
    data_loader.K = args.K
    data_loader.learning_rate = args.learning_rate
    data_loader.is_gauss = args.gauss
    data_loader.distance = args.distance
    data_loader.is_time_dis = args.time_dis

    model = MedGraph(data_loader)

    # Number of training iterations in each epoch
    global_step = 0
    num_iter = len(data_loader.vv_train) // args.vv_batch_size
    print('Number of iterations per epoch: {}'.format(num_iter))

    with tf.Session() as sess:
            tf.global_variables_initializer().run()
            for epoch in range(args.num_epochs):
                start_time = time.time()
                tot_loss = 0
                data = data_loader.sequential_randomize_vv_sequences(data_loader.vv_train_seq)

                for iteration in range(num_iter):
                    global_step += 1
                    start = iteration * args.vv_batch_size
                    end = (iteration + 1) * args.vv_batch_size if iteration < num_iter else data[0].shape[0]

                    # Fetch vv sequences for the current batch
                    (batch_vv_inputs, batch_time_train_in, batch_time_train_out, batch_out_mask,
                     batch_vv_outputs) = data_loader.fetch_vv_batch(data, start, end)

                    # Fetch vc edges for the current batch
                    (vc_u_i, vc_u_j, vc_label) = data_loader.fetch_vc_batch(batch_size=args.vc_batch_size, K=args.K)

                    # Run optimization operation (backprop)
                    feed_dict_batch = {
                        model.X_visits: sparse_feeder(data_loader.X_visits_train),
                        model.vv_inputs: batch_vv_inputs,
                        model.vv_outputs: batch_vv_outputs,
                        model.vv_in_time: batch_time_train_in,
                        model.vv_out_time: batch_time_train_out,
                        model.vv_out_mask: batch_out_mask,
                        model.vc_u_i: vc_u_i,
                        model.vc_u_j: vc_u_j,
                        model.vc_label: vc_label}
                    loss, _ = sess.run([model.loss, model.train_op], feed_dict=feed_dict_batch)
                    tot_loss += loss

                print("Epoch {:3d}:\t Training loss: {:.4f}\t Time taken: {:.4f}sec".format(epoch + 1,
                                                                                          tot_loss / num_iter,
                                                                                          time.time() - start_time))

                # Run validation and test after every epoch

                # Predict for validation set
                feed_dict_valid = {model.X_visits: sparse_feeder(data_loader.X_visits_val),
                                   model.vv_inputs: data_loader.vv_valid_seq[0],
                                   model.vv_in_time: data_loader.vv_valid_seq[1],
                                   model.vv_out_time: data_loader.vv_valid_seq[2],
                                   model.vv_out_mask: data_loader.vv_valid_seq[3]}
                y_pred_valid = sess.run(model.y, feed_dict=feed_dict_valid)
                # Calculate validation set evaluation metrics
                val_auc = roc_auc_score(y_true=data_loader.vv_valid_seq[4], y_score=y_pred_valid)
                val_ap = average_precision_score(y_true=data_loader.vv_valid_seq[4], y_score=y_pred_valid)

                # Predict for test set
                feed_dict_test = {model.X_visits: sparse_feeder(data_loader.X_visits_test),
                                  model.vv_inputs: data_loader.vv_test_seq[0],
                                  model.vv_in_time: data_loader.vv_test_seq[1],
                                  model.vv_out_time: data_loader.vv_test_seq[2],
                                  model.vv_out_mask: data_loader.vv_test_seq[3]}
                y_pred_test = sess.run(model.y, feed_dict=feed_dict_test)
                # Calculate test set evaluation metrics
                test_auc = roc_auc_score(y_true=data_loader.vv_test_seq[4], y_score=y_pred_test)
                test_ap = average_precision_score(y_true=data_loader.vv_test_seq[4], y_score=y_pred_test)

                # Save visit and code embeddings for test data (we use the same mapping dictionary)
                if args.gauss:
                    mu, sigma = sess.run([model.embedding, model.sigma], feed_dict=feed_dict_test)
                    np.save('emb/%s_embedding.npy' % args.dataset,
                            {'mu': data_loader.embedding_mapping(mu),
                             'sigma': data_loader.embedding_mapping(sigma)})
                else:
                    mu = sess.run(model.embedding, feed_dict=feed_dict_test)
                    np.save('emb/%s_embedding.npy' % args.dataset, data_loader.embedding_mapping(mu))

                print(
                    "Validation AUC: {:.4f}\t Validation AP: {:.4f}\t Test AUC: {:.4f}\t Test AP: {:.4f}\t".format(
                        val_auc, val_ap, test_auc, test_ap))
                print('----------------------------------------------------------------------------------------------------------------------------------')
y_score = classifier.decision_function(X_test)

# %%
# The average precision score in multi-label settings
# ...................................................
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score

# For each class
precision = dict()
recall = dict()
average_precision = dict()
for i in range(n_classes):
    precision[i], recall[i], _ = precision_recall_curve(
        Y_test[:, i], y_score[:, i])
    average_precision[i] = average_precision_score(Y_test[:, i], y_score[:, i])

# A "micro-average": quantifying score on all classes jointly
precision["micro"], recall["micro"], _ = precision_recall_curve(
    Y_test.ravel(), y_score.ravel())
average_precision["micro"] = average_precision_score(Y_test,
                                                     y_score,
                                                     average="micro")
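# Hedged aside (not in the original example): with average="micro", every entry of
# the label indicator matrix is treated as part of one big binary problem, so the
# micro-averaged value above equals average precision on the flattened arrays.
print(average_precision["micro"],
      average_precision_score(Y_test.ravel(), y_score.ravel()))  # same value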

# %%
# Plot the micro-averaged Precision-Recall curve
# ..............................................
display = PrecisionRecallDisplay(
    recall=recall["micro"],
    precision=precision["micro"],
    average_precision=average_precision["micro"],