コード例 #1
0
def linreg_ccv_plot_roc(num_folds):
    """Plot per-fold and mean ROC curves for a linear-regression scorer.

    Reads the module-level ``data``, builds ``num_folds`` folds with
    ``pd.create_folds`` and, for each fold, fits ``LinearRegression`` on the
    training split and feeds its predictions on the test split to
    ``roc_curve`` as scores. Every fold's curve, the chance diagonal and the
    point-wise mean curve are drawn on one figure, which is then shown.

    Args:
        num_folds: Number of folds to create and evaluate.

    Raises:
        ValueError: If ``num_folds`` is smaller than 1 (there would be
            nothing to average).
    """
    if num_folds < 1:
        raise ValueError("num_folds must be at least 1")

    folds = pd.create_folds(data, num_folds)
    classifier = LinearRegression()

    # Common FPR grid so the per-fold TPR curves can be averaged point-wise.
    mean_fpr = np.linspace(0, 1, 100)
    mean_tpr = np.zeros_like(mean_fpr)

    for i in range(num_folds):
        test_x, test_y, train_x, train_y = pd.split_into_sets(data, folds, i)
        scores = classifier.fit(train_x, train_y).predict(test_x)
        fpr, tpr, _ = roc_curve(test_y, scores)  # (y_true, y_score)

        # Resample this fold's curve onto the shared grid before summing.
        mean_tpr += np.interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0  # pin the averaged curve to start at the origin
        roc_auc = auc(fpr, tpr)

        plt.plot(fpr, tpr, lw=1,
                 label='ROC fold %d (area = %0.2f)' % (i, roc_auc))

    plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')

    # Divide by the number of curves actually accumulated above.
    mean_tpr /= num_folds
    mean_tpr[-1] = 1.0  # pin the averaged curve to end at (1, 1)
    mean_auc = auc(mean_fpr, mean_tpr)
    plt.plot(mean_fpr, mean_tpr, 'k--',
             label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)

    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('%d-fold Clustered Cross-Validation' % num_folds)
    plt.legend(loc="lower right")
    plt.show()
コード例 #2
0
def precision_recall_curve(num_folds): #w ccv 10fold
    """Plot precision-recall curves of logistic regression for each fold.

    Reads the module-level ``data``, builds ``num_folds`` folds with
    ``pd.create_folds`` and, for each fold, fits ``LogisticRegression`` on
    the training split, prints the precision and recall arrays for the test
    split, and shows two figures: a single precision-recall curve, and the
    micro-average curve together with the per-class curves.

    NOTE: the original author marked this function as not yet tested.

    Args:
        num_folds: Number of folds to create and evaluate.
    """
    # This function's own name shadows scikit-learn's precision_recall_curve
    # at module level, so calling that name from here would re-enter this
    # function with two arguments and raise TypeError. Bind the library
    # function to a private alias instead.
    from sklearn.metrics import precision_recall_curve as sk_pr_curve

    folds = pd.create_folds(data, num_folds)
    classifier = LogisticRegression()

    for j in range(num_folds):
        test_x, test_y, train_x, train_y = pd.split_into_sets(data, folds, j)
        probs = classifier.fit(train_x, train_y).predict_proba(test_x)

        precision, recall, _ = sk_pr_curve(test_y, probs[:, 1])
        print(precision)
        print(recall)

        # From here on the names are reused as per-key containers.
        precision = dict()
        recall = dict()
        average_precision = dict()
        # NOTE(review): both "classes" are scored with probs[:, 1] against
        # the same labels, so the two per-class curves are identical --
        # confirm whether class 0 was meant to use probs[:, 0].
        for i in range(2):
            precision[i], recall[i], _ = sk_pr_curve(test_y, probs[:, 1])
            average_precision[i] = average_precision_score(test_y, probs[:, 1])

        # Compute micro-average precision-recall curve and average precision
        precision["micro"], recall["micro"], _ = sk_pr_curve(
            test_y.ravel(), probs[:, 1].ravel())
        average_precision["micro"] = average_precision_score(
            test_y, probs[:, 1], average="micro")

        # Plot Precision-Recall curve
        plt.clf()
        plt.plot(recall[0], precision[0], label='Precision-Recall curve')
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.ylim([0.0, 1.05])
        plt.xlim([0.0, 1.0])
        plt.title('Precision-Recall example: AUC={0:0.2f}'.format(average_precision[0]))
        plt.legend(loc="lower left")
        plt.show()

        # Plot the micro-average curve plus one curve per class
        plt.clf()
        plt.plot(recall["micro"], precision["micro"],
                 label='micro-average Precision-recall curve (area = {0:0.2f})'
                       ''.format(average_precision["micro"]))
        for i in range(2):
            plt.plot(recall[i], precision[i],
                     label='Precision-recall curve of class {0} (area = {1:0.2f})'
                           ''.format(i, average_precision[i]))

        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.title('Extension of Precision-Recall curve to multi-class')
        plt.legend(loc="lower right")
        plt.show()
コード例 #3
0
def logreg_precision_recall_ccv(num_folds):
    """Print the mean precision and recall of logistic regression over folds.

    Reads the module-level ``data``, builds ``num_folds`` folds with
    ``pd.create_folds`` and, for each fold, fits ``LogisticRegression`` on
    the training split, thresholds the second ``predict_proba`` column at
    0.5 to get hard 0/1 predictions for the test split, and scores them with
    ``precision_score`` and ``recall_score``. The per-fold scores are
    averaged and printed.

    Args:
        num_folds: Number of folds to create and evaluate.

    Raises:
        ValueError: If ``num_folds`` is smaller than 1 (there would be
            nothing to average).
    """
    if num_folds < 1:
        raise ValueError("num_folds must be at least 1")

    folds = pd.create_folds(data, num_folds)
    classifier = LogisticRegression()

    total_recall = 0.0
    total_precision = 0.0
    for i in range(num_folds):
        test_x, test_y, train_x, train_y = pd.split_into_sets(data, folds, i)
        probs = classifier.fit(train_x, train_y).predict_proba(test_x)

        # Hard labels: predict 1 when the second probability column is >= 0.5.
        y_pred = [1 if p >= .5 else 0 for p in probs[:, 1]]

        total_recall += recall_score(test_y, y_pred)  # (y_true, y_pred)
        total_precision += precision_score(test_y, y_pred)

    # Divide by the number of scores actually accumulated above.
    mean_precision = total_precision / num_folds
    mean_recall = total_recall / num_folds

    # Single-argument print calls behave the same on Python 2 and 3.
    print("MEAN PRECISION")
    print(mean_precision)
    print("MEAN RECALL")
    print(mean_recall)