Exemple #1
0
def AUC(targetVariable, allPredictions):
    """Return the area under the ROC curve for rows with a finite target.

    Rows where ``targetVariable`` is NaN or infinite are removed from both
    arrays (via ``numpy.isfinite``) before the curve is computed with the
    externally provided ``roc`` and ``auc`` helpers.
    """
    finiteRows = numpy.isfinite(targetVariable)
    knownTargets = targetVariable[finiteRows]
    matchingPredictions = allPredictions[finiteRows]
    FPR, TPR, _ = roc(knownTargets, matchingPredictions)
    return auc(FPR, TPR)
def catClassify(botData, kernelType, nTopic):
    """Cross-validate a NuSVC on ``botData`` and summarise the PR-AUC.

    Parameters
    ----------
    botData : 2-D array
        Columns ``0 .. nTopic-1`` are used as features and column
        ``nTopic`` as the class label.
    kernelType :
        Accepted for interface compatibility; the current body does not
        use it (the classifier is built with NuSVC defaults).
    nTopic : int
        Number of feature columns.

    Returns
    -------
    list
        ``[mean, std]`` computed over every entry of the per-fold metric
        table.

    Notes
    -----
    Reads the module-level names ``nFold`` and ``nMetrics``.
    """
    X = botData[:, :nTopic]
    y = botData[:, nTopic]

    classifier = svm.NuSVC(probability=True)

    # stratified cross-validation folds
    cv = StratifiedKFold(y, k=nFold)

    # Builtin float replaces the removed ``np.float`` alias (same dtype).
    metricstemp = np.zeros((nFold, nMetrics), float)
    for i, (train, test) in enumerate(cv):
        probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
        precision, recall, _ = precision_recall_curve(y[test], probas_[:, 1])
        pr_auc = auc(recall, precision)
        # A one-element list is broadcast across the whole row.
        metricstemp[i] = [pr_auc]

    return [np.mean(metricstemp), np.std(metricstemp)]
def catClassify(dataPath, catname, kernelType, dataext, catmap, nTopic):
    """Cross-validate a one-vs-rest SVC for ``catname``.

    Positive rows are read from ``dataPath + catname + dataext``; negative
    rows are read from the corresponding file of every other key in
    ``catmap`` and then randomly sampled (with replacement) down to the
    number of positive rows.

    Parameters
    ----------
    dataPath, dataext : str
        Prefix and suffix wrapped around a category name to form a file
        path.
    catname : str
        Category treated as the positive class.
    kernelType :
        Passed to ``svm.SVC`` as ``kernel``.
    catmap : mapping
        Its keys are the category names.
    nTopic : int
        Number of feature columns; column ``nTopic`` is overwritten with
        the class label (1 for positive, 0 for negative).

    Returns
    -------
    list
        ``[mean, std]`` of the per-fold ``[roc_auc, pr_auc]`` rows, each
        taken along axis 0.

    Raises
    ------
    ValueError
        If ``catmap`` contains no category other than ``catname``.

    Notes
    -----
    Reads the module-level names ``nFold`` and ``nMetrics``.
    """
    # read the category data which will be positive
    catpos = np.genfromtxt(dataPath + catname + dataext, dtype=int)
    catpos = catpos[:, :nTopic + 1]
    catpos[:, nTopic] = 1

    # Read the data of the remaining classes.  Each category is loaded from
    # its own file; the previous code re-read the positive file and reset
    # its "first visit" flag on every iteration, so nothing was accumulated.
    negParts = [
        np.genfromtxt(dataPath + cats + dataext, dtype=int)
        for cats in catmap.keys()
        if cats != catname
    ]
    if not negParts:
        raise ValueError(
            'no category other than %r available for negative examples'
            % (catname,))
    catneg = np.concatenate(negParts, axis=0)

    # sample the negative data to have equal size as the positive
    nPos = catpos.shape[0]
    nNeg = catneg.shape[0]
    catneg = catneg[np.random.randint(0, nNeg, nPos), :]
    catneg = catneg[:, :nTopic + 1]
    catneg[:, nTopic] = 0

    # combine positive and negative data, then shuffle the rows to aid in
    # random selection of train and test
    data = np.concatenate((catpos, catneg), axis=0)
    np.random.shuffle(data)

    X = data[:, :nTopic]
    y = data[:, nTopic]

    cv = StratifiedKFold(y, k=nFold)
    classifier = svm.SVC(kernel=kernelType, probability=True)
    # Builtin float replaces the removed ``np.float`` alias (same dtype).
    metricstemp = np.zeros((nFold, nMetrics), float)
    for i, (train, test) in enumerate(cv):
        probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
        fpr, tpr, _ = roc_curve(y[test], probas_[:, 1])
        roc_auc = auc(fpr, tpr)
        precision, recall, _ = precision_recall_curve(y[test], probas_[:, 1])
        pr_auc = auc(recall, precision)
        metricstemp[i] = [roc_auc, pr_auc]

    return [np.mean(metricstemp, axis=0), np.std(metricstemp, axis=0)]
Exemple #4
0
def _calculate_auc(classifier, Xt, yt):
    """Score a fitted linear classifier on ``(Xt, yt)`` with ROC AUC.

    The decision value ``Xt . coef_.T + intercept_[0]`` is passed through
    ``sigmoid`` and the ROC curve is evaluated at 1000 thresholds spread
    evenly between the smallest and largest resulting probability.

    Returns
    -------
    tuple
        ``(auc_score, fpr, tpr)``.
    """
    w = classifier.coef_
    b = classifier.intercept_[0]
    prob = sigmoid(np.dot(Xt, w.T) + b)
    # np.linspace needs an integer sample count; the float 1e3 used before
    # is rejected by current NumPy releases.
    thresholds = np.linspace(prob.min(), prob.max(), 1000)
    fpr, tpr, _ = roc_curve(yt, prob, thresholds=thresholds)
    auc_score = auc(fpr, tpr)
    return auc_score, fpr, tpr
def AUC(targetVariable, allPredictions):
    """Return the ROC AUC over finite-target rows, using rounded predictions.

    Rows where ``targetVariable`` is not finite are dropped from both
    arrays.  The remaining predictions are rounded to three decimal places
    before the curve is computed, which the original author did to speed up
    the AUC calculation.
    """
    AUC_DEC_PTS = 3  # decimal places kept in the predictions
    finiteRows = numpy.isfinite(targetVariable)
    knownTargets = targetVariable[finiteRows]
    roundedPredictions = numpy.round(allPredictions[finiteRows],
                                     decimals=AUC_DEC_PTS)
    FPR, TPR, _ = roc(knownTargets, roundedPredictions)
    return auc(FPR, TPR)
Exemple #6
0
def AUCkFoldLogisticRegression(regularization, inData, penalty, kFolds):
    """Estimate the ROC AUC of a logistic regression by k-fold validation.

    A fresh ``LogisticRegression(C=regularization, penalty=penalty)`` is
    fitted on every training fold.  The held-out labels and predicted
    probabilities of all folds are pooled and a single AUC is computed over
    the pooled data.  Per-fold AUCs are printed as progress output.

    Parameters
    ----------
    regularization :
        Passed to ``LogisticRegression`` as ``C``.
    inData :
        Passed to ``getXYData`` to obtain the feature matrix and labels.
    penalty :
        Passed to ``LogisticRegression`` as ``penalty``.
    kFolds : int
        Number of folds; must divide the number of samples evenly.

    Returns
    -------
    The AUC computed over the pooled held-out predictions.

    Raises
    ------
    UserWarning
        If the number of samples is not a multiple of ``kFolds``.
    """
    print("\n\tCalculating AUC for regularization %s using %s folds"
          % (regularization, kFolds))
    sys.stdout.flush()
    xData, yData = getXYData(inData)
    nSamples, nFeatures = xData.shape
    if nSamples % kFolds != 0:
        # Equal fold sizes are required; report the actual sample count
        # instead of a hard-coded data-set size.
        raise UserWarning(
            "Uneven fold sizes! kFolds (%s) must evenly divide the number "
            "of samples (%s)" % (kFolds, nSamples))

    # probas_ has one column per class.  The original author observed that
    # column 0 behaved as the class-1 column in their environment.
    # NOTE(review): verify the column order against the installed library
    # before relying on this.
    CLASS_1_COL = 0

    crossValFolds = KFold(nSamples, kFolds)
    yTestDataAllFolds = array([])
    probasTestDataAllFolds = array([])
    for foldNum, (train, test) in enumerate(crossValFolds):
        # fit a new LR model for each fold's data & evaluate using AUC
        LRclassifier = LogisticRegression(C=regularization, penalty=penalty)
        probas_ = LRclassifier.fit(xData[train],
                                   yData[train]).predict_proba(xData[test])
        numNon0Coefs = sum(1 for coef in LRclassifier.coef_[0] if coef != 0)
        foldProbas = probas_[:, CLASS_1_COL]

        FPR, TPR, _ = roc(yData[test], foldProbas)
        roc_auc = auc(FPR, TPR)
        print("\tFold: %s  AUC: %s Non0Coefs: %s Reg: %s %s"
              % (foldNum, roc_auc, numNon0Coefs, regularization,
                 localTimeString()))
        sys.stdout.flush()

        yTestDataAllFolds = numpy.concatenate((yTestDataAllFolds, yData[test]))
        probasTestDataAllFolds = numpy.concatenate(
            (probasTestDataAllFolds, foldProbas))

    FPRallFolds, TPRallFolds, _ = roc(yTestDataAllFolds,
                                      probasTestDataAllFolds)
    roc_auc_allFolds = auc(FPRallFolds, TPRallFolds)
    print("AUC_all_folds: %s Reg: %s Penalty: %s kFolds: %s %s"
          % (roc_auc_allFolds, regularization, penalty, kFolds,
             localTimeString()))
    return roc_auc_allFolds
Exemple #7
0
def plot_precision_recall(precision, recall):
    """
    Plot a precision-recall curve, annotated with its area, and show it.
       precision, recall, thresholds = precision_recall_curve(y[half:], probas_[:,1])
       plot_precision_recall(precision, recall)
    Code from http://scikit-learn.sourceforge.net/auto_examples/plot_precision_recall.html
    """
    pl.figure(-1)
    pl.clf()

    pr_area = auc(recall, precision)
    curve_label = 'Precision-Recall curve (area = %0.2f)' % pr_area
    pl.plot(recall, precision, label=curve_label)

    # axis labels and limits
    pl.xlabel('Recall')
    pl.ylabel('Precision')
    pl.ylim([0.0, 1.05])
    pl.xlim([0.0, 1.0])

    figure_title = 'Precision-Recall example: AUC=%0.2f' % pr_area
    pl.title(figure_title)
    pl.legend(loc="lower left")
    pl.show()
Exemple #8
0
def plot_roc(fpr, tpr):
    """
    Plot a ROC curve together with the diagonal reference line and show it.
       fpr, tpr, thresholds = roc_curve(y[half:], probas_[:,1])
       plot_roc(fpr, tpr)
    Code from http://scikit-learn.sourceforge.net/auto_examples/plot_roc.html
    """
    pl.figure(-1)
    pl.clf()

    curve_label = 'ROC curve (area = %0.2f)' % auc(fpr, tpr)
    pl.plot(fpr, tpr, label=curve_label)
    pl.plot([0, 1], [0, 1], 'k--')  # dashed diagonal for reference

    # both axes span the unit interval
    for set_limits in (pl.xlim, pl.ylim):
        set_limits([0.0, 1.0])

    pl.xlabel('False Positive Rate')
    pl.ylabel('True Positive Rate')
    pl.title('Receiver operating characteristic example')
    pl.legend(loc="lower right")
    pl.show()
def svm_roc(table, kernel='linear', C=1.0):
    '''Fit an SVC on half of ``table`` and plot its ROC curve on the rest.

    Column 0 of ``table`` is used as the label and the remaining columns as
    features.  Rows are shuffled with NumPy's global generator seeded to 0,
    the first half trains the classifier and the second half is scored.
    The area under the curve is printed and the curve is displayed.
    '''
    from scikits.learn import svm
    from scikits.learn.metrics import roc_curve, auc
    import pylab as pl

    features = table[:, 1:]
    labels = table[:, 0]
    n_samples, _ = features.shape

    # reproducible shuffle of the row order
    order = list(range(n_samples))
    np.random.seed(0)
    np.random.shuffle(order)
    features, labels = features[order], labels[order]
    half = int(n_samples / 2)

    # train on the first half, predict probabilities for the second half
    model = svm.SVC(kernel=kernel, probability=True, C=C)
    probas_ = model.fit(features[:half],
                        labels[:half]).predict_proba(features[half:])

    # ROC curve and the area under it
    fpr, tpr, _ = roc_curve(labels[half:], probas_[:, 1])
    roc_auc = auc(fpr, tpr)
    print("Area under the ROC curve : %f" % roc_auc)

    # draw the curve with a dashed diagonal for reference
    pl.figure(-1)
    pl.clf()
    curve_label = 'ROC curve (area = %0.2f)' % roc_auc
    pl.plot(fpr, tpr, label=curve_label)
    pl.plot([0, 1], [0, 1], 'k--')
    pl.xlim([0.0, 1.0])
    pl.ylim([0.0, 1.0])
    pl.xlabel('False Positive Rate')
    pl.ylabel('True Positive Rate')
    pl.title('Receiver operating characteristic example')
    pl.legend(loc="lower right")
    pl.show()
Exemple #10
0
# Shuffle the sample order with a fixed seed, then split the data in half.
p = list(range(n_samples))
random.seed(0)
random.shuffle(p)
X, y = X[p], y[p]
half = int(n_samples / 2)

# Append 200 * n_features columns of Gaussian noise to the feature matrix.
X = np.c_[X, np.random.randn(n_samples, 200 * n_features)]

# Fit a linear SVC on the first half; predict probabilities for the rest.
classifier = svm.SVC(kernel='linear', probability=True)
probas_ = classifier.fit(X[:half], y[:half]).predict_proba(X[half:])

# ROC curve on the held-out half and the area under it.
fpr, tpr, thresholds = roc_curve(y[half:], probas_[:, 1])
roc_auc = auc(fpr, tpr)
print("Area under the ROC curve : %f" % roc_auc)

# Draw the curve with a dashed diagonal for reference.
pl.figure(-1)
pl.clf()
pl.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
pl.plot([0, 1], [0, 1], 'k--')
pl.xlim([0.0, 1.0])
pl.ylim([0.0, 1.0])
pl.xlabel('False Positive Rate')
pl.ylabel('True Positive Rate')
pl.title('Receiver operating characteristic example')
pl.legend(loc="lower right")
pl.show()
Exemple #11
0
def calculate_auc_score(nn, X, y):
    """Score the network ``nn`` on ``(X, y)`` with ROC AUC.

    ``nn.activate`` is called once per row of ``X``; the collected outputs
    are compared with ``y`` at 1000 thresholds evenly spaced over [0, 1].

    Returns
    -------
    tuple
        ``(auc_score, fpr, tpr)``.
    """
    probabilities = np.array([nn.activate(row).tolist() for row in X])
    # np.linspace needs an integer sample count; the float 1e3 used before
    # is rejected by current NumPy releases.
    thresholds = np.linspace(0, 1, 1000)
    fpr, tpr, _ = roc_curve(y, probabilities, thresholds=thresholds)
    auc_score = auc(fpr, tpr)
    return auc_score, fpr, tpr
n_samples, n_features = X.shape

# Shuffle the sample order with a fixed seed, then split the data in half.
p = list(range(n_samples))
random.seed(0)
random.shuffle(p)
X, y = X[p], y[p]
half = int(n_samples / 2)

# Append 200 * n_features columns of seeded Gaussian noise to the features.
np.random.seed(0)
X = np.c_[X, np.random.randn(n_samples, 200 * n_features)]

# Fit a linear SVC on the first half; predict probabilities for the rest.
classifier = svm.SVC(kernel='linear', probability=True)
probas_ = classifier.fit(X[:half], y[:half]).predict_proba(X[half:])

# Precision-recall curve on the held-out half and the area under it.
precision, recall, thresholds = precision_recall_curve(y[half:], probas_[:, 1])
area = auc(recall, precision)
print("Area Under Curve: %0.2f" % area)

# Draw the curve.
pl.figure(-1)
pl.clf()
pl.plot(recall, precision, label='Precision-Recall curve')
pl.xlabel('Recall')
pl.ylabel('Precision')
pl.ylim([0.0, 1.05])
pl.xlim([0.0, 1.0])
pl.title('Precision-Recall example: AUC=%0.2f' % area)
pl.legend(loc="lower left")
pl.show()
n_samples, n_features = X.shape

# Reproducible shuffle of the rows before the train/test split.
p = list(range(n_samples))
random.seed(0)
random.shuffle(p)
X, y = X[p], y[p]
half = int(n_samples/2)

# Widen the feature matrix with seeded Gaussian noise columns.
np.random.seed(0)
X = np.c_[X, np.random.randn(n_samples, 200 * n_features)]

# Train on the first half and score the second half.
classifier = svm.SVC(kernel='linear', probability=True)
probas_ = classifier.fit(X[:half], y[:half]).predict_proba(X[half:])

# Precision-recall curve and its area.
precision, recall, thresholds = precision_recall_curve(y[half:], probas_[:, 1])
area = auc(recall, precision)
print("Area Under Curve: %0.2f" % area)

# Plot the curve with the area in the title.
pl.figure(-1)
pl.clf()
pl.plot(recall, precision, label='Precision-Recall curve')
pl.xlabel('Recall')
pl.ylabel('Precision')
pl.ylim([0.0, 1.05])
pl.xlim([0.0, 1.0])
pl.title('Precision-Recall example: AUC=%0.2f' % area)
pl.legend(loc="lower left")
pl.show()
# Cross-validated ROC analysis: one curve per fold plus their mean.
cv = StratifiedKFold(y, k=6)
classifier = svm.SVC(kernel='linear', probability=True)

# The mean curve is accumulated on a fixed grid of 100 false-positive rates.
mean_tpr = 0.0
mean_fpr = np.linspace(0, 1, 100)
all_tpr = []

for i, (train, test) in enumerate(cv):
    probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
    # ROC curve of this fold, resampled onto the common grid
    fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
    mean_tpr += interp(mean_fpr, fpr, tpr)
    mean_tpr[0] = 0.0  # pin the start of the accumulated curve to 0
    roc_auc = auc(fpr, tpr)
    fold_label = 'ROC fold %d (area = %0.2f)' % (i, roc_auc)
    pl.plot(fpr, tpr, lw=1, label=fold_label)

# grey dashed diagonal labelled 'Luck'
pl.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')

# Turn the accumulated sum into a mean and pin the end of the curve to 1.
mean_tpr /= len(cv)
mean_tpr[-1] = 1.0
mean_auc = auc(mean_fpr, mean_tpr)
pl.plot(mean_fpr, mean_tpr, 'k--',
        label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)

# leave a small margin around the unit square
pl.xlim([-0.05, 1.05])
pl.ylim([-0.05, 1.05])
pl.xlabel('False Positive Rate')
pl.ylabel('True Positive Rate')
pl.title('Receiver operating characteristic example')
def main():
    """Evaluate bag-of-features classification across projection dimensions.

    For every entry of the module-level ``ldims`` the function:

    1. projects each category with ``dimred`` and writes the rounded result
       under ``tempPath``;
    2. clusters a random sample of those projections with ``kmeans2`` and
       writes the code-book to disk;
    3. quantises each category against the code-book with ``vq`` and writes
       one codeword histogram per image id;
    4. for each category, trains an SVC (category vs. rows sampled from the
       other categories) with stratified k-fold cross-validation and stores
       the mean ROC AUC and precision-recall AUC over the folds.

    Finally the per-dimension mean and standard deviation over categories
    are plotted, saved as SVG, and the raw matrices are saved with
    ``np.savez``.

    The SVC kernel is read from ``sys.argv[1]`` and defaults to
    ``'linear'``.  All other configuration (paths, ``nDim``, ``nCategory``,
    ``nFold``, ``nCodewords`` and so on) comes from module-level names that
    are defined outside this block.

    Raises
    ------
    ValueError
        If ``catList`` holds no category other than the one being
        classified, so no negative examples can be drawn.
    """
    try:
        kernelType = sys.argv[1]
    except IndexError:
        kernelType = 'linear'

    # initialise output matrices
    rocauc = np.zeros((nDim, nCategory), dtype=np.float32)
    mapauc = np.zeros((nDim, nCategory), dtype=np.float32)

    nSamplesPerCat = int(np.round(nClusterSamples/nCategory))

    # Columns used for classification: the nCodewords histogram bins plus
    # the trailing category-id column (index nCodewords + 1).  The image-id
    # column at index nCodewords is skipped.  Both inputs are 1-D, so the
    # concatenation axis must be 0.
    select = np.concatenate((np.arange(nCodewords), [nCodewords+1]), axis=0)

    for iLDim, ldim in enumerate(ldims):
        # write the lower dimensional projections for each category and
        # keep a random sample of each for clustering
        catSamples = []
        for iCategory, catname in enumerate(catList):
            dataOuttemp = dimred(iCategory, catname, ldim)
            dataOut = np.array(np.round(dataOuttemp).astype(np.int16), dtype=np.int16)
            outFilename = tempPath+catname+'.'+dataExt
            np.savetxt(outFilename, dataOut, delimiter=' ', fmt='%d')
            if dataOut.shape[0] <= nSamplesPerCat:
                catSamples.append(dataOut)
            else:
                rndsample = np.random.randint(0, dataOut.shape[0], nSamplesPerCat)
                catSamples.append(dataOut[rndsample, :])
        dataLower = np.concatenate(catSamples, axis=0)

        # compute the code-book for the sampled data and write it to file
        CodeBook, label = kmeans2(dataLower, nCodewords, iter=nIterKmeans, minit='points', missing='warn')
        cbfilepath = tempPath+dataset+codebookext
        with open(cbfilepath, 'w') as cbfile:
            np.savetxt(cbfile, CodeBook, fmt='%f', delimiter=' ')

        # build one codeword histogram per image of every category
        for catname in catList:
            tempFilename = tempPath+catname+'.'+dataExt
            catData = np.loadtxt(tempFilename, dtype=np.int16, delimiter=' ')
            catLabel, catDist = vq(catData, CodeBook)
            catfilePath = dataPath+catname+'.'+dataExt
            catImgId = np.genfromtxt(catfilePath, dtype=int, usecols=[-2])
            catId = np.genfromtxt(catfilePath, dtype=int, usecols=[-1])[0]
            histRows = []
            for imgid in np.unique(catImgId):
                imgLabel = catLabel[catImgId == imgid]
                # Fix the bin range so that bin k always counts codeword k;
                # without it the bins span only the labels present in this
                # image and histograms of different images do not line up.
                hist, edges = np.histogram(imgLabel, nCodewords, range=(0, nCodewords))
                histRows.append(np.hstack((hist, imgid, catId)))
            catboffilepath = tempPath+catname+bofext
            np.savetxt(catboffilepath, np.vstack(histRows), fmt='%d', delimiter=' ')

        for iCategory, catname in enumerate(catList):
            # read the category data which will be positive
            catpos = np.genfromtxt(tempPath+catname+bofext, dtype=int)
            catpos = catpos.take(select, axis=1)
            catpos[:, -1] = 1

            # Read every *other* category as negative data.  The path is
            # built from the loop variable; the previous code rebuilt it
            # from ``catname`` and so re-read the positive file.
            negParts = [np.genfromtxt(tempPath+cat+bofext, dtype=int)
                        for cat in sorted(set(catList)) if cat != catname]
            if not negParts:
                raise ValueError(
                    'no category other than %r available for negative examples'
                    % (catname,))
            catneg = np.concatenate(negParts, axis=0)

            # sample the negative data to have equal size as the positive
            nPos = catpos.shape[0]
            nNeg = catneg.shape[0]
            catneg = catneg[np.random.randint(0, nNeg, nPos), :]
            catneg = catneg.take(select, axis=1)
            catneg[:, -1] = -1

            # combine positive and negative data
            data = np.concatenate((catpos, catneg), axis=0)
            X = data[:, :nCodewords]
            y = data[:, nCodewords]

            cv = StratifiedKFold(y, k=nFold)
            classifier = svm.SVC(kernel=kernelType, probability=True)
            # Builtin float replaces the removed ``np.float`` alias.
            metricstemp = np.zeros((nFold, nMetrics), float)

            for i, (train, test) in enumerate(cv):
                probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
                print(y[test])
                print(probas_[:, 1])
                # Best effort: a fold whose metric cannot be computed is
                # scored 0 instead of aborting the whole run.
                try:
                    fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
                    roc_auc = auc(fpr, tpr)
                except Exception:
                    roc_auc = 0.
                try:
                    precision, recall, thresholds = precision_recall_curve(y[test], probas_[:, 1])
                    pr_auc = auc(recall, precision)
                except Exception:
                    pr_auc = 0.
                metricstemp[i] = [roc_auc, pr_auc]

            # Average each metric over the folds: column 0 is ROC AUC and
            # column 1 is PR AUC.  Indexing rows here would mix the two.
            rocauc[iLDim, iCategory] = np.mean(metricstemp[:, 0], axis=0)
            mapauc[iLDim, iCategory] = np.mean(metricstemp[:, 1], axis=0)
            print('%s classified...' % (catname))

    outPath = rootDir + dataset + outDir + '%s%s%s%s'%('dimensionality',dataset,kernelType,'.svg')
    outPath1 = rootDir + dataset + outDir + '%s%s%s%s' % ('dimensionality',dataset,kernelType,'.npz')
    plt.figure(0)
    plt.errorbar(np.arange(1,nDim+1), np.mean(rocauc,axis=1), np.std(rocauc,axis=1), fmt = '-', elinewidth=1, marker = 'x', label = 'AUC-ROC')
    plt.errorbar(np.arange(1,nDim+1), np.mean(mapauc,axis=1), np.std(mapauc,axis=1), fmt = '--', elinewidth=1, marker = 'o', label = 'MAP')
    plt.xlabel('Visual Categories')
    plt.ylabel('Performance Metric')
    plt.title('BOF Performance: %s : %s' % (dataset,kernelType))
    plt.legend(loc="lower right")
    plt.savefig(outPath,format='svg')
    # Saving the raw matrices is best effort; report a failure and carry on.
    try:
        np.savez(outPath1,rocauc,mapauc)
    except Exception:
        print('unable to write file %s' % (outPath1))

    plt.show()
    plt.close()
from src.utils import L_ex, sigmoid, roc_curve, get_path


path = get_path(__file__) + '/..'

# Fixed weight vector applied to the three selected feature columns.
w = np.array([-410.6073, 0.1494, 4.4185])

idxs = [L_ex.index(f) for f in ['sdE5', 'V11', 'E9']]
Xf = D_ex[:, idxs]

num_tests = 5
results = []

for i in range(num_tests):
    # Draw 100000 row indices uniformly from [0, number of rows).  randint
    # with an exclusive upper bound replaces the deprecated
    # random_integers(0, rows - 1, ...), and the size is an integer rather
    # than the float 1e5.
    test_rows = np.random.randint(0, D_ex.shape[0], 100000)

    X = Xf[test_rows, :]
    y = D_ex[test_rows, 2]

    lin = np.dot(X, w)
    probs = sigmoid(lin)

    # np.linspace needs an integer sample count (was the float 1e3).
    fpr, tpr, thresholds = roc_curve(y, probs, thresholds=np.linspace(0, 1, 1000))

    results.append(auc(fpr, tpr))


# Store the AUC of every run as a JSON list.
json_path = '{0}/data/hard-coded-results-{1}-tests.json'.format(path, num_tests)
with open(json_path, 'w') as f:
    json.dump(results, f, indent=4)
Exemple #17
0
# Reproducible shuffle of the rows before the train/test split.
p = list(range(n_samples))
random.seed(0)
random.shuffle(p)
X, y = X[p], y[p]
half = int(n_samples/2)

# Widen the feature matrix with 200 * n_features Gaussian noise columns.
X = np.c_[X, np.random.randn(n_samples, 200*n_features)]

# Train a linear SVC on the first half and score the second half.
classifier = svm.SVC(kernel='linear', probability=True)
probas_ = classifier.fit(X[:half], y[:half]).predict_proba(X[half:])

# ROC curve on the held-out half and the area under it.
fpr, tpr, thresholds = roc_curve(y[half:], probas_[:, 1])
roc_auc = auc(fpr, tpr)
print("Area under the ROC curve : %f" % roc_auc)

# Plot the curve with a dashed diagonal for reference.
pl.figure(-1)
pl.clf()
pl.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
pl.plot([0, 1], [0, 1], 'k--')
pl.xlim([0.0, 1.0])
pl.ylim([0.0, 1.0])
pl.xlabel('False Positive Rate')
pl.ylabel('True Positive Rate')
pl.title('Receiver operating characteristic example')
pl.legend(loc="lower right")
pl.show()
def calcAUC(targetData):
    """Return the ROC AUC of uniform random scores against ``targetData``.

    One score per element of ``targetData`` is drawn with
    ``numpy.random.random``, so the result varies between calls unless the
    generator is seeded by the caller.
    """
    randomScores = numpy.random.random((len(targetData)))
    FPR, TPR, _ = roc(targetData, randomScores)
    return auc(FPR, TPR)
Exemple #19
0
def test_roc():
    """Check that the ROC AUC on the held-out half is 0.80 to two decimals."""
    false_pos, true_pos, _ = roc(y[half:], probas_[:, 1])
    area = auc(false_pos, true_pos)
    assert_array_almost_equal(area, 0.80, decimal=2)
Exemple #20
0
def test_precision_recall():
    """Check the area computed from the precision-recall output."""
    prec, rec, _ = precision_recall(y[half:], probas_[:, 1])
    # NOTE(review): auc receives (precision, recall) here, while other call
    # sites pass (recall, precision); the expected value below is pinned to
    # this argument order.
    area = auc(prec, rec)
    assert_array_almost_equal(area, 0.3197, 3)
# Cross-validated ROC analysis: one curve per fold plus their mean.
cv = StratifiedKFold(y, k=6)
classifier = svm.SVC(kernel='linear', probability=True)

# The mean curve is accumulated on a fixed grid of 100 false-positive rates.
mean_tpr = 0.0
mean_fpr = np.linspace(0, 1, 100)
all_tpr = []

for i, (train, test) in enumerate(cv):
    probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
    # ROC curve of this fold, resampled onto the common grid
    fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
    mean_tpr += interp(mean_fpr, fpr, tpr)
    mean_tpr[0] = 0.0  # pin the start of the accumulated curve to 0
    roc_auc = auc(fpr, tpr)
    fold_label = 'ROC fold %d (area = %0.2f)' % (i, roc_auc)
    pl.plot(fpr, tpr, lw=1, label=fold_label)

# grey dashed diagonal labelled 'Luck'
pl.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')

# Turn the accumulated sum into a mean and pin the end of the curve to 1.
mean_tpr /= len(cv)
mean_tpr[-1] = 1.0
mean_auc = auc(mean_fpr, mean_tpr)
mean_label = 'Mean ROC (area = %0.2f)' % mean_auc
pl.plot(mean_fpr, mean_tpr, 'k--', label=mean_label, lw=2)

# leave a small margin around the unit square
pl.xlim([-0.05, 1.05])
pl.ylim([-0.05, 1.05])