Example #1
0
def compute_roc_labels_scikit(e_labels, true_labels):
    """Compute one ROC curve per row, resampled onto a shared 100-point grid.

    Row ``j`` of ``true_labels`` is passed to ``roc_curve`` as the ground
    truth and row ``j`` of ``e_labels`` as the matching scores.

    Parameters
    ----------
    e_labels : 2-D array
        Estimated scores, indexed as ``e_labels[j, :]``.
    true_labels : 2-D array
        Ground-truth labels; its first dimension sets the number of curves.

    Returns
    -------
    sensData : ndarray
        True-positive rates interpolated at the grid points, one row per
        curve. The first grid point of every row is forced to 0.
    specData : ndarray
        The grid itself (``np.linspace(0, 1, 100)``) repeated once per curve.
        Despite the name, these are the false-positive-rate positions at
        which ``sensData`` was sampled.
    auc : ndarray
        Area under each un-interpolated ROC curve.
    """
    from scikits.learn.metrics import auc as compute_auc
    from scikits.learn.metrics import roc_curve

    grid_fpr = np.linspace(0, 1, 100)
    auc = []
    sensData = []
    specData = []
    # ``range`` instead of ``xrange`` so the loop runs on Python 2 and 3.
    for j in range(true_labels.shape[0]):
        fpr, tpr, _ = roc_curve(true_labels[j, :], e_labels[j, :])
        auc.append(compute_auc(fpr, tpr))
        # ``scipy.interp`` was only a re-export of ``numpy.interp`` and has
        # been removed from SciPy; call the NumPy function directly.
        tpr_on_grid = np.interp(grid_fpr, fpr, tpr)
        tpr_on_grid[0] = 0.  # pin every curve to the origin
        sensData.append(tpr_on_grid)
        specData.append(grid_fpr)
    return np.array(sensData), np.array(specData), np.array(auc)
def catClassify(dataPath, catname, kernelType, dataext, catmap, nTopic):
    """Cross-validate a one-vs-rest SVM for ``catname``.

    Positive rows are read from ``dataPath + catname + dataext``; negative
    rows are read from the files of every other key in ``catmap`` and then
    randomly sampled (with replacement) down to the number of positives.

    The fold count, the metric count and the scikit helpers
    (``StratifiedKFold``, ``svm``, ``roc_curve``, ``auc``,
    ``precision_recall_curve``) are taken from module-level names
    ``nFold`` / ``nMetrics`` etc. that are not defined in this function.

    Parameters
    ----------
    dataPath : str
        Prefix prepended to every category file name.
    catname : str
        Category treated as the positive class.
    kernelType : str
        Passed to ``svm.SVC(kernel=...)``.
    dataext : str
        Suffix appended to every category file name.
    catmap : mapping
        Its keys are the category names.
    nTopic : int
        Number of leading columns used as features; column ``nTopic`` is
        overwritten with the class label (1 positive, 0 negative).

    Returns
    -------
    list
        ``[mean, std]`` of the per-fold ``[roc_auc, pr_auc]`` rows, each
        taken over the fold axis.

    Raises
    ------
    ValueError
        From ``np.concatenate`` when ``catmap`` holds no category other
        than ``catname`` (there are no negatives to load).
    """
    # Read the category data which will be the positive class.
    catpos = np.genfromtxt(dataPath + catname + dataext, dtype=int)
    catpos = catpos[:, :nTopic + 1]
    catpos[:, nTopic] = 1

    # Read the data of the remaining categories. Each category is loaded
    # from its OWN file and all of them are stacked; previously the positive
    # file was re-read on every iteration and never accumulated.
    negatives = [
        np.genfromtxt(dataPath + cats + dataext, dtype=int)
        for cats in catmap
        if cats != catname
    ]
    catneg = np.concatenate(negatives, axis=0)

    # Sample the negative data to have equal size as the positive.
    nPos = catpos.shape[0]
    nNeg = catneg.shape[0]
    catneg = catneg[np.random.randint(0, nNeg, nPos), :]
    catneg = catneg[:, :nTopic + 1]
    catneg[:, nTopic] = 0

    # Combine positive and negative data, then shuffle the rows to aid in
    # random selection of train and test.
    data = np.concatenate((catpos, catneg), axis=0)
    np.random.shuffle(data)

    X = data[:, :nTopic]
    y = data[:, nTopic]

    # Cross-validation; each fold contributes one [roc_auc, pr_auc] row, so
    # this assumes nMetrics == 2.
    cv = StratifiedKFold(y, k=nFold)
    classifier = svm.SVC(kernel=kernelType, probability=True)
    metricstemp = np.zeros((nFold, nMetrics), float)
    for i, (train, test) in enumerate(cv):
        probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
        fpr, tpr, _ = roc_curve(y[test], probas_[:, 1])
        roc_auc = auc(fpr, tpr)
        precision, recall, _ = precision_recall_curve(y[test], probas_[:, 1])
        pr_auc = auc(recall, precision)
        metricstemp[i] = [roc_auc, pr_auc]

    return [np.mean(metricstemp, axis=0), np.std(metricstemp, axis=0)]
def svm_roc(table, kernel='linear', C=1.0):
    '''Fit an SVC on half of ``table`` and plot the ROC curve of the rest.

    Column 0 of ``table`` is used as the label and the remaining columns as
    features. Rows are shuffled with NumPy's global RNG seeded to 0, the
    first half trains the classifier and the second half is scored. The
    area under the ROC curve is printed and the curve is shown with pylab.
    Nothing is returned.
    '''
    from scikits.learn import svm
    from scikits.learn.metrics import roc_curve, auc
    import pylab as pl

    features = table[:, 1:]
    labels = table[:, 0]
    n_samples, _ = features.shape

    # Reproducible row permutation (note: this reseeds the global RNG).
    order = list(range(n_samples))
    np.random.seed(0)
    np.random.shuffle(order)
    features, labels = features[order], labels[order]
    split = n_samples // 2

    # Train on the first half, predict class probabilities for the second.
    model = svm.SVC(kernel=kernel, probability=True, C=C)
    model.fit(features[:split], labels[:split])
    probas_ = model.predict_proba(features[split:])

    # ROC curve and the area under it, scored on column 1 of the output.
    fpr, tpr, _ = roc_curve(labels[split:], probas_[:, 1])
    roc_auc = auc(fpr, tpr)
    print("Area under the ROC curve : %f" % roc_auc)

    # Draw the curve together with the chance diagonal.
    pl.figure(-1)
    pl.clf()
    pl.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
    pl.plot([0, 1], [0, 1], 'k--')
    pl.xlabel('False Positive Rate')
    pl.ylabel('True Positive Rate')
    pl.title('Receiver operating characteristic example')
    pl.xlim([0.0, 1.0])
    pl.ylim([0.0, 1.0])
    pl.legend(loc="lower right")
    pl.show()
def main():
    """Evaluate bag-of-features SVM classification per projection dimension.

    The kernel type is read from ``sys.argv[1]`` (default ``'linear'``).
    All other configuration (``ldims``, ``catList``, ``nCodewords``,
    ``nFold``, paths and file extensions, ...) and the helpers ``dimred``,
    ``kmeans2``, ``vq``, ``StratifiedKFold``, ``svm``, ``roc_curve``,
    ``auc``, ``precision_recall_curve`` and ``plt`` come from module-level
    names that are not defined in this function.

    For every entry of ``ldims`` the function:

    1. projects each category with ``dimred``, rounds to int16, writes the
       result under ``tempPath`` and keeps at most ``nSamplesPerCat`` rows
       of it (drawn with replacement when the category is larger);
    2. clusters the pooled sample with ``kmeans2`` and writes the code-book;
    3. quantises each category against the code-book and writes one
       histogram row per image id, laid out as ``[hist..., imgid, catId]``;
    4. cross-validates a one-vs-rest SVC per category and stores the mean
       ROC-AUC and precision/recall-AUC over the folds.

    Finally the per-dimension mean/std over categories is plotted, saved as
    SVG, and the raw matrices are saved as ``.npz``.

    NOTE(review): ``rocauc``/``mapauc`` are sized by ``nDim`` but indexed by
    the position in ``ldims`` - presumably ``nDim == len(ldims)``; confirm.
    """
    # Kernel comes from the first CLI argument; fall back to a linear kernel.
    kernelType = sys.argv[1] if len(sys.argv) > 1 else 'linear'

    # Initialise output matrices.
    rocauc = np.zeros((nDim, nCategory), dtype=np.float32)
    mapauc = np.zeros((nDim, nCategory), dtype=np.float32)

    nSamplesPerCat = int(np.round(nClusterSamples / nCategory))
    for iLDim, ldim in enumerate(ldims):
        # Write the lower dimensional projections for each category and
        # collect a bounded random sample of each for clustering.
        samples = []
        for iCategory, catname in enumerate(catList):
            dataOut = np.array(np.round(dimred(iCategory, catname, ldim)),
                               dtype=np.int16)
            outFilename = tempPath + catname + '.' + dataExt
            np.savetxt(outFilename, dataOut, delimiter=' ', fmt='%d')
            if dataOut.shape[0] <= nSamplesPerCat:
                samples.append(dataOut)
            else:
                rndsample = np.random.randint(0, dataOut.shape[0],
                                              nSamplesPerCat)
                samples.append(dataOut[rndsample, :])
        dataLower = np.concatenate(samples, axis=0)

        # Compute the code-book for the data-set and write it to file.
        CodeBook, _ = kmeans2(dataLower, nCodewords, iter=nIterKmeans,
                              minit='points', missing='warn')
        np.savetxt(tempPath + dataset + codebookext, CodeBook,
                   fmt='%f', delimiter=' ')

        # Quantise every category and write one histogram row per image.
        for iCategory, catname in enumerate(catList):
            tempFilename = tempPath + catname + '.' + dataExt
            catData = np.loadtxt(tempFilename, dtype=np.int16, delimiter=' ')
            catLabel, _ = vq(catData, CodeBook)
            catfilePath = dataPath + catname + '.' + dataExt
            catImgId = np.genfromtxt(catfilePath, dtype=int, usecols=[-2])
            catId = np.genfromtxt(catfilePath, dtype=int, usecols=[-1])[0]
            rows = []
            for imgid in np.unique(catImgId):
                imgLabel = catLabel[catImgId == imgid]
                # Fix the bin edges to [0, nCodewords] so that bin k always
                # counts codeword k. Without an explicit range the edges are
                # derived from each image's own min/max label and the
                # histograms of different images are not comparable.
                hist, _ = np.histogram(imgLabel, bins=nCodewords,
                                       range=(0, nCodewords))
                rows.append(np.hstack((hist, imgid, catId)))
            # Stacking once keeps the output 2-D even for a single image
            # and avoids re-copying the matrix for every image.
            np.savetxt(tempPath + catname + bofext, np.vstack(rows),
                       fmt='%d', delimiter=' ')

        # Columns kept from each histogram row: the nCodewords bins plus the
        # trailing category-id column (index nCodewords + 1), which is then
        # overwritten with the class label. The image-id column is dropped.
        # Both pieces are 1-D, so they are joined along axis 0.
        select = np.concatenate((np.arange(nCodewords), [nCodewords + 1]),
                                axis=0)
        for iCategory, catname in enumerate(catList):
            # Read the category data which will be the positive class.
            catpos = np.genfromtxt(tempPath + catname + bofext, dtype=int)
            catpos = catpos.take(select, axis=1)
            catpos[:, -1] = 1

            # Negatives come from every OTHER category's file; previously
            # the positive category's own file was re-read for each of them.
            others = sorted(set(catList) - set([catname]))
            catneg = np.concatenate(
                [np.genfromtxt(tempPath + cat + bofext, dtype=int)
                 for cat in others],
                axis=0)

            # Sample the negative data to have equal size as the positive.
            nPos = catpos.shape[0]
            nNeg = catneg.shape[0]
            catneg = catneg[np.random.randint(0, nNeg, nPos), :]
            catneg = catneg.take(select, axis=1)
            catneg[:, -1] = -1

            # Combine positive and negative data.
            data = np.concatenate((catpos, catneg), axis=0)
            X = data[:, :nCodewords]
            y = data[:, nCodewords]

            # Cross-validation; one [roc_auc, pr_auc] row per fold, so this
            # assumes nMetrics == 2.
            cv = StratifiedKFold(y, k=nFold)
            classifier = svm.SVC(kernel=kernelType, probability=True)
            metricstemp = np.zeros((nFold, nMetrics), float)

            for i, (train, test) in enumerate(cv):
                probas_ = classifier.fit(X[train],
                                         y[train]).predict_proba(X[test])
                print(y[test])
                print(probas_[:, 1])
                # A fold whose metric cannot be computed scores 0 instead of
                # aborting the run; KeyboardInterrupt/SystemExit still
                # propagate.
                try:
                    fpr, tpr, _ = roc_curve(y[test], probas_[:, 1])
                    roc_auc = auc(fpr, tpr)
                except Exception:
                    roc_auc = 0.
                try:
                    precision, recall, _ = precision_recall_curve(
                        y[test], probas_[:, 1])
                    pr_auc = auc(recall, precision)
                except Exception:
                    pr_auc = 0.
                metricstemp[i] = [roc_auc, pr_auc]

            # Average each metric over the folds: column 0 holds ROC-AUC and
            # column 1 PR-AUC. Indexing rows here would instead mix the two
            # metrics of a single fold.
            rocauc[iLDim, iCategory] = np.mean(metricstemp[:, 0])
            mapauc[iLDim, iCategory] = np.mean(metricstemp[:, 1])
            print('%s classified...' % (catname))

    outBase = (rootDir + dataset + outDir
               + '%s%s%s' % ('dimensionality', dataset, kernelType))
    outPath = outBase + '.svg'
    outPath1 = outBase + '.npz'

    plt.figure(0)
    dims = np.arange(1, nDim + 1)
    plt.errorbar(dims, np.mean(rocauc, axis=1), np.std(rocauc, axis=1),
                 fmt='-', elinewidth=1, marker='x', label='AUC-ROC')
    plt.errorbar(dims, np.mean(mapauc, axis=1), np.std(mapauc, axis=1),
                 fmt='--', elinewidth=1, marker='o', label='MAP')
    plt.xlabel('Visual Categories')
    plt.ylabel('Performance Metric')
    plt.title('BOF Performance: %s : %s' % (dataset, kernelType))
    plt.legend(loc="lower right")
    plt.savefig(outPath, format='svg')
    # Saving the raw numbers is best-effort: report and carry on to the plot.
    try:
        np.savez(outPath1, rocauc, mapauc)
    except Exception:
        print('unable to write file %s' % (outPath1))

    plt.show()
    plt.close()
Example #5
0
# Shuffle the samples with a fixed seed, then split them down the middle
n_samples, n_features = X.shape
p = list(range(n_samples))
random.seed(0)
random.shuffle(p)
X, y = X[p], y[p]
half = n_samples // 2

# Append 200 * n_features columns of Gaussian noise to the features
X = np.hstack((X, np.random.randn(n_samples, 200 * n_features)))

# Fit on the first half, predict class probabilities for the second half
classifier = svm.SVC(kernel='linear', probability=True)
classifier.fit(X[:half], y[:half])
probas_ = classifier.predict_proba(X[half:])

# Compute the ROC curve and the area under the curve
fpr, tpr, thresholds = roc_curve(y[half:], probas_[:, 1])
roc_auc = auc(fpr, tpr)
print("Area under the ROC curve : %f" % roc_auc)

# Plot the ROC curve together with the chance diagonal
pl.figure(-1)
pl.clf()
pl.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
pl.plot([0, 1], [0, 1], 'k--')
pl.xlabel('False Positive Rate')
pl.ylabel('True Positive Rate')
pl.title('Receiver operating characteristic example')
pl.xlim([0.0, 1.0])
pl.ylim([0.0, 1.0])
pl.legend(loc="lower right")
pl.show()
        pl.xticks([])
        pl.yticks([])

    print "predict species distribution"
    Z = predict(clf, mean, std)
    levels = np.linspace(Z.min(), Z.max(), 25)
    Z[coverage[2,:,:] == -9999] = -9999
    CS = pl.contourf(X, Y, Z, levels=levels, cmap=pl.cm.Reds)
    pl.colorbar(format='%.2f')
    pl.scatter(species.train[:, 0], species.train[:, 1], s=2**2, c='black',
               marker='^', label='train')
    pl.scatter(species.test[:, 0], species.test[:, 1], s=2**2, c='black',
               marker='x', label='test')
    pl.legend()
    pl.title(species.name)
    pl.axis('equal')

    # Compute AUC w.r.t. background points
    pred_background = Z[background_points[0], background_points[1]]
    pred_test = clf.decision_function((species.test_cover-mean)/std)[:,0]
    scores = np.r_[pred_test, pred_background]
    y = np.r_[np.ones(pred_test.shape), np.zeros(pred_background.shape)]
    fpr, tpr, thresholds = roc_curve(y, scores)
    roc_auc = auc(fpr, tpr)
    pl.text(-35, -70, "AUC: %.3f" % roc_auc, ha="right")
    print "Area under the ROC curve : %f" % roc_auc

print "time elapsed: %.3fs" % (time() - t0)

pl.show()
Example #7
0
# Shuffle the samples with a fixed seed, then split them down the middle
n_samples, n_features = X.shape
p = list(range(n_samples))
random.seed(0)
random.shuffle(p)
X, y = X[p], y[p]
half = n_samples // 2

# Append 200 * n_features columns of Gaussian noise to the features
X = np.hstack((X, np.random.randn(n_samples, 200 * n_features)))

# Fit on the first half, predict class probabilities for the second half
classifier = svm.SVC(kernel='linear', probability=True)
classifier.fit(X[:half], y[:half])
probas_ = classifier.predict_proba(X[half:])

# Compute the ROC curve and the area under the curve
fpr, tpr, thresholds = roc_curve(y[half:], probas_[:, 1])
roc_auc = auc(fpr, tpr)
print("Area under the ROC curve : %f" % roc_auc)

# Plot the ROC curve together with the chance diagonal
pl.figure(-1)
pl.clf()
pl.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
pl.plot([0, 1], [0, 1], 'k--')
pl.xlabel('False Positive Rate')
pl.ylabel('True Positive Rate')
pl.title('Receiver operating characteristic example')
pl.xlim([0.0, 1.0])
pl.ylim([0.0, 1.0])
pl.legend(loc="lower right")
pl.show()
Example #8
0
        pl.xticks([])
        pl.yticks([])

    print "predict species distribution"
    Z = predict(clf, mean, std)
    levels = np.linspace(Z.min(), Z.max(), 25)
    Z[coverage[2,:,:] == -9999] = -9999
    CS = pl.contourf(X, Y, Z, levels=levels, cmap=pl.cm.Reds)
    pl.colorbar(format='%.2f')
    pl.scatter(species.train[:, 0], species.train[:, 1], s=2**2, c='black',
               marker='^', label='train')
    pl.scatter(species.test[:, 0], species.test[:, 1], s=2**2, c='black',
               marker='x', label='test')
    pl.legend()
    pl.title(species.name)
    pl.axis('equal')

    # Compute AUC w.r.t. background points
    pred_background = Z[background_points[0], background_points[1]]
    pred_test = clf.decision_function((species.test_cover-mean)/std)[:,0]
    scores = np.r_[pred_test, pred_background]
    y = np.r_[np.ones(pred_test.shape), np.zeros(pred_background.shape)]
    fpr, tpr, thresholds = roc_curve(y, scores)
    roc_auc = auc(fpr, tpr)
    pl.text(-35, -70, "AUC: %.3f" % roc_auc, ha="right")
    print "Area under the ROC curve : %f" % roc_auc

print "time elapsed: %.3fs" % (time() - t0)

pl.show()
################################################################################
# Classification and ROC analysis

# Six-fold stratified cross-validation of a linear SVC; one ROC curve is
# drawn per fold, followed by the curve averaged over the folds
cv = StratifiedKFold(y, k=6)
classifier = svm.SVC(kernel='linear', probability=True)

mean_fpr = np.linspace(0, 1, 100)
mean_tpr = 0.0
all_tpr = []

for i, (train, test) in enumerate(cv):
    classifier.fit(X[train], y[train])
    probas_ = classifier.predict_proba(X[test])
    # Compute the ROC curve and the area under the curve for this fold
    fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
    roc_auc = auc(fpr, tpr)
    # Accumulate the fold's TPR resampled onto the common FPR grid
    mean_tpr += interp(mean_fpr, fpr, tpr)
    mean_tpr[0] = 0.0
    fold_label = 'ROC fold %d (area = %0.2f)' % (i, roc_auc)
    pl.plot(fpr, tpr, lw=1, label=fold_label)

# Chance diagonal
pl.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')

# Turn the accumulated sum into a mean and pin its last point to 1
mean_tpr /= len(cv)
mean_tpr[-1] = 1.0
mean_auc = auc(mean_fpr, mean_tpr)
pl.plot(mean_fpr, mean_tpr, 'k--', lw=2,
        label='Mean ROC (area = %0.2f)' % mean_auc)

pl.xlim([-0.05, 1.05])
pl.ylim([-0.05, 1.05])
Example #10
0
# Append 200 * n_features noise columns so the problem gets harder
X = np.c_[X, random_state.randn(n_samples, 200 * n_features)]

# Shuffle, then use the first half for training and the rest for testing
X, y = shuffle(X, y, random_state=random_state)
half = int(n_samples / 2)
X_train, y_train = X[:half], y[:half]
X_test, y_test = X[half:], y[half:]

# Fit the classifier and predict class probabilities on the test half
classifier = svm.SVC(kernel='linear', probability=True)
classifier.fit(X_train, y_train)
probas_ = classifier.predict_proba(X_test)

# Compute the ROC curve and the area under the curve
fpr, tpr, thresholds = roc_curve(y_test, probas_[:, 1])
roc_auc = auc(fpr, tpr)
print("Area under the ROC curve : %f" % roc_auc)

# Plot the ROC curve together with the chance diagonal
pl.figure(-1)
pl.clf()
pl.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
pl.plot([0, 1], [0, 1], 'k--')
pl.xlabel('False Positive Rate')
pl.ylabel('True Positive Rate')
pl.title('Receiver operating characteristic example')
pl.xlim([0.0, 1.0])
pl.ylim([0.0, 1.0])
pl.legend(loc="lower right")
pl.show()
################################################################################
# Classification and ROC analysis

# Six-fold stratified cross-validation of a linear SVC; one ROC curve is
# drawn per fold, followed by the curve averaged over the folds
cv = StratifiedKFold(y, k=6)
classifier = svm.SVC(kernel='linear', probability=True)

mean_fpr = np.linspace(0, 1, 100)
mean_tpr = 0.0
all_tpr = []

for i, (train, test) in enumerate(cv):
    classifier.fit(X[train], y[train])
    probas_ = classifier.predict_proba(X[test])
    # Compute the ROC curve and the area under the curve for this fold
    fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
    roc_auc = auc(fpr, tpr)
    # Accumulate the fold's TPR resampled onto the common FPR grid
    mean_tpr += interp(mean_fpr, fpr, tpr)
    mean_tpr[0] = 0.0
    fold_label = 'ROC fold %d (area = %0.2f)' % (i, roc_auc)
    pl.plot(fpr, tpr, lw=1, label=fold_label)

# Chance diagonal
pl.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')

# Turn the accumulated sum into a mean and pin its last point to 1
mean_tpr /= len(cv)
mean_tpr[-1] = 1.0
mean_auc = auc(mean_fpr, mean_tpr)
pl.plot(mean_fpr, mean_tpr, 'k--', lw=2,
        label='Mean ROC (area = %0.2f)' % mean_auc)