def catClassify(botData, kernelType, nTopic):
    """Cross-validate a NuSVC on ``botData`` and summarise its PR-AUC.

    Parameters
    ----------
    botData : 2-D array
        Columns ``0 .. nTopic-1`` are used as features and column
        ``nTopic`` as the class label.
    kernelType : str
        Not used by this implementation (the classifier is a default
        ``svm.NuSVC``); kept so existing callers keep working.
    nTopic : int
        Number of feature columns.

    Returns
    -------
    list
        ``[mean, std]`` of the area under the precision-recall curve,
        computed over the whole ``(nFold, nMetrics)`` score table.

    Notes
    -----
    Relies on the module-level names ``nFold`` and ``nMetrics``.
    """
    X = botData[:, :nTopic]
    y = botData[:, nTopic]

    classifier = svm.NuSVC(probability=True)
    cv = StratifiedKFold(y, k=nFold)

    # Builtin ``float`` replaces the ``np.float`` alias, which is no longer
    # available in current NumPy releases; the resulting dtype is the same.
    metricstemp = np.zeros((nFold, nMetrics), float)
    for i, (train, test) in enumerate(cv):
        probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
        # Column 1 of the probability matrix is used as the positive score.
        precision, recall, _ = precision_recall_curve(y[test], probas_[:, 1])
        # A one-element list is broadcast across every column of row ``i``,
        # so each row is constant and the overall mean/std below equal the
        # per-fold mean/std of the PR-AUC.
        metricstemp[i] = [auc(recall, precision)]

    return [np.mean(metricstemp), np.std(metricstemp)]
def catClassify(dataPath, catname, kernelType, dataext, catmap, nTopic):
    """One-vs-rest SVM evaluation of category ``catname``.

    The positive samples are read from ``dataPath + catname + dataext``;
    the negative samples are read from the corresponding file of every
    other key in ``catmap`` and then randomly sub-sampled (with
    replacement) to the size of the positive set.

    Parameters
    ----------
    dataPath : str
        Prefix of the per-category data files.
    catname : str
        Category treated as the positive class.
    kernelType : str
        Kernel passed to ``svm.SVC``.
    dataext : str
        Suffix of the per-category data files.
    catmap : dict
        Mapping whose keys are the category names.
    nTopic : int
        Number of feature columns; column ``nTopic`` is overwritten with
        the binary label (1 = positive, 0 = negative).

    Returns
    -------
    list
        ``[mean, std]`` where each entry is a length-``nMetrics`` array of
        per-metric statistics over the folds, ordered ``[ROC-AUC, PR-AUC]``.

    Raises
    ------
    ValueError
        If ``catmap`` contains no category other than ``catname``.

    Notes
    -----
    Relies on the module-level names ``nFold`` and ``nMetrics``.
    """
    # Positive class.
    fname = dataPath + catname + dataext
    catpos = np.genfromtxt(fname, dtype=int)
    catpos = catpos[:, :nTopic + 1]
    catpos[:, nTopic] = 1

    # Negative class: every *other* category's file. The previous code
    # re-read the positive file and reset its "first visit" flag on each
    # iteration, so the negatives were just a copy of the positives.
    negatives = [
        np.genfromtxt(dataPath + cats + dataext, dtype=int)
        for cats in catmap
        if cats != catname
    ]
    if not negatives:
        raise ValueError(
            "no negative categories available for %r" % (catname,))
    catneg = np.concatenate(negatives, axis=0)

    # Sample the negatives so both classes have the same number of rows.
    nPos = catpos.shape[0]
    nNeg = catneg.shape[0]
    catneg = catneg[np.random.randint(0, nNeg, nPos), :]
    catneg = catneg[:, :nTopic + 1]
    catneg[:, nTopic] = 0

    # Combine and shuffle rows to aid random selection of train and test.
    data = np.concatenate((catpos, catneg), axis=0)
    np.random.shuffle(data)
    X = data[:, :nTopic]
    y = data[:, nTopic]

    cv = StratifiedKFold(y, k=nFold)
    classifier = svm.SVC(kernel=kernelType, probability=True)

    metricstemp = np.zeros((nFold, nMetrics), float)
    for i, (train, test) in enumerate(cv):
        probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
        scores = probas_[:, 1]
        fpr, tpr, _ = roc_curve(y[test], scores)
        roc_auc = auc(fpr, tpr)
        precision, recall, _ = precision_recall_curve(y[test], scores)
        pr_auc = auc(recall, precision)
        metricstemp[i] = [roc_auc, pr_auc]

    return [np.mean(metricstemp, axis=0), np.std(metricstemp, axis=0)]
n_samples, n_features = X.shape

# Shuffle the samples with a fixed seed so the split is reproducible.
p = list(range(n_samples))
random.seed(0)
random.shuffle(p)
X = X[p]
y = y[p]
half = n_samples // 2

# Append 200 * n_features columns of Gaussian noise to the features.
np.random.seed(0)
X = np.c_[X, np.random.randn(n_samples, 200 * n_features)]

# Train a linear SVM on the first half, score the second half.
classifier = svm.SVC(kernel='linear', probability=True)
classifier = classifier.fit(X[:half], y[:half])
probas_ = classifier.predict_proba(X[half:])

# Precision-recall curve and the area under it.
precision, recall, thresholds = precision_recall_curve(
    y[half:], probas_[:, 1])
area = auc(recall, precision)
print("Area Under Curve: %0.2f" % area)

# Plot the curve.
pl.figure(-1)
pl.clf()
pl.plot(recall, precision, label='Precision-Recall curve')
pl.xlabel('Recall')
pl.ylabel('Precision')
pl.ylim([0.0, 1.05])
pl.xlim([0.0, 1.0])
pl.title('Precision-Recall example: AUC=%0.2f' % area)
pl.legend(loc="lower left")
pl.show()
# Keep only two of the iris classes (labels 0 and 1).
y = iris.target
X, y = X[y != 2], y[y != 2]
n_samples, n_features = X.shape

# Shuffle the samples with a fixed seed so the split is reproducible.
p = list(range(n_samples))
random.seed(0)
random.shuffle(p)
X = X[p]
y = y[p]
half = n_samples // 2

# Append 200 * n_features columns of Gaussian noise to the features.
np.random.seed(0)
X = np.c_[X, np.random.randn(n_samples, 200 * n_features)]

# Train a linear SVM on the first half, score the second half.
classifier = svm.SVC(kernel='linear', probability=True)
classifier = classifier.fit(X[:half], y[:half])
probas_ = classifier.predict_proba(X[half:])

# Precision-recall curve of the held-out half.
precision, recall, thresholds = precision_recall_curve(
    y[half:], probas_[:, 1])

# Plot the curve.
pl.figure(-1)
pl.clf()
pl.plot(recall, precision, label='Precision-Recall curve')
pl.xlabel('Recall')
pl.ylabel('Precision')
pl.ylim([0.0, 1.05])
pl.xlim([0.0, 1.0])
pl.title('Precision-Recall example')
pl.legend(loc="lower left")
pl.show()
def _image_bof_rows(catLabel, catImgId, catId, nBins):
    """Return one row per image: codeword histogram, image id, category id."""
    rows = []
    for imgid in np.unique(catImgId):
        imgLabel = catLabel[catImgId == imgid]
        # Fix the bin range so bin k always counts codeword k; without an
        # explicit range the edges depend on each image's min/max label.
        hist, _ = np.histogram(imgLabel, bins=nBins, range=(0, nBins))
        rows.append(np.hstack((hist, imgid, catId)))
    # vstack keeps the result 2-D even when there is a single image.
    return np.vstack(rows)


def _cross_validated_aucs(X, y, kernelType):
    """Return an (nFold, nMetrics) table of [ROC-AUC, PR-AUC] per fold."""
    cv = StratifiedKFold(y, k=nFold)
    classifier = svm.SVC(kernel=kernelType, probability=True)
    metricstemp = np.zeros((nFold, nMetrics), float)
    for i, (train, test) in enumerate(cv):
        probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
        scores = probas_[:, 1]
        print(y[test])
        print(scores)
        # Best effort: a fold whose curve cannot be computed scores 0.
        try:
            fpr, tpr, _ = roc_curve(y[test], scores)
            roc_auc = auc(fpr, tpr)
        except Exception:
            roc_auc = 0.
        try:
            precision, recall, _ = precision_recall_curve(y[test], scores)
            pr_auc = auc(recall, precision)
        except Exception:
            pr_auc = 0.
        metricstemp[i] = [roc_auc, pr_auc]
    return metricstemp


def main():
    """Evaluate bag-of-features classification per reduced dimensionality.

    For every entry of the module-level ``ldims``:

    1. project each category with ``dimred`` and write the rounded
       projection to ``tempPath``;
    2. cluster a random sample of all projections with ``kmeans2`` into
       ``nCodewords`` codewords and write the code-book to ``tempPath``;
    3. quantise each category against the code-book and write one
       histogram row per image (histogram, image id, category id);
    4. for each category, train/test a one-vs-rest ``svm.SVC`` with
       stratified cross-validation and store the mean ROC-AUC and PR-AUC.

    Finally the per-dimensionality mean and standard deviation over the
    categories are plotted, saved as SVG, and the raw score tables are
    saved as ``.npz``.

    The SVM kernel is taken from ``sys.argv[1]`` and defaults to
    ``'linear'``. All paths and sizes come from module-level names
    (``tempPath``, ``dataPath``, ``rootDir``, ``outDir``, ``dataset``,
    ``catList``, ``nDim``, ``nCategory``, ``nCodewords``, ...).
    NOTE(review): ``rocauc``/``mapauc`` have ``nDim`` rows and are indexed
    by position in ``ldims`` - presumably ``len(ldims) == nDim``; confirm.
    """
    try:
        kernelType = sys.argv[1]
    except IndexError:
        kernelType = 'linear'

    # Output matrices: one row per dimensionality, one column per category.
    rocauc = np.zeros((nDim, nCategory), dtype=np.float32)
    mapauc = np.zeros((nDim, nCategory), dtype=np.float32)
    nSamplesPerCat = int(np.round(nClusterSamples / nCategory))

    # Columns kept for classification: the nCodewords histogram bins plus
    # column nCodewords + 1 (the image-id column nCodewords is dropped).
    # Both inputs are 1-D, so they are joined along the default axis.
    select = np.concatenate((np.arange(nCodewords), [nCodewords + 1]))

    for iLDim, ldim in enumerate(ldims):
        # Write the lower dimensional projection of each category and
        # collect a random sample of it for clustering.
        samples = []
        for iCategory, catname in enumerate(catList):
            dataOuttemp = dimred(iCategory, catname, ldim)
            dataOut = np.array(np.round(dataOuttemp).astype(np.int16),
                               dtype=np.int16)
            outFilename = tempPath + catname + '.' + dataExt
            np.savetxt(outFilename, dataOut, delimiter=' ', fmt='%d')
            if dataOut.shape[0] <= nSamplesPerCat:
                catSample = dataOut
            else:
                rndsample = np.random.randint(
                    0, dataOut.shape[0], nSamplesPerCat)
                catSample = dataOut[rndsample, :]
            samples.append(catSample)
        dataLower = np.concatenate(samples, axis=0)

        # Compute the code-book for the data-set and write it to file.
        CodeBook, _ = kmeans2(dataLower, nCodewords, iter=nIterKmeans,
                              minit='points', missing='warn')
        cbfilepath = tempPath + dataset + codebookext
        np.savetxt(cbfilepath, CodeBook, fmt='%f', delimiter=' ')

        # Quantise every category and write its per-image histograms.
        for catname in catList:
            tempFilename = tempPath + catname + '.' + dataExt
            catData = np.loadtxt(tempFilename, dtype=np.int16, delimiter=' ')
            catLabel, _ = vq(catData, CodeBook)
            catfilePath = dataPath + catname + '.' + dataExt
            catImgId = np.genfromtxt(catfilePath, dtype=int, usecols=[-2])
            catId = np.genfromtxt(catfilePath, dtype=int, usecols=[-1])[0]
            dataout = _image_bof_rows(catLabel, catImgId, catId, nCodewords)
            catboffilepath = tempPath + catname + bofext
            np.savetxt(catboffilepath, dataout, fmt='%d', delimiter=' ')

        # One-vs-rest classification of every category.
        for iCategory, catname in enumerate(catList):
            # Positive class, labelled 1.
            catpos = np.genfromtxt(tempPath + catname + bofext, dtype=int)
            catpos = catpos.take(select, axis=1)
            catpos[:, -1] = 1

            # Negative class: the histogram files of all *other*
            # categories (previously the positive file was re-read here).
            others = sorted(set(catList) - set([catname]))
            catneg = np.concatenate(
                [np.genfromtxt(tempPath + cat + bofext, dtype=int)
                 for cat in others],
                axis=0)

            # Sample the negatives to the size of the positive set.
            nPos = catpos.shape[0]
            nNeg = catneg.shape[0]
            catneg = catneg[np.random.randint(0, nNeg, nPos), :]
            catneg = catneg.take(select, axis=1)
            catneg[:, -1] = -1

            data = np.concatenate((catpos, catneg), axis=0)
            X = data[:, :nCodewords]
            y = data[:, nCodewords]

            metricstemp = _cross_validated_aucs(X, y, kernelType)
            # Average each metric (column) over the folds (rows).
            rocauc[iLDim, iCategory] = np.mean(metricstemp[:, 0])
            mapauc[iLDim, iCategory] = np.mean(metricstemp[:, 1])
            print('%s classified...' % (catname))

    outPath = rootDir + dataset + outDir + '%s%s%s%s' % (
        'dimensionality', dataset, kernelType, '.svg')
    outPath1 = rootDir + dataset + outDir + '%s%s%s%s' % (
        'dimensionality', dataset, kernelType, '.npz')

    plt.figure(0)
    plt.errorbar(np.arange(1, nDim + 1),
                 np.mean(rocauc, axis=1),
                 np.std(rocauc, axis=1),
                 fmt='-',
                 elinewidth=1,
                 marker='x',
                 label='AUC-ROC')
    plt.errorbar(np.arange(1, nDim + 1),
                 np.mean(mapauc, axis=1),
                 np.std(mapauc, axis=1),
                 fmt='--',
                 elinewidth=1,
                 marker='o',
                 label='MAP')
    plt.xlabel('Visual Categories')
    plt.ylabel('Performance Metric')
    plt.title('BOF Performance: %s : %s' % (dataset, kernelType))
    plt.legend(loc="lower right")
    plt.savefig(outPath, format='svg')

    # Saving the raw scores is best effort; the plot is still shown.
    try:
        np.savez(outPath1, rocauc, mapauc)
    except Exception:
        print('unable to write file %s' % (outPath1))

    plt.show()
    plt.close()