def blTopic():
    """Run the baseline one-vs-rest evaluation over every category.

    Command-line options are read through the module-level ``parser``
    (``dataset``, ``neighbors``, ``nFold``, ``nTopic``, ``beta``,
    ``verbose``).  For each category returned by ``getCatMap(dataset)``:

    * its file ``rootDir + dataset + bofDir + <category> + <ext>`` is loaded
      and labelled 1 in column ``nTopic``;
    * the files of all *other* categories are loaded, stacked, sampled
      (with replacement) down to the number of positive rows and labelled 0;
    * both sets are stacked, shuffled and passed to
      ``classify.knnClassify``.

    Returns:
        numpy array with one entry per category: ``np.mean`` of whatever
        ``classify.knnClassify`` returned for that category.  The matching
        standard deviations are computed and printed in verbose mode but
        are not returned.

    Raises:
        ValueError: if the dataset has fewer than two categories, since no
            negative examples can be built.
    """
    (options, args) = parser.parse_args(sys.argv[1:])  # @UnusedVariable
    dataset = options.dataset
    neighbors = options.neighbors
    nFold = options.nFold
    nTopic = options.nTopic
    beta = options.beta

    dataPath = rootDir + dataset + bofDir
    catmap = getCatMap(dataset)
    # Materialise the keys so the order is fixed and len() works on both
    # Python 2 and Python 3.
    catList = list(catmap.keys())

    # remove if bof of 1000 words is computed for all categories
    if nTopic == 1000:
        dataext = bofext
    else:
        dataext = str(nTopic) + bofext

    nCategory = len(catList)
    perfMean = np.zeros(nCategory)
    perfStd = np.zeros(nCategory)

    for iCategory, catname in enumerate(catList):
        print(catname)

        # Positive examples: keep the first nTopic columns and overwrite
        # column nTopic with the label 1.
        catpos = np.genfromtxt(dataPath + catname + dataext, dtype=int)
        catpos = catpos[:, :nTopic + 1]
        catpos[:, nTopic] = 1

        # Negative examples: read every *other* category's file.  The path
        # must be rebuilt per category; reusing the positive file here
        # would make the negatives identical to the positives.
        negParts = [
            np.genfromtxt(dataPath + cats + dataext, dtype=int)
            for cats in catList
            if cats != catname
        ]
        if not negParts:
            raise ValueError(
                'need at least two categories to build negative examples')
        catneg = np.concatenate(negParts, axis=0)

        # Sample the negatives (with replacement) so that both classes
        # have the same number of rows.
        nPos = catpos.shape[0]
        nNeg = catneg.shape[0]
        catneg = catneg[np.random.randint(0, nNeg, nPos), :]
        catneg = catneg[:, :nTopic + 1]
        catneg[:, nTopic] = 0

        # combine positive and negative data
        botData = np.concatenate((catpos, catneg), axis=0)
        # shuffle the rows to aid in random selection of train and test
        np.random.shuffle(botData)

        if options.verbose:
            print('classifying...')
        catPerfKNN = classify.knnClassify(
            botData, neighbors, nFold, beta, nMetrics)
        perfMean[iCategory] = np.mean(catPerfKNN)
        perfStd[iCategory] = np.std(catPerfKNN)

    if options.verbose:
        print(perfMean)
        print(perfStd)
    return perfMean
def ccTopic():
    """Evaluate co-clustered features per category, one-vs-rest.

    Command-line options are read through the module-level ``parser``
    (``dataset``, ``nRowCluster``, ``nTopic``, ``ccType``, ``kernelType``,
    ``nFold``, ``nCodeword``, ``beta``, ``verbose``).  For each category
    returned by ``getCatMap(dataset)``:

    * positives come from the category's own file (label 1 written into
      column ``nCodeword``); negatives come from all other categories'
      files, sampled with replacement to the positive count (label 0);
    * ``cocluster.coclust`` is run on the stacked data and the second
      element of its result is parsed as one integer cluster id per column;
    * columns sharing a cluster id are summed row-wise into an
      ``nTopic``-wide matrix, the label column is appended, and the result
      is passed to ``classify.knnClassify`` with 10 neighbours.

    Returns:
        ``[perfMean, perfStd]`` - two numpy arrays with one entry per
        category holding ``np.mean`` / ``np.std`` of the
        ``classify.knnClassify`` result.

    Raises:
        ValueError: if the dataset has fewer than two categories, since no
            negative examples can be built.
    """
    (options, args) = parser.parse_args(sys.argv[1:])  # @UnusedVariable
    dataset = options.dataset
    nRowCluster = options.nRowCluster
    nTopic = options.nTopic
    ccType = options.ccType
    kernelType = options.kernelType
    nFold = options.nFold
    nCodeword = options.nCodeword
    beta = options.beta

    if options.verbose:
        # Space-separated, same text as a multi-argument print statement.
        print(' '.join(str(v) for v in (
            dataset, nRowCluster, nTopic, ccType, kernelType, beta, nFold,
            nCodeword)))
        print(options)

    dataPath = rootDir + dataset + bofDir
    catmap = getCatMap(dataset)
    # Materialise the keys so the order is fixed and len() works on both
    # Python 2 and Python 3.
    catList = list(catmap.keys())
    dataext = str(nCodeword) + bofext

    nCategory = len(catList)
    perfMean = np.zeros(nCategory)
    perfStd = np.zeros(nCategory)

    for iCategory, catName in enumerate(catList):
        catpos = np.genfromtxt(dataPath + catName + dataext, dtype=int)
        if options.verbose:
            print(catName)
        catpos = catpos[:, :nCodeword + 1]
        catpos[:, nCodeword] = 1

        # Negative examples: read every *other* category's file.  The path
        # must be rebuilt per category; reusing the positive file here
        # would make the negatives identical to the positives.
        negParts = [
            np.genfromtxt(dataPath + cats + dataext, dtype=int)
            for cats in catList
            if cats != catName
        ]
        if not negParts:
            raise ValueError(
                'need at least two categories to build negative examples')
        catneg = np.concatenate(negParts, axis=0)

        # Sample the negatives (with replacement) so that both classes
        # have the same number of rows.
        nPos = catpos.shape[0]
        nNeg = catneg.shape[0]
        catneg = catneg[np.random.randint(0, nNeg, nPos), :]
        catneg = catneg[:, :nCodeword + 1]
        catneg[:, nCodeword] = 0

        # combine positive and negative data
        bofData = np.concatenate((catpos, catneg), axis=0)

        if options.verbose:
            print('co-clustering...')
        ccData = cocluster.coclust(
            bofData, dataset, nRowCluster, nTopic, ccType)
        # ccData[1] is parsed as whitespace-separated integer cluster ids,
        # one per column of bofData (presumably - confirm against
        # cocluster.coclust).
        ccCol = np.array([int(i) for i in ccData[1].split()])

        # Collapse the columns of each cluster into a single summed column.
        tempCC = np.zeros((bofData.shape[0], nTopic))
        for j in sorted(set(ccCol)):
            tempCC[:, j] = np.sum(bofData[:, ccCol == j], axis=1)
        # Re-attach the label column as the last column.
        botData = np.vstack((tempCC.T, bofData[:, -1])).T

        if options.verbose:
            print('classifying...')
        # NOTE(review): unlike blTopic/ccWord the rows are not shuffled
        # before classification, and the neighbour count is fixed at 10
        # rather than taken from the options - confirm both are intended.
        # An SVM variant (classify.ccClassify with kernelType) was
        # previously tried here and is disabled.
        catPerfKNN = classify.knnClassify(botData, 10, nFold, beta, nMetrics)
        perfMean[iCategory] = np.mean(catPerfKNN)
        perfStd[iCategory] = np.std(catPerfKNN)

    if options.verbose:
        print(perfMean)
        print(perfStd)
    return [perfMean, perfStd]
def ccWord():
    """Build a co-clustered codebook, write BoF files and evaluate them.

    Command-line options are read through the module-level ``parser``
    (``dataset``, ``nCodeword``, ``lowerDim``, ``ccType``, ``kernelType``,
    ``beta``, ``figfmt``, ``nFold``, ``desc``, ``nClusterSample``,
    ``verbose``).  The steps are:

    1. ``collateClusterData`` gathers ``nClusterSample / nCategory``
       (rounded) samples per category.
    2. ``cocluster.coclustWord`` is run on that data; the first two
       elements of its result are parsed as integer row / column cluster
       ids.
    3. Each row is reduced to one L2 norm per column cluster, and the
       codebook is the mean of those reduced rows per row cluster.
    4. ``writebof`` is called with the codebook; the per-category files
       ``rootDir + dataset + bofDir + <category> + <ccType><nRow><nCol> +
       bofext`` are then read back and evaluated one-vs-rest with
       ``classify.knnClassify`` (10 neighbours).

    Returns:
        ``[perfMean, perfStd]`` - two numpy arrays with one entry per
        category holding ``np.mean`` / ``np.std`` of the
        ``classify.knnClassify`` result.

    Raises:
        ValueError: if the dataset has fewer than two categories, since no
            negative examples can be built.
    """
    (options, args) = parser.parse_args(sys.argv[1:])  # @UnusedVariable
    dataset = options.dataset
    nRowCluster = options.nCodeword
    nColCluster = options.lowerDim
    ccType = options.ccType
    kernelType = options.kernelType
    beta = options.beta
    figfmt = options.figfmt
    nFold = options.nFold
    desc = options.desc
    nClusterSample = options.nClusterSample

    if options.verbose:
        # Space-separated, same text as a multi-argument print statement.
        print(' '.join(str(v) for v in (
            dataset, nRowCluster, nColCluster, ccType, kernelType, beta,
            figfmt, nFold, nClusterSample)))

    dataPath = rootDir + dataset + dataDir
    catmap = getCatMap(dataset)
    # Materialise the keys so the order is fixed and len() works on both
    # Python 2 and Python 3.
    catList = list(catmap.keys())
    dataext = '.' + desc
    nCategory = len(catList)
    dim = descdim.get(desc)
    # Explicit float division: with two ints Python 2 would floor before
    # np.round ever saw the value, making the rounding a no-op.
    nSamplePerCategory = int(np.round(float(nClusterSample) / nCategory))

    if options.verbose:
        print('collating cluster data...')
    clusterData = collateClusterData(
        dataPath, dataext, catList, nSamplePerCategory, dim)

    if options.verbose:
        print('coclustering...')
    ccData = cocluster.coclustWord(
        clusterData, dataset, nRowCluster, nColCluster, ccType)
    # Whitespace-separated integer cluster ids for rows and columns.
    ccRow = np.array([int(i) for i in ccData[0].split()])
    ccCol = np.array([int(i) for i in ccData[1].split()])

    cctemp = np.zeros((clusterData.shape[0], nColCluster))
    codebook = np.zeros((nRowCluster, nColCluster))

    # The column masks do not depend on the row, so build them once.
    colMasks = [(j, ccCol == j) for j in sorted(set(ccCol))]
    for i in np.arange(clusterData.shape[0]):
        for j, mask in colMasks:
            # L2 norm of the row restricted to the columns of cluster j.
            cctemp[i, j] = np.linalg.norm(clusterData[i, mask], 2)
    for i in sorted(set(ccRow)):
        # Codeword i is the mean reduced row of row-cluster i.
        codebook[i, :] = np.mean(cctemp[ccRow == i, :], 0)

    if options.verbose:
        print('writing bof...')
    writebof(dataset, catList, codebook, ccCol, nRowCluster, desc, ccType,
             nRowCluster, nColCluster)

    bofPrefix = rootDir + dataset + bofDir
    bofSuffix = str(ccType) + str(nRowCluster) + str(nColCluster) + bofext

    perfMean = np.zeros(nCategory)
    perfStd = np.zeros(nCategory)
    for iCategory, catName in enumerate(catList):
        # Positive examples: label 1 written into column nColCluster.
        catpos = np.genfromtxt(bofPrefix + catName + bofSuffix, dtype=int)
        catpos = catpos[:, :nColCluster + 1]
        catpos[:, nColCluster] = 1

        # Negative examples: read every *other* category's file.  The path
        # has to use the other category's name; using the positive
        # category here would make the negatives identical to the
        # positives.
        negParts = [
            np.genfromtxt(bofPrefix + otherCat + bofSuffix, dtype=int)
            for otherCat in catList
            if otherCat != catName
        ]
        if not negParts:
            raise ValueError(
                'need at least two categories to build negative examples')
        catneg = np.concatenate(negParts, axis=0)

        # Sample the negatives (with replacement) so that both classes
        # have the same number of rows.
        nPos = catpos.shape[0]
        nNeg = catneg.shape[0]
        catneg = catneg[np.random.randint(0, nNeg, nPos), :]
        catneg = catneg[:, :nColCluster + 1]
        catneg[:, nColCluster] = 0

        # combine positive and negative data
        catData = np.concatenate((catpos, catneg), axis=0)
        # shuffle the rows to aid in random selection of train and test
        np.random.shuffle(catData)

        # NOTE(review): the neighbour count is fixed at 10 rather than
        # taken from the options; an SVM variant (classify.ccClassify with
        # kernelType) was previously tried here and is disabled.
        catPerf = classify.knnClassify(catData, 10, nFold, beta, nMetrics)
        perfMean[iCategory] = np.mean(catPerf)
        perfStd[iCategory] = np.std(catPerf)

    if options.verbose:
        print(perfMean)
        print(perfStd)
    return [perfMean, perfStd]