def knnClassify(data, n_neighbors=10, nFold=10, beta=1.0, nMetrics=1):
    """Evaluate a k-NN classifier with stratified cross-validation.

    Parameters
    ----------
    data : 2-D array whose last column is the class label; all other
        columns are features.
    n_neighbors : number of neighbours for the k-NN classifier.
    nFold : number of stratified CV folds.
    beta : beta of the F-beta score used as the per-fold metric.
    nMetrics : unused; kept for interface compatibility.

    Returns
    -------
    numpy array of length ``nFold`` holding the F-beta score of each fold.
    """
    X = data[:, :-1]
    y = data[:, -1]
    # BUG FIX: previously hard-coded 'n_neighbors': 10, silently ignoring
    # the n_neighbors argument.
    clfParamList = {'n_neighbors': n_neighbors, 'algorithm': 'auto'}
    classifier = NeighborsClassifier(**clfParamList)
    cv = StratifiedKFold(y, k=nFold)
    avgprec = np.zeros(nFold)
    for icv, (train, test) in enumerate(cv):
        # Fit on the training split, score predictions on the held-out split.
        clf = classifier.fit(X[train], y[train])
        ypred = clf.predict(X[test])
        avgprec[icv] = fbeta_score(y[test], ypred, beta)
    return avgprec
# Esempio n. 2
# 0
def test(train_data, test_data, train_class, test_class):
    neigh_1 = NeighborsClassifier(n_neighbors=1)
    neigh_1.fit(train_data, train_class)

    neigh_3 = NeighborsClassifier(n_neighbors=3)
    neigh_3.fit(train_data, train_class)

    neigh_5 = NeighborsClassifier(n_neighbors=5)
    neigh_5.fit(train_data, train_class)

    neigh_11 = NeighborsClassifier(n_neighbors=11)
    neigh_11.fit(train_data, train_class)

    neigh_17 = NeighborsClassifier(n_neighbors=17)
    neigh_17.fit(train_data, train_class)

    neigh_21 = NeighborsClassifier(n_neighbors=21)
    neigh_21.fit(train_data, train_class)

    print success_rate(neigh_1, test_data, test_class)
    print success_rate(neigh_3, test_data, test_class)
    print success_rate(neigh_5, test_data, test_class)
    print success_rate(neigh_11, test_data, test_class)
    print success_rate(neigh_17, test_data, test_class)
    print success_rate(neigh_21, test_data, test_class)
def blWord():
    (options, args) = parser.parse_args(sys.argv[1:]) #@UnusedVariable
    dataset = options.dataset
    
#    kernelType = options.kernelType
    nFold = options.nFold
    nCodeword = options.nCodeword   
    
    dataPath = rootDir+dataset+bofDir
    catmap = getCatMap(dataset)
    catList = catmap.keys()
    if(nCodeword==1000):
        dataext = bofext
    else:
        dataext = str(nCodeword)+bofext
    nCategory = len(catList)
    
    perfMean = np.zeros(nCategory)
    perfStd = np.zeros(nCategory)
    
    for iCat,catname in enumerate(catList):
        print catname
        #read the category data which will positive
        fname = dataPath+catname+dataext
        catpos = np.genfromtxt(fname,dtype=np.int) # catpos
        catpos = catpos[:,:nCodeword]
        posLabel = np.ones((catpos.shape[0],1),dtype=np.int)
        catpos = np.concatenate((catpos,posLabel),axis=1)
        #read the category data of remaining classes
        for cats in catList:
            if(cats!=catname):
                firstvisit = True
                if(firstvisit):
                    catneg = np.genfromtxt(fname,dtype=np.int)
                    firstvisit = False
                else : 
                    catneg = np.concatenate((catneg,np.genfromtxt(fname,dtype=np.int)),axis=0)
        #sample the negative data to have equal size as the positive
        nPos = catpos.shape[0]
        nNeg = catneg.shape[0]
        catneg = catneg[np.random.randint(0,nNeg,nPos),:] #catneg
        catneg = catneg[:,:nCodeword]
        negLabel = np.zeros((catneg.shape[0],1),dtype=np.int)
        catneg = np.concatenate((catneg,negLabel),axis=1)
        #combine positive and negative data
        data = np.concatenate((catpos,catneg),axis=0)
        #shuffle the rows to aid in random selection of train and test
        np.random.shuffle(data)
        
        X = data[:,:nCodeword]
        y = data[:,nCodeword]
        
#        clfParamList = {'kernel': kernelType, 'gamma': 1e-3, 'C': 1, 'degree':4, 'probability':True,'shrinking':True,'cache_size':1000}
#        classifier = SVC(**clfParamList)
        cv = StratifiedKFold(y, k=nFold)
        clfParamList = {'n_neighbors':10,'algorithm':'auto'}
        classifier = NeighborsClassifier(**clfParamList)
        avgprec = np.zeros(nFold)
        for icv,(train, test) in enumerate(cv):
            clf = classifier.fit(X[train], y[train])
#            probas_ = clf.predict_proba(X[test])
#            precision, recall, thresholds = precision_recall_curve(y[test], probas_[:,1]) #@UnusedVariable
#            avgprec[icv] = auc(recall,precision)
            ypred = clf.predict(X[test])
            avgprec[icv] = f1_score(y[test],ypred)
        perfMean[iCat] = np.mean(avgprec)
        perfStd[iCat] = np.std(avgprec)
        
    if(options.verbose):
        print perfMean
        print perfStd
    
    return [perfMean,perfStd]
# Esempio n. 4
# 0
# Normalise the PCA copy of the data, then project it onto its
# principal components (labels in data_pca[1] are carried through).
data_pca = (normalize(data_pca[0]).tolist(), data_pca[1])

pca = PCA()
pca.fit(data_pca[0])
data_pca = (pca.transform(data_pca[0]).tolist(), data_pca[1])

# Hyper-parameter grids, one per model family.
kneighbors_grid = [1, 3, 5, 11, 21, 31]
svm_linear_grid = [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]
# Every (C, gamma) pair for the RBF kernel — Cartesian product of the
# linear-SVM grid with itself.
svm_rbf_grid = [[c, g] for c in svm_linear_grid for g in svm_linear_grid]
randforest_grid = [2, 3, 5, 10, 20, 40, 60]

# k-NN: raw features, then PCA-projected features.
kneighbors_rates = rates(data, kneighbors_grid,
                         lambda k: NeighborsClassifier(n_neighbors=k))
print_result(kneighbors_rates)
kneighbors_pca_rates = rates(data_pca, kneighbors_grid,
                             lambda k: NeighborsClassifier(n_neighbors=k))
print_result(kneighbors_pca_rates)

# Linear SVM: raw features, then PCA-projected features.
svm_linear_rates = rates(data, svm_linear_grid,
                         lambda c: SVC(C=c, kernel='linear'))
print_result(svm_linear_rates)
svm_linear_pca_rates = rates(data_pca, svm_linear_grid,
                             lambda c: SVC(C=c, kernel='linear'))
print_result(svm_linear_pca_rates)

# RBF SVM over the (C, gamma) pair grid.
svm_rbf_rates = rates(data, svm_rbf_grid,
                      lambda p: SVC(C=p[0], gamma=p[1], kernel='rbf'))
print_result(svm_rbf_rates)