Example 1
def classify_nn(X, y, k):
    m = X.shape[0]
    m_test = int(m*0.25)
    m_train = m - m_test
 
    # Split data in train and test data
    # A random permutation, to split the data randomly
    #np.random.seed(k)
    indices = np.random.permutation(m)
    X_train = X[indices[:m_train]]
    y_train = y[indices[:m_train]]
    X_test  = X[indices[m_train:]]
    y_test  = y[indices[m_train:]]
    
    # Create and fit a nearest-neighbor classifier
    from sklearn.neighbors import KNeighborsClassifier
    knn = KNeighborsClassifier(n_neighbors=k)   # use the k passed to classify_nn
    knn.fit(X_train, y_train)
    print('knn=%s' % knn)
    y_pred = knn.predict(X_test)
    correct = y_pred == y_test
    print('k=%2d: Num tests=%6d correct=%6d = %2d%%' % (k, correct.shape[0], correct.sum(),
                int(100*correct.sum()/correct.shape[0])))
    if False:   # flip to True for per-sample debugging output
        for i in range(correct.shape[0]):
            print('  %d==%d => %d' % (y_pred[i], y_test[i], correct[i]))
        exit()
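
# A minimal usage sketch (not from the original code): classify_nn only needs a
# feature matrix, a label vector and a neighbour count, so scikit-learn's iris
# data set is enough to exercise it.  Assumes numpy is imported as np at module
# level, as the function above already requires.
#
#     from sklearn.datasets import load_iris
#     iris = load_iris()
#     for k in (1, 3, 5, 11):
#         classify_nn(iris.data, iris.target, k)
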
def knnClassify(data, n_neighbors=10, nFold=10, beta=1.0, nMetrics=1):
    X = data[:, :-1]
    y = data[:, -1]
    # Assumes numpy as np plus KNeighborsClassifier, StratifiedKFold and
    # fbeta_score imported from sklearn at module level.
    clfParamList = {'n_neighbors': n_neighbors, 'algorithm': 'auto'}
    classifier = KNeighborsClassifier(**clfParamList)
    cv = StratifiedKFold(n_splits=nFold)
    avgprec = np.zeros(nFold)
    for icv, (train, test) in enumerate(cv.split(X, y)):
        clf = classifier.fit(X[train], y[train])
        ypred = clf.predict(X[test])
        avgprec[icv] = fbeta_score(y[test], ypred, beta=beta)
    return avgprec
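
# A minimal usage sketch (not from the original code): knnClassify expects a
# single array whose last column holds the class label, so a toy problem can be
# assembled with np.column_stack.  make_classification is used purely for
# illustration.
#
#     from sklearn.datasets import make_classification
#     Xd, yd = make_classification(n_samples=200, n_features=20, random_state=0)
#     scores = knnClassify(np.column_stack((Xd, yd)), n_neighbors=5, nFold=5)
#     print(scores.mean())
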
def run_tests(classifier, test_data, c_type, name, k_kmeans, knn_ks, t_shelf):
    for k_knn in knn_ks:
        if c_type == 'knn':
            means, labels = classifier
            cls = KNeighborsClassifier(n_neighbors=k_knn)
            cls.fit(means, labels)
        elif c_type == 'svm':
            cls = classifier
        # results:  every test sample is labeled by classifier
        X = np.vstack(test_data)
        start = time.time()
        results = cls.predict(X)
        elapsed_time = time.time() - start
        save_time(elapsed_time, len(X), t_shelf, name, k_kmeans, k_knn)
        targets = make_targets(test_data)
        save_results(results, targets, name, k_kmeans, k_knn)
        print_results(results, targets)
def blWord():
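    # parser, rootDir, bofDir, bofext and getCatMap are module-level names
    # defined elsewhere in the original project; they are not part of this snippet.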
    (options, args) = parser.parse_args(sys.argv[1:]) #@UnusedVariable
    dataset = options.dataset
    
#    kernelType = options.kernelType
    nFold = options.nFold
    nCodeword = options.nCodeword   
    
    dataPath = rootDir+dataset+bofDir
    catmap = getCatMap(dataset)
    catList = catmap.keys()
    if(nCodeword==1000):
        dataext = bofext
    else:
        dataext = str(nCodeword)+bofext
    nCategory = len(catList)
    
    perfMean = np.zeros(nCategory)
    perfStd = np.zeros(nCategory)
    
    for iCat,catname in enumerate(catList):
        print(catname)
        # read the data of the current category, which forms the positive class
        fname = dataPath+catname+dataext
        catpos = np.genfromtxt(fname,dtype=int)
        catpos = catpos[:,:nCodeword]
        posLabel = np.ones((catpos.shape[0],1),dtype=int)
        catpos = np.concatenate((catpos,posLabel),axis=1)
        # read the data of the remaining categories, which form the negative class
        firstvisit = True
        for cats in catList:
            if(cats!=catname):
                negfname = dataPath+cats+dataext
                if(firstvisit):
                    catneg = np.genfromtxt(negfname,dtype=int)
                    firstvisit = False
                else:
                    catneg = np.concatenate((catneg,np.genfromtxt(negfname,dtype=int)),axis=0)
        #sample the negative data to have equal size as the positive
        nPos = catpos.shape[0]
        nNeg = catneg.shape[0]
        catneg = catneg[np.random.randint(0,nNeg,nPos),:] #catneg
        catneg = catneg[:,:nCodeword]
        negLabel = np.zeros((catneg.shape[0],1),dtype=int)
        catneg = np.concatenate((catneg,negLabel),axis=1)
        #combine positive and negative data
        data = np.concatenate((catpos,catneg),axis=0)
        #shuffle the rows to aid in random selection of train and test
        np.random.shuffle(data)
        
        X = data[:,:nCodeword]
        y = data[:,nCodeword]
        
#        clfParamList = {'kernel': kernelType, 'gamma': 1e-3, 'C': 1, 'degree':4, 'probability':True,'shrinking':True,'cache_size':1000}
#        classifier = SVC(**clfParamList)
        cv = StratifiedKFold(n_splits=nFold)
        clfParamList = {'n_neighbors':10,'algorithm':'auto'}
        classifier = KNeighborsClassifier(**clfParamList)
        avgprec = np.zeros(nFold)
        for icv,(train, test) in enumerate(cv.split(X, y)):
            clf = classifier.fit(X[train], y[train])
#            probas_ = clf.predict_proba(X[test])
#            precision, recall, thresholds = precision_recall_curve(y[test], probas_[:,1]) #@UnusedVariable
#            avgprec[icv] = auc(recall,precision)
            ypred = clf.predict(X[test])
            avgprec[icv] = f1_score(y[test],ypred)
        perfMean[iCat] = np.mean(avgprec)
        perfStd[iCat] = np.std(avgprec)
        
    if(options.verbose):
        print(perfMean)
        print(perfStd)
    
    return [perfMean,perfStd]
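
blWord builds one balanced positive/negative problem per category and scores a 10-nearest-neighbour classifier with stratified cross-validation. Under current scikit-learn the inner evaluation loop can be expressed with cross_val_score; the sketch below is a minimal illustration under that assumption, with X and y standing for the per-category features and binary labels assembled above.

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_score

def category_f1(X, y, n_neighbors=10, nFold=10):
    # Mean and standard deviation of the F1 score of a k-NN classifier,
    # estimated by stratified nFold cross-validation.
    clf = KNeighborsClassifier(n_neighbors=n_neighbors, algorithm='auto')
    scores = cross_val_score(clf, X, y, cv=StratifiedKFold(n_splits=nFold), scoring='f1')
    return scores.mean(), scores.std()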
Example 5
data_pca = (normalize(data_pca[0]).tolist(), data_pca[1])

pca = PCA()
pca.fit(data_pca[0])
data_pca = (pca.transform(data_pca[0]).tolist(), data_pca[1])

kneighbors_grid = [1, 3, 5, 11, 21, 31]
svm_linear_grid = [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]
svm_rbf_grid = []
for i in svm_linear_grid:
    for j in svm_linear_grid:
        svm_rbf_grid.append([i, j])
randforest_grid = [2, 3, 5, 10, 20, 40, 60]

kneighbors_rates = rates(data, kneighbors_grid,
                         lambda K: KNeighborsClassifier(n_neighbors=K))
print_result(kneighbors_rates)
kneighbors_pca_rates = rates(data_pca, kneighbors_grid,
                             lambda K: KNeighborsClassifier(n_neighbors=K))
print_result(kneighbors_pca_rates)

svm_linear_rates = rates(data, svm_linear_grid,
                         lambda C: SVC(C=C, kernel='linear'))
print_result(svm_linear_rates)
svm_linear_pca_rates = rates(data_pca, svm_linear_grid,
                             lambda C: SVC(C=C, kernel='linear'))
print_result(svm_linear_pca_rates)

svm_rbf_rates = rates(data, svm_rbf_grid,
                      lambda C: SVC(C=C[0], gamma=C[1], kernel='rbf'))
print_result(svm_rbf_rates)
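
The grids above sweep the number of neighbours for k-NN, C for a linear SVM, and (C, gamma) pairs for an RBF SVM; rates and print_result, which do the actual scoring and reporting, are defined elsewhere in that project. In current scikit-learn the same sweeps are typically expressed with GridSearchCV. The sketch below is a minimal illustration under that assumption and uses make_classification purely as stand-in data.

from sklearn.datasets import make_classification
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

X, y = make_classification(n_samples=300, n_features=20, random_state=0)  # stand-in data

c_grid = [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]
searches = {
    'knn': GridSearchCV(KNeighborsClassifier(), {'n_neighbors': [1, 3, 5, 11, 21, 31]}),
    'svm_linear': GridSearchCV(SVC(kernel='linear'), {'C': c_grid}),
    'svm_rbf': GridSearchCV(SVC(kernel='rbf'), {'C': c_grid, 'gamma': c_grid}),
}
for name, search in searches.items():
    search.fit(X, y)
    print(name, search.best_params_, search.best_score_)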
Example 6
def test(train_data, test_data, train_class, test_class):
    # Fit one k-NN classifier per neighbourhood size and print its success
    # rate on the held-out test split.  Assumes KNeighborsClassifier is
    # imported from sklearn.neighbors at module level.
    for k in (1, 3, 5, 11, 17, 21):
        neigh = KNeighborsClassifier(n_neighbors=k)
        neigh.fit(train_data, train_class)
        print(success_rate(neigh, test_data, test_class))
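
success_rate is not part of the snippet; judging from how it is called, it returns the fraction of correctly classified test samples. A minimal stand-in, assuming exactly that behaviour:

def success_rate(clf, test_data, test_class):
    # Fraction of test samples whose predicted class matches the true class;
    # for scikit-learn classifiers this is what clf.score already computes.
    return clf.score(test_data, test_class)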