コード例 #1
0
ファイル: lsh_tune.py プロジェクト: goldsmitha/knn-classifier
# Experiment configuration.
trainpoints = 750   # point count handed to sortClasses for the training set
testpoints = 25     # point count handed to sortClasses for the test set
num_train_imgs = 209
num_test_imgs = 19
minhash = 4

# Values to sweep: ten evenly spaced integers between 10 and 300.
# The builtin int replaces the np.int alias, which NumPy deprecated and later
# removed; the resulting dtype is the same.
#param_values= np.logspace(np.log2(1),np.log2(30),num=10,base=2).astype(int)
param_values = np.linspace(10, 300, num=10).astype(int)
print(param_values)
values = []
f_maxes = []
#for val in param_values:

#    candidates=val

train_input, train_label = knn_trainer_helper.sortClasses(
    pad,
    trainpoints,
    knn_trainer_helper.readImages(num_train_imgs, 'input/train/', 'labels/train/'),
    seed=42,
)
test_input, test_label = knn_trainer_helper.sortClasses(
    pad,
    testpoints,
    knn_trainer_helper.readImages(num_test_imgs, 'input/test/', 'labels/test/'),
    seed=42,
    equalClasses=False,
)

# Reduce dimensionality of the data.
# PCA is fitted on a leading 10% slice only. The slice bound must be an
# integer: a float bound is rejected by current NumPy, so truncate explicitly
# (this matches the truncation that legacy float indexing performed).
pca_sample_size = int(trainpoints * num_train_imgs * .1)
pca = PCA().fit(train_input[:pca_sample_size])
variance = pca.explained_variance_ratio_

# Count the leading components needed for the cumulative explained variance
# to exceed 99.5%.  (Named `explained` rather than `sum` so the builtin sum()
# stays usable in the rest of the script.)
explained = 0
components = 0
while explained <= .995:
    explained += variance[components]
    components += 1

# Refit with the reduced component count on the same slice, then project the
# complete training set.
pca = PCA(n_components=components)
train_data_fit = pca.fit(train_input[:pca_sample_size])
train_data_transform = pca.transform(train_input[:])
del train_input  # drop the untransformed data once it has been projected
コード例 #2
0
ファイル: tuning.py プロジェクト: goldsmitha/knn-classifier
import numpy as np
import time
import sklearn.metrics
import random


# ---- experiment settings ----
test_imgcount = 19
testPoints = 5
train_imgcount = 209
trainPoints = 12500
# NOTE(review): `pad` is not read anywhere in this excerpt; both sortClasses
# calls below receive the literal 10 as their first argument - confirm which
# value is intended.
pad = 4
train_inputpath = 'input/train/'
train_labelpath = 'labels/train/'
test_inputpath = 'input/test/'
test_labelpath = 'labels/test/'

# Both image sets are read before the timer starts.
testimages = knn_trainer_helper.readImages(
    test_imgcount, test_inputpath, test_labelpath
)
trainimages = knn_trainer_helper.readImages(
    train_imgcount, train_inputpath, train_labelpath
)

# Everything from here on is covered by the elapsed-time printouts.
start = time.time()
train_data = knn_trainer_helper.sortClasses(10, trainPoints, trainimages, 1)
test_data = knn_trainer_helper.sortClasses(10, testPoints, testimages, 1)

# Element 0 is used as the classifier input and element 1 as the labels.
train_features, train_labels = train_data[0], train_data[1]
test_features, test_labels = test_data[0], test_data[1]

knn = sklearn.neighbors.KNeighborsClassifier(
    n_neighbors=25,
    weights='uniform',
)  # , algorithm='ball_tree')
print("hi")
knn.fit(train_features, train_labels)
print(time.time() - start)  # elapsed once fitting is done

test_predict = knn.predict(test_features)
score = sklearn.metrics.accuracy_score(test_labels, test_predict, normalize=True)
print(score)
print(time.time() - start)  # elapsed including prediction and scoring
コード例 #3
0
ファイル: pca.py プロジェクト: goldsmitha/knn-classifier
import test
import knn_trainer_helper
import sklearn.neighbors
from sklearn.neighbors import LSHForest
import time
import border
import matplotlib.pyplot as plt
import mahotas
import numpy as np
from scipy import stats
# Script settings.
side = 1024
start = time.time()
k = 25
pad = 200
numpoints = 12

# Only the first element of sortClasses' result is kept here.
train_data = knn_trainer_helper.sortClasses(
    pad,
    numpoints,
    knn_trainer_helper.readImages(209, 'input/train/', 'labels/train/'),
    seed=42,
)[0]

# Disabled debugging aid: display / save the first sample as a 401x401 image.
#plt.gray()
#print train_data[0]
#print len(train_data[0])
#plt.imshow(np.array(train_data[0]).reshape(401,401))
#mahotas.imsave('results/pca images/normal.tif', np.uint16(np.array(train_data[0]).reshape(401,401)))
#plt.title('normal')
#plt.show()


# Fit an unrestricted PCA to obtain the per-component explained-variance ratios.
pca = PCA().fit(train_data)
variance = pca.explained_variance_ratio_

# State for code that follows this excerpt.
# NOTE(review): `sum` shadows the builtin; the name is kept because the code
# that presumably reads it lies beyond the visible lines.
sum = 0
i = 0
accuracy = .995
コード例 #4
0
# Benchmark settings: exact brute-force kNN vs. LSHForest as `pad` varies.
test_imgcount = 19
train_imgcount = 209
numPoints = 1000
k = 20
estimators = 20
candidates = 200
minhash = 4

# Six evenly spaced pad values between 1 and 30, truncated to integers.
# The builtin int replaces the np.int alias, which NumPy deprecated and later
# removed; the resulting dtype is the same.
padValues = np.linspace(1, 30, 6).astype(int)
knn_times = []   # query time of the exact search, one entry per pad value
lshf_times = []  # query time of the approximate search, one entry per pad value

for pad in padValues:
    # NOTE(review): the images are re-read from disk on every iteration even
    # though the arguments never change; hoisting the reads out of the loop is
    # only safe if sortClasses leaves its input untouched - confirm.
    train_input, train_label = knn_trainer_helper.sortClasses(
        pad,
        numPoints,
        # Use the configured image count instead of repeating the literal 209.
        knn_trainer_helper.readImages(train_imgcount, 'input/train/', 'labels/train/'),
        seed=42,
    )
    test_input, test_label = knn_trainer_helper.sortClasses(
        pad,
        1,
        knn_trainer_helper.readImages(1, 'input/test/', 'labels/test/'),
        seed=42,
        equalClasses=False,
    )

    # NearestNeighbors is an unsupervised searcher and defines no `weights`
    # parameter (that belongs to the classifier/regressor variants), so the
    # stray keyword is dropped.
    knn = NearestNeighbors(n_neighbors=k, algorithm='brute').fit(train_input)
    # Only the query is timed; fitting happens before the clock starts.
    knn_start = time.time()
    exact_neighbors = knn.kneighbors(test_input, return_distance=False)
    knn_end = time.time()

    lshf = LSHForest(
        n_estimators=estimators,
        n_candidates=candidates,
        min_hash_match=minhash,
        n_neighbors=k,
        random_state=1,
    ).fit(train_input)
    lshf_start = time.time()
    approx_neighbors = lshf.kneighbors(test_input, return_distance=False)
    lshf_end = time.time()

    knn_times.append(knn_end - knn_start)
    lshf_times.append(lshf_end - lshf_start)
    print(pad)
コード例 #5
0
# Benchmark settings: exact brute-force kNN vs. LSHForest as the number of
# training points varies.
test_imgcount = 19
train_imgcount = 209
pad = 24
k = 20
estimators = 20
candidates = 200
minhash = 4

# Six evenly spaced point counts between 1 and 5000, truncated to integers.
# The builtin int replaces the np.int alias, which NumPy deprecated and later
# removed; the resulting dtype is the same.
pointValues = np.linspace(1, 5000, 6).astype(int)
knn_times = []
lshf_times = []

for numPoints in pointValues:

    # NOTE(review): the images are re-read from disk on every iteration even
    # though the arguments never change; hoisting the reads out of the loop is
    # only safe if sortClasses leaves its input untouched - confirm.
    train_input, train_label = knn_trainer_helper.sortClasses(
        pad,
        numPoints,
        # Use the configured image count instead of repeating the literal 209.
        knn_trainer_helper.readImages(train_imgcount, "input/train/", "labels/train/"),
        seed=42,
    )
    test_input, test_label = knn_trainer_helper.sortClasses(
        pad, 1, knn_trainer_helper.readImages(1, "input/test/", "labels/test/"), seed=42, equalClasses=False
    )

    # NearestNeighbors is an unsupervised searcher and defines no `weights`
    # parameter (that belongs to the classifier/regressor variants), so the
    # stray keyword is dropped.
    knn = NearestNeighbors(n_neighbors=k, algorithm="brute").fit(train_input)
    # Only the query is timed; fitting happens before the clock starts.
    knn_start = time.time()
    exact_neighbors = knn.kneighbors(test_input, return_distance=False)
    knn_end = time.time()

    lshf = LSHForest(
        n_estimators=estimators, n_candidates=candidates, min_hash_match=minhash, n_neighbors=k, random_state=1
    ).fit(train_input)
    lshf_start = time.time()
    approx_neighbors = lshf.kneighbors(test_input, return_distance=False)
コード例 #6
0
# Run configuration.
estimators = 20
candidates = 20
numpoints = 100
num_train_imgs = 209
minhash = 4
use_equal_classes = True
usePCA = True

# Test image 0: the raw image is read and bordered here; for the label only
# the path is recorded.
input_img = mahotas.imread("input/test/0.tif")
input_bordered = border.createBorder(pad, input_img)
label = "labels/test/0.tif"

# input = border.outputMatrix(pad,input_img)
train_input, train_label = knn_trainer_helper.sortClasses(
    pad,
    numpoints,
    knn_trainer_helper.readImages(num_train_imgs, "input/train/", "labels/train/"),
    seed=42,
    equalClasses=use_equal_classes,
)

if usePCA:
    # Reduce dimensionality of the training data: fit an unrestricted PCA
    # first to learn how much variance each component explains.
    pca = PCA().fit(train_input)
    variance = pca.explained_variance_ratio_

    # Count the leading components needed for the cumulative explained
    # variance to exceed 99.5%.  The accumulator is named `explained` rather
    # than `sum` so the builtin sum() is not shadowed at module level.
    explained = 0
    components = 0
    while explained <= 0.995:
        explained += variance[components]
        components += 1

    # Refit with only that many components and project the training data.
    pca = PCA(n_components=components)
    train_data = pca.fit_transform(train_input)
コード例 #7
0
    elif(dump):
        file=open(filepath, 'wb')
        cPickle.dump(list, file, -1)
        file.close()
    
start = time.time()

plt.gray()
pad = 8
k = 27
trainImgs = 209
numPoints = 200
inputpath = 'input/train/'
# Trailing slash added so the label directory is written the same way as
# inputpath; presumably readImages appends file names to these prefixes -
# confirm against the helper.
labelpath = 'labels/train/'
print('generating data')
training_data = knn_trainer_helper.sortClasses(
    pad,
    numPoints,
    knn_trainer_helper.readImages(trainImgs, inputpath, labelpath),
)
time1 = time.time()
# This is the first step, so the step time and the total elapsed time are the
# same value.  The message previously printed the word "seconds" twice.
step_elapsed = time1 - start
print('this step took ' + str(step_elapsed) + ' seconds, total time elapsed: ' + str(step_elapsed) + ' seconds')


#print 'pickling/unpickling data'
#pickle_help('pickle/full_data.pkl', list=full_data, dump=True)
#full_data=pickle_help('pickle/full_data.pkl', load=True)
#tree_input = sklearn.neighbors.BallTree(training_data[0], leaf_size=30)
#pickle_help('pickle/ball_tree.pkl', list=tree_input, dump=True)
# The timestamp is still taken even though the pickling step and its report
# are disabled.
time2 = time.time()
#print 'this step took ' +str(time2-time1) +' seconds, total time elapsed: ' + str(time2-start) + ' seconds'


print('creating classifier')
knn = sklearn.neighbors.KNeighborsClassifier(n_neighbors=k, weights='uniform', algorithm='ball_tree')
コード例 #8
0
import numpy as np
import matplotlib.pyplot as plt
import sklearn.neighbors

# Fixed settings for the timing run.
test_imgcount = 19
train_imgcount = 209
pad = 5
k = 20
numPoints = 3
train_inputpath = 'input/train/'
train_labelpath = 'labels/train/'
test_inputpath = 'input/test/'
test_labelpath = 'labels/test/'


# Double numPoints on every pass while it is below 400, timing one full pass
# (reading images, sorting, fitting and predicting) and plotting it as a red dot.
while numPoints < 400:
    start = time.time()

    train_data = knn_trainer_helper.sortClasses(
        pad,
        numPoints,
        knn_trainer_helper.readImages(train_imgcount, train_inputpath, train_labelpath),
        seed=42,
    )
    test_data = knn_trainer_helper.sortClasses(
        pad,
        500,
        knn_trainer_helper.readImages(test_imgcount, test_inputpath, test_labelpath),
        seed=42,
    )

    knn = sklearn.neighbors.KNeighborsClassifier(
        n_neighbors=k,
        weights='uniform',
        algorithm='ball_tree',
    )
    knn.fit(train_data[0], train_data[1])
    # The predictions are not inspected; the call is made so it is timed.
    test_predict = knn.predict(test_data[0])

    end = time.time()
    total = end - start
    print('(%s, %s)' % (numPoints, total))
    plt.plot(numPoints, total, 'ro')
    numPoints *= 2
plt.show()