# Experiment configuration.
trainpoints = 750
testpoints = 25
num_train_imgs = 209
num_test_imgs = 19
minhash = 4

# Parameter values for a sweep; the sweep loop itself is currently disabled
# (see the commented-out ``for`` below).
# ``np.int`` was only an alias of the builtin ``int`` and has been removed
# from NumPy, so the builtin is used for the cast.
# param_values = np.logspace(np.log2(1), np.log2(30), num=10, base=2).astype(int)
param_values = np.linspace(10, 300, num=10).astype(int)
print(param_values)
values = []
f_maxes = []

# for val in param_values:
#     candidates = val
train_input, train_label = knn_trainer_helper.sortClasses(
    pad,
    trainpoints,
    knn_trainer_helper.readImages(num_train_imgs, 'input/train/', 'labels/train/'),
    seed=42,
)
test_input, test_label = knn_trainer_helper.sortClasses(
    pad,
    testpoints,
    knn_trainer_helper.readImages(num_test_imgs, 'input/test/', 'labels/test/'),
    seed=42,
    equalClasses=False,
)

# Reduce dimensionality of data.
# PCA is fitted on the first 10% of trainpoints * num_train_imgs rows only.
# The slice bound has to be an integer: the previous float bound
# ((trainpoints * num_train_imgs) * .1) is rejected by list slicing and by
# current NumPy, so it is computed with integer division instead.
pca_fit_rows = (trainpoints * num_train_imgs) // 10

pca = PCA().fit(train_input[:pca_fit_rows])
variance = pca.explained_variance_ratio_

# Count the leading components needed for the cumulative explained variance
# to exceed 99.5%. The accumulator is not called ``sum`` so the builtin stays
# usable, and the bound check stops the loop at the last component instead of
# raising IndexError if rounding keeps the total at or below the threshold.
explained = 0
components = 0
while explained <= .995 and components < len(variance):
    explained += variance[components]
    components += 1

# Refit with the reduced component count, then project the full training set.
pca = PCA(n_components=components)
train_data_fit = pca.fit(train_input[:pca_fit_rows])
train_data_transform = pca.transform(train_input)
del train_input  # only the projected data is needed from here on
# Exact k-nearest-neighbour baseline: fit on sampled training points, predict
# the sampled test points, and report accuracy plus elapsed time.

# Experiment configuration.
test_imgcount = 19
testPoints = 5
train_imgcount = 209
trainPoints = 12500
pad = 4

train_inputpath = 'input/train/'
train_labelpath = 'labels/train/'
test_inputpath = 'input/test/'
test_labelpath = 'labels/test/'

# Image loading happens before the timer starts, so it is not timed.
testimages = knn_trainer_helper.readImages(
    test_imgcount, test_inputpath, test_labelpath
)
trainimages = knn_trainer_helper.readImages(
    train_imgcount, train_inputpath, train_labelpath
)

start = time.time()

# NOTE(review): ``pad`` is set to 4 above, but the literal 10 is what is
# passed as the first argument here -- confirm which value is intended.
# NOTE(review): the fourth positional argument (1) is presumably the seed,
# which other scripts pass as ``seed=42`` -- confirm against sortClasses.
train_data = knn_trainer_helper.sortClasses(10, trainPoints, trainimages, 1)
test_data = knn_trainer_helper.sortClasses(10, testPoints, testimages, 1)

knn = sklearn.neighbors.KNeighborsClassifier(
    n_neighbors=25,
    weights='uniform',
)  # , algorithm='ball_tree')
print("hi")

# Element 0 of each sortClasses result is used as the feature matrix and
# element 1 as the label vector.
knn.fit(train_data[0], train_data[1])
print(time.time() - start)

test_predict = knn.predict(test_data[0])
score = sklearn.metrics.accuracy_score(
    test_data[1], test_predict, normalize=True
)
print(score)
print(time.time() - start)

# train_data=knn_trainer_helper.sortClasses(pad, numPoints, knn_trainer_helper.readImages(train_imgcount, train_inputpath, train_labelpath))
# test_data=knn_trainer_helper.sortClasses(pad,numPoints,knn_trainer_helper.readImages(test_imgcount, test_inputpath, test_labelpath))
# print (len(train_data[1])*255)/np.sum(train_data[1])
# print (len(test_data[1])*255)/np.sum(test_data[1])
import test
import knn_trainer_helper
import sklearn.neighbors
from sklearn.neighbors import LSHForest
import time
import border
import matplotlib.pyplot as plt
import mahotas
import numpy as np
from scipy import stats

# Experiment configuration.
side = 1024
start = time.time()
k = 25
pad = 200
numpoints = 12

# Only element 0 of the sortClasses result (the input vectors) is kept.
train_data = knn_trainer_helper.sortClasses(
    pad,
    numpoints,
    knn_trainer_helper.readImages(209, 'input/train/', 'labels/train/'),
    seed=42,
)[0]

# Disabled debugging aid: view/save the first sample as a 401x401 image.
# plt.gray()
# print train_data[0]
# print len(train_data[0])
# plt.imshow(np.array(train_data[0]).reshape(401,401))
# mahotas.imsave('results/pca images/normal.tif', np.uint16(np.array(train_data[0]).reshape(401,401)))
# plt.title('normal')
# plt.show()

# Fit a PCA with default settings to obtain the per-component explained
# variance ratios.
# NOTE(review): PCA is not among the imports visible here -- confirm it is
# imported elsewhere (e.g. ``from sklearn.decomposition import PCA``).
pca = PCA().fit(train_data)
variance = pca.explained_variance_ratio_

# Initial state and 0.995 threshold, presumably consumed by a
# component-selection loop further down the file.
sum = 0
i = 0
accuracy = .995
# Timing comparison of exact brute-force neighbour search against LSHForest
# approximate search, repeated for a range of ``pad`` values.

# Experiment configuration.
test_imgcount = 19
train_imgcount = 209
numPoints = 1000
k = 20
estimators = 20
candidates = 200
minhash = 4

# Six pad values spread evenly over [1, 30], truncated to integers.
# ``np.int`` was only an alias of the builtin ``int`` and has been removed
# from NumPy, so the builtin is used for the cast.
padValues = np.linspace(1, 30, 6).astype(int)

# Query time in seconds per pad value, in the same order as padValues.
knn_times = []
lshf_times = []

for pad in padValues:
    train_input, train_label = knn_trainer_helper.sortClasses(
        pad,
        numPoints,
        knn_trainer_helper.readImages(train_imgcount, 'input/train/', 'labels/train/'),
        seed=42,
    )
    # A single point from a single test image is used as the query.
    test_input, test_label = knn_trainer_helper.sortClasses(
        pad,
        1,
        knn_trainer_helper.readImages(1, 'input/test/', 'labels/test/'),
        seed=42,
        equalClasses=False,
    )

    # Exact search. NearestNeighbors is an unsupervised index and takes no
    # ``weights`` argument (that belongs to KNeighborsClassifier); passing it
    # raises TypeError on current scikit-learn, so it is omitted.
    knn = NearestNeighbors(n_neighbors=k, algorithm='brute').fit(train_input)
    knn_start = time.time()
    exact_neighbors = knn.kneighbors(test_input, return_distance=False)
    knn_end = time.time()

    # Approximate search. Only the query is timed, not index construction.
    lshf = LSHForest(
        n_estimators=estimators,
        n_candidates=candidates,
        min_hash_match=minhash,
        n_neighbors=k,
        random_state=1,
    ).fit(train_input)
    lshf_start = time.time()
    approx_neighbors = lshf.kneighbors(test_input, return_distance=False)
    lshf_end = time.time()

    knn_times.append(knn_end - knn_start)
    lshf_times.append(lshf_end - lshf_start)
    print(pad)  # progress indicator
# Configuration for the run.
estimators = 20
candidates = 20
numpoints = 100
num_train_imgs = 209
minhash = 4
use_equal_classes = True
usePCA = True

# Test image to process: loaded, then passed through border.createBorder
# with the current ``pad``.
input_img = mahotas.imread("input/test/0.tif")
input_bordered = border.createBorder(pad, input_img)
label = "labels/test/0.tif"
# input = border.outputMatrix(pad,input_img)

# Training vectors and their labels.
train_input, train_label = knn_trainer_helper.sortClasses(
    pad,
    numpoints,
    knn_trainer_helper.readImages(
        num_train_imgs,
        "input/train/",
        "labels/train/",
    ),
    seed=42,
    equalClasses=use_equal_classes,
)

if usePCA:
    # Reduce dimensionality of training data: first fit a default PCA to
    # read the explained-variance ratios of its components.
    pca = PCA().fit(train_input)
    variance = pca.explained_variance_ratio_

    # Accumulate ratios from the first component on, stopping once the
    # running total is above 0.995; ``components`` ends up as the number of
    # components consumed.
    sum = 0
    components = 0
    while sum <= 0.995:
        sum += variance[components]
        components += 1

    # Refit with that component count and project the training data.
    pca = PCA(n_components=components)
    train_data = pca.fit_transform(train_input)
# NOTE(review): this line begins inside a function whose header is outside
# this excerpt (presumably the ``pickle_help`` helper referenced in the
# commented-out calls below). The visible ``dump`` branch opens ``filepath``
# for binary writing, pickles ``list`` with cPickle protocol -1 and closes
# the file.
#
# The script part that follows starts a timer, switches matplotlib to the
# gray colormap, builds ``training_data`` with knn_trainer_helper.sortClasses
# from the images returned by readImages, prints the elapsed time, and then
# constructs a ball-tree KNeighborsClassifier with k=27 neighbours and
# uniform weights.
#
# NOTE(review): ``labelpath`` ('labels/train') has no trailing slash while
# ``inputpath`` ('input/train/') does -- confirm readImages accepts both.
# NOTE(review): the timing message concatenates ' seconds' twice and reports
# time1-start for both the step time and the total time -- confirm intended.
elif(dump): file=open(filepath, 'wb') cPickle.dump(list, file, -1) file.close() start=time.time() plt.gray() pad=8 k=27 trainImgs=209 numPoints=200 inputpath = 'input/train/' labelpath = 'labels/train' print 'generating data' training_data=knn_trainer_helper.sortClasses(pad, numPoints,knn_trainer_helper.readImages(trainImgs, inputpath, labelpath)) time1=time.time() print 'this step took ' +str(time1-start) + ' seconds' +' seconds, total time elapsed: ' + str(time1-start) + ' seconds' #print 'pickling/unpickling data' #pickle_help('pickle/full_data.pkl', list=full_data, dump=True) #full_data=pickle_help('pickle/full_data.pkl', load=True) #tree_input = sklearn.neighbors.BallTree(training_data[0], leaf_size=30) #pickle_help('pickle/ball_tree.pkl', list=tree_input, dump=True) time2=time.time() #print 'this step took ' +str(time2-time1) +' seconds, total time elapsed: ' + str(time2-start) + ' seconds' print 'creating classifier' knn=sklearn.neighbors.KNeighborsClassifier(n_neighbors=k, weights='uniform', algorithm='ball_tree')
import numpy as np
import matplotlib.pyplot as plt
import sklearn.neighbors

# Benchmark: wall-clock time of one full kNN round (load images, sample
# points, fit, predict) as the training sample size doubles from 3 until it
# reaches 400 or more. One red dot is plotted per sample size.

test_imgcount = 19
train_imgcount = 209
pad = 5
k = 20
numPoints = 3

train_inputpath = 'input/train/'
train_labelpath = 'labels/train/'
test_inputpath = 'input/test/'
test_labelpath = 'labels/test/'

while numPoints < 400:
    # The timer covers image loading and sampling as well as fit/predict.
    start = time.time()

    train_data = knn_trainer_helper.sortClasses(
        pad,
        numPoints,
        knn_trainer_helper.readImages(
            train_imgcount, train_inputpath, train_labelpath
        ),
        seed=42,
    )
    # The test sample size is fixed at 500 for every round.
    test_data = knn_trainer_helper.sortClasses(
        pad,
        500,
        knn_trainer_helper.readImages(
            test_imgcount, test_inputpath, test_labelpath
        ),
        seed=42,
    )

    knn = sklearn.neighbors.KNeighborsClassifier(
        n_neighbors=k,
        weights='uniform',
        algorithm='ball_tree',
    )
    knn.fit(train_data[0], train_data[1])
    test_predict = knn.predict(test_data[0])

    end = time.time()
    total = end - start

    # Report the (sample size, seconds) pair and add it to the plot.
    print('(%s, %s)' % (numPoints, total))
    plt.plot(numPoints, total, 'ro')

    numPoints *= 2

plt.show()
# Approximate kNN classification with LSHForest: each test point receives the
# most common label among its approximate nearest training neighbours, and
# the resulting accuracy is printed.

# Experiment configuration.
test_imgcount = 19
testPoints = 50
train_imgcount = 209
trainPoints = 12500
pad = 10

train_inputpath = 'input/train/'
train_labelpath = 'labels/train/'
test_inputpath = 'input/test/'
test_labelpath = 'labels/test/'

# Image loading happens before the timer starts, so it is not timed.
testimages = knn_trainer_helper.readImages(test_imgcount, test_inputpath, test_labelpath)
trainimages = knn_trainer_helper.readImages(train_imgcount, train_inputpath, train_labelpath)

start = time.time()
# ``pad`` replaces the duplicated literal 10 (same value), so the setting
# above is the single source of truth.
train_data = knn_trainer_helper.sortClasses(pad, trainPoints, trainimages, seed=42)
test_data = knn_trainer_helper.sortClasses(pad, testPoints, testimages, seed=42)
print(time.time() - start)

# Element 0 of each sortClasses result is the feature matrix, element 1 the
# labels.
lshf = LSHForest(n_estimators=20, n_candidates=200, n_neighbors=20).fit(train_data[0])
print(time.time() - start)

# One row of training-set indices per test point.
approx_neighbors = lshf.kneighbors(test_data[0], return_distance=False)
print(time.time() - start)

# Majority vote per test point. The labels are gathered into a fresh list
# rather than written back into ``approx_neighbors``, which would destroy the
# indices and cast the labels to the index dtype.
train_labels = train_data[1]
y_hat = []
for neighbor_ids in approx_neighbors:
    votes = [train_labels[idx] for idx in neighbor_ids]
    # stats.mode returns a length-1 array on older SciPy and a scalar on
    # newer releases; ravel()[0] extracts the value in both cases.
    y_hat.append(stats.mode(votes)[0].ravel()[0])

print(sklearn.metrics.accuracy_score(test_data[1], y_hat))