import os
import pickle

import numpy as np
import matplotlib.pyplot as plt
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.metrics import confusion_matrix

# mnist, center_matrix_SVD, tic, toc, class_error_rate, inboth and project_back
# are helpers assumed to be defined elsewhere in this project.
# Each def main() below is a separate experiment; run one at a time.


def main():
    digits = mnist()  # Builds a class holding our MNIST images and labels
    if not os.path.exists('Training SVD Data'):  # If the cached SVD file doesn't exist yet, create it
        x = center_matrix_SVD(digits.train_Images)  # Builds a class with our SVD and associated info
        pickle.dump(x, open('Training SVD Data', 'wb'))
    else:
        x = pickle.load(open('Training SVD Data', 'rb'))  # We already have the file, so just load it
    if 0:  # Set to 1 to rerun the (slow) classification step instead of loading cached labels
        test_Images_Center = np.subtract(
            digits.test_Images,
            np.repeat(x.centers, digits.test_Images.shape[0], 0))
        tic()
        myLDA = LDA()  # Create a new instance of the LDA class
        new_train = myLDA.fit_transform(x.PCA[:, :154], digits.train_Labels)  # Fit on the first 154 PCA components
        new_test = myLDA.transform(test_Images_Center @ np.transpose(x.V[:154, :]))  # Transform the test set the same way
        Knn_labels, nearest = KNN(new_train, digits.train_Labels, new_test, 10)  # Run kNN on the transformed data
        toc()
        pickle.dump(Knn_labels, open('FDAKNN_Lables', 'wb'))
        pickle.dump(nearest, open('FDAKNN_neastest', 'wb'))
    fda = pickle.load(open('FDAKNN_Lables', 'rb'))
    labels_Full = pickle.load(open('KNN_Full', 'rb'))
    labels_50 = pickle.load(open('KNN_50', 'rb'))
    errors_fda, ind_fda = class_error_rate(fda, digits.test_labels)
    errors_near, ind_near = class_error_rate(labels_Full, digits.test_labels)
    errors_50, ind_50 = class_error_rate(labels_50, digits.test_labels)
    plt.figure()
    plt.plot(np.arange(10) + 1, errors_fda, color='Green', marker='o', markersize=10, label='fda')
    plt.plot(np.arange(10) + 1, errors_near, color='Blue', marker='o', markersize=10, label='kNN')
    plt.plot(np.arange(10) + 1, errors_50, color='Yellow', marker='o', markersize=10, label='kNN 50')
    plt.grid(1)  # Turn the grid on
    plt.title('Plot of kNN with FDA error rates')
    plt.legend(loc='upper right')  # Put a legend on the plot
    plt.show()
    print(confusion_matrix(digits.test_labels, labels_Full[5]))  # Confusion matrices at 6 neighbours (row 5)
    print(confusion_matrix(digits.test_labels, fda[5]))
    print(confusion_matrix(digits.test_labels, labels_50[5]))
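# class_error_rate is a project helper not shown in this file. As a point of
# reference, here is a minimal sketch of what it is assumed to compute, given
# a (k, n) array of predicted labels (row i = predictions using i+1 neighbours)
# and the n true labels: the error rate per row plus the indices of the
# misclassified points. The name and return values are assumptions inferred
# from how it is called above; the project's real helper may differ.
def class_error_rate_sketch(pred, truth):
    pred = np.atleast_2d(pred)
    errors = np.zeros(pred.shape[0])
    indices = []
    for i in range(pred.shape[0]):
        wrong = pred[i] != truth               # Mask of misclassified test points
        errors[i] = np.mean(wrong)             # Fraction wrong at this neighbour count
        indices.append(np.where(wrong)[0])     # Which test points were missed
    return errors, indices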
def main():
    digits = mnist()  # Builds a class holding our MNIST images and labels
    if not os.path.exists('Training SVD Data'):  # If the cached SVD file doesn't exist yet, create it
        x = center_matrix_SVD(digits.train_Images)  # Builds a class with our SVD and associated info
        pickle.dump(x, open('Training SVD Data', 'wb'))
    else:
        x = pickle.load(open('Training SVD Data', 'rb'))  # We already have the file, so just load it
    if 0:  # Set to 1 to rerun the (slow) classification step instead of loading cached labels
        test_Images_Center = np.subtract(
            digits.test_Images,
            np.repeat(x.centers, digits.test_Images.shape[0], 0))
        tic()
        labels = local_kmeans_class(
            x.PCA[:, :50], digits.train_Labels,
            test_Images_Center @ np.transpose(x.V[:50, :]), 10)
        toc()
        pickle.dump(labels, open('Loc_kmeans_50_lab', 'wb'))
    loc_full = pickle.load(open('Loc_kmeans_Full_lab', 'rb'))
    loc_50 = pickle.load(open('Loc_kmeans_50_lab', 'rb'))
    labels_Full = pickle.load(open('KNN_Full', 'rb'))
    # These have to be transposed because they came out backwards; should fix if I use this again
    errors_full, ind_full = class_error_rate(np.transpose(loc_full), digits.test_labels)
    errors_50, ind_50 = class_error_rate(np.transpose(loc_50), digits.test_labels)
    errors_near, ind_near = class_error_rate(labels_Full, digits.test_labels)
    plt.figure()
    plt.plot(np.arange(10) + 1, errors_full, color='Green', marker='o', markersize=10, label='Full')
    plt.plot(np.arange(10) + 1, errors_50, color='Yellow', marker='o', markersize=10, label='82.5%')
    plt.plot(np.arange(10) + 1, errors_near, color='Blue', marker='o', markersize=10, label='kNN')
    plt.grid(1)  # Turn the grid on
    plt.title('Plot of local k-means error rates')
    plt.legend(loc='upper right')  # Put a legend on the plot
    plt.show()
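# The compute-or-load-from-pickle pattern above is repeated in every main().
# A small generic helper (hypothetical, not part of the original project)
# that captures the same idea:
def load_or_compute(path, compute):
    """Unpickle path if it exists; otherwise call compute(), cache the result, and return it."""
    if os.path.exists(path):
        with open(path, 'rb') as f:
            return pickle.load(f)
    result = compute()
    with open(path, 'wb') as f:
        pickle.dump(result, f)
    return result

# Usage: x = load_or_compute('Training SVD Data', lambda: center_matrix_SVD(digits.train_Images))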
def KNN(I, L, x, k, weights=1):
    """Classify each row of x by k nearest neighbours against a training set.

    I is the matrix of training observations, L holds their labels, x is
    what we are trying to classify, and k is how many neighbours to look at.
    For each object to classify we first build the vector of distances to
    every object in the training set, then vote among the nearest ones.
    Returns the (k, len(x)) label array and, per test point, the indices of
    its k nearest training points (the second return value matches the
    caller in main(), which unpickles it as 'FDAKNN_neastest').
    """
    from scipy import stats
    from scipy.spatial.distance import cdist
    sizex = len(np.atleast_2d(x))
    label = np.zeros((k, sizex))
    nearest = np.zeros((sizex, k), dtype=int)
    for rowsx in range(0, sizex):
        tic()
        dists = cdist(I, np.atleast_2d(x[rowsx]), metric='euclidean')
        # Now we have all the distances in dists; next, use them to classify
        # this unknown object. argpartition leaves the smallest distances
        # first, in sorted order.
        k_smallest = np.argpartition(dists, tuple(range(1, k + 1)), axis=None)
        nearest[rowsx] = k_smallest[:k]
        if weights == 1:
            for i in range(0, k):
                label[i, rowsx] = stats.mode(L[k_smallest[:i + 1]])[0]  # Majority vote among the i+1 nearest
        else:
            labs = np.unique(L)
            for i in range(k):
                lab_weighted = np.zeros(labs.shape[0])
                d = dists[k_smallest[:i + 2]][:, 0]
                # Linear weights: the nearest neighbour gets weight 1, the farthest gets 0
                weight_function = np.add(
                    np.divide(d, np.subtract(np.min(d), np.max(d))),
                    1 - np.min(d) / np.subtract(np.min(d), np.max(d)))
                for p in range(0, labs.shape[0]):
                    # inboth is a project helper: a 0/1 mask marking which of the
                    # selected neighbours carry the label labs[p]
                    indices = inboth(np.arange(0, L.shape[0])[L == labs[p]], k_smallest[:i + 2])
                    lab_weighted[p] = np.sum(np.multiply(weight_function, indices))
                label[i, rowsx] = labs[np.argmax(lab_weighted)]  # Highest total weight wins
        toc()
        print(rowsx)
    return label, nearest
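# A quick smoke test for KNN on toy data (hypothetical, not from the original
# project; it assumes the tic/toc timing helpers are importable, since KNN
# calls them): three well-separated 2-D Gaussian blobs, classified with up
# to 3 neighbours.
def _knn_smoke_test():
    rng = np.random.default_rng(0)
    train = np.vstack([rng.normal(c, 0.3, size=(20, 2)) for c in (0, 3, 6)])
    train_labels = np.repeat(np.arange(3), 20)
    test = np.array([[0.1, 0.0], [3.2, 2.9], [5.8, 6.1]])
    labels, nearest = KNN(train, train_labels, test, 3)
    print(labels)  # Row i holds the predictions made with i+1 neighbours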
def local_kmeans_class(I, L, x, k):
    """Classify each row of x by the nearest local class mean.

    For every test point and every class, build the running means of that
    class's 1..k nearest training points, then assign the label of the
    closest such mean.
    """
    from scipy.spatial.distance import cdist
    sizex = len(np.atleast_2d(x))
    label_order = np.unique(L)
    n_classes = label_order.shape[0]
    n_features = I.shape[1]  # Derived from the data rather than hard-coded 28*28, so LDA-projected inputs also work
    label = np.zeros((sizex, k))
    for rowsx in range(0, sizex):
        tic()
        dists = cdist(I, np.atleast_2d(x[rowsx]), metric='euclidean')
        toc()
        center = np.zeros((n_classes, k, n_features))
        l = 0
        tic()
        thing = np.zeros((k, n_features))
        for labs in label_order:
            indices = L == labs
            M = I[indices]
            # Partial sort so the first k entries index the k nearest class members, in order
            k_smallest = np.argpartition(dists[indices], tuple(range(1, k)), axis=None)
            for i in range(0, k):
                # Running sum of the i+1 nearest class members (indices 0..i of the partial sort)
                if i == 0:
                    thing[i] = M[k_smallest[i]]
                else:
                    thing[i] = thing[i - 1] + M[k_smallest[i]]
            # Divide the running sums by 1..k to turn them into running means
            center[l, :, :] = np.divide(thing, np.arange(1, k + 1).reshape(k, 1))
            l += 1
        toc()
        for i in range(k):
            # Distance from the test point to each class's (i+1)-point local mean;
            # the closest mean's label wins
            dists2center = cdist(center[:, i, :], np.atleast_2d(x[rowsx]), metric='euclidean')
            label[rowsx, i] = label_order[np.argmin(dists2center)]
    return label
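# The same kind of toy check for local_kmeans_class (hypothetical data, again
# assuming tic/toc are importable): with two clearly separated classes and
# k=2, every row of the output should match the true class of its test point.
def _local_kmeans_smoke_test():
    rng = np.random.default_rng(1)
    train = np.vstack([rng.normal(c, 0.3, size=(20, 2)) for c in (0, 4)])
    train_labels = np.repeat(np.arange(2), 20)
    test = np.array([[0.2, -0.1], [3.9, 4.2]])
    print(local_kmeans_class(train, train_labels, test, 2))  # Expect [[0, 0], [1, 1]]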
def main():
    digits = mnist()  # Builds a class holding our MNIST images and labels
    if not os.path.exists('Training SVD Data'):  # If the cached SVD file doesn't exist yet, create it
        x = center_matrix_SVD(digits.train_Images)  # Builds a class with our SVD and associated info
        pickle.dump(x, open('Training SVD Data', 'wb'))
    else:
        x = pickle.load(open('Training SVD Data', 'rb'))  # We already have the file, so just load it
    if 1:  # Set to 0 to skip the (slow) classification step and just load cached labels
        test_Images_Center = np.subtract(
            digits.test_Images,
            np.repeat(x.centers, digits.test_Images.shape[0], 0))
        tic()
        myLDA = LDA()  # Create a new instance of the LDA class
        new_train = myLDA.fit_transform(x.PCA[:, :154], digits.train_Labels)  # Fit on the first 154 PCA components
        new_test = myLDA.transform(test_Images_Center @ np.transpose(x.V[:154, :]))  # Transform the test set the same way
        Knn_labels = local_kmeans_class(new_train, digits.train_Labels, new_test, 10)  # Run local k-means on the transformed data
        toc()
        pickle.dump(Knn_labels, open('Loc_kmeans_fda_lab', 'wb'))
    fda = pickle.load(open('Loc_kmeans_fda_lab', 'rb'))
    labels_Full = pickle.load(open('KNN_Full', 'rb'))
    loc_full = pickle.load(open('Loc_kmeans_Full_lab', 'rb'))
    errors_fda, ind_fda = class_error_rate(np.transpose(fda), digits.test_labels)
    errors_near, ind_near = class_error_rate(labels_Full, digits.test_labels)
    errors_full, ind_full = class_error_rate(np.transpose(loc_full), digits.test_labels)
    labels_50 = pickle.load(open('KNN_50', 'rb'))
    errors_50, ind_50 = class_error_rate(labels_50, digits.test_labels)
    print(errors_full)
    plt.figure()
    plt.plot(np.arange(10) + 1, errors_fda, color='Green', marker='o', markersize=10, label='FDA k-means')
    plt.plot(np.arange(10) + 1, errors_near, color='Blue', marker='o', markersize=10, label='kNN')
    plt.plot(np.arange(10) + 1, errors_full, color='Yellow', marker='o', markersize=10, label='Full k-means')
    plt.plot(np.arange(10) + 1, errors_50, color='Red', marker='o', markersize=10, label='kNN 50')
    axes = plt.gca()
    axes.set_ylim([0.015, 0.12])
    plt.grid(1)  # Turn the grid on
    plt.title('Plot of local k-means with FDA error rates')
    plt.legend(loc='upper right')  # Put a legend on the plot
    plt.show()
    project_back(x, digits)
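# Note on dimensions: sklearn's LDA projects onto at most n_classes - 1
# discriminant axes, so with the 10 digit classes new_train and new_test
# above have 9 columns rather than 28*28. That is why local_kmeans_class
# derives its feature count from I.shape[1]. A minimal illustration with
# made-up data:
def _lda_shape_demo():
    rng = np.random.default_rng(2)
    X = rng.normal(size=(100, 154))
    y = rng.integers(0, 10, size=100)
    Z = LDA().fit_transform(X, y)
    print(Z.shape)  # (100, 9): at most n_classes - 1 = 9 axes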