def PCA(test_Datamatrix, test_Labelmatrix, train_Datamatrix, train_Labelmatrix):
    """Project the data onto principal components for several explained-variance
    ratios and evaluate each projection with KNN.

    Parameters
    ----------
    train_Datamatrix, test_Datamatrix : 2-D arrays, one sample per row.
    train_Labelmatrix, test_Labelmatrix : label arrays, passed straight
        through to ``KNNClassifier``.
    """
    # Center the data. BUGFIX: the test set must be centered with the
    # *training* mean so both sets live in the same coordinate frame
    # (the original used the test set's own mean).
    mean = np.mean(train_Datamatrix, axis=0)
    z = train_Datamatrix - mean
    z_test = test_Datamatrix - mean

    # Covariance of the centered training data (rowvar=0: columns are the
    # variables; bias=1: normalize by N).
    cov_matrix = np.cov(z, rowvar=0, bias=1)
    # The covariance matrix is symmetric, so eigh is the right solver: it
    # guarantees real eigenvalues/eigenvectors and is faster than eig.
    eigenValues, eigenVectors = np.linalg.eigh(cov_matrix)

    # Sort eigenpairs by decreasing eigenvalue.
    indices = eigenValues.argsort()[::-1]
    eigenValues_sorted = eigenValues[indices]
    eigenVectors_sorted = eigenVectors[:, indices]

    # Cumulative explained variance, computed once. The original re-summed
    # the eigenvalues inside the scan, making each ratio O(n^2).
    cumulative = np.cumsum(eigenValues_sorted)
    total = cumulative[-1]

    r_values = [0.8, 0.85, 0.9, 0.95]
    for r in r_values:
        # alpha = smallest i such that the first i eigenvalues explain a
        # fraction >= r of the total variance (same rule as the original
        # linear scan, now via binary search on the cumulative sums).
        alpha = int(np.searchsorted(cumulative, r * total)) + 1

        # Keep alpha + 1 components (preserves the original's slice bound).
        new_matrix = eigenVectors_sorted[:, 0:alpha + 1]
        pca_Train = np.dot(new_matrix.T, z.T)
        pca_Test = np.dot(new_matrix.T, z_test.T)
        pca_Train = np.asarray(pca_Train, dtype="int32")
        pca_Test = np.asarray(pca_Test, dtype="int32")
        print("R =" + str(r))
        print("----------------------")
        KNNClassifier(pca_Train, train_Labelmatrix, pca_Test, test_Labelmatrix)
        print(
            "*****************************************************************************"
        )
def testBreastCancer(self):
    """Cross-validate a k=5 KNN classifier on the breast-cancer dataset
    and print the mean fold accuracy."""
    from KNN import KNNClassifier
    dataset = load_breast_cancer()
    features = dataset.data
    targets = dataset.target
    classifier = KNNClassifier(k=5)
    fold_scores = cross_validation(classifier, features, targets)
    accuracy = np.mean(fold_scores)
    print("KNN: accuracy =", accuracy)
def LDA(test_Datamatrix, test_Labelmatrix, train_Datamatrix, train_Labelmatrix):
    """Fisher LDA followed by KNN evaluation.

    Assumes the training matrix has 200 rows of 10304 features, grouped so
    that rows 5*c .. 5*c+4 belong to class c (40 classes, 5 samples each)
    -- TODO confirm against the data loader.
    """
    # Regroup the flat 200-row training matrix into 40 classes of 5 samples.
    LDA_Matrix = []
    for i in range(40):
        LDA_Matrix.append([])
    j = -1  # edit here
    print(len(train_Datamatrix))
    for i in range(200):
        if (i % 5 == 0):
            j = j + 1
        LDA_Matrix[j].append(train_Datamatrix[i])
    LDA_matrix = np.asarray(LDA_Matrix, dtype="int32")

    sb = np.zeros((10304, 10304), dtype=np.float32)  # between-class scatter
    z = np.zeros((40, 5, 10304), dtype=np.float32)   # per-class centered samples
    si = np.zeros((10304, 10304), dtype=np.float32)  # within-class scatter
    mean = (np.mean(LDA_matrix, axis=1))             # (40, 10304) class means
    Overall_mean = np.mean(mean, axis=0)

    for i in range(40):
        # BUGFIX: mean[i] - Overall_mean is 1-D, so np.dot(d.T, d) was a
        # *scalar* inner product; the between-class scatter needs the
        # outer product d d^T (a 10304 x 10304 matrix).
        d = mean[i] - Overall_mean
        sb += (5 * np.outer(d, d))
    for i in range(40):
        z[i] = (LDA_matrix[i] - mean[i])
    for i in range(40):
        si += (np.dot(z[i].T, z[i]))
    si = np.asarray(si)

    # Solve the generalized eigenproblem via S^-1 Sb.
    Sinv = np.linalg.inv(si)
    SinvB = np.matmul(Sinv, sb)
    eigenValues, eigenVectors = np.linalg.eig(SinvB)
    indices = eigenValues.argsort()[::-1]
    eigenValues_sorted = eigenValues[indices]
    # eig on a non-symmetric matrix may return complex eigenvectors; keep
    # only the real part so the int32 casts below do not fail.
    eigenVectors_sorted = np.real(eigenVectors[:, indices])

    LDA_Train = np.dot(train_Datamatrix, eigenVectors_sorted)
    LDA_Test = np.dot(test_Datamatrix, eigenVectors_sorted)
    print("----------------------")
    # apply the KNN classifier to the projected data
    LDA_Train = np.asarray(LDA_Train, dtype="int32")
    LDA_Test = np.asarray(LDA_Test, dtype="int32")
    print("###############################")
    print(len(LDA_Train))
    print(len(LDA_Train[0]))
    print(len(LDA_Test))
    print(len(LDA_Test[0]))
    print("###############################")
    KNNClassifier(LDA_Train.T, train_Labelmatrix, LDA_Test.T, test_Labelmatrix)
    print(
        "*****************************************************************************"
    )
import numpy as np

from KNN import KNNClassifier

# Ten hand-picked 2-D points: the first five belong to class 0, the
# remaining five to class 1.
samples = [[3.393533211, 2.331273381],
           [3.110073483, 1.781539638],
           [1.343808831, 3.368360954],
           [3.582294042, 4.679179110],
           [2.280362439, 2.866990263],
           [7.423436942, 4.696522875],
           [5.745051997, 3.533989803],
           [9.172168622, 2.511101045],
           [7.792783481, 3.424088941],
           [7.939820817, 0.791637231]]
labels = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]

X_train = np.array(samples)
y_train = np.array(labels)

# Single query point, reshaped into the (n_samples, n_features) matrix
# form that predict() expects.
x_predict = np.array([8.093607318, 3.365731514]).reshape(1, -1)

knn_clf = KNNClassifier(k=6)
print(knn_clf)
knn_clf.fit(X_train, y_train)
y_predict = knn_clf.predict(x_predict)
print(y_predict)
# Evaluate sklearn's KNeighborsClassifier and the project's KNNClassifier
# on the same train/test split. `af` (arff loader) and `all_data` come
# from earlier in the file (not shown here).
test = af.data

# Baseline: sklearn KNN, k=3, uniform weights; last column is the label.
KNC = KNeighborsClassifier(
    n_neighbors=3,
    weights='uniform',
)
KNC.fit(all_data[:, :-1], all_data[:, -1])
print(KNC.score(test[:, :-1], test[:, -1]))

# 0, -1, -1, 0, 0, 0, 0, -1, 0, 0, -1, 0, 0, -1 - 1
# NOTE(review): np.zeros(...) * -1 is still an all-zeros array -- if an
# all -1 placeholder vector was intended (as the commented assignments
# below suggest), this should be np.ones(...) * -1 or np.full(..., -1).
# Confirm the expected `catagory` semantics against KNNClassifier.
catagory = np.zeros(len(all_data)) * -1
# catagory[3]= 0
# catagory[1] = -1
# catagory[2] = -1
# catagory[7] = -1
# catagory[10] = -1
# catagory[13] = -1
# catagory[14] = -1

# Project classifier: k=3 with inverse-distance weighting, no normalization.
weight_type = ['inverse_distance', 'no_weight']
KK = KNNClassifier(catagory, k=3, weight_type=weight_type[0], normalize=False)
KK.fit(all_data[:, :-1], all_data[:, -1])
print('fitted')

# Re-load the held-out test split and score the project classifier on it.
filepath = 'Magic_test.arff'
af.load_arff(filepath)
test = af.data
print(KK.score(test[:, :-1], test[:, -1]))
# Multi-process KNN evaluation on MNIST. The definition of process() runs
# past the end of this view; only its first statement is visible here.
from KNN import KNNClassifier
from random import randrange

# IMPORTANT: mnist.init() should be called the first time you run this script
mnist.init()

# convert type of numpy array elements into int16, avoiding subtraction overflow
train_images, train_labels, test_images, test_labels = map(
    lambda x: x.astype(np.int16), mnist.load())

# initializing multi-process options
# in Unix-like OS, multiprocessing is implemented based on fork().
# to take advantage of COW, knn instance and chunks should be global.
# by using COW, memory cost can be reduced to 1/workers.
# so multiprocessing.SharedMemory is no longer needed xD
# however, such optimization may not work in Windows :(
knn = KNNClassifier(10)
knn.fit(train_images, train_labels)
workers = 6  # IMPORTANT: should be number of physical cores of your PC
test_size = test_images.shape[0]  # 10000
chunk_size = 10  # size of each chunk
# NOTE(review): with test_size 10000 and chunk_size 10, the list below
# holds 1000 chunks, not 10 -- chunks_num looks stale; confirm intent.
chunks_num = 10  # total number of chunks
chunks = [[test_images[i:i + chunk_size], test_labels[i:i + chunk_size]]
          for i in range(0, test_size, chunk_size)
          ]  # split testing set into chunks for multi-process calculating


# run KNN on a specific chunk
# knn is an instance of KNNClassifier
# pass k here to avoid multiprocess issues with Windows
def process(chunk_id, k):
    print("chunk", chunk_id, "starts")
from KNN import KNNClassifier
from model_selection import train_test_split
from metric import accuracy_score
import pandas as pd
import numpy as np

if __name__ == '__main__':
    # Load the iris data (no header row); columns 0 and 2 are the
    # features, column 4 the class label.
    iris = pd.read_csv('../iris.data', header=None)
    iris_data = iris.loc[:, :].values
    x_data = iris_data[:, [0, 2]]
    y_data = iris_data[:, 4]
    x_predict = np.array([[5.1, 2.1]])

    # NOTE: the project's train_test_split returns (x_train, y_train,
    # x_test, y_test) -- a different order from sklearn's; preserved as-is.
    x_train, y_train, x_test, y_test = train_test_split(x_data, y_data)

    # Grid-search k in [1, 10], keeping the first k that strictly improves
    # the held-out accuracy (best_k stays -1 if every score is 0).
    best_k = -1
    best_score = 0.0
    for k in range(1, 11):
        candidate = KNNClassifier(n_neighbors=k)
        candidate.fit(x_train, y_train)
        acc = candidate.score(x_test, y_test)
        if acc > best_score:
            best_k, best_score = k, acc

    print("best_k =", best_k)
    print('best_score =', best_score)
# Tail of a min-max normalization loop: `col`, `where`, `norm_train_data`,
# `raw_data`, `split`, `mat`, `train_labels` are defined before this view.
norm_train_data[:, col] = where

# Split off the test rows (everything after the train/test boundary).
test_data = raw_data[int(raw_data.shape[0] * split) + 1:, :-1]
norm_test_data = test_data.copy()
test_labels = raw_data[int(raw_data.shape[0] * split) + 1:, -1]

# Min-max normalize each test column.
# NOTE(review): the test set is scaled with its *own* per-column min/max
# rather than the training statistics -- confirm this is intended.
for col in range(norm_test_data.shape[1]):
    column = norm_test_data[:, col]
    col_min = min(column)
    col_max = max(column)
    # NOTE(review): the condition is the constant True, so this is just
    # (column - col_min) / (col_max - col_min); it does NOT guard against
    # a zero range (col_max == col_min).
    where = np.where(True, (column - col_min) / (col_max - col_min), 0)
    norm_test_data[:, col] = where

# k=15, nominal labels, inverse-distance weighting.
KNN_weight = KNNClassifier(k_val=15,
                           label_type='nominal',
                           col_types=mat.attr_types,
                           weight_type='inverse_distance')
KNN_weight.fit(norm_train_data, train_labels)
weight_scores = KNN_weight.score(norm_test_data, test_labels)

# NOTE(review): K_vals has 8 entries but weight_scores comes from a single
# score() call -- verify score() returns one value per k, else the plot
# call below will fail or mislead.
K_vals = np.arange(1, 17, 2)
#plt.plot(K_vals, scores, label="non-weighted")
plt.plot(K_vals, weight_scores, label="weighted")
plt.title("Credit Approval")
plt.ylabel("Accuracy")
plt.xlabel("K nearest neighbors")
plt.savefig("part5_plot_credit.png")
plt.show()
from KNN import KNNClassifier
from tools.arff import Arff
import numpy as np

# Dataset paths (only the seismic pair is used below).
diabetes_train = "../data/KNN/diabetes.arff"
diabetes_test = "../data/KNN/diabetes_test.arff"
seismic_train = "../data/KNN/seismic-bumps_train.arff"
seismic_test = "../data/KNN/seismic-bumps_test.arff"

# Load the train/test arff files; the last column holds the label.
train_arff = Arff(seismic_train, label_count=1)
test_arff = Arff(seismic_test, label_count=1)

train_raw = train_arff.data
h, w = train_raw.shape
train_data = train_raw[:, :-1]
train_labels = train_raw[:, -1]

test_raw = test_arff.data
h2, w2 = test_raw.shape
test_data = test_raw[:, :-1]
test_labels = test_raw[:, -1]

# k=15 nominal-label KNN with inverse-distance weighting.
KNN = KNNClassifier(15, "nominal", weight_type='inverse_distance')
KNN.fit(train_data, train_labels)
pred = KNN.predict(test_data)
score = KNN.score(test_data, test_labels)
print(f"Score: {score[1]*100:.2f}%")
#np.savetxt("diabetes-prediction.csv", pred, delimiter=',', fmt="%i")
# Continuation of an arff-loading script: `raw_data`, `mat2`, and
# `normalize` are defined earlier in the file (not shown here).
h, w = raw_data.shape
train_data = raw_data[:, :-1]
train_labels = raw_data[:, -1].reshape(-1, 1)

raw_data2 = mat2.data
h2, w2 = raw_data2.shape
test_data = raw_data2[:, :-1]
test_labels = raw_data2[:, -1].reshape(-1, 1)

# Scale both splits with the project's normalize() helper.
train_data, test_data = normalize(train_data, test_data)

# Alternative configurations tried during experimentation:
# KNN = KNNClassifier(labeltype='classification', weight_type='inverse_distance',k=15)
# KNN = KNNClassifier(labeltype='classification', weight_type='', k=3)
# KNN = KNNClassifier(labeltype='regression', weight_type='', k=15)
KNN = KNNClassifier(labeltype='regression', weight_type='inverse_distance', k=3)
KNN.fit(train_data, train_labels)
# NOTE(review): predict() is unpacked into (pred, shape) here, so it
# apparently returns a 2-tuple -- confirm against the KNNClassifier API.
pred, shape = KNN.predict(test_data)
score = KNN.score(test_data, test_labels)
print(score)
# np.savetxt("diabetes_prediction.csv", pred, delimiter=',',fmt="%i")
# np.savetxt("seismic-bump-prediction_mine.csv", pred, delimiter=',', fmt="%i")

################# the following is used for credit data set ##################
# mat = Arff("credit.arff", label_count=1)
# data = mat.data[:, 0:-1]
# labels = mat.data[:, -1].reshape(-1, 1)
# attr_type = mat.attr_types
# KNN = KNNClassifier(labeltype='HEOM', weight_type='', k=3, columntype=attr_type)
# X, X_test, y, y_test = train_test_split(data, labels, test_size=0.25)