# Compare scikit-learn's KNN against the project's KNNClassifier.
# `af` and `all_data` are set up earlier in the script, outside this excerpt.
test = af.data

train_features = all_data[:, :-1]
train_targets = all_data[:, -1]

# Reference model: unweighted 3-nearest-neighbours from scikit-learn.
KNC = KNeighborsClassifier(n_neighbors=3, weights='uniform')
KNC.fit(train_features, train_targets)
reference_score = KNC.score(test[:, :-1], test[:, -1])
print(reference_score)

# Per-entry category vector handed to the custom classifier.
# Values tried earlier: 0, -1, -1, 0, 0, 0, 0, -1, 0, 0, -1, 0, 0, -1 - 1
# NOTE(review): zeros multiplied by -1 are still (negative) zeros, and the
# length used is the number of rows in all_data; confirm whether an
# all -1 vector or a per-column vector was intended.
catagory = np.zeros(len(all_data)) * -1
# Disabled per-index overrides, kept for reference:
# catagory[3]= 0
# catagory[1] = -1
# catagory[2] = -1
# catagory[7] = -1
# catagory[10] = -1
# catagory[13] = -1
# catagory[14] = -1

weight_type = ['inverse_distance', 'no_weight']
chosen_weighting = weight_type[0]

KK = KNNClassifier(
    catagory,
    k=3,
    weight_type=chosen_weighting,
    normalize=False,
)
KK.fit(train_features, train_targets)
print('fitted')

# Reload the held-out file before scoring the custom model.
filepath = 'Magic_test.arff'
af.load_arff(filepath)
test = af.data
custom_score = KK.score(test[:, :-1], test[:, -1])
print(custom_score)
# NOTE(review): this assignment looks like the final statement of a
# train-set normalization loop whose header lies before this excerpt;
# confirm its indentation against the preceding code.
norm_train_data[:, col] = where

# Hold-out rows: everything past the split point, features and labels apart.
# NOTE(review): the "+ 1" skips the row at the split index unless the
# training slice (not visible here) includes it; confirm.
test_start = int(raw_data.shape[0] * split) + 1
test_data = raw_data[test_start:, :-1]
norm_test_data = test_data.copy()
test_labels = raw_data[test_start:, -1]

# Min-max scale every test column into [0, 1].
# NOTE(review): the test set is scaled with its own min/max rather than the
# training set's statistics; confirm this is intended.
for col in range(norm_test_data.shape[1]):
    column = norm_test_data[:, col]
    # Builtin min/max are kept on purpose: they can differ from
    # ndarray.min()/max() when a column contains NaN.
    col_min = min(column)
    col_max = max(column)
    col_range = col_max - col_min
    if col_range == 0:
        # Constant column: the original np.where(True, ..., 0) never took
        # its 0 fallback and divided by zero here. Use the fallback.
        where = np.zeros_like(column)
    else:
        where = (column - col_min) / col_range
    norm_test_data[:, col] = where

# Distance-weighted KNN with k=15 on the normalized data.
KNN_weight = KNNClassifier(
    k_val=15,
    label_type='nominal',
    col_types=mat.attr_types,
    weight_type='inverse_distance',
)
KNN_weight.fit(norm_train_data, train_labels)
weight_scores = KNN_weight.score(norm_test_data, test_labels)

# Plot accuracy against the odd k values 1, 3, ..., 15.
# NOTE(review): weight_scores comes from a single k=15 fit in this excerpt;
# confirm it has one entry per value in K_vals before plotting.
K_vals = np.arange(1, 17, 2)
#plt.plot(K_vals, scores, label="non-weighted")
plt.plot(K_vals, weight_scores, label="weighted")
plt.title("Credit Approval")
plt.ylabel("Accuracy")
plt.xlabel("K nearest neighbors")
# The plot call passes label=..., which is only shown once a legend is drawn.
plt.legend()
plt.savefig("part5_plot_credit.png")
plt.show()
import numpy as np
import pandas as pd

from KNN import KNNClassifier
from metric import accuracy_score
from model_selection import train_test_split


def main():
    """Search k = 1..10 for the best KNN test score on two iris features.

    Reads '../iris.data' (no header row), uses columns 0 and 2 as features
    and column 4 as the label, then prints the best k and its score.
    """
    iris = pd.read_csv('../iris.data', header=None)
    iris_data = iris.values

    # The frame mixes numeric and string columns, so .values is an object
    # array; cast the feature slice to float so distance math runs on
    # real numbers rather than Python objects.
    x_data = iris_data[:, [0, 2]].astype(float)
    y_data = iris_data[:, 4]

    # NOTE(review): unpack order follows the project-local train_test_split;
    # confirm it really returns (x_train, y_train, x_test, y_test).
    x_train, y_train, x_test, y_test = train_test_split(x_data, y_data)

    best_score = 0.0
    best_k = -1  # stays -1 if no k scores above 0.0
    for k in range(1, 11):
        knn_clf = KNNClassifier(n_neighbors=k)
        knn_clf.fit(x_train, y_train)
        score = knn_clf.score(x_test, y_test)
        # Strict comparison keeps the smallest k among tied scores.
        if score > best_score:
            best_k = k
            best_score = score

    print("best_k =", best_k)
    print('best_score =', best_score)


if __name__ == '__main__':
    main()
from KNN import KNNClassifier
from tools.arff import Arff
import numpy as np

# Dataset locations; only the seismic-bumps pair is used below.
diabetes_train = "../data/KNN/diabetes.arff"
diabetes_test = "../data/KNN/diabetes_test.arff"
seismic_train = "../data/KNN/seismic-bumps_train.arff"
seismic_test = "../data/KNN/seismic-bumps_test.arff"

# Load both splits; the last column of each matrix is the label.
mat = Arff(seismic_train, label_count=1)
mat2 = Arff(seismic_test, label_count=1)

raw_data = mat.data
h, w = raw_data.shape
train_data, train_labels = raw_data[:, :-1], raw_data[:, -1]

raw_data2 = mat2.data
h2, w2 = raw_data2.shape
test_data, test_labels = raw_data2[:, :-1], raw_data2[:, -1]

# Inverse-distance weighted classifier; first two arguments are positional.
KNN = KNNClassifier(15, "nominal", weight_type='inverse_distance')
KNN.fit(train_data, train_labels)

pred = KNN.predict(test_data)
score = KNN.score(test_data, test_labels)
# Element 1 of the score result is reported as a percentage.
print(f"Score: {score[1]*100:.2f}%")

# Optional export of the predictions (disabled):
#np.savetxt("diabetes-prediction.csv", pred, delimiter=',', fmt="%i")