def train_model(model_name, X_train, Y_train, X_val, Y_val):
    """Train a supervised classifier and score it on the validation set.

    Param:
    - model_name: one of 'svm', 'random_forest', 'neural_network', 'knn'
    - Train data:
        - X_train
        - Y_train
    - Validation data:
        - X_val
        - Y_val

    Return:
    - model: the fitted classifier, to be used for testing; for an unknown
      model_name an error string is returned instead (kept as-is for
      backward compatibility with existing callers).
    """
    if model_name == 'svm':
        model = SVM()
    elif model_name == "random_forest":
        max_depth = 2
        model = RandomForest(max_depth)
    elif model_name == "neural_network":
        # NOTE(review): the original also set an unused `max_depth = 2`
        # here; NeuralNetwork only takes layer sizes, so it was removed.
        out_size = 2
        hidden_size = 30
        in_size = X_train.shape[1]  # one input unit per feature
        model = NeuralNetwork(in_size, hidden_size, out_size)
    elif model_name == "knn":
        n_neighbors = 50
        model = KNearestNeighbors(n_neighbors)
    else:
        return "Error: Model not yet implemented..."

    print("Training " + model_name + "...")
    train_score = model.train(X_train, Y_train)
    valid_score = model.score(X_val, Y_val)
    print("Training Accuracy: %s" % train_score)
    print("Validation Accuracy: %s" % valid_score)
    return model
__author__ = "danil.gizdatullin"

import config as conf
from k_nearest_neighbors import KNearestNeighbors

# Build the KNN model from the precomputed similarity data file.
K_Nearest_Neighbors = KNearestNeighbors(file_name=conf.path_to_similarity_data)

# Print neighbor statistics with a histogram.  The original used the
# Python 2 `print` statement, which is a SyntaxError on Python 3; the
# print() function works on both.
print(K_Nearest_Neighbors.statistics(conf.number_of_nearest_neighbors,
                                     histogram=True,
                                     number_of_bins=200))
# Question 4: misclassification rate of sklearn KNN for several k values.
# The k values evaluated in both the plain and weighted experiments.
K_VALUES = [1, 5, 10]


def _knn_error_rates(make_classifier):
    """Fit one classifier per k in K_VALUES (built by *make_classifier*)
    on the training set and return the array of misclassification rates
    on the test set.  Factors out the loop that was duplicated for the
    plain and weighted KNN experiments."""
    rates = np.zeros(len(K_VALUES))
    for index, k in enumerate(K_VALUES):
        classifier = make_classifier(k)
        classifier.fit(X_train, y_train)
        rates[index] = np.mean(classifier.predict(X_test) != y_test)
    return rates


def _plot_error_vs_inverse_k(rates, title):
    """Plot misclassification *rates* against 1/k with the given title."""
    plt.figure()
    plt.plot(1 / np.array(K_VALUES), rates, "-bo")
    plt.suptitle(title)


knn_misclassification_rate = _knn_error_rates(
    lambda k: sklearn.neighbors.KNeighborsClassifier(n_neighbors=k))
_plot_error_vs_inverse_k(knn_misclassification_rate,
                         "Misclassification rate v.s. 1/k of KNN")
plt.show()
print(knn_misclassification_rate)

# Question 5 #
# Baseline: the hand-written 1-NN implementation.
k_nearest_neighbors = KNearestNeighbors(1)
k_nearest_neighbors.fit(X_train, y_train)
knn_result = k_nearest_neighbors.predict(X_test)
knn_naive_misclassification_rate = np.mean(knn_result != y_test)
print(knn_naive_misclassification_rate)

# Question 7 #
w_knn_misclassification_rate = _knn_error_rates(
    lambda k: WeightedKNearestNeighbors(n_neighbors=k))
_plot_error_vs_inverse_k(w_knn_misclassification_rate,
                         "Misclassification rate v.s. 1/k of weighted KNN")
__author__ = 'danil.gizdatullin'

# import numpy as np
import matplotlib.pyplot as plt
import time

from k_nearest_neighbors import KNearestNeighbors


def special_print(a):
    """Debug helper: for each key of dict *a*, print the key and the
    `.dict` attribute of the stored value, separated by a marker line."""
    # `dict.iterkeys()` is Python 2 only (AttributeError on Python 3);
    # iterating the dict directly yields keys on both versions.
    for key in a:
        print("Key = %i" % key)
        print(a[key].dict)
        print("#####################")


start_time = time.time()

k_nn = KNearestNeighbors()

# Debug dumps of the neighbor dictionaries, kept for convenience:
# special_print(k_nn.k_nearest_dict)
# special_print(k_nn.reverse_k_nearest_dict)

distr = k_nn.return_distribution_k_nearest_neighbors()
distr_reverse = k_nn.return_distribution_reverse_k_nearest_neighbors()

# Report how long building the model and both distributions took.
print("--- %s seconds ---" % (time.time() - start_time))

# plt.hist(distr)
plt.hist(distr_reverse, bins=100)
plt.show()