import sys

import numpy as np
from sklearn.metrics import confusion_matrix


def getBestK(X_train, y_train, X_val, y_val, nns=[30], print_train=True, print_val=True):
    """Evaluate a KNN classifier for each k in nns and report validation accuracy."""
    acc_train = np.zeros((1, len(nns)))
    acc_val = np.zeros((1, len(nns)))
    for j in range(len(nns)):
        print(j)
        sys.stdout.flush()
        knn = KNNClassifier(nns[j])
        knn.train(X_train, y_train)
        # Training accuracy left disabled (expensive for large training sets).
        # acc_train[0, j] = np.mean(knn.predict(X_train) == y_train)
        # print(acc_train[0, j])
        y_pred = knn.predict(X_val)
        acc_val[0, j] = np.mean(y_pred == y_val)
        print(acc_val[0, j])
        sys.stdout.flush()
        print("Confusion matrix:")
        print(confusion_matrix(y_val, y_pred))
    if print_train:
        print(acc_train)
    if print_val:
        print(acc_val)
    # acc_val has shape (1, len(nns)), so np.where returns (row, column) index
    # arrays; the column index identifies the best k in nns.
    best_row, best_col = np.where(acc_val == np.amax(acc_val))
    # Note: knn is the most recently trained model, not necessarily the best one.
    return (best_row[0], best_col[0]), knn
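# Hypothetical usage (names illustrative): sweep several k values and recover
# the winning k from the returned column index.
# (best_row, best_col), last_model = getBestK(X_tr, y_tr, X_v, y_v, nns=[1, 3, 5, 7])
# best_k = [1, 3, 5, 7][best_col]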
import numpy as np
from sklearn.datasets import make_blobs


def knn():
    # Sanity check: fit and score KNN on a synthetic 4-cluster dataset.
    X, y = make_blobs(centers=4, n_samples=500, n_features=2, shuffle=True)
    model = KNNClassifier(K=4)
    model.fit(X, y)
    res = model.predict(X)
    print(np.mean(res == y))  # training accuracy
import os

import pandas as pd


def main():
    curr_dir = os.path.dirname(__file__)
    csv_file = os.path.join(curr_dir, 'data/play.csv')
    # Query instance; the dataset is in Portuguese: Tempo = weather,
    # Temperatura = temperature, Humidade = humidity, Vento = wind,
    # Jogar = play.
    test = pd.Series({
        'Tempo': 'Chuva',         # rain
        'Temperatura': 'Quente',  # hot
        'Humidade': 'Normal',
        'Vento': 'Forte'          # strong
    })
    df = pd.read_csv(csv_file, index_col='Dia')
    X, y = df.loc[:, df.columns != 'Jogar'], df['Jogar']
    clf = KNNClassifier(k=1)
    clf.fit(X, y)
    print(f'RESULT k = {clf.k} ::', 'Jogar' if clf.predict(test) else 'Não Jogar')
    clf.k = 3
    print(f'RESULT k = {clf.k} ::', 'Jogar' if clf.predict(test) else 'Não Jogar')
    print()
    print('DISTANCES')
    print(clf._gen_distances(test))
import pandas as pd


def main():
    df = pd.read_csv('./diabetes.csv')
    normalized_data = normalize(df, 'Outcome')
    knn_classifier = KNNClassifier()

    # Compare neighbour counts under k-fold cross-validation (single run).
    for k in [5, 10]:
        results = {}
        for nn in [3, 5, 7]:
            knn_classifier.nn = nn
            knn_classifier.cross_validate(
                normalized_data,
                k,  # k folds
                1   # r (repetitions)
            )
            results[str(nn)] = [
                knn_classifier.accuracy,
                knn_classifier.f1_score
            ]
        plot_results(results)

    # Repeated (10x10) cross-validation for more stable global estimates.
    for nn in [3, 5, 7]:
        knn_classifier.nn = nn
        knn_classifier.cross_validate(
            normalized_data,
            10,  # k folds
            10   # r (repetitions)
        )
        print('\nGlobal accuracy: %.3f (%.3f)'
              % (knn_classifier.accuracy, knn_classifier.accuracy_std))
        print('Global f1 score: %.3f (%.3f)\n'
              % (knn_classifier.f1_score, knn_classifier.f1_score_std))
        results[str(nn)] = [knn_classifier.accuracy, knn_classifier.f1_score]
    plot_results(results)
def test_knn(self):
    knn_model = KNNClassifier(all_monomials_with_maximum_degrees([1, 1, 1]), 1)
    knn_model.train(np.array([[1], [10], [2], [30]]), np.array([1, 0, 1, 0]))
    self.assertEqual(
        knn_model._trained,
        True,
        "Initially False. Change this property to True after self.train() is called",
    )
    self.assertIn(
        type(knn_model.predict(np.array([1]))),
        (np.float64, float),
        "Return type of predict() is np.float64 or float",
    )
    self.assertIn(
        type(
            knn_model.evaluate(
                np.array([[1], [10], [2], [20]]), np.array([1, 0, 1, 0])
            )
        ),
        (float, np.float64, int, np.int64),
        "Return type of evaluate() is np.float64, float, or an int type",
    )
    self.assertIn(
        knn_model.evaluate(
            np.array([[1], [10], [2], [30]]), np.array([1, 0, 1, 0])
        ),
        (1.0, 1),
        "evaluate() for the same data returns 1.0",
    )
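# A minimal sketch (not the repo's implementation) of a KNNClassifier that
# would satisfy the contract exercised by test_knn above: a `_trained` flag
# set by train(), a float-returning predict(), and an accuracy-style
# evaluate() that yields 1.0 on the training data for k=1. The first
# constructor argument (the monomial feature list) is accepted but unused here.
import numpy as np


class KNNClassifier:
    def __init__(self, features, k):
        self.features = features  # feature functions; opaque in this sketch
        self.k = k
        self._trained = False

    def train(self, X, y):
        self.X = np.asarray(X, dtype=float)
        self.y = np.asarray(y, dtype=float)
        self._trained = True

    def predict(self, x):
        # Euclidean distance to every training row, then the mean of the
        # k nearest labels rounded to the closest class.
        dists = np.linalg.norm(self.X - np.asarray(x, dtype=float), axis=1)
        nearest = np.argsort(dists)[: self.k]
        return float(np.round(self.y[nearest].mean()))

    def evaluate(self, X, y):
        preds = np.array([self.predict(row) for row in np.asarray(X)])
        return float(np.mean(preds == np.asarray(y)))  # accuracy in [0, 1]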
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 (registers the 3d projection)


def test():
    train_data, train_labels = mnist.load_mnist(mode='train', path='data/')
    # test_data, test_labels = mnist.load_mnist(mode='test', path='data/')
    # Each row of errors is assumed to be one (param, param, error) triple
    # from the hyperparameter sweep.
    errors = np.array(
        knn.tune_hyperparams(train_data[:1000], train_labels[:1000]))
    X = errors[:, 0]
    Y = errors[:, 1]
    Z = errors[:, 2]
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(X, Y, Z, c='r', marker='o')
    ax.set_xlabel('X Label')
    ax.set_ylabel('Y Label')
    ax.set_zlabel('Z Label')
    plt.show()
def run(k, mode='knn', distance='euclidean', keep_punc=False, keep_stopwords=False):
    from knn import KNNClassifier
    from ncc import NearestCentroidClassifier

    if mode == 'knn':
        clf = KNNClassifier(k, distance=distance,
                            keep_stopwords=keep_stopwords, keep_punc=keep_punc)
        clf.score()
    elif mode == 'ncc':
        clf = NearestCentroidClassifier(k, distance=distance,
                                        keep_stopwords=keep_stopwords,
                                        keep_punc=keep_punc)
        clf.score()
def training(q, DTrain, RTrain, **args):
    '''
    @input topic document q ∈ Q, training collection DTrain, judgments RTrain,
           and optional arguments on the classification process
    @behavior learns a classification model to predict the relevance of
              documents on the topic q using DTrain and RTrain, where the
              training process is subjected to proper preprocessing,
              classifier selection and hyperparameterization
    @output q-conditional classification model
    '''
    classifier_type = args.get('classifier_type')
    if classifier_type == 'logistic':
        hyper_parameters = args.get('hyper_parameters')
        classifier = LogisticClassifier(hyper_parameters=hyper_parameters)
    elif classifier_type == 'XGBOOST':
        classifier = XGBOOSTClassifier()
    elif classifier_type == 'MLP':
        classifier = MLPerceptronClassifier()
    elif classifier_type == 'KNN':
        classifier = KNNClassifier()
    else:
        raise ValueError(f'unknown classifier_type: {classifier_type!r}')
    classifier.train(q, DTrain, RTrain)
    return classifier
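# Illustrative only: how training() might be invoked for the KNN branch,
# assuming q, DTrain and RTrain already exist as described in the docstring.
# model = training(q, DTrain, RTrain, classifier_type='KNN')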
print(_, nFeats)

# Values of parameter k to iterate over.
K_VALS = [3, 5, 7, 9, 11, 13, 15]

starttime = time.time()
# Repeat each trial 10 times.
for i in range(10):
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Try non-optimized methods.
    # Vanilla KNN.
    for k in K_VALS:
        reg = KNNClassifier(x_train, y_train, k)
        y_pred = reg.predict(x_test)
        acc_iter = accuracy(y_test, y_pred)
        print("xx,knn,", k, ",", acc_iter)

    # Distance-weighted KNN.
    for k in K_VALS:
        reg = DwKNNClassifier(x_train, y_train, k)
        y_pred = reg.predict(x_test)
        acc_iter = accuracy(y_test, y_pred)
        print("xx,dknn,", k, ",", acc_iter)

    # PCA with KNN.
    pca = PCA(n_components=4)
    pca.fit(x_train.copy())
# Perform dimensionality reduction.
X_train_reduced = svd.fit_transform(X_train_tfidf)

# Keep results.
prec, rec, f1, accu = ([] for _ in range(4))

# Use 10-fold CV and compute metrics.
for train, test in kf.split(X_train_reduced, X_train_le):
    X_train = X_train_reduced[train]
    y_train = X_train_le[train]
    X_test = X_train_reduced[test]
    y_test = X_train_le[test]

    clf_KNN = KNNClassifier(100)
    # Train model.
    clf_KNN.fit(X_train, y_train)
    # Predict categories.
    y_pred = clf_KNN.predict(X_test)
    # Save scores.
    prec.append(precision_score(y_test, y_pred, average='macro'))
    rec.append(recall_score(y_test, y_pred, average='macro'))
    f1.append(f1_score(y_test, y_pred, average='macro'))
    accu.append(accuracy_score(y_test, y_pred))

# Print results to csv.
Evaluation_metric_df = pd.read_csv('EvaluationMetric_10fold.csv', sep="\t")
# if (nFeats > 15) or (_ > 4000):
#     continue
print("Number of samples: ", _, "Number of features: ", nFeats)
# print("X :")
# print(X)
# print("y :")
# print(y)
print("Splitting training and test sets:")

# Test without scaling.
print("Testing without scaling")
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
x_train, x_verif, y_train, y_verif = train_test_split(x_train, y_train, test_size=0.33)
clf = KNNClassifier(x_train, y_train, 5)
y_pred = clf.predict(x_test)
print("Accuracy = ", accuracy_score(y_test, y_pred))

# Run PSO to find the best feature weights.
N_init_pop = 30
_, nFeats = np.shape(x_train)
weight_pso = GBestPSO(nFeats, N_init_pop)
pos = weight_pso.get_positions()
pbest = weight_pso.get_pbest()
pbest_metric_array = np.empty(N_init_pop)
pos_metric_array = np.empty(N_init_pop)

# Set pbest metrics.
for i in range(len(pbest)):
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

from knn import KNNClassifier
from module_selection import train_test_split

iris = datasets.load_iris()
# print(iris.keys())
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y)

my_knn_clf = KNNClassifier(k=3)
my_knn_clf.fit(X_train, y_train)
y_predict = my_knn_clf.predict(X_test)

# Accuracy.
accuracy = sum(y_predict == y_test) / len(y_test)
print(accuracy)

# The same workflow using sklearn's KNeighborsClassifier and model_selection:
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()
def lbest_pso_run_ent(x_train, y_train, x_test, y_test, x_verif, y_verif, k):
    # Run PSO to find the best feature weights.
    N_init_pop = 50
    _, nFeats = np.shape(x_train)
    weight_pso = LBestPSO(nFeats, N_init_pop)
    pos = weight_pso.get_positions()
    pbest = weight_pso.get_pbest()
    pbest_metric_array = np.empty(N_init_pop)
    pos_metric_array = np.empty(N_init_pop)

    # Set pbest metrics.
    for i in range(len(pbest)):
        # Scale input and verification data by the candidate weights.
        scaled_x_train = np.multiply(x_train, pbest[i])
        scaled_x_verif = np.multiply(x_verif, pbest[i])
        # Fitness: neighborhood entropy on the verification set.
        clf = KNNClassifier(scaled_x_train, y_train, k)
        neighbors = clf.find_all_neighbors(scaled_x_verif)
        pbest_metric_array[i] = clf.find_neighborhood_entropy(neighbors)
    weight_pso.set_pbest_fitness(pbest_metric_array)

    # Set position metrics.
    for i in range(len(pbest)):
        scaled_x_train = np.multiply(x_train, pos[i])
        scaled_x_verif = np.multiply(x_verif, pos[i])
        clf = KNNClassifier(scaled_x_train, y_train, k)
        neighbors = clf.find_all_neighbors(scaled_x_verif)
        pos_metric_array[i] = clf.find_neighborhood_entropy(neighbors)
    weight_pso.set_p_fitness(pos_metric_array)

    # Set initial gbest.
    weight_pso.set_init_best(pos_metric_array)

    count = 0
    while count < 50:
        count += 1
        weight_pso.optimize()
        weight_pop = weight_pso.get_positions()
        metric_array = np.empty(N_init_pop)
        # Evaluate and set fitness for the new positions.
        for i in range(len(weight_pop)):
            scaled_x_train = np.multiply(x_train, weight_pop[i])
            scaled_x_verif = np.multiply(x_verif, weight_pop[i])
            clf = KNNClassifier(scaled_x_train, y_train, k)
            neighbors = clf.find_all_neighbors(scaled_x_verif)
            metric_array[i] = clf.find_neighborhood_entropy(neighbors)
        weight_pso.set_p_fitness(metric_array)
        weight_pso.set_best(metric_array)

    # Get the best solution.
    best_metric = weight_pso.get_gbest_fit()
    best_weights = weight_pso.get_gbest()

    # Concatenate training and verification sets.
    x_train = np.concatenate((x_train, x_verif), axis=0)
    y_train = np.concatenate([y_train, y_verif])

    # Report KNN accuracy with the learned weights.
    clf = KNNClassifier(np.multiply(x_train, best_weights), y_train, k)
    y_pred = clf.predict(np.multiply(x_test, best_weights))
    acc = accuracy(y_test, y_pred)
    print("lbest-pso-ent,knn,", k, ",", acc)

    # Report distance-weighted KNN accuracy with the learned weights.
    clf = DwKNNClassifier(np.multiply(x_train, best_weights), y_train, k)
    y_pred = clf.predict(np.multiply(x_test, best_weights))
    acc = accuracy(y_test, y_pred)
    print("lbest-pso-ent,dknn,", k, ",", acc)
    points.append([float(inp[0]), float(inp[1]), int(inp[2])])

random.shuffle(points)
n_points = len(points)
points = chunkify(points)

acc1, acc2 = [], []
for i in range(NUMBER_OF_FOLDS):
    # Build the train/test split for this fold.
    train, test = [], []
    for j in range(NUMBER_OF_FOLDS):
        if i != j:
            train.extend(points[j])
        else:
            test = points[j]

    knn = KNNClassifier()
    svm = SVM()
    train2 = copy.deepcopy(train)
    knn.fit(train)
    svm.fit_transform(train2)

    k_right, s_right = 0, 0
    for point in test:
        k_pred = knn.predict([point[0], point[1]])
        s_pred = svm.predict([point[0], point[1]])
        # Map the SVM output from {-1, 1} to {0, 1}.
        s_pred = (s_pred + 1) / 2
        if k_pred == point[2]:
            k_right += 1
        if s_pred == point[2]:
            s_right += 1
    acc1.append(k_right / len(test))
    acc2.append(s_right / len(test))
train_imgs = utils.read_folder(TRAIN_DIR, 0, ntrain, flatten=False)
print("\nDone!")
sys.stdout.flush()

X = train_imgs
X = X.reshape((ntrain, -1))
# X = np.insert(X, 0, 1.0, axis=1)
y = utils.read_labels('trainLabels.csv', 0, ntrain)
# Note: sklearn's cross_validation module was renamed model_selection in
# newer releases.
X_train, X_val, y_train, y_val = cross_validation.train_test_split(X, y, test_size=0.1)

nns = [1]
# utils.getBestK(X_train, y_train, X_val, y_val, nns)
knn = KNNClassifier(nns[0])
knn.train(X_train, y_train)
print("X_val shape: ", X_val.shape)
print("y_val shape: ", y_val.shape)
pred = knn.predict(X_val)
print("Accuracy: ", np.mean(pred == y_val))

# Uncomment this to visualize the KNN predictions - 10 examples from each class.
"""
examples = np.zeros((10, 10, 32, 32, 3))
for i in range(10):
    examples[i] = ((X_val[pred == i])[0:10]).reshape(10, 32, 32, 3)
num_classes = len(classes)
nexamples = 10
for y, cls in enumerate(classes):
    idxs = np.arange(nexamples)
    for i, idx in enumerate(idxs):
    else:
        matrix = evaluator.compute().cpu()
        plotconfmat(dataset_name.split(","), matrix, rootname + ".png")
        torch.save(matrix, rootname + ".pt")
        # T.to_pil_image(matrix).save(f"matrix_{classifier_name}_dataset{i:02}.png")

    import json
    with open(rootname + ".json", "w+") as f:
        json.dump(outputs, f)

    solver.cpu()


classifiers = {
    "knn5": KNNClassifier(k=5),
    "knn10": KNNClassifier(k=10),
    # "relpnet_1": M.RelationNetClassifier_Protonet1(simnet_channels=[128, 64, 32]),
    # "relation_1": M.RelationNetClassifier(in_channels=416, feature_channels=[10], simnet_channels=[32, 16, 4]),
    # "reg_relation_1": M.RelationNetClassifier(in_channels=416, feature_channels=[10], simnet_channels=[32, 16, 4]),
    # "protonet_1": M.ProtonetClassifier(in_channels=416, mid_channels=[], out_channels=32),
    # "protonet_2": M.ProtonetClassifier(in_channels=416, mid_channels=[64], out_channels=32),
    # "protonet_3": M.ProtonetClassifier(in_channels=416, mid_channels=[128, 64], out_channels=32),
    # "protonet_4": M.ProtonetClassifier(in_channels=416, mid_channels=[256, 128, 64], out_channels=32),
    # "protonet_bottleneck_end": M.ProtonetClassifier(in_channels=416, mid_channels=[256, 128, 64], out_channels=10),
    # "protonet_bottleneck_mid": M.ProtonetClassifier(in_channels=416, mid_channels=[128, 32, 128], out_channels=32),
    # "simnet_simple": M.SimnetClassifier(in_channels=416, channels=[10]),
def ga_run_std(x_train, y_train, x_test, y_test, x_verif, y_verif, k):
    # Run a genetic algorithm to find the best feature weights.
    N_init_pop = 50
    N_crossover = 50
    N_selection = 20
    improv_thresh = 1e-3
    _, nFeats = np.shape(x_train)
    weight_ga = GeneticAlgorithm(nFeats, N_init_pop, mu=0.1)
    weight_pop = weight_ga.get_population()
    metric_array = np.empty(N_init_pop)

    # Evaluate the initial population.
    for i in range(len(weight_pop)):
        # Scale input and verification data by the candidate weights.
        scaled_x_train = np.multiply(x_train, weight_pop[i])
        scaled_x_verif = np.multiply(x_verif, weight_pop[i])
        # Fitness: neighborhood standard deviation on the verification set.
        clf = KNNClassifier(scaled_x_train, y_train, k)
        neighbors = clf.find_all_neighbors(scaled_x_verif)
        metric_array[i] = clf.find_neighborhood_std(neighbors)

    # Update fitness in the GA object and select parents.
    weight_ga.set_fitness(metric_array)
    weight_ga.selection(N_selection)

    new_best_metric = 2.5
    # while (best_metric - new_best_metric) > improv_thresh:
    count = 0
    while count < 20:
        count += 1
        best_metric = new_best_metric
        # Crossover and get the new population.
        weight_ga.crossover(N_crossover)
        weight_pop = weight_ga.get_population()
        metric_array = np.empty(N_crossover)
        # Evaluate and set fitness.
        for i in range(len(weight_pop)):
            scaled_x_train = np.multiply(x_train, weight_pop[i])
            scaled_x_verif = np.multiply(x_verif, weight_pop[i])
            clf = KNNClassifier(scaled_x_train, y_train, k)
            neighbors = clf.find_all_neighbors(scaled_x_verif)
            metric_array[i] = clf.find_neighborhood_std(neighbors)
        weight_ga.set_fitness(metric_array)
        # Track the best solution so far.
        best_weights, new_best_metric = weight_ga.best_sol()
        # print("Metric of this iteration: ", new_best_metric)
        weight_ga.selection(N_selection)
    # print("Best weights = ", best_weights, "\tBest metric = ", new_best_metric)

    # Test with scaling after the GA.
    # Concatenate training and verification sets.
    x_train = np.concatenate((x_train, x_verif), axis=0)
    y_train = np.concatenate([y_train, y_verif])

    # Report KNN accuracy with the learned weights.
    clf = KNNClassifier(np.multiply(x_train, best_weights), y_train, k)
    y_pred = clf.predict(np.multiply(x_test, best_weights))
    acc = accuracy(y_test, y_pred)
    print("ga-std,knn,", k, ",", acc)

    # Report distance-weighted KNN accuracy with the learned weights.
    clf = DwKNNClassifier(np.multiply(x_train, best_weights), y_train, k)
    y_pred = clf.predict(np.multiply(x_test, best_weights))
    acc = accuracy(y_test, y_pred)
    print("ga-std,dknn,", k, ",", acc)