def rank_query(features, query_idx, gallery_idx, file_list, labels, cam_idx, rank=1, display=False,
               cluster_means=None):
    """Score every query by whether a matching identity appears in its top-`rank` retrievals.

    For each query index the gallery is first filtered with
    ``rem_feat_cam_label`` (passed the query's label and camera id), then
    ``knn`` is asked for ``rank`` neighbours among the remaining gallery
    features.  When ``cluster_means`` is given, the query is first mapped to
    its nearest cluster mean and that mean is used as the search vector.

    :param features: Indexable collection of feature vectors.
    :param query_idx: Iterable of indices into ``features`` used as queries.
    :param gallery_idx: Indices into ``features`` forming the gallery.
    :param file_list: Array of file names, indexed by the same indices
        (only read when ``display`` is true).
    :param labels: Array of identity labels, indexed like ``features``.
    :param cam_idx: Array of camera ids, indexed like ``features``.
    :param int rank: Number of neighbours retrieved per query.
    :param bool display: If true, call ``result_display`` for every query.
    :param cluster_means: Optional collection of cluster centres.
    :return: Mean over all queries of the 0/1 hit indicator.
    """
    feat_query, feat_gallery = set_feat_query_gallery(features, query_idx, gallery_idx)
    rank_score = np.zeros(len(feat_query), dtype=int)
    # color[0] is never written and stays 0 (slot of the query itself);
    # slots 1..rank are set to 1 for a label match and 2 for a mismatch.
    color = np.zeros(rank + 1, dtype=int)

    for i, idx in enumerate(query_idx):
        query_id = labels[idx]
        cam_id = cam_idx[idx]
        feat_gall_cam_rem, gall_cam_rem_idx = rem_feat_cam_label(
            feat_gallery, gallery_idx, query_id, cam_id, labels, cam_idx)
        gallery_feats = np.array(feat_gall_cam_rem)

        if cluster_means is None:
            k_idx = knn(np.array(features[idx]), gallery_feats, k=rank)
        else:
            # np.asscalar was removed in NumPy 1.23; ndarray.item() is the
            # documented replacement and returns the same Python scalar.
            nearest = knn(np.array(features[idx]), np.array(cluster_means))
            cluster_idx = np.asarray(nearest).item()
            k_idx = knn(np.array(cluster_means[cluster_idx]), gallery_feats, k=rank)

        retrieved_idx = gall_cam_rem_idx[k_idx]
        gallery_id = labels[retrieved_idx]

        for j in range(rank):
            if query_id == gallery_id[j]:
                color[j + 1] = 1
                rank_score[i] = 1  # a single hit within the top-`rank` is enough
            else:
                color[j + 1] = 2

        print('-- Query:', query_id, '/ Gallery:', gallery_id, '/ Retrieval:', rank_score[i])
        if display:
            # Query file first, followed by the retrieved gallery files.
            file_idx = np.concatenate((idx, retrieved_idx), axis=None)
            result_display(rank, color, file_list[file_idx])

    return np.mean(rank_score, axis=None)
def test_machine(data_test, machine, n_p=52):
    """Evaluate a committee of projection models on column-wise test data.

    Every column of ``data_test`` is classified by each member of ``machine``
    (projection with the member's ``w``/``mu``, neighbour lookup through
    ``knn``, label taken as the ``mode`` of the matched ``data_id_memory``
    entries).  The committee prediction is the ``mode`` of the member votes.
    The ground-truth label of column ``j`` is ``j // int(cols / n_p)``.

    Prints the precision, the average member error and the committee error,
    then hands the true and predicted labels to ``conf_mat``.

    :param data_test: 2-D array, one test sample per column.
    :param machine: Sequence of members exposing ``w``, ``mu``,
        ``data_train_proj`` and ``data_id_memory``.
    :param int n_p: Number of identities used to derive the true labels.
    """
    _, n_samples = data_test.shape
    samples_per_id = int(n_samples / n_p)
    n_members = len(machine)

    member_votes = np.zeros(n_members, dtype=int)
    eps = np.zeros((n_members, n_samples))  # 1 where a member misclassifies a sample
    y_actu = np.zeros(n_samples, dtype=int)
    y_pred = np.zeros(n_samples, dtype=int)
    hits = 0

    for col in range(n_samples):
        sample = data_test[:, col]
        y_actu[col] = col // samples_per_id

        for t, member in enumerate(machine):
            centred = (sample - member.mu)[:, None]
            projected = member.w.T.dot(centred)
            neighbours = knn(projected, member.data_train_proj)
            member_votes[t] = mode(member.data_id_memory[neighbours])[0]
            if member_votes[t] != y_actu[col]:
                eps[t, col] = 1

        y_pred[col] = mode(member_votes)[0]
        if y_pred[col] == y_actu[col]:
            hits += 1

    prec = hits / n_samples
    print('Precision is %.2f%%' % (100 * prec))

    per_member_error = np.mean(np.square(eps), axis=1)
    e_av = np.mean(per_member_error)
    mean_error_per_sample = np.mean(eps, axis=0)
    e_com = np.mean(np.square(mean_error_per_sample))
    print('The average error of each machine member by acting individually is Eav = %.2f' % e_av)
    print('The expected error of the whole machine is Ecom = %.2f' % e_com)

    if e_com > e_av:
        print('We have Ecom > Eav \n -- Failure... You need to review your teamwork.')
    else:
        print('We have Ecom <= Eav \n -- Success!! Machine performs better than individual members. Good teamwork!')

    conf_mat(y_actu, y_pred)
def test_pca_lda(data_test, data_id_memory, data_train_proj, w, mu, n_p=52):
    """Classify each test column with a single projection model and report precision.

    Each column of ``data_test`` is centred with ``mu``, projected with
    ``w.T`` and matched against ``data_train_proj`` through ``knn``; the
    predicted label is ``data_id_memory`` at the returned index.  The true
    label of column ``j`` is ``j // int(cols / n_p)``.

    Prints the precision and passes true/predicted labels to ``conf_mat``.

    :param data_test: 2-D array, one test sample per column.
    :param data_id_memory: Labels of the training samples.
    :param data_train_proj: Projected training data searched by ``knn``.
    :param w: Projection matrix (its transpose is applied).
    :param mu: Vector subtracted from every sample before projection.
    :param int n_p: Number of identities used to derive the true labels.
    """
    n_dims, n_samples = data_test.shape
    samples_per_id = int(n_samples / n_p)

    y_actu = np.zeros(n_samples, dtype=int)
    y_pred = np.zeros(n_samples, dtype=int)
    hits = 0

    for col in range(n_samples):
        centred = (data_test[:, col] - mu).reshape(n_dims, 1)
        projected = w.T.dot(centred)
        nearest = knn(projected, data_train_proj)

        y_actu[col] = col // samples_per_id
        y_pred[col] = data_id_memory[nearest]
        if y_pred[col] == y_actu[col]:
            hits += 1

    prec = hits / n_samples
    print('Precision is %.2f%%' % (100 * prec))
    conf_mat(y_actu, y_pred)
dataframe = functions.load_dataframe() # Dividindo conjuntos de treino e teste X_train, X_test, y_train, y_test = functions.train_test(dataframe) # Modelo PCA para extração de features da imagem pca = functions.pca_model(X_train) # Conjunto de treino com features extraídas X_train = pca.transform(X_train) # Conjunto de teste com features extraídas X_test = pca.transform(X_test) # Treinando modelo classificatório KNN knn = functions.knn(X_train, y_train) # Rótulo das classificações label = { 0: "acho que nao...", 1: "acho que sim..." } # Abrindo a webcam... while True: # Lendo a imagem e extraindo frame status, frame = cam.read() if not status: break
# Script section: hold out 20% of the data (fixed seed) and run the project's
# regression experiments on the split.  The helper functions are defined in
# the project `functions` module; their outputs are written under
# `cfg.default.student_figures` -- NOTE(review): presumably a figures
# directory, confirm against cfg.
Y = Y.to_numpy()
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=5)

# Ridge regression over a list of regularisation strengths
print('Ridge Linear Regression')
alpha_list = [0, .1, 0.3, .5, 1, 1.2]
functions.ridge_regression(X_train, X_test, Y_train, Y_test, alpha_list, True,
                           cfg.default.student_figures, 'ridge_reg')

# KNN over several neighbour counts and both weighting schemes
print('KNN')
k_values = [1, 2, 5, 7, 10]
functions.knn(X_train, X_test, Y_train, Y_test, k_values, True, ['uniform', 'distance'],
              cfg.default.student_figures, 'knn')

# Decision tree over a grid of depth / leaf-size settings
print('Decission Tree Regression')
max_depths = [1, 10, 30, 50, 100, 300]
min_weight_fraction_leafs = [.0, .125, .25, .375, .5]
min_samples_leaf=[1, 10, 100, 200]
functions.decision_tree(X_train, X_test, Y_train, Y_test, max_depths, min_weight_fraction_leafs,
                        min_samples_leaf, cfg.default.student_figures, 'dtree')

# MLP: the scaler is fitted on the training split only and then applied to
# both splits, so no test-set statistics leak into the scaling.
print('MLP')
scaler = preprocessing.StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Script section: pick the best k on the validation set, then evaluate that k
# on the test set after folding the validation data into the training data.
# `train`, `test`, `val` (file paths at this point) and `kmax` come from code
# above this view.
test = test.replace('\r','') #Removes the carriage return cuz I use windows

#Load the data in (each path variable is rebound to the parsed JSON)
with open(train,"r") as read_file:
    train = json.load(read_file)
with open(test,"r") as read_file:
    test = json.load(read_file)
with open(val,"r") as read_file:
    val = json.load(read_file)

#Loop through k
# Slot 0 is a placeholder so that accuracy[k] lines up with k itself.
accuracy = np.zeros(kmax+1)
for k_index in range(1,kmax+1):
    nn = knn(k_index,train,val)
    winners = predict_label(train,val,nn)
    accuracy[k_index] = calculate_accuracy(val,winners)
    print(k_index, accuracy[k_index], sep = "," )

#Print optimal k
# Search only the real candidates 1..kmax: taking argmax over the whole array
# would return the placeholder index 0 (an invalid k) whenever every
# validation accuracy is 0.  Ties still resolve to the smallest k.
opt_k = np.argmax(accuracy[1:]) + 1
print(opt_k)

#Find accuracy on test set
#Train on train + val
train['data'].extend(val['data'])
nn = knn(opt_k,train,test)
winners = predict_label(train,test,nn)
test_accuracy = calculate_accuracy(test,winners)
# Script section: build a Haar-cascade face detector, fit PCA + KNN on the
# training images, then start reading frames from `cam` (defined outside this
# view).
file_name = "haarcascade_frontalface_alt2.xml"
# Model used to detect faces.
# NOTE(review): OpenCV's Python package documents the cascade directory as
# `cv2.data.haarcascades`; confirm that `cv.haarcascades` resolves here.
classifier = cv.CascadeClassifier(
    f"{cv.haarcascades}/{file_name}")

# Load the dataframe with the training images
dataframe = functions.load_dataframe(
)

# Split into training and test sets
X_train, X_test, y_train, y_test = functions.train_test(
    dataframe)

# PCA model used to extract features from the images
pca = functions.pca_model(
    X_train)

# Training set with the extracted features
X_train = pca.transform(X_train)
# Test set with the extracted features
X_test = pca.transform(X_test)

# Train the KNN classification model
knn = functions.knn(X_train, y_train)

# Text labels for the two classification outputs
label = {0: "Sem mascara", 1: "Com mascara"}

# Opening the webcam...
while True:
    # Read the image and extract a frame
    status, frame = cam.read()
    # Stop when no frame could be read
    if not status:
        break

    # Stop when the 'q' key is pressed (only the low byte of the key code is
    # compared)
    if cv.waitKey(1) & 0xff == ord('q'):
        break

    # Converting the image to grayscale
# Script: run the project's KNN pipeline on the `feat1` features with k=1,
# then print the elapsed wall-clock time and the mean accuracy and mean F1.
import functions as fs
import numpy as np
import matplotlib.pyplot as plt
import time
import pdb

# Wall-clock start; covers data loading, feature extraction, KNN and scoring.
t1 = time.time()

train, test = fs.init_data()
trainSet, testSet = fs.data_ready1(train, test)
trainSetf1, testSetf1 = fs.feat1(trainSet, testSet)
# Alternative feature extractor, currently disabled:
#trainSetf1, testSetf1 = fs.feat2(trainSet, testSet, dX=3)

result = fs.knn(trainSetf1, testSetf1, k=1)
# calcMeasure returns four values; only `acc` and `f1` are reported below.
acc, pre, rec, f1 = fs.calcMeasure(result)

t2 = time.time()
print(t2 - t1)      # elapsed seconds
print(acc.mean())
print(f1.mean())
def n_features(input_excel, ark, y_navn):
    '''
    Scores the fourteen classifiers defined in the script "functions" for
    every number of selected features from 1 to 20.

    Uses 4-fold stratified CV once per seed in ``states`` (two seeds). In each
    fold the data is standardised (scaler fitted on the training part only),
    ``relieff`` selects the requested number of features, and every model's
    ``score`` on the test part is stored. The random forest is the exception:
    it is trained and scored on the raw, unselected columns.

    :param str input_excel: The name of the excel-file with the dataset
    :param str ark: The name of the sheet with the dataset
    :param str y_navn: The name of the column with the response
    :return: Nested list ``results[seed][n_features - 1]``; each entry is a
             (14, 4) array with one row per classifier and one column per
             fold, holding the value returned by ``model.score``
    '''
    n_folds = 4
    n_counts = 20    # feature counts 1..20
    n_models = 14    # rows 0..13 are written below
    states = [209, 979]  # Change to wanted seeds

    # Reads the excel-file (`sheet_name` replaces the removed `sheetname`)
    xls = pd.ExcelFile(input_excel)
    data_raw_df = pd.read_excel(xls, sheet_name=ark, index_col=0)

    # Creates the result-matrix. It needs 14 rows: the previous 13-row arrays
    # raised IndexError when the Linear SVC score was stored in row 13.
    results = [[np.zeros((n_models, n_folds)) for _ in range(n_counts)]
               for _ in states]

    # Splits the respons y and the variables X
    y = data_raw_df[y_navn].values
    X = data_raw_df.drop(y_navn, axis=1)
    colNames = list(X.columns)
    features = []
    stdsc = StandardScaler()

    # Splits the dataset into the 2*4 folders and selects features and uses
    # the classifers for 1-20 number of features
    for l, state in enumerate(states):
        cv = StratifiedKFold(n_splits=n_folds, random_state=state, shuffle=True)
        for k in range(n_counts):
            n_selected = k + 1
            for i, (train, test) in enumerate(cv.split(X, y)):
                print(k, i)
                X_train = X.iloc[train]
                X_test = X.iloc[test]
                y_train = y[train]
                y_test = y[test]
                X_std_train = stdsc.fit_transform(X_train)
                X_std_test = stdsc.transform(X_test)

                # Index instead of unpacking so this works whether relieff
                # returns (train, test) or (train, test, features).
                selected = relieff(X_std_train, X_std_test, y_train, n_selected, colNames, features)
                X_std_train, X_std_test = selected[0], selected[1]

                fold_scores = results[l][k]

                model = logrel1(X_std_train, X_std_test, y_train, y_test, state)
                fold_scores[0, i] = model.score(X_std_test, y_test)
                model = logrel2(X_std_train, X_std_test, y_train, y_test, state)
                fold_scores[1, i] = model.score(X_std_test, y_test)
                # Random forest uses the raw columns, not the selected ones.
                model = rf(X_train, y_train, X_test, y_test, state)
                fold_scores[2, i] = model.score(X_test, y_test)
                model = knn(X_std_train, X_std_test, y_train, y_test)
                fold_scores[3, i] = model.score(X_std_test, y_test)
                model = adaboostlog(X_std_train, X_std_test, y_train, y_test, state=state)
                fold_scores[4, i] = model.score(X_std_test, y_test)
                model = decisiontree(X_std_train, y_train, X_std_test, y_test, state)
                fold_scores[5, i] = model.score(X_std_test, y_test)
                model = gnb(X_std_train, X_std_test, y_train, y_test, state=state)
                fold_scores[6, i] = model.score(X_std_test, y_test)
                model = lda(X_std_train, X_std_test, y_train, y_test)
                fold_scores[7, i] = model.score(X_std_test, y_test)
                model = qda(X_std_train, X_std_test, y_train, y_test)
                fold_scores[8, i] = model.score(X_std_test, y_test)
                model = nnet(X_std_train, X_std_test, y_train, y_test, state=state)
                fold_scores[9, i] = model.score(X_std_test, y_test)
                model = mars(X_std_train, X_std_test, y_train, y_test)
                fold_scores[10, i] = model.score(X_std_test, y_test)
                model = plsr(X_std_train, X_std_test, y_train, y_test)
                fold_scores[11, i] = model.score(X_std_test, y_test)
                model = svc(X_std_train, X_std_test, y_train, y_test, state)
                fold_scores[12, i] = model.score(X_std_test, y_test)
                model = linearsvc(X_std_train, X_std_test, y_train, y_test, state)
                fold_scores[13, i] = model.score(X_std_test, y_test)

    return results
def _record_score(fold_scores, row, col, label, model, X_eval, y_eval):
    '''Scores ``model`` once, prints the score and stores it in ``fold_scores[row, col]``.'''
    score = model.score(X_eval, y_eval)
    print('Test score ' + label + ':', score)
    fold_scores[row, col] = score


def classify(input_excel, ark, y_navn, n_features):
    '''
    Scores the fourteen classifiers defined in the script "functions" with a
    fixed number of selected features.

    Uses 4-fold stratified CV once per seed in ``states`` (ten seeds). In each
    fold the data is standardised (scaler fitted on the training part only),
    ``relieff`` selects ``n_features`` features, and every model's ``score``
    on the test part is printed and stored. The random forest is the
    exception: it is trained and scored on the raw, unselected columns.

    :param str input_excel: The name of the excel-file with the dataset
    :param str ark: The name of the sheet with the dataset
    :param str y_navn: The name of the column with the response
    :param int n_features: Number of features to use in the models
    :return: Tuple ``(results, features)``. ``results`` is a list with one
             (14, 4) array per seed (row = classifier, column = fold) holding
             the value returned by ``model.score``; ``features`` is the value
             last returned by ``relieff``
    '''
    n_folds = 4
    n_models = 14    # rows 0..13 are written below
    states = [108, 355, 44, 129, 111, 362, 988, 266, 82,
              581]  # Change to wanted seeds

    # Reads the excel-file (`sheet_name` replaces the removed `sheetname`)
    xls = pd.ExcelFile(input_excel)
    data_raw_df = pd.read_excel(xls, sheet_name=ark, index_col=0)

    # Creates the result-matrix. It needs 14 rows: the previous 13-row arrays
    # raised IndexError when the Linear SVC score was stored in row 13.
    results = [np.zeros((n_models, n_folds)) for _ in states]

    # Splits the respons y and the variables X
    y = data_raw_df[y_navn].values
    X = data_raw_df.drop(y_navn, axis=1)
    colNames = list(X.columns)
    features = []
    stdsc = StandardScaler()

    # Splits the dataset into the 10*4 folders and selects features and uses
    # the classifers
    for k, state in enumerate(states):
        cv = StratifiedKFold(n_splits=n_folds, random_state=state, shuffle=True)
        for i, (train, test) in enumerate(cv.split(X, y)):
            print(k, i)
            X_train = X.iloc[train]
            X_test = X.iloc[test]
            y_train = y[train]
            y_test = y[test]
            X_std_train = stdsc.fit_transform(X_train)
            X_std_test = stdsc.transform(X_test)
            X_std_train, X_std_test, features = relieff(
                X_std_train, X_std_test, y_train, n_features, colNames, features)

            fold_scores = results[k]

            # NOTE(review): logrel1/logrel2 receive X_std_test where the other
            # helpers receive y_train -- confirm against their signatures.
            model = logrel1(X_std_train, X_std_test, state)
            _record_score(fold_scores, 0, i, 'L1-Logistic regression', model, X_std_test, y_test)
            model = logrel2(X_std_train, X_std_test, state)
            _record_score(fold_scores, 1, i, 'L2-Logistic regression', model, X_std_test, y_test)
            # Random forest uses the raw columns, not the selected ones.
            model = rf(X_train, y_train, state)
            _record_score(fold_scores, 2, i, 'Random forest', model, X_test, y_test)
            model = knn(X_std_train, y_train)
            _record_score(fold_scores, 3, i, 'KNN', model, X_std_test, y_test)
            model = adaboostlog(X_std_train, y_train, state=state)
            _record_score(fold_scores, 4, i, 'AdaBoost', model, X_std_test, y_test)
            # The tree is trained on the standardised, selected features, so
            # it is scored on X_std_test; the printed score used to be
            # computed on the raw X_test and disagreed with the stored value.
            model = decisiontree(X_std_train, y_train, state)
            _record_score(fold_scores, 5, i, 'Decision Tree', model, X_std_test, y_test)
            model = gnb(X_std_train, y_train, state=state)
            _record_score(fold_scores, 6, i, 'GNB', model, X_std_test, y_test)
            model = lda(X_std_train, y_train)
            _record_score(fold_scores, 7, i, 'Linear LDA', model, X_std_test, y_test)
            model = qda(X_std_train, y_train)
            _record_score(fold_scores, 8, i, 'QDA', model, X_std_test, y_test)
            model = nnet(X_std_train, y_train, state=state)
            _record_score(fold_scores, 9, i, 'Neural network', model, X_std_test, y_test)
            model = mars(X_std_train, y_train)
            _record_score(fold_scores, 10, i, 'MARS', model, X_std_test, y_test)
            model = plsr(X_std_train, y_train)
            _record_score(fold_scores, 11, i, 'PLSR', model, X_std_test, y_test)
            model = svc(X_std_train, X_std_test, y_train, y_test, state)
            _record_score(fold_scores, 12, i, 'SVC', model, X_std_test, y_test)
            model = linearsvc(X_std_train, y_train, state)
            _record_score(fold_scores, 13, i, 'Linear SVC', model, X_std_test, y_test)

    return (results, features)
# Script: run the project's own KNN on the `data_ready2` features and print
# the accuracy and F1 values produced by `fs.calcMeasure`.
from sklearn.neighbors import KNeighborsClassifier
import functions as fs
import numpy as np

train, test = fs.init_data()
trainSet, testSet = fs.data_ready2(train, test)
# NOTE(review): the third positional argument is presumably k (another script
# calls fs.knn with k=1 by keyword) -- confirm that 3000 is intended.
result = fs.knn(trainSet, testSet, 3000)

# Disabled alternative: the same classification done with scikit-learn's
# KNeighborsClassifier instead of fs.knn.
#label = np.tile(np.arange(0,10),(300,1))
#knn = KNeighborsClassifier(n_neighbors=10)
#knn.fit(trainSet, label.T.flatten())
#print(testSet)
#result = knn.predict(testSet)
#result = result.reshape(10,100).T

# calcMeasure returns four values; only `acc` and `f1` are printed.
acc, pre, rec, f1 = fs.calcMeasure(result)
#print(acc, pre, rec, f1)
print(acc, end="\n\n")
print(f1)
y, test_size=0.2, random_state=5)  # tail of a call that is opened above this view

# Script section: run the project's regression experiments on the split made
# above.  Outputs go under `cfg.default.real_estate_figures` --
# NOTE(review): presumably a figures directory, confirm against cfg.

# Ridge regression over a list of regularisation strengths
print('Ridge Linear Regression')
alpha_list = [0, .1, 0.3, .5, 1, 1.2]
functions.ridge_regression(X_train, X_test, y_train, y_test, alpha_list, True,
                           cfg.default.real_estate_figures, 'ridge_reg')

# KNN over several neighbour counts and both weighting schemes
print('KNN')
k_values = [1, 2, 5, 7, 10]
functions.knn(X_train, X_test, y_train, y_test, k_values, True, ['uniform', 'distance'],
              cfg.default.real_estate_figures, 'knn')

# Decision tree over a grid of depth / leaf-size settings
print('Decission Tree Regression')
max_depths = [1, 10, 30, 50, 100, 300]
min_weight_fraction_leafs = [.0, .125, .25, .375, .5]
min_samples_leaf = [1, 10, 100, 200]
functions.decision_tree(X_train, X_test, y_train, y_test, max_depths, min_weight_fraction_leafs,
                        min_samples_leaf, cfg.default.real_estate_figures, 'dtree')

# MLP: the scaler is fitted on the training split only.
print('MLP')
scaler = preprocessing.StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
# Shared settings for the experiment cells below.
percentage_test = 0.2  # passed to functions.knn as `test_size`
validation_methods = ['holdout', 'cross-validation']
baselines = ['stratified', 'uniform']
path = cfg.default.amazon_figures  # passed as `path` to the knn run and to the plot helpers

# %%
#%% k-Nearest Neighbor Classification
# k-nn
# Each step is wrapped in `if True:` so it can be switched off by editing the
# condition to False.
if True:
    functions.knn(X=training_data_x, y=training_data_y_encoded, test_size=percentage_test,
                  random_state=random_seed, list_k=[1, 2, 5, 8, 9, 10, 11, 12, 15, 20],
                  scaling=True, weights=['uniform', 'distance'],
                  validation_methods=validation_methods, baselines=baselines,
                  path=path, filename='knn')
if True:
    # Plot performance (efficiency and effectiveness)
    functions.plot_evaluation_knn(path, 'knn')
if True:
    # For cross-validation scatter-plot fit time mean and score time
    functions.plot_efficiency_knn(path, 'knn')
if True:
    # For cross-validation scatter-plot accuracy mean and standard deviation
#!/usr/bin/python3.6
# Command-line entry point: load a training set and a test set from JSON
# files, run the project's k-NN on them and display the winners.
import json
import numpy as np
import sys
import argparse
from functions import knn, display_winner

parser = argparse.ArgumentParser(description='Implement a k-NN algorithm')
# All three options are mandatory.  Without `required=True` a missing option
# was left as None and the script failed later with an AttributeError /
# TypeError instead of a usage message.
parser.add_argument(
    "-k",
    type=int,
    required=True,
    help="Number of nearest neighbors to look for in choose(n,k)")
parser.add_argument('-train', type=str, required=True, help='Train data set path')
parser.add_argument('-test', type=str, required=True, help='Test data set path')
args = parser.parse_args()

k = args.k
train = args.train
test = args.test
test = test.replace('\r', '')  #Removes the carriage return cuz I use windows

#Load the data in (each path variable is rebound to the parsed JSON)
with open(train, "r") as read_file:
    train = json.load(read_file)
with open(test, "r") as read_file:
    test = json.load(read_file)

nn = knn(k, train, test)
display_winner(train, test, nn)
#%% Data scaling (remove mean and scale to unit variance) # scaler = preprocessing.StandardScaler().fit(X_train) # X_train_scaled = scaler.transform(X_train) # X_test_scaled = scaler.transform(X_test) validation_methods = ['holdout', 'cross-validation'] baselines=['stratified', 'uniform'] #%% k-Nearest Neighbor Classification if False: list_k = [1, 10, 50, 100, 300, 500] weights = ['uniform', 'distance'] functions.knn(X, y, test_size, random_state, list_k, True, weights, validation_methods, baselines, cfg.default.occupancy_figures, 'knn') if False: # Plot performance (efficiency and effectiveness) functions.plot_evaluation_knn(cfg.default.occupancy_figures, 'knn') if False: # For cross-validation scatter-plot fit time mean and score time functions.plot_efficiency_knn(cfg.default.occupancy_figures, 'knn') if False: # For cross-validation scatter-plot accuracy mean and standard deviation functions.plot_accuracy_knn(cfg.default.occupancy_figures, 'knn') if False: # List variants with highest and lowest accuracy values path = cfg.default.occupancy_figures
def test_mmachine(data_test, *mmachs, n_p=52, fusion='vote'):
    """Evaluate several committees ("machines") fused into one master machine.

    For every column of ``data_test`` each machine collects one label per
    member (projection with the member's ``w``/``mu``, neighbour lookup via
    ``knn``, ``mode`` of the matched ``data_id_memory`` entries) and turns
    them into a histogram over the ``n_p`` labels.  The histograms are fused:

    * ``'vote'`` - ``mode`` of each machine's most frequent label,
    * ``'prod'`` - argmax of the element-wise product of the histograms,
    * ``'sum'``  - argmax of the element-wise sum of the histograms.

    The true label of column ``j`` is ``j // int(cols / n_p)``.  Prints the
    precision and the two error figures, then calls ``conf_mat``.  For an
    unknown ``fusion`` a message is printed and ``None`` is returned before
    any data is touched.

    :param data_test: 2-D array, one test sample per column.
    :param mmachs: Machines; each a sequence of members exposing ``w``,
        ``mu``, ``data_train_proj`` and ``data_id_memory``.
    :param int n_p: Number of identities (histogram width).
    :param str fusion: One of ``'vote'``, ``'prod'``, ``'sum'``.
    """
    fusion_dict = {'vote': 0, 'prod': 1, 'sum': 2}
    if fusion not in fusion_dict:
        print('Not valid fusion scheme. \n Exiting.')
        return
    print('Fusion scheme is \'' + str(fusion) + '\'')

    _, n_samples = data_test.shape
    samples_per_id = int(n_samples / n_p)
    n_machines = len(mmachs)

    # Row i holds machine i's label histogram for the sample being processed.
    vote_counts = np.zeros((n_machines, n_p), dtype=int)
    eps = np.zeros((n_machines, n_samples))  # 1 where a machine's own vote is wrong
    y_actu = np.zeros(n_samples, dtype=int)
    y_pred = np.zeros(n_samples, dtype=int)
    hits = 0

    for col in range(n_samples):
        sample = data_test[:, col]
        y_actu[col] = col // samples_per_id

        for i, mach in enumerate(mmachs):
            member_votes = np.zeros(len(mach), dtype=int)
            for t, member in enumerate(mach):
                projected = member.w.T.dot((sample - member.mu)[:, None])
                neighbours = knn(projected, member.data_train_proj)
                member_votes[t] = mode(member.data_id_memory[neighbours])[0]

            if y_actu[col] != mode(member_votes)[0]:
                eps[i, col] += 1

            for label_id in range(n_p):
                vote_counts[i, label_id] = np.count_nonzero(member_votes == label_id)

        if fusion == 'vote':
            y_pred[col] = mode(np.argmax(vote_counts, axis=1))[0]
        elif fusion == 'prod':
            y_pred[col] = np.argmax(np.prod(vote_counts, axis=0))
        elif fusion == 'sum':
            y_pred[col] = np.argmax(np.sum(vote_counts, axis=0))

        if y_pred[col] == y_actu[col]:
            hits += 1

    prec = hits / n_samples
    print('Precision is %.2f%%' % (100 * prec))

    per_machine_error = np.mean(np.square(eps), axis=1)
    e_mach_avg = np.mean(per_machine_error)
    mean_error_per_sample = np.mean(eps, axis=0)
    e_mmach = np.mean(np.square(mean_error_per_sample))
    print('The average error of each machine member by acting individually is Emach = %.2f' % e_mach_avg)
    print('The expected error of the whole master machine is Emach+ = %.2f' % e_mmach)

    if e_mmach > e_mach_avg:
        print('We have Emach+ > Emach \n -- Failure... You need to review your teamwork.')
    else:
        print('We have Emach+ <= Emach \n -- Success!! Master Machine performs better than individual machines. '
              'Great teamwork!')

    conf_mat(y_actu, y_pred)