def do_windowing(window_size, begin, end):
    gestures = PrepareDataSet.getlines()
    lines = gestures.readlines()
    count = 1
    line = ''
    return_xy = {}
    for index_line in range(begin, end):
        if count <= window_size:
            if count == 1:
                fields = lines[index_line].split(",")
                # Only take a label if a full window still fits before 'end'.
                if (end - index_line) >= window_size:
                    y.append(str(fields[32:]).replace("\\n", "").replace("\\r", "").replace("'", ""))
            if count > 1:
                line += "," + remove_labels_from_line(lines[index_line])
            else:
                line += remove_labels_from_line(lines[index_line])
            count += 1
        else:
            # Window complete: flush the accumulated lines as one instance
            # (note the current line itself is not added).
            count = 1
            X.append(line.split(","))
            line = ''
    return_xy["X"] = copy.deepcopy(X)
    return_xy["y"] = copy.deepcopy(y)
    X[:] = []
    y[:] = []
    print "size of X " + str(len(return_xy["X"]))
    print "size of y " + str(len(return_xy["y"]))
    return return_xy
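# A minimal, self-contained sketch of the same sliding-window idea as
# do_windowing(), assuming each CSV row is "f1,...,fN,label" and that
# window_size consecutive rows are flattened into one instance labelled by
# the first row of the window. All names here are illustrative, not the
# project's API.
def window_rows(rows, window_size):
    X_out, y_out = [], []
    for start in range(0, len(rows) - window_size + 1, window_size):
        window = rows[start:start + window_size]
        features = []
        for row in window:
            features.extend(row.strip().split(",")[:-1])  # drop the label column
        X_out.append(features)
        y_out.append(window[0].strip().split(",")[-1])  # label of the first row
    return X_out, y_out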
def execute():
    gestures = PrepareDataSet.getlines()
    lines = gestures.readlines()
    instances_to_be_tested = []
    for index_line in range(start_test_line_number, number_of_lines):
        line = lines[index_line]
        instances_to_be_tested.append(line)
    prepared_instances_to_be_tested = prepare_instances_to_be_tested(instances_to_be_tested, False)
    results = run_knn(prepared_instances_to_be_tested, False)
    print "Percentage of correct predictions " \
        + str(get_percentage_corrects_predictions(results, get_real_phase_of_gestures(instances_to_be_tested))) + "%"
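# Sketch of how get_percentage_corrects_predictions could be implemented
# (the real helper lives elsewhere in this module); it just compares
# predicted and true phase labels pairwise.
def percentage_correct(predicted, expected):
    if not expected:
        return 0.0
    hits = sum(1 for p, e in zip(predicted, expected) if p == e)
    return 100.0 * hits / len(expected)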
from os import listdir
from os.path import isfile, join

import numpy as np
from sklearn.metrics import confusion_matrix

# Project modules; the exact import paths are assumed from the rest of the repo.
from tools import PrepareDataSet as Pds
import Knn

TRAINING = 0
TEST = 1
k = 5

# Collect the windowed datasets (e.g. b1_va3_windowed) from the data directory.
files = [f for f in listdir(Pds.PATH) if isfile(join(Pds.PATH, f)) and "windowed" in f]
files.sort()
print(files)

data = [Pds.get_dataset(Pds.PATH + "/" + file) for file in files]
data_x = [Pds.get_training_data(Pds.TRAINING_SLICE, data[index]['x'], type='x')
          for index in range(len(data))]
print(data_x)
data_y = [Pds.get_training_data(Pds.TRAINING_SLICE, data[index]['y'], type='y')
          for index in range(len(data))]

# One KNN classifier per dataset, trained on the TRAINING slice.
clfs = [Knn.KNNClassifier(data_x[index][TRAINING], data_y[index][TRAINING], k)
        for index in range(len(data))]

# results = []
# for index in range(len(clfs)):
#     predictions = []
#     for inner in range(len(data_x[index][TEST])):
#         predictions.append(clfs[index].classify(
#             np.squeeze(np.asarray(data_x[index][TEST][inner]))))
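# Finishing the commented-out loop above and summarising it with the
# otherwise unused confusion_matrix import. KNNClassifier.classify is the
# project's own API; everything else is standard numpy/sklearn. A sketch,
# not necessarily the evaluation the author intended.
def evaluate(clf, test_x, test_y):
    predictions = [clf.classify(np.squeeze(np.asarray(sample))) for sample in test_x]
    return confusion_matrix(test_y, predictions)

# Example:
# matrices = [evaluate(clfs[i], data_x[i][TEST], data_y[i][TEST])
#             for i in range(len(clfs))]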
x = [i for i in range(len(eigen_values))]
soma = 0
for index in range(15):
    soma += var_exp[index]
print soma

plt.plot(x, y, linestyle='--', marker='o', color='b')
plt.ylabel("Percentage of Representation")
plt.xlabel("Eigenvalue Index")
plt.show()

# dataset = pds.get_dataset(pds.FILE)
dataset = pds.get_dataset("")
reduced_matrix = execute(dataset)
print "final", reduced_matrix

# with open(pds.PATH + "/a1_va3_reducedR.csv", 'w') as csvw:
#     csvw = csv.writer(csvw, delimiter=',')
#     csvw.writerows(reduced_matrix)
np.savetxt(pds.FILE_REDUCED, reduced_matrix, delimiter=',', fmt='%.8f')

print 'y', dataset['y']
outf = open(pds.FILE_REDUCED_PRED, 'w')
for index in range(len(dataset['y'])):
    outf.write(str(dataset['y'][index]) + "\n")  # assumed body: the original is truncated at this loop
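# var_exp and the plotted y above come from the PCA step earlier in this
# file; a typical construction from the eigenvalues looks like this
# (illustrative sketch, not the project's exact code):
def explained_variance(eigen_values):
    total = sum(eigen_values)
    var = [100.0 * ev / total for ev in sorted(eigen_values, reverse=True)]
    cumulative = np.cumsum(var)  # what the plot visualises
    return var, cumulative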
# -*- coding: utf-8 -*-
from sklearn import neighbors
import copy

from tools import PrepareDataSet

weight = 'distance'
n_neighbors = 7
percent_instances_to_trains_ = 80
window_size = 8
X = []
y = []
number_of_lines = PrepareDataSet.get_total_of_lines()
start_test_line_number = ((number_of_lines * percent_instances_to_trains_) / 100)
print "testing starts at line = " + str(start_test_line_number)
print "total number of lines in the file = " + str(number_of_lines)

# Read the whole dataset file configured in load_dataset (here, a1_va3.csv).
gestures = PrepareDataSet.getlines()


# Take a percentage of the un-windowed file for training.
# TODO: rework this -- right now everything goes to training.
def get_instances_to_train():
    list_train = []
    lines = gestures.readlines()
    for index in range(1, start_test_line_number - 1):
        list_train.append(lines[index])
    print "number of lines in memory " + str(len(list_train))
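# Sketch of how the sklearn import above is typically wired to the weight
# and n_neighbors parameters defined in this module (the real call happens
# further down in this file); train_X/train_y/test_X are illustrative names.
def build_classifier():
    return neighbors.KNeighborsClassifier(n_neighbors, weights=weight)

# Example:
# clf = build_classifier()
# clf.fit(train_X, train_y)   # train_X/train_y e.g. from do_windowing()
# predicted = clf.predict(test_X)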