def _main():
    # Load binarized MNIST train and test sets (labels are unused here).
    data, _ = mnist.MNIST("train", path="../../machine-learning/data/mnist/",
                          data_size=40, batch_size=20, reshape=False,
                          one_hot=False, binarize=True).to_ndarray()
    test_data, _ = mnist.MNIST("test", path="../../machine-learning/data/mnist/",
                               data_size=40, batch_size=20, reshape=False,
                               one_hot=False, binarize=True).to_ndarray()

    max_epoch = 1

    # Greedy layer-wise pretraining: each RBM is trained on the hidden
    # activations produced by the layer below it.
    print("----- Layer 1 -----")
    layer_i = rbm.RBM(train_data=data, num_hidden=1000)
    layer_i.train(max_epoch=max_epoch)

    print("----- Layer 2 -----")
    layer_ii = rbm.RBM(train_data=layer_i.hidden_data, num_hidden=500)
    layer_ii.train(max_epoch=max_epoch)

    print("----- Layer 3 -----")
    layer_iii = rbm.RBM(train_data=layer_ii.hidden_data, num_hidden=250)
    layer_iii.train(max_epoch=max_epoch)

    print("----- Layer 4 -----")
    layer_iv = rbm.RBM(train_data=layer_iii.hidden_data, num_hidden=30)
    layer_iv.train(max_epoch=max_epoch)

    # Fine-tune the whole stack with backpropagation.
    print("\n=============== Backpropagation ===============\n")
    bp.backpropagation(layers=[layer_i, layer_ii, layer_iii, layer_iv],
                       train_data=data, test_data=test_data, max_epoch=2)
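# The four pretraining blocks above follow one pattern; a minimal sketch of
# the same loop in generic form, assuming the rbm.RBM interface used above
# (train_data, num_hidden, .train(), .hidden_data):
def pretrain_stack(data, layer_sizes=(1000, 500, 250, 30), max_epoch=1):
    layers = []
    layer_input = data
    for depth, num_hidden in enumerate(layer_sizes, start=1):
        print("----- Layer {} -----".format(depth))
        layer = rbm.RBM(train_data=layer_input, num_hidden=num_hidden)
        layer.train(max_epoch=max_epoch)
        layers.append(layer)
        layer_input = layer.hidden_data  # feed activations to the next RBM
    return layers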
def get_gradients(self):
    self.ann = backpropagation(self.ann)
    # Reset the list on each call so gradients do not accumulate
    # across invocations.
    self.layer_gradients = []
    for i in range(len(self.ann.layers) - 1):
        gradient = (self.ann.layers[i].dWeight
                    + self.ann.momentum * self.ann.layers[i].weight)
        self.layer_gradients.append(gradient)
def train(self, _input, target):
    if self.target is None:
        self.target = np.transpose(np.array([target]))
    for epoch in range(self.max_epoch):
        self.run(_input)
        self.calculate_error()
        print("==== EPOCH " + str(epoch + 1) + " ====")
        print("---OUTPUT---")
        print(self.output)
        print("---ERROR---")
        print(self.error)
        print("==============")
        if self.error < self.desired_error:
            break
        # backpropagation() returns the network with per-layer gradients
        # (dWeight) filled in.
        self = backpropagation.backpropagation(self)
        for i in range(len(self.layers) - 1):
            self.layers[i].weight_update = (self.layers[i].dWeight
                                            + self.layers[i].weight)
            self.layers[i].update_weights(self.learning_rate)
            self.layers[i].update_bias(self.learning_rate)
    # Clear cached forward-pass state so the next call starts fresh.
    self._input = None
    self.output = None
    for i in range(len(self.layers) - 1):
        self.layers[i].z_values = None
        self.layers[i].a_values = None
        self.layers[i].delta = None
    self.target = None
def cross_validation(dataset, k_fold, test_folder, original_labels,
                     max_iterations, alpha, beta=0.9,
                     less_acceptable_difference=0.0001, momentum=True,
                     patience=50, logger=logger):
    result_dict = cvn.get_empty_result_dict(original_labels)

    # generating folds
    folds = generate_folds(dataset, k_fold, test_folder)
    net_file = '{}/network.txt'.format(test_folder)
    weights_file = '{}/initial_weights.txt'.format(test_folder)

    for f_index, (train_file, test_file) in enumerate(folds):
        print('Processing fold {}'.format(f_index + 1))
        print('Training network')
        network, training_result = bp.backpropagation(
            net_file, weights_file, train_file, max_iterations, alpha,
            less_acceptable_difference=less_acceptable_difference,
            validation_filename=test_file, possible_labels=original_labels,
            patience=patience, logger=logger)

        epochs_trained = len(training_result[LOSS])
        net_arch = [l.size for l in network.layers]
        training_result[FOLD].extend([f_index + 1] * epochs_trained)
        training_result[EPOCH].extend(list(range(1, epochs_trained + 1)))
        training_result[ARCHITECTURE] = [net_arch] * epochs_trained
        training_result[REGULARIZATION] = ([network.regularizationFactor]
                                           * epochs_trained)

        for k in result_dict.keys():
            result_dict[k].extend(training_result[k])

    pd.DataFrame(result_dict).to_csv('{}/result.csv'.format(test_folder),
                                     index=False)
    return result_dict
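# A hypothetical invocation of cross_validation above (the dataset path,
# label set, and hyper-parameters below are illustrative assumptions, not
# part of this snippet's repository):
#
#   df = pd.read_csv('data/wine.csv')
#   cross_validation(df, k_fold=10, test_folder='tests/wine',
#                    original_labels=[1, 2, 3], max_iterations=500, alpha=0.1)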
def cross_validation(dataset, k_fold, test_folder, original_labels,
                     max_iterations, alpha, beta=0.9,
                     less_acceptable_difference=0.0001, momentum=True,
                     patience=50, logger=logger):
    possible_labels = original_labels
    result_dict = get_empty_result_dict(possible_labels)

    # generating folds
    folds = generate_folds(dataset, k_fold, test_folder)
    net_file = '{}/network.txt'.format(test_folder)
    weights_file = '{}/initial_weights.txt'.format(test_folder)

    for f_index, (train_file, test_file) in enumerate(folds):
        print('Processing fold {}'.format(f_index + 1))
        print('Training network')
        # The trained network object is not needed in this variant.
        _, training_result = bp.backpropagation(
            net_file, weights_file, train_file, max_iterations, alpha,
            less_acceptable_difference=less_acceptable_difference,
            validation_filename=test_file, possible_labels=possible_labels,
            patience=patience, logger=logger)

        epochs_trained = len(training_result[LOSS])
        training_result[FOLD].extend([f_index + 1] * epochs_trained)
        training_result[EPOCH].extend(list(range(1, epochs_trained + 1)))

        for k in result_dict.keys():
            result_dict[k].extend(training_result[k])

    return result_dict
def plot_gd_trajectory(w1_init, w2_init, learningrate):
    # Shorthand for the two weight coordinates being traced; the layer and
    # neuron indices (weightindex_*) are module-level globals.
    li = weightindex_startlayer
    n1, s1 = weightindex_neuron_nextlayer[0] - 1, weightindex_neuron_startlayer[0] - 1
    n2, s2 = weightindex_neuron_nextlayer[1] - 1, weightindex_neuron_startlayer[1] - 1

    Loss = []
    w_1 = [float(w1_init)]
    w_2 = [float(w2_init)]
    weight[li][n1][s1] = w1_init
    weight[li][n2][s2] = w2_init
    networkoutput, outputsequence, preoutputsequence = network.output(
        float(x), weight, bias)
    Loss.append(float(0.5 * (y - float(networkoutput)) ** 2))

    for i in range(N):
        # Calculate the gradient with respect to the current weights and biases.
        backprop = backpropagation(L=L, n=n, activation=sigma, weight=weight,
                                   bias=bias, outputsequence=outputsequence,
                                   preoutputsequence=preoutputsequence)
        delta = backprop.error(y)
        gradweight, gradbias = backprop.grad(x, delta)

        # Gradient-descent step on the two traced weights.
        weight[li][n1][s1] = w_1[i] - learningrate * gradweight[li][n1][s1]
        weight[li][n2][s2] = w_2[i] - learningrate * gradweight[li][n2][s2]

        # Update the loss and record the new weight values.
        networkoutput, outputsequence, preoutputsequence = network.output(
            float(x), weight, bias)
        Loss.append(float(0.5 * (y - float(networkoutput)) ** 2))
        w_1.append(weight[li][n1][s1])
        w_2.append(weight[li][n2][s2])

    return w_1, w_2, Loss
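# One way the returned trajectory could be inspected (a sketch; assumes
# matplotlib is available, since plotting is not part of the snippet above):
import matplotlib.pyplot as plt

def show_trajectory(w_1, w_2, Loss):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
    ax1.plot(w_1, w_2, marker='.')  # path of the two traced weights
    ax1.set_xlabel('w_1')
    ax1.set_ylabel('w_2')
    ax2.plot(Loss)                  # loss along the trajectory
    ax2.set_xlabel('step')
    ax2.set_ylabel('loss')
    plt.show()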
def step_training(dataset, step, test_folder, original_labels, alpha,
                  beta=0.9, momentum=True, logger=logger):
    result_dict = {
        ARCHITECTURE: [],
        REGULARIZATION: [],
        TRAINING_EXAMPLES: [],
        LOSS: [],
        ACCURACY: [],
        VAL_ACCURACY: [],
        VAL_LOSS: [],
    }

    folds = generate_data_steps(dataset, step, test_folder)
    net_file = '{}/network.txt'.format(test_folder)
    weights_file = '{}/initial_weights.txt'.format(test_folder)

    for f_index, (train_file, test_file) in enumerate(folds):
        print('Processing fold {}'.format(f_index + 1))
        print('Training network')
        network, training_result = bp.backpropagation(
            net_file, weights_file, train_file, 1, alpha,
            validation_filename=test_file, possible_labels=original_labels,
            logger=logger)

        epochs_trained = len(training_result[LOSS])
        net_arch = [l.size for l in network.layers]
        training_result[ARCHITECTURE] = [net_arch] * epochs_trained
        training_result[REGULARIZATION] = ([network.regularizationFactor]
                                           * epochs_trained)
        # Repeat the example count so every column has one row per epoch.
        training_result[TRAINING_EXAMPLES] = ([(f_index + 1) * step]
                                              * epochs_trained)

        for k in result_dict.keys():
            result_dict[k].extend(training_result[k])

    pd.DataFrame(result_dict).to_csv('{}/result.csv'.format(test_folder),
                                     index=False)
    return result_dict
def exemplo_back(layers, lamb, theta_matrices, instancias):
    # 3 layers, e.g. [1 2 1]
    network = np.array(layers)
    theta1 = theta_matrices[0]
    theta2 = theta_matrices[1]
    thetas = np.array([theta1, theta2])
    regularizacao = lamb
    learning_rate = 1
    exemplos = instancias

    # Compare analytic gradients with a numerical approximation.
    novos_thetas_back, gradientes_back = bp.backpropagation(
        exemplos, thetas, regularizacao, network, learning_rate, debug=1)
    novos_thetas_numer, gradientes_numer = numerical_verification(
        1e-7, thetas, exemplos, regularizacao, network, learning_rate, debug=1)

    print("")
    print("Backpropagation gradients: ")
    print(gradientes_back)
    print("Numerical-approximation gradients: ")
    print(gradientes_numer)
    print("")
    str_diferenca = diff_gradients(gradientes_back, gradientes_numer, debug=1)
    return novos_thetas_numer, gradientes_numer, str_diferenca
def exemplo_back_um(layers, lamb, theta_matrices):
    # 3 layers, e.g. [1 2 1]
    network = np.array(layers)
    theta1 = theta_matrices[0]
    theta2 = theta_matrices[1]
    thetas = np.array([theta1, theta2])
    regularizacao = lamb
    learning_rate = 1

    # Two hand-picked training examples: input -> expected output.
    entradas = [[0.13], [0.42]]
    saidas = [[0.9], [0.23]]
    exemplos = [[entradas[i], saidas[i]] for i in range(2)]

    novos_thetas_back, gradientes_back = bp.backpropagation(
        exemplos, thetas, regularizacao, network, learning_rate, debug=1)
    novos_thetas_numer, gradientes_numer = vn.numerical_verification(
        1e-7, thetas, exemplos, regularizacao, network, learning_rate, debug=1)
    vn.diff_gradients(gradientes_back, gradientes_numer, 1)
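# The numerical check called above amounts to a central difference on the
# cost J: dJ/dtheta_i ≈ (J(theta + eps*e_i) - J(theta - eps*e_i)) / (2*eps).
# A generic sketch (cost_fn and the flat 1-D theta vector are assumptions,
# not the vn module's API):
import numpy as np

def numerical_gradient(cost_fn, theta, eps=1e-7):
    grad = np.zeros_like(theta)
    for i in range(theta.size):
        bump = np.zeros_like(theta)
        bump[i] = eps  # perturb one parameter at a time
        grad[i] = (cost_fn(theta + bump) - cost_fn(theta - bump)) / (2 * eps)
    return grad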
def batch_train(self, data_set, targets, batch_size):
    batch_coeff = 1 / batch_size
    for epoch in range(self.max_epoch):
        for input_data in range(len(data_set)):
            if self.target is None:
                self.target = np.transpose(np.array([targets[input_data]]))
            self.run(data_set[input_data])
            self.calculate_error()
            if epoch % 100 == 0:
                print("====EPOCH " + str(epoch + 1) + "====")
                print("++input " + str(input_data + 1) + "++")
                print("---OUTPUT---")
                print(self.output)
                print("---ERROR---")
                print(self.error)
                print("==============")
            # Accumulate gradients for this example via backpropagation.
            self = backpropagation.backpropagation(self)
            self._input = None
            self.output = None
            self.target = None
        # Apply the batch-scaled updates once per epoch.
        for i in range(len(self.layers) - 1):
            self.layers[i].weight_update = (
                batch_coeff * self.layers[i].dWeight
                + self.momentum * self.layers[i].weight)
            self.layers[i].update_weights(self.learning_rate)
            self.layers[i].update_bias(self.learning_rate)
import numpy as np

# Load the XOR training set and prepend a bias input of -1 to each pattern
# (initW, salY and bp are local project modules).
reader = np.genfromtxt("files/XOR_trn.csv", delimiter=',')
trn = np.append(-np.ones((len(reader[:, 1]), 1)), reader[:, 0:2], 1)
yd = np.expand_dims(reader[:, 2], axis=1)

# Network with 2 hidden neurons and a single output.
w = initW.initialize_w(np.ones((len(trn[0, :]), 1)), np.array([2, 1], int))

vel = 0.1   # learning rate
epoc = 10   # number of epochs
accurV = np.zeros((epoc, 1))
wV = np.zeros((epoc, 3))
errorV = np.zeros((len(trn[:, 1]), epoc))

for i in range(epoc):
    # One online pass over the training set.
    for j in range(len(trn[:, 0])):
        inputV = np.expand_dims(trn[j, :], axis=1)
        y = salY.salidasy(inputV, w)
        w = bp.backpropagation(w, y, yd[j], vel)
    # Training accuracy: outputs within 0.3 of the target count as correct.
    accur = 0
    for j in range(len(trn[:, 0])):
        inputV = np.expand_dims(trn[j, :], axis=1)
        y = salY.salidasy(inputV, w)
        ye = yd[j]
        accur = (accur + 1 if abs(ye - y[-1][-1]) < 0.3 else accur)
    accurV[i] = accur / len(trn[:, 1])

print(np.mean(accurV))
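# Quick sanity check of the trained weights on the four canonical XOR
# patterns (a sketch; assumes the CSV encodes inputs as 0/1 with the same
# -1 bias convention as above; adjust if the file uses bipolar inputs):
for x1, x2 in [(0, 0), (0, 1), (1, 0), (1, 1)]:
    pattern = np.array([[-1.0], [float(x1)], [float(x2)]])
    out = salY.salidasy(pattern, w)
    print(x1, x2, '->', float(out[-1][-1]))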
def treina_e_testa(args):
    random.seed(10)
    np.random.seed(10)
    (treino, teste, target_coluna, config_rede, alfa, reg_lambda,
     batch_size, K) = args

    # Initialize thetas with random values.
    theta = []
    for i in range(len(config_rede) - 1):
        theta.append(np.matrix(np.random.normal(
            0, 1, size=(config_rede[i + 1], config_rede[i] + 1))))
        if DEBUG:
            print("INITIAL THETA: \n", theta[i])

    # Arrange the training set as [inputs, one-hot target] pairs.
    matrix_treino = np.matrix(treino.to_numpy())
    treino_organizado = []
    for i in range(len(treino)):
        entradas = np.delete(matrix_treino[i], target_coluna, 1)
        saida = matrix_treino[i, target_coluna]
        # One-hot vector over the possible output classes,
        # with 1.0 at the expected class.
        lista_saidas = [0.0] * config_rede[-1]
        lista_saidas[int(saida * (config_rede[-1] - 1))] = 1.0
        treino_organizado.append([np.array(entradas), np.array(lista_saidas)])

    # Arrange the test set likewise, dropping the target column
    # from the inputs.
    matrix_teste = np.matrix(teste.to_numpy())
    teste_organizado = []
    for i in range(len(teste)):
        entradas = np.delete(matrix_teste[i], target_coluna, 1)
        teste_organizado.append(
            [np.array(entradas), matrix_teste[i, target_coluna]])

    if DEBUG:
        print("IN BACKPROP")
    theta_modelo, custoJ_S = bp.backpropagation(
        treino_organizado, theta, alfa, reg_lambda, config_rede, K, 0,
        batch_size)
    if DEBUG:
        print("new theta\n", theta_modelo)

    # Tally of correct ('CERTO') and wrong ('ERRADO') predictions.
    table_of_confusion = {'CERTO': 0, 'ERRADO': 0}
    for test_row in teste_organizado:
        resp = bp.predict(test_row[0], theta_modelo)
        if DEBUG:
            print("PREDICT RESPONSE: ", resp)
        localizado = np.where(resp == np.max(resp))
        predito = localizado[1][0]
        if DEBUG:
            print("argmax at ", predito)
        esperado = int(test_row[1] * (config_rede[-1] - 1))
        if DEBUG:
            print("expected ", esperado)
        # Update the tally.
        if predito == esperado:
            table_of_confusion['CERTO'] += 1
        else:
            table_of_confusion['ERRADO'] += 1
    return table_of_confusion
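# The one-hot encoding above assumes the target column was normalized to
# [0, 1]: with n output classes, class index = int(target * (n - 1)). For
# example, with 3 classes the targets 0.0, 0.5, 1.0 map to indices 0, 1, 2:
n_classes = 3
for target in (0.0, 0.5, 1.0):
    one_hot = [0.0] * n_classes
    one_hot[int(target * (n_classes - 1))] = 1.0
    print(target, '->', one_hot)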
def run(data, thetas, regularization, network, dataset_file):
    K = 10
    # Mini-batch count B and convergence threshold tuned per dataset.
    if dataset_file == "wine.data":  # ok
        min_diff = 0.001
        B = 10
    elif dataset_file == "wdbc.data":  # ok
        B = 12
        min_diff = 0.001
    elif dataset_file == "pima.tsv":
        # More than 0.15 of regularization hurts the results badly; these
        # are the best values found, but performance still lags the other
        # datasets.
        B = 5
        min_diff = 0.00002
    elif dataset_file == "ionosphere.data":  # ok; best with B = 2 or 5
        B = 5
        min_diff = 0.0002

    partitions = generate_partitions(data, K)
    learning_rate = 0.5

    for i in range(K):
        print("Running K = " + str(i))
        novos_thetas = copy.deepcopy(thetas)
        evaluation = partitions[i]

        # Cross-validation split: K-1 partitions for training, 1 held out.
        training = []
        for p in range(K):
            if p != i:
                training = training + partitions[p]
        batch_p = generate_partitions(training, B)  # B mini-batches

        # Train on each mini-batch until the cost J stabilizes
        # (or 1000 iterations are reached).
        for p in range(B):
            cont = 1
            initial_j_value = nn.calculate_j(batch_p[p], novos_thetas,
                                             regularization, network)
            while cont < 1000:
                novos_thetas, gradientes = bp.backpropagation(
                    batch_p[p], novos_thetas, regularization, network,
                    learning_rate, 0)
                cont += 1
                if (cont % 20) == 0:
                    j_value = nn.calculate_j(batch_p[p], novos_thetas,
                                             regularization, network)
                    diff = abs(initial_j_value - j_value)
                    if diff < min_diff:
                        break
                    initial_j_value = j_value

        # Evaluate on the held-out partition.
        respostas = []
        for e in range(len(evaluation)):
            if network[-1] == 1:  # single output unit
                resposta_certa = evaluation[e][1][0]
                resposta_rede = nn.evaluate(evaluation[e], novos_thetas,
                                            network)
                resposta_rede_int = round(resposta_rede[0])
                respostas.append((resposta_certa, resposta_rede_int))
            else:  # more than one output unit
                index_resposta_certa = evaluation[e][1].index(
                    max(evaluation[e][1]))
                respostas_rede = nn.evaluate(evaluation[e], novos_thetas,
                                             network)
                index_resposta_rede = np.where(
                    respostas_rede == max(respostas_rede))[0][0]
                respostas.append((index_resposta_certa, index_resposta_rede))
        print(respostas)

        if network[-1] == 1:  # single output unit
            resultado = ut.performance_binary(respostas, [0, 1])
        else:  # more than one output unit
            resultado = ut.performance_multiclass(respostas, [0, 1, 2])
        print("Precision , Recall , F1-Score")
        print(resultado)

    return novos_thetas, gradientes
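# The stopping rule used inside the mini-batch loop above, isolated as a
# minimal sketch: training on a batch halts once the regularized cost J
# moves by less than min_diff between checkpoints.
def has_converged(previous_j, current_j, min_diff):
    return abs(previous_j - current_j) < min_diff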