def evaluate(conf, params, X_data, Y_data):
    """Evaluate a trained model on X_data, one mini-batch at a time.

    Args:
        conf: Configuration dictionary. Reads 'batch_size' and 'output_dimension'.
        params: Dictionary with parameters, passed through to model.forward.
        X_data: numpy array of floats with shape [input dimension, number of examples]
        Y_data: numpy array of integer labels. It is sliced along its FIRST axis with the
                same example range that selects the columns of X_data.
                NOTE(review): this presumably means one label per example along axis 0;
                confirm against the caller and model.one_hot.
    Returns:
        num_correct_total: Sum of the per-batch correct counts returned by
                           model.cross_entropy_cost.
        num_examples_evaluated: Number of examples actually fed to the model. This is
                                0 (and the model is never called) when X_data is empty.
    """
    num_examples = X_data.shape[1]
    batch_size = conf['batch_size']

    num_examples_evaluated = 0
    num_correct_total = 0

    for start_ind in range(0, num_examples, batch_size):
        # Clamp every batch, including the first one. Slicing past the end silently
        # truncates, so an unclamped end index would over-count the evaluated examples
        # whenever batch_size is larger than the data set.
        end_ind = min(start_ind + batch_size, num_examples)

        X_batch = X_data[:, start_ind:end_ind]
        Y_batch = model.one_hot(Y_data[start_ind:end_ind], conf['output_dimension'])

        Y_proposal, _ = model.forward(conf, X_batch, params, is_training=False)
        _, num_correct = model.cross_entropy_cost(Y_proposal, Y_batch)

        num_correct_total += num_correct
        num_examples_evaluated += end_ind - start_ind

    return num_correct_total, num_examples_evaluated
def train(conf, X_train, Y_train, X_devel, Y_devel):
    """Run training

    Repeats forward pass, cost, backward pass and gradient-descent update on shuffled
    mini-batches until conf['max_steps'] steps have run or the cost becomes nan.

    Args:
        conf: Configuration dictionary. Reads 'batch_size', 'output_dimension',
              'train_progress', 'devel_progress', 'verbose' and 'max_steps'.
        X_train: numpy array of floats with shape [input dimension, number of train examples]
        Y_train: numpy array of integer labels, indexed along its first axis by example index.
        X_devel: numpy array of floats with shape [input dimension, number of devel examples]
        Y_devel: numpy array of integer labels for the devel set, passed to evaluate().
    Returns:
        params: Dictionary with trained parameters
        train_progress: Dictionary with progress data, to be used in visualization.
        devel_progress: Dictionary with progress data, to be used in visualization.
    """
    print("Run training")

    # Preparation
    num_examples_in_epoch = X_train.shape[1]
    example_indices = np.arange(0, num_examples_in_epoch)
    np.random.shuffle(example_indices)

    # Initialisation
    params = model.initialization(conf)

    # For displaying training progress
    train_steps = []
    train_ccr = []
    train_cost = []
    devel_steps = []
    devel_ccr = []

    # Start training
    step = 0
    epoch = 0
    num_correct_since_last_check = 0
    # Wall-clock time spent on training batches since the last progress report. It is
    # accumulated per batch so the reported rates cover the whole interval and exclude
    # the time spent evaluating the development set.
    train_time_since_last_check = 0.0
    batch_start_index = 0
    batch_end_index = conf['batch_size']

    print("Number of training examples in one epoch: ", num_examples_in_epoch)
    print("Start training")
    while True:
        batch_start_time = time.time()

        batch_indices = get_batch_indices(example_indices, batch_start_index, batch_end_index)
        X_batch = X_train[:, batch_indices]
        Y_batch = model.one_hot(Y_train[batch_indices], conf['output_dimension'])

        Y_proposal, features = model.forward(conf, X_batch, params, is_training=True)
        print("Finish Foward")
        cost_value, num_correct = model.cross_entropy_cost(Y_proposal, Y_batch)
        grad_params = model.backward(conf, Y_proposal, Y_batch, params, features)
        print("Finish Backward")
        params = model.gradient_descent_update(conf, params, grad_params)
        print("Finish Gradient Update")
        print("Finish Training Number" + repr(step))

        train_time_since_last_check += time.time() - batch_start_time
        num_correct_since_last_check += num_correct

        batch_start_index += conf['batch_size']
        batch_end_index += conf['batch_size']
        if batch_start_index >= num_examples_in_epoch:
            # Epoch finished: reshuffle and restart from the first batch.
            epoch += 1
            np.random.shuffle(example_indices)
            batch_start_index = 0
            batch_end_index = conf['batch_size']

        step += 1

        if np.isnan(cost_value):
            print("ERROR: nan encountered")
            break

        if step % conf['train_progress'] == 0:
            elapsed_time = train_time_since_last_check
            train_time_since_last_check = 0.0
            sec_per_batch = elapsed_time / conf['train_progress']
            # A coarse clock can report zero elapsed time; avoid dividing by it.
            if elapsed_time > 0:
                examples_per_sec = conf['batch_size'] * conf['train_progress'] / elapsed_time
            else:
                examples_per_sec = float('inf')
            # NOTE(review): both rates assume every batch holds conf['batch_size']
            # examples; confirm how get_batch_indices handles the last batch of an epoch.
            ccr = num_correct / conf['batch_size']
            running_ccr = (num_correct_since_last_check / conf['train_progress'] /
                           conf['batch_size'])
            num_correct_since_last_check = 0
            train_steps.append(step)
            train_ccr.append(running_ccr)
            train_cost.append(cost_value)
            if conf['verbose']:
                print(
                    "S: {0:>7}, E: {1:>4}, cost: {2:>7.4f}, CCR: {3:>7.4f} ({4:>6.4f}), "
                    "ex/sec: {5:>7.3e}, sec/batch: {6:>7.3e}".format(
                        step, epoch, cost_value, ccr, running_ccr, examples_per_sec,
                        sec_per_batch))

        if step % conf['devel_progress'] == 0:
            # Separate names so the training batch's num_correct is not overwritten.
            devel_correct, devel_evaluated = evaluate(conf, params, X_devel, Y_devel)
            devel_steps.append(step)
            devel_ccr.append(devel_correct / devel_evaluated)
            if conf['verbose']:
                print(
                    "S: {0:>7}, Test on development set. CCR: {1:>5} / {2:>5} = {3:>6.4f}"
                    .format(step, devel_correct, devel_evaluated,
                            devel_correct / devel_evaluated))

        if step >= conf['max_steps']:
            print("Terminating training after {} steps".format(step))
            break

    train_progress = {
        'steps': train_steps,
        'ccr': train_ccr,
        'cost': train_cost
    }
    devel_progress = {'steps': devel_steps, 'ccr': devel_ccr}

    return params, train_progress, devel_progress
def _save_curve(values, out_dir, stem):
    """Plot values against their epoch index, save and show the plot, and dump the values.

    Writes <out_dir>/<stem>.png and <out_dir>/<stem>.txt (one value per line).
    """
    # Use a fresh figure per curve: when plt.show() does not block (non-interactive
    # backends) a second plt.plot would otherwise draw on top of the previous curve.
    plt.figure()
    plt.plot(np.arange(len(values)), values)
    plt.savefig(os.path.join(out_dir, stem + ".png"))
    plt.show()
    plt.close()

    with open(os.path.join(out_dir, stem + ".txt"), "w") as fp:
        fp.writelines(str(value) + "\n" for value in values)


def train_bpnet(path="mnist", hid_size=100, batch_size=20, epochs=150,
                learning_rate=0.1, path_save='bpnet'):
    """Train a BpNet on MNIST with Theano, then save the model and its training curves.

    Args:
        path: Directory handed to load_mnist for both the "train" and "t10k" splits.
        hid_size: Hidden layer size passed to BpNet.
        batch_size: Number of training rows per gradient step.
        epochs: Number of passes over the training data.
        learning_rate: Step size passed to net.update_grad.
        path_save: Output directory for the pickled model (bpnet.pkt), loss.png/.txt
                   and percentage.png/.txt. Created if it does not exist.
    """
    # dataset preparation
    X_train, y_train = load_mnist(path, "train")
    X_test, y_test = load_mnist(path, "t10k")
    # dataset definition
    train_data = DataSet(X_train, y_train)
    test_data = DataSet(X_test, y_test)

    # network dimensions
    in_size = 784
    out_size = 10

    # model definition
    net = BpNet(in_size, hid_size, out_size)

    print(train_data.input_data.shape, train_data.target_data.shape)

    # symbolic inputs
    x = T.dmatrix("x")
    y = T.dmatrix("y")

    prediction = net.forward(x)
    # cost function definition
    cost = cross_entropy_cost(prediction, y)
    # parameter updates for one gradient-descent step
    list_q = net.update_grad(cost, learning_rate)
    # compiled training step; named train_step so it does not shadow the module-level train()
    train_step = theano.function(inputs=[x, y], outputs=[cost], updates=list_q)
    # compiled prediction function
    predict = theano.function(inputs=[x], outputs=prediction)

    # training model
    loss_list = []
    percentage_list = []
    for k in range(epochs):
        Length = len(X_train) // batch_size
        sum_loss = 0
        for j in range(Length):
            # NOTE(review): batches are cut from the raw X_train / y_train while testing
            # uses test_data.input_data / target_data. Confirm the raw arrays are already
            # in the form the network expects (y is declared as a matrix).
            out_x = X_train[j * batch_size:(j + 1) * batch_size, :]
            out_y = y_train[j * batch_size:(j + 1) * batch_size, :]
            err = train_step(out_x, out_y)
            sum_loss += err[0]
        out_pre = predict(test_data.input_data)
        out_org = test_data.target_data
        percentage = test_data_op(out_pre, out_org)
        print("epoches:%d loss:%0.4f correct:%0.2f%%" % (k, sum_loss / Length, percentage * 100))
        loss_list.append(sum_loss / Length)
        percentage_list.append(percentage)

    # ----------------------------------------------------------------
    # save the model
    model_name = 'bpnet.pkt'
    os.makedirs(path_save, exist_ok=True)
    with open(os.path.join(path_save, model_name), 'wb') as f:
        pickle.dump(net, f, protocol=pickle.HIGHEST_PROTOCOL)

    # ----------------------------------------------------------------
    # save the loss and accuracy curves
    _save_curve(loss_list, path_save, "loss")
    _save_curve(percentage_list, path_save, "percentage")
def main_test():
    """Run the per-task sanity checks and print one True/False line per check.

    Each task's helper from ``tests`` supplies inputs and expected outputs; the matching
    function from ``model`` is run on those inputs and the results are compared with
    np.array_equal, after rounding where the individual check calls for it.
    """

    def report(label, expected, actual):
        # One result line per check: the label followed by True or False.
        print(label, np.array_equal(expected, actual))

    def rounded(values, decimals):
        return np.round(values, decimals=decimals)

    print("----------START OF TESTS-----------")
    # Get configuration
    conf = config()

    # ---- Task 1.1: Parameter initialization
    from model import initialization
    params = initialization(conf)

    # ---- Task 1.2: Forward propagation
    # Activation functions [1.2a & 1.2b]
    from model import activation, softmax
    from tests import task_2a, task_2b

    input_Z, expected_A = task_2a()
    relu_out = activation(input_Z, 'relu')
    report('Activation valid?:', expected_A, relu_out)

    input_Z, expected_S = task_2b()
    softmax_out = softmax(input_Z)
    report('Softmax valid?:', rounded(expected_S, 3), rounded(softmax_out, 3))

    # Forward propagation [1.2c]
    from model import forward
    from tests import task_2c

    (conf, X_batch, params, expected_Z_1, expected_A_1, expected_Z_2,
     expected_Y_proposed) = task_2c()
    Y_proposed, features = forward(conf, X_batch, params, is_training=True)
    feature_checks = (
        ('feature Z_1 valid?:', expected_Z_1, 'Z_1'),
        ('feature A_1 valid?:', expected_A_1, 'A_1'),
        ('feature Z_2 valid?:', expected_Z_2, 'Z_2'),
    )
    for label, expected, key in feature_checks:
        # Only the computed side is rounded here; the expected values are used as given.
        report(label, expected, rounded(features[key], 8))
    report('proposed Y valid?:', expected_Y_proposed, rounded(Y_proposed, 8))

    # ---- Task 1.3: Cross Entropy cost function
    from model import cross_entropy_cost
    from tests import task_3

    Y_proposed, Y_batch, expected_cost_value, expected_num_correct = task_3()
    cost_value, num_correct = cross_entropy_cost(Y_proposed, Y_batch)
    report('Cost value valid?:', rounded(expected_cost_value, 4), rounded(cost_value, 4))
    report('Number of succesess valid?:', expected_num_correct, rounded(num_correct, 4))

    # ---- Task 1.4: Backward propagation
    # Derivative of the activation function [1.4a]
    from model import activation_derivative
    from tests import task_4a

    input_Z, expected_dg_dz = task_4a()
    dg_dz = activation_derivative(input_Z, "relu")
    report('Derivative function valid?:', expected_dg_dz, rounded(dg_dz, 4))

    # Backward propagation [1.4b]
    from model import backward
    from tests import task_4b

    (conf, Y_proposed, Y_batch, params, features,
     expected_grad_W_1, expected_grad_b_1, expected_grad_W_2, expected_grad_b_2) = task_4b()
    grad_params = backward(conf, Y_proposed, Y_batch, params, features)
    grad_checks = (
        ('Grad_W_1 valid?:', expected_grad_W_1, "grad_W_1", False),
        ('Grad_b_1 valid?:', expected_grad_b_1, "grad_b_1", True),
        ('Grad_W_2 valid?:', expected_grad_W_2, "grad_W_2", False),
        ('Grad_b_2 valid?:', expected_grad_b_2, "grad_b_2", True),
    )
    for label, expected, key, as_column in grad_checks:
        want = rounded(expected, 4)
        got = grad_params[key]
        if as_column:
            # Bias gradients get a trailing axis added before the comparison.
            got = got[:, np.newaxis]
        report(label, want, rounded(got, 4))

    # ---- Task 1.5: Update parameters
    from model import gradient_descent_update
    from tests import task_5

    (conf, params, grad_params,
     expected_updated_W_1, expected_updated_b_1,
     expected_updated_W_2, expected_updated_b_2) = task_5()
    updated_params = gradient_descent_update(conf, params, grad_params)
    update_checks = (
        ('update of W_1 valid?:', expected_updated_W_1, "W_1"),
        ('update of b_1 valid?:', expected_updated_b_1, "b_1"),
        ('update of W_2 valid?:', expected_updated_W_2, "W_2"),
        ('update of b_2 valid?:', expected_updated_b_2, "b_2"),
    )
    for label, expected, key in update_checks:
        want = rounded(expected, 4)
        report(label, want, rounded(updated_params[key], 4))

    print("----------END OF TESTS-----------")