def hidden_layer_crossvalidation(datatuple):
    """ Cross-validation to choose the number of hidden layers """
    FLAGS.create_dir()
    # save configuration
    configure_name = "configure.py"
    copyfile(configure_name, FLAGS.model_dir + configure_name)
    trainaccs = []
    validaccs = []
    for i in [3, 4, 5, 6]:
        FLAGS.n_nodes = [32] * i + [1]
        FLAGS.n_layer = len(FLAGS.n_nodes)
        print FLAGS.n_nodes
        nn_model = NeuralNet(FLAGS.n_layer, FLAGS.n_nodes, FLAGS.n_feat, FLAGS.func_num)
        nn_model = Adamdelta(datatuple, nn_model, FLAGS.adadelta_gamma)
        trainaccs.append(nn_model.train_acc)
        validaccs.append(nn_model.valid_acc)
        nnu.save_model(nn_model, FLAGS.model_dir + "Adamdelta_" + str(FLAGS.max_iteration) + ".p")
    print "final round training accuracy: ", trainaccs
    print "final round validation accuracy: ", validaccs
    results = tuple([trainaccs, validaccs])
    np.save(FLAGS.model_dir + "results.npy", results)
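# The sketch below is not part of the original pipeline: it only illustrates how the
# (trainaccs, validaccs) tuple saved to results.npy above could be read back to pick
# the best depth. The candidate depths mirror the [3, 4, 5, 6] loop; allow_pickle is
# needed on recent NumPy versions in case the saved object is stored as a Python object.
def pick_best_depth(results_path, depths=(3, 4, 5, 6)):
    trainaccs, validaccs = np.load(results_path, allow_pickle=True)
    best = int(np.argmax(validaccs))   # choose the depth with the highest validation accuracy
    return depths[best], validaccs[best]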
def StochasticGradientDescent(datatuple, nn_model):
    """ Stochastic gradient descent over mini-batches of the training features
    :param datatuple: (train_x, train_y, valid_x, valid_y, test_x, test_y)
    :return: the trained nn_model
    """
    train_x, train_y, valid_x, valid_y, _, _ = datatuple
    loss_val = []
    train_accs = []
    valid_accs = []
    for i in range(FLAGS.max_iteration):
        batch_x, batch_y = nnu.batch_data(train_x, train_y, FLAGS.batch_size)
        delta_grad = compute_gradient(batch_x, batch_y, nn_model)
        opt.GradientDescentOptimizer(nn_model, delta_grad, FLAGS.learning_rate)
        if (i + 1) % FLAGS.record_persteps == 0:
            print "evaluation"
            bx, by = nnu.batch_data(train_x, train_y, FLAGS.batch_size_evaluate)
            train_acc = evaluate_accuracy(bx, by, nn_model)
            train_accs.append(train_acc)
            valid_acc = evaluate_accuracy(valid_x, valid_y, nn_model)
            valid_accs.append(valid_acc)
            print "step ", i, " training acc: ", train_acc, " valid acc:", valid_acc
            loss_val.append(evaluate_loss(train_x, train_y, nn_model))
            nnu.save_model(nn_model, FLAGS.model_dir + "SGD_" + str(i) + ".npy")
    # nnu.plot_list(loss_val, FLAGS.fig_dir + "SGD_objvalue.png", 1)
    # nnu.plot_list_acc(train_accs, valid_accs, FLAGS.fig_dir + "SGD_accs.png")
    # nn_model.train_acc = train_accs[-1]
    # nn_model.valid_acc = valid_accs[-1]
    return nn_model
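# Minimal, self-contained sketch (assumed, not from the original code) of the update
# that opt.GradientDescentOptimizer is expected to apply above: a plain step
# theta <- theta - learning_rate * g on every parameter of the flattened model.
def sgd_step(theta, grad, learning_rate):
    # theta and grad are flat parameter / gradient vectors of the same shape
    return theta - learning_rate * grad

# one step on a toy quadratic loss 0.5 * ||theta||^2, whose gradient is theta itself:
# theta = np.array([1.0, -2.0]); theta = sgd_step(theta, theta, 0.1)  # -> [0.9, -1.8]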
def AdamGrad(datatuple, nn_model):
    """ AdaGrad optimizer: accumulate squared gradients and scale each step by 1/sqrt(G_t + epsilon)
    :param datatuple: (train_x, train_y, valid_x, valid_y, test_x, test_y)
    :return: the trained nn_model
    """
    train_x, train_y, valid_x, valid_y, _, _ = datatuple
    max_iteration = FLAGS.max_iteration
    epsilon = 1e-8
    train_accs = []
    valid_accs = []
    loss_val = []
    G_np = np.zeros_like(nnu.dict_to_nparray(nn_model.model))
    G = nnu.nparray_to_dictionary(G_np, nn_model.model)
    for i in range(max_iteration):
        batch_x, batch_y = nnu.batch_data(train_x, train_y, FLAGS.batch_size)
        # gradient of L(theta) on the mini-batch
        delta_grad = compute_gradient(batch_x, batch_y, nn_model)
        g_square = nnu.dict_mul(delta_grad, delta_grad)        # g^2
        G = nnu.dict_add(G, g_square)                          # G_t = G_{t-1} + g^2
        G_np = nnu.dict_to_nparray(G)
        temp_np = np.divide(-float(FLAGS.learning_rate), np.sqrt(G_np + epsilon))  # -learning_rate / sqrt(G_t + epsilon)
        temp_dict = nnu.nparray_to_dictionary(temp_np, nn_model.model)  # convert np array back to a dictionary
        temp_dict = nnu.dict_mul(temp_dict, delta_grad)        # -learning_rate / sqrt(G_t + epsilon) .* g
        nn_model.model = nnu.dict_add(nn_model.model, temp_dict)
        if (i + 1) % 100 == 0:
            print i
        if (i + 1) % FLAGS.record_persteps == 0:
            nnu.save_model(nn_model, FLAGS.model_dir + "Adam_" + str(i) + ".npy")
            bx, by = nnu.batch_data(train_x, train_y, FLAGS.batch_size_evaluate)
            train_acc = evaluate_accuracy(bx, by, nn_model)
            train_accs.append(train_acc)
            valid_acc = evaluate_accuracy(valid_x, valid_y, nn_model)
            valid_accs.append(valid_acc)
            print "step ", i, " training acc: ", train_acc, " valid acc:", valid_acc
            # loss_val.append(evaluate_loss(train_x, train_y, nn_model))
    print valid_accs, train_accs
    nnu.plot_list_acc(train_accs, valid_accs, FLAGS.fig_dir + "Adam_accs.png")
    # nnu.plot_list(loss_val, FLAGS.fig_dir + "Adam_objvalue.png", 1)
    np.save(FLAGS.fig_dir + "Adam_accs.npy", tuple([train_accs, valid_accs]))
    nn_model.train_acc = train_accs[-1]
    nn_model.valid_acc = valid_accs[-1]
    return nn_model
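# Minimal NumPy sketch (illustration only, not from the original code) of the AdaGrad
# rule implemented above: accumulate squared gradients in G and scale the step by
# 1 / sqrt(G + epsilon), so frequently-updated coordinates get smaller steps.
def adagrad_step(theta, grad, G, learning_rate, epsilon=1e-8):
    G = G + grad * grad                                          # G_t = G_{t-1} + g^2
    theta = theta - learning_rate * grad / np.sqrt(G + epsilon)  # theta_t = theta_{t-1} - lr * g / sqrt(G_t + eps)
    return theta, G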
def NesterovAcceleratedGrad(datatuple, nn_model, alpha, beta):
    """ Stochastic Gradient Descent with Nesterov Acceleration
    :param datatuple: (train_x, train_y, valid_x, valid_y, test_x, test_y)
    :return: the trained nn_model
    """
    train_x, train_y, valid_x, valid_y, _, _ = datatuple
    max_iteration = FLAGS.max_iteration
    train_accs = []
    valid_accs = []
    loss_val = []
    cumulative_grad_np = np.zeros_like(nnu.dict_to_nparray(nn_model.model))
    cumulative_grad = nnu.nparray_to_dictionary(cumulative_grad_np, nn_model.model)
    for i in range(max_iteration):
        batch_x, batch_y = nnu.batch_data(train_x, train_y, FLAGS.batch_size)
        cumulative_grad = nnu.dict_mulscala(cumulative_grad, alpha)    # alpha * v_{t-1}
        nn_model.model = nnu.dict_add(nn_model.model, cumulative_grad) # theta = theta_0 + alpha * v_{t-1}
        # gradient of L(theta_0 + alpha * v_{t-1})
        delta_grad = compute_gradient(batch_x, batch_y, nn_model)
        delta_grad = nnu.dict_mulscala(delta_grad, -beta)              # -beta * g
        cumulative_grad = nnu.dict_add(cumulative_grad, delta_grad)    # v_t = alpha * v_{t-1} - beta * g
        nn_model.model = nnu.dict_add(nn_model.model, delta_grad)      # theta = theta_0 + alpha * v_{t-1} - beta * g
        if (i + 1) % FLAGS.record_persteps == 0:
            bx, by = nnu.batch_data(train_x, train_y, FLAGS.batch_size_evaluate)
            train_acc = evaluate_accuracy(bx, by, nn_model)
            train_accs.append(train_acc)
            valid_acc = evaluate_accuracy(valid_x, valid_y, nn_model)
            valid_accs.append(valid_acc)
            print "step ", i, " training acc: ", train_acc, " valid acc:", valid_acc
            # loss_val.append(evaluate_loss(train_x, train_y, nn_model))
            nnu.save_model(nn_model, FLAGS.model_dir + "Nesterov_" + str(i) + ".npy")
    # nnu.plot_list(loss_val, FLAGS.fig_dir + "Nesterov_objvalue.png", 1)
    nnu.plot_list_acc(train_accs, valid_accs, FLAGS.fig_dir + "Nesterov_accs.png")
    nn_model.train_acc = train_accs[-1]
    nn_model.valid_acc = valid_accs[-1]
    return nn_model
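# Minimal sketch (illustration only) of the Nesterov step above, written on flat
# vectors: evaluate the gradient at the look-ahead point theta + alpha * v, form
# v_t = alpha * v_{t-1} - beta * g, and move theta by v_t. grad_fn is a placeholder
# callable standing in for compute_gradient, not a function from this code base.
def nesterov_step(theta, v, grad_fn, alpha, beta):
    g = grad_fn(theta + alpha * v)   # gradient at the look-ahead point
    v = alpha * v - beta * g         # v_t = alpha * v_{t-1} - beta * g
    return theta + v, v              # theta_t = theta_{t-1} + v_t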
def run_main(datatuple):
    """ Perform training, evaluation and testing
    :param datatuple: (train_x, train_y, valid_x, valid_y, test_x, test_y)
    :return:
    """
    train_x, train_y, valid_x, valid_y, test_x, test_y = datatuple
    FLAGS.create_dir()
    # create network model
    n_layer = FLAGS.n_layer   # the last layer is the output of the network
    n_feat = FLAGS.n_feat
    n_nodes = FLAGS.n_nodes
    if os.path.isfile(FLAGS.nnmodel_load_fname):
        print "load model"
        nn_model = nnu.load_model(FLAGS.nnmodel_load_fname)
    else:
        print "random init model"
        nn_model = NeuralNet(n_layer, n_nodes, n_feat, FLAGS.func_num)
    # save configuration
    configure_name = "configure.py"
    copyfile(configure_name, FLAGS.model_dir + configure_name)
    if FLAGS.Optimizer == 1:
        print "Optimizer: SGD \n"
        nn_model = StochasticGradientDescent(datatuple, nn_model)
        nnu.save_model(nn_model, FLAGS.model_dir + "SGD_" + str(FLAGS.max_iteration))
    elif FLAGS.Optimizer == 2:
        print "Optimizer: NesterovAcceleratedGrad \n"
        nn_model = NesterovAcceleratedGrad(datatuple, nn_model, FLAGS.Nesterov_alpha, FLAGS.learning_rate)
        nnu.save_model(nn_model, FLAGS.model_dir + "Nesterov_" + str(FLAGS.max_iteration))
    elif FLAGS.Optimizer == 3:
        print "Optimizer: AdamGrad \n"
        nn_model = AdamGrad(datatuple, nn_model)
        nnu.save_model(nn_model, FLAGS.model_dir + "AdamGrad_" + str(FLAGS.max_iteration))
    elif FLAGS.Optimizer == 4:
        print "Optimizer: Adamdelta \n"
        nn_model = Adamdelta(datatuple, nn_model, FLAGS.adadelta_gamma)
        nnu.save_model(nn_model, FLAGS.model_dir + "Adamdelta_" + str(FLAGS.max_iteration))
    print "test accuracy: ", nnt.evaluate_accuracy(test_x, test_y, nn_model)
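# Hypothetical smoke test showing how run_main expects to be called. Only the six-way
# (train/valid/test) tuple layout is taken from the unpacking above; the sample counts
# and the binary label format below are assumptions for illustration.
def make_random_datatuple(n_train=256, n_valid=64, n_test=64):
    def split(n):
        x = np.random.randn(n, FLAGS.n_feat)
        y = np.random.randint(0, 2, size=(n, 1))   # assumed binary labels
        return x, y
    (tx, ty), (vx, vy), (sx, sy) = split(n_train), split(n_valid), split(n_test)
    return tx, ty, vx, vy, sx, sy

# run_main(make_random_datatuple())   # quick end-to-end check on random data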
def Adamdelta(datatuple, nn_model, gamma):
    """ Adadelta optimizer (the name Adamdelta is kept because it is used elsewhere in the code)
    :param datatuple: (train_x, train_y, valid_x, valid_y, test_x, test_y)
    :return: the trained nn_model
    """
    train_x, train_y, valid_x, valid_y, test_x, test_y = datatuple
    max_iteration = FLAGS.max_iteration
    epsilon = 1e-8
    loss_val = []
    train_accs = []
    valid_accs = []
    test_accs = []
    G_np = np.zeros_like(nnu.dict_to_nparray(nn_model.model))
    RMS_deltatheta_prev = np.zeros_like(G_np)
    Delta_theta = np.zeros_like(G_np)
    for i in range(max_iteration):
        batch_x, batch_y = nnu.batch_data(train_x, train_y, FLAGS.batch_size)
        # gradient of L(theta) on the mini-batch
        delta_grad = compute_gradient(batch_x, batch_y, nn_model)
        delta_grad_np = nnu.dict_to_nparray(delta_grad)
        g_square_np = np.multiply(delta_grad_np, delta_grad_np)              # g^2
        G_np = G_np * gamma + g_square_np * (1 - gamma)                      # E[g^2]_t = gamma * E[g^2]_{t-1} + (1 - gamma) * g^2
        RMS_gt = np.sqrt(G_np + epsilon)                                     # RMS[g]_t = sqrt(E[g^2]_t + epsilon)
        delta_theta = np.multiply(-np.divide(RMS_deltatheta_prev, RMS_gt), delta_grad_np)  # -RMS[dtheta]_{t-1} / RMS[g]_t .* g_t
        Delta_theta = Delta_theta * gamma + (delta_theta ** 2) * (1 - gamma) # E[dtheta^2]_t = gamma * E[dtheta^2]_{t-1} + (1 - gamma) * dtheta^2
        RMS_theta = np.sqrt(Delta_theta + epsilon)
        RMS_deltatheta_prev = RMS_theta
        temp_dict = nnu.nparray_to_dictionary(delta_theta, nn_model.model)   # convert np array back to a dictionary
        nn_model.model = nnu.dict_add(nn_model.model, temp_dict)
        if (i + 1) % FLAGS.record_persteps == 0:
            bx, by = nnu.batch_data(train_x, train_y, FLAGS.batch_size_evaluate)
            train_acc = evaluate_accuracy(bx, by, nn_model)
            train_accs.append(train_acc)
            valid_acc = evaluate_accuracy(valid_x, valid_y, nn_model)
            valid_accs.append(valid_acc)
            print "step ", i, " training acc: ", train_acc, " valid acc:", valid_acc
            # loss_val.append(evaluate_loss(train_x, train_y, nn_model))
            nnu.save_model(nn_model, FLAGS.model_dir + "Adamdelta_" + str(i) + ".npy")
            testbx, testby = nnu.batch_data(test_x, test_y, FLAGS.batch_size_evaluate)
            test_acc = evaluate_accuracy(testbx, testby, nn_model)
            test_accs.append(test_acc)
            print test_accs
    # nnu.plot_list_acc(train_accs, valid_accs, FLAGS.fig_dir + "Adamdelta_accs.png")
    # nnu.plot_list(loss_val, FLAGS.fig_dir + "Adamdelta_objvalue.png", 1)
    # hidden_layer_crossvalidation reads these attributes, so they must be set here
    nn_model.train_acc = train_accs[-1]
    nn_model.valid_acc = valid_accs[-1]
    return nn_model
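# Minimal NumPy sketch (illustration only) of the standard Adadelta rule the loop above
# follows: keep decaying averages of squared gradients (E_g2) and squared updates (E_dx2)
# and scale each step by the ratio of their RMS values; no global learning rate is needed.
def adadelta_step(theta, grad, E_g2, E_dx2, gamma, epsilon=1e-8):
    E_g2 = gamma * E_g2 + (1 - gamma) * grad * grad                  # E[g^2]_t
    dx = -np.sqrt(E_dx2 + epsilon) / np.sqrt(E_g2 + epsilon) * grad  # -RMS[dx]_{t-1} / RMS[g]_t * g
    E_dx2 = gamma * E_dx2 + (1 - gamma) * dx * dx                    # E[dx^2]_t
    return theta + dx, E_g2, E_dx2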