def hidden_layer_crossvalidation(datatuple):
    """ Cross validation to choose the number of hidden layers"""
    FLAGS.create_dir()
    # save configuration
    configure_name = "configure.py"
    copyfile(configure_name, FLAGS.model_dir + configure_name)

    trainaccs = []
    validaccs = []

    for i in [3, 4, 5, 6]:
        FLAGS.n_nodes = [32] * i + [1]
        FLAGS.n_layer = len(FLAGS.n_nodes)
        print FLAGS.n_nodes
        nn_model = NeuralNet(FLAGS.n_layer, FLAGS.n_nodes, FLAGS.n_feat,
                             FLAGS.func_num)
        nn_model = Adamdelta(datatuple, nn_model, FLAGS.adadelta_gamma)
        trainaccs.append(nn_model.train_acc)
        validaccs.append(nn_model.valid_acc)
        nnu.save_model(
            nn_model,
            FLAGS.model_dir + "Adamdelta_" + str(FLAGS.max_iteration) + ".p")

    print "final round training accuracy: ", trainaccs
    print "final round validation accuracy: ", validaccs
    results = tuple([trainaccs, validaccs])
    np.save(FLAGS.model_dir + "results.npy", results)
# Beispiel #2
# 0
def StochasticGradientDescent(datatuple, nn_model):
    """  gradient descent over the batch features

    :param datatuple:
    :return:
    """
    train_x, train_y, valid_x, valid_y, _, _ = datatuple
    loss_val = []
    train_accs = []
    valid_accs = []
    for i in range(FLAGS.max_iteration):

        batch_x, batch_y = nnu.batch_data(train_x, train_y, FLAGS.batch_size)

        delta_grad = compute_gradient(batch_x, batch_y, nn_model)

        opt.GradientDescentOptimizer(nn_model, delta_grad, FLAGS.learning_rate )

        if (i +1) % FLAGS.record_persteps == 0:
            print "evaluation "
            bx, by = nnu.batch_data(train_x, train_y, FLAGS.batch_size_evaluate)
            train_acc = evaluate_accuracy(bx, by, nn_model)
            train_accs.append(train_acc)
            valid_acc = evaluate_accuracy(valid_x, valid_y, nn_model)
            valid_accs.append(valid_acc)
            print "step ", i, " training acc: ", train_acc, " valid acc:", valid_acc
            loss_val.append(evaluate_loss(train_x, train_y, nn_model))
            nnu.save_model(nn_model, FLAGS.model_dir + "SGD_" + str(i) + ".npy")

    # nnu.plot_list(loss_val, FLAGS.fig_dir + "SGD_objvalue.png", 1)
    # nnu.plot_list_acc(train_accs, valid_accs, FLAGS.fig_dir + "SGD_accs.png")
    # nn_model.train_acc = train_accs[-1]
    # nn_model.valid_acc = valid_accs[-1]
    return nn_model
# Beispiel #3
# 0
def AdamGrad(datatuple, nn_model):
    """ Adaptive-gradient optimizer.

    NOTE(review): despite the name, this implements AdaGrad, not Adam --
    squared gradients are accumulated without decay and there is no
    first-moment (momentum) estimate.

    :param datatuple: (train_x, train_y, valid_x, valid_y, test_x, test_y)
    :param nn_model: network whose .model parameter dict is updated in place
    :return: nn_model with .train_acc / .valid_acc set to the last recorded values
    """
    train_x, train_y, valid_x, valid_y, _, _ = datatuple
    max_iteration = FLAGS.max_iteration


    epsilon = 1e-8  # numerical floor inside the square root

    train_accs = []
    valid_accs = []
    loss_val = []
    # G accumulates the elementwise sum of squared gradients over all steps
    G_np = np.zeros_like(nnu.dict_to_nparray(nn_model.model))
    G = nnu.nparray_to_dictionary(G_np, nn_model.model)
    for i in range(max_iteration):

        batch_x, batch_y = nnu.batch_data(train_x, train_y, FLAGS.batch_size)

        # compute gradient of the loss at the current parameters
        delta_grad = compute_gradient(batch_x, batch_y, nn_model)  # gradient of L(theta)

        g_square = nnu.dict_mul(delta_grad, delta_grad)   # elementwise g^2

        G = nnu.dict_add(G, g_square)  # G_t = G_{t-1} + g^2 (undecayed accumulation -- AdaGrad)

        G_np = nnu.dict_to_nparray(G)

        temp_np = np.divide(-float(FLAGS.learning_rate), np.sqrt(G_np + epsilon))   # -learning_rate / sqrt(G_t + epsilon)

        temp_dict = nnu.nparray_to_dictionary(temp_np, nn_model.model) # convert np array back to a parameter dictionary

        temp_dict = nnu.dict_mul(temp_dict, delta_grad)  # full step: -lr * g / sqrt(G_t + eps)

        nn_model.model = nnu.dict_add(nn_model.model, temp_dict)

        if (i+1) % 100 == 0:
            print i  # progress heartbeat
        if (i+1) % FLAGS.record_persteps == 0:
            nnu.save_model(nn_model, FLAGS.model_dir + "Adam_" + str(i) + ".npy")
            # training accuracy is estimated on an evaluation-sized minibatch
            bx, by = nnu.batch_data(train_x, train_y, FLAGS.batch_size_evaluate)
            train_acc = evaluate_accuracy(bx, by, nn_model)
            train_accs.append(train_acc)
            valid_acc = evaluate_accuracy(valid_x, valid_y, nn_model)
            valid_accs.append(valid_acc)
            print "step ", i, " training acc: ", train_acc, " valid acc:", valid_acc
            # loss_val.append(evaluate_loss(train_x, train_y, nn_model))

            print valid_accs, train_accs
            nnu.plot_list_acc(train_accs, valid_accs, FLAGS.fig_dir + "Adam_accs.png")
    # nnu.plot_list(loss_val, FLAGS.fig_dir + "Adam_objvalue.png", 1)
    np.save(FLAGS.fig_dir + "Adam_accs.npy", tuple([train_accs, valid_accs]))
    # NOTE(review): raises IndexError if no step hit record_persteps -- confirm
    # max_iteration >= record_persteps in the configuration
    nn_model.train_acc = train_accs[-1]
    nn_model.valid_acc = valid_accs[-1]
    return nn_model
# Beispiel #4
# 0
def NesterovAcceleratedGrad(datatuple, nn_model, alpha, beta):
    """ Stochastic gradient descent with Nesterov acceleration.

    The gradient is evaluated at the lookahead point theta + alpha*v_{t-1}
    (the model is mutated to the lookahead BEFORE the gradient call), then
    the velocity and parameters are updated with that lookahead gradient.

    :param datatuple: (train_x, train_y, valid_x, valid_y, test_x, test_y)
    :param nn_model: network whose .model parameter dict is updated in place
    :param alpha: momentum coefficient applied to the accumulated velocity
    :param beta: learning rate applied to the lookahead gradient
    :return: nn_model with .train_acc / .valid_acc set to the last recorded values
    """
    train_x, train_y, valid_x, valid_y, _, _ = datatuple

    max_iteration = FLAGS.max_iteration

    train_accs = []
    valid_accs = []
    loss_val = []
    # velocity vector, initialized to zeros with the same layout as the model
    cumulative_grad_np = np.zeros_like(nnu.dict_to_nparray(nn_model.model))
    cumulative_grad = nnu.nparray_to_dictionary(cumulative_grad_np, nn_model.model)
    for i in range(max_iteration):

        batch_x, batch_y = nnu.batch_data(train_x, train_y, FLAGS.batch_size)

        cumulative_grad = nnu.dict_mulscala(cumulative_grad, alpha)     # alpha * v_{t-1}
        # move to the lookahead point before evaluating the gradient
        nn_model.model = nnu.dict_add(nn_model.model, cumulative_grad)   # theta = theta0 + alpha * v_{t-1}

        # gradient at the lookahead point
        delta_grad = compute_gradient(batch_x, batch_y, nn_model)  # gradient of L(theta0 + alpha * v_{t-1})

        delta_grad = nnu.dict_mulscala(delta_grad, -beta)  # - beta * g
        cumulative_grad = nnu.dict_add(cumulative_grad, delta_grad)  # v_t = alpha * v_{t-1} - beta * g

        # model already holds the lookahead, so adding -beta*g completes the step
        nn_model.model = nnu.dict_add(nn_model.model, delta_grad) # theta = theta0 + alpha * v_{t-1} - beta*g
        if (i +1) % FLAGS.record_persteps == 0:
            bx, by = nnu.batch_data(train_x, train_y, FLAGS.batch_size_evaluate)
            train_acc = evaluate_accuracy(bx, by, nn_model)
            train_accs.append(train_acc)
            valid_acc = evaluate_accuracy(valid_x, valid_y, nn_model)
            valid_accs.append(valid_acc)
            print "step ", i, " training acc: ", train_acc, " valid acc:", valid_acc
            # loss_val.append(evaluate_loss(train_x, train_y, nn_model))
            nnu.save_model(nn_model, FLAGS.model_dir + "Nesterov_" + str(i) + ".npy")

    # nnu.plot_list(loss_val, FLAGS.fig_dir + "Nesterov_objvalue.png", 1)
    nnu.plot_list_acc(train_accs, valid_accs, FLAGS.fig_dir + "Nesterov_accs.png")
    # NOTE(review): IndexError if record_persteps never divides a step index --
    # confirm max_iteration >= record_persteps
    nn_model.train_acc = train_accs[-1]
    nn_model.valid_acc = valid_accs[-1]
    return nn_model
def run_main(datatuple):
    """ performan training evalutaion and testing

    :param datatuple:
    :return:
    """
    train_x, train_y, valid_x, valid_y, test_x, test_y = datatuple
    FLAGS.create_dir()

    # create network model
    n_layer = FLAGS.n_layer  # the last layer is the output of network
    n_feat = FLAGS.n_feat
    n_nodes = FLAGS.n_nodes

    if os.path.isfile(FLAGS.nnmodel_load_fname):
        print "load model"
        nn_model = nnu.load_model(FLAGS.nnmodel_load_fname)
    else:
        print "random init model"
        nn_model = NeuralNet(n_layer, n_nodes, n_feat, FLAGS.func_num)

    # save configuration
    configure_name = "configure.py"
    copyfile(configure_name, FLAGS.model_dir + configure_name)
    if FLAGS.Optimizer == 1:
        print "Optimizer: SGD \n"
        nn_model = StochasticGradientDescent(datatuple, nn_model)
        nnu.save_model(nn_model,
                       FLAGS.model_dir + "SGD_" + str(FLAGS.max_iteration))

    elif FLAGS.Optimizer == 2:
        print "Optimizer: NesterovAcceleratedGrad \n"
        nn_model = NesterovAcceleratedGrad(datatuple, nn_model,
                                           FLAGS.Nesterov_alpha,
                                           FLAGS.learning_rate)
        nnu.save_model(
            nn_model, FLAGS.model_dir + "Nesterov_" + str(FLAGS.max_iteration))
    elif FLAGS.Optimizer == 3:
        print "Optimizer: AdamGrad \n"
        nn_model = AdamGrad(datatuple, nn_model)
        nnu.save_model(
            nn_model, FLAGS.model_dir + "AdamGrad_" + str(FLAGS.max_iteration))
    elif FLAGS.Optimizer == 4:
        print "Optimizer: Adamdelta \n"
        nn_model = Adamdelta(datatuple, nn_model, FLAGS.adadelta_gamma)
        nnu.save_model(
            nn_model,
            FLAGS.model_dir + "Adamdelta_" + str(FLAGS.max_iteration))

    print "test accuracy: ", nnt.evaluate_accuracy(test_x, test_y, nn_model)
# Beispiel #6
# 0
def Adamdelta(datatuple, nn_model, gamma):
    """ Adadelta optimizer ("Adamdelta" is this project's spelling).

    Maintains exponentially decayed running averages of both squared
    gradients and squared parameter updates, stepping by
    -(RMS[dtheta]_{t-1} / RMS[g]_t) * g so no explicit learning rate is used.

    :param datatuple: (train_x, train_y, valid_x, valid_y, test_x, test_y)
    :param nn_model: network whose .model parameter dict is updated in place
    :param gamma: decay rate of the two running averages
    :return: the optimized nn_model
    """
    train_x, train_y, valid_x, valid_y, test_x, test_y = datatuple
    max_iteration = FLAGS.max_iteration


    epsilon = 1e-8  # numerical floor inside the square roots

    loss_val = []
    train_accs = []
    valid_accs = []
    test_accs = []

    # E[g^2]: decayed average of squared gradients (flat nparray layout)
    G_np = np.zeros_like(nnu.dict_to_nparray(nn_model.model))
    # RMS of the previous step's parameter update, used in the next step's ratio
    RMS_deltatheta_prev = np.zeros_like(G_np)
    # E[dtheta^2]: decayed average of squared parameter updates
    Delta_theta = np.zeros_like(G_np)
    for i in range(max_iteration):

        batch_x, batch_y = nnu.batch_data(train_x, train_y, FLAGS.batch_size)

        # gradient of the loss at the current parameters
        delta_grad = compute_gradient(batch_x, batch_y, nn_model)  # gradient of L(theta)

        delta_grad_np = nnu.dict_to_nparray(delta_grad)
        g_square_np = np.multiply(delta_grad_np, delta_grad_np) # elementwise g^2

        G_np = G_np * gamma + g_square_np * (1 - gamma)  # E[g^2]_t = gamma * E[g^2]_{t-1} + (1 - gamma) * g^2
        RMS_gt = np.sqrt(G_np + epsilon)  # RMS[g]_t = sqrt(E[g^2]_t + epsilon)

        # update uses the PREVIOUS step's RMS of updates, per Adadelta
        delta_theta = np.multiply(-np.divide(RMS_deltatheta_prev, RMS_gt), delta_grad_np)  # -(RMS[dtheta]_{t-1} / RMS[g]_t) * g

        Delta_theta = Delta_theta*gamma + (delta_theta ** 2) * (1 - gamma)  # E[dtheta^2]_t = gamma * E[dtheta^2]_{t-1} + (1 - gamma) * dtheta^2

        RMS_theta = np.sqrt(Delta_theta + epsilon)
        RMS_deltatheta_prev = RMS_theta  # carried into the next iteration's ratio

        temp_dict = nnu.nparray_to_dictionary(delta_theta, nn_model.model) # convert np array back to a parameter dictionary
        nn_model.model = nnu.dict_add(nn_model.model, temp_dict)


        if (i +1) % FLAGS.record_persteps == 0:
            bx, by = nnu.batch_data(train_x, train_y, FLAGS.batch_size_evaluate)
            # train_acc = evaluate_accuracy(bx, by, nn_model)
            # train_accs.append(train_acc)
            # valid_acc = evaluate_accuracy(valid_x, valid_y, nn_model)
            # valid_accs.append(valid_acc)
            # print "step ", i, " training acc: ", train_acc, " valid acc:", valid_acc
            # loss_val.append(evaluate_loss(train_x, train_y, nn_model))
            nnu.save_model(nn_model, FLAGS.model_dir + "Adamdelta_" + str(i) + ".npy")
            # NOTE(review): evaluates on the TEST split during training -- confirm
            # this is intentional (train/valid evaluation is commented out above)
            testbx, testby = nnu.batch_data(test_x, test_y, FLAGS.batch_size_evaluate)
            test_acc = evaluate_accuracy(testbx, testby, nn_model)
            test_accs.append(test_acc)
            print test_accs
            # nnu.plot_list_acc(train_accs, valid_accs, FLAGS.fig_dir + "Adamdelta_accs.png")
    # nnu.plot_list(loss_val, FLAGS.fig_dir + "Adamdelta_objvalue.png", 1)
    # NOTE(review): .train_acc / .valid_acc are NOT set here (commented out), yet
    # hidden_layer_crossvalidation reads them -- confirm before using that path
    # nn_model.train_acc = train_accs[-1]
    # nn_model.valid_acc = valid_accs[-1]
    return nn_model