Example #1
def StochasticGradientDescent(datatuple, nn_model):
    """ Stochastic gradient descent over mini-batches.

    :param datatuple: 6-tuple (train_x, train_y, valid_x, valid_y, ...); only the train and validation splits are used here
    :param nn_model: neural network model to train
    :return: the trained nn_model with its final train/valid accuracy recorded
    """
    train_x, train_y, valid_x, valid_y, _, _ = datatuple
    loss_val = []
    train_accs = []
    valid_accs = []
    for i in range(FLAGS.max_iteration):
        batch_x, batch_y = nnu.batch_data(train_x, train_y, FLAGS.batch_size)

        delta_grad = compute_gradient(batch_x, batch_y, nn_model)
        opt.GradientDescentOptimizer(nn_model, delta_grad, FLAGS.learning_rate)

        if i % FLAGS.record_persteps == 0:
            train_acc = evaluate_accuracy(train_x, train_y, nn_model)
            train_accs.append(train_acc)
            valid_acc = evaluate_accuracy(valid_x, valid_y, nn_model)
            valid_accs.append(valid_acc)
            print "step ", i, " training acc: ", train_acc, " valid acc:", valid_acc
            loss_val.append(evaluate_loss(train_x, train_y, nn_model))

    nnu.plot_list(loss_val, FLAGS.fig_dir + "SGD_objvalue.png", 1)
    nnu.plot_list_acc(train_accs, valid_accs, FLAGS.fig_dir + "SGD_accs.png")
    nn_model.train_acc = train_accs[-1]
    nn_model.valid_acc = valid_accs[-1]
    return nn_model
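Stripped of the project helpers (nnu, opt, FLAGS), the update applied by opt.GradientDescentOptimizer each step is just theta <- theta - learning_rate * gradient. A minimal self-contained sketch of that rule on a toy quadratic (sgd_step and the toy loss are illustrative names, not part of the example above):

import numpy as np

def sgd_step(theta, grad, lr=0.1):
    # one plain SGD update: theta <- theta - lr * grad
    return theta - lr * grad

# toy usage: minimize f(theta) = theta^T theta, whose gradient is 2 * theta
theta = np.array([1.0, -2.0])
for _ in range(100):
    theta = sgd_step(theta, 2.0 * theta)
print(theta)  # converges toward [0, 0]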
Example #2
def AdamGrad(datatuple, nn_model):
    """ AdaGrad optimizer: accumulates squared gradients to scale the step per parameter.

    :param datatuple: 6-tuple (train_x, train_y, valid_x, valid_y, ...); only the train and validation splits are used here
    :param nn_model: neural network model to train
    :return: the trained nn_model with its final train/valid accuracy recorded
    """
    train_x, train_y, valid_x, valid_y, _, _ = datatuple
    max_iteration = FLAGS.max_iteration
    n_layer = FLAGS.n_layer  # the last layer is the output of the network
    n_feat = FLAGS.n_feat
    n_nodes = FLAGS.n_nodes

    epsilon = 1e-8

    train_accs = []
    valid_accs = []
    loss_val = []
    G_np = np.zeros_like(nnu.dict_to_nparray(nn_model.model, n_layer))
    G = nnu.nparray_to_dictionary(G_np, n_feat, n_nodes, n_layer)
    for i in range(max_iteration):

        batch_x, batch_y = nnu.batch_data(train_x, train_y, FLAGS.batch_size)

        # compute gradient
        delta_grad = compute_gradient(batch_x, batch_y,
                                      nn_model)  # gradient of L(theta)

        g_square = nnu.dict_mul(delta_grad, delta_grad)  #  g^2

        G = nnu.dict_add(G, g_square)  # G_t = G_{t-1} + g^2

        G_np = nnu.dict_to_nparray(G, n_layer)

        temp_np = np.divide(
            -float(FLAGS.learning_rate),
            np.sqrt(G_np + epsilon))  # -learning_rate / sqrt(G_t + epsilon)

        temp_dict = nnu.nparray_to_dictionary(
            temp_np, n_feat, n_nodes,
            n_layer)  # convert np array to dictionary

        temp_dict = nnu.dict_mul(temp_dict, delta_grad)

        nn_model.model = nnu.dict_add(nn_model.model, temp_dict)

        if i % FLAGS.record_persteps == 0:
            train_acc = evaluate_accuracy(train_x, train_y, nn_model)
            train_accs.append(train_acc)
            valid_acc = evaluate_accuracy(valid_x, valid_y, nn_model)
            valid_accs.append(valid_acc)
            print "step ", i, " training acc: ", train_acc, " valid acc:", valid_acc
            loss_val.append(evaluate_loss(train_x, train_y, nn_model))

    nnu.plot_list_acc(train_accs, valid_accs, FLAGS.fig_dir + "Adam_accs.png")
    nnu.plot_list(loss_val, FLAGS.fig_dir + "Adam_objvalue.png", 1)
    np.save(FLAGS.fig_dir + "Adam_accs.npy", tuple([train_accs, valid_accs]))
    nn_model.train_acc = train_accs[-1]
    nn_model.valid_acc = valid_accs[-1]
    return nn_model
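Despite the name AdamGrad, the update above is the AdaGrad rule: a running sum of squared gradients G_t divides the learning rate element-wise, theta <- theta - lr * g / sqrt(G_t + epsilon). A minimal NumPy sketch with flat arrays instead of the weight dictionaries (adagrad_step and the toy gradient are illustrative assumptions):

import numpy as np

def adagrad_step(theta, grad, G, lr=0.5, eps=1e-8):
    # accumulate squared gradients, then scale the step per parameter
    G = G + grad * grad                            # G_t = G_{t-1} + g^2
    theta = theta - lr * grad / np.sqrt(G + eps)   # theta <- theta - lr * g / sqrt(G_t + eps)
    return theta, G

theta = np.array([1.0, -2.0])
G = np.zeros_like(theta)
for _ in range(200):
    theta, G = adagrad_step(theta, 2.0 * theta, G)
print(theta)  # moves toward the minimum at [0, 0]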
Example #3
def NesterovAcceleratedGrad(datatuple, nn_model, alpha, beta):
    """ Stochastic gradient descent with Nesterov acceleration.

    :param datatuple: 6-tuple (train_x, train_y, valid_x, valid_y, ...); only the train and validation splits are used here
    :param nn_model: neural network model to train
    :param alpha: momentum coefficient
    :param beta: step size applied to the gradient
    :return: the trained nn_model with its final train/valid accuracy recorded
    """
    train_x, train_y, valid_x, valid_y, _, _ = datatuple

    max_iteration = FLAGS.max_iteration
    n_layer = FLAGS.n_layer  # the last layer is the output of the network
    n_feat = FLAGS.n_feat
    n_nodes = FLAGS.n_nodes
    #
    # nn_model = NeuralNet(n_layer, n_nodes, n_feat)
    train_accs = []
    valid_accs = []
    loss_val = []
    cumulative_grad_np = np.zeros_like(
        nnu.dict_to_nparray(nn_model.model, n_layer))
    cumulative_grad = nnu.nparray_to_dictionary(cumulative_grad_np, n_feat,
                                                n_nodes, n_layer)
    for i in range(max_iteration):

        batch_x, batch_y = nnu.batch_data(train_x, train_y, FLAGS.batch_size)

        cumulative_grad = nnu.dict_mulscala(cumulative_grad,
                                            alpha)  # alpha * vt-1
        nn_model.model = nnu.dict_add(
            nn_model.model, cumulative_grad)  # theta = theta0 + alpha * vt-1

        # compute gradient
        delta_grad = compute_gradient(
            batch_x, batch_y, nn_model)  # gradient of L(theta0 + alpha * vt-1)

        delta_grad = nnu.dict_mulscala(delta_grad, -beta)  # - beta * g
        cumulative_grad = nnu.dict_add(
            cumulative_grad, delta_grad)  # vt = alpha vt-1 - beta * g

        nn_model.model = nnu.dict_add(
            nn_model.model,
            delta_grad)  # theta = theta0 + alpha * v_{t-1} - beta * g
        if i % FLAGS.record_persteps == 0:
            train_acc = evaluate_accuracy(train_x, train_y, nn_model)
            train_accs.append(train_acc)
            valid_acc = evaluate_accuracy(valid_x, valid_y, nn_model)
            valid_accs.append(valid_acc)
            print "step ", i, " training acc: ", train_acc, " valid acc:", valid_acc
            loss_val.append(evaluate_loss(train_x, train_y, nn_model))

    nnu.plot_list(loss_val, FLAGS.fig_dir + "Nesterov_objvalue.png", 1)
    nnu.plot_list_acc(train_accs, valid_accs,
                      FLAGS.fig_dir + "Nesterov_accs.png")
    nn_model.train_acc = train_accs[-1]
    nn_model.valid_acc = valid_accs[-1]
    return nn_model
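The loop above implements the Nesterov update in two moves: the gradient is evaluated at the look-ahead point theta + alpha * v_{t-1}, then v_t = alpha * v_{t-1} - beta * g and theta_t = theta_{t-1} + v_t. A standalone sketch of the same rule (nesterov_step and grad_fn are illustrative names, not from the example):

import numpy as np

def nesterov_step(theta, v, grad_fn, alpha=0.9, beta=0.05):
    # evaluate the gradient at the look-ahead point theta + alpha * v
    g = grad_fn(theta + alpha * v)
    v = alpha * v - beta * g    # v_t = alpha * v_{t-1} - beta * g
    return theta + v, v         # theta_t = theta_{t-1} + v_t

theta = np.array([1.0, -2.0])
v = np.zeros_like(theta)
for _ in range(200):
    theta, v = nesterov_step(theta, v, lambda x: 2.0 * x)
print(theta)  # converges toward [0, 0]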
Example #4
def Adamdelta(datatuple, nn_model, gamma):
    """ AdaDelta optimizer.

    :param datatuple: 6-tuple (train_x, train_y, valid_x, valid_y, ...); only the train and validation splits are used here
    :param nn_model: neural network model to train
    :param gamma: decay rate of the running averages of squared gradients and squared updates
    :return: the trained nn_model with its final train/valid accuracy recorded
    """
    train_x, train_y, valid_x, valid_y, _, _ = datatuple
    max_iteration = FLAGS.max_iteration
    n_layer = FLAGS.n_layer  # the last layer is the output of the network
    n_feat = FLAGS.n_feat
    n_nodes = FLAGS.n_nodes

    epsilon = 1e-8

    loss_val = []
    train_accs = []
    valid_accs = []

    G_np = np.zeros_like(nnu.dict_to_nparray(nn_model.model, n_layer))
    RMS_deltatheta_prev = np.zeros_like(G_np)
    Delta_theta = np.zeros_like(G_np)
    for i in range(max_iteration):

        batch_x, batch_y = nnu.batch_data(train_x, train_y, FLAGS.batch_size)

        # compute gradient
        delta_grad = compute_gradient(batch_x, batch_y,
                                      nn_model)  # gradient of L(theta)

        delta_grad_np = nnu.dict_to_nparray(delta_grad, n_layer)
        g_square_np = np.multiply(delta_grad_np, delta_grad_np)  #  g^2

        G_np = G_np * gamma + g_square_np * (
            1 - gamma)  # G_t = gamma * G_{t-1} + (1 - gamma) * g^2
        RMS_gt = np.sqrt(G_np + epsilon)  # sqrt(G_t + epsilon)

        delta_theta = np.multiply(
            -np.divide(RMS_deltatheta_prev, RMS_gt),
            delta_grad_np)  # - RMS[delta_theta]_{t-1} / RMS[g]_t * g_t

        Delta_theta = Delta_theta * gamma + (delta_theta**2) * (
            1 - gamma)  # E[delta_theta^2]_t = gamma * E[delta_theta^2]_{t-1} + (1 - gamma) * delta_theta^2

        RMS_theta = np.sqrt(Delta_theta + epsilon)
        RMS_deltatheta_prev = RMS_theta

        temp_dict = nnu.nparray_to_dictionary(
            delta_theta, n_feat, n_nodes,
            n_layer)  # convert np array to dictionary
        nn_model.model = nnu.dict_add(nn_model.model, temp_dict)

        if i % FLAGS.record_persteps == 0:
            train_acc = evaluate_accuracy(train_x, train_y, nn_model)
            train_accs.append(train_acc)
            valid_acc = evaluate_accuracy(valid_x, valid_y, nn_model)
            valid_accs.append(valid_acc)
            print "step ", i, " training acc: ", train_acc, " valid acc:", valid_acc
            loss_val.append(evaluate_loss(train_x, train_y, nn_model))

    nnu.plot_list_acc(train_accs, valid_accs,
                      FLAGS.fig_dir + "Adamdelta_accs.png")
    nnu.plot_list(loss_val, FLAGS.fig_dir + "Adamdelta_objvalue.png", 1)
    nn_model.train_acc = train_accs[-1]
    nn_model.valid_acc = valid_accs[-1]
    return nn_model
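The function name notwithstanding, this is the AdaDelta rule: exponential running averages of squared gradients and squared updates, with the step dx = -RMS[dx]_{t-1} / RMS[g]_t * g_t, so no explicit learning rate is needed. A compact sketch on flat arrays (adadelta_step and the toy gradient are illustrative assumptions):

import numpy as np

def adadelta_step(theta, grad, E_g2, E_dx2, gamma=0.95, eps=1e-8):
    # running average of squared gradients
    E_g2 = gamma * E_g2 + (1.0 - gamma) * grad * grad
    # update scaled by the ratio of the two RMS terms
    dx = -np.sqrt(E_dx2 + eps) / np.sqrt(E_g2 + eps) * grad
    # running average of squared updates (used at the next step)
    E_dx2 = gamma * E_dx2 + (1.0 - gamma) * dx * dx
    return theta + dx, E_g2, E_dx2

theta = np.array([1.0, -2.0])
E_g2 = np.zeros_like(theta)
E_dx2 = np.zeros_like(theta)
for _ in range(1000):
    theta, E_g2, E_dx2 = adadelta_step(theta, 2.0 * theta, E_g2, E_dx2)
print(theta)  # slowly approaches [0, 0]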
Example #5
# n_layer = 2  # the last layer is the output of network
#
# nn_model = NeuralNet(28, 2)
#
# nn_model.feedforward(features)
# # datatuple = tuple([features, labels, features, labels, features, labels])
# # StochasticGradientDescent(datatuple, nn_model)
# err = np.random.randn(1, 10)
# nn_model.backprop(err)

# SGD
# trainaccs = [0.3426, 0.4405, 0.4779, 0.5213,0.5428, 0.5549]
# validaccs = [0.3315, 0.4448, 0.4767, 0.5344, 0.5429, 0.5549]
# figname = "../results/" + "sgd_cnn.png"

# adamdelta
# trainaccs = [0.3505, 0.4817, 0.5303, 0.573,0.6008, 0.6309, 0.6506, 0.6839, 0.6815]
# validaccs = [0.351, 0.4759, 0.5276, 0.5747,0.5977, 0.6274, 0.6428, 0.671, 0.6829]
# figname = "../results/" + "adamdelta_cnn.png"

# Nesterov
# trainaccs = [0.3263, 0.3049, 0.2829, 0.3984]
# validaccs = [0.3303, 0.3061, 0.289, 0.4085]
# figname = "../results/" + "Nesterov_cnn.png"

# adamgrad
trainaccs = [0.2665, 0.318, 0.3912, 0.4186, 0.4444]
validaccs = [0.2732, 0.3283, 0.3919, 0.4158, 0.4358]
figname = "../results/" + "adamgrad_cnn.png"
nnu.plot_list_acc(trainaccs, validaccs, figname)
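nnu.plot_list_acc is a project helper; a plain matplotlib sketch with the same assumed behaviour (one curve per accuracy list, figure saved to figname) could look like this, with plot_acc_curves as an illustrative name:

import matplotlib.pyplot as plt

def plot_acc_curves(train_accs, valid_accs, figname):
    # plot training and validation accuracy over the recorded steps
    plt.figure()
    plt.plot(train_accs, label="train accuracy")
    plt.plot(valid_accs, label="valid accuracy")
    plt.xlabel("recorded step")
    plt.ylabel("accuracy")
    plt.legend()
    plt.savefig(figname)
    plt.close()

plot_acc_curves(trainaccs, validaccs, figname)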