Example #1
def test_adagrad(self):
    expected = [
        tensor([0., 0.]),
        tensor([-0.9500, -0.9500]),
        tensor([-1.4506, -1.5260]),
        tensor([-1.7698, -1.9453]),
        tensor([-1.9861, -2.2722]),
        tensor([-2.1364, -2.5363]),
        tensor([-2.2421, -2.7541]),
        tensor([-2.3168, -2.9360]),
        tensor([-2.3698, -3.0894]),
        tensor([-2.4074, -3.2194]),
        tensor([-2.4342, -3.3300]),
        tensor([-2.4532, -3.4246]),
        tensor([-2.4667, -3.5054]),
        tensor([-2.4763, -3.5748]),
        tensor([-2.4832, -3.6343]),
        tensor([-2.4880, -3.6854]),
        tensor([-2.4915, -3.7293]),
        tensor([-2.4939, -3.7670]),
        tensor([-2.4957, -3.7995]),
        tensor([-2.4969, -3.8274]),
        tensor([-2.4978, -3.8515]),
        tensor([-2.4985, -3.8721]),
        tensor([-2.4989, -3.8899]),
        tensor([-2.4992, -3.9053])
    ]
    assert check_positions(expected,
                           adagrad(0.95,
                                   TestLevel())), "adagrad test failed"
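Example #1 checks the sequence of positions an AdaGrad optimizer produces on a 2-D test level; the adagrad, check_positions, and TestLevel helpers come from the exercise code and are not shown. For reference only, a minimal per-coordinate AdaGrad step (a NumPy sketch, not the exercise's actual implementation) looks like this:

import numpy as np

def adagrad_step(params, grads, accum, lr=0.95, eps=1e-8):
    # Accumulate squared gradients per coordinate, then scale the step
    # by the inverse square root of that accumulator.
    accum += grads ** 2
    params -= lr * grads / (np.sqrt(accum) + eps)
    return params, accum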
Example #2
def test_adagrad():
    r = [0.25, 1.5, 1.5]
    for n in range(3):
        w1, theta1, _ = algorithms.adagrad(np.tile(d_y[n], 20),
                                           np.tile(d_x[n], (20, 1)), r[n])
        correct1 = verify(w1, theta1, dxx[n], dyy[n])
        print("TEST adagrad with m = " + str(0.25) + ", r = " + str(r[n]))
        print(correct1)
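Examples #2, #3, and #5 all call algorithms.adagrad(labels, examples, learning_rate) and unpack (w, theta, _), which suggests an online AdaGrad-style trainer for a linear classifier that returns a weight vector, a bias, and a per-step statistic. The algorithms module itself is not shown; the sketch below is only a guess at that interface, using a hinge-loss update with per-coordinate AdaGrad scaling.

import numpy as np

def adagrad(y, x, eta, eps=1e-8):
    # Hypothetical online AdaGrad trainer matching the call pattern above:
    # y is a label vector in {-1, +1}, x is an (n, d) example matrix,
    # eta is the learning rate. Returns (w, theta, mistake_counts).
    n, d = x.shape
    w, theta = np.zeros(d), 0.0
    g_w, g_theta = np.zeros(d), 0.0   # accumulated squared gradients
    mistakes, history = 0, []
    for t in range(n):
        margin = y[t] * (np.dot(w, x[t]) + theta)
        if margin <= 1:               # hinge-loss style update
            grad_w, grad_t = -y[t] * x[t], -y[t]
            g_w += grad_w ** 2
            g_theta += grad_t ** 2
            w -= eta * grad_w / (np.sqrt(g_w) + eps)
            theta -= eta * grad_t / (np.sqrt(g_theta) + eps)
        if margin <= 0:
            mistakes += 1
        history.append(mistakes)
    return w, theta, history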
Example #3
def run_adagrad():
    r = [1.5, 0.25, 0.03, 0.005, 0.001]
    best = [0, 0, 0, 0]
    for c in range(len(r)):
        w_500, theta_500, _ = algorithms.adagrad(D1_500_y, D1_500_x, r[c])
        w_1000, theta_1000, _ = algorithms.adagrad(D1_1000_y, D1_1000_x, r[c])
        correct_500 = verify(w_500, theta_500, D2_500_x, D2_500_y)
        print("adagrad with n = 500, alpha = " + str(r[c]))
        print(correct_500)
        correct_1000 = verify(w_1000, theta_1000, D2_1000_x, D2_1000_y)
        print("adagrad with n = 1000, alpha = " + str(r[c]))
        print(correct_1000)
        if (correct_500 + correct_1000) / 2.0 > (best[0] + best[1]) / 2.0:
            best[0] = correct_500
            best[1] = correct_1000
            best[2] = r[c]
    print("bestresult: correct_500 = " + str(best[0]) + " correct_1000 = " +
          str(best[1]) + " learning rate = " + str(best[2]))
Example #4
def plot_mistake_1000():
    _, __, error1 = algorithms.perceptron(D_1000_y, D_1000_x)
    _, __, error2 = algorithms.perceptron_m(D_1000_y, D_1000_x, 0.005)
    _, __, error3 = algorithms.winnow(D_1000_y, D_1000_x, 1.1, 1000)
    _, __, error4 = algorithms.winnow_m(D_1000_y, D_1000_x, 1.1, 2.0, 1000)
    _, __, error5 = algorithms.adagrad(D_1000_y, D_1000_x, 0.25)
    p1, = plt.plot(error1, color="blue", label="perceptron")
    p2, = plt.plot(error2, color="red", label="perceptron with margin")
    p3, = plt.plot(error3, color="orange", label="winnow")
    p4, = plt.plot(error4, color="green", label="winnow with margin")
    p5, = plt.plot(error5, color="black", label="adagrad")
    plt.legend(handles=[p1, p2, p3, p4, p5], loc=2)
    plt.title("mistake bound n=1000")
    plt.show()
Example #5
def run_adagrad():
    r = [1.5, 0.25, 0.03, 0.005, 0.001]
    for n in range(3):
        best = [0, 0, 0, 0]
        for c in range(len(r)):
            w1, theta1, _ = algorithms.adagrad(d1y[n], d1x[n], r[c])
            correct1 = verify(w1, theta1, d2x[n], d2y[n])
            print("adagrad with m = " + str(m_val[n]) + ", r = " + str(r[c]))
            print(correct1)
            if correct1 > best[0]:
                best[0] = correct1
                best[1] = r[c]
        print("bestresult for m = " + str(m_val[n]) + ": correct1 = " +
              str(best[0]) + " learning rate = " + str(best[1]))
Example #6
func = lambda x, order: hw_func.svm_objective_function(x, features, labels,
                                                       order)

initial_x = np.zeros((d, 1))

sgd_x, sgd_values, sgd_runtimes, sgd_xs = alg.subgradient_descent(
    func_stochastic, initial_x, max_iterations_sgd, 1)
print('Solution found by stochastic subgradient descent', sgd_x)
print('Objective function', func(sgd_x, 0))  # 0.8144200035391359
sgd_values = [
    func(sgd_xs[i], 0) for i in range(0, max_iterations_sgd,
                                      int(max_iterations_sgd / points_to_plot))
]

ada_x, ada_values, ada_runtimes, ada_xs = alg.adagrad(func_stochastic,
                                                      initial_x,
                                                      max_iterations_sgd, 1)
print('Solution found by stochastic adagrad', ada_x)
print('Objective function', func(ada_x, 0))  # 0.8210954033966148
ada_values = [
    func(ada_xs[i], 0) for i in range(0, max_iterations_sgd,
                                      int(max_iterations_sgd / points_to_plot))
]

sd_x, sd_values, sd_runtimes, sd_xs = alg.subgradient_descent(
    func, initial_x, max_iterations_sd, 1)
print('Solution found by subgradient descent', sd_x)
print('Objective function', func(sd_x, 0))  # 0.9125785012725371
sd_values = [
    func(sd_xs[i], 0)
    for i in range(0, max_iterations_sd,
                   int(max_iterations_sd / points_to_plot))
]
Example #7
sgd_pred = [
    np.sign(np.dot(sgd_xs[sgd_minind].T, feature.T)).item(0)
    for feature in features
]
print(sgd_corr)
sgd_its = len(sgd_runtimes)
sgd_values = [
    obj_f(sgd_xs[i], 0)
    for i in range(0, sgd_its, int(sgd_its / min(sgd_its, points_to_plot)))
]
sgd_xs = sgd_xs[0::int(sgd_its / min(sgd_its, points_to_plot))]
sgd_grads = sgd_grads[0::int(sgd_its / min(sgd_its, points_to_plot))]

'Ada'
ada_x, ada_values, ada_runtimes, ada_xs, ada_grads = \
    alg.adagrad(obj_f, initial_x, max_iterations, 0.1)
print('Solution found by stochastic adagrad', ada_x)
print('Objective function', obj_f(ada_x, 0))
ada_minind = ada_values.index(min(ada_values))
ada_corr = correct_rate(ada_xs[ada_minind], features, labels)
print(ada_corr)

ada_its = len(ada_runtimes)
ada_values = [
    obj_f(ada_xs[i], 0)
    for i in range(0, ada_its, int(ada_its / min(ada_its, points_to_plot)))
]
ada_xs = ada_xs[0::int(ada_its / min(ada_its, points_to_plot))]
ada_grads = ada_grads[0::int(ada_its / min(ada_its, points_to_plot))]
#%%
'LBFGS'
Example #8
initial_x = np.zeros((d, 1))

sgd_x, sgd_values, sgd_runtimes, sgd_xs = \
    alg.subgradient_descent(obj_f, initial_x, max_iterations, 0.1)

print('Solution found by stochastic subgradient descent', sgd_x)
print('Objective function', obj_f(sgd_x, 0))
sgd_its = len(sgd_runtimes)
sgd_values = [
    obj_f(sgd_xs[i], 0)
    for i in range(0, sgd_its, int(sgd_its / min(sgd_its, points_to_plot)))
]
#%%
'Ada'

ada_x, ada_values, ada_runtimes, ada_xs = alg.adagrad(obj_f, initial_x,
                                                      max_iterations, 0.1)
print('Solution found by stochastic adagrad', ada_x)
print('Objective function', obj_f(ada_x, 0))

ada_its = len(ada_runtimes)
ada_values = [
    obj_f(ada_xs[i], 0)
    for i in range(0, ada_its, int(ada_its / min(ada_its, points_to_plot)))
]
#%%
'Subgradient descent'

sd_x, sd_values, sd_runtimes, sd_xs = alg.subgradient_descent(
    obj_f, initial_x, max_iterations, 0.1)
print('Solution found by subgradient descent', sd_x)
print('Objective function', obj_f(sd_x, 0))
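Examples #6 through #8 use a different interface, alg.adagrad(obj_f, initial_x, max_iterations, lr), which returns the final iterate along with per-iteration objective values, runtimes, and iterates (and, in Example #7, gradients). Neither alg nor the obj_f(x, order) convention is shown; judging from the calls obj_f(x, 0) for the objective value, a plausible reading is that order=1 returns a (sub)gradient. The sketch below illustrates batch AdaGrad under that assumed convention and is not the module's actual implementation.

import time
import numpy as np

def adagrad(obj_f, initial_x, max_iterations, lr, eps=1e-8):
    # Assumed convention: obj_f(x, 0) -> objective value,
    #                     obj_f(x, 1) -> (sub)gradient at x.
    x = initial_x.copy()
    accum = np.zeros_like(x)          # accumulated squared gradients
    values, runtimes, xs = [], [], []
    start = time.time()
    for _ in range(max_iterations):
        grad = obj_f(x, 1)
        accum += grad ** 2
        x = x - lr * grad / (np.sqrt(accum) + eps)
        values.append(obj_f(x, 0))
        runtimes.append(time.time() - start)
        xs.append(x.copy())
    return x, values, runtimes, xs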