def test_adagrad(self):
    expected = [
        tensor([0., 0.]),
        tensor([-0.9500, -0.9500]),
        tensor([-1.4506, -1.5260]),
        tensor([-1.7698, -1.9453]),
        tensor([-1.9861, -2.2722]),
        tensor([-2.1364, -2.5363]),
        tensor([-2.2421, -2.7541]),
        tensor([-2.3168, -2.9360]),
        tensor([-2.3698, -3.0894]),
        tensor([-2.4074, -3.2194]),
        tensor([-2.4342, -3.3300]),
        tensor([-2.4532, -3.4246]),
        tensor([-2.4667, -3.5054]),
        tensor([-2.4763, -3.5748]),
        tensor([-2.4832, -3.6343]),
        tensor([-2.4880, -3.6854]),
        tensor([-2.4915, -3.7293]),
        tensor([-2.4939, -3.7670]),
        tensor([-2.4957, -3.7995]),
        tensor([-2.4969, -3.8274]),
        tensor([-2.4978, -3.8515]),
        tensor([-2.4985, -3.8721]),
        tensor([-2.4989, -3.8899]),
        tensor([-2.4992, -3.9053]),
    ]
    assert check_positions(expected, adagrad(0.95, TestLevel())), "adagrad test failed"
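# The adagrad(lr, level) helper and TestLevel exercised by the test above are not
# shown in this snippet. For reference, the per-coordinate AdaGrad step such an
# optimizer applies is sketched below; this is a minimal NumPy illustration under
# that assumption, not the project's actual implementation.
import numpy as np

def adagrad_step(x, grad, lr=0.95, accum=None, eps=1e-10):
    # Accumulate squared gradients and scale the step by their square root, so
    # coordinates that have seen large gradients get smaller effective steps.
    if accum is None:
        accum = np.zeros_like(x)
    accum = accum + grad ** 2
    x_new = x - lr * grad / (np.sqrt(accum) + eps)
    return x_new, accum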
def test_adagrad():
    r = [0.25, 1.5, 1.5]
    for n in range(3):
        w1, theta1, _ = algorithms.adagrad(np.tile(d_y[n], 20), np.tile(d_x[n], (20, 1)), r[n])
        correct1 = verify(w1, theta1, dxx[n], dyy[n])
        print("TEST adagrad with m = " + str(0.25) + ", r = " + str(r[n]))
        print(correct1)
def run_adagrad():
    r = [1.5, 0.25, 0.03, 0.005, 0.001]
    best = [0, 0, 0, 0]
    for c in range(len(r)):
        w_500, theta_500, _ = algorithms.adagrad(D1_500_y, D1_500_x, r[c])
        w_1000, theta_1000, _ = algorithms.adagrad(D1_1000_y, D1_1000_x, r[c])
        correct_500 = verify(w_500, theta_500, D2_500_x, D2_500_y)
        print("adagrad with n = 500, alpha = " + str(r[c]))
        print(correct_500)
        correct_1000 = verify(w_1000, theta_1000, D2_1000_x, D2_1000_y)
        print("adagrad with n = 1000, alpha = " + str(r[c]))
        print(correct_1000)
        if (correct_500 + correct_1000) / 2.0 > (best[0] + best[1]) / 2.0:
            best[0] = correct_500
            best[1] = correct_1000
            best[2] = r[c]
    print("best result: correct_500 = " + str(best[0]) + " correct_1000 = " + str(best[1]) + " learning rate = " + str(best[2]))
def plot_mistake_1000():
    _, __, error1 = algorithms.perceptron(D_1000_y, D_1000_x)
    _, __, error2 = algorithms.perceptron_m(D_1000_y, D_1000_x, 0.005)
    _, __, error3 = algorithms.winnow(D_1000_y, D_1000_x, 1.1, 1000)
    _, __, error4 = algorithms.winnow_m(D_1000_y, D_1000_x, 1.1, 2.0, 1000)
    _, __, error5 = algorithms.adagrad(D_1000_y, D_1000_x, 0.25)
    p1, = plt.plot(error1, color="blue", label="perceptron")
    p2, = plt.plot(error2, color="red", label="perceptron with margin")
    p3, = plt.plot(error3, color="orange", label="winnow")
    p4, = plt.plot(error4, color="green", label="winnow with margin")
    p5, = plt.plot(error5, color="black", label="adagrad")
    plt.legend(handles=[p1, p2, p3, p4, p5], loc=2)
    plt.title("mistake bound n=1000")
    plt.show()
def run_adagrad():
    r = [1.5, 0.25, 0.03, 0.005, 0.001]
    for n in range(3):
        best = [0, 0, 0, 0]
        for c in range(len(r)):
            w1, theta1, _ = algorithms.adagrad(d1y[n], d1x[n], r[c])
            correct1 = verify(w1, theta1, d2x[n], d2y[n])
            print("adagrad with m = " + str(m_val[n]) + ", r = " + str(r[c]))
            print(correct1)
            if correct1 > best[0]:
                best[0] = correct1
                best[1] = r[c]
        print("best result for m = " + str(m_val[n]) + ": correct1 = " + str(best[0]) + " learning rate = " + str(best[1]))
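# The harnesses above call algorithms.adagrad(labels, examples, rate) and unpack
# (weights, bias, error curve), but the implementation itself is not part of this
# snippet. The sketch below is a guess at that interface: an online linear
# classifier trained with a hinge-loss subgradient and per-coordinate AdaGrad
# scaling. Treat the names and the exact update rule as assumptions, not the
# original code.
import numpy as np

def adagrad_sketch(y, x, eta, eps=1e-8):
    n, d = x.shape
    w = np.zeros(d)
    theta = 0.0
    g_w = np.zeros(d)      # accumulated squared gradients for the weights
    g_theta = 0.0          # accumulated squared gradient for the bias
    mistakes = 0
    errors = []
    for t in range(n):
        margin = y[t] * (np.dot(w, x[t]) + theta)
        if margin <= 0:
            mistakes += 1
        if margin <= 1:
            # Hinge-loss subgradient, scaled per coordinate by AdaGrad.
            grad_w = -y[t] * x[t]
            grad_theta = -y[t]
            g_w += grad_w ** 2
            g_theta += grad_theta ** 2
            w -= eta * grad_w / (np.sqrt(g_w) + eps)
            theta -= eta * grad_theta / (np.sqrt(g_theta) + eps)
        errors.append(mistakes)  # cumulative mistake count, as plotted in plot_mistake_1000
    return w, theta, errors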
func = lambda x, order: hw_func.svm_objective_function(x, features, labels, order)
initial_x = np.zeros((d, 1))

sgd_x, sgd_values, sgd_runtimes, sgd_xs = alg.subgradient_descent(
    func_stochastic, initial_x, max_iterations_sgd, 1)
print('Solution found by stochastic subgradient descent', sgd_x)
print('Objective function', func(sgd_x, 0))  # 0.8144200035391359
sgd_values = [
    func(sgd_xs[i], 0)
    for i in range(0, max_iterations_sgd, int(max_iterations_sgd / points_to_plot))
]

ada_x, ada_values, ada_runtimes, ada_xs = alg.adagrad(func_stochastic, initial_x,
                                                      max_iterations_sgd, 1)
print('Solution found by stochastic adagrad', ada_x)
print('Objective function', func(ada_x, 0))  # 0.8210954033966148
ada_values = [
    func(ada_xs[i], 0)
    for i in range(0, max_iterations_sgd, int(max_iterations_sgd / points_to_plot))
]

sd_x, sd_values, sd_runtimes, sd_xs = alg.subgradient_descent(
    func, initial_x, max_iterations_sd, 1)
print('Solution found by subgradient descent', sd_x)
print('Objective function', func(sd_x, 0))  # 0.9125785012725371
sd_values = [
    func(sd_xs[i], 0)
    for i in range(0, max_iterations_sd, int(max_iterations_sd / points_to_plot))
]
sgd_pred = [
    np.sign(np.dot(sgd_xs[sgd_minind].T, feature.T)).item(0)
    for feature in features
]
print(sgd_corr)
sgd_its = len(sgd_runtimes)
sgd_values = [
    obj_f(sgd_xs[i], 0)
    for i in range(0, sgd_its, int(sgd_its / min(sgd_its, points_to_plot)))
]
sgd_xs = sgd_xs[0::int(sgd_its / min(sgd_its, points_to_plot))]
sgd_grads = sgd_grads[0::int(sgd_its / min(sgd_its, points_to_plot))]

'Ada'
ada_x, ada_values, ada_runtimes, ada_xs, ada_grads = \
    alg.adagrad(obj_f, initial_x, max_iterations, 0.1)
print('Solution found by stochastic adagrad', ada_x)
print('Objective function', obj_f(ada_x, 0))
ada_minind = ada_values.index(min(ada_values))
ada_corr = correct_rate(ada_xs[ada_minind], features, labels)
print(ada_corr)
ada_its = len(ada_runtimes)
ada_values = [
    obj_f(ada_xs[i], 0)
    for i in range(0, ada_its, int(ada_its / min(ada_its, points_to_plot)))
]
ada_xs = ada_xs[0::int(ada_its / min(ada_its, points_to_plot))]
ada_grads = ada_grads[0::int(ada_its / min(ada_its, points_to_plot))]

#%%
'LBFGS'
initial_x = np.zeros((d, 1))
sgd_x, sgd_values, sgd_runtimes, sgd_xs = \
    alg.subgradient_descent(obj_f, initial_x, max_iterations, 0.1)
print('Solution found by stochastic subgradient descent', sgd_x)
print('Objective function', obj_f(sgd_x, 0))
sgd_its = len(sgd_runtimes)
sgd_values = [
    obj_f(sgd_xs[i], 0)
    for i in range(0, sgd_its, int(sgd_its / min(sgd_its, points_to_plot)))
]

#%%
'Ada'
ada_x, ada_values, ada_runtimes, ada_xs = alg.adagrad(obj_f, initial_x,
                                                      max_iterations, 0.1)
print('Solution found by stochastic adagrad', ada_x)
print('Objective function', obj_f(ada_x, 0))
ada_its = len(ada_runtimes)
ada_values = [
    obj_f(ada_xs[i], 0)
    for i in range(0, ada_its, int(ada_its / min(ada_its, points_to_plot)))
]

#%%
'Subgradient descent'
sd_x, sd_values, sd_runtimes, sd_xs = alg.subgradient_descent(
    obj_f, initial_x, max_iterations, 0.1)
print('Solution found by subgradient descent', sd_x)
print('Objective function', obj_f(sd_x, 0))
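# alg.adagrad above is called as adagrad(obj_f, initial_x, max_iterations, step)
# and unpacked into (x, values, runtimes, xs). Its body is not included here; the
# following is a minimal sketch of a stochastic AdaGrad loop matching that return
# shape, under the assumption that obj_f(x, 1) returns a (value, subgradient)
# pair. That interface detail is a guess, not taken from the source.
import time
import numpy as np

def adagrad_sketch(obj_f, initial_x, max_iterations, eta, eps=1e-8):
    x = initial_x.copy()
    g_sq = np.zeros_like(x)            # per-coordinate sum of squared subgradients
    values, runtimes, xs = [], [], []
    start = time.time()
    for _ in range(max_iterations):
        value, grad = obj_f(x, 1)      # assumed: order=1 returns value and subgradient
        g_sq += grad ** 2
        x = x - eta * grad / (np.sqrt(g_sq) + eps)
        values.append(value)
        runtimes.append(time.time() - start)
        xs.append(x.copy())
    return x, values, runtimes, xs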