def wolf(w, step):
    """Bisection line search for a step size satisfying the (weak) Wolfe conditions.

    Assumes loss, grad, x, y and the constants w_eps1, w_eps2,
    w_theta1 (> 1) and w_theta2 (in (0, 1)) are defined at module level.
    """
    a_up, a_down = 0, 0
    a = step
    current_loss = loss(w, x, y)
    gradient = grad(w, x, y)
    grad_norm_sq = np.square(np.linalg.norm(gradient))

    def violated(a):
        """Check both Wolfe conditions at step a; True means the condition fails."""
        w_new = w - a * gradient
        # Sufficient-decrease (Armijo) condition is violated:
        cond1 = loss(w_new, x, y) > current_loss - w_eps1 * a * grad_norm_sq
        # Curvature condition is violated:
        cond2 = -np.dot(grad(w_new, x, y).T, gradient) < -w_eps2 * grad_norm_sq
        return cond1, cond2

    cond1, cond2 = violated(a)
    while cond1 or cond2:
        if cond1:        # step too large: tighten the upper bound
            a_up = a
        elif cond2:      # step too small: raise the lower bound
            a_down = a
        if a_up == 0:    # no upper bound found yet: keep expanding
            a = a_down * w_theta1
        else:            # bracket found: interpolate between the bounds
            a = a_up * w_theta2 + a_down * (1 - w_theta2)
        cond1, cond2 = violated(a)
    return a
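# A minimal usage sketch of the Wolfe line search inside a full-batch gradient
# descent loop. gradient_descent_demo is a hypothetical helper, not part of the
# original experiments; it assumes loss, grad, x and y are defined at module
# level as elsewhere in this file, and the iteration budget is illustrative.
def gradient_descent_demo(w0, n_iters=100, step=1.0):
    w = w0
    for _ in range(n_iters):
        step = wolf(w, step)  # reuse the accepted step as the next starting guess
        w = w - step * grad(w, x, y)
    return w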
def armiho(w, stp):
    """Backtracking line search satisfying the Armijo (sufficient-decrease) condition.

    Assumes loss, grad, x, y, a_eps (in (0, 1)) and a_theta (in (0, 1))
    are defined at module level.
    """
    # Start one expansion above the previous step so the step can grow again.
    step = stp / a_theta
    current_loss = loss(w, x, y)
    gradient = grad(w, x, y)
    w_new = w - step * gradient
    # Shrink the step until the sufficient-decrease condition holds.
    while loss(w_new, x, y) > current_loss - a_eps * step * np.dot(gradient.T, gradient):
        step *= a_theta
        w_new = w - step * gradient
    return step
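# Illustrative values for the line-search constants referenced above; these are
# assumptions for the sketch, the repo's actual values are set elsewhere.
# Typical choices keep 0 < w_eps1 < w_eps2 < 1 for Wolfe and a shrink factor
# in (0, 1) for Armijo.
w_eps1, w_eps2 = 1e-4, 0.9      # Wolfe: sufficient-decrease / curvature constants
w_theta1, w_theta2 = 2.0, 0.5   # Wolfe: expansion factor / interpolation weight
a_eps, a_theta = 1e-4, 0.5      # Armijo: sufficient-decrease constant / shrink factor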
def plot_performance(time_vec, point_vec, prb, mode, lbl, color, freq):
    """Plot log10 of the suboptimality gap against iterations or wall-clock time."""
    loss = prb.loss
    true_loss = prb.true_loss
    x, y = prb.x, prb.y
    gaps = [np.log10(np.abs(loss(elem, x, y) - true_loss)) for elem in point_vec]
    if mode:  # mode == 1 => x-axis in iterations, mode == 0 => x-axis in seconds
        plt.plot(range(0, len(point_vec) * freq, freq), gaps, color, label=lbl)
    else:
        plt.plot(time_vec, gaps, color, label=lbl)
    plt.legend()
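# A usage sketch (hypothetical helper): overlay several recorded runs on one
# figure by iteration count. Assumes plot_performance above; labels, colors
# and axis captions are illustrative.
def compare_runs_demo(prb, runs, freq=1):
    """runs: list of (label, color, point_vec, time_vec) tuples."""
    plt.figure()
    for lbl, color, point_vec, time_vec in runs:
        plot_performance(time_vec, point_vec, prb, 1, lbl, color, freq)
    plt.xlabel('epochs')
    plt.ylabel('log10 |loss - true_loss|')
    plt.show()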
def stoch_gradient_descent(prb, stop_tol, max_iter, freq):
    """Stochastic gradient descent with a decaying step size step0 / (k + 1)**gamma.

    stop_tol is kept for interface parity with the other optimizers; the loop
    runs for a fixed budget of max_iter passes over the data.
    """
    # Parameters
    update_rate = 5   # how many epochs' worth of random indices to draw at once
    step0 = 0.1
    gamma = 0.5       # exponent in the step size decay rule

    # Retrieving the problem data
    x, y, true_loss, w0 = prb.x, prb.y, prb.true_loss, prb.w0
    one_func_grad = prb.one_grad

    # Resetting the counters
    w = w0
    start = time.perf_counter()
    time_vec = []
    iteration_counter = 0
    point_vec = []
    random_matr = np.random.randint(0, x.shape[0], (update_rate * y.size, ))

    while iteration_counter < max_iter * y.size:
        # Record a snapshot every freq passes over the data.
        if iteration_counter % (x.shape[0] * freq) == 0:
            point_vec.append(w)
            time_vec.append(time.perf_counter() - start)
            print("SG Iteration ", iteration_counter)
        # Pick the next precomputed random index; redraw the pool when exhausted.
        i = random_matr[iteration_counter % (y.size * update_rate)]
        if iteration_counter % (y.size * update_rate) == 0:
            random_matr = np.random.randint(0, x.shape[0], (update_rate * y.size, ))
        gradient = one_func_grad(w, x, y, i)
        step = step0 / np.power(iteration_counter + 1, gamma)
        w = w - step * gradient
        iteration_counter += 1

    point_vec.append(w)
    time_vec.append(time.perf_counter() - start)
    return (point_vec, time_vec)
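# A hypothetical driver for a single SGD run: the budget of 20 passes and the
# snapshot frequency of one pass are illustrative choices, not values from the
# experiments in this repo.
def sgd_demo(prb):
    point_vec, time_vec = stoch_gradient_descent(prb, stop_tol=1e-4,
                                                 max_iter=20, freq=1)
    plot_performance(time_vec, point_vec, prb, 1, 'SGD', 'b', 1)
    plt.show()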
def stoch_average_gradient(prb, stop_tol, max_iter, freq):
    """SAG: stochastic average gradient, with the step size 1 / l driven by a
    line-search estimate of the Lipschitz constant l.

    stop_tol is kept for interface parity; the loop runs for a fixed budget of
    max_iter passes over the data.
    """
    # Retrieving the problem data
    x, y, true_loss, w0 = prb.x, prb.y, prb.true_loss, prb.w0
    loss, one_func_loss, one_func_grad = prb.loss, prb.one_loss, prb.one_grad

    # Parameters
    update_rate = 10  # how many epochs' worth of random indices to draw at once
    l = 10            # running estimate of the Lipschitz constant
    eps = 0.5

    def stoch_grad(w, x, y, i, gradient):
        """Replace the stored gradient of component i and update the running average."""
        local_grad = one_func_grad(w, x, y, i)
        gradient += (local_grad - grad_matrix[i].reshape(w.shape)) / y.size
        grad_matrix[i] = local_grad.reshape(w.shape[0], )
        return gradient

    # Resetting the counters
    w = w0
    start = time.perf_counter()
    time_vec = []
    iteration_counter = 0
    point_vec = []

    # Table of the most recent gradient of every component function
    grad_matrix = np.zeros(x.shape)
    np.random.seed(random_matr_seed)  # random_matr_seed: module-level constant
    random_matr = np.random.randint(0, x.shape[0], (update_rate * y.size, ))
    i = random_matr[iteration_counter % (y.size * update_rate)]
    gradient = np.zeros(w.shape)
    gradient = stoch_grad(w, x, y, i, gradient)

    while iteration_counter < max_iter * y.size:
        # Record a snapshot every freq passes over the data.
        if iteration_counter % (x.shape[0] * freq) == 0:
            point_vec.append(w)
            time_vec.append(time.perf_counter() - start)
            print("SAG Iteration ", iteration_counter, ": ",
                  np.linalg.norm(gradient))
        # Pick the next precomputed random index; redraw the pool when exhausted.
        i = random_matr[iteration_counter % (y.size * update_rate)]
        if iteration_counter % (y.size * update_rate) == 0:
            np.random.seed(iteration_counter)
            random_matr = np.random.randint(0, x.shape[0], (update_rate * y.size, ))
        # Lipschitz line search: slowly decay l, then double it until the
        # component loss satisfies a sufficient-decrease condition.
        cur_one_func_grad = one_func_grad(w, x, y, i)
        cur_one_func_loss = one_func_loss(w, x, y, i)
        l *= np.power(2, -1 / x.shape[0])
        w_new = w - cur_one_func_grad / l
        while one_func_loss(w_new, x, y, i) > \
                cur_one_func_loss - eps * np.dot(cur_one_func_grad.T,
                                                 cur_one_func_grad) / l:
            l *= 2
            w_new = w - cur_one_func_grad / l
        step = 1 / l
        w = w - step * gradient
        iteration_counter += 1
        gradient = stoch_grad(w, x, y, i, gradient)

    print("SAG Iteration ", iteration_counter, ": ", np.linalg.norm(gradient))
    point_vec.append(w)
    time_vec.append(time.perf_counter() - start)
    return (point_vec, time_vec)
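# A hypothetical end-to-end sketch: run SGD and SAG on the same problem with
# equal budgets and overlay their convergence curves via compare_runs_demo
# above. Budgets, labels and colors are illustrative.
def sag_vs_sgd_demo(prb, max_iter=20, freq=1):
    sgd_points, sgd_times = stoch_gradient_descent(prb, 1e-4, max_iter, freq)
    sag_points, sag_times = stoch_average_gradient(prb, 1e-4, max_iter, freq)
    compare_runs_demo(prb, [('SGD', 'b', sgd_points, sgd_times),
                            ('SAG', 'r', sag_points, sag_times)], freq)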
"""This module is for importing datasets for linear regression""" import numpy as np from numpy import random import matplotlib.pyplot as plt import scipy.optimize as op from matplotlib.legend_handler import HandlerLine2D import time from linreg import true_loss, grad, loss, batch_loss, batch_grad from sklearn.datasets import load_svmlight_file from pylab import * #Parameters random_seed_w0 = 32 mu, sigma1 = 0, 10 x_d, y_d = load_svmlight_file('datasets/abalone_scale.txt') x = np.concatenate((x_d.toarray(), np.ones((x_d.shape[0], 1))), axis=1) y = y_d.reshape((y_d.size, 1)) dim = x.shape[1] data_name = 'Abalone' #Generating the starting point seed (random_seed_w0) w0 = np.random.normal(mu, sigma1, (dim, 1)) n = y.size m = x.shape[1] opt_res = op.minimize(fun=lambda w: loss(w.reshape((w.size, 1)), x, y), x0=w0, tol=1e-6) #, options={'maxiter': 30}) true_loss = opt_res['fun'] if __name__ == "__main__": print("\nTrue loss: ", true_loss)