def plot_lrates(f, df, x0, etas, niter):
    """Plot f(x) against iteration number for each learning rate in etas."""
    fig, ax = plt.subplots(nrows=1, ncols=1)
    for eta in etas:
        ax.plot(list(xrange(1, niter + 1)),
                list(take(niter, (f(e) for e in gradient_descent(df, x0, eta=eta)))),
                label=unicode(eta))
    ax.set_xlabel('Iteration Number')
    ax.set_ylabel('f(x)')
    plt.legend(title='Learning Rate')
    plt.show()
    plt.clf()
def test_sumsq(self):
    """Gradient descent on f(x) = sum(x_i^2) should converge: the last two
    values yielded by until_within_tol must differ by at most tol."""
    def f(x_i):
        return sum(x_ij**2 for x_ij in x_i)

    def df(x_i):
        return [2 * x_ij for x_ij in x_i]

    x0 = [5., 4.]
    tol = 1.e-6
    a = until_within_tol((f(e) for e in gradient_descent(df, x0)), tolerance=tol)
    b = list(a)
    self.assertLessEqual(abs(b[-2] - b[-1]), tol)
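# test_sumsq is written as a unittest.TestCase method.  A minimal harness for
# running it might look like the sketch below; the class name is made up here,
# and the module paths for until_within_tol and gradient_descent are taken from
# the imports used in the driver script further down.
import unittest

from utility import until_within_tol
from func_gradient_descent import gradient_descent


class TestGradientDescent(unittest.TestCase):
    pass  # paste test_sumsq from above into this class


if __name__ == '__main__':
    unittest.main()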
def fit(cost_f, cost_df, h_theta0, data, eta=0.1, it_max=500, gf='gd'):
    '''
    Compute the coefficients of a multiple linear regression.

    Parameters
        cost_f:   cost function (J)
        cost_df:  gradient of the cost function
                  (gradJ for batch, gradJS for stochastic)
        h_theta0: initial guess for the fitting parameters (j cols)
        data:     list of tuples [(Xi, yi)]
                  X: matrix of independent variables (i rows of observations,
                     j cols of variables); x0 = 1 for all i
                  y: dependent variable (i rows)
        eta:      learning rate
        it_max:   maximum number of iterations
        gf:       'gd' for batch gradient descent, 'sgd' for stochastic

    Returns
        Fitting parameters (j cols) and the history of the cost function
    '''
    X, y = zip(*data)
    if gf == 'gd':
        # Batch: evaluate the cost and its gradient on the full data set
        f = partial(cost_f, X, y)
        df = partial(cost_df, X, y)
        ans = list(take(it_max, ((h_theta, f(h_theta))
                                 for h_theta in fgd.gradient_descent(df, h_theta0, eta=eta))))
        value = list(T(ans)[0])
        cost = list(T(ans)[1])
        #t = list(until_within_tol(cost, 1e-7))
        return value[-1], cost
    elif gf == 'sgd':
        # Stochastic: one pass over the observations per outer iteration,
        # decaying the learning rate by 1% after each pass
        df = cost_df
        cost = [sum(cost_f(xi, yi, h_theta0) for xi, yi in data)]
        h_theta = h_theta0
        eta_new = eta
        for _ in xrange(it_max):
            ans = list(take(len(y), (e for e in fgd.sgd(df, X, y, h_theta, eta=eta_new))))
            h_theta = ans[-1]
            cost.append(sum(cost_f(xi, yi, h_theta) for xi, yi in data))
            eta_new = 0.99 * eta_new
        return h_theta, cost
    else:
        print('Not a valid function')
        return
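# The two branches of fit differ only in how often the parameters are updated:
# the batch path ('gd') steps once per pass over the whole data set, while the
# stochastic path ('sgd') steps once per observation and decays the learning
# rate by 1% after each pass.  Below is a minimal, self-contained sketch of
# those two update rules for a least-squares cost; the helper names and the
# toy data are illustrative only and are not part of the listing.
from __future__ import print_function


def batch_step(theta, data, eta):
    # one batch update: step against the gradient summed over all observations
    g0 = sum(2 * (theta[0] + theta[1] * x - y) for x, y in data)
    g1 = sum(2 * (theta[0] + theta[1] * x - y) * x for x, y in data)
    return [theta[0] - eta * g0, theta[1] - eta * g1]


def sgd_step(theta, xi, yi, eta):
    # one stochastic update: step against the gradient of a single observation
    r = 2 * (theta[0] + theta[1] * xi - yi)
    return [theta[0] - eta * r, theta[1] - eta * r * xi]


toy = [(0., 1.), (1., 3.), (2., 5.), (3., 7.)]     # y = 1 + 2x exactly
theta_b = [0., 0.]
for _ in range(200):                               # batch: all points per step
    theta_b = batch_step(theta_b, toy, eta=0.01)

theta_s, eta = [0., 0.], 0.05
for _ in range(200):                               # stochastic: one pass per step
    for xi, yi in toy:
        theta_s = sgd_step(theta_s, xi, yi, eta)
    eta *= 0.99                                    # same 1% decay used in fit
print(theta_b, theta_s)                            # both approach [1.0, 2.0]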
from __future__ import print_function, division, unicode_literals

from toolz import take, compose, pluck
import matplotlib.pyplot as plt

from pylsy2 import pylsytable2
from utility import until_within_tol
from func_gradient_descent import gradient_descent
from out_utils import plot_lrates


def f(x_i):
    return sum(x_ij**2 for x_ij in x_i)


def df(x_i):
    return [2 * x_ij for x_ij in x_i]


x0 = [6., 33., 12.2]
tol = 1.e-6
al = [1., 0.3, 0.1, 0.03, 0.01, 0.003, 0.001]
niter = 100

plot_lrates(f, df, x0, al, niter)

result = list(take(50, ((f(e), e) for e in gradient_descent(df, x0))))
xs = ['x' + unicode(i) for i in xrange(len(x0))]
table = pylsytable2(['y'] + xs)
table.add_data('y', list(pluck(0, result)), '{:.2e}')
for i, x in enumerate(xs):
    table.add_data(x, list(pluck(i, pluck(1, result))), '{:.2e}')
print(table)
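# For this quadratic, gradient descent updates each coordinate independently:
# x_new = x - eta * 2x = x * (1 - 2*eta), so every coordinate shrinks toward 0
# by the same geometric factor per iteration.  A quick self-contained check of
# that factor -- the step size here is assumed for illustration, since the
# default eta used by gradient_descent is not shown in this listing:
eta = 0.1
x = [6., 33., 12.2]
for _ in range(50):
    x = [xi - eta * 2 * xi for xi in x]    # same as xi * (1 - 2*eta)
print(x)                                   # each coordinate scaled by (1 - 2*eta)**50 ~ 1.4e-5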
def g(x):
    """g(x, y) = -exp(-x^3 / 3 + x - y^2) has a minimum at (1, 0) and a
    saddle point at (-1, 0)."""
    return -math.exp(x[0]**3 / -3 + x[0] - x[1]**2)


def dg(x):
    """Gradient of g."""
    return ((1 - x[0]**2) * g(x), -2 * x[1] * g(x))


tol = 1.e-6
b = until_within_tol((g(e) for e in gradient_descent(dg, random_point())), tolerance=tol)
print(list(b))

alphas = [1., 0.3, 0.1, 0.03, 0.01, 0.003, 0.001]
niter = 100
plot_lrates(g, dg, random_point(), alphas, niter)

#x0 = random_point()
x0 = [-0.2, 0.5]
result = list(take(50, ((g(e), e) for e in gradient_descent(dg, x0))))
xs = ['x' + unicode(i) for i in xrange(len(x0))]
table = pylsytable2(['y'] + xs)
table.add_data('y', list(pluck(0, result)), '{:.2e}')
for i, x in enumerate(xs):
    table.add_data(x, list(pluck(i, pluck(1, result))), '{:.2e}')
print(table)
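# The script above follows the gradient of g from x0 = [-0.2, 0.5]; with a small
# enough step the iterates move toward the minimum at (1, 0).  A self-contained
# sketch of that descent, with a fixed step size chosen for illustration (the
# default eta used by gradient_descent is not shown in this listing):
import math


def g(x):
    return -math.exp(x[0]**3 / -3 + x[0] - x[1]**2)


def dg(x):
    return [(1 - x[0]**2) * g(x), -2 * x[1] * g(x)]


x, eta = [-0.2, 0.5], 0.1
for _ in range(200):
    x = [xi - eta * di for xi, di in zip(x, dg(x))]
print(x)   # approaches the minimum at (1, 0)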