def fit_polynomial(X, Y, M, out_png=None):
    '''Problem 2.1: fit an order-M polynomial to (X, Y) by analytic least squares.'''
    ndata = len(X)
    nparams = M + 1
    Y = np.reshape(Y, (ndata, 1))
    phi = gradient_descent.polynomial_design_matrix(X, M)
    weights = gradient_descent.analytic_least_squares(phi, Y)
    if out_png:
        plt.figure(1, figsize=(4, 4))
        plt.clf()
        plt.plot(X, np.array(Y), 'o', color='blue', label='data')
        # True generating model and the fitted polynomial on a dense grid.
        xp = np.linspace(0, 1, 100)
        y_model = np.cos(np.pi * xp) + np.cos(2 * np.pi * xp)
        plt.plot(xp, y_model, color='orange', label='true model')
        y_regress = np.dot(gradient_descent.polynomial_design_matrix(xp, M),
                           weights.reshape((nparams, 1)))
        plt.plot(xp, y_regress, color='red', label='fitted model')
        SSE = gradient_descent.least_squares_objective(weights, phi, Y)
        plt.xlabel('x')
        plt.ylabel('y')
        plt.legend(loc='best')
        plt.title('M = {}, SSE = {:.2f}'.format(M, SSE))
        plt.tight_layout()
        plt.savefig(out_png)
    return weights
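
# --- Illustrative sketches (assumptions, not the gradient_descent module) -------------
# gradient_descent.polynomial_design_matrix and gradient_descent.analytic_least_squares
# are defined elsewhere. Judging only from how they are called above, they presumably
# build the Vandermonde design matrix Phi (columns 1, x, ..., x^M) and solve the normal
# equations w = (Phi^T Phi)^{-1} Phi^T y. Hedged stand-ins with hypothetical names:

def _polynomial_design_matrix_sketch(X, M):
    # (ndata, M + 1) matrix whose column i holds X**i.
    return np.vander(np.asarray(X).ravel(), M + 1, increasing=True)

def _analytic_least_squares_sketch(phi, Y):
    # Closed-form least-squares weights; pinv also handles rank-deficient Phi.
    return np.linalg.pinv(phi).dot(Y)
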
def R_squared(weights, X, Y, M):
    '''Coefficient of determination for an order-M polynomial fit.'''
    # Design matrix with columns 1, x, x^2, ..., x^M.
    A = np.empty((len(X), M + 1))
    for i in range(M + 1):
        A[:, i] = X**i
    TSS = np.sum((Y - np.mean(Y))**2)
    RSS = gradient_descent.least_squares_objective(weights, A, Y)
    print(TSS, RSS)
    return (TSS - RSS) / TSS
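
def _r_squared_example():
    # Hedged usage example (assumes the getData loader imported for main below):
    # fit an order-3 polynomial and report R^2 = (TSS - RSS) / TSS = 1 - RSS / TSS.
    X, Y = getData(False)
    w = fit_polynomial(X, Y, 3)
    print('R^2 = {:.3f}'.format(R_squared(w, X, Y, 3)))
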
def main():
    X, Y = getData(False)
    ndata = len(X)
    for M in (0, 1, 2, 3, 4, 6, 8, 10):
        weights = fit_polynomial(X, Y, M, 'sgd_plots/regress_m_%i.png' % M)
        phi = gradient_descent.polynomial_design_matrix(X, M)
        print('SSE = {}'.format(gradient_descent.least_squares_objective(
            weights, phi, Y.reshape((ndata, 1)))))
        print('SSE derivative = {}'.format(gradient_descent.least_squares_gradient(
            weights, phi, Y.reshape((ndata, 1)))))
        # Row of the LaTeX results table: M and the rounded weight vector.
        print('M=%i & ' % M, 'w = ', [round(w, 3) for w in weights[:, 0]], '\\\\')
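
# --- Illustrative sketches (assumptions, not the gradient_descent module) -------------
# The loop above also relies on gradient_descent.least_squares_objective and
# gradient_descent.least_squares_gradient. For a design matrix Phi these are presumably
# the sum of squared errors J(w) = ||Phi w - y||^2 and its gradient
# grad J(w) = 2 Phi^T (Phi w - y). Hedged stand-ins with hypothetical names:

def _least_squares_objective_sketch(weights, phi, Y):
    # SSE: squared Euclidean norm of the residual vector.
    residual = phi.dot(weights.reshape((-1, 1))) - Y.reshape((-1, 1))
    return float(np.sum(residual**2))

def _least_squares_gradient_sketch(weights, phi, Y):
    # Gradient of the SSE with respect to the weights.
    residual = phi.dot(weights.reshape((-1, 1))) - Y.reshape((-1, 1))
    return 2.0 * phi.T.dot(residual)
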
def main():
    X, Y = loadFittingDataP1.getData()
    Y = Y.reshape((100, 1))
    w_opt = analytic_least_squares(X, Y)
    # Problem 1.3.a-c: run batch and stochastic gradient descent from a variety of
    # start points, record the number of iterations and final weights, compute the
    # relative difference from the analytic optimum, and plot the objective traces.
    # ----------------------------------------------------------------------------------
    start_points = [
        np.zeros((10, 1)),
        np.zeros((10, 1)) + 10,
        np.zeros((10, 1)) - 10,
        np.zeros((10, 1)) + 100,
        np.zeros((10, 1)) - 100,
        np.zeros((10, 1)) + 1000,
        np.zeros((10, 1)) - 1000,
        20 * np.random.random_sample((10, 1)) - 10
    ]
    batch_iterations = []
    batch_weights = []
    batch_diff = []
    batch_f = []
    stochastic_iterations = []
    stochastic_weights = []
    stochastic_diff = []
    stochastic_f = []
    fig, ax = plt.subplots()
    #labels = map(lambda x: str([round(i, 2) for i in x]), start_points)
    start_points = [np.zeros((10, 1))]  # NOTE: overrides the list above; only the zero start is run
    for point in start_points:
        # Batch gradient descent.
        w_batch, d_batch, f_batch, iters_batch = gradient_descent.run_gradient_descent(
            func=lambda theta: least_squares_objective(theta, X, Y),
            deriv=lambda theta: least_squares_gradient(theta, X, Y),
            x0=point, h=10.**(-6), tol=0.1)
        batch_weights.append(w_batch[-1])
        # Every batch iteration is one pass over the whole 100-point dataset.
        batch_iterations.append(iters_batch * 100)
        batch_diff.append(np.linalg.norm(w_opt - w_batch[-1]) / np.linalg.norm(w_opt))
        batch_f.append(f_batch)
        plt.plot(range(0, iters_batch + 1), [np.log(f) for f in f_batch[:-1]], color="k")
        # Stochastic gradient descent.
        w_sgd, iters_sgd, err_sgd, f_sgd = gradient_descent.stochastic_gradient_descent(
            func=least_squares_objective,
            deriv=least_squares_gradient,
            X=X, Y=Y, weights0=point,
            tau=10.**8, k=.75, tol=.1, return_f=True)
        stochastic_weights.append(w_sgd)
        stochastic_iterations.append(iters_sgd)
        stochastic_diff.append(np.linalg.norm(w_opt - w_sgd) / np.linalg.norm(w_opt))
        stochastic_f.append(f_sgd)
        print(iters_sgd // 100)
        print(len(f_sgd))
        plt.plot(range(0, iters_sgd // 100), [np.log(f) for f in f_sgd], color="b")
    ax.set_xlabel('Iterations')
    ax.set_ylabel('Objective function', color='k')
    plt.title("Least squares objective (log scale) vs. iterations")
    #plt.legend(labels, shadow=True, fancybox=True)
    plt.show()
    print("batch iterations")
    print(batch_iterations)
    print("batch diff")
    print(batch_diff)
    print("stochastic iterations")
    print(stochastic_iterations)
    print("stochastic diff")
    print(stochastic_diff)
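
# --- Illustrative sketch (assumption, not the gradient_descent module) ----------------
# stochastic_gradient_descent is called above with tau and k, which suggests a
# Robbins-Monro step-size schedule eta_t = (tau + t)**(-k). The sketch below, under
# that assumption and with a hypothetical name and simplified return values, updates
# the weights one data point at a time and stops when the objective change over a
# full pass falls below tol.

def _stochastic_gradient_descent_sketch(func, deriv, X, Y, weights0,
                                        tau=1e8, k=0.75, tol=0.1, max_passes=10000):
    w = np.array(weights0, dtype=float)
    f_history = [func(w, X, Y)]
    t = 0
    for _ in range(max_passes):
        for i in np.random.permutation(len(X)):
            eta = (tau + t)**(-k)   # decaying learning rate
            xi = X[i:i + 1]         # single-example "mini-batch"
            yi = Y[i:i + 1]
            w = w - eta * deriv(w, xi, yi)
            t += 1
        f_history.append(func(w, X, Y))
        if abs(f_history[-1] - f_history[-2]) < tol:
            break
    return w, t, f_history[-1], f_history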