def main():
    np.random.seed(1)
    xtrain, ytrain, w_true = make_data_linreg_1d()
    N = xtrain.shape[0]
    D = 2
    Xtrain = np.c_[np.ones(N), xtrain]  # add column of 1s
    w_init = np.zeros(D)
    model = LinregModel(w_init)
    logger = MinimizeLogger(model.objective, (Xtrain, ytrain), print_freq=10)

    params_ols, loss_ols = LinregModel.ols_fit(Xtrain, ytrain)
    params_bfgs, loss_bfgs, logger = bfgs_fit(Xtrain, ytrain, model, logger)
    assert np.allclose(params_bfgs, params_ols)
    assert np.allclose(loss_bfgs, loss_ols)

    params_autograd, loss_autograd = bfgs_fit(Xtrain, ytrain, model,
                                              logger=None, use_autograd=True)
    assert np.allclose(params_bfgs, params_autograd)
    assert np.allclose(loss_bfgs, loss_autograd)
    print "All assertions passed"

    print logger.obj_trace
    plot_loss_trace(logger.obj_trace, loss_ols, 'BFGS')
    model_true = LinregModel(w_true)
    plot_error_surface_and_param_trace(xtrain, ytrain, model_true, logger.param_trace, 'BFGS')
    plt.show()
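# Hypothetical sketch (not the repo's code): the zero-argument make_data_linreg_1d()
# used above is assumed to return (xtrain, ytrain, w_true) with
# ytrain = w0 + w1 * xtrain + Gaussian noise. The sample size, weights and noise
# level below are illustrative guesses only.
import numpy as np

def make_data_linreg_1d_sketch(N=21, noise_std=1.0):
    w_true = np.array([1.0, 2.0])  # [intercept, slope], arbitrary choice
    xtrain = np.linspace(0, 20, N)
    ytrain = w_true[0] + w_true[1] * xtrain + noise_std * np.random.randn(N)
    return xtrain, ytrain, w_true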
def main():
    np.random.seed(1)
    xtrain, Xtrain, ytrain, params_true, true_fun, ttl = make_data_linreg_1d(21, 'linear')
    model = LinregModel(1, True)
    params_init = model.init_params()
    print model

    # Check that OLS and BFGS give same result
    params_ols, loss_ols = model.ols_fit(Xtrain, ytrain)
    obj_fun = model.objective
    grad_fun = model.gradient
    params_bfgs, loss_bfgs = bfgs_fit(params_init, obj_fun, grad_fun, (Xtrain, ytrain))
    assert np.allclose(params_bfgs, params_ols)
    assert np.allclose(loss_bfgs, loss_ols)

    # Check that analytic gradient and automatic gradient give same result
    # when evaluated on training data
    grad_fun = autograd.grad(obj_fun)
    # Use Xtrain (with bias column) so all three gradients below are evaluated
    # on the same inputs; the original passed xtrain here.
    grad_auto = grad_fun(params_init, Xtrain, ytrain)
    grad_finite_diff = autograd.util.nd(lambda p: obj_fun(p, Xtrain, ytrain), params_init)[0]
    grad_analytic = model.gradient(params_init, Xtrain, ytrain)
    assert np.allclose(grad_auto, grad_finite_diff)
    assert np.allclose(grad_auto, grad_analytic)

    params_autograd, loss_autograd = bfgs_fit(params_init, obj_fun, grad_fun, (Xtrain, ytrain))
    assert np.allclose(params_bfgs, params_autograd)
    assert np.allclose(loss_bfgs, loss_autograd)
    print "All assertions passed"
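# autograd.util.nd is only available in older autograd releases; the listing above
# relies on it for the finite-difference check. A minimal self-contained
# central-difference helper (an illustrative sketch, not the library function)
# could look like this:
import numpy as np

def numerical_grad(f, x, eps=1e-6):
    """Central finite-difference approximation to the gradient of f at x."""
    x = np.asarray(x, dtype=float)
    g = np.zeros_like(x)
    for i in range(x.size):
        e = np.zeros_like(x)
        e[i] = eps
        g[i] = (f(x + e) - f(x - e)) / (2 * eps)
    return g

# Usage mirroring the check above: compare
#   numerical_grad(lambda p: obj_fun(p, Xtrain, ytrain), params_init)
# against the analytic and autograd gradients with np.allclose.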
def main():
    np.random.seed(1)
    xtrain, ytrain, params_true = make_data_linreg_1d()
    predict_fun = lambda x: LinregModel.prediction(params_true, x)
    plot_data_and_pred(xtrain, ytrain, predict_fun)
    loss_fun = lambda w0, w1: LinregModel.objective([w0, w1], xtrain, ytrain)
    plot_error_surface(loss_fun, params_true)
    plt.show()
def main():
    np.random.seed(1)
    xtrain, ytrain, params_true = make_data_linreg_1d()
    N = xtrain.shape[0]
    Xtrain = np.c_[np.ones(N), xtrain]  # add column of 1s
    w_ols, loss_ols = LinregModel.ols_fit(Xtrain, ytrain)

    expt_configs = make_expt_config(N)
    nexpts = len(expt_configs)
    print nexpts
    nrows, ncols = nsubplots(nexpts)
    #nrows, ncols = 4, 2
    loss_trace_fig = plt.figure("loss trace fig")
    param_trace_fig = plt.figure("param trace fig")
    folder = 'figures'

    for expt_num, config in enumerate(expt_configs):
        logger = sgd.SGDLogger(print_freq=10)
        np.random.seed(1)
        batchifier = sgd.MiniBatcher(Xtrain, ytrain, config['batch_size'])
        initial_params = np.zeros(2)
        lr_fun = lambda iter: sgd.get_learning_rate_exp_decay(iter, config['init_lr'], config['lr_decay'])
        ttl = config_to_str(config)
        print '\nstarting experiment {}'.format(ttl)
        print config

        obj_fun = LinregModel.objective
        #grad_fun = LinregModel.gradient
        grad_fun = autograd.grad(obj_fun)
        result = sgd.sgd_minimize(initial_params, obj_fun, grad_fun, batchifier,
                                  config['n_steps'], lr_fun, config['momentum'],
                                  callback=logger.update)
        print result

        plotnum = expt_num + 1
        ax = loss_trace_fig.add_subplot(nrows, ncols, plotnum)
        plot_loss_trace(logger.obj_trace, loss_ols, ax)
        ax.set_title(ttl)

        ax = param_trace_fig.add_subplot(nrows, ncols, plotnum)
        loss_fun = lambda w0, w1: LinregModel.objective([w0, w1], xtrain, ytrain)
        plot_error_surface(loss_fun, params_true, ax)
        plot_param_trace(logger.param_trace, ax)
        ax.set_title(ttl)

    plt.figure("loss trace fig")
    fname = os.path.join(folder, 'linreg_1d_sgd_loss_trace.png')
    plt.savefig(fname)
    plt.figure("param trace fig")
    fname = os.path.join(folder, 'linreg_1d_sgd_param_trace.png')
    plt.savefig(fname)
    plt.show()
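# The exact interface of sgd.MiniBatcher is not shown here; the call site above
# only tells us it wraps (X, y, batch_size). A plausible sketch of such a
# batcher, purely as an assumption, is a class that draws random minibatches:
import numpy as np

class MiniBatcherSketch(object):
    def __init__(self, X, y, batch_size):
        self.X, self.y, self.batch_size = X, y, batch_size
        self.N = X.shape[0]

    def next_batch(self):
        # sample a minibatch without replacement from the training set
        idx = np.random.choice(self.N, self.batch_size, replace=False)
        return self.X[idx], self.y[idx]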
def main():
    np.random.seed(1)
    xtrain, ytrain, w_true = make_data_linreg_1d()
    model = LinregModel(w_true)
    plot_data_and_pred(xtrain, ytrain, model)
    plot_error_surface(xtrain, ytrain, model)
    plt.show()
def main():
    np.random.seed(1)
    xtrain, ytrain, params_true = make_data_linreg_1d()
    N = xtrain.shape[0]
    D = 2
    Xtrain = np.c_[np.ones(N), xtrain]  # add column of 1s
    params_init = np.zeros(D)
    logger = MinimizeLogger(LinregModel.objective, (Xtrain, ytrain), print_freq=10)

    # Check that OLS and BFGS give same result
    params_ols, loss_ols = LinregModel.ols_fit(Xtrain, ytrain)
    obj_fun = LinregModel.objective
    grad_fun = LinregModel.gradient
    params_bfgs, loss_bfgs, logger = bfgs_fit(params_init, obj_fun, grad_fun, (Xtrain, ytrain), logger)
    assert np.allclose(params_bfgs, params_ols)
    assert np.allclose(loss_bfgs, loss_ols)

    # Check that analytic gradient and automatic gradient give same result
    grad_fun = autograd.grad(obj_fun)
    grad_auto = grad_fun(params_init, Xtrain, ytrain)
    grad_finite_diff = autograd.util.nd(lambda p: obj_fun(p, Xtrain, ytrain), params_init)[0]
    grad_analytic = LinregModel.gradient(params_init, Xtrain, ytrain)
    assert np.allclose(grad_auto, grad_finite_diff)
    assert np.allclose(grad_auto, grad_analytic)

    params_autograd, loss_autograd = bfgs_fit(params_init, obj_fun, grad_fun, (Xtrain, ytrain))
    assert np.allclose(params_bfgs, params_autograd)
    assert np.allclose(loss_bfgs, loss_autograd)
    print "All assertions passed"

    # Plot loss vs time
    print logger.obj_trace
    ax = plot_loss_trace(logger.obj_trace, loss_ols)
    ax.set_title('BFGS')

    # Plot 2d trajectory of parameter values over time
    loss_fun = lambda w0, w1: LinregModel.objective([w0, w1], xtrain, ytrain)
    ax = plot_error_surface(loss_fun, params_true)
    plot_param_trace(logger.param_trace, ax)
    ax.set_title('BFGS')
    plt.show()
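# The consistency checks above (OLS == BFGS, analytic == autograd == finite
# difference) only hold if LinregModel.objective is some scaling of the squared
# error and LinregModel.gradient is its exact derivative. A sketch under that
# assumption (the repo may use a different scaling, e.g. without the 0.5 factor):
import numpy as np

def linreg_objective(params, X, y):
    """Half mean squared error of the linear model y ~ X.dot(params)."""
    resid = np.dot(X, params) - y
    return 0.5 * np.mean(resid ** 2)

def linreg_gradient(params, X, y):
    """Exact gradient of linreg_objective w.r.t. params."""
    resid = np.dot(X, params) - y
    return np.dot(X.T, resid) / X.shape[0]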
def main():
    for fun_type in ['linear', 'quad', 'sine']:
        np.random.seed(1)
        N = 20
        Xtrain, Ytrain, params_true, true_fun, ttl = make_data_linreg_1d(N, fun_type)
        model = LinregModel(1, True)
        params_ols, loss_ols = model.ols_fit(Xtrain, Ytrain)
        print ttl
        print params_ols

        # Plot data
        predict_fun = lambda x: model.prediction(params_ols, x)
        ax = plot_data_and_predictions_1d(Xtrain, Ytrain, true_fun, predict_fun)
        ax.set_title(ttl)

        # Plot error surface
        loss_fun = lambda w0, w1: model.objective([w0, w1], Xtrain, Ytrain)
        ax = plot_error_surface_2d(loss_fun, params_ols, params_true, fun_type)
        ax.set_title(ttl)

    plt.show()
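# ols_fit above solves the least-squares problem in closed form. A minimal
# sketch of what such a routine typically does, via the normal equations
# (the repo's exact signature and loss scaling are assumptions; np.linalg.lstsq
# or a pseudo-inverse would be the more robust choice in practice):
import numpy as np

def ols_fit_sketch(X, y):
    w = np.linalg.solve(np.dot(X.T, X), np.dot(X.T, y))  # normal equations
    loss = 0.5 * np.mean((np.dot(X, w) - y) ** 2)
    return w, loss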
def run_expt(config, loss_opt=0):
    ttl = config_to_str(config)
    print '\nstarting experiment {}'.format(ttl)
    print config

    Xtrain, Ytrain, params_true, true_fun, fun_name = \
        demo.make_data_linreg_1d(config['N'], config['fun_type'])
    data_dim = Xtrain.shape[1]
    N = Xtrain.shape[0]
    Xtrain, Ytrain = opt.shuffle_data(Xtrain, Ytrain)

    model_type = config['model_type']
    if model_type == 'linear':
        model = LinregModel(data_dim, add_ones=True)
        params, loss = model.ols_fit(Xtrain, Ytrain)
    elif model_type[0:3] == 'mlp':
        _, layer_sizes = model_type.split(':')
        layer_sizes = [int(n) for n in layer_sizes.split('-')]
        model = MLP(layer_sizes, 'regression', L2_reg=0.001)
    else:
        raise ValueError('unknown model type {}'.format(model_type))
    initial_params = model.init_params()
    param_dim = len(initial_params)

    plot_data = (data_dim == 1)
    plot_params = (param_dim == 2)
    nplots = 1
    if plot_data:
        nplots += 1
    if plot_params:
        nplots += 1
    plot_rows, plot_cols = util.nsubplots(nplots)

    if config['optimizer'] == 'BFGS':
        obj_fun = lambda params: model.PNLL(params, Xtrain, Ytrain)
        grad_fun = autograd.grad(obj_fun)
        logger = opt.OptimLogger(lambda params: obj_fun(params), eval_freq=1, store_freq=1, print_freq=1)
        params, obj = opt.bfgs(obj_fun, grad_fun, initial_params, config['num_epochs'], logger.callback)

    if config['optimizer'] == 'SGD':
        B = config['batch_size']
        M = N / B  # num minibatches per epoch (num iter per epoch)
        max_iters = config['num_epochs'] * M
        grad_fun_with_iter = opt.build_batched_grad(model.gradient, config['batch_size'], Xtrain, Ytrain)
        #obj_fun = opt.build_batched_grad(model.PNLL, config['batch_size'], Xtrain, Ytrain)
        obj_fun = lambda params: model.PNLL(params, Xtrain, Ytrain)
        sf = config.get('store_freq', M)
        logger = opt.OptimLogger(obj_fun, eval_freq=sf, store_freq=sf, print_freq=0)
        sgd_fun = config['sgd_fun']
        if config['lr_tune']:
            eval_fun = lambda params: model.PNLL(params, Xtrain, Ytrain)
            lr, lrs, scores = opt.lr_tuner(eval_fun, 'grid', sgd_fun, grad_fun_with_iter,
                                           initial_params, int(np.ceil(max_iters * 0.1)))
            print 'lr tuner chose lr {:0.3f}'.format(lr)
            print lrs
            print scores
            config['lr_init'] = lr
        lr_fun = lambda iter: opt.lr_exp_decay(iter, config['lr_init'], config['lr_decay'], config['lr_step'])
        params, obj = sgd_fun(obj_fun, grad_fun_with_iter, initial_params, max_iters, logger.callback, lr_fun)

    training_loss = model.PNLL(params, Xtrain, Ytrain)
    print 'finished fitting, training loss {:0.3g}, {} obj calls, {} grad calls'.\
        format(training_loss, model.num_obj_fun_calls, model.num_grad_fun_calls)

    fig = plt.figure()
    ax = fig.add_subplot(plot_rows, plot_cols, 1)
    opt.plot_loss_trace(logger.eval_trace, loss_opt, ax)
    ax.set_title('final objective {:0.3g}'.format(training_loss))
    ax.set_xlabel('epochs')

    if plot_data:
        ax = fig.add_subplot(plot_rows, plot_cols, 2)
        predict_fun = lambda X: model.predictions(params, X)
        demo.plot_data_and_predictions_1d(Xtrain, Ytrain, true_fun, predict_fun, ax)

    if plot_params:
        ax = fig.add_subplot(plot_rows, plot_cols, 3)
        loss_fun = lambda w0, w1: model.PNLL(np.array([w0, w1]), Xtrain, Ytrain)
        demo.plot_error_surface_2d(loss_fun, params, params_true, config['fun_type'], ax)
        demo.plot_param_trace_2d(logger.param_trace, ax)

    ttl = config_to_str(config)  # recompute in case lr has been estimated
    fig.suptitle(ttl)
    folder = 'figures/linreg-sgd'
    fname = os.path.join(folder, 'linreg_1d_sgd_{}.png'.format(ttl))
    plt.savefig(fname)
    return training_loss
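# The schedule opt.lr_exp_decay(iter, init_lr, decay, step) used above is not
# shown; a common form, and the one assumed here, is a staircase exponential
# decay of the initial learning rate:
def lr_exp_decay_sketch(iteration, init_lr, decay, step=1):
    """init_lr scaled down by 'decay' once every 'step' iterations."""
    return init_lr * decay ** (iteration // step)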
def main():
    np.random.seed(1)
    folder = 'figures/linreg-sgd'
    N = 50
    num_epochs = 100
    #fun_type = 'linear'
    fun_type = 'sine'
    #fun_type = 'quad'
    #model_type = 'linear'
    model_type = 'mlp:1-10-1'

    configs = []
    # BFGS has to be the first config, in order to compute loss_opt
    configs.append({'fun_type': fun_type, 'N': N, 'model': model_type,
                    'optimizer': 'BFGS'})
    configs.append({'fun_type': fun_type, 'N': N, 'model': model_type,
                    'optimizer': 'SGD', 'batch_size': 10, 'num_epochs': num_epochs,
                    'method': 'Rprop', 'improved': True})
    configs.append({'fun_type': fun_type, 'N': N, 'model': model_type,
                    'optimizer': 'SGD', 'batch_size': 10, 'num_epochs': num_epochs,
                    'lr_fun': 'exp', 'init_lr': 0.05, 'lr_decay': 0.9,
                    'method': 'momentum', 'mass': 0.9})
    configs.append({'fun_type': fun_type, 'N': N, 'model': model_type,
                    'optimizer': 'SGD', 'batch_size': 10, 'num_epochs': num_epochs,
                    'lr_fun': 'exp', 'init_lr': 0.05, 'lr_decay': 0.9,
                    'method': 'RMSprop', 'grad_sq_decay': 0.9})
    configs.append({'fun_type': fun_type, 'N': N, 'model': model_type,
                    'optimizer': 'SGD', 'batch_size': 10, 'num_epochs': num_epochs,
                    'lr_fun': 'exp', 'init_lr': 0.05, 'lr_decay': 0.9,
                    'method': 'ADAM', 'grad_decay': 0.9, 'grad_sq_decay': 0.999})
    configs.append({'fun_type': fun_type, 'N': N, 'model': model_type,
                    'optimizer': 'SGD', 'batch_size': 10, 'num_epochs': num_epochs,
                    'lr_fun': 'const', 'init_lr': 0.05,
                    'method': 'ADAM', 'grad_decay': 0.9, 'grad_sq_decay': 0.999})
    configs.append({'fun_type': fun_type, 'N': N, 'model': model_type,
                    'optimizer': 'SGD', 'batch_size': 10, 'num_epochs': num_epochs,
                    'lr_fun': 'const', 'init_lr': 0.001,
                    'method': 'ADAM', 'grad_decay': 0.9, 'grad_sq_decay': 0.999})

    params_opt = None
    loss_opt = None
    for expt_num, config in enumerate(configs):
        np.random.seed(1)
        ttl = config_to_str(config)
        print '\nstarting experiment {}'.format(ttl)
        print config

        Xtrain, Ytrain, params_true, true_fun, fun_name = \
            make_data_linreg_1d(config['N'], config['fun_type'])
        data_dim = Xtrain.shape[1]

        if model_type == 'linear':
            model = LinregModel(data_dim, add_ones=True)
            params_opt, loss_opt = model.ols_fit(Xtrain, Ytrain)
        elif model_type[0:3] == 'mlp':
            _, layer_sizes = model_type.split(':')
            layer_sizes = [int(n) for n in layer_sizes.split('-')]
            model = MLP(layer_sizes, 'regression', L2_reg=0.001)
        else:
            raise ValueError('unknown model type {}'.format(model_type))
        initial_params = model.init_params()
        obj_fun = model.PNLL
        grad_fun = model.gradient
        param_dim = len(initial_params)

        plot_data = (data_dim == 1)
        plot_params = (param_dim == 2)
        nplots = 2
        if plot_data:
            nplots += 1
        if plot_params:
            nplots += 1
        plot_rows, plot_cols = util.nsubplots(nplots)

        if config['optimizer'] == 'BFGS':
            logger = sgd.MinimizeLogger(obj_fun, grad_fun, (Xtrain, Ytrain), print_freq=1, store_params=True)
            params, loss, n_fun_evals = sgd.bfgs_fit(initial_params, obj_fun, grad_fun,
                                                     (Xtrain, Ytrain), logger.update)
            num_props = n_fun_evals * config['N']
            loss_avg = loss
            if params_opt is None:
                params_opt = params
                loss_opt = loss

        if config['optimizer'] == 'SGD':
            logger = sgd.SGDLogger(print_freq=20, store_params=True)
            if config.has_key('lr_fun'):
                if config['lr_fun'] == 'exp':
                    lr_fun = lambda iter, epoch: sgd.lr_exp_decay(iter, config['init_lr'], config['lr_decay'])
                if config['lr_fun'] == 'const':
                    lr_fun = lambda iter, epoch: config['init_lr']
            else:
                lr_fun = None
            if config['method'] == 'momentum':
                sgd_updater = sgd.SGDMomentum(lr_fun, config['mass'])
            if config['method'] == 'RMSprop':
                sgd_updater = sgd.RMSprop(lr_fun, config['grad_sq_decay'])
            if config['method'] == 'ADAM':
                sgd_updater = sgd.ADAM(lr_fun, config['grad_decay'], config['grad_sq_decay'])
            if config['method'] == 'Rprop':
                sgd_updater = sgd.Rprop(improved_Rprop=config['improved'])
            params, loss, num_minibatch_updates, params_avg, loss_avg = sgd.sgd_minimize(
                initial_params, obj_fun, grad_fun, Xtrain, Ytrain,
                config['batch_size'], config['num_epochs'], sgd_updater, logger.update)
            num_props = num_minibatch_updates * config['batch_size']

        print 'finished fitting, {} obj, {} grad, {} props'.format(
            model.num_obj_fun_calls, model.num_grad_fun_calls, num_props)

        fig = plt.figure()
        ax = fig.add_subplot(plot_rows, plot_cols, 1)
        plot_loss_trace(logger.obj_trace, loss_opt, ax, num_props)
        ax.set_title('final objective {:0.3f}, {:0.3f}'.format(loss, loss_avg))

        ax = fig.add_subplot(plot_rows, plot_cols, 2)
        ax.plot(logger.grad_norm_trace)
        ax.set_title('gradient norm vs num updates')

        if plot_data:
            ax = fig.add_subplot(plot_rows, plot_cols, 3)
            predict_fun = lambda X: model.predictions(params, X)
            plot_data_and_predictions_1d(Xtrain, Ytrain, true_fun, predict_fun, ax)

        if plot_params:
            ax = fig.add_subplot(plot_rows, plot_cols, 4)
            loss_fun = lambda w0, w1: model.PNLL([w0, w1], Xtrain, Ytrain)
            plot_error_surface_2d(loss_fun, params_opt, params_true, fun_type, ax)
            plot_param_trace_2d(logger.param_trace, ax)

        fig.suptitle(ttl)
        fname = os.path.join(folder, 'linreg_1d_sgd_{}.png'.format(ttl))
        plt.savefig(fname)

    plt.show()
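# The sgd.ADAM / sgd.RMSprop updaters are configured above with decay rates for
# the gradient and squared-gradient moving averages. For reference, the standard
# ADAM update they presumably implement is sketched below as a plain function
# (interface, default step size and epsilon are illustrative, not the repo's API):
import numpy as np

def adam_step(params, grad, m, v, t, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
    """One ADAM update; m, v are running first/second moment estimates, t >= 1."""
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * grad ** 2
    m_hat = m / (1 - beta1 ** t)  # bias correction of the first moment
    v_hat = v / (1 - beta2 ** t)  # bias correction of the second moment
    params = params - lr * m_hat / (np.sqrt(v_hat) + eps)
    return params, m, v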
def run_expt(config, loss_opt=0):
    ttl = config_to_str(config)
    print '\nstarting experiment {}'.format(ttl)
    print config

    Xtrain, Ytrain, params_true, true_fun, fun_name = demo.make_data_linreg_1d(config['N'], config['fun_type'])
    data_dim = Xtrain.shape[1]
    N = Xtrain.shape[0]
    Xtrain, Ytrain = opt.shuffle_data(Xtrain, Ytrain)

    model_type = config['model_type']
    if model_type == 'linear':
        model = LinregModel(data_dim, add_ones=True)
        params, loss = model.ols_fit(Xtrain, Ytrain)
    elif model_type[0:3] == 'mlp':
        _, layer_sizes = model_type.split(':')
        layer_sizes = [int(n) for n in layer_sizes.split('-')]
        model = MLP(layer_sizes, 'regression', L2_reg=0.001, Ntrain=N)
    else:
        raise ValueError('unknown model type {}'.format(model_type))
    initial_params = model.init_params()
    param_dim = len(initial_params)

    plot_data = (data_dim == 1)
    plot_params = (param_dim == 2)
    nplots = 2
    if plot_data:
        nplots += 1
    if plot_params:
        nplots += 1
    plot_rows, plot_cols = util.nsubplots(nplots)

    if config['optimizer'] == 'BFGS':
        obj_fun = lambda params: model.PNLL(params, Xtrain, Ytrain)
        logger = opt.OptimLogger(lambda params, iter: obj_fun(params), store_freq=1, print_freq=10)
        params = opt.bfgs(autograd.value_and_grad(obj_fun), initial_params, logger.callback, config['num_epochs'])

    if config['optimizer'] == 'SGD':
        B = config['batch_size']
        M = N / B  # num minibatches per epoch (num iter per epoch)
        max_iters = config['num_epochs'] * M
        grad_fun = opt.build_batched_grad(model.gradient, config['batch_size'], Xtrain, Ytrain)
        #obj_fun = opt.build_batched_grad(model.PNLL, config['batch_size'], Xtrain, Ytrain)
        obj_fun = lambda params, iter: model.PNLL(params, Xtrain, Ytrain)
        logger = opt.OptimLogger(obj_fun, store_freq=M, print_freq=M*10, store_params=plot_params)

        if config.has_key('lr_fun'):
            if config['lr_fun'] == 'exp':
                lr_fun = lambda iter: opt.lr_exp_decay(iter, config['init_lr'], config['lr_decay'])
            elif config['lr_fun'] == 'const':
                lr_fun = opt.const_lr(config['init_lr'])
            else:
                raise ValueError('Unknown lr-fun {}'.format(config['lr_fun']))

        #sgd_fun = config['sgd-fun']
        #params = sgd_fun(grad_fun, initial_params, logger.callback,
        #                 max_iters, lr_fun, *config['args'])
        if config['sgd-method'] == 'momentum':
            params = opt.sgd(grad_fun, initial_params, logger.callback,
                             max_iters, lr_fun, config['mass'])
        elif config['sgd-method'] == 'RMSprop':
            params = opt.rmsprop(grad_fun, initial_params, logger.callback,
                                 max_iters, lr_fun, config['grad_sq_decay'])
        elif config['sgd-method'] == 'ADAM':
            params = opt.adam(grad_fun, initial_params, logger.callback,
                              max_iters, lr_fun, config['grad_decay'], config['grad_sq_decay'])
        elif config['sgd-method'] == 'AutoADAM':
            eval_fn = lambda params: model.PNLL(params, Xtrain, Ytrain)
            params, lr, scores = opt.autoadam(grad_fun, initial_params, logger.callback,
                                              max_iters, eval_fn, config['auto-method'])
            config['init_lr'] = lr
            config['lr_fun'] = 'const'
            ttl = config_to_str(config)
            print 'autoadam: chose {:0.3f} as lr'.format(lr)
            print scores
        else:
            raise ValueError('Unknown SGD method {}'.format(config['sgd-method']))

    training_loss = model.PNLL(params, Xtrain, Ytrain)
    print 'finished fitting, training loss {:0.3f}, {} obj calls, {} grad calls'.\
        format(training_loss, model.num_obj_fun_calls, model.num_grad_fun_calls)

    fig = plt.figure()
    ax = fig.add_subplot(plot_rows, plot_cols, 1)
    opt.plot_loss_trace(logger.obj_trace, loss_opt, ax)
    ax.set_title('final objective {:0.3f}'.format(training_loss))
    ax.set_xlabel('epochs')

    ax = fig.add_subplot(plot_rows, plot_cols, 2)
    ax.plot(logger.grad_norm_trace)
    ax.set_title('gradient norm vs num updates')

    if plot_data:
        ax = fig.add_subplot(plot_rows, plot_cols, 3)
        predict_fun = lambda X: model.predictions(params, X)
        demo.plot_data_and_predictions_1d(Xtrain, Ytrain, true_fun, predict_fun, ax)

    if plot_params:
        ax = fig.add_subplot(plot_rows, plot_cols, 4)
        loss_fun = lambda w0, w1: model.PNLL(np.array([w0, w1]), Xtrain, Ytrain)
        demo.plot_error_surface_2d(loss_fun, params, params_true, config['fun_type'], ax)
        demo.plot_param_trace_2d(logger.param_trace, ax)

    fig.suptitle(ttl)
    folder = 'figures/linreg-sgd'
    fname = os.path.join(folder, 'linreg_1d_sgd_{}.png'.format(ttl))
    plt.savefig(fname)
    return training_loss
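# opt.build_batched_grad(model.gradient, batch_size, Xtrain, Ytrain) is used above
# to turn a full-data gradient into a minibatch gradient indexed by the iteration
# counter. A plausible sketch modelled on the call sites (the repo's helper may
# shuffle or cycle through the data differently):
def build_batched_grad_sketch(grad, batch_size, X, y):
    N = X.shape[0]
    num_batches = max(1, N // batch_size)

    def batched_grad(params, iteration):
        # cycle deterministically through contiguous minibatches
        b = iteration % num_batches
        idx = slice(b * batch_size, (b + 1) * batch_size)
        return grad(params, X[idx], y[idx])

    return batched_grad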