def hyperloss_grad(hyperparam_vect, i):
    learning_curve = []
    def callback(x, i):
        if i % len(batch_idxs) == 0:
            learning_curve.append(loss_fun(x, X=train_images, T=train_labels))

    npr.seed(i)
    N_weights = parser.vect.size
    V0 = np.zeros(N_weights)
    cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
    layer_param_scale = [np.full(parser[name].size,
                                 np.exp(cur_hyperparams['log_param_scale'][i]))
                         for i, name in enumerate(parser.names)]
    W0 = npr.randn(N_weights) * np.concatenate(layer_param_scale, axis=0)
    alphas = np.exp(cur_hyperparams['log_alphas'])
    betas = logit(cur_hyperparams['invlogit_betas'])
    log_L2_reg = cur_hyperparams['log_L2_reg']
    results = sgd3(indexed_loss_fun, valid_loss_fun, W0, V0,
                   alphas, betas, log_L2_reg, callback=callback)
    hypergrads = hyperparams.copy()
    hypergrads['log_L2_reg'] = results['dMd_meta']
    weights_grad = parser.new_vect(W0 * results['dMd_x'])
    hypergrads['log_param_scale'] = [np.sum(weights_grad[name]) for name in parser.names]
    hypergrads['log_alphas'] = results['dMd_alphas'] * alphas
    hypergrads['invlogit_betas'] = (results['dMd_betas'] *
                                    d_logit(cur_hyperparams['invlogit_betas']))
    all_x.append(results['x_final'])
    all_learning_curves.append(learning_curve)
    return hypergrads.vect
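# A minimal usage sketch (not from the repo): plain gradient descent on the
# hyperparameter vector using the hypergradient returned by hyperloss_grad above.
# meta_alpha and N_meta_iters are illustrative names only; a real experiment
# might use a fancier meta-optimizer.
def meta_descent(init_hyperparam_vect, meta_alpha=0.05, N_meta_iters=50):
    hv = init_hyperparam_vect.copy()
    for i_hyper in range(N_meta_iters):
        hv = hv - meta_alpha * hyperloss_grad(hv, i_hyper)
    return hv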
def primal_optimizer(hyperparam_vect, i_hyper):
    def indexed_loss_fun(w, meta_vect, i_iter):
        # `meta` is taken from the enclosing scope (assigned below, before
        # sgd_parsed runs), not from the meta_vect argument.
        (train_data, train_labels, L2_vect) = meta
        return loss_fun(w, train_data, train_labels, L2_vect)
        #return loss_fun(w, train_data['X'], train_data['T'], L2_vect + np.sum(fake_data.ravel()))

    learning_curve_dict = defaultdict(list)
    def callback(x, v, g, i_iter):
        if i_iter % thin == 0:
            # learning_curve_dict['learning_curve'].append(loss_fun(x, getval(cur_hyperparams['fake_data']), fake_labels))
            learning_curve_dict['grad_norm'].append(np.linalg.norm(g))
            learning_curve_dict['weight_norm'].append(np.linalg.norm(x))
            learning_curve_dict['velocity_norm'].append(np.linalg.norm(v))

    cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
    fake_data = cur_hyperparams['fake_data']
    rs = RandomState((seed, i_hyper))
    W0 = fill_parser(parser, np.exp(fixed_hyperparams['log_param_scale']))
    W0 *= rs.randn(W0.size)
    alphas = np.exp(fixed_hyperparams['log_alphas'])
    betas = logit(fixed_hyperparams['invlogit_betas'])
    L2_reg = fill_parser(parser, np.exp(fixed_hyperparams['log_L2_reg']))
    meta = kylist(fake_data, fake_labels, L2_reg)
    W_opt = sgd_parsed(grad(indexed_loss_fun), kylist(W0, alphas, betas, meta),
                       parser, callback=callback)
    cur_primal_results['weights'] = getval(W_opt).copy()
    cur_primal_results['learning_curve'] = getval(learning_curve_dict)
    return W_opt, learning_curve_dict
def primal_optimizer(hyperparam_vect, i_hyper):
    def indexed_loss_fun(w, L2_vect, i_iter):
        rs = RandomState((seed, i_hyper, i_iter))  # Deterministic seed needed for backwards pass.
        idxs = rs.randint(N_train, size=batch_size)
        return loss_fun(w, train_data["X"][idxs], train_data["T"][idxs], L2_vect)

    learning_curve_dict = defaultdict(list)
    def callback(x, v, g, i_iter):
        if i_iter % thin == 0:
            learning_curve_dict["learning_curve"].append(loss_fun(x, **train_data))
            learning_curve_dict["grad_norm"].append(np.linalg.norm(g))
            learning_curve_dict["weight_norm"].append(np.linalg.norm(x))
            learning_curve_dict["velocity_norm"].append(np.linalg.norm(v))

    cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
    rs = RandomState((seed, i_hyper))
    W0 = fill_parser(parser, np.exp(cur_hyperparams["log_param_scale"]))
    W0 *= rs.randn(W0.size)
    alphas = np.exp(cur_hyperparams["log_alphas"])
    betas = logit(cur_hyperparams["invlogit_betas"])
    L2_reg = fill_parser(parser, np.exp(fixed_hyperparams["log_L2_reg"]))
    W_opt = sgd4(grad(indexed_loss_fun), kylist(W0, alphas, betas, L2_reg), callback)
    # callback(W_opt, N_iters)
    return W_opt, learning_curve_dict
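# Sketch of the property the "Deterministic seed" comment above relies on: when
# the reverse pass re-evaluates the gradient at iteration i_iter, the same
# (seed, i_hyper, i_iter) tuple must reproduce the identical minibatch. This
# assumes RandomState is the repo's deterministic wrapper; seed, i_hyper,
# N_train and batch_size are reused here purely for illustration.
def same_minibatch(i_iter, i_hyper):
    idxs_fwd = RandomState((seed, i_hyper, i_iter)).randint(N_train, size=batch_size)
    idxs_bwd = RandomState((seed, i_hyper, i_iter)).randint(N_train, size=batch_size)
    return np.all(idxs_fwd == idxs_bwd)  # expected to be True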
def hyperloss_grad(hyperparam_vect, ii):
    learning_curve = []
    params_curve = []
    def callback(x, i):
        params_curve.append(x)
        learning_curve.append(loss_fun(x))

    def indexed_loss_fun(w, log_L2_reg, j):
        return loss_fun(w)

    cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
    W0 = np.ones(N_weights) * init_param_scale
    V0 = cur_hyperparams['V0']
    alphas = np.exp(cur_hyperparams['log_alphas'])
    betas = logit(cur_hyperparams['invlogit_betas'])
    log_L2_reg = 0.0
    results = sgd3(indexed_loss_fun, loss_fun, W0, V0,
                   alphas, betas, log_L2_reg, callback=callback)
    hypergrads = hyperparams.copy()
    hypergrads['V0'] = results['dMd_v']
    hypergrads['log_alphas'] = results['dMd_alphas'] * alphas
    hypergrads['invlogit_betas'] = (results['dMd_betas'] *
                                    d_logit(cur_hyperparams['invlogit_betas']))
    all_x.append(results['x_final'])
    all_learning_curves.append(learning_curve)
    all_param_curves.append(params_curve)
    return hypergrads.vect
def primal_optimizer(hyperparam_vect, i_hyper):
    def indexed_loss_fun(w, L2_vect, i_iter):
        rs = RandomState((seed, i_hyper, i_iter))  # Deterministic seed needed for backwards pass.
        idxs = rs.randint(N_train, size=batch_size)
        return loss_fun(w, train_data['X'][idxs], train_data['T'][idxs], L2_vect)

    learning_curve_dict = defaultdict(list)
    def callback(x, v, g, i_iter):
        if i_iter % thin == 0 or i_iter == N_iters or i_iter == 0:
            learning_curve_dict['learning_curve'].append(loss_fun(x, **train_data))
            learning_curve_dict['grad_norm'].append(np.linalg.norm(g))
            learning_curve_dict['weight_norm'].append(np.linalg.norm(x))
            learning_curve_dict['velocity_norm'].append(np.linalg.norm(v))
            learning_curve_dict['iteration'].append(i_iter + 1)
            print "iteration", i_iter

    cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
    rs = RandomState((seed, i_hyper))
    W0 = fill_parser(parser, np.exp(cur_hyperparams['log_param_scale']))
    W0 *= rs.randn(W0.size)
    alphas = np.exp(cur_hyperparams['log_alphas'])
    betas = logit(cur_hyperparams['invlogit_betas'])
    L2_reg = fill_parser(parser, np.exp(fixed_hyperparams['log_L2_reg']))
    W_opt = sgd_parsed(grad(indexed_loss_fun), kylist(W0, alphas, betas, L2_reg),
                       parser, callback=callback)
    return W_opt, learning_curve_dict
def primal_optimizer(hyperparam_vect, i_hyper):
    learning_curve = []
    def callback(x, i_iter):
        learning_curve.append(loss_fun(x))

    cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
    W0 = fill_parser(parser, np.exp(cur_hyperparams['log_param_scale']))
    W0 *= npr.RandomState(hash(i_hyper)).randn(W0.size)
    alphas = np.exp(cur_hyperparams['log_alphas'])
    betas = logit(cur_hyperparams['invlogit_betas'])
    L2_reg = fill_parser(parser, np.exp(cur_hyperparams['log_L2_reg']))
    W_opt = sgd4(grad(loss_fun), kylist(W0, alphas, betas, L2_reg), callback)
    callback(W_opt, N_iters)
    return W_opt, learning_curve
def run():
    N_iters = N_epochs
    parser, loss_fun = make_toy_funs()
    N_weights = parser.vect.size
    hyperparams = VectorParser()
    hyperparams["log_alphas"] = np.full(N_iters, init_log_alphas)
    hyperparams["invlogit_betas"] = np.full(N_iters, init_invlogit_betas)
    hyperparams["V0"] = np.full(N_weights, init_V0)

    forward_path = []
    forward_learning_curve = []
    def fwd_callback(x, i):
        print type(x[0])
        forward_path.append(x.copy())
        forward_learning_curve.append(loss_fun(x))

    reverse_path = []
    reverse_learning_curve = []
    def reverse_callback(x, i):
        reverse_path.append(x.copy())
        reverse_learning_curve.append(loss_fun(x))

    def indexed_loss_fun(w, log_L2_reg, j):
        return loss_fun(w)

    cur_hyperparams = hyperparams
    W0 = init_params
    V0 = cur_hyperparams["V0"]
    alphas = np.exp(cur_hyperparams["log_alphas"])
    betas = logit(cur_hyperparams["invlogit_betas"])
    log_L2_reg = 0.0
    sgd3_naive(indexed_loss_fun, W0, V0, alphas, betas, log_L2_reg,
               fwd_callback=fwd_callback, reverse_callback=reverse_callback)
    return forward_path, forward_learning_curve, reverse_path, reverse_learning_curve
def primal_optimizer(hyperparam_vect, i_hyper):
    def indexed_loss_fun(w, L2_vect, i_iter):
        seed = i_hyper * 10**6 + i_iter
        idxs = npr.RandomState(seed).randint(N_train, size=batch_size)
        return loss_fun(w, train_data['X'][idxs], train_data['T'][idxs], L2_vect)

    learning_curve = []
    def callback(x, v, g, i_iter):
        if i_iter % N_batches == 0:
            learning_curve.append(loss_fun(x, **train_data))

    cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
    W0 = fill_parser(parser, np.exp(cur_hyperparams['log_param_scale']))
    W0 *= npr.RandomState(i_hyper).randn(W0.size)
    alphas = np.exp(cur_hyperparams['log_alphas'])
    betas = logit(cur_hyperparams['invlogit_betas'])
    L2_reg = fill_parser(parser, np.exp(cur_hyperparams['log_L2_reg']))
    V0 = np.zeros(W0.size)
    W_opt = sgd4(grad(indexed_loss_fun), kylist(W0, alphas, betas, L2_reg), callback)
    return W_opt, learning_curve
def hyperloss(hyperparam_vect, i):
    learning_curve = []
    def callback(x, i):
        if i % len(batch_idxs) == 0:
            learning_curve.append(loss_fun(x, X=train_images, T=train_labels))

    npr.seed(i)
    N_weights = parser.vect.size
    V0 = np.zeros(N_weights)
    cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
    layer_param_scale = [np.full(parser[name].size,
                                 np.exp(cur_hyperparams['log_param_scale'][i]))
                         for i, name in enumerate(parser.names)]
    W0 = npr.randn(N_weights) * np.concatenate(layer_param_scale, axis=0)
    alphas = np.exp(cur_hyperparams['log_alphas'])
    betas = logit(cur_hyperparams['invlogit_betas'])
    log_L2_reg = cur_hyperparams['log_L2_reg']
    W_opt = sgd5(grad(indexed_loss_fun), kylist(W0, alphas, betas, log_L2_reg), callback)
    all_x.append(getval(W_opt))
    all_learning_curves.append(learning_curve)
    return valid_loss_fun(W_opt)
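# Sketch: unlike the hyperloss_grad functions elsewhere in this section, this
# hyperloss returns a scalar validation loss, so the hypergradient can be obtained
# by differentiating it with the same grad operator used on the primal loss
# (assuming, as the rest of this codebase does, that sgd5 is written to be
# differentiated through; hyperparams is the VectorParser built in the
# surrounding script):
hypergrad_fun = grad(hyperloss)
meta_gradient = hypergrad_fun(hyperparams.vect, 0)  # hypergradient at meta-iteration 0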
def primal_optimizer(hyperparam_vect, i_hyper):
    def indexed_loss_fun(w, L2_vect, i_iter):
        rs = npr.RandomState(npr.RandomState(global_seed + i_hyper).randint(1000))
        seed = i_hyper * 10**6 + i_iter  # Deterministic seed needed for backwards pass.
        idxs = rs.randint(N_train, size=batch_size)
        return loss_fun(w, train_data['X'][idxs], train_data['T'][idxs], L2_vect)

    learning_curve = []
    def callback(x, i_iter):
        if i_iter % N_batches == 0:
            learning_curve.append(loss_fun(x, **train_data))

    cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
    W0 = fill_parser(parser, np.exp(cur_hyperparams['log_param_scale']))
    W0 *= npr.RandomState(global_seed + i_hyper).randn(W0.size)
    alphas = np.exp(cur_hyperparams['log_alphas'])
    betas = logit(cur_hyperparams['invlogit_betas'])
    L2_reg = fill_parser(parser, np.exp(cur_hyperparams['log_L2_reg']))
    W_opt = sgd4(grad(indexed_loss_fun), kylist(W0, alphas, betas, L2_reg), callback)
    callback(W_opt, N_iters)
    return W_opt, learning_curve
def plot():
    import matplotlib.pyplot as plt
    from matplotlib import rc
    rc('font', **{'family':'serif'})
    with open('results.pkl') as f:
        results, parser, parsed_init_hypergrad = pickle.load(f)
    #rc('text', usetex=True)

    # ----- Small versions of stepsize schedules for paper -----
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(111)
    def layer_name(weight_key):
        return "Layer {num}".format(num=weight_key[1] + 1)
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(np.exp(cur_results), 'o-', label=layer_name(name))
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Learning rate')
    ax.set_xlabel('Schedule index')
    fig.set_size_inches((6,2.5))
    ax.legend(numpoints=1, loc=1, frameon=False, prop={'size':'12'})
    plt.savefig('schedules_small.pdf', pad_inches=0.05, bbox_inches='tight')

    # ----- Alpha and beta initial hypergradients -----
    print "Plotting initial gradients..."
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(411)
    for cur_results, name in zip(parsed_init_hypergrad['log_alphas'].T, parser.names):
        if name[0] == 'weights':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_ylabel('Step size Gradient', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})
    ax = fig.add_subplot(412)
    for cur_results, name in zip(parsed_init_hypergrad['invlogit_betas'].T, parser.names):
        if name[0] == 'weights':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum Gradient', fontproperties='serif')
    ax = fig.add_subplot(413)
    for cur_results, name in zip(parsed_init_hypergrad['log_alphas'].T, parser.names):
        if name[0] == 'biases':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_ylabel('Step size Gradient', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})
    ax = fig.add_subplot(414)
    for cur_results, name in zip(parsed_init_hypergrad['invlogit_betas'].T, parser.names):
        if name[0] == 'biases':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum Gradient', fontproperties='serif')
    fig.set_size_inches((6,8))
    #plt.show()
    plt.savefig('initial_gradient.png')
    plt.savefig('initial_gradient.pdf', pad_inches=0.05, bbox_inches='tight')

    # ----- Nice versions of Alpha and beta schedules for paper -----
    print "Plotting full alpha and beta schedules curves..."
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(411)
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(np.exp(cur_results), 'o-', label=nice_layer_name(name))
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Step size', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})
    ax = fig.add_subplot(412)
    for cur_results, name in zip(results['invlogit_betas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(logit(cur_results), 'o-', label=nice_layer_name(name))
    ax.set_ylim([0, 1])
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum', fontproperties='serif')
    ax = fig.add_subplot(413)
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'biases':
            ax.plot(np.exp(cur_results), 'o-', label=nice_layer_name(name))
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Step size', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})
    ax = fig.add_subplot(414)
    for cur_results, name in zip(results['invlogit_betas'][-1].T, parser.names):
        if name[0] == 'biases':
            ax.plot(logit(cur_results), 'o-', label=nice_layer_name(name))
    ax.set_ylim([0, 1])
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum', fontproperties='serif')
    fig.set_size_inches((6,8))
    #plt.show()
    plt.savefig('alpha_beta_paper.png')
    plt.savefig('alpha_beta_paper.pdf', pad_inches=0.05, bbox_inches='tight')

    print "Plotting learning curves..."
    fig.clf()
    fig.set_size_inches((6,8))
    # ----- Primal learning curves -----
    ax = fig.add_subplot(311)
    ax.set_title('Primal learning curves')
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['learning_curve'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.set_ylabel('Negative log prob')
    #ax.legend(loc=1, frameon=False)
    ax = fig.add_subplot(312)
    ax.set_title('Meta learning curves')
    losses = ['train_loss', 'valid_loss', 'tests_loss']
    for loss_type in losses:
        ax.plot(results[loss_type], 'o-', label=loss_type)
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Negative log prob')
    ax.legend(loc=1, frameon=False)
    ax = fig.add_subplot(313)
    ax.set_title('Meta-gradient magnitude')
    ax.plot(results['meta_grad_magnitude'], 'o-', label='Meta-gradient magnitude')
    ax.plot(results['meta_grad_angle'], 'o-', label='Meta-gradient angle')
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Meta-gradient Magnitude')
    ax.legend(loc=1, frameon=False)
    plt.savefig('learning_curves.png')

    # ----- Learning curve info -----
    print "Plotting extra learning curves..."
    fig.clf()
    ax = fig.add_subplot(311)
    ax.set_title('Primal learning curves')
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['grad_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    #ax.legend(loc=1, frameon=False)
    ax.set_title('Grad norm')
    ax = fig.add_subplot(312)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['weight_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.legend(loc=1, frameon=False)
    ax.set_title('Weight norm')
    ax = fig.add_subplot(313)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['velocity_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.set_title('Velocity norm')
    ax.legend(loc=1, frameon=False)
    plt.savefig('extra_learning_curves.png')

    # ----- Init scale and L2 reg -----
    print "Plotting initialization distributions and regularization..."
    fig.clf()
    ax = fig.add_subplot(111)
    #ax.set_title('Init scale learning curves')
    for i, y in enumerate(zip(*results['log_param_scale'])):
        if parser.names[i][0] == 'weights':
            ax.plot(np.exp(y), 'o-', label=layer_name(parser.names[i]))
    # Show lines for theoretical optimum.
    y1 = 1.0/np.sqrt(layer_sizes[0])
    y2 = 1.0/np.sqrt(layer_sizes[1])
    ax.plot(ax.get_xlim(), (y1, y1), 'b--')
    ax.plot(ax.get_xlim(), (y2, y2), 'k--')
    ax.set_xlabel('Meta iteration')
    ax.set_ylabel('Initial scale')
    #ax.set_yscale('log')
    #ax.legend(loc=1, frameon=False)
    fig.set_size_inches((2.5,2.5))
    #ax.legend(numpoints=1, loc=1, frameon=False, prop={'size':'12'})
    plt.savefig('init_weight_learning_curve.pdf', pad_inches=0.05, bbox_inches='tight')

    fig.clf()
    ax = fig.add_subplot(111)
    #ax.set_title('Init scale learning curves')
    for i, y in enumerate(zip(*results['log_param_scale'])):
        if parser.names[i][0] == 'biases':
            ax.plot(np.exp(y), 'o-', label=layer_name(parser.names[i]))
    ax.set_xlabel('Meta iteration')
    #ax.set_ylabel('Scale')
    #ax.set_yscale('log')
    #ax.set_ylabel('Log param scale')
    fig.set_size_inches((2.5,2.5))
    ax.legend(numpoints=1, loc=0, frameon=False, prop={'size':'10'})
    plt.savefig('init_bias_learning_curve.pdf', pad_inches=0.05, bbox_inches='tight')
def plot():
    import matplotlib.pyplot as plt
    with open("results.pkl") as f:
        results, parser = pickle.load(f)

    # ----- Nice versions of Alpha and beta schedules for paper -----
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(211)
    # ax.set_title('Alpha learning curves')
    for cur_results, name in zip(results["log_alphas"][-1].T, parser.names):
        ax.plot(np.exp(cur_results), "o-", label=name)
    # ax.set_xlabel('Learning Iteration', fontproperties='serif')
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel("Step size", fontproperties="serif")
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={"family": "serif", "size": "12"})
    ax = fig.add_subplot(212)
    # ax.set_title('Alpha learning curves')
    for cur_results, name in zip(results["invlogit_betas"][-1].T, parser.names):
        ax.plot(logit(cur_results), "o-", label=name)
    low, high = ax.get_ylim()
    ax.set_ylim([0, 1])
    ax.set_xlabel("Learning Iteration", fontproperties="serif")
    ax.set_ylabel("Momentum", fontproperties="serif")
    fig.set_size_inches((6, 3))
    # plt.show()
    plt.savefig("alpha_beta_paper.png")
    plt.savefig("alpha_beta_paper.pdf", pad_inches=0.05, bbox_inches="tight")

    fig.clf()
    fig.set_size_inches((6, 8))
    # ----- Primal learning curves -----
    ax = fig.add_subplot(311)
    ax.set_title("Primal learning curves")
    for i, y in enumerate(results["learning_curves"]):
        ax.plot(y["learning_curve"], "o-", label="Meta iter {0}".format(i))
    ax.set_xlabel("Epoch number")
    ax.set_ylabel("Negative log prob")
    # ax.legend(loc=1, frameon=False)
    ax = fig.add_subplot(312)
    ax.set_title("Meta learning curves")
    losses = ["train_loss", "valid_loss", "tests_loss"]
    for loss_type in losses:
        ax.plot(results[loss_type], "o-", label=loss_type)
    ax.set_xlabel("Meta iter number")
    ax.set_ylabel("Negative log prob")
    ax.legend(loc=1, frameon=False)
    ax = fig.add_subplot(313)
    ax.set_title("Meta-gradient magnitude")
    ax.plot(results["meta_grad_magnitude"], "o-", label="Meta-gradient magnitude")
    ax.plot(results["meta_grad_angle"], "o-", label="Meta-gradient angle")
    ax.set_xlabel("Meta iter number")
    ax.set_ylabel("Meta-gradient Magnitude")
    ax.legend(loc=1, frameon=False)
    plt.savefig("learning_curves.png")

    # ----- Learning curve info -----
    fig.clf()
    ax = fig.add_subplot(311)
    ax.set_title("Primal learning curves")
    for i, y in enumerate(results["learning_curves"]):
        ax.plot(y["grad_norm"], "o-", label="Meta iter {0}".format(i))
    ax.set_xlabel("Epoch number")
    # ax.legend(loc=1, frameon=False)
    ax.set_title("Grad norm")
    ax = fig.add_subplot(312)
    for i, y in enumerate(results["learning_curves"]):
        ax.plot(y["weight_norm"], "o-", label="Meta iter {0}".format(i))
    ax.set_xlabel("Epoch number")
    ax.legend(loc=1, frameon=False)
    ax.set_title("Weight norm")
    ax = fig.add_subplot(313)
    for i, y in enumerate(results["learning_curves"]):
        ax.plot(y["velocity_norm"], "o-", label="Meta iter {0}".format(i))
    ax.set_xlabel("Epoch number")
    ax.set_title("Velocity norm")
    ax.legend(loc=1, frameon=False)
    plt.savefig("extra_learning_curves.png")

    # ----- Alpha and beta schedules -----
    fig.clf()
    ax = fig.add_subplot(211)
    ax.set_title("Alpha learning curves")
    for i, y in enumerate(results["log_alphas"]):
        ax.plot(y, "o-", label="Meta iter {0}".format(i))
    ax.set_xlabel("Primal iter number")
    # ax.set_ylabel('Log alpha')
    ax.legend(loc=1, frameon=False)
    ax = fig.add_subplot(212)
    ax.set_title("Beta learning curves")
    for y in results["invlogit_betas"]:
        ax.plot(y, "o-")
    ax.set_xlabel("Primal iter number")
    ax.set_ylabel("Inv logit beta")
    plt.savefig("alpha_beta_curves.png")

    # ----- Init scale and L2 reg -----
    fig.clf()
    ax = fig.add_subplot(211)
    ax.set_title("Init scale learning curves")
    for i, y in enumerate(zip(*results["log_param_scale"])):
        ax.plot(y, "o-", label=parser.names[i])
    ax.set_xlabel("Meta iter number")
    ax.set_ylabel("Log param scale")
    ax.legend(loc=1, frameon=False)
    ax = fig.add_subplot(212)
    ax.set_title("L2 reg learning curves")
    for i, y in enumerate(zip(*results["log_L2_reg"])):
        ax.plot(y, "o-", label=parser.names[i])
    ax.set_xlabel("Meta iter number")
    ax.set_ylabel("Log L2 reg")
    ax.legend(loc=1, frameon=False)
    plt.savefig("scale_and_reg.png")
def test_inv_logit():
    assert np.allclose(inv_logit(logit(0.5)), 0.5, rtol=1e-3, atol=1e-4)
    assert np.allclose(inv_logit(logit(0.6)), 0.6, rtol=1e-3, atol=1e-4)
    assert np.allclose(inv_logit(logit(0.1)), 0.1, rtol=1e-3, atol=1e-4)
    assert np.allclose(inv_logit(logit(0.2)), 0.2, rtol=1e-3, atol=1e-4)
def plot():
    import matplotlib.pyplot as plt
    with open('results.pkl') as f:
        results, parser = pickle.load(f)

    # ----- Nice versions of Alpha and beta schedules for paper -----
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(211)
    #ax.set_title('Alpha learning curves')
    ax.plot(np.exp(results['log_alphas'][-1]), 'o-', label="Step size")
    #ax.set_xlabel('Learning Iteration', fontproperties='serif')
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Step size', fontproperties='serif')
    ax.set_xticklabels([])
    ax = fig.add_subplot(212)
    #ax.set_title('Alpha learning curves')
    ax.plot(logit(results['invlogit_betas'][-1]), 'go-', label="Momentum")
    low, high = ax.get_ylim()
    ax.set_ylim([low, 1])
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum', fontproperties='serif')
    #ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
    #          prop={'family':'serif', 'size':'12'})
    fig.set_size_inches((6, 3))
    #plt.show()
    plt.savefig('alpha_beta_paper.png')
    plt.savefig('alpha_beta_paper.pdf', pad_inches=0.05, bbox_inches='tight')

    fig.clf()
    fig.set_size_inches((6, 8))
    # ----- Primal learning curves -----
    ax = fig.add_subplot(311)
    ax.set_title('Primal learning curves')
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['learning_curve'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.set_ylabel('Negative log prob')
    #ax.legend(loc=1, frameon=False)
    ax = fig.add_subplot(312)
    ax.set_title('Meta learning curves')
    losses = ['train_loss', 'valid_loss', 'tests_loss']
    for loss_type in losses:
        ax.plot(results[loss_type], 'o-', label=loss_type)
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Negative log prob')
    ax.legend(loc=1, frameon=False)
    ax = fig.add_subplot(313)
    ax.set_title('Meta-gradient magnitude')
    ax.plot(results['meta_grad_magnitude'], 'o-', label='Meta-gradient magnitude')
    ax.plot(results['meta_grad_angle'], 'o-', label='Meta-gradient angle')
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Meta-gradient Magnitude')
    ax.legend(loc=1, frameon=False)
    plt.savefig('learning_curves.png')

    # ----- Learning curve info -----
    fig.clf()
    ax = fig.add_subplot(311)
    ax.set_title('Primal learning curves')
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['grad_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    #ax.legend(loc=1, frameon=False)
    ax.set_title('Grad norm')
    ax = fig.add_subplot(312)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['weight_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.legend(loc=1, frameon=False)
    ax.set_title('Weight norm')
    ax = fig.add_subplot(313)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['velocity_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.set_title('Velocity norm')
    ax.legend(loc=1, frameon=False)
    plt.savefig('extra_learning_curves.png')

    # ----- Alpha and beta schedules -----
    fig.clf()
    ax = fig.add_subplot(211)
    ax.set_title('Alpha learning curves')
    for i, y in enumerate(results['log_alphas']):
        ax.plot(y, 'o-', label="Meta iter {0}".format(i))
    ax.set_xlabel('Primal iter number')
    #ax.set_ylabel('Log alpha')
    ax.legend(loc=1, frameon=False)
    ax = fig.add_subplot(212)
    ax.set_title('Beta learning curves')
    for y in results['invlogit_betas']:
        ax.plot(y, 'o-')
    ax.set_xlabel('Primal iter number')
    ax.set_ylabel('Inv logit beta')
    plt.savefig('alpha_beta_curves.png')

    # ----- Init scale and L2 reg -----
    fig.clf()
    ax = fig.add_subplot(211)
    ax.set_title('Init scale learning curves')
    for i, y in enumerate(zip(*results['log_param_scale'])):
        ax.plot(y, 'o-', label=parser.names[i])
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Log param scale')
    ax.legend(loc=1, frameon=False)
    ax = fig.add_subplot(212)
    ax.set_title('L2 reg learning curves')
    for i, y in enumerate(zip(*results['log_L2_reg'])):
        ax.plot(y, 'o-', label=parser.names[i])
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Log L2 reg')
    ax.legend(loc=1, frameon=False)
    plt.savefig('scale_and_reg.png')
def d_logit(x):
    return logit(x) * (1 - logit(x))
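# Note: in this codebase `logit` behaves as the logistic sigmoid (see test_logit
# below: logit(0) == 0.5, logit(+/-100) -> 1/0) and `inv_logit` as its inverse,
# so d_logit above is just the sigmoid derivative. A self-contained reference
# sketch consistent with those tests (the repo's own definitions may differ in
# detail):
import numpy as np

def logit(x):
    return 1.0 / (1.0 + np.exp(-x))      # maps the reals to (0, 1)

def inv_logit(p):
    return np.log(p) - np.log(1.0 - p)   # maps (0, 1) back to the reals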
def plot():
    import matplotlib.pyplot as plt
    from matplotlib import rc
    rc('font', **{'family':'serif'})
    with open('results.pkl') as f:
        results, parser, parsed_init_hypergrad = pickle.load(f)
    #rc('text', usetex=True)

    # ----- Small versions of stepsize schedules for paper -----
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(111)
    def layer_name(weight_key):
        return "Layer {num}".format(num=weight_key[1] + 1)
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(np.exp(cur_results), 'o-', label=layer_name(name))
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Learning rate')
    ax.set_xlabel('Schedule index')
    fig.set_size_inches((6,2.5))
    ax.legend(numpoints=1, loc=1, frameon=False, prop={'size':'12'})
    plt.savefig('schedules_small.pdf', pad_inches=0.05, bbox_inches='tight')

    # ----- Alpha and beta initial hypergradients -----
    print "Plotting initial gradients..."
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(411)
    for cur_results, name in zip(parsed_init_hypergrad['log_alphas'].T, parser.names):
        if name[0] == 'weights':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_ylabel('Step size Gradient', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})
    ax = fig.add_subplot(412)
    for cur_results, name in zip(parsed_init_hypergrad['invlogit_betas'].T, parser.names):
        if name[0] == 'weights':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum Gradient', fontproperties='serif')
    ax = fig.add_subplot(413)
    for cur_results, name in zip(parsed_init_hypergrad['log_alphas'].T, parser.names):
        if name[0] == 'biases':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_ylabel('Step size Gradient', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})
    ax = fig.add_subplot(414)
    for cur_results, name in zip(parsed_init_hypergrad['invlogit_betas'].T, parser.names):
        if name[0] == 'biases':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum Gradient', fontproperties='serif')
    fig.set_size_inches((6,8))
    #plt.show()
    plt.savefig('initial_gradient.png')
    plt.savefig('initial_gradient.pdf', pad_inches=0.05, bbox_inches='tight')

    # ----- Nice versions of Alpha and beta schedules for paper -----
    print "Plotting full alpha and beta schedules curves..."
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(411)
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(np.exp(cur_results), 'o-', label=nice_layer_name(name))
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Step size', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})
    ax = fig.add_subplot(412)
    for cur_results, name in zip(results['invlogit_betas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(logit(cur_results), 'o-', label=nice_layer_name(name))
    ax.set_ylim([0, 1])
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum', fontproperties='serif')
    ax = fig.add_subplot(413)
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'biases':
            ax.plot(np.exp(cur_results), 'o-', label=nice_layer_name(name))
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Step size', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})
    ax = fig.add_subplot(414)
    for cur_results, name in zip(results['invlogit_betas'][-1].T, parser.names):
        if name[0] == 'biases':
            ax.plot(logit(cur_results), 'o-', label=nice_layer_name(name))
    ax.set_ylim([0, 1])
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum', fontproperties='serif')
    fig.set_size_inches((6,8))
    #plt.show()
    plt.savefig('alpha_beta_paper.png')
    plt.savefig('alpha_beta_paper.pdf', pad_inches=0.05, bbox_inches='tight')

    print "Plotting learning curves..."
    fig.clf()
    fig.set_size_inches((6,8))
    # ----- Primal learning curves -----
    ax = fig.add_subplot(311)
    ax.set_title('Primal learning curves')
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['learning_curve'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.set_ylabel('Negative log prob')
    #ax.legend(loc=1, frameon=False)
    ax = fig.add_subplot(312)
    ax.set_title('Meta learning curves')
    losses = ['train_loss', 'valid_loss', 'tests_loss']
    for loss_type in losses:
        ax.plot(results[loss_type], 'o-', label=loss_type)
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Negative log prob')
    ax.legend(loc=1, frameon=False)
    ax = fig.add_subplot(313)
    ax.set_title('Meta-gradient magnitude')
    ax.plot(results['meta_grad_magnitude'], 'o-', label='Meta-gradient magnitude')
    ax.plot(results['meta_grad_angle'], 'o-', label='Meta-gradient angle')
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Meta-gradient Magnitude')
    ax.legend(loc=1, frameon=False)
    plt.savefig('learning_curves.png')

    # ----- Nice learning curves for paper -----
    print "Plotting nice learning curves for paper..."
    fig.clf()
    # ----- Primal learning curves -----
    ax = fig.add_subplot(111)
    #ax.set_title('Primal learning curves')
    ax.plot(results['learning_curves'][0]['iteration'],
            results['learning_curves'][0]['learning_curve'], '-', label='Initial hypers')
    ax.plot(results['learning_curves'][-1]['iteration'],
            results['learning_curves'][-1]['learning_curve'], '-', label='Final hypers')
    ax.set_xlabel('Training iteration')
    ax.set_ylabel('Training loss')
    fig.set_size_inches((2.5,2.5))
    ax.legend(numpoints=1, loc=1, frameon=False, prop={'size':'10'})
    plt.savefig('learning_curves_paper.pdf', pad_inches=0.05, bbox_inches='tight')

    fig.clf()
    ax = fig.add_subplot(111)
    #ax.set_title('Meta learning curves')
    losses = ['train_loss', 'valid_loss', 'tests_loss']
    loss_names = ['Training loss', 'Validation loss', 'Test loss']
    for loss_type, loss_name in zip(losses, loss_names):
        ax.plot(results[loss_type], 'o-', label=loss_name)
    ax.set_xlabel('Meta iteration')
    ax.set_ylabel('Predictive loss')
    ax.legend(loc=1, frameon=False)
    fig.set_size_inches((2.5,2.5))
    ax.legend(numpoints=1, loc=1, frameon=False, prop={'size':'10'})
    plt.savefig('meta_learning_curve_paper.pdf', pad_inches=0.05, bbox_inches='tight')

    # ----- Extra learning curve info -----
    print "Plotting extra learning curves..."
    fig.clf()
    ax = fig.add_subplot(311)
    ax.set_title('Primal learning curves')
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['grad_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    #ax.legend(loc=1, frameon=False)
    ax.set_title('Grad norm')
    ax = fig.add_subplot(312)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['weight_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.legend(loc=1, frameon=False)
    ax.set_title('Weight norm')
    ax = fig.add_subplot(313)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['velocity_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.set_title('Velocity norm')
    ax.legend(loc=1, frameon=False)
    plt.savefig('extra_learning_curves.png')

    # ----- Init scale and L2 reg -----
    print "Plotting initialization distributions and regularization..."
    fig.clf()
    ax = fig.add_subplot(111)
    for i, y in enumerate(zip(*results['log_param_scale'])):
        if parser.names[i][0] == 'weights':
            ax.plot(np.exp(y), 'o-', label='')  #layer_name(parser.names[i]))
    ax.set_xlabel('Meta iteration')
    #ax.set_ylabel('Initial scale')
    # Show lines for theoretical optimum.
    y1 = 1.0/np.sqrt(layer_sizes[0])
    y2 = 1.0/np.sqrt(layer_sizes[1])
    ax.plot(ax.get_xlim(), (y2, y2), 'k--', label=r'$1/\sqrt{50}$')
    ax.plot(ax.get_xlim(), (y1, y1), 'b--', label=r'$1/\sqrt{784}$')
    ax.set_yticks([0.00, 1.0/np.sqrt(784), 0.10, 1.0/np.sqrt(50), 0.20, 0.25])
    ax.set_yticklabels(['0.00', r"$1 / \sqrt{784}$", "0.10",
                        r"$1 / \sqrt{50}$", "0.20", "0.25"])
    fig.set_size_inches((2.5,2.5))
    #ax.legend(numpoints=1, loc=1, frameon=False, prop={'size':'12'})
    plt.savefig('init_weight_learning_curve.pdf', pad_inches=0.05, bbox_inches='tight')

    fig.clf()
    ax = fig.add_subplot(111)
    for i, y in enumerate(zip(*results['log_param_scale'])):
        if parser.names[i][0] == 'biases':
            ax.plot(np.exp(y), 'o-', label=layer_name(parser.names[i]))
    ax.set_xlabel('Meta iteration')
    ax.set_ylabel('Initial scale')
    fig.set_size_inches((2.5,2.5))
    ax.legend(numpoints=1, loc=0, frameon=False, prop={'size':'10'})
    plt.savefig('init_bias_learning_curve.pdf', pad_inches=0.05, bbox_inches='tight')
def test_logit():
    assert np.allclose(logit(0), 0.5, rtol=1e-3, atol=1e-4)
    assert np.allclose(logit(-100), 0, rtol=1e-3, atol=1e-4)
    assert np.allclose(logit( 100), 1, rtol=1e-3, atol=1e-4)
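# A finite-difference check of d_logit in the same spirit as the tests above
# (not part of the original test file):
def test_d_logit():
    eps = 1e-6
    for x in [-2.0, -0.5, 0.0, 0.3, 1.7]:
        fd = (logit(x + eps) - logit(x - eps)) / (2 * eps)
        assert np.allclose(d_logit(x), fd, rtol=1e-3, atol=1e-4)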
def plot():
    import matplotlib.pyplot as plt
    with open('results.pkl') as f:
        results, parser, parsed_init_hypergrad = pickle.load(f)

    # ----- Alpha and beta initial hypergradients -----
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(411)
    for cur_results, name in zip(parsed_init_hypergrad['log_alphas'].T, parser.names):
        if name[0] == 'weights':
            ax.plot(cur_results, 'o-', label=name)
    ax.set_ylabel('Step size Gradient', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})
    ax = fig.add_subplot(412)
    for cur_results, name in zip(parsed_init_hypergrad['invlogit_betas'].T, parser.names):
        if name[0] == 'weights':
            ax.plot(cur_results, 'o-', label=name)
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum Gradient', fontproperties='serif')
    ax = fig.add_subplot(413)
    for cur_results, name in zip(parsed_init_hypergrad['log_alphas'].T, parser.names):
        if name[0] == 'biases':
            ax.plot(cur_results, 'o-', label=name)
    ax.set_ylabel('Step size Gradient', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})
    ax = fig.add_subplot(414)
    for cur_results, name in zip(parsed_init_hypergrad['invlogit_betas'].T, parser.names):
        if name[0] == 'biases':
            ax.plot(cur_results, 'o-', label=name)
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum Gradient', fontproperties='serif')
    fig.set_size_inches((6,8))
    #plt.show()
    plt.savefig('initial_gradient.png')
    plt.savefig('initial_gradient.pdf', pad_inches=0.05, bbox_inches='tight')

    # ----- Nice versions of Alpha and beta schedules for paper -----
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(411)
    #ax.set_title('Alpha learning curves')
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(np.exp(cur_results), 'o-', label=name)
    #ax.set_xlabel('Learning Iteration', fontproperties='serif')
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Step size', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})
    ax = fig.add_subplot(412)
    #ax.set_title('Alpha learning curves')
    for cur_results, name in zip(results['invlogit_betas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(logit(cur_results), 'o-', label=name)
    low, high = ax.get_ylim()
    ax.set_ylim([0, 1])
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum', fontproperties='serif')
    ax = fig.add_subplot(413)
    #ax.set_title('Alpha learning curves')
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'biases':
            ax.plot(np.exp(cur_results), 'o-', label=name)
    #ax.set_xlabel('Learning Iteration', fontproperties='serif')
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Step size', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})
    ax = fig.add_subplot(414)
    #ax.set_title('Alpha learning curves')
    for cur_results, name in zip(results['invlogit_betas'][-1].T, parser.names):
        if name[0] == 'biases':
            ax.plot(logit(cur_results), 'o-', label=name)
    low, high = ax.get_ylim()
    ax.set_ylim([0, 1])
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum', fontproperties='serif')
    fig.set_size_inches((6,8))
    #plt.show()
    plt.savefig('alpha_beta_paper.png')
    plt.savefig('alpha_beta_paper.pdf', pad_inches=0.05, bbox_inches='tight')

    fig.clf()
    fig.set_size_inches((6,8))
    # ----- Primal learning curves -----
    ax = fig.add_subplot(311)
    ax.set_title('Primal learning curves')
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['learning_curve'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.set_ylabel('Negative log prob')
    #ax.legend(loc=1, frameon=False)
    ax = fig.add_subplot(312)
    ax.set_title('Meta learning curves')
    losses = ['train_loss', 'valid_loss', 'tests_loss']
    for loss_type in losses:
        ax.plot(results[loss_type], 'o-', label=loss_type)
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Negative log prob')
    ax.legend(loc=1, frameon=False)
    ax = fig.add_subplot(313)
    ax.set_title('Meta-gradient magnitude')
    ax.plot(results['meta_grad_magnitude'], 'o-', label='Meta-gradient magnitude')
    ax.plot(results['meta_grad_angle'], 'o-', label='Meta-gradient angle')
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Meta-gradient Magnitude')
    ax.legend(loc=1, frameon=False)
    plt.savefig('learning_curves.png')

    # ----- Learning curve info -----
    fig.clf()
    ax = fig.add_subplot(311)
    ax.set_title('Primal learning curves')
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['grad_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    #ax.legend(loc=1, frameon=False)
    ax.set_title('Grad norm')
    ax = fig.add_subplot(312)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['weight_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.legend(loc=1, frameon=False)
    ax.set_title('Weight norm')
    ax = fig.add_subplot(313)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['velocity_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.set_title('Velocity norm')
    ax.legend(loc=1, frameon=False)
    plt.savefig('extra_learning_curves.png')

    # ----- Alpha and beta schedules -----
    fig.clf()
    ax = fig.add_subplot(211)
    ax.set_title('Alpha learning curves')
    for i, y in enumerate(results['log_alphas']):
        ax.plot(y, 'o-', label="Meta iter {0}".format(i))
    ax.set_xlabel('Primal iter number')
    #ax.set_ylabel('Log alpha')
    ax.legend(loc=1, frameon=False)
    ax = fig.add_subplot(212)
    ax.set_title('Beta learning curves')
    for y in results['invlogit_betas']:
        ax.plot(y, 'o-')
    ax.set_xlabel('Primal iter number')
    ax.set_ylabel('Inv logit beta')
    plt.savefig('alpha_beta_curves.png')

    # ----- Init scale and L2 reg -----
    fig.clf()
    ax = fig.add_subplot(211)
    ax.set_title('Init scale learning curves')
    for i, y in enumerate(zip(*results['log_param_scale'])):
        if parser.names[i][0] == 'weights':
            ax.plot(y, 'o-', label=parser.names[i])
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Log param scale')
    ax.legend(loc=1, frameon=False)
    ax = fig.add_subplot(212)
    ax.set_title('Init scale learning curves')
    for i, y in enumerate(zip(*results['log_param_scale'])):
        if parser.names[i][0] == 'biases':
            ax.plot(y, 'o-', label=parser.names[i])
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Log param scale')
    ax.legend(loc=1, frameon=False)
    plt.savefig('scale_and_reg.png')
def loss(W_vect, X=0.0, T=0.0, L2_reg=0.0):
    return 800 * logit(rosenbrock(W_vect) / 500)
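# The rosenbrock helper used above is defined elsewhere in the repo; a minimal
# sketch of the standard two-dimensional Rosenbrock function it presumably wraps:
def rosenbrock(w):
    x, y = w[0], w[1]
    return 100.0 * (y - x**2)**2 + (1.0 - x)**2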
def plot():
    import matplotlib.pyplot as plt
    with open('results.pkl') as f:
        results, parser = pickle.load(f)

    # Fake data
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(1, 1, 1)
    ax.set_title("Fake Data")
    images = results['fake_data'][-1]
    plot_mnist_images(images, ax, ims_per_row=10)
    fig.set_size_inches((8, 12))
    plt.savefig('fake_data.pdf', pad_inches=0.05, bbox_inches='tight')

    # Show first layer filters from the last meta-iteration.
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(1, 1, 1)
    ax.set_title("Weights")
    weights = results['example_weights']
    parser.vect = weights
    weight_images = parser[('weights', 0)].T
    plot_mnist_images(weight_images, ax, ims_per_row=10)
    fig.set_size_inches((8, 12))
    plt.savefig('first_layer_weights.pdf', pad_inches=0.05, bbox_inches='tight')

    # ----- Nice versions of Alpha and beta schedules for paper -----
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(411)
    #ax.set_title('Alpha learning curves')
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(np.exp(cur_results), 'o-', label=name)
    #ax.set_xlabel('Learning Iteration', fontproperties='serif')
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Step size', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family': 'serif', 'size': '12'})
    ax = fig.add_subplot(412)
    #ax.set_title('Alpha learning curves')
    for cur_results, name in zip(results['invlogit_betas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(logit(cur_results), 'o-', label=name)
    low, high = ax.get_ylim()
    ax.set_ylim([0, 1])
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum', fontproperties='serif')
    ax = fig.add_subplot(413)
    #ax.set_title('Alpha learning curves')
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'biases':
            ax.plot(np.exp(cur_results), 'o-', label=name)
    #ax.set_xlabel('Learning Iteration', fontproperties='serif')
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Step size', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family': 'serif', 'size': '12'})
    ax = fig.add_subplot(414)
    #ax.set_title('Alpha learning curves')
    for cur_results, name in zip(results['invlogit_betas'][-1].T, parser.names):
        if name[0] == 'biases':
            ax.plot(logit(cur_results), 'o-', label=name)
    low, high = ax.get_ylim()
    ax.set_ylim([0, 1])
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum', fontproperties='serif')
    fig.set_size_inches((6, 8))
    #plt.show()
    plt.savefig('alpha_beta_paper.png')
    plt.savefig('alpha_beta_paper.pdf', pad_inches=0.05, bbox_inches='tight')

    fig.clf()
    fig.set_size_inches((6, 8))
    # ----- Primal learning curves -----
    ax = fig.add_subplot(311)
    #ax.set_title('Primal learning curves')
    #for i, y in enumerate(results['learning_curves']):
    #    ax.plot(y['learning_curve'], 'o-', label='Meta iter {0}'.format(i))
    #ax.set_xlabel('Epoch number')
    #ax.set_ylabel('Negative log prob')
    #ax.legend(loc=1, frameon=False)
    ax = fig.add_subplot(312)
    ax.set_title('Meta learning curves')
    losses = ['train_loss', 'valid_loss', 'tests_loss']
    for loss_type in losses:
        ax.plot(results[loss_type], 'o-', label=loss_type)
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Negative log prob')
    ax.legend(loc=1, frameon=False)
    ax = fig.add_subplot(313)
    ax.set_title('Meta-gradient magnitude')
    ax.plot(results['meta_grad_magnitude'], 'o-', label='Meta-gradient magnitude')
    ax.plot(results['meta_grad_angle'], 'o-', label='Meta-gradient angle')
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Meta-gradient Magnitude')
    ax.legend(loc=1, frameon=False)
    plt.savefig('learning_curves.png')

    # ----- Learning curve info -----
    fig.clf()
    ax = fig.add_subplot(311)
    ax.set_title('Primal learning curves')
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['grad_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    #ax.legend(loc=1, frameon=False)
    ax.set_title('Grad norm')
    ax = fig.add_subplot(312)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['weight_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.legend(loc=1, frameon=False)
    ax.set_title('Weight norm')
    ax = fig.add_subplot(313)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['velocity_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.set_title('Velocity norm')
    ax.legend(loc=1, frameon=False)
    plt.savefig('extra_learning_curves.png')

    # ----- Alpha and beta schedules -----
    fig.clf()
    ax = fig.add_subplot(211)
    ax.set_title('Alpha learning curves')
    for i, y in enumerate(results['log_alphas']):
        ax.plot(y, 'o-', label="Meta iter {0}".format(i))
    ax.set_xlabel('Primal iter number')
    #ax.set_ylabel('Log alpha')
    ax.legend(loc=1, frameon=False)
    ax = fig.add_subplot(212)
    ax.set_title('Beta learning curves')
    for y in results['invlogit_betas']:
        ax.plot(y, 'o-')
    ax.set_xlabel('Primal iter number')
    ax.set_ylabel('Inv logit beta')
    plt.savefig('alpha_beta_curves.png')

    # ----- Init scale and L2 reg -----
    fig.clf()
    ax = fig.add_subplot(211)
    ax.set_title('Init scale learning curves')
    for i, y in enumerate(zip(*results['log_param_scale'])):
        if parser.names[i][0] == 'weights':
            ax.plot(y, 'o-', label=parser.names[i])
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Log param scale')
    ax.legend(loc=1, frameon=False)
    ax = fig.add_subplot(212)
    ax.set_title('Init scale learning curves')
    for i, y in enumerate(zip(*results['log_param_scale'])):
        if parser.names[i][0] == 'biases':
            ax.plot(y, 'o-', label=parser.names[i])
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Log param scale')
    ax.legend(loc=1, frameon=False)
    plt.savefig('scale_and_reg.png')