def primal_optimizer(hyperparam_vect, i_hyper):
    """Run the inner (primal) SGD training loop for one hyperparameter vector.

    Returns a pair ``(W_opt, learning_curve_dict)``: the optimized weight
    vector and a dict of lists tracing the full-data loss and the
    gradient/weight/velocity norms, sampled every `thin` iterations.
    """
    def minibatch_loss(w, L2_vect, i_iter):
        # Seed deterministically from (seed, i_hyper, i_iter) so the
        # reverse-mode (backwards) pass can replay the exact same minibatches.
        batch_rng = RandomState((seed, i_hyper, i_iter))
        batch = batch_rng.randint(N_train, size=batch_size)
        return loss_fun(w, train_data['X'][batch], train_data['T'][batch], L2_vect)

    curves = defaultdict(list)

    def record_stats(x, v, g, i_iter):
        # Only log once every `thin` iterations to keep the curves small.
        if i_iter % thin != 0:
            return
        curves['learning_curve'].append(loss_fun(x, **train_data))
        curves['grad_norm'].append(np.linalg.norm(g))
        curves['weight_norm'].append(np.linalg.norm(x))
        curves['velocity_norm'].append(np.linalg.norm(v))

    cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
    init_rng = RandomState((seed, i_hyper))
    # Initial weights: per-group scales broadcast over the parser layout,
    # then perturbed elementwise by standard-normal noise.
    W0 = fill_parser(parser, np.exp(cur_hyperparams['log_param_scale']))
    W0 *= init_rng.randn(W0.size)
    # Learning-rate and momentum schedules live in unconstrained space;
    # map them back to their natural ranges here.
    alphas = np.exp(cur_hyperparams['log_alphas'])
    betas = logit(cur_hyperparams['invlogit_betas'])
    L2_reg = fill_parser(parser, np.exp(fixed_hyperparams['log_L2_reg']))
    W_opt = sgd_parsed(grad(minibatch_loss),
                       kylist(W0, alphas, betas, L2_reg),
                       parser, callback=record_stats)
    return W_opt, curves
def primal_optimizer(hyperparam_vect, i_hyper): def indexed_loss_fun(w, L2_vect, i_iter): rs = RandomState((seed, i_hyper, i_iter)) # Deterministic seed needed for backwards pass. idxs = rs.randint(N_train, size=batch_size) return loss_fun(w, train_data['X'][idxs], train_data['T'][idxs], L2_vect) learning_curve_dict = defaultdict(list) def callback(x, v, g, i_iter): if i_iter % thin == 0 or i_iter == N_iters or i_iter == 0: learning_curve_dict['learning_curve'].append(loss_fun(x, **train_data)) learning_curve_dict['grad_norm'].append(np.linalg.norm(g)) learning_curve_dict['weight_norm'].append(np.linalg.norm(x)) learning_curve_dict['velocity_norm'].append(np.linalg.norm(v)) learning_curve_dict['iteration'].append(i_iter + 1) print "iteration", i_iter cur_hyperparams = hyperparams.new_vect(hyperparam_vect) rs = RandomState((seed, i_hyper)) W0 = fill_parser(parser, np.exp(cur_hyperparams['log_param_scale'])) W0 *= rs.randn(W0.size) alphas = np.exp(cur_hyperparams['log_alphas']) betas = logit(cur_hyperparams['invlogit_betas']) L2_reg = fill_parser(parser, np.exp(fixed_hyperparams['log_L2_reg'])) W_opt = sgd_parsed(grad(indexed_loss_fun), kylist(W0, alphas, betas, L2_reg), parser, callback=callback) return W_opt, learning_curve_dict