# Imports shared by the primal_optimizer variants below. Module names follow
# the hypergrad repo's conventions; N_train, batch_size, train_data, loss_fun,
# parser, hyperparams, fixed_hyperparams, global_seed, N_batches, thin,
# fill_parser, logit, RandomState, and sgd4 are module-level names defined by
# each experiment script.
from collections import defaultdict

import numpy as np
import numpy.random as npr

from funkyyak import grad, kylist


def primal_optimizer(hyperparam_vect, i_hyper):
    def indexed_loss_fun(w, L2_vect, i_iter):
        seed = i_hyper * 10**6 + i_iter  # Deterministic seed needed for backwards pass.
        rs = npr.RandomState(seed)
        idxs = rs.randint(N_train, size=batch_size)
        return loss_fun(w, train_data['X'][idxs], train_data['T'][idxs], L2_vect)

    learning_curve_dict = defaultdict(list)
    def callback(x, v, g, i_iter):
        if i_iter % N_batches == 0:
            learning_curve_dict['learning_curve'].append(loss_fun(x, **train_data))
            learning_curve_dict['grad_norm'].append(np.linalg.norm(g))
            learning_curve_dict['weight_norm'].append(np.linalg.norm(x))
            learning_curve_dict['velocity_norm'].append(np.linalg.norm(v))

    cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
    W0 = fill_parser(parser, np.exp(fixed_hyperparams['log_param_scale']))
    W0 *= npr.RandomState(global_seed + i_hyper).randn(W0.size)
    alphas = np.exp(cur_hyperparams['log_alphas'])
    betas = logit(cur_hyperparams['invlogit_betas'])
    L2_reg = fill_parser(parser, np.exp(cur_hyperparams['log_L2_reg']))
    W_opt = sgd4(grad(indexed_loss_fun), kylist(W0, alphas, betas, L2_reg), callback)
    # callback(W_opt, N_iters)
    return W_opt, learning_curve_dict
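# `fill_parser` is used throughout but not shown. A minimal sketch of what it
# is assumed to do, based on how it is called here: broadcast one scalar per
# named parameter group in `parser` into a flat vector with one entry per
# weight, so a per-group hyperparameter (initial scale, L2 penalty) can be
# applied elementwise to the full weight vector.
def fill_parser(parser, items):
    # parser.names orders the groups; parser[name].size is each group's size.
    return np.concatenate([np.full(parser[name].size, items[i])
                           for i, name in enumerate(parser.names)])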
def primal_optimizer(hyperparam_vect, i_hyper):
    def indexed_loss_fun(w, L2_vect, i_iter):
        rs = RandomState((seed, i_hyper, i_iter))  # Deterministic seed needed for backwards pass.
        idxs = rs.randint(N_train, size=batch_size)
        return loss_fun(w, train_data["X"][idxs], train_data["T"][idxs], L2_vect)

    learning_curve_dict = defaultdict(list)
    def callback(x, v, g, i_iter):
        if i_iter % thin == 0:
            learning_curve_dict["learning_curve"].append(loss_fun(x, **train_data))
            learning_curve_dict["grad_norm"].append(np.linalg.norm(g))
            learning_curve_dict["weight_norm"].append(np.linalg.norm(x))
            learning_curve_dict["velocity_norm"].append(np.linalg.norm(v))

    cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
    rs = RandomState((seed, i_hyper))
    W0 = fill_parser(parser, np.exp(cur_hyperparams["log_param_scale"]))
    W0 *= rs.randn(W0.size)
    alphas = np.exp(cur_hyperparams["log_alphas"])
    betas = logit(cur_hyperparams["invlogit_betas"])
    L2_reg = fill_parser(parser, np.exp(fixed_hyperparams["log_L2_reg"]))
    W_opt = sgd4(grad(indexed_loss_fun), kylist(W0, alphas, betas, L2_reg), callback)
    # callback(W_opt, N_iters)
    return W_opt, learning_curve_dict
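# This variant seeds with tuples like (seed, i_hyper, i_iter) rather than an
# integer, so it relies on a RandomState wrapper that hashes an arbitrary
# hashable object down to a valid numpy seed. A minimal sketch of such a
# wrapper (an assumption about the utility the script imports, not its
# actual source):
class RandomState(npr.RandomState):
    def __init__(self, obj):
        # Hash the object into numpy's 32-bit seed range. Hashing of int
        # tuples is deterministic across runs, which is what matters here.
        super(RandomState, self).__init__(hash(obj) % 2**32)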
def primal_optimizer(hyperparam_vect, i_hyper):
    def indexed_loss_fun(w, L2_vect, i_iter):
        seed = i_hyper * 10**6 + i_iter  # Deterministic seed needed for backwards pass.
        rs = npr.RandomState(seed)
        idxs = rs.randint(N_train, size=batch_size)
        return loss_fun(w, train_data['X'][idxs], train_data['T'][idxs], L2_vect)

    learning_curve = []
    def callback(x, i_iter):
        if i_iter % N_batches == 0:
            learning_curve.append(loss_fun(x, **train_data))

    cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
    W0 = fill_parser(parser, np.exp(cur_hyperparams['log_param_scale']))
    W0 *= npr.RandomState(global_seed + i_hyper).randn(W0.size)
    alphas = np.exp(cur_hyperparams['log_alphas'])
    betas = logit(cur_hyperparams['invlogit_betas'])
    L2_reg = fill_parser(parser, np.exp(cur_hyperparams['log_L2_reg']))
    W_opt = sgd4(grad(indexed_loss_fun), kylist(W0, alphas, betas, L2_reg), callback)
    return W_opt, learning_curve
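# Why the deterministic seed matters: the reverse pass of sgd4 replays
# training backwards, and must see exactly the minibatch that produced each
# forward step. Making the indices a pure function of (i_hyper, i_iter)
# guarantees the same draw on both passes:
seed = 3 * 10**6 + 41  # e.g. i_hyper=3, i_iter=41
assert np.array_equal(npr.RandomState(seed).randint(1000, size=8),
                      npr.RandomState(seed).randint(1000, size=8))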
def primal_optimizer(hyperparam_vect, i_hyper):
    learning_curve = []
    def callback(x, i_iter):
        learning_curve.append(loss_fun(x))

    cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
    W0 = fill_parser(parser, np.exp(cur_hyperparams['log_param_scale']))
    W0 *= npr.RandomState(hash(i_hyper)).randn(W0.size)
    alphas = np.exp(cur_hyperparams['log_alphas'])
    betas = logit(cur_hyperparams['invlogit_betas'])
    L2_reg = fill_parser(parser, np.exp(cur_hyperparams['log_L2_reg']))
    W_opt = sgd4(grad(loss_fun), kylist(W0, alphas, betas, L2_reg), callback)
    callback(W_opt, N_iters)  # Record the final training loss as well.
    return W_opt, learning_curve
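# All variants store the learning-rate and momentum schedules in unconstrained
# space: alphas are exponentiated, and betas are squashed into (0, 1). Despite
# its name, `logit` here must be the logistic sigmoid, since momenta need to
# lie in (0, 1); `inv_logit` would then be its inverse. A sketch under that
# assumption:
def logit(x):
    return 1.0 / (1.0 + np.exp(-x))  # R -> (0, 1)

def inv_logit(y):
    return -np.log(1.0 / y - 1.0)    # (0, 1) -> R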
def primal_optimizer(hyperparam_vect, i_hyper):
    def indexed_loss_fun(w, L2_vect, i_iter):
        seed = i_hyper * 10**6 + i_iter  # Deterministic seed needed for backwards pass.
        idxs = npr.RandomState(seed).randint(N_train, size=batch_size)
        return loss_fun(w, train_data['X'][idxs], train_data['T'][idxs], L2_vect)

    learning_curve = []
    def callback(x, v, g, i_iter):
        if i_iter % N_batches == 0:
            learning_curve.append(loss_fun(x, **train_data))

    cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
    W0 = fill_parser(parser, np.exp(cur_hyperparams['log_param_scale']))
    W0 *= npr.RandomState(i_hyper).randn(W0.size)
    alphas = np.exp(cur_hyperparams['log_alphas'])
    betas = logit(cur_hyperparams['invlogit_betas'])
    L2_reg = fill_parser(parser, np.exp(cur_hyperparams['log_L2_reg']))
    W_opt = sgd4(grad(indexed_loss_fun), kylist(W0, alphas, betas, L2_reg), callback)
    return W_opt, learning_curve
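# `sgd4` itself is not shown. Its forward pass is assumed to be stochastic
# gradient descent with momentum, with per-iteration step sizes `alphas` and
# momenta `betas` (which is why both are vectors with one entry per
# iteration). A forward-only sketch; the real sgd4 also runs this loop
# exactly in reverse to compute hypergradients without storing the full
# weight trajectory:
def sgd_forward(L_grad, W0, alphas, betas, meta, callback=None):
    x, v = W0.copy(), np.zeros(W0.size)
    for i, (alpha, beta) in enumerate(zip(alphas, betas)):
        g = L_grad(x, meta, i)           # meta is L2_reg in the code above
        v = beta * v - (1.0 - beta) * g  # momentum update
        x = x + alpha * v                # weight update
        if callback:
            callback(x, v, g, i)
    return x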
def primal_optimizer(hyperparam_vect, i_hyper):
    def indexed_loss_fun(w, L2_vect, i_iter):
        seed = i_hyper * 10**6 + i_iter  # Deterministic seed needed for backwards pass.
        rs = npr.RandomState(seed)
        idxs = rs.randint(N_train, size=batch_size)
        return loss_fun(w, train_data['X'][idxs], train_data['T'][idxs], L2_vect)

    learning_curve = []
    def callback(x, i_iter):
        if i_iter % N_batches == 0:
            learning_curve.append(loss_fun(x, **train_data))

    cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
    W0 = fill_parser(parser, np.exp(cur_hyperparams['log_param_scale']))
    W0 *= npr.RandomState(global_seed + i_hyper).randn(W0.size)
    alphas = np.exp(cur_hyperparams['log_alphas'])
    betas = logit(cur_hyperparams['invlogit_betas'])
    L2_reg = fill_parser(parser, np.exp(cur_hyperparams['log_L2_reg']))
    W_opt = sgd4(grad(indexed_loss_fun), kylist(W0, alphas, betas, L2_reg), callback)
    callback(W_opt, N_iters)  # Record the final training loss as well.
    return W_opt, learning_curve
def primal_optimizer(hyperparam_vect, i_hyper):
    def indexed_loss_fun(w, L2_vect, i_iter):
        seed = i_hyper * 10**6 + i_iter  # Deterministic seed needed for backwards pass.
        rs = npr.RandomState(seed)
        idxs = rs.randint(N_train, size=batch_size)
        return loss_fun(w, train_data['X'][idxs], train_data['T'][idxs], L2_vect)

    learning_curve_dict = defaultdict(list)
    def callback(x, v, g, i_iter):
        if i_iter % thin == 0:
            learning_curve_dict['learning_curve'].append(loss_fun(x, **train_data))
            learning_curve_dict['grad_norm'].append(np.linalg.norm(g))
            learning_curve_dict['weight_norm'].append(np.linalg.norm(x))
            learning_curve_dict['velocity_norm'].append(np.linalg.norm(v))

    cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
    W0 = fill_parser(parser, np.exp(cur_hyperparams['log_param_scale']))
    W0 *= npr.RandomState(global_seed + i_hyper).randn(W0.size)
    alphas = np.exp(cur_hyperparams['log_alphas'])
    betas = logit(cur_hyperparams['invlogit_betas'])
    L2_reg = fill_parser(parser, np.exp(cur_hyperparams['log_L2_reg']))
    W_opt = sgd4(grad(indexed_loss_fun), kylist(W0, alphas, betas, L2_reg), callback)
    # callback(W_opt, N_iters)
    return W_opt, learning_curve_dict
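# How primal_optimizer is typically consumed: an outer hyperloss closes over
# it, and funkyyak's grad differentiates through the entire training run to
# get a gradient with respect to the hyperparameter vector. A hypothetical
# sketch (`valid_data` and the meta step size 0.1 are assumptions):
def hyperloss(hyperparam_vect, i_hyper):
    W_opt, _ = primal_optimizer(hyperparam_vect, i_hyper)
    return loss_fun(W_opt, **valid_data)

hyperloss_grad = grad(hyperloss)
hyperparam_vect -= 0.1 * hyperloss_grad(hyperparam_vect, 0)  # one meta-step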