def test_sgd3():
    N_weights = 5
    W0 = 0.1 * npr.randn(N_weights)
    V0 = 0.1 * npr.randn(N_weights)
    N_data = 12
    batch_size = 4
    num_epochs = 3
    batch_idxs = BatchList(N_data, batch_size)
    alphas = 0.1 * npr.rand(len(batch_idxs) * num_epochs)
    betas = 0.5 + 0.2 * npr.rand(len(batch_idxs) * num_epochs)
    meta = 0.1 * npr.randn(N_weights * 2)
    A = npr.randn(N_data, N_weights)

    def loss_fun(W, meta, i=None):
        idxs = batch_idxs.all_idxs if i is None else batch_idxs[i % len(batch_idxs)]
        sub_A = A[idxs, :]
        return np.dot(np.dot(W + meta[:N_weights] + meta[N_weights:],
                             np.dot(sub_A.T, sub_A)), W)

    def meta_loss_fun(w, meta):
        return np.dot(w, w) + np.dot(meta, meta)

    def full_loss(W0, V0, alphas, betas, meta):
        result = sgd3(loss_fun, meta_loss_fun, W0, V0, alphas, betas, meta)
        return loss_fun(result['x_final'], meta)

    def meta_loss(W0, V0, alphas, betas, meta):
        result = sgd3(loss_fun, meta_loss_fun, W0, V0, alphas, betas, meta)
        return meta_loss_fun(result['x_final'], meta)

    result = sgd3(loss_fun, meta_loss_fun, W0, V0, alphas, betas, meta)
    d_an = (result['dMd_x'], result['dMd_v'], result['dMd_alphas'],
            result['dMd_betas'], result['dMd_meta'])
    d_num = nd(meta_loss, W0, V0, alphas, betas, meta)
    for i, (an, num) in enumerate(zip(d_an, d_num)):
        assert np.allclose(an, num, rtol=1e-3, atol=1e-4), \
            "Type {0}, diffs are: {1}".format(i, an - num)

    result = sgd3(loss_fun, loss_fun, W0, V0, alphas, betas, meta)
    d_an = (result['dMd_x'], result['dMd_v'], result['dMd_alphas'],
            result['dMd_betas'], result['dMd_meta'])
    d_num = nd(full_loss, W0, V0, alphas, betas, meta)
    for i, (an, num) in enumerate(zip(d_an, d_num)):
        assert np.allclose(an, num, rtol=1e-3, atol=1e-4), \
            "Type {0}, diffs are: {1}".format(i, an - num)
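# The test above depends on two small utilities, `BatchList` and `nd`, whose
# definitions live elsewhere in the repo. The versions below are a minimal
# sketch of what they plausibly look like, inferred only from how the test
# uses them: `BatchList` partitions range(N_data) into minibatch index
# ranges, and `nd` estimates gradients by central finite differences.
import numpy as np
import numpy.random as npr

class BatchList(list):
    """A list of index slices, one per minibatch."""
    def __init__(self, N_total, N_batch):
        start = 0
        while start < N_total:
            self.append(slice(start, start + N_batch))
            start += N_batch
        self.all_idxs = slice(0, N_total)   # indexes the full dataset

def nd(fun, *args):
    """Central-difference gradient of fun w.r.t. each array argument."""
    eps = 1e-4
    grads = []
    for i, arg in enumerate(args):
        grad = np.zeros_like(arg)
        for j in range(arg.size):
            # Copy only the argument being perturbed.
            perturbed = [a.copy() if k == i else a for k, a in enumerate(args)]
            perturbed[i].flat[j] += eps
            f_plus = fun(*perturbed)
            perturbed[i].flat[j] -= 2 * eps
            f_minus = fun(*perturbed)
            grad.flat[j] = (f_plus - f_minus) / (2 * eps)
        grads.append(grad)
    return tuple(grads)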
# Variant A: W0 is fixed to init_params; V0, the step sizes, and the momentum
# decays are hyperparameters, but V0's hypergradient is zeroed out below.
def hyperloss_grad(hyperparam_vect, ii):
    learning_curve = []
    params_curve = []

    def callback(x, i):
        params_curve.append(x)
        learning_curve.append(loss_fun(x))

    def indexed_loss_fun(w, log_L2_reg, j):
        return loss_fun(w)   # full-batch loss; the regularizer is unused here

    cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
    W0 = init_params
    V0 = cur_hyperparams['V0']
    alphas = np.exp(cur_hyperparams['log_alphas'])    # step sizes, kept positive
    betas = logit(cur_hyperparams['invlogit_betas'])  # momentum decays in (0, 1)
    log_L2_reg = 0.0
    results = sgd3(indexed_loss_fun, loss_fun, W0, V0, alphas, betas,
                   log_L2_reg, callback=callback)
    hypergrads = hyperparams.copy()
    hypergrads['V0'] = results['dMd_v'] * 0   # keep V0 fixed during meta-optimization
    hypergrads['log_alphas'] = results['dMd_alphas'] * alphas   # chain rule through exp
    hypergrads['invlogit_betas'] = (results['dMd_betas'] *
                                    d_logit(cur_hyperparams['invlogit_betas']))
    all_x.append(results['x_final'])
    all_learning_curves.append(learning_curve)
    all_param_curves.append(params_curve)
    return hypergrads.vect
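# `logit` and `d_logit` above are assumed helpers defined elsewhere. Given how
# they are used, mapping the unconstrained `invlogit_betas` into valid momentum
# decays in (0, 1) and supplying the chain-rule factor for the hypergradient,
# `logit` here must denote the logistic sigmoid despite its name. A minimal
# sketch consistent with that usage:
import numpy as np

def logit(x):
    return 1.0 / (1.0 + np.exp(-x))   # squashes the real line into (0, 1)

def d_logit(x):
    s = logit(x)
    return s * (1.0 - s)              # derivative of the sigmoid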
# Variant B: neural-net version with per-layer initialization scales and a
# learned L2 penalty; the meta-objective is the validation loss.
def hyperloss_grad(hyperparam_vect, i):
    learning_curve = []

    def callback(x, i):
        if i % len(batch_idxs) == 0:   # record once per epoch
            learning_curve.append(loss_fun(x, X=train_images, T=train_labels))

    npr.seed(i)   # fresh but reproducible random init each meta-iteration
    N_weights = parser.vect.size
    V0 = np.zeros(N_weights)
    cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
    # One log-scale hyperparameter per layer, broadcast over that layer's weights.
    layer_param_scale = [np.full(parser[name].size,
                                 np.exp(cur_hyperparams['log_param_scale'][i]))
                         for i, name in enumerate(parser.names)]
    W0 = npr.randn(N_weights) * np.concatenate(layer_param_scale, axis=0)
    alphas = np.exp(cur_hyperparams['log_alphas'])
    betas = logit(cur_hyperparams['invlogit_betas'])
    log_L2_reg = cur_hyperparams['log_L2_reg']
    results = sgd3(indexed_loss_fun, valid_loss_fun, W0, V0, alphas, betas,
                   log_L2_reg, callback=callback)
    hypergrads = hyperparams.copy()
    hypergrads['log_L2_reg'] = results['dMd_meta']
    # Chain rule: dW0/d(log_param_scale) = W0 elementwise, summed within each layer.
    weights_grad = parser.new_vect(W0 * results['dMd_x'])
    hypergrads['log_param_scale'] = [np.sum(weights_grad[name])
                                     for name in parser.names]
    hypergrads['log_alphas'] = results['dMd_alphas'] * alphas
    hypergrads['invlogit_betas'] = (results['dMd_betas'] *
                                    d_logit(cur_hyperparams['invlogit_betas']))
    all_x.append(results['x_final'])
    all_learning_curves.append(learning_curve)
    return hypergrads.vect
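# A sketch of how hyperloss_grad might be driven from an outer loop. The names
# meta_alpha and N_meta_iter are illustrative, not from the repo, and the real
# experiments may well use a fancier meta-optimizer than plain gradient
# descent; this only shows the shape of the loop over the flattened
# hyperparameter vector.
meta_alpha = 0.1      # hypothetical meta step size
N_meta_iter = 50      # hypothetical number of meta-iterations
hyperparam_vect = hyperparams.vect.copy()
for meta_iter in range(N_meta_iter):
    meta_grad = hyperloss_grad(hyperparam_vect, meta_iter)
    hyperparam_vect -= meta_alpha * meta_grad   # descend the meta-loss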
# Variant C: like Variant A, but W0 is a constant-scale initialization and
# V0's hypergradient is kept, so the initial velocity is actually learned.
def hyperloss_grad(hyperparam_vect, ii):
    learning_curve = []
    params_curve = []

    def callback(x, i):
        params_curve.append(x)
        learning_curve.append(loss_fun(x))

    def indexed_loss_fun(w, log_L2_reg, j):
        return loss_fun(w)

    cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
    W0 = np.ones(N_weights) * init_param_scale
    V0 = cur_hyperparams['V0']
    alphas = np.exp(cur_hyperparams['log_alphas'])
    betas = logit(cur_hyperparams['invlogit_betas'])
    log_L2_reg = 0.0
    results = sgd3(indexed_loss_fun, loss_fun, W0, V0, alphas, betas,
                   log_L2_reg, callback=callback)
    hypergrads = hyperparams.copy()
    hypergrads['V0'] = results['dMd_v']   # learn the initial velocity
    hypergrads['log_alphas'] = results['dMd_alphas'] * alphas
    hypergrads['invlogit_betas'] = (results['dMd_betas'] *
                                    d_logit(cur_hyperparams['invlogit_betas']))
    all_x.append(results['x_final'])
    all_learning_curves.append(learning_curve)
    all_param_curves.append(params_curve)
    return hypergrads.vect
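# The `* alphas` factor appearing in each variant is the chain rule for the
# log parameterization: with alpha = exp(log_alpha), dM/d(log_alpha) =
# (dM/d(alpha)) * alpha. A tiny self-contained check of that identity, with
# all names illustrative:
import numpy as np

def check_log_chain_rule(log_alpha=0.3, eps=1e-6):
    M = lambda la: np.sin(np.exp(la))   # arbitrary smooth meta-loss M(alpha)
    alpha = np.exp(log_alpha)
    dM_dalpha = np.cos(alpha)           # analytic dM/d(alpha)
    analytic = dM_dalpha * alpha        # the factor used in hyperloss_grad
    numeric = (M(log_alpha + eps) - M(log_alpha - eps)) / (2 * eps)
    assert np.allclose(analytic, numeric)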