# Experiment scripts. np/npr are numpy and numpy.random; other helpers
# (load_data, make_nn_funs, sgd, sgd2, etc.) come from the surrounding project.
import pickle
import socket

import numpy as np
import numpy.random as npr


def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg, return_parser=True)
    N_weights = parser.N

    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)
    npr.seed(2)
    V0 = npr.randn(N_weights) * velocity_scale
    X_uniform = npr.rand(N_weights)  # Weights are uniform samples passed through an inverse cdf.
    bindict = {k: np.linspace(-1, 1, N_bins) * np.exp(log_param_scale)  # A different cdf per layer.
               for k, v in parser.idxs_and_shapes.iteritems()}

    output = []
    for i in range(N_meta_iter):
        print "Meta iteration {0}".format(i)
        X0 = np.zeros(N_weights)
        dX_dbins = {}
        for k, cur_bins in bindict.iteritems():
            cur_slice, cur_shape = parser.idxs_and_shapes[k]
            cur_xs = X_uniform[cur_slice]
            cur_X0, cur_dX_dbins = bininvcdf(cur_xs, cur_bins)
            X0[cur_slice] = cur_X0
            dX_dbins[k] = cur_dX_dbins
        results = sgd(indexed_loss_fun, batch_idxs, N_iters, X0, V0,
                      np.exp(log_alphas), betas, record_learning_curve=True)
        dL_dx = results['d_x']
        learning_curve = results['learning_curve']
        output.append((learning_curve, bindict))

        # Update bins with one gradient step.
        for k, bins in bindict.iteritems():
            dL_dbins = np.dot(parser.get(dL_dx, k).flatten(), dX_dbins[k])
            bins = bins - dL_dbins * bin_stepsize
            bins[[0, -1]] = bins[[0, -1]] - dL_dbins[[0, 1]] * bin_stepsize
            bins.sort()  # Sort in place.
            bindict[k] = bins
    return output
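
# `bininvcdf` is used above but not defined in this section. Below is a minimal
# sketch, assuming `bins` holds the sorted edges of equal-mass intervals of a
# piecewise-linear inverse CDF, and that the second return value is the Jacobian
# of the samples w.r.t. those edges; the project's actual implementation may differ.
def bininvcdf(xs, bins):
    # Map uniform samples in [0, 1] through the piecewise-linear inverse CDF.
    N_bins = len(bins)
    pos = xs * (N_bins - 1)                            # Position in "bin units".
    j = np.clip(np.floor(pos).astype(int), 0, N_bins - 2)
    t = pos - j                                        # Fraction through interval j.
    vals = bins[j] * (1 - t) + bins[j + 1] * t
    # Jacobian: each sample depends linearly on its two bracketing edges.
    dvals_dbins = np.zeros((len(xs), N_bins))
    dvals_dbins[np.arange(len(xs)), j] = 1 - t
    dvals_dbins[np.arange(len(xs)), j + 1] = t
    return vals, dvals_dbins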
def run(oiter):
    # ----- Variable for this run -----
    log_alpha_0 = all_log_alpha_0[oiter]
    print "Running job {0} on {1}".format(oiter + 1, socket.gethostname())
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    N_weights, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)

    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    V0 = npr.randn(N_weights) * velocity_scale
    losses = []
    d_losses = []
    alpha_0 = np.exp(log_alpha_0)
    for N_iters in all_N_iters:
        alphas = np.full(N_iters, alpha_0)
        betas = np.full(N_iters, beta_0)
        npr.seed(1)
        W0 = npr.randn(N_weights) * np.exp(log_param_scale)
        results = sgd(indexed_loss_fun, batch_idxs, N_iters, W0, V0, alphas, betas)
        losses.append(results['loss_final'])
        d_losses.append(d_log_loss(alpha_0, results['d_alphas']))
    return losses, d_losses
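
# `d_log_loss` is not shown in this section. A minimal sketch consistent with
# both call sites (a scalar alpha_0 against the vector of per-iteration
# gradients d_alphas, and a weight vector W0 against d_x) is the log-space
# chain rule, dL/d(log x) = x * dL/dx, summed over the shared parameter:
def d_log_loss(x, dL_dx):
    # Broadcasting covers both the scalar and vector cases.
    return np.sum(x * dL_dx)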
def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weights = len(parser.vect)

    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs], L2_reg=L2_reg)

    losses = []
    d_losses = []
    for log_alpha_0 in all_log_alpha_0:
        npr.seed(0)
        V0 = npr.randn(N_weights) * velocity_scale
        alpha_0 = np.exp(log_alpha_0)
        alphas = np.full(N_iters, alpha_0)
        betas = np.full(N_iters, beta_0)
        W0 = npr.randn(N_weights) * np.exp(log_param_scale)
        results = sgd(indexed_loss_fun, batch_idxs, N_iters, W0, V0, alphas, betas)
        losses.append(results['loss_final'])
        d_losses.append(d_log_loss(alpha_0, results['d_alphas']))
    return losses, d_losses
def run():
    train_images, train_labels, _, _, _ = load_data(normalize=True)
    train_images = train_images[:N_real_data, :]
    train_labels = train_labels[:N_real_data, :]
    batch_idxs = BatchList(N_fake_data, batch_size)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg, return_parser=True)
    N_weights = parser.N
    fake_data = npr.randn(*(train_images[:N_fake_data, :].shape)) * init_fake_data_scale
    fake_labels = one_hot(np.array(range(N_fake_data)) % N_classes, N_classes)  # One of each.

    def indexed_loss_fun(x, meta_params, idxs):  # To be optimized by SGD.
        return loss_fun(x, X=meta_params[idxs], T=fake_labels[idxs])

    def meta_loss_fun(x):  # To be optimized in the outer loop.
        return loss_fun(x, X=train_images, T=train_labels)

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)
    npr.seed(0)
    v0 = npr.randn(N_weights) * velocity_scale
    x0 = npr.randn(N_weights) * np.exp(log_param_scale)
    output = []
    for i in range(N_meta_iter):
        results = sgd2(indexed_loss_fun, meta_loss_fun, batch_idxs, N_iters,
                       x0, v0, np.exp(log_alphas), betas, fake_data)
        learning_curve = results['learning_curve']
        validation_loss = results['M_final']
        output.append((learning_curve, validation_loss, fake_data))
        fake_data -= results['dMd_meta'] * data_stepsize  # Update data with one gradient step.
        print "Meta iteration {0} Validation loss {1}".format(i, validation_loss)
    return output
def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    N_weights, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)

    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)
    npr.seed(1)
    V0 = npr.randn(N_weights) * velocity_scale
    W0 = npr.randn(N_weights) * np.exp(log_param_scale)
    output = []
    for i in range(N_meta_iter):
        print "Meta iteration {0}".format(i)
        results = sgd(indexed_loss_fun, batch_idxs, N_iters, W0, V0,
                      np.exp(log_alphas), betas, record_learning_curve=True)
        learning_curve = results['learning_curve']
        d_log_alphas = np.exp(log_alphas) * results['d_alphas']
        output.append((learning_curve, log_alphas, d_log_alphas))
        log_alphas = log_alphas - meta_alpha * d_log_alphas
    return output
def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weights = len(parser.vect)

    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs], L2_reg=L2_reg)

    losses = []
    d_losses = []
    for log_alpha_0 in log_stepsizes:
        npr.seed(0)
        V0 = npr.randn(N_weights) * velocity_scale
        alpha_0 = np.exp(log_alpha_0)
        alphas = np.full(N_iters, alpha_0)
        betas = np.full(N_iters, beta_0)
        W0 = npr.randn(N_weights) * np.exp(log_param_scale)
        results = sgd(indexed_loss_fun, batch_idxs, N_iters, W0, V0, alphas, betas)
        losses.append(results['loss_final'])
        d_losses.append(d_log_loss(alpha_0, results['d_alphas']))
    return losses, d_losses
def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    N_weights, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)

    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)
    npr.seed(1)
    V0 = npr.randn(N_weights) * velocity_scale
    W0 = npr.randn(N_weights) * np.exp(log_param_scale)
    output = []
    for i in range(N_meta_iter):
        print "Meta iteration {0}".format(i)
        results = sgd(indexed_loss_fun, batch_idxs, N_iters, W0, V0,
                      np.exp(log_alphas), betas, record_learning_curve=True)
        learning_curve = results['learning_curve']
        d_log_alphas = np.exp(log_alphas) * results['d_alphas']
        output.append((learning_curve, log_alphas, d_log_alphas))
        log_alphas = log_alphas - meta_alpha * step_smooth(d_log_alphas, iter_per_epoch)
    return output
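
# `step_smooth` is not defined in this section. A minimal sketch, assuming it
# smooths the per-step hypergradient so that every step within an epoch
# receives the same (mean) learning-rate update:
def step_smooth(d_log_alphas, iter_per_epoch):
    # Replace each within-epoch gradient by the mean over its epoch.
    smoothed = np.empty_like(d_log_alphas)
    for start in range(0, len(d_log_alphas), iter_per_epoch):
        chunk = slice(start, start + iter_per_epoch)
        smoothed[chunk] = np.mean(d_log_alphas[chunk])
    return smoothed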
def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    N_weights, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)

    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    V0 = npr.randn(N_weights) * velocity_scale
    losses = []
    d_losses = []
    for N_iters in all_N_iters:
        alphas = np.full(N_iters, alpha_0)
        betas = np.full(N_iters, beta_0)
        loss_curve = []
        d_loss_curve = []
        for log_param_scale in all_log_param_scale:
            print "log_param_scale {0}, N_iters {1}".format(log_param_scale, N_iters)
            npr.seed(1)
            W0 = npr.randn(N_weights) * np.exp(log_param_scale)
            results = sgd(indexed_loss_fun, batch_idxs, N_iters, W0, V0, alphas, betas)
            loss_curve.append(results['loss_final'])
            d_loss_curve.append(d_log_loss(W0, results['d_x']))
        losses.append(loss_curve)
        d_losses.append(d_loss_curve)
    with open('results.pkl', 'wb') as f:  # Binary mode for pickle.
        pickle.dump((losses, d_losses), f)
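
# `BatchList` appears throughout these scripts as both an iterable of minibatch
# indices and a `len()`-able object (iterations per epoch). A minimal sketch
# consistent with that usage; the project's actual class may differ:
class BatchList(list):
    # A list of index arrays, one per minibatch, covering range(N_total).
    def __init__(self, N_total, N_batch):
        start = 0
        while start < N_total:
            self.append(np.arange(start, min(start + N_batch, N_total)))
            start += N_batch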
def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    N_weights, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)

    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)
    npr.seed(2)
    V0 = npr.randn(N_weights) * velocity_scale
    bins = np.linspace(-1, 1, N_bins) * np.exp(log_param_scale)
    W_uniform = npr.rand(N_weights)
    output = []
    for i in range(N_meta_iter):
        print "Meta iteration {0}".format(i)
        W0, dW_dbins = bininvcdf(W_uniform, bins)
        results = sgd(indexed_loss_fun, batch_idxs, N_iters, W0, V0,
                      np.exp(log_alphas), betas, record_learning_curve=True)
        dL_dx = results['d_x']
        dL_dbins = np.dot(dL_dx, dW_dbins)
        learning_curve = results['learning_curve']
        output.append((learning_curve, bins))
        bins = bins - dL_dbins * bin_stepsize
        bins[[0, -1]] = bins[[0, -1]] - dL_dbins[[0, 1]] * bin_stepsize
        bins.sort()  # Sort in place.
    return output
def run():
    val_images, val_labels, test_images, test_labels, _ = load_data(normalize=True)
    val_images = val_images[:N_val_data, :]
    val_labels = val_labels[:N_val_data, :]
    true_data_scale = np.std(val_images)
    test_images = test_images[:N_test_data, :]
    test_labels = test_labels[:N_test_data, :]
    batch_idxs = BatchList(N_fake_data, batch_size)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg, return_parser=True)
    N_weights = parser.N
    fake_data = npr.randn(*(val_images[:N_fake_data, :].shape)) * init_fake_data_scale
    fake_labels = one_hot(np.array(range(N_fake_data)) % N_classes, N_classes)  # One of each.

    def indexed_loss_fun(x, meta_params, idxs):  # To be optimized by SGD.
        return loss_fun(x, X=meta_params[idxs], T=fake_labels[idxs])

    def meta_loss_fun(x, meta_params):  # To be optimized in the outer loop.
        log_prior = -fake_data_L2_reg * np.dot(meta_params.ravel(), meta_params.ravel())
        return loss_fun(x, X=val_images, T=val_labels) - log_prior

    def test_loss_fun(x):  # To measure actual performance.
        return loss_fun(x, X=test_images, T=test_labels)

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)
    npr.seed(0)
    v0 = npr.randn(N_weights) * velocity_scale
    x0 = npr.randn(N_weights) * np.exp(log_param_scale)
    output = []
    for i in range(N_meta_iter):
        results = sgd2(indexed_loss_fun, meta_loss_fun, batch_idxs, N_iters,
                       x0, v0, np.exp(log_alphas), betas, fake_data)
        learning_curve = results['learning_curve']
        validation_loss = results['M_final']
        fake_data_scale = np.std(fake_data) / true_data_scale
        test_loss = test_loss_fun(results['x_final'])
        output.append((learning_curve, validation_loss, test_loss,
                       fake_data, fake_data_scale))
        fake_data -= results['dMd_meta'] * data_stepsize  # Update data with one gradient step.
        print "Meta iteration {0} Validation loss {1} Test loss {2}"\
              .format(i, validation_loss, test_loss)
    return output
def run():
    train_images, train_labels, _, _, _ = load_data(normalize=True)
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg, return_parser=True)
    N_weights = parser.N

    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)
    npr.seed(2)
    V0 = npr.randn(N_weights) * velocity_scale
    bindict = {k: np.linspace(-1, 1, N_bins) * np.exp(log_param_scale)  # A different cdf per layer.
               for k, v in parser.idxs_and_shapes.iteritems()}

    output = []
    for i in range(N_meta_iter):
        print "Meta iteration {0}".format(i)
        X_uniform = npr.rand(N_weights)  # Weights are uniform samples passed through an inverse cdf.
        X0 = np.zeros(N_weights)
        dX_dbins = {}
        for k, cur_bins in bindict.iteritems():
            cur_slice, cur_shape = parser.idxs_and_shapes[k]
            cur_xs = X_uniform[cur_slice]
            cur_X0, cur_dX_dbins = bininvcdf(cur_xs, cur_bins)
            X0[cur_slice] = cur_X0
            dX_dbins[k] = cur_dX_dbins
        results = sgd(indexed_loss_fun, batch_idxs, N_iters, X0, V0,
                      np.exp(log_alphas), betas, record_learning_curve=True)
        dL_dx = results['d_x']
        learning_curve = results['learning_curve']
        output.append((learning_curve, bindict))

        # Update bins with one gradient step.
        for k, bins in bindict.iteritems():
            dL_dbins = np.dot(parser.get(dL_dx, k).flatten(), dX_dbins[k])
            bins = bins - dL_dbins * bin_stepsize
            bins[[0, -1]] = bins[[0, -1]] - dL_dbins[[0, 1]] * bin_stepsize
            bindict[k] = np.sort(bins)
        bindict = bindict.copy()  # Copy so later updates don't mutate the recorded dict.
    return output
def run():
    train_images, train_labels, _, _, _ = load_data(normalize=True)
    train_images = train_images[:N_real_data, :]
    train_labels = train_labels[:N_real_data, :]
    batch_idxs = BatchList(N_fake_data, batch_size)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg, return_parser=True)
    N_weights = parser.N
    fake_data = np.zeros(train_images[:N_fake_data, :].shape)
    one_hot = lambda x, K: np.array(x[:, None] == np.arange(K)[None, :], dtype=int)
    fake_labels = one_hot(np.array(range(0, 10)), 10)  # One of each label.

    def indexed_loss_fun(x, meta_params, idxs):  # To be optimized by SGD.
        return loss_fun(x, X=meta_params[idxs], T=fake_labels[idxs])

    def meta_loss_fun(x):  # To be optimized in the outer loop.
        return loss_fun(x, X=train_images, T=train_labels)

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)
    npr.seed(0)
    v0 = npr.randn(N_weights) * velocity_scale
    x0 = npr.randn(N_weights) * np.exp(log_param_scale)
    output = []
    for i in range(N_meta_iter):
        print "Meta iteration {0}".format(i)
        results = sgd2(indexed_loss_fun, meta_loss_fun, batch_idxs, N_iters,
                       x0, v0, np.exp(log_alphas), betas, fake_data)
        learning_curve = results['learning_curve']
        output.append((learning_curve, fake_data))
        fake_data -= results['dMd_meta'] * data_stepsize  # Update data with one gradient step.
    return output
def run():
    val_images, val_labels, test_images, test_labels, _ = load_data(normalize=True)
    val_images = val_images[:N_val_data, :]
    val_labels = val_labels[:N_val_data, :]
    true_data_scale = np.std(val_images)
    test_images = test_images[:N_test_data, :]
    test_labels = test_labels[:N_test_data, :]
    batch_idxs = BatchList(N_fake_data, batch_size)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weights = len(parser.vect)
    npr.seed(0)
    init_fake_data = npr.randn(*(val_images[:N_fake_data, :].shape)) * init_fake_data_scale
    one_hot = lambda x, K: np.array(x[:, None] == np.arange(K)[None, :], dtype=int)
    fake_labels = one_hot(np.array(range(N_fake_data)) % N_classes, N_classes)  # One of each.

    hyperparser = WeightsParser()
    hyperparser.add_weights('log_L2_reg', (1,))
    hyperparser.add_weights('fake_data', init_fake_data.shape)
    metas = np.zeros(hyperparser.N)
    print "Number of hyperparameters to be trained:", hyperparser.N
    hyperparser.set(metas, 'log_L2_reg', init_log_L2_reg)
    hyperparser.set(metas, 'fake_data', init_fake_data)

    def indexed_loss_fun(x, meta_params, idxs):  # To be optimized by SGD.
        L2_reg = np.exp(hyperparser.get(meta_params, 'log_L2_reg')[0])
        fake_data = hyperparser.get(meta_params, 'fake_data')
        return loss_fun(x, X=fake_data[idxs], T=fake_labels[idxs], L2_reg=L2_reg)

    def meta_loss_fun(x, meta_params):  # To be optimized in the outer loop.
        fake_data = hyperparser.get(meta_params, 'fake_data')
        log_prior = -fake_data_L2_reg * np.dot(fake_data.ravel(), fake_data.ravel())
        return loss_fun(x, X=val_images, T=val_labels) - log_prior

    def test_loss_fun(x):  # To measure actual performance.
        return loss_fun(x, X=test_images, T=test_labels)

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)
    output = []
    velocity = np.zeros(hyperparser.N)
    for i in range(N_meta_iter):
        print "L2 reg is ", np.exp(hyperparser.get(metas, 'log_L2_reg')[0]), "| ",
        npr.seed(0)
        v0 = npr.randn(N_weights) * velocity_scale
        x0 = npr.randn(N_weights) * np.exp(log_param_scale)
        results = sgd2(indexed_loss_fun, meta_loss_fun, batch_idxs, N_iters,
                       x0, v0, np.exp(log_alphas), betas, metas)
        learning_curve = results['learning_curve']
        validation_loss = results['M_final']
        test_err = frac_err(results['x_final'], test_images, test_labels)
        fake_data_scale = np.std(hyperparser.get(metas, 'fake_data')) / true_data_scale
        test_loss = test_loss_fun(results['x_final'])
        output.append((learning_curve, validation_loss, test_loss, fake_data_scale,
                       np.exp(hyperparser.get(metas, 'log_L2_reg')[0]), test_err))

        # Meta-SGD with momentum.
        g = results['dMd_meta']
        velocity = meta_momentum * velocity - (1.0 - meta_momentum) * g
        metas += velocity * meta_stepsize
        print "Meta iteration {0} Validation loss {1} Test loss {2} Test err {3}"\
              .format(i, validation_loss, test_loss, test_err)
    return output, hyperparser.get(metas, 'fake_data')
def run():
    val_images, val_labels, test_images, test_labels, _ = load_data(normalize=True)
    val_images = val_images[:N_val_data, :]
    val_labels = val_labels[:N_val_data, :]
    true_data_scale = np.std(val_images)
    test_images = test_images[:N_test_data, :]
    test_labels = test_labels[:N_test_data, :]
    batch_idxs = BatchList(N_fake_data, batch_size)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weights = len(parser.vect)
    npr.seed(0)
    init_fake_data = npr.randn(*(val_images[:N_fake_data, :].shape)) * init_fake_data_scale
    one_hot = lambda x, K: np.array(x[:, None] == np.arange(K)[None, :], dtype=int)
    fake_labels = one_hot(np.array(range(N_fake_data)) % N_classes, N_classes)  # One of each.

    hyperparser = WeightsParser()
    hyperparser.add_weights('log_L2_reg', (1,))
    hyperparser.add_weights('fake_data', init_fake_data.shape)
    metas = np.zeros(hyperparser.N)
    print "Number of hyperparameters to be trained:", hyperparser.N
    hyperparser.set(metas, 'log_L2_reg', init_log_L2_reg)
    hyperparser.set(metas, 'fake_data', init_fake_data)

    def indexed_loss_fun(x, meta_params, idxs):  # To be optimized by SGD.
        L2_reg = np.exp(hyperparser.get(meta_params, 'log_L2_reg')[0])
        fake_data = hyperparser.get(meta_params, 'fake_data')
        return loss_fun(x, X=fake_data[idxs], T=fake_labels[idxs], L2_reg=L2_reg)

    def meta_loss_fun(x, meta_params):  # To be optimized in the outer loop.
        fake_data = hyperparser.get(meta_params, 'fake_data')
        log_prior = -fake_data_L2_reg * np.dot(fake_data.ravel(), fake_data.ravel())
        return loss_fun(x, X=val_images, T=val_labels) - log_prior

    def test_loss_fun(x):  # To measure actual performance.
        return loss_fun(x, X=test_images, T=test_labels)

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)
    output = []
    for i in range(N_meta_iter):
        print "L2 reg is ", np.exp(hyperparser.get(metas, 'log_L2_reg')[0]), "| ",
        npr.seed(0)
        v0 = npr.randn(N_weights) * velocity_scale
        x0 = npr.randn(N_weights) * np.exp(log_param_scale)
        results = sgd2(indexed_loss_fun, meta_loss_fun, batch_idxs, N_iters,
                       x0, v0, np.exp(log_alphas), betas, metas)
        learning_curve = results['learning_curve']
        validation_loss = results['M_final']
        test_err = frac_err(results['x_final'], test_images, test_labels)
        fake_data_scale = np.std(hyperparser.get(metas, 'fake_data')) / true_data_scale
        test_loss = test_loss_fun(results['x_final'])
        output.append((learning_curve, validation_loss, test_loss, fake_data_scale,
                       np.exp(hyperparser.get(metas, 'log_L2_reg')[0]), test_err))
        metas -= results['dMd_meta'] * meta_stepsize  # Plain meta gradient step.
        print "Meta iteration {0} Validation loss {1} Test loss {2} Test err {3}"\
              .format(i, validation_loss, test_loss, test_err)
    return output, hyperparser.get(metas, 'fake_data')
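
# `WeightsParser` is used above but defined elsewhere in the project. A minimal
# sketch consistent with the calls above (add_weights, set, get, .N, and the
# idxs_and_shapes mapping used by the per-layer bin experiments); the actual
# implementation may differ:
class WeightsParser(object):
    # Maps named parameter blocks into one flat vector.
    def __init__(self):
        self.idxs_and_shapes = {}
        self.N = 0

    def add_weights(self, name, shape):
        size = int(np.prod(shape))
        self.idxs_and_shapes[name] = (slice(self.N, self.N + size), shape)
        self.N += size

    def get(self, vect, name):
        idxs, shape = self.idxs_and_shapes[name]
        return np.reshape(vect[idxs], shape)

    def set(self, vect, name, val):
        idxs, _ = self.idxs_and_shapes[name]
        vect[idxs] = np.ravel(val)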