def run():
    train_data, valid_data, test_data = \
        load_data_subset(N_train, N_valid, N_test)
    kernel = make_sq_exp_kernel(L0)

    def loss_fun(transform, train_data, valid_data):
        # Augment the training set with the learned transform, then
        # score it against held-out data with a kernel-weighted
        # nearest-neighbors loss.
        train_data = augment_data(train_data, transform)
        return weighted_neighbors_loss(train_data, valid_data, kernel)

    loss_grad = batchwise_function(grad(loss_fun))
    loss_fun = batchwise_function(loss_fun)

    batch_idxs = BatchList(N_valid, batch_size)
    A = np.eye(N_pix)  # Start from the identity transform.
    valid_losses = [loss_fun(A, train_data, valid_data)]
    test_losses = [loss_fun(A, train_data, test_data)]
    A += A_init_scale * npr.randn(N_pix, N_pix)
    for meta_iter in range(N_meta_iters):
        print "Iter {0} valid {1} test {2}".format(
            meta_iter, valid_losses[-1], test_losses[-1])
        for idxs in batch_idxs:
            # Stochastic meta-steps over minibatches of validation
            # data, with an L1 penalty encouraging a sparse transform.
            valid_batch = [x[idxs] for x in valid_data]
            d_A = loss_grad(A, train_data, valid_batch)
            A -= meta_alpha * (d_A + meta_L1 * np.sign(A))
        valid_losses.append(loss_fun(A, train_data, valid_data))
        test_losses.append(loss_fun(A, train_data, test_data))
    return A, valid_losses, test_losses
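# A minimal driver sketch for the run() above. Everything in this block
# is an assumption for illustration: the import paths follow the
# funkyyak/numpy style implied by grad(), npr, and kylist/getval used
# elsewhere in this code, and every constant value is hypothetical. The
# helpers (load_data_subset, make_sq_exp_kernel, augment_data,
# weighted_neighbors_loss, batchwise_function, BatchList) are assumed
# to come from the surrounding codebase.
import numpy as np
import numpy.random as npr
from funkyyak import grad

N_train, N_valid, N_test = 1000, 1000, 1000  # hypothetical subset sizes
N_pix = 28 * 28        # flattened image size; A is N_pix x N_pix
L0 = 1.0               # squared-exponential kernel length-scale
batch_size = 100
A_init_scale = 0.01    # scale of the random initial perturbation of A
N_meta_iters = 50
meta_alpha = 0.1       # step size for updates to A
meta_L1 = 0.1          # L1 penalty weight on A

if __name__ == '__main__':
    A, valid_losses, test_losses = run()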
def run():
    (train_images, train_labels), (val_images, val_labels), \
        (test_images, test_labels) = \
        load_data_subset(N_train_data, N_val_data, N_test_data)
    batch_idxs = BatchList(N_train_data, batch_size)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weights = parser.N

    # One log-L2-regularization hyperparameter per network weight.
    hyperparser = WeightsParser()
    hyperparser.add_weights('log_L2_reg', (N_weights,))
    metas = np.zeros(hyperparser.N)
    print "Number of hyperparameters to be trained:", hyperparser.N
    npr.seed(0)
    hyperparser.set(metas, 'log_L2_reg',
                    log_L2_reg_scale + np.ones(N_weights))

    def indexed_loss_fun(x, meta_params, idxs):
        # Regularized training loss on a minibatch; optimized by SGD.
        L2_reg = np.exp(hyperparser.get(meta_params, 'log_L2_reg'))
        return loss_fun(x, X=train_images[idxs], T=train_labels[idxs],
                        L2_reg=L2_reg)

    def meta_loss_fun(x, meta_params):
        # Validation loss minus a log-prior on the regularizers;
        # optimized in the outer loop.
        L2_reg = np.exp(hyperparser.get(meta_params, 'log_L2_reg'))
        log_prior = -meta_L2_reg * np.dot(L2_reg.ravel(), L2_reg.ravel())
        return loss_fun(x, X=val_images, T=val_labels) - log_prior

    def test_loss_fun(x):
        # To measure actual performance.
        return loss_fun(x, X=test_images, T=test_labels)

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)
    v0 = npr.randn(N_weights) * velocity_scale
    x0 = npr.randn(N_weights) * np.exp(log_param_scale)

    output = []
    for i in range(N_meta_iter):
        # Inner SGD run; sgd2 also returns the hypergradient dMd_meta.
        results = sgd2(indexed_loss_fun, meta_loss_fun, batch_idxs, N_iters,
                       x0, v0, np.exp(log_alphas), betas, metas)
        learning_curve = results['learning_curve']
        validation_loss = results['M_final']
        test_loss = test_loss_fun(results['x_final'])
        output.append((learning_curve, validation_loss, test_loss,
                       parser.get(np.exp(hyperparser.get(metas, 'log_L2_reg')),
                                  ('weights', 0))))
        # Plain gradient descent on the hyperparameters.
        metas -= results['dMd_meta'] * meta_stepsize
        print "Meta iteration {0} Validation loss {1} Test loss {2}"\
            .format(i, validation_loss, test_loss)
    return output
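# For reference, a minimal sketch of the WeightsParser interface the
# code above relies on (an illustration, not the original class): it
# hands out named, reshaped slices of a single flat vector, so a whole
# collection of parameters or hyperparameters is one plain array.
class WeightsParserSketch(object):
    def __init__(self):
        self.idxs_and_shapes = {}
        self.N = 0

    def add_weights(self, name, shape):
        start = self.N
        self.N += int(np.prod(shape))
        self.idxs_and_shapes[name] = (slice(start, self.N), shape)

    def get(self, vect, name):
        idxs, shape = self.idxs_and_shapes[name]
        return np.reshape(vect[idxs], shape)

    def set(self, vect, name, val):
        idxs, _ = self.idxs_and_shapes[name]
        vect[idxs] = np.ravel(val)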
def build_test_images():
    # Cache the test images on disk so repeated runs are cheap.
    if os.path.isfile('test_images.pkl'):
        with open('test_images.pkl') as f:
            all_images = pickle.load(f)
    else:
        # Synthetic stripe patterns: two pixels on, two off.
        vert_stripes = np.zeros((L, L))
        vert_stripes[:, ::4] = 1.0
        vert_stripes[:, 1::4] = 1.0
        horz_stripes = np.zeros((L, L))
        horz_stripes[::4, :] = 1.0
        horz_stripes[1::4, :] = 1.0
        # Plus the first four MNIST training images.
        mnist_imgs = load_data_subset(4)[0][0]
        all_images = np.concatenate((vert_stripes.reshape(1, L * L),
                                     horz_stripes.reshape(1, L * L),
                                     mnist_imgs), axis=0)
        with open('test_images.pkl', 'w') as f:
            pickle.dump(all_images, f)
    return all_images
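# Hypothetical usage of build_test_images(), assuming the module-level
# image width L used above: each row is a flattened L x L image (two
# stripe patterns followed by four MNIST digits), so a row can be
# reshaped for display. matplotlib is an assumption here, not a
# dependency of the original script.
import matplotlib.pyplot as plt

imgs = build_test_images()
plt.imshow(imgs[0].reshape(L, L), cmap='gray')
plt.show()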
def run():
    (train_images, train_labels), \
        (valid_images, valid_labels), \
        (tests_images, tests_labels) = \
        load_data_subset(N_train, N_valid, N_tests)
    batch_idxs = BatchList(N_train, batch_size)
    N_iters = N_epochs * len(batch_idxs)
    parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weight_types = len(parser.names)

    # Hyperparameters: one L2 penalty and one init scale per weight
    # type, plus a learning rate and momentum for every SGD iteration.
    hyperparams = VectorParser()
    hyperparams['log_L2_reg'] = np.full(N_weight_types, init_log_L2_reg)
    hyperparams['log_param_scale'] = np.full(N_weight_types, init_log_param_scale)
    hyperparams['log_alphas'] = np.full(N_iters, init_log_alphas)
    hyperparams['invlogit_betas'] = np.full(N_iters, init_invlogit_betas)

    def indexed_loss_fun(w, log_L2_reg, i):
        idxs = batch_idxs[i % len(batch_idxs)]
        # Expand the per-weight-type regularizers into a full-length
        # vector, one entry per weight.
        partial_vects = [np.full(parser[name].size, np.exp(log_L2_reg[j]))
                         for j, name in enumerate(parser.names)]
        L2_reg_vect = np.concatenate(partial_vects, axis=0)
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs],
                        L2_reg=L2_reg_vect)

    def train_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=train_images, T=train_labels)

    def valid_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=valid_images, T=valid_labels)

    def tests_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=tests_images, T=tests_labels)

    all_learning_curves = []
    all_x = []

    def hyperloss_grad(hyperparam_vect, i):
        learning_curve = []

        def callback(x, i):
            if i % len(batch_idxs) == 0:
                learning_curve.append(loss_fun(x, X=train_images, T=train_labels))

        npr.seed(i)
        N_weights = parser.vect.size
        V0 = np.zeros(N_weights)
        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        layer_param_scale = [np.full(parser[name].size,
                                     np.exp(cur_hyperparams['log_param_scale'][j]))
                             for j, name in enumerate(parser.names)]
        W0 = npr.randn(N_weights) * np.concatenate(layer_param_scale, axis=0)
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas = logit(cur_hyperparams['invlogit_betas'])
        log_L2_reg = cur_hyperparams['log_L2_reg']
        # Run SGD forward, then reverse it to get exact hypergradients.
        results = sgd3(indexed_loss_fun, valid_loss_fun, W0, V0,
                       alphas, betas, log_L2_reg, callback=callback)

        # Chain rule through the reparameterizations:
        # W0 = exp(log_param_scale) * randn, alpha = exp(log_alpha),
        # beta = logit(invlogit_beta).
        hypergrads = hyperparams.copy()
        hypergrads['log_L2_reg'] = results['dMd_meta']
        weights_grad = parser.new_vect(W0 * results['dMd_x'])
        hypergrads['log_param_scale'] = [np.sum(weights_grad[name])
                                         for name in parser.names]
        hypergrads['log_alphas'] = results['dMd_alphas'] * alphas
        hypergrads['invlogit_betas'] = (results['dMd_betas'] *
                                        d_logit(cur_hyperparams['invlogit_betas']))
        all_x.append(results['x_final'])
        all_learning_curves.append(learning_curve)
        return hypergrads.vect

    add_fields = ['train_loss', 'valid_loss', 'tests_loss']
    meta_results = {field: [] for field in add_fields + hyperparams.names}

    def meta_callback(hyperparam_vect, i):
        print "Meta iter {0}".format(i)
        x = all_x[-1]
        cur_hyperparams = hyperparams.new_vect(hyperparam_vect.copy())
        for field in cur_hyperparams.names:
            meta_results[field].append(cur_hyperparams[field])
        meta_results['train_loss'].append(train_loss_fun(x))
        meta_results['valid_loss'].append(valid_loss_fun(x))
        meta_results['tests_loss'].append(tests_loss_fun(x))

    final_result = rms_prop(hyperloss_grad, hyperparams.vect,
                            meta_callback, N_meta_iter, meta_alpha)
    meta_results['all_learning_curves'] = all_learning_curves
    parser.vect = None  # No need to pickle zeros.
    return meta_results, parser
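# The momentum schedule is stored unconstrained as 'invlogit_betas' and
# squashed into (0, 1) before use. A sketch of the squashing helpers
# this assumes: here, as implied by its use to produce valid momentum
# values, logit denotes the logistic sigmoid, and d_logit is its
# derivative (used above to map the hypergradient back to the
# unconstrained parameterization).
def logit(x):
    return 1.0 / (1.0 + np.exp(-x))

def d_logit(x):
    s = logit(x)
    return s * (1.0 - s)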
def run():
    (train_images, train_labels), \
        (valid_images, valid_labels), \
        (tests_images, tests_labels) = \
        load_data_subset(N_train, N_valid, N_tests)
    batch_idxs = BatchList(N_train, batch_size)
    N_iters = N_epochs * len(batch_idxs)
    parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weight_types = len(parser.names)

    hyperparams = VectorParser()
    hyperparams['log_L2_reg'] = np.full(N_weight_types, init_log_L2_reg)
    hyperparams['log_param_scale'] = np.full(N_weight_types, init_log_param_scale)
    hyperparams['log_alphas'] = np.full(N_iters, init_log_alphas)
    hyperparams['invlogit_betas'] = np.full(N_iters, init_invlogit_betas)

    def indexed_loss_fun(w, log_L2_reg, i):
        idxs = batch_idxs[i % len(batch_idxs)]
        partial_vects = [np.full(parser[name].size, np.exp(log_L2_reg[j]))
                         for j, name in enumerate(parser.names)]
        L2_reg_vect = np.concatenate(partial_vects, axis=0)
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs],
                        L2_reg=L2_reg_vect)

    def train_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=train_images, T=train_labels)

    def valid_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=valid_images, T=valid_labels)

    def tests_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=tests_images, T=tests_labels)

    all_learning_curves = []
    all_x = []

    def hyperloss(hyperparam_vect, i):
        learning_curve = []

        def callback(x, i):
            if i % len(batch_idxs) == 0:
                learning_curve.append(loss_fun(x, X=train_images, T=train_labels))

        npr.seed(i)
        N_weights = parser.vect.size
        V0 = np.zeros(N_weights)
        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        layer_param_scale = [np.full(parser[name].size,
                                     np.exp(cur_hyperparams['log_param_scale'][j]))
                             for j, name in enumerate(parser.names)]
        W0 = npr.randn(N_weights) * np.concatenate(layer_param_scale, axis=0)
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas = logit(cur_hyperparams['invlogit_betas'])
        log_L2_reg = cur_hyperparams['log_L2_reg']
        # sgd5 trains the network; because the whole run is expressed
        # in differentiable ops, grad(hyperloss) below backpropagates
        # through it to get the hypergradient.
        W_opt = sgd5(grad(indexed_loss_fun),
                     kylist(W0, alphas, betas, log_L2_reg), callback)
        all_x.append(getval(W_opt))
        all_learning_curves.append(learning_curve)
        return valid_loss_fun(W_opt)

    hyperloss_grad = grad(hyperloss)

    add_fields = ['train_loss', 'valid_loss', 'tests_loss']
    meta_results = {field: [] for field in add_fields + hyperparams.names}

    def meta_callback(hyperparam_vect, i):
        x = all_x[-1]
        cur_hyperparams = hyperparams.new_vect(hyperparam_vect.copy())
        for field in cur_hyperparams.names:
            meta_results[field].append(cur_hyperparams[field])
        meta_results['train_loss'].append(train_loss_fun(x))
        meta_results['valid_loss'].append(valid_loss_fun(x))
        meta_results['tests_loss'].append(tests_loss_fun(x))

    final_result = rms_prop(hyperloss_grad, hyperparams.vect,
                            meta_callback, N_meta_iter, meta_alpha)
    meta_results['all_learning_curves'] = all_learning_curves
    parser.vect = None  # No need to pickle zeros.
    return meta_results, parser
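# A self-contained toy version of the trick above, with the autograd
# library standing in for the funkyyak-style grad() (an assumption for
# illustration): express an entire unrolled SGD run as a differentiable
# function of the per-step log learning rates, then differentiate the
# final "validation" loss with respect to them.
import autograd.numpy as anp
from autograd import grad as autograd_grad

def toy_hyperloss(log_alphas):
    w = anp.array([1.0, -2.0])          # fixed initial weights
    for log_alpha in log_alphas:
        g = 2.0 * (w - 3.0)             # grad of inner objective (w - 3)^2
        w = w - anp.exp(log_alpha) * g  # one unrolled SGD step
    return anp.sum(w ** 2)              # stand-in validation loss

toy_hypergrad = autograd_grad(toy_hyperloss)
print toy_hypergrad(anp.zeros(10))      # d(valid loss) / d(log_alphas)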
def run():
    (train_images, train_labels), \
        (valid_images, valid_labels), \
        (tests_images, tests_labels) = \
        load_data_subset(N_train, N_valid, N_tests)
    batch_idxs = BatchList(N_train, batch_size)
    N_iters = N_epochs * len(batch_idxs)
    parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weight_types = len(parser.names)

    hyperparams = VectorParser()
    hyperparams['log_L2_reg'] = np.full(N_weight_types, init_log_L2_reg)
    hyperparams['log_param_scale'] = np.full(N_weight_types, init_log_param_scale)
    hyperparams['log_alphas'] = np.full(N_iters, init_log_alphas)
    hyperparams['invlogit_betas'] = np.full(N_iters, init_invlogit_betas)

    def train_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=train_images, T=train_labels)

    def valid_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=valid_images, T=valid_labels)

    def tests_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=tests_images, T=tests_labels)

    all_learning_curves = []
    all_x = []

    def hyperloss_grad(hyperparam_vect, ii):
        learning_curve = []

        def callback(x, i):
            if i % len(batch_idxs) == 0:
                learning_curve.append(loss_fun(x, X=train_images, T=train_labels))

        def indexed_loss_fun(w, log_L2_reg, j):
            # Draw a random index set, seeded by (meta-iteration, step)
            # so each inner run is reproducible.
            # (Previously: idxs = batch_idxs[i % len(batch_idxs)].)
            npr.seed(1000 * ii + j)
            idxs = npr.randint(N_train, size=len(batch_idxs))
            partial_vects = [np.full(parser[name].size, np.exp(log_L2_reg[i]))
                             for i, name in enumerate(parser.names)]
            L2_reg_vect = np.concatenate(partial_vects, axis=0)
            return loss_fun(w, X=train_images[idxs], T=train_labels[idxs],
                            L2_reg=L2_reg_vect)

        npr.seed(ii)
        N_weights = parser.vect.size
        V0 = np.zeros(N_weights)
        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        layer_param_scale = [np.full(parser[name].size,
                                     np.exp(cur_hyperparams['log_param_scale'][i]))
                             for i, name in enumerate(parser.names)]
        W0 = npr.randn(N_weights) * np.concatenate(layer_param_scale, axis=0)
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas = logit(cur_hyperparams['invlogit_betas'])
        log_L2_reg = cur_hyperparams['log_L2_reg']
        # Run SGD forward, then reverse it to get exact hypergradients.
        results = sgd3(indexed_loss_fun, valid_loss_fun, W0, V0,
                       alphas, betas, log_L2_reg, callback=callback)

        # Chain rule through the reparameterizations of W0, alpha, beta.
        hypergrads = hyperparams.copy()
        hypergrads['log_L2_reg'] = results['dMd_meta']
        weights_grad = parser.new_vect(W0 * results['dMd_x'])
        hypergrads['log_param_scale'] = [np.sum(weights_grad[name])
                                         for name in parser.names]
        hypergrads['log_alphas'] = results['dMd_alphas'] * alphas
        hypergrads['invlogit_betas'] = (results['dMd_betas'] *
                                        d_logit(cur_hyperparams['invlogit_betas']))
        all_x.append(results['x_final'])
        all_learning_curves.append(learning_curve)
        return hypergrads.vect

    add_fields = ['train_loss', 'valid_loss', 'tests_loss', 'iter_num']
    meta_results = {field: [] for field in add_fields + hyperparams.names}

    def meta_callback(hyperparam_vect, i):
        # Record losses only every N_meta_thin meta-iterations.
        if i % N_meta_thin == 0:
            print "Meta iter {0}".format(i)
            x = all_x[-1]
            cur_hyperparams = hyperparams.new_vect(hyperparam_vect.copy())
            for field in cur_hyperparams.names:
                meta_results[field].append(cur_hyperparams[field])
            meta_results['train_loss'].append(train_loss_fun(x))
            meta_results['valid_loss'].append(valid_loss_fun(x))
            meta_results['tests_loss'].append(tests_loss_fun(x))
            meta_results['iter_num'].append(i)

    final_result = rms_prop(hyperloss_grad, hyperparams.vect, meta_callback,
                            N_meta_iter, meta_alpha, meta_gamma)
    meta_results['all_learning_curves'] = all_learning_curves
    parser.vect = None  # No need to pickle zeros.
    return meta_results, parser
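# The seeding scheme in indexed_loss_fun makes each inner training run
# reproducible: step j of meta-iteration ii always draws the same
# random index set, so the reversed pass of sgd3 sees exactly the
# minibatches the forward pass used. A minimal illustration with
# hypothetical numbers:
npr.seed(1000 * 3 + 7)
first_draw = npr.randint(10000, size=100)
npr.seed(1000 * 3 + 7)
second_draw = npr.randint(10000, size=100)
assert np.all(first_draw == second_draw)  # same seed, same minibatch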