# Experiment snippets from a hypergradient / meta-learning codebase.
# Shared imports are collected here; project helpers (load_data, make_nn_funs,
# sgd, BatchList, bininvcdf, dictslice, RandomState, grad, getval, ...) and
# module-level hyperparameters (N_data, batch_size, layer_sizes, N_iters, ...)
# are assumed to come from the surrounding code. The repeated definitions of
# run()/hyperloss()/train_z() below are separate experiment variants; run as
# one module, later ones would shadow earlier ones.
import pickle
import socket

import numpy as np
import numpy.random as npr


def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg, return_parser=True)
    N_weights = parser.N

    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)
    npr.seed(2)
    V0 = npr.randn(N_weights) * velocity_scale
    #W0 = npr.randn(N_weights) * np.exp(log_param_scale)
    X_uniform = npr.rand(N_weights)  # Weights are uniform samples passed through an inverse cdf.
    bindict = {k: np.linspace(-1, 1, N_bins) * np.exp(log_param_scale)  # Different cdf per layer.
               for k, v in parser.idxs_and_shapes.iteritems()}
    output = []
    for i in range(N_meta_iter):
        print "Meta iteration {0}".format(i)
        #X0, dX_dbins = bininvcdf(W_uniform, bins)
        X0 = np.zeros(N_weights)
        dX_dbins = {}
        for k, cur_bins in bindict.iteritems():
            cur_slice, cur_shape = parser.idxs_and_shapes[k]
            cur_xs = X_uniform[cur_slice]
            cur_X0, cur_dX_dbins = bininvcdf(cur_xs, cur_bins)
            X0[cur_slice] = cur_X0
            dX_dbins[k] = cur_dX_dbins
        results = sgd(indexed_loss_fun, batch_idxs, N_iters, X0, V0,
                      np.exp(log_alphas), betas, record_learning_curve=True)
        dL_dx = results['d_x']
        learning_curve = results['learning_curve']
        output.append((learning_curve, bindict))

        # Update bins with one gradient step.
        for k, bins in bindict.iteritems():
            dL_dbins = np.dot(parser.get(dL_dx, k).flatten(), dX_dbins[k])
            bins = bins - dL_dbins * bin_stepsize
            bins[[0, -1]] = bins[[0, -1]] - dL_dbins[[0, 1]] * bin_stepsize  # Extra step on the outermost edges.
            bins.sort()  # Sort in place to keep bin edges monotonic.
            bindict[k] = bins
    return output
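# `bininvcdf` is not defined in these snippets. The sketch below is a guess at
# a minimal implementation consistent with how it is called above: map uniform
# samples through the inverse CDF of a piecewise-uniform distribution whose
# edges are `bins`, and return the Jacobian of the samples with respect to
# those edges. The project's real helper may differ.
def bininvcdf_sketch(u, bins):
    N_bins = len(bins)
    scaled = u * (N_bins - 1)
    j = np.clip(scaled.astype(int), 0, N_bins - 2)  # which bin each sample lands in
    t = scaled - j                                  # position within that bin
    x = (1 - t) * bins[j] + t * bins[j + 1]         # linear interpolation between edges
    dx_dbins = np.zeros((len(u), N_bins))           # sparse Jacobian: two nonzeros per row
    rows = np.arange(len(u))
    dx_dbins[rows, j] = 1 - t
    dx_dbins[rows, j + 1] = t
    return x, dx_dbins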
def run(oiter):
    # ----- Variable for this run -----
    log_alpha_0 = all_log_alpha_0[oiter]
    print "Running job {0} on {1}".format(oiter + 1, socket.gethostname())
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    N_weights, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)

    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    V0 = npr.randn(N_weights) * velocity_scale
    losses = []
    d_losses = []
    alpha_0 = np.exp(log_alpha_0)
    for N_iters in all_N_iters:
        alphas = np.full(N_iters, alpha_0)
        betas = np.full(N_iters, beta_0)
        npr.seed(1)
        W0 = npr.randn(N_weights) * np.exp(log_param_scale)
        results = sgd(indexed_loss_fun, batch_idxs, N_iters, W0, V0, alphas, betas)
        losses.append(results['loss_final'])
        d_losses.append(d_log_loss(alpha_0, results['d_alphas']))
    return losses, d_losses
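# `d_log_loss` is not defined in these snippets. It is assumed to apply the
# chain rule for a log-parameterized quantity: if x = exp(log_x) * base, then
# dL/dlog_x = sum(x * dL/dx). That reading fits both kinds of call site here
# (the log learning rate above, and later the log initialization scale).
# A minimal sketch, not necessarily the real helper:
def d_log_loss_sketch(x, dL_dx):
    return np.sum(x * dL_dx)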
def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    N_weights, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)

    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)
    npr.seed(1)
    V0 = npr.randn(N_weights) * velocity_scale
    W0 = npr.randn(N_weights) * np.exp(log_param_scale)
    output = []
    for i in range(N_meta_iter):
        print "Meta iteration {0}".format(i)
        results = sgd(indexed_loss_fun, batch_idxs, N_iters, W0, V0,
                      np.exp(log_alphas), betas, record_learning_curve=True)
        learning_curve = results['learning_curve']
        d_log_alphas = np.exp(log_alphas) * results['d_alphas']
        output.append((learning_curve, log_alphas, d_log_alphas))
        log_alphas = log_alphas - meta_alpha * d_log_alphas
    return output
def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    N_weights, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)

    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)
    npr.seed(1)
    V0 = npr.randn(N_weights) * velocity_scale
    W0 = npr.randn(N_weights) * np.exp(log_param_scale)
    output = []
    for i in range(N_meta_iter):
        print "Meta iteration {0}".format(i)
        results = sgd(indexed_loss_fun, batch_idxs, N_iters, W0, V0,
                      np.exp(log_alphas), betas, record_learning_curve=True)
        learning_curve = results['learning_curve']
        d_log_alphas = np.exp(log_alphas) * results['d_alphas']
        output.append((learning_curve, log_alphas, d_log_alphas))
        log_alphas = log_alphas - meta_alpha * step_smooth(d_log_alphas, iter_per_epoch)
    return output
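# `step_smooth` is undefined here. A plausible reading of the call above is a
# block average: smooth the per-iteration hypergradient over `width`
# iterations (one epoch), so every step within an epoch receives the same
# learning-rate update. This is purely a hypothetical sketch.
def step_smooth_sketch(d, width):
    smoothed = np.zeros(len(d))
    for start in range(0, len(d), width):
        block = slice(start, min(start + width, len(d)))
        smoothed[block] = np.mean(d[block])
    return smoothed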
def hyperloss(hyperparam_vect, i_hyper, alphabets, verbose=True, report_train_loss=False):
    RS = RandomState((seed, i_hyper, "hyperloss"))
    alphabet = shuffle_alphabet(RS.choice(alphabets), RS)
    N_train = alphabet['X'].shape[0] - N_valid_dpts
    train_data = dictslice(alphabet, slice(None, N_train))
    if report_train_loss:
        valid_data = dictslice(alphabet, slice(None, N_valid_dpts))
    else:
        valid_data = dictslice(alphabet, slice(N_train, None))

    def primal_loss(W, hyperparam_vect, i_primal, reg_penalty=True):
        RS = RandomState((seed, i_hyper, i_primal))
        idxs = RS.permutation(N_train)[:batch_size]
        minibatch = dictslice(train_data, idxs)
        loss = reg_loss_fun(W, minibatch, hyperparam_vect, reg_penalty)
        if verbose and i_primal % 10 == 0:
            print "Iter {0}, loss: {1}".format(i_primal, getval(loss))
        return loss

    W0 = RS.randn(N_weights) * initialization_scale
    W_final = sgd(grad(primal_loss), hyperparam_vect, W0, alpha, beta, N_iters, callback=None)
    return reg_loss_fun(W_final, valid_data, hyperparam_vect, reg_penalty=False)
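# `dictslice` is assumed to apply one index (or slice) to every array in a
# dict of equal-length arrays such as {'X': inputs, 'T': targets}:
def dictslice_sketch(data, idxs):
    return {k: v[idxs] for k, v in data.iteritems()}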
def hyperloss(transform_vect, i_hyper, record_results=False):
    RS = RandomState((seed, i_hyper, "hyperloss"))

    def sub_primal_stochastic_loss(z_vect, transform_vect, i_primal, i_script):
        RS = RandomState((seed, i_hyper, i_primal, i_script))
        N_train = train_data[i_script]['X'].shape[0]
        idxs = RS.permutation(N_train)[:batch_size]
        minibatch = dictslice(train_data[i_script], idxs)
        loss = loss_from_latents(z_vect, transform_vect, i_script, minibatch)
        if i_primal % N_thin == 0 and i_script == 0:
            print "Iter {0}, full losses: train: {1}, valid: {2}".format(
                i_primal,
                total_loss(train_data, getval(z_vect)),
                total_loss(valid_data, getval(z_vect)))
        if i_script == 0:  # Only add regularization once.
            loss += regularization(z_vect)
        return loss

    def total_loss(data, z_vect):
        return np.mean([loss_from_latents(z_vect, transform_vect, i_script, data[i_script])
                        for i_script in range(N_scripts)])

    z_vect_0 = RS.randn(script_parser.vect.size) * np.exp(log_initialization_scale)
    z_vect_final = sgd(grad(sub_primal_stochastic_loss), transform_vect, z_vect_0,
                       alpha, beta, N_iters, N_scripts_per_iter, callback=None)
    valid_loss = total_loss(valid_data, z_vect_final)
    if record_results:
        results['valid_loss'].append(valid_loss)
        results['train_loss'].append(total_loss(train_data, z_vect_final))
        # results['tests_loss'].append(total_loss(tests_data, z_vect_final))
    return valid_loss
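# `RandomState` is seeded with an arbitrary tuple, giving each
# (seed, i_hyper, i_primal, ...) combination its own reproducible stream.
# One way to build that on top of numpy's RandomState (an assumption, not
# necessarily the project's implementation):
import hashlib

class RandomStateSketch(npr.RandomState):
    def __init__(self, description):
        digest = hashlib.md5(repr(description)).hexdigest()  # hash the tuple
        super(RandomStateSketch, self).__init__(int(digest, 16) % (2 ** 32))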
def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weights = len(parser.vect)

    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs], L2_reg=L2_reg)

    losses = []
    d_losses = []
    for log_alpha_0 in all_log_alpha_0:
        npr.seed(0)
        V0 = npr.randn(N_weights) * velocity_scale
        alpha_0 = np.exp(log_alpha_0)
        alphas = np.full(N_iters, alpha_0)
        betas = np.full(N_iters, beta_0)
        W0 = npr.randn(N_weights) * np.exp(log_param_scale)
        results = sgd(indexed_loss_fun, batch_idxs, N_iters, W0, V0, alphas, betas)
        losses.append(results['loss_final'])
        d_losses.append(d_log_loss(alpha_0, results['d_alphas']))
    return losses, d_losses
def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weights = len(parser.vect)

    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs], L2_reg=L2_reg)

    losses = []
    d_losses = []
    for log_alpha_0 in log_stepsizes:
        npr.seed(0)
        V0 = npr.randn(N_weights) * velocity_scale
        alpha_0 = np.exp(log_alpha_0)
        alphas = np.full(N_iters, alpha_0)
        betas = np.full(N_iters, beta_0)
        W0 = npr.randn(N_weights) * np.exp(log_param_scale)
        results = sgd(indexed_loss_fun, batch_idxs, N_iters, W0, V0, alphas, betas)
        losses.append(results['loss_final'])
        d_losses.append(d_log_loss(alpha_0, results['d_alphas']))
    return losses, d_losses
def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    N_weights, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)

    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    V0 = npr.randn(N_weights) * velocity_scale
    losses = []
    d_losses = []
    for N_iters in all_N_iters:
        alphas = np.full(N_iters, alpha_0)
        betas = np.full(N_iters, beta_0)
        loss_curve = []
        d_loss_curve = []
        for log_param_scale in all_log_param_scale:
            print "log_param_scale {0}, N_iters {1}".format(log_param_scale, N_iters)
            npr.seed(1)
            W0 = npr.randn(N_weights) * np.exp(log_param_scale)
            results = sgd(indexed_loss_fun, batch_idxs, N_iters, W0, V0, alphas, betas)
            loss_curve.append(results['loss_final'])
            d_loss_curve.append(d_log_loss(W0, results['d_x']))
        losses.append(loss_curve)
        d_losses.append(d_loss_curve)
    with open('results.pkl', 'wb') as f:  # Binary mode for pickle.
        pickle.dump((losses, d_losses), f)
def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    N_weights, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)

    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)
    npr.seed(2)
    V0 = npr.randn(N_weights) * velocity_scale
    #W0 = npr.randn(N_weights) * np.exp(log_param_scale)
    bins = np.linspace(-1, 1, N_bins) * np.exp(log_param_scale)
    W_uniform = npr.rand(N_weights)
    output = []
    for i in range(N_meta_iter):
        print "Meta iteration {0}".format(i)
        W0, dW_dbins = bininvcdf(W_uniform, bins)
        results = sgd(indexed_loss_fun, batch_idxs, N_iters, W0, V0,
                      np.exp(log_alphas), betas, record_learning_curve=True)
        dL_dx = results['d_x']
        dL_dbins = np.dot(dL_dx, dW_dbins)
        learning_curve = results['learning_curve']
        output.append((learning_curve, bins))
        bins = bins - dL_dbins * bin_stepsize
        bins[[0, -1]] = bins[[0, -1]] - dL_dbins[[0, 1]] * bin_stepsize
        bins.sort()  # Sort in place.
    return output
def test_sgd():
    N_weights = 5
    W0 = 0.1 * npr.randn(N_weights)
    V0 = 0.1 * npr.randn(N_weights)
    N_data = 12
    batch_size = 4
    num_epochs = 3
    batch_idxs = BatchList(N_data, batch_size)
    N_iter = num_epochs * len(batch_idxs)
    alphas = 0.1 * npr.rand(len(batch_idxs) * num_epochs)
    betas = 0.5 + 0.2 * npr.rand(len(batch_idxs) * num_epochs)
    A = npr.randn(N_data, N_weights)

    def loss_fun(W, idxs):
        sub_A = A[idxs, :]
        return np.dot(np.dot(W, np.dot(sub_A.T, sub_A)), W)

    result = sgd(loss_fun, batch_idxs, N_iter, W0, V0, alphas, betas)
    d_x = result['d_x']
    d_v = result['d_v']
    d_alphas = result['d_alphas']
    d_betas = result['d_betas']

    def full_loss(W0, V0, alphas, betas):
        result = sgd(loss_fun, batch_idxs, N_iter, W0, V0, alphas, betas)
        x_final = result['x_final']
        return loss_fun(x_final, batch_idxs.all_idxs)

    d_an = (d_x, d_v, d_alphas, d_betas)
    d_num = nd(full_loss, W0, V0, alphas, betas)
    for i, (an, num) in enumerate(zip(d_an, d_num)):
        assert np.allclose(an, num, rtol=1e-3, atol=1e-4), \
            "Type {0}, diffs are: {1}".format(i, an - num)
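# `nd` is the numerical-differentiation helper the test compares against.
# A central-difference sketch over each argument in turn (assumed form, not
# necessarily the project's exact helper):
def nd_sketch(fun, *args):
    eps = 1e-4
    grads = []
    for i in range(len(args)):
        g = np.zeros(args[i].size)
        for j in range(args[i].size):
            perturbed = [np.array(a, dtype=float) for a in args]  # fresh copies
            perturbed[i].flat[j] += eps
            f_plus = fun(*perturbed)
            perturbed[i].flat[j] -= 2 * eps
            f_minus = fun(*perturbed)
            g[j] = (f_plus - f_minus) / (2 * eps)
        grads.append(g.reshape(args[i].shape))
    return tuple(grads)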
def hyperloss(transform_vect, i_hyper, record_results=False):
    RS = RandomState((seed, i_hyper, "hyperloss"))

    def primal_stochastic_loss(z_vect, transform_vect, i_primal):
        RS = RandomState((seed, i_hyper, i_primal))
        loss = 0.0
        for _ in range(N_scripts_per_iter):
            i_script = RS.randint(N_scripts)
            N_train = train_data[i_script]["X"].shape[0]
            idxs = RS.permutation(N_train)[:batch_size]
            minibatch = dictslice(train_data[i_script], idxs)
            loss += loss_from_latents(z_vect, transform_vect, i_script, minibatch)
        loss /= N_scripts_per_iter
        reg = regularization(z_vect)
        # if i_primal % 10 == 0:
        #     print "Iter {0}, loss {1}, reg {2}".format(i_primal, getval(loss), getval(reg))
        #     print "Full losses: train: {0}, valid: {1}".format(
        #         total_loss(train_data, getval(z_vect)),
        #         total_loss(valid_data, getval(z_vect)))
        return loss + reg

    def total_loss(data, z_vect):
        return np.mean([loss_from_latents(z_vect, transform_vect, i_script, data[i_script])
                        for i_script in range(N_scripts)])

    z_vect_0 = RS.randn(script_parser.vect.size) * np.exp(log_initialization_scale)
    z_vect_final = sgd(grad(primal_stochastic_loss), transform_vect, z_vect_0,
                       alpha, beta, N_iters, callback=None)
    valid_loss = total_loss(valid_data, z_vect_final)
    if record_results:
        results["valid_loss"].append(valid_loss)
        results["train_loss"].append(total_loss(train_data, z_vect_final))
        # results['tests_loss'].append(total_loss(tests_data, z_vect_final))
    return valid_loss
def train_z(data, transform_vect, RS):
    def primal_loss(z_vect, transform_vect, i_primal, record_results=False):
        w_vect = transform_weights(z_vect, transform_vect)
        loss = likelihood_loss(w_vect, data)
        reg = regularization(z_vect)
        if record_results and i_primal % N_thin == 0:
            print "Iter {0}: train: {1}".format(i_primal, getval(loss) / N_scripts)
        return loss + reg

    z_vect_0 = RS.randn(script_parser.vect.size) * np.exp(log_init_scale)
    return sgd(grad(primal_loss), transform_vect, z_vect_0, alpha, beta, N_iters)
def train_z(data, w_vect_0, reg):
    N_data = data['X'].shape[0]

    def primal_loss(w_vect, reg, i_primal, record_results=False):
        RS = RandomState((seed, i_primal, "primal"))
        idxs = RS.randint(N_data, size=batch_size)
        minibatch = dictslice(data, idxs)
        loss = loss_fun(w_vect, **minibatch)
        penalty = regularization(w_vect, reg)  # Avoid shadowing the reg hyperparameter.
        if record_results and i_primal % N_thin == 0:
            print "Iter {0}: train: {1}".format(i_primal, getval(loss))
        return loss + penalty

    return sgd(grad(primal_loss), reg, w_vect_0, alpha, beta, N_iters)
def train_z(data, z_vect_0, transform):
    N_data = data['X'].shape[0]

    def primal_loss(z_vect, transform, i_primal, record_results=False):
        RS = RandomState((seed, i_primal, "primal"))
        idxs = RS.randint(N_data, size=batch_size)
        minibatch = dictslice(data, idxs)
        w_vect = transform_weights(z_vect, transform)
        loss = loss_fun(w_vect, **minibatch)
        reg = regularization(z_vect)
        if record_results and i_primal % N_thin == 0:
            print "Iter {0}: train: {1}".format(i_primal, getval(loss))
        return loss + reg

    return sgd(grad(primal_loss), transform, z_vect_0, alpha, beta, N_iters)
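# `transform_weights` maps latent weights z to effective weights w under the
# learned transform. Its real form depends on the experiment; one
# hypothetical minimal version is an elementwise log-scaling:
def transform_weights_sketch(z_vect, transform):
    return z_vect * np.exp(transform)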
def run():
    train_images, train_labels, _, _, _ = load_data(normalize=True)
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg, return_parser=True)
    N_weights = parser.N

    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)
    npr.seed(2)
    V0 = npr.randn(N_weights) * velocity_scale
    #W0 = npr.randn(N_weights) * np.exp(log_param_scale)
    bindict = {k: np.linspace(-1, 1, N_bins) * np.exp(log_param_scale)  # Different cdf per layer.
               for k, v in parser.idxs_and_shapes.iteritems()}
    output = []
    for i in range(N_meta_iter):
        print "Meta iteration {0}".format(i)
        #X0, dX_dbins = bininvcdf(W_uniform, bins)
        X_uniform = npr.rand(N_weights)  # Weights are uniform samples passed through an inverse cdf.
        X0 = np.zeros(N_weights)
        dX_dbins = {}
        for k, cur_bins in bindict.iteritems():
            cur_slice, cur_shape = parser.idxs_and_shapes[k]
            cur_xs = X_uniform[cur_slice]
            cur_X0, cur_dX_dbins = bininvcdf(cur_xs, cur_bins)
            X0[cur_slice] = cur_X0
            dX_dbins[k] = cur_dX_dbins
        results = sgd(indexed_loss_fun, batch_idxs, N_iters, X0, V0,
                      np.exp(log_alphas), betas, record_learning_curve=True)
        dL_dx = results['d_x']
        learning_curve = results['learning_curve']
        output.append((learning_curve, bindict))

        # Update bins with one gradient step.
        for k, bins in bindict.iteritems():
            dL_dbins = np.dot(parser.get(dL_dx, k).flatten(), dX_dbins[k])
            bins = bins - dL_dbins * bin_stepsize
            bins[[0, -1]] = bins[[0, -1]] - dL_dbins[[0, 1]] * bin_stepsize
            bindict[k] = np.sort(bins)
        bindict = bindict.copy()  # Don't mutate the snapshot already stored in output.
    return output
def hyperloss(transform_vect, i_hyper, record_results=True):
    RS = RandomState((seed, i_hyper, "hyperloss"))

    def primal_loss(z_vect, transform_vect, i_primal, record_results=False):
        w_vect = transform_weights(z_vect, transform_vect)
        loss = total_loss(w_vect, train_data)
        reg = regularization(z_vect)
        if VERBOSE and record_results and i_primal % N_thin == 0:
            print "Iter {0}: train: {1}, valid: {2}, reg: {3}".format(
                i_primal,
                getval(loss) / N_scripts,
                total_loss(getval(w_vect), valid_data) / N_scripts,
                getval(reg))
        return loss + reg

    z_vect_0 = RS.randn(script_parser.vect.size) * np.exp(log_initialization_scale)
    z_vect_final = sgd(grad(primal_loss), transform_vect, z_vect_0,
                       alpha, beta, N_iters, callback=None)
    w_vect_final = transform_weights(z_vect_final, transform_vect)
    valid_loss = total_loss(w_vect_final, valid_data)
    if record_results:
        results["valid_loss"].append(getval(valid_loss) / N_scripts)
        results["train_loss"].append(total_loss(w_vect_final, train_data) / N_scripts)
    return valid_loss
def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    N_weights, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)

    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)
    npr.seed(2)
    V0 = npr.randn(N_weights) * velocity_scale
    #W0 = npr.randn(N_weights) * np.exp(log_param_scale)
    bins = np.linspace(-1, 1, N_bins) * np.exp(log_param_scale)
    W_uniform = npr.rand(N_weights)
    output = []
    for i in range(N_meta_iter):
        print "Meta iteration {0}".format(i)
        W0, dW_dbins = bininvcdf(W_uniform, bins)
        results = sgd(indexed_loss_fun, batch_idxs, N_iters, W0, V0,
                      np.exp(log_alphas), betas, record_learning_curve=True)
        dL_dx = results['d_x']
        dL_dbins = np.dot(dL_dx, dW_dbins)
        learning_curve = results['learning_curve']
        output.append((learning_curve, bins))
        bins = bins - dL_dbins * bin_stepsize
        bins[[0, -1]] = bins[[0, -1]] - dL_dbins[[0, 1]] * bin_stepsize
        bins.sort()  # Sort in place.
    return output
def hyperloss(transform_vect, i_hyper, record_results=True):
    RS = RandomState((seed, i_hyper, "hyperloss"))

    def primal_loss(z_vect, transform_vect, i_primal, record_results=False):
        w_vect = transform_weights(z_vect, transform_vect)
        loss = total_loss(w_vect, train_data)
        reg = regularization(z_vect)
        if VERBOSE and record_results and i_primal % N_thin == 0:
            print "Iter {0}: train: {1}, valid: {2}, reg: {3}".format(
                i_primal,
                getval(loss) / N_scripts,
                total_loss(getval(w_vect), valid_data) / N_scripts,
                getval(reg))
        return loss + reg

    z_vect_0 = RS.randn(script_parser.vect.size) * np.exp(log_initialization_scale)
    z_vect_final = sgd(grad(primal_loss), transform_vect, z_vect_0,
                       alpha, beta, N_iters, callback=None)
    w_vect_final = transform_weights(z_vect_final, transform_vect)
    valid_loss = total_loss(w_vect_final, valid_data)
    if record_results:
        results['valid_loss'].append(getval(valid_loss) / N_scripts)
        results['train_loss'].append(total_loss(w_vect_final, train_data) / N_scripts)
        results['tests_loss'].append(total_loss(w_vect_final, tests_data) / N_scripts)
    return valid_loss
def full_loss(W0, V0, alphas, betas):
    result = sgd(loss_fun, batch_idxs, N_iter, W0, V0, alphas, betas)
    x_final = result['x_final']
    return loss_fun(x_final, batch_idxs.all_idxs)
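# `BatchList`, used throughout, is assumed to partition range(N_total) into
# contiguous minibatch slices and to expose `all_idxs` for full-batch
# evaluation, as in full_loss above:
class BatchListSketch(list):
    def __init__(self, N_total, N_batch):
        start = 0
        while start < N_total:
            self.append(slice(start, start + N_batch))
            start += N_batch
        self.all_idxs = slice(0, N_total)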