def hyperloss(transform_vect, i_hyper):
    RS = RandomState((seed, i_hyper, "hyperloss"))
    cur_train_data, cur_valid_data = random_partition(train_data, RS, [10, 2])
    z_vect_final = train_z(cur_train_data, transform_vect, RS)
    w_vect_final = transform_weights(z_vect_final, transform_vect)
    return likelihood_loss(w_vect_final, cur_valid_data) / N_scripts
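hyperloss relies on random_partition to split the training alphabets into an inner training set and a held-out validation set. As a rough sketch of the assumed behavior (the shuffling scheme and the use of a numpy-style RandomState are assumptions, not the source's implementation), it could look like:

def random_partition(data, RS, sizes):
    # Shuffle the list of per-script datasets with the seeded RNG, then cut
    # it into consecutive groups of the requested sizes.
    order = RS.permutation(len(data))
    shuffled = [data[i] for i in order]
    groups, start = [], 0
    for size in sizes:
        groups.append(shuffled[start:start + size])
        start += size
    return groups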
def run():
    RS = RandomState((seed, "top_rs"))
    all_data = omniglot.load_flipped_alphabets()
    train_data, tests_data = random_partition(all_data, RS, [12, 3])
    w_parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weights = w_parser.vect.size

    # One private parameter vector z per script (alphabet).
    script_parser = VectorParser()
    for i_script in range(N_scripts):
        script_parser[i_script] = np.zeros(N_weights)
    transform_parser = make_transform([0] * N_layers)

    def get_layers(vect):
        # Slice the flat (N_scripts x N_weights) z matrix into per-layer
        # blocks; each layer's weights and biases are contiguous.
        layers = []
        for i_layer in range(N_layers):
            weights_by_scripts = vect.reshape((N_scripts, N_weights))
            weights_idxs, _ = w_parser.idxs_and_shapes[('weights', i_layer)]
            biases_idxs, _ = w_parser.idxs_and_shapes[('biases', i_layer)]
            assert weights_idxs.stop == biases_idxs.start
            layer_idxs = slice(weights_idxs.start, biases_idxs.stop)
            layers.append(weights_by_scripts[:, layer_idxs])
        return layers

    def transform_weights(z_vect, transform_vect):
        # Mix each layer's per-script parameters through that layer's
        # transform matrix to get the actual network weights.
        z_layers = get_layers(z_vect)
        transform = transform_parser.new_vect(transform_vect)
        w_layers = [np.dot(transform[i], z) for i, z in enumerate(z_layers)]
        return np.concatenate(w_layers, axis=1).ravel()

    def likelihood_loss(w_vect, data):
        w = script_parser.new_vect(w_vect)
        return sum([loss_fun(w[i], **script_data)
                    for i, script_data in enumerate(data)])

    def regularization(z_vect):
        # L2 penalty on z with strength exp(log_L2).
        return np.dot(z_vect, z_vect) * np.exp(log_L2)

    def train_z(data, transform_vect, RS):
        # Train the per-script parameters z with SGD, holding the transform fixed.
        def primal_loss(z_vect, transform_vect, i_primal, record_results=False):
            w_vect = transform_weights(z_vect, transform_vect)
            loss = likelihood_loss(w_vect, data)
            reg = regularization(z_vect)
            if record_results and i_primal % N_thin == 0:
                print "Iter {0}: train: {1}".format(i_primal, getval(loss) / N_scripts)
            return loss + reg
        z_vect_0 = RS.randn(script_parser.vect.size) * np.exp(log_init_scale)
        return sgd(grad(primal_loss), transform_vect, z_vect_0, alpha, beta, N_iters)

    def train_sharing():
        # Meta-optimize the per-layer transforms by gradient descent on the
        # validation loss; the hypergradient is taken through the whole inner
        # SGD run inside train_z.
        def hyperloss(transform_vect, i_hyper):
            RS = RandomState((seed, i_hyper, "hyperloss"))
            cur_train_data, cur_valid_data = random_partition(train_data, RS, [10, 2])
            z_vect_final = train_z(cur_train_data, transform_vect, RS)
            w_vect_final = transform_weights(z_vect_final, transform_vect)
            return likelihood_loss(w_vect_final, cur_valid_data) / N_scripts
        hypergrad = grad(hyperloss)
        cur_transform_vect = make_transform([init_script_corr] * N_layers).vect
        for i_hyper in range(N_meta_iter):
            print "Hyper iter {0}".format(i_hyper)
            grad_transform = hypergrad(cur_transform_vect, i_hyper)
            cur_transform_vect = cur_transform_vect - grad_transform * meta_alpha
        return cur_transform_vect

    # Retrain from scratch under each sharing scheme and report train/test losses.
    transform_vects, train_losses, tests_losses = {}, {}, {}
    transform_vects['no_sharing'] = make_transform([0, 0, 0]).vect
    transform_vects['full_sharing'] = make_transform([1, 0, 0]).vect
    transform_vects['learned_sharing'] = train_sharing()
    for name in transform_vects.keys():
        RS = RandomState("final_training")
        tv = transform_vects[name]
        trained_z = train_z(train_data, tv, RS)
        trained_w = transform_weights(trained_z, tv)
        train_losses[name] = likelihood_loss(trained_w, train_data) / N_scripts
        tests_losses[name] = likelihood_loss(trained_w, tests_data) / N_scripts
        print "{0} : train: {1}, test: {2}".format(name, train_losses[name],
                                                   tests_losses[name])
    return transform_parser, transform_vects, train_losses, tests_losses
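The listing treats make_transform as a black box. A plausible sketch, under the assumption that each per-layer entry is a scalar sharing level that interpolates between the identity matrix (every script keeps its own weights) and a uniform averaging matrix (all scripts share one set of weights); it reuses VectorParser and N_scripts from the surrounding listing, and the exact parameterization used by the source may differ:

import numpy as np

def make_transform(layer_corr):
    # One N_scripts x N_scripts mixing matrix per layer: a sharing level of 0
    # gives the identity, 1 gives uniform averaging across scripts.
    parser = VectorParser()
    for i_layer, corr in enumerate(layer_corr):
        uniform = np.full((N_scripts, N_scripts), 1.0 / N_scripts)
        parser[i_layer] = (1 - corr) * np.eye(N_scripts) + corr * uniform
    return parser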
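train_z hands the primal gradient to sgd(gradfun, meta, x0, alpha, beta, N_iters). A minimal momentum-SGD sketch with that call signature, assuming alpha is a step size and beta a momentum decay; the source's optimizer is presumably differentiable end to end so the hypergradient can flow through it, and this sketch only illustrates the interface:

import numpy as np

def sgd(gradfun, meta, x0, alpha, beta, N_iters):
    # Momentum SGD on x, starting from x0; gradfun(x, meta, i) is the
    # gradient of the primal loss with respect to x at iteration i.
    x = x0.copy()
    v = np.zeros_like(x0)
    for i in range(N_iters):
        g = gradfun(x, meta, i)
        v = beta * v - (1.0 - beta) * g
        x = x + alpha * v
    return x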