# Assumed module-level context for these experiment scripts (not shown in
# this file): pickle, collections.defaultdict, numpy as np, autograd's grad,
# the hypergrad helpers RandomState, VectorParser, sgd, getval,
# random_partition, make_nn_funs, the omniglot data module, and the
# per-experiment hyperparameters (seed, layer_sizes, N_scripts, N_layers,
# the various N_* counts, alpha, beta, meta_alpha, and log_* scales).

def plot():
    import matplotlib.pyplot as plt
    import matplotlib as mpl
    mpl.rcParams['font.family'] = 'serif'
    mpl.rcParams['image.interpolation'] = 'none'
    with open('results.pkl') as f:
        transform_parser, transform_vects, train_losses, tests_losses = pickle.load(f)

    # Show the dataset, with a dashed divider above the rotated alphabets.
    RS = RandomState((seed, "plotting"))
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(111)
    omniglot.show_alphabets(omniglot.load_rotated_alphabets(RS, normalize=False, angle=90), ax=ax)
    ax.plot([0, 20 * 28], [5 * 28, 5 * 28], '--k')
    ax.text(-15, 5 * 28 * 3 / 2 - 60, "Rotated alphabets", rotation='vertical')
    plt.savefig("all_alphabets.png")

    # Plotting transformations
    names = ['no_sharing', 'full_sharing', 'learned_sharing']
    title_strings = {'no_sharing'      : 'Independent nets',
                     'full_sharing'    : 'Shared bottom layer',
                     'learned_sharing' : 'Learned sharing'}
    covar_imgs = {name: build_covar_image(transform_vects[name]) for name in names}
    for i, name in enumerate(names):
        fig = plt.figure(0)
        fig.clf()
        fig.set_size_inches((2, 6))
        ax = fig.add_subplot(111)
        ax.matshow(covar_imgs[name], cmap=mpl.cm.binary)
        ax.set_xticks([])
        ax.set_yticks([])
        plt.savefig('learned_corr_{0}.png'.format(i))
        plt.savefig('learned_corr_{0}.pdf'.format(i))
def plot():
    import matplotlib.pyplot as plt
    import matplotlib as mpl
    mpl.rcParams['font.family'] = 'serif'
    mpl.rcParams['image.interpolation'] = 'none'
    with open('results.pkl') as f:
        transform_parser, transform_vects, train_losses, tests_losses = pickle.load(f)

    RS = RandomState((seed, "plotting"))
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(111)
    alphabets = omniglot.load_rotated_alphabets(RS, normalize=False, angle=90)
    num_cols = 15
    num_rows = 5
    omniglot.show_alphabets(alphabets, ax=ax, n_cols=num_cols)
    ax.plot([0, num_cols * 28], [num_rows * 28, num_rows * 28], '--k')
    # ax.text(-15, 5 * 28 * 3 / 2 - 60, "Rotated alphabets", rotation='vertical')
    plt.savefig("all_alphabets.png", bbox_inches='tight')

    # Plotting transformations
    names = ['no_sharing', 'full_sharing', 'learned_sharing']
    title_strings = {'no_sharing'      : 'Independent nets',
                     'full_sharing'    : 'Shared bottom layer',
                     'learned_sharing' : 'Learned sharing'}
    covar_imgs = {name: build_covar_image(transform_vects[name]) for name in names}
    for model_ix, model_name in enumerate(names):
        image_list = covar_imgs[model_name]
        for layer_ix, image in enumerate(image_list):
            fig = plt.figure(0)
            fig.clf()
            fig.set_size_inches((1, 1))
            ax = fig.add_subplot(111)
            ax.matshow(image, cmap=mpl.cm.binary, vmin=0.0, vmax=1.0)
            ax.set_xticks([])
            ax.set_yticks([])
            plt.savefig('minifigs/learned_corr_{0}_{1}.png'.format(model_name, layer_ix),
                        bbox_inches='tight')
            plt.savefig('minifigs/learned_corr_{0}_{1}.pdf'.format(model_name, layer_ix),
                        bbox_inches='tight')

    # Write results to a nice latex table for paper.
    with open('results_table.tex', 'w') as f:
        f.write(" & No Sharing & Full Sharing & Learned \\\\\n")
        f.write("Training loss & {:2.2f} & {:2.2f} & {:2.2f} \\\\\n".format(
            train_losses['no_sharing'],
            train_losses['full_sharing'],
            train_losses['learned_sharing']))
        f.write("Test loss & {:2.2f} & {:2.2f} & \\bf {:2.2f} ".format(
            tests_losses['no_sharing'],
            tests_losses['full_sharing'],
            tests_losses['learned_sharing']))
def plot(): import matplotlib.pyplot as plt import matplotlib as mpl mpl.rcParams["font.family"] = "serif" mpl.rcParams["image.interpolation"] = "none" with open("results.pkl") as f: transform_parser, transform_vects, train_losses, tests_losses = pickle.load(f) RS = RandomState((seed, "plotting")) fig = plt.figure(0) fig.clf() ax = fig.add_subplot(111) alphabets = omniglot.load_rotated_alphabets(RS, normalize=False, angle=90) num_cols = 15 num_rows = 5 omniglot.show_alphabets(alphabets, ax=ax, n_cols=num_cols) ax.plot([0, num_cols * 28], [num_rows * 28, num_rows * 28], "--k") # ax.text(-15, 5 * 28 * 3 / 2 - 60, "Rotated alphabets", rotation='vertical') plt.savefig("all_alphabets.png", bbox_inches="tight") # Plotting transformations names = ["no_sharing", "full_sharing", "learned_sharing"] title_strings = { "no_sharing": "Independent nets", "full_sharing": "Shared bottom layer", "learned_sharing": "Learned sharing", } covar_imgs = {name: build_covar_image(transform_vects[name]) for name in names} for model_ix, model_name in enumerate(names): image_list = covar_imgs[model_name] for layer_ix, image in enumerate(image_list): fig = plt.figure(0) fig.clf() fig.set_size_inches((1, 1)) ax = fig.add_subplot(111) ax.matshow(image, cmap=mpl.cm.binary, vmin=0.0, vmax=1.0) ax.set_xticks([]) ax.set_yticks([]) plt.savefig("minifigs/learned_corr_{0}_{1}.png".format(model_name, layer_ix), bbox_inches="tight") plt.savefig("minifigs/learned_corr_{0}_{1}.pdf".format(model_name, layer_ix), bbox_inches="tight") # Write results to a nice latex table for paper. with open("results_table.tex", "w") as f: f.write(" & No Sharing & Full Sharing & Learned \\\\\n") f.write( "Training loss & {:2.2f} & {:2.2f} & {:2.2f} \\\\\n".format( train_losses["no_sharing"], train_losses["full_sharing"], train_losses["learned_sharing"] ) ) f.write( "Test loss & {:2.2f} & {:2.2f} & \\bf {:2.2f} ".format( tests_losses["no_sharing"], tests_losses["full_sharing"], tests_losses["learned_sharing"] ) )
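# `build_covar_image` is not defined in this file. The sketch below is a
# hypothetical reconstruction matching the second plot() above, which
# expects one image per layer: it assumes the transform vector concatenates
# N_layers square (N_scripts x N_scripts) matrices and renders each one as a
# correlation-style image in [0, 1]. Illustration only, not the original helper.
def build_covar_image(transform_vect):
    T_all = transform_vect.reshape((N_layers, N_scripts, N_scripts))
    images = []
    for T in T_all:
        C = np.dot(T, T.T)                         # Gram matrix of script mixing weights
        d = np.sqrt(np.diag(C))
        images.append(np.abs(C) / np.outer(d, d))  # normalize to unit diagonal
    return images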
def run(): RS = RandomState((seed, "top_rs")) all_data = omniglot.load_rotated_alphabets(RS) train_data, tests_data = random_partition(all_data, RS, [12, 3]) w_parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes) N_weights = w_parser.vect.size script_parser = VectorParser() for i_script in range(N_scripts): script_parser[i_script] = np.zeros(N_weights) transform_parser = make_transform([0] * N_layers) def get_layers(vect): layers = [] for i_layer in range(N_layers): weights_by_scripts = vect.reshape((N_scripts, N_weights)) weights_idxs, _ = w_parser.idxs_and_shapes[('weights', i_layer)] biases_idxs, _ = w_parser.idxs_and_shapes[('biases', i_layer)] assert weights_idxs.stop == biases_idxs.start layer_idxs = slice(weights_idxs.start, biases_idxs.stop) layers.append(weights_by_scripts[:, layer_idxs]) return layers def transform_weights(z_vect, transform_vect): z_layers = get_layers(z_vect) transform = transform_parser.new_vect(transform_vect) w_layers = [np.dot(transform[i], z) for i, z in enumerate(z_layers)] return np.concatenate(w_layers, axis=1).ravel() def likelihood_loss(w_vect, data): w = script_parser.new_vect(w_vect) return sum([loss_fun(w[i], **script_data) for i, script_data in enumerate(data)]) def regularization(z_vect): return np.dot(z_vect, z_vect) * np.exp(log_L2) def train_z(data, transform_vect, RS): def primal_loss(z_vect, transform_vect, i_primal, record_results=False): w_vect = transform_weights(z_vect, transform_vect) loss = likelihood_loss(w_vect, data) reg = regularization(z_vect) if record_results and i_primal % N_thin == 0: print "Iter {0}: train: {1}".format(i_primal, getval(loss) / N_scripts) return loss + reg z_vect_0 = RS.randn(script_parser.vect.size) * np.exp(log_init_scale) return sgd(grad(primal_loss), transform_vect, z_vect_0, alpha, beta, N_iters) def train_sharing(): def hyperloss(transform_vect, i_hyper): RS = RandomState((seed, i_hyper, "hyperloss")) cur_train_data, cur_valid_data = random_partition(train_data, RS, [10, 2]) z_vect_final = train_z(cur_train_data, transform_vect, RS) w_vect_final = transform_weights(z_vect_final, transform_vect) return likelihood_loss(w_vect_final, cur_valid_data) / N_scripts hypergrad = grad(hyperloss) cur_transform_vect = make_transform([init_script_corr] * N_layers).vect for i_hyper in range(N_meta_iter): print "Hyper iter {0}".format(i_hyper) grad_transform = hypergrad(cur_transform_vect, i_hyper) cur_transform_vect = cur_transform_vect - grad_transform * meta_alpha return cur_transform_vect transform_vects, train_losses, tests_losses = {}, {}, {} transform_vects['no_sharing'] = make_transform([0, 0, 0]).vect transform_vects['full_sharing'] = make_transform([1, 0, 0]).vect transform_vects['learned_sharing'] = train_sharing() for name in transform_vects.keys(): RS = RandomState("final_training") tv = transform_vects[name] trained_z = train_z(train_data, tv, RS) trained_w = transform_weights(trained_z, tv) train_losses[name] = likelihood_loss(trained_w, train_data) / N_scripts tests_losses[name] = likelihood_loss(trained_w, tests_data) / N_scripts print "{0} : train: {1}, test: {2}".format(name, train_losses[name], tests_losses[name]) return transform_parser, transform_vects, train_losses, tests_losses
def run(): """Three different parsers: w_parser[('biases', i_layer)] : neural net weights/biases per layer for a single script script_parser[i_script] : weights vector for each script transform_parser[i_layer] : transform matrix (scripts x scripts) for each alphabet""" RS = RandomState((seed, "top_rs")) train_data, valid_data, tests_data = omniglot.load_rotated_alphabets( [11, 2, 2], RS) w_parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes) N_weights = w_parser.vect.size transform_parser = make_transform(N_scripts, script_corr_init) script_parser = VectorParser() for i_script in range(N_scripts): script_parser[i_script] = np.zeros(N_weights) def get_layers(vect): layers = [] for i_layer in range(N_layers): weights_by_scripts = vect.reshape((N_scripts, N_weights)) weights_idxs, _ = w_parser.idxs_and_shapes[('weights', i_layer)] biases_idxs, _ = w_parser.idxs_and_shapes[('biases', i_layer)] assert weights_idxs.stop == biases_idxs.start layer_idxs = slice(weights_idxs.start, biases_idxs.stop) layers.append(weights_by_scripts[:, layer_idxs]) return layers def transform_weights(z_vect, transform_vect): z_layers = get_layers(z_vect) transform = transform_parser.new_vect(transform_vect) w_layers = [np.dot(transform[i], z) for i, z in enumerate(z_layers)] return np.concatenate(w_layers, axis=1).ravel() def total_loss(w_vect, data): w = script_parser.new_vect(w_vect) return sum([ loss_fun(w[i], **script_data) for i, script_data in enumerate(data) ]) def regularization(z_vect): return np.dot(z_vect, z_vect) * np.exp(log_L2_init) results = defaultdict(list) def hyperloss(transform_vect, i_hyper, record_results=True): RS = RandomState((seed, i_hyper, "hyperloss")) def primal_loss(z_vect, transform_vect, i_primal, record_results=False): w_vect = transform_weights(z_vect, transform_vect) loss = total_loss(w_vect, train_data) reg = regularization(z_vect) if VERBOSE and record_results and i_primal % N_thin == 0: print "Iter {0}: train: {1}, valid: {2}, reg: {3}".format( i_primal, getval(loss) / N_scripts, total_loss(getval(w_vect), valid_data) / N_scripts, getval(reg)) return loss + reg z_vect_0 = RS.randn( script_parser.vect.size) * np.exp(log_initialization_scale) z_vect_final = sgd(grad(primal_loss), transform_vect, z_vect_0, alpha, beta, N_iters, callback=None) w_vect_final = transform_weights(z_vect_final, transform_vect) valid_loss = total_loss(w_vect_final, valid_data) if record_results: results['valid_loss'].append(getval(valid_loss) / N_scripts) results['train_loss'].append( total_loss(getval(w_vect_final), train_data) / N_scripts) results['tests_loss'].append( total_loss(getval(w_vect_final), tests_data) / N_scripts) return valid_loss all_transforms = [] hypergrad = grad(hyperloss) for i_hyper in range(N_meta_iter): grad_transform = hypergrad(transform_parser.vect, i_hyper, record_results=True) transform_parser.vect = transform_parser.vect - grad_transform * meta_alpha all_transforms.append(transform_parser.as_dict()) print "Hyper iter {0}".format(i_hyper) print "Results", {k: v[-1] for k, v in results.iteritems()} return results, all_transforms
def run(): """Three different parsers: w_parser[('biases', i_layer)] : neural net weights/biases per layer for a single script script_parser[i_script] : weights vector for each script transform_parser[i_layer] : transform matrix (scripts x scripts) for each alphabet""" RS = RandomState((seed, "top_rs")) train_data, valid_data, tests_data = omniglot.load_rotated_alphabets([11, 2, 2], RS) w_parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes) N_weights = w_parser.vect.size transform_parser = make_transform(N_scripts, script_corr_init) script_parser = VectorParser() for i_script in range(N_scripts): script_parser[i_script] = np.zeros(N_weights) def get_layers(vect): layers = [] for i_layer in range(N_layers): weights_by_scripts = vect.reshape((N_scripts, N_weights)) weights_idxs, _ = w_parser.idxs_and_shapes[('weights', i_layer)] biases_idxs, _ = w_parser.idxs_and_shapes[('biases', i_layer)] assert weights_idxs.stop == biases_idxs.start layer_idxs = slice(weights_idxs.start, biases_idxs.stop) layers.append(weights_by_scripts[:, layer_idxs]) return layers def transform_weights(z_vect, transform_vect): z_layers = get_layers(z_vect) transform = transform_parser.new_vect(transform_vect) w_layers = [np.dot(transform[i], z) for i, z in enumerate(z_layers)] return np.concatenate(w_layers, axis=1).ravel() def total_loss(w_vect, data): w = script_parser.new_vect(w_vect) return sum([loss_fun(w[i], **script_data) for i, script_data in enumerate(data)]) def regularization(z_vect): return np.dot(z_vect, z_vect) * np.exp(log_L2_init) results = defaultdict(list) def hyperloss(transform_vect, i_hyper, record_results=True): RS = RandomState((seed, i_hyper, "hyperloss")) def primal_loss(z_vect, transform_vect, i_primal, record_results=False): w_vect = transform_weights(z_vect, transform_vect) loss = total_loss(w_vect, train_data) reg = regularization(z_vect) if VERBOSE and record_results and i_primal % N_thin == 0: print "Iter {0}: train: {1}, valid: {2}, reg: {3}".format( i_primal, getval(loss) / N_scripts, total_loss(getval(w_vect), valid_data) / N_scripts, getval(reg)) return loss + reg z_vect_0 = RS.randn(script_parser.vect.size) * np.exp(log_initialization_scale) z_vect_final = sgd(grad(primal_loss), transform_vect, z_vect_0, alpha, beta, N_iters, callback=None) w_vect_final = transform_weights(z_vect_final, transform_vect) valid_loss = total_loss(w_vect_final, valid_data) if record_results: results['valid_loss'].append(getval(valid_loss) / N_scripts) results['train_loss'].append(total_loss(getval(w_vect_final), train_data) / N_scripts) results['tests_loss'].append(total_loss(getval(w_vect_final), tests_data) / N_scripts) return valid_loss all_transforms = [] hypergrad = grad(hyperloss) for i_hyper in range(N_meta_iter): grad_transform = hypergrad(transform_parser.vect, i_hyper, record_results=True) transform_parser.vect = transform_parser.vect - grad_transform * meta_alpha all_transforms.append(transform_parser.as_dict()) print "Hyper iter {0}".format(i_hyper) print "Results", {k : v[-1] for k, v in results.iteritems()} return results, all_transforms