def run():
    (train_images, train_labels),\
    (tests_images, tests_labels) = load_data_subset(N_train, N_tests)
    parser, pred_fun, nllfun, frac_err = make_nn_funs(layer_sizes, L2_per_dpt)
    N_param = len(parser.vect)

    print "Running experiment..."
    results = defaultdict(list)
    for i in xrange(N_samples):
        x_init_scale = np.full(N_param, init_scale)

        def indexed_loss_fun(w, i_iter):
            rs = RandomState((seed, i, i_iter))
            idxs = rs.randint(N_train, size=batch_size)
            return nllfun(w, train_images[idxs], train_labels[idxs]) * N_train
        gradfun = grad(indexed_loss_fun)

        def callback(x, t, v, entropy):
            results[("entropy", i)].append(entropy / N_train)
            results[("v_norm", i)].append(norm(v) / np.sqrt(N_param))
            results[("minibatch_likelihood", i)].append(-indexed_loss_fun(x, t))
            if t % thin != 0 and t != N_iter and t != 0:
                return
            results[("iterations", i)].append(t)
            results[("train_likelihood", i)].append(-nllfun(x, train_images, train_labels))
            results[("tests_likelihood", i)].append(-nllfun(x, tests_images, tests_labels))
            results[("tests_error", i)].append(frac_err(x, tests_images, tests_labels))
            print "Iteration {0:5} Train likelihood {1:2.4f} Test likelihood {2:2.4f}" \
                  " Test Err {3:2.4f}".format(t, results[("train_likelihood", i)][-1],
                                              results[("tests_likelihood", i)][-1],
                                              results[("tests_error", i)][-1])

        rs = RandomState((seed, i))
        entropic_descent2(gradfun, callback=callback, x_scale=x_init_scale,
                          epsilon=epsilon, gamma=gamma, alpha=alpha,
                          annealing_schedule=annealing_schedule, rs=rs)
    return results
def run():
    all_results = []
    for width in widths:
        # Initialization scale based on the larger fan-in (input dimension vs. hidden width).
        init_scale = min(0.5 / np.sqrt(width), 0.5 / np.sqrt(784))

        def neg_log_prior(w):
            D = len(w)
            return 0.5 * D * np.log(2 * np.pi) \
                   + 0.5 * np.dot(w, w) / init_scale ** 2 \
                   + D * np.log(init_scale)

        (train_images, train_labels), (tests_images, tests_labels) = \
            load_data_subset(N_train, N_tests)
        layer_sizes = [784, 100, 10]
        parser, pred_fun, nllfun, frac_err = make_nn_funs(layer_sizes)
        N_param = len(parser.vect)
        print "Number of parameters in model: ", N_param

        def indexed_loss_fun(w, i_iter):
            rs = RandomState((seed, i_iter))
            idxs = rs.randint(N_train, size=batch_size)
            nll = nllfun(w, train_images[idxs], train_labels[idxs]) * N_train
            # nlp = neg_log_prior(w)
            return nll  # + nlp
        gradfun = grad(indexed_loss_fun)

        def callback(x, t, entropy):
            results["entropy_per_dpt"].append(entropy / N_train)
            results["x_rms"].append(np.sqrt(np.mean(x * x)))
            results["minibatch_likelihood"].append(-indexed_loss_fun(x, t))
            results["log_prior_per_dpt"].append(-neg_log_prior(x) / N_train)
            if t % thin != 0 and t != N_iter and t != 0:
                return
            results["iterations"].append(t)
            results["train_likelihood"].append(-nllfun(x, train_images, train_labels))
            results["tests_likelihood"].append(-nllfun(x, tests_images, tests_labels))
            results["tests_error"].append(frac_err(x, tests_images, tests_labels))
            results["marg_likelihood"].append(
                estimate_marginal_likelihood(results["log_prior_per_dpt"][-1],
                                             results["train_likelihood"][-1],
                                             results["entropy_per_dpt"][-1]))
            print "Iteration {0:5} Train lik {1:2.4f} Test lik {2:2.4f}" \
                  " Marg lik {3:2.4f} Test err {4:2.4f}".format(
                      t,
                      results["train_likelihood"][-1],
                      results["tests_likelihood"][-1],
                      results["marg_likelihood"][-1],
                      results["tests_error"][-1])

        results = defaultdict(list)
        rs = RandomState(seed)
        sgd_entropic_damped(gradfun, np.full(N_param, init_scale), N_iter, alpha,
                            rs, callback, width=width)
        all_results.append(results)
    return all_results
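# estimate_marginal_likelihood is defined elsewhere in the repo. A minimal sketch of
# what such an estimator can look like, assuming the per-datapoint convention used
# above (the callback records the log prior, train likelihood, and entropy each
# divided by N_train): the variational lower bound on the log marginal likelihood is
# E_q[log p(D, w)] + H[q], i.e. the sum of those three terms. The name and signature
# mirror the call site above; the body is an illustrative assumption, not the repo's
# implementation. Later run() variants call a two-argument form that presumably
# accounts for the prior elsewhere (the prior is folded into indexed_loss_fun there).
def estimate_marginal_likelihood(log_prior_per_dpt, train_likelihood_per_dpt,
                                 entropy_per_dpt):
    # Per-datapoint evidence lower bound: log prior + log likelihood + entropy.
    return log_prior_per_dpt + train_likelihood_per_dpt + entropy_per_dpt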
def run():
    (train_images, train_labels),\
    (tests_images, tests_labels) = load_data_subset(N_train, N_tests)
    parser, pred_fun, nllfun, frac_err = make_nn_funs(layer_sizes)
    N_param = len(parser.vect)

    def indexed_loss_fun(w, i_iter):
        rs = RandomState((seed, i, i_iter))
        idxs = rs.randint(N_train, size=batch_size)
        nll = nllfun(w, train_images[idxs], train_labels[idxs]) * N_train
        nlp = neg_log_prior(w)
        return nll + nlp
    gradfun = grad(indexed_loss_fun)

    def callback(x, t, entropy):
        results["entropy_per_dpt"].append(entropy / N_train)
        results["x_rms"].append(np.sqrt(np.mean(x * x)))
        results["minibatch_likelihood"].append(-indexed_loss_fun(x, t))
        results["log_prior_per_dpt"].append(-neg_log_prior(x) / N_train)
        if t % thin != 0 and t != N_iter and t != 0:
            return
        results["iterations"].append(t)
        results["train_likelihood"].append(-nllfun(x, train_images, train_labels))
        results["tests_likelihood"].append(-nllfun(x, tests_images, tests_labels))
        results["tests_error"].append(frac_err(x, tests_images, tests_labels))
        results["marg_likelihood"].append(
            estimate_marginal_likelihood(results["train_likelihood"][-1],
                                         results["entropy_per_dpt"][-1]))
        preds[i].append(pred_fun(x, tests_images))
        print "Iteration {0:5} Train lik {1:2.4f} Test lik {2:2.4f}" \
              " Marg lik {3:2.4f} Test err {4:2.4f}".format(
                  t,
                  results["train_likelihood"][-1],
                  results["tests_likelihood"][-1],
                  results["marg_likelihood"][-1],
                  results["tests_error"][-1])

    all_results = []
    preds = defaultdict(list)
    for i in xrange(N_samples):
        results = defaultdict(list)
        rs = RandomState((seed, i))
        sgd_entropic_damped(gradfun, np.full(N_param, init_scale), N_iter, alpha,
                            rs, callback, width=100)
        all_results.append(results)

    # Make ensemble prediction by averaging predicted class-conditional probabilities.
    ensemble_frac_err = []
    ensemble_loglike = []
    for t in xrange(len(all_results[0]["iterations"])):
        cur_probs = [preds[i][t] for i in xrange(N_samples)]
        avg_probs_unn = np.mean(np.exp(cur_probs), axis=0)
        avg_probs = avg_probs_unn / np.sum(avg_probs_unn, axis=1, keepdims=True)
        ensemble_preds = np.argmax(avg_probs, axis=1)
        ensemble_frac_err.append(np.mean(np.argmax(tests_labels, axis=1) != ensemble_preds))
        ensemble_loglike.append(np.sum(np.log(avg_probs) * tests_labels) / tests_images.shape[0])
    return all_results, ensemble_frac_err, ensemble_loglike
def run():
    (train_images, train_labels),\
    (tests_images, tests_labels) = load_data_subset(N_train, N_tests)
    parser, pred_fun, nllfun, frac_err = make_nn_funs(layer_sizes)
    N_param = len(parser.vect)
    print "Number of parameters in model: ", N_param

    def indexed_loss_fun(w, i_iter):
        rs = RandomState((seed, i_iter))
        idxs = rs.randint(N_train, size=batch_size)
        nll = nllfun(w, train_images[idxs], train_labels[idxs]) * N_train
        nlp = neg_log_prior(w)
        return nll + nlp
    gradfun = grad(indexed_loss_fun)

    def callback(x, t, entropy):
        results["entropy_per_dpt"].append(entropy / N_train)
        results["x_rms"].append(np.sqrt(np.mean(x * x)))
        results["minibatch_likelihood"].append(-indexed_loss_fun(x, t))
        results["log_prior_per_dpt"].append(-neg_log_prior(x) / N_train)
        if t % thin != 0 and t != N_iter and t != 0:
            return
        results["iterations"].append(t)
        results["train_likelihood"].append(-nllfun(x, train_images, train_labels))
        results["tests_likelihood"].append(-nllfun(x, tests_images, tests_labels))
        results["tests_error"].append(frac_err(x, tests_images, tests_labels))
        results["marg_likelihood"].append(
            estimate_marginal_likelihood(results["train_likelihood"][-1],
                                         results["entropy_per_dpt"][-1]))
        print "Iteration {0:5} Train lik {1:2.4f} Test lik {2:2.4f}" \
              " Marg lik {3:2.4f} Test err {4:2.4f}".format(
                  t,
                  results["train_likelihood"][-1],
                  results["tests_likelihood"][-1],
                  results["marg_likelihood"][-1],
                  results["tests_error"][-1])

    all_results = []
    preds = defaultdict(list)
    for width in widths:
        results = defaultdict(list)
        rs = RandomState(seed)
        sgd_entropic_damped(gradfun, np.full(N_param, init_scale), N_iter, alpha,
                            rs, callback, width=width)
        all_results.append(results)
    return all_results
def run():
    (train_images, train_labels),\
    (tests_images, tests_labels) = load_data_subset(N_train, N_tests)
    parser, pred_fun, nllfun, frac_err = make_nn_funs(layer_sizes)
    N_param = len(parser.vect)

    print "Running experiment..."
    results = defaultdict(list)
    for i in xrange(N_samples):

        def indexed_loss_fun(w, i_iter):
            rs = RandomState((seed, i, i_iter))
            idxs = rs.randint(N_train, size=batch_size)
            nll = nllfun(w, train_images[idxs], train_labels[idxs]) * N_train
            nlp = neg_log_prior(w)
            return nll + nlp
        gradfun = grad(indexed_loss_fun)

        def callback(x, t, v, entropy):
            results[("entropy", i)].append(entropy / N_train)
            results[("v_norm", i)].append(norm(v) / np.sqrt(N_param))
            results[("minibatch_likelihood", i)].append(-indexed_loss_fun(x, t))
            results[("log_prior_per_dpt", i)].append(-neg_log_prior(x) / N_train)
            if t % thin != 0 and t != N_iter and t != 0:
                return
            results[("iterations", i)].append(t)
            results[("train_likelihood", i)].append(-nllfun(x, train_images, train_labels))
            results[("tests_likelihood", i)].append(-nllfun(x, tests_images, tests_labels))
            results[("tests_error", i)].append(frac_err(x, tests_images, tests_labels))
            print "Iteration {0:5} Train likelihood {1:2.4f} Test likelihood {2:2.4f}" \
                  " Test Err {3:2.4f}".format(t, results[("train_likelihood", i)][-1],
                                              results[("tests_likelihood", i)][-1],
                                              results[("tests_error", i)][-1])

        rs = RandomState((seed, i))
        x0 = rs.randn(N_param) * init_scale
        v0 = rs.randn(N_param)
        # TODO: account for entropy of init.
        # entropy = 0.5 * D * (1 + np.log(2*np.pi)) + np.sum(np.log(x_scale)) + 0.5 * (D - norm(v) ** 2)
        aed3(gradfun, callback=callback, x=x0, v=v0, learn_rate=epsilon, iters=N_iter,
             init_log_decay=init_log_decay, decay_learn_rate=decay_learn_rate)
    return results
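# The TODO above refers to the entropy of the Gaussian initialization
# x0 ~ N(0, init_scale**2 * I), which is not yet added to the running entropy
# estimate. For reference, a sketch of that quantity (the momentum-dependent term
# in the commented-out formula is omitted); the helper name is illustrative and
# x_scale is assumed to be the per-parameter standard deviation vector:
def gaussian_init_entropy(x_scale):
    # Entropy of a D-dimensional Gaussian with diagonal stds x_scale:
    # 0.5 * D * (1 + log(2*pi)) + sum(log(x_scale))
    D = len(x_scale)
    return 0.5 * D * (1.0 + np.log(2 * np.pi)) + np.sum(np.log(x_scale))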
def run():
    (train_images, train_labels),\
    (tests_images, tests_labels) = load_data_subset(N_train, N_tests)
    parser, pred_fun, nllfun, frac_err = make_nn_funs(layer_sizes)
    N_param = len(parser.vect)

    def indexed_loss_fun(w, i_iter):
        rs = RandomState((seed, i, i_iter))
        idxs = rs.randint(N_train, size=batch_size)
        nll = nllfun(w, train_images[idxs], train_labels[idxs]) * N_train
        nlp = neg_log_prior(w)
        return nll + nlp
    gradfun = grad(indexed_loss_fun)

    def callback(x, t, entropy):
        results["entropy_per_dpt"].append(entropy / N_train)
        results["x_rms"].append(np.sqrt(np.mean(x * x)))
        results["minibatch_likelihood"].append(-indexed_loss_fun(x, t))
        results["log_prior_per_dpt"].append(-neg_log_prior(x) / N_train)
        if t % thin != 0 and t != N_iter and t != 0:
            return
        results["iterations"].append(t)
        results["train_likelihood"].append(-nllfun(x, train_images, train_labels))
        results["tests_likelihood"].append(-nllfun(x, tests_images, tests_labels))
        results["tests_error"].append(frac_err(x, tests_images, tests_labels))
        results["marg_likelihood"].append(
            estimate_marginal_likelihood(results["train_likelihood"][-1],
                                         results["entropy_per_dpt"][-1]))
        print "Iteration {0:5} Train lik {1:2.4f} Test lik {2:2.4f}" \
              " Marg lik {3:2.4f} Test err {4:2.4f}".format(
                  t,
                  results["train_likelihood"][-1],
                  results["tests_likelihood"][-1],
                  results["marg_likelihood"][-1],
                  results["tests_error"][-1])

    all_results = []
    for i in xrange(N_samples):
        results = defaultdict(list)
        rs = RandomState((seed, i))
        sgd_entropic(gradfun, np.full(N_param, init_scale), N_iter, alpha, rs, callback)
        all_results.append(results)
    return all_results
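# sgd_entropic and sgd_entropic_damped are defined elsewhere in the repo. A minimal,
# self-contained sketch of the idea they implement, under the assumption that the
# entropy of the distribution implied by the random initialization is updated at
# each SGD step by log|det(I - alpha * H)|, approximated here to first order as
# -alpha * trace(H) with a single Hutchinson probe vector. The function name, the
# undamped update, and the single-probe estimate below are illustrative assumptions,
# not the repo's code.
import autograd.numpy as np
from autograd import grad

def sgd_with_entropy_estimate(gradfun, x_scale, num_iters, alpha, rs, callback=None):
    D = len(x_scale)
    x = rs.randn(D) * x_scale  # sample the Gaussian initialization
    # Entropy of the initial Gaussian, 0.5 * D * (1 + log(2*pi)) + sum(log(x_scale)).
    entropy = 0.5 * D * (1.0 + np.log(2 * np.pi)) + np.sum(np.log(x_scale))
    for t in xrange(num_iters):
        g = gradfun(x, t)
        # Hessian-vector product H r, obtained by differentiating g(x) . r again.
        r = rs.randn(D)
        hvp = grad(lambda x_: np.dot(gradfun(x_, t), r))(x)
        entropy += -alpha * np.dot(r, hvp)  # ~ log|det(I - alpha * H)| to first order
        x = x - alpha * g
        if callback:
            callback(x, t, entropy)
    return x, entropy
# In the experiments above, the entropy handed to the callback is divided by N_train
# and combined with the train likelihood (and log prior) to form the
# marginal-likelihood estimate.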