def problem5(T=10, L=0.2):
    toy_features, toy_labels = utils.load_toy_data('toy_data.tsv')

    thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
    thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
    thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)

    plot_toy_results('Perceptron', thetas_perceptron, toy_features, toy_labels)
    plot_toy_results('Average Perceptron', thetas_avg_perceptron, toy_features, toy_labels)
    plot_toy_results('Pegasos', thetas_pegasos, toy_features, toy_labels)
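# A minimal sketch of what p1.perceptron above is assumed to compute: the
# classic perceptron update over T passes, returning (theta, theta_0). The
# real p1 module is not shown here, so the signature and return convention
# are assumptions, not the actual implementation.
import numpy as np

def perceptron_sketch(features, labels, T):
    """T passes of the perceptron mistake-driven update."""
    n, d = features.shape
    theta = np.zeros(d)
    theta_0 = 0.0
    for _ in range(T):
        for i in range(n):
            # Update only on a mistake (non-positive margin).
            if labels[i] * (features[i] @ theta + theta_0) <= 0:
                theta = theta + labels[i] * features[i]
                theta_0 = theta_0 + labels[i]
    return theta, theta_0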
def test_toy_data():
    toy_features, toy_labels = utils.load_toy_data('toy_data.tsv')

    from sklearn import svm
    clf = svm.SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
                  gamma='auto', kernel='rbf', max_iter=-1, shrinking=True,
                  tol=0.001, verbose=False)
    clf.fit(toy_features, toy_labels)
    # print(clf.coef_)  # coef_ is only available when using a linear kernel
    print(clf.dual_coef_)
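# As the comment above notes, coef_ only exists for linear kernels. A hedged
# variant: with sklearn's SVC and kernel='linear' the primal weight vector is
# exposed directly, which lines up better with the perceptron/Pegasos
# comparison than the RBF regressor used above (SVR is a regressor; for the
# +/-1 toy labels a classifier is the more natural choice).
from sklearn import svm

def linear_svm_example(toy_features, toy_labels):
    clf = svm.SVC(C=1.0, kernel='linear')
    clf.fit(toy_features, toy_labels)
    print(clf.coef_)       # primal weights, available for linear kernels only
    print(clf.intercept_)  # bias term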
def test_algorithm_compare(self):
    #-------------------------------------------------------------------------------
    # Problem 5
    #-------------------------------------------------------------------------------
    toy_features, toy_labels = utils.load_toy_data('toy_data.tsv')

    T = 100
    L = 0.2

    thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
    thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
    thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)

    def plot_toy_results(algo_name, thetas):
        print('theta for', algo_name, 'is', ', '.join(map(str, list(thetas[0]))))
        print('theta_0 for', algo_name, 'is', str(thetas[1]))
        utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)

    plot_toy_results('Perceptron', thetas_perceptron)
    plot_toy_results('Average Perceptron', thetas_avg_perceptron)
    plot_toy_results('Pegasos', thetas_pegasos)
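# A minimal sketch of the update assumed behind p1.pegasos(features, labels,
# T, L): stochastic sub-gradient descent on the hinge loss with L2
# regularization strength L and step size eta = 1/sqrt(t). The real p1 module
# is not shown, so details such as the theta_0 handling are assumptions.
import numpy as np

def pegasos_sketch(features, labels, T, L):
    n, d = features.shape
    theta = np.zeros(d)
    theta_0 = 0.0
    t = 0
    for _ in range(T):
        for i in range(n):
            t += 1
            eta = 1.0 / np.sqrt(t)
            if labels[i] * (features[i] @ theta + theta_0) <= 1:
                # Inside the margin: shrink theta, then step toward the sample.
                theta = (1 - eta * L) * theta + eta * labels[i] * features[i]
                theta_0 = theta_0 + eta * labels[i]
            else:
                # Outside the margin: regularization shrinkage only.
                theta = (1 - eta * L) * theta
    return theta, theta_0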
train_texts, train_labels = zip(*((sample['text'], sample['sentiment'])
                                  for sample in train_data))
val_texts, val_labels = zip(*((sample['text'], sample['sentiment'])
                              for sample in val_data))
test_texts, test_labels = zip(*((sample['text'], sample['sentiment'])
                                for sample in test_data))

dictionary = p1.bag_of_words(train_texts)
train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)

#-------------------------------------------------------------------------------
# Section 1.7
#-------------------------------------------------------------------------------
toy_features, toy_labels = utils.load_toy_data('toy_data.tsv')

T = 5
L = 10

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_avg_pa = p1.average_passive_aggressive(toy_features, toy_labels, T, L)

def plot_toy_results(algo_name, thetas):
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)

plot_toy_results('Perceptron', thetas_perceptron)
plot_toy_results('Average Perceptron', thetas_avg_perceptron)
plot_toy_results('Average Passive-Aggressive', thetas_avg_pa)
# You may modify the following when adding additional features (Part 3c)
bigram_dictionary = lab2.bigram_dictionary(train_texts)
train_final_features = lab2.extract_final_features(train_texts, dictionary, bigram_dictionary)
val_final_features = lab2.extract_final_features(val_texts, dictionary, bigram_dictionary)
test_final_features = lab2.extract_final_features(test_texts, dictionary, bigram_dictionary)

#-------------------------------------------------------------------------------
# Part 1 - Perceptron Algorithm
#-------------------------------------------------------------------------------
toy_features, toy_labels = utils.load_toy_data('../../Data/toy_data.csv')
theta, theta_0 = lab2.perceptron(toy_features, toy_labels, T=5)
utils.plot_toy_results(toy_features, toy_labels, theta, theta_0)

#-------------------------------------------------------------------------------
# Part 2 - Classifying Reviews
#-------------------------------------------------------------------------------
theta, theta_0 = lab2.perceptron(train_bow_features, train_labels, T=5)
train_accuracy = lab2.accuracy(train_bow_features, train_labels, theta, theta_0)
val_accuracy = lab2.accuracy(val_bow_features, val_labels, theta, theta_0)
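# A hedged sketch of the accuracy helper used above: classify with the sign of
# theta . x + theta_0 and compare against the labels. The real lab2.accuracy
# is not shown; treating a score of exactly zero as the negative class is an
# assumption.
import numpy as np

def accuracy_sketch(features, labels, theta, theta_0):
    predictions = np.where(features @ theta + theta_0 > 0, 1, -1)
    return np.mean(predictions == labels)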
train_texts, train_labels = zip(*((sample['text'], sample['sentiment'])
                                  for sample in train_data))
val_texts, val_labels = zip(*((sample['text'], sample['sentiment'])
                              for sample in val_data))
test_texts, test_labels = zip(*((sample['text'], sample['sentiment'])
                                for sample in test_data))

dictionary = p1.bag_of_words(train_texts)
train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)

#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
toy_features, toy_labels = utils.load_toy_data('toy_data.tsv')

T = 5
L = 10

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_avg_pa = p1.average_passive_aggressive(toy_features, toy_labels, T, L)

def plot_toy_results(algo_name, thetas):
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)

plot_toy_results('Perceptron', thetas_perceptron)
plot_toy_results('Average Perceptron', thetas_avg_perceptron)
plot_toy_results('Average Passive-Aggressive', thetas_avg_pa)
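# A minimal sketch of the bag-of-words pipeline assumed behind p1.bag_of_words
# and p1.extract_bow_feature_vectors: build a word -> index dictionary from
# the training texts, then map each text to a binary indicator vector.
# Tokenization and stopword handling in the real p1 module may differ.
import numpy as np

def bag_of_words_sketch(texts):
    dictionary = {}
    for text in texts:
        for word in text.lower().split():
            if word not in dictionary:
                dictionary[word] = len(dictionary)
    return dictionary

def extract_bow_sketch(texts, dictionary):
    features = np.zeros((len(texts), len(dictionary)))
    for i, text in enumerate(texts):
        for word in text.lower().split():
            if word in dictionary:
                features[i, dictionary[word]] = 1
    return features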
def main(unused_argv):
    # proposal_hidden_dims = [20]
    # likelihood_hidden_dims = [0]
    with tf.Graph().as_default():
        if FLAGS.dataset in ["mnist", "struct_mnist"]:
            train_xs, valid_xs, test_xs = utils.load_mnist()
        elif FLAGS.dataset == "omniglot":
            train_xs, valid_xs, test_xs = utils.load_omniglot()
        elif FLAGS.dataset == "toy":
            train_xs, valid_xs, test_xs = utils.load_toy_data()
        print("dataset = ", train_xs.shape)

        # Placeholder for input mnist digits.
        # observations_ph = tf.placeholder("float32", [None, 2])
        observations_ph = tf.placeholder("float32", [None, 1])

        # Set up the prior distribution, proposal and likelihood networks.
        (prior, likelihood, proposal) = model.get_toy_models(
            train_xs, which_example="toy1D")

        # Compute the lower bound and the loss.
        estimators = model.iwae(
            prior,
            likelihood,
            proposal,
            observations_ph,
            FLAGS.num_samples,
            [],  # [alpha, beta, gamma, delta],
            contexts=None)
        print("VARS: ", proposal.fcnet.get_variables())

        log_p_hat, neg_model_loss, neg_inference_loss = estimators[FLAGS.estimator]
        elbo = estimators["elbo"]
        model_loss = -tf.reduce_mean(neg_model_loss)
        inference_loss = -tf.reduce_mean(neg_inference_loss)
        log_p_hat_mean = tf.reduce_mean(log_p_hat)  # this is over K samples
        print("INFERENCE LOSS SHAPE = ", neg_inference_loss.shape)

        model_params = prior.get_parameter_mu()
        print(model_params)
        inference_network_params = proposal.fcnet.get_variables()

        # Compute and apply the gradients, summarizing the gradient variance.
        global_step = tf.train.get_or_create_global_step()
        opt = tf.train.AdamOptimizer(FLAGS.learning_rate)
        cv_grads = []
        model_grads = opt.compute_gradients(model_loss, var_list=model_params)
        # Inference model (encoder) params are just A and b (Ax + b).
        inference_grads = opt.compute_gradients(
            inference_loss, var_list=inference_network_params)
        grads = model_grads + inference_grads  # + cv_grads

        model_ema_op, model_grad_variance, _ = utils.summarize_grads(model_grads)
        print("grads = ", grads)
        inference_ema_op, inference_grad_variance, inference_grad_snr_sq = (
            utils.summarize_grads(inference_grads))

        ema_ops = [model_ema_op, inference_ema_op]
        # This ensures ema_ops are evaluated before the gradients are applied.
        with tf.control_dependencies(ema_ops):
            train_op = opt.apply_gradients(grads, global_step=global_step)

        # l2_norm = lambda t: tf.sqrt(tf.reduce_sum(tf.pow(t, 2)))
        # for gradient, variable in inference_grads:
        #     tf.summary.scalar("phi_grads/" + variable.name, l2_norm(gradient))
        #     tf.summary.scalar("phi_vars/" + variable.name, l2_norm(variable))
        #     tf.summary.histogram("phi_grads/" + variable.name, gradient)
        #     tf.summary.histogram("phi_vars/" + variable.name, variable)

        tf.summary.scalar("estimators/elbo", elbo)
        tf.summary.scalar("estimators/difference", elbo - log_p_hat_mean)
        # tf.summary.scalar("phi_grad/%s" % FLAGS.estimator, inference_ema_op)
        tf.summary.scalar("phi_grad_variance/%s" % FLAGS.estimator,
                          inference_grad_variance)
        tf.summary.scalar("model_grad", model_grad_variance)
        # tf.summary.scalar("inference_grad_snr_sq/%s" % FLAGS.estimator,
        #                   inference_grad_snr_sq)
        tf.summary.scalar("log_p_hat/train", log_p_hat_mean)

        exp_name = "%s.lr-%g.n_samples-%d.batch_size-%d.alpha-%g.dataset-%s.run-%d" % (
            FLAGS.estimator, FLAGS.learning_rate, FLAGS.num_samples,
            FLAGS.batch_size, FLAGS.alpha, FLAGS.dataset, FLAGS.run)
        checkpoint_dir = os.path.join(FLAGS.logdir, FLAGS.subfolder, exp_name)
        print("Checkpoints: ", checkpoint_dir)

        if FLAGS.initial_checkpoint_dir and not tf.gfile.Exists(checkpoint_dir):
            tf.gfile.MakeDirs(checkpoint_dir)
            f = "checkpoint"
            tf.gfile.Copy(os.path.join(FLAGS.initial_checkpoint_dir, f),
                          os.path.join(checkpoint_dir, f))

        with tf.train.MonitoredTrainingSession(
                is_chief=True,
                hooks=[
                    create_logging_hook({
                        "Step": global_step,
                        "log_p_hat": log_p_hat_mean,
                        # "model_grads": model_grad,
                        # "model_grad_variance": model_grad_variance,
                        "infer_grad_variance": inference_grad_variance,
                        "infer_grad_snr_sq": inference_grad_snr_sq,
                    })
                ],
                checkpoint_dir=checkpoint_dir,
                save_checkpoint_secs=10,
                save_summaries_steps=FLAGS.summarize_every,
                # log_step_count_steps=FLAGS.summarize_every * 10
                log_step_count_steps=0,  # disable steps/s logging to avoid a TF warning on validation sets
                # config=tf.ConfigProto(log_device_placement=True)  # reports the device (CPU, GPU, etc.) of each computation
        ) as sess:
            writer = summary_io.SummaryWriterCache.get(checkpoint_dir)
            t_stats = []
            cur_step = -1
            indices = list(range(train_xs.shape[0]))
            n_epoch = 0

            def run_eval(cur_step, split="valid", eval_batch_size=256):
                """Run evaluation on a data split."""
                if split == "valid":
                    eval_dataset = valid_xs
                elif split == "test":
                    eval_dataset = test_xs
                log_p_hat_vals = []
                for i in range(0, eval_dataset.shape[0], eval_batch_size):
                    # batch_xs = utils.binarize_batch_xs(eval_dataset[i:(i + eval_batch_size)])
                    batch_xs = eval_dataset[i:(i + eval_batch_size)]
                    log_p_hat_vals.append(
                        sess.run(log_p_hat_mean,
                                 feed_dict={observations_ph: batch_xs}))
                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="log_p_hat/%s" % split,
                                     simple_value=np.mean(log_p_hat_vals))
                ])
                writer.add_summary(summary, cur_step)
                print("cur_step: %g, log_p_hat/%s: %g" %
                      (cur_step, split, np.mean(log_p_hat_vals)))

            while cur_step < FLAGS.max_steps and not sess.should_stop():
                n_epoch += 1
                random.shuffle(indices)
                for i in range(0, train_xs.shape[0], FLAGS.batch_size):
                    if sess.should_stop() or cur_step > FLAGS.max_steps:
                        break
                    # Get a batch, then dynamically binarize.
                    ns = indices[i:i + FLAGS.batch_size]
                    # batch_xs = utils.binarize_batch_xs(train_xs[ns])
                    batch_xs = train_xs[ns]
                    _, cur_step, grads_ = sess.run(
                        [train_op, global_step, grads],
                        feed_dict={observations_ph: batch_xs})
                    # grads_ = sess.run([train_op, global_step, model_params, grads],
                    #                   feed_dict={observations_ph: batch_xs})
                if n_epoch % 10 == 0:
                    print("epoch #", n_epoch)
                    run_eval(cur_step, "test")
                    run_eval(cur_step, "valid")
                    # var_names = ["theta", "A ", "b "]
                    var_names = ["A ", "b "]
                    for m, (i, j) in enumerate(grads_):
                        print(var_names[m], ": grad, val: ", i, j)
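# A hedged sketch of what utils.summarize_grads above is assumed to report:
# per-parameter gradient variance (and squared SNR) tracked with exponential
# moving averages of the first and second moments, var ~ E[g^2] - E[g]^2.
# The real utils module is not shown; the decay value, the flattening of
# gradients, and the scalar reduction are all assumptions.
import numpy as np

class GradVarianceEMA:
    def __init__(self, dim, decay=0.99):
        self.decay = decay
        self.mean = np.zeros(dim)     # EMA of g
        self.sq_mean = np.zeros(dim)  # EMA of g**2

    def update(self, grad):
        g = np.ravel(grad)
        self.mean = self.decay * self.mean + (1 - self.decay) * g
        self.sq_mean = self.decay * self.sq_mean + (1 - self.decay) * g**2
        variance = np.maximum(self.sq_mean - self.mean**2, 0.0)
        snr_sq = self.mean**2 / (variance + 1e-8)
        return variance.mean(), snr_sq.mean()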
def toy_example(num_samples=None, noise=(0, 0)):
    if num_samples is None:
        num_samples = FLAGS.num_samples

    with tf.GradientTape() as tape:
        train_xs, valid_xs, test_xs = utils.load_toy_data()
        batch_xs = train_xs[0:FLAGS.batch_size]  # [batch_size, input_dim]

        # Set up the prior model, proposal and likelihood networks.
        p_z = ToyPrior(mu_inital_value=2., size=FLAGS.latent_dim,
                       name="toy_prior")  # returns a callable Normal distribution
        p_x_given_z = ToyConditionalNormalLikelihood()
        # with tf.name_scope('proposal') as scope:
        q_z = ToyConditionalNormal(
            size=FLAGS.latent_dim,
            hidden_layer_sizes=1,
            initializers=None,
            use_bias=True,
            name="proposal")

        # Initialise the network parameters to optimal
        # (plus some specified normally distributed noise).
        q_z.initialise_and_fix_network(batch_xs, noise)

        # Returns the Normal proposal distribution and its parameters
        # (fixed to the optimal A and b).
        proposal, inference_network_params = q_z(batch_xs, stop_gradient=False)

        z = proposal.sample(sample_shape=[num_samples])  # [num_samples, batch_size, latent_dim]
        print("z samples ", z.shape)

        # Returns a Normal distribution conditioned on z.
        likelihood = p_x_given_z(z)
        # Returns the prior Normal (p_z) and the prior parameter mu.
        prior, mu = p_z()

        log_p_z = tf.reduce_sum(prior.log_prob(z), axis=-1)                 # [num_samples, batch_size]
        log_q_z = tf.reduce_sum(proposal.log_prob(z), axis=-1)              # [num_samples, batch_size]
        log_p_x_given_z = tf.reduce_sum(likelihood.log_prob(batch_xs), axis=-1)  # [num_samples, batch_size]

        log_weights = log_p_z + log_p_x_given_z - log_q_z  # [num_samples, batch_size]

        # This step is crucial for replicating the IWAE bound: the log of the
        # sum, NOT the sum of the logs (which gives the VAE bound as M increases).
        log_sum_weight = tf.reduce_logsumexp(log_weights, axis=0)  # sums over K samples, back to the IWAE estimator
        log_avg_weight = log_sum_weight - tf.log(tf.to_float(num_samples))
        inference_loss = -tf.reduce_mean(log_avg_weight)
        # print("shapes", log_p_z.shape, log_p_x_given_z.shape, log_q_z.shape,
        #       log_weights.shape, log_sum_weight.shape, inference_loss.shape)

        parameters = (inference_network_params[0], inference_network_params[1], mu)
        # print("near optimal parameters: ", parameters)

    grads = tape.gradient(inference_loss, parameters)

    # Build the evidence lower bound (ELBO) or the negative loss.
    # kl = tf.reduce_mean(tfd.kl_divergence(proposal, prior), axis=-1)  # analytic KL
    # log_sum_ll = tf.reduce_logsumexp(log_p_x_given_z, axis=0)  # back to the IWAE estimator (log of the sum)
    # expected_log_likelihood = log_sum_ll - tf.log(tf.to_float(num_samples))
    # KL_elbo = tf.reduce_mean(expected_log_likelihood - kl)

    if FLAGS.using_BQ:
        def get_log_joint(z):
            return np.reshape(
                p_x_given_z(z).log_prob(batch_xs).numpy() + prior.log_prob(z).numpy(),
                (-1, 1))

        kernel = GPy.kern.RBF(1, variance=2, lengthscale=2)
        kernel.variance.constrain_bounded(1e-5, 1e5)
        bq_likelihood = GPy.likelihoods.Gaussian(variance=1e-1)
        bq_prior = Gaussian(mean=proposal._loc.numpy().squeeze(),
                            covariance=proposal._scale.numpy().item())

        initial_x = bq_prior.sample(5)
        initial_y = []
        for point in initial_x:
            initial_y.append(get_log_joint(np.atleast_2d(point)))
        initial_y = np.concatenate(initial_y)

        mean_function = NegativeQuadratic(1)
        gpy_gp = GPy.core.GP(initial_x, initial_y, kernel=kernel,
                             likelihood=bq_likelihood, mean_function=mean_function)
        warped_gp = VanillaGP(gpy_gp)
        bq_model = IntegrandModel(warped_gp, bq_prior)

        for i in range(10):
            if i % 5 == 0:
                gpy_gp.optimize_restarts(num_restarts=5)
            failed = True
            while failed:
                try:
                    batch = select_batch(bq_model, 1, KRIGING_BELIEVER)
                    failed = False
                except FloatingPointError:
                    gpy_gp.optimize_restarts(num_restarts=5)
            X = np.array(batch)
            Y = get_log_joint(X)
            bq_model.update(batch, Y)

        gpy_gp.optimize_restarts(num_restarts=5)
        bq_elbo = bq_model.integral_mean()

        import scipy.integrate

        def integrand(z):
            return get_log_joint(z) * np.exp(bq_prior.logpdf(np.atleast_2d(z)))

        brute_force_elbo = scipy.integrate.quad(integrand, -10, 10)
        print("BQ ", bq_elbo)
        print("ACTUAL ELBO ", brute_force_elbo)

    return grads
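# A small numeric check of the point made in the comments above: the IWAE
# bound takes the log of the average importance weight (logsumexp over the K
# samples minus log K), not the average of the log weights, and by Jensen's
# inequality the former is always at least the latter. The toy weights here
# are made up purely for illustration.
import numpy as np
from scipy.special import logsumexp

log_weights = np.log(np.array([0.1, 0.5, 0.9, 0.2]))  # toy importance weights
iwae_bound = logsumexp(log_weights) - np.log(len(log_weights))
vae_style_bound = np.mean(log_weights)
assert iwae_bound >= vae_style_bound  # log of the mean >= mean of the logs
print(iwae_bound, vae_style_bound)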
train_texts, train_labels = zip(*((sample['text'], sample['sentiment'])
                                  for sample in train_data))
val_texts, val_labels = zip(*((sample['text'], sample['sentiment'])
                              for sample in val_data))
test_texts, test_labels = zip(*((sample['text'], sample['sentiment'])
                                for sample in test_data))

dictionary = p1.bag_of_words(train_texts)
train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)

#-------------------------------------------------------------------------------
# Calculate theta & theta_0 using each algorithm
#-------------------------------------------------------------------------------
toy_features, toy_labels = utils.load_toy_data(
    cwd + '\\Review-Analyzer\\toy_data.tsv')

T = 10
L = 0.2

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)

def plot_toy_results(algo_name, thetas):
    print('theta for', algo_name, 'is', ', '.join(map(str, list(thetas[0]))))
    print('theta_0 for', algo_name, 'is', str(thetas[1]))
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)