Example 1
File: main.py Project: yshen4/pymal
def problem5(T=10, L=0.2):
    toy_features, toy_labels = utils.load_toy_data('toy_data.tsv')

    thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
    thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
    thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)

    plot_toy_results('Perceptron', thetas_perceptron, toy_features, toy_labels)
    plot_toy_results('Average Perceptron', thetas_avg_perceptron, toy_features, toy_labels)
    plot_toy_results('Pegasos', thetas_pegasos, toy_features, toy_labels)
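For reference, utils.load_toy_data is defined outside these excerpts. A minimal
sketch of what it plausibly does, assuming a three-column TSV of (label, x, y)
rows (an assumption, not the project's actual code):

import numpy as np

def load_toy_data_sketch(path):
    # Hypothetical stand-in for utils.load_toy_data: read label, x, y columns
    # from a tab-separated file and return (features, labels).
    labels, xs, ys = np.loadtxt(path, delimiter='\t', unpack=True)
    return np.vstack((xs, ys)).T, labels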
Example 2
def test_toy_data():
    toy_features, toy_labels = utils.load_toy_data('toy_data.tsv')

    # Note: SVR is a regressor; fitting it to the +/-1 toy labels treats the
    # classification targets as regression values.
    from sklearn import svm
    clf = svm.SVR(C=1.0,
                  cache_size=200,
                  coef0=0.0,
                  degree=3,
                  epsilon=0.1,
                  gamma="auto",
                  kernel='rbf',
                  max_iter=-1,
                  shrinking=True,
                  tol=0.001,
                  verbose=False)
    clf.fit(toy_features, toy_labels)
    # print(clf.coef_) # coef_ is only available when using a linear kernel
    print(clf.dual_coef_)
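A short, hypothetical follow-up one might add to inspect the fit (since SVR is
a regressor, the score below is R^2 on the training data, not classification
accuracy):

predictions = clf.predict(toy_features)
print(clf.score(toy_features, toy_labels))  # R^2 of the fit on the training set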
Example 3
def test_algorithm_compare(self):
        #-------------------------------------------------------------------------------
        # Problem 5
        #-------------------------------------------------------------------------------

        toy_features, toy_labels = utils.load_toy_data('toy_data.tsv')

        T = 100
        L = 0.2

        thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
        thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
        thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)

        def plot_toy_results(algo_name, thetas):
            print('theta for', algo_name, 'is', ', '.join(map(str, list(thetas[0]))))
            print('theta_0 for', algo_name, 'is', str(thetas[1]))
            utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)

        plot_toy_results('Perceptron', thetas_perceptron)
        plot_toy_results('Average Perceptron', thetas_avg_perceptron)
        plot_toy_results('Pegasos', thetas_pegasos)
        return
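p1.perceptron itself is not shown in these listings. A minimal sketch of the
classic algorithm it presumably implements (T full passes, mistake-driven
updates); the helper name is hypothetical:

import numpy as np

def perceptron_sketch(features, labels, T):
    # Hypothetical stand-in for p1.perceptron: update (theta, theta_0) on
    # every misclassified point for T passes over the data.
    n, d = features.shape
    theta, theta_0 = np.zeros(d), 0.0
    for _ in range(T):
        for i in range(n):
            if labels[i] * (features[i] @ theta + theta_0) <= 0:
                theta += labels[i] * features[i]
                theta_0 += labels[i]
    return theta, theta_0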
Example 4
train_texts, train_labels = zip(*((sample['text'], sample['sentiment'])
                                  for sample in train_data))
val_texts, val_labels = zip(*((sample['text'], sample['sentiment'])
                              for sample in val_data))
test_texts, test_labels = zip(*((sample['text'], sample['sentiment'])
                                for sample in test_data))

dictionary = p1.bag_of_words(train_texts)

train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)
#
#-------------------------------------------------------------------------------
# Section 1.7
#-------------------------------------------------------------------------------
toy_features, toy_labels = utils.load_toy_data('toy_data.tsv')

T = 5
L = 10

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_avg_pa = p1.average_passive_aggressive(toy_features, toy_labels, T, L)


def plot_toy_results(algo_name, thetas):
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)


plot_toy_results('Perceptron', thetas_perceptron)
plot_toy_results('Average Perceptron', thetas_avg_perceptron)
plot_toy_results('Average Passive-Aggressive', thetas_avg_pa)
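p1.average_perceptron is also defined elsewhere; the usual averaging variant
keeps a running sum of the parameters after every inner step and returns the
mean. A sketch under that assumption:

import numpy as np

def average_perceptron_sketch(features, labels, T):
    # Hypothetical sketch: same updates as the perceptron, but return the
    # average of (theta, theta_0) over all n * T inner steps.
    n, d = features.shape
    theta, theta_0 = np.zeros(d), 0.0
    theta_sum, theta_0_sum = np.zeros(d), 0.0
    for _ in range(T):
        for i in range(n):
            if labels[i] * (features[i] @ theta + theta_0) <= 0:
                theta += labels[i] * features[i]
                theta_0 += labels[i]
            theta_sum += theta
            theta_0_sum += theta_0
    return theta_sum / (n * T), theta_0_sum / (n * T)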
Example 5
# You may modify the following when adding additional features (Part 3c)

bigram_dictionary = lab2.bigram_dictionary(train_texts)

train_final_features = lab2.extract_final_features(train_texts, dictionary,
                                                   bigram_dictionary)
val_final_features = lab2.extract_final_features(val_texts, dictionary,
                                                 bigram_dictionary)
test_final_features = lab2.extract_final_features(test_texts, dictionary,
                                                  bigram_dictionary)

#-------------------------------------------------------------------------------
# Part 1 - Perceptron Algorithm
#-------------------------------------------------------------------------------

toy_features, toy_labels = utils.load_toy_data('../../Data/toy_data.csv')

theta, theta_0 = lab2.perceptron(toy_features, toy_labels, T=5)

utils.plot_toy_results(toy_features, toy_labels, theta, theta_0)

#-------------------------------------------------------------------------------
# Part 2 - Classifying Reviews
#-------------------------------------------------------------------------------

theta, theta_0 = lab2.perceptron(train_bow_features, train_labels, T=5)

train_accuracy = lab2.accuracy(train_bow_features, train_labels, theta,
                               theta_0)
val_accuracy = lab2.accuracy(val_bow_features, val_labels, theta, theta_0)
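lab2.accuracy presumably scores sign predictions of the linear classifier
against the labels; a minimal sketch under that assumption:

import numpy as np

def accuracy_sketch(features, labels, theta, theta_0):
    # Hypothetical stand-in for lab2.accuracy: fraction of points whose
    # predicted sign matches the label.
    predictions = np.sign(features @ theta + theta_0)
    return np.mean(predictions == labels)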
Example 6
train_texts, train_labels = zip(*((sample['text'], sample['sentiment']) for sample in train_data))
val_texts, val_labels = zip(*((sample['text'], sample['sentiment']) for sample in val_data))
test_texts, test_labels = zip(*((sample['text'], sample['sentiment']) for sample in test_data))

dictionary = p1.bag_of_words(train_texts)

train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)
#
#-------------------------------------------------------------------------------
#
#-------------------------------------------------------------------------------

toy_features, toy_labels = utils.load_toy_data('toy_data.tsv')

T = 5
L = 10

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_avg_pa = p1.average_passive_aggressive(toy_features, toy_labels, T, L)

def plot_toy_results(algo_name, thetas):
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)

plot_toy_results('Perceptron', thetas_perceptron)
plot_toy_results('Average Perceptron', thetas_avg_perceptron)
plot_toy_results('Average Passive-Aggressive', thetas_avg_pa)
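The bag-of-words helpers recur throughout these examples but are defined
elsewhere. A plausible minimal version of both (hypothetical sketches, not the
project's code):

import numpy as np

def bag_of_words_sketch(texts):
    # Hypothetical p1.bag_of_words: map each distinct whitespace-separated
    # token to a column index.
    dictionary = {}
    for text in texts:
        for word in text.split():
            dictionary.setdefault(word, len(dictionary))
    return dictionary

def extract_bow_feature_vectors_sketch(texts, dictionary):
    # Hypothetical p1.extract_bow_feature_vectors: binary indicator matrix
    # of shape [num_texts, len(dictionary)].
    features = np.zeros((len(texts), len(dictionary)))
    for i, text in enumerate(texts):
        for word in text.split():
            if word in dictionary:
                features[i, dictionary[word]] = 1
    return features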
Example 7
def main(unused_argv):
    # proposal_hidden_dims = [20]
    # likelihood_hidden_dims = [0]

    with tf.Graph().as_default():
        if FLAGS.dataset in ["mnist", "struct_mnist"]:
            train_xs, valid_xs, test_xs = utils.load_mnist()
        elif FLAGS.dataset == "omniglot":
            train_xs, valid_xs, test_xs = utils.load_omniglot()
        elif FLAGS.dataset == "toy":
            train_xs, valid_xs, test_xs = utils.load_toy_data()

        print("dataset = ", train_xs.shape)

        # Placeholder for input mnist digits.
        # observations_ph = tf.placeholder("float32", [None, 2])
        observations_ph = tf.placeholder("float32", [None, 1])

        # set up your prior dist, proposal and likelihood networks
        (prior, likelihood,
         proposal) = model.get_toy_models(train_xs, which_example="toy1D")

        # Compute the lower bound and the loss
        estimators = model.iwae(
            prior,
            likelihood,
            proposal,
            observations_ph,
            FLAGS.num_samples,
            [],  # [alpha, beta, gamma, delta],
            contexts=None)

        print("VARS: ", proposal.fcnet.get_variables())
        log_p_hat, neg_model_loss, neg_inference_loss = estimators[
            FLAGS.estimator]
        elbo = estimators["elbo"]

        model_loss = -tf.reduce_mean(neg_model_loss)
        inference_loss = -tf.reduce_mean(neg_inference_loss)
        log_p_hat_mean = tf.reduce_mean(log_p_hat)

        # this is over K samples
        print("INFERENCE LOSS SHAPE = ", neg_inference_loss.shape)

        model_params = prior.get_parameter_mu()
        print(model_params)
        inference_network_params = proposal.fcnet.get_variables()

        # Compute and apply the gradients, summarizing the gradient variance.
        global_step = tf.train.get_or_create_global_step()
        opt = tf.train.AdamOptimizer(FLAGS.learning_rate)

        cv_grads = []

        model_grads = opt.compute_gradients(model_loss, var_list=model_params)
        # inference model (encoder) params are just A and b. (Ax+b)
        inference_grads = opt.compute_gradients(
            inference_loss, var_list=inference_network_params)

        grads = model_grads + inference_grads  #+ cv_grads

        model_ema_op, model_grad_variance, _ = (
            utils.summarize_grads(model_grads))
        print("grads = ", grads)
        inference_ema_op, inference_grad_variance, inference_grad_snr_sq = (
            utils.summarize_grads(inference_grads))

        ema_ops = [model_ema_op, inference_ema_op]

        # this ensures ema_ops are evaluated before apply_gradients runs
        with tf.control_dependencies(ema_ops):
            train_op = opt.apply_gradients(grads, global_step=global_step)

        # l2_norm = lambda t: tf.sqrt(tf.reduce_sum(tf.pow(t, 2)))
        # for gradient, variable in inference_grads:
        #     tf.summary.scalar("phi_grads/" + variable.name, l2_norm(gradient))
        #     tf.summary.scalar("phi_vars/" + variable.name, l2_norm(variable))
        #     tf.summary.histogram("phi_grads/" + variable.name, gradient)
        #     tf.summary.histogram("phi_vars/" + variable.name, variable)

        tf.summary.scalar("estimators/elbo", elbo)
        tf.summary.scalar("estimators/difference", elbo - log_p_hat_mean)

        # tf.summary.scalar("phi_grad/%s" % FLAGS.estimator, inference_ema_op)
        tf.summary.scalar("phi_grad_variance/%s" % FLAGS.estimator,
                          inference_grad_variance)

        tf.summary.scalar("model_grad", model_grad_variance)
        # tf.summary.scalar("inference_grad_snr_sq/%s" % FLAGS.estimator, inference_grad_snr_sq)

        tf.summary.scalar("log_p_hat/train", log_p_hat_mean)

        exp_name = "%s.lr-%g.n_samples-%d.batch_size-%d.alpha-%g.dataset-%s.run-%d" % (
            FLAGS.estimator, FLAGS.learning_rate, FLAGS.num_samples,
            FLAGS.batch_size, FLAGS.alpha, FLAGS.dataset, FLAGS.run)
        checkpoint_dir = os.path.join(FLAGS.logdir, FLAGS.subfolder, exp_name)
        print("Checkpoints: : ", checkpoint_dir)

        if FLAGS.initial_checkpoint_dir and not tf.gfile.Exists(
                checkpoint_dir):
            tf.gfile.MakeDirs(checkpoint_dir)
            f = "checkpoint"
            tf.gfile.Copy(os.path.join(FLAGS.initial_checkpoint_dir, f),
                          os.path.join(checkpoint_dir, f))

        with tf.train.MonitoredTrainingSession(
                is_chief=True,
                hooks=[
                create_logging_hook({
                    "Step": global_step,
                    "log_p_hat": log_p_hat_mean,
                    # "model_grads": model_grad,
                    # "model_grad_variance": model_grad_variance,
                    "infer_grad_varaince": inference_grad_variance,
                    "infer_grad_snr_sq": inference_grad_snr_sq,
                })
            ],
                checkpoint_dir=checkpoint_dir,
                save_checkpoint_secs=10,
                save_summaries_steps=FLAGS.summarize_every,
                # log_step_count_steps=FLAGS.summarize_every * 10
                log_step_count_steps=0,  # disable logging of steps/s to avoid a TF warning on validation sets
                # config=tf.ConfigProto(log_device_placement=True) # spits out the location of each computation (CPU, GPU etc.)
        ) as sess:

            writer = summary_io.SummaryWriterCache.get(checkpoint_dir)
            t_stats = []
            cur_step = -1
            indices = list(range(train_xs.shape[0]))
            n_epoch = 0

            def run_eval(cur_step, split="valid", eval_batch_size=256):
                """Run evaluation on a datasplit."""
                if split == "valid":
                    eval_dataset = valid_xs
                elif split == "test":
                    eval_dataset = test_xs

                log_p_hat_vals = []
                for i in range(0, eval_dataset.shape[0], eval_batch_size):
                    # batch_xs = utils.binarize_batch_xs(eval_dataset[i:(i + eval_batch_size)])
                    batch_xs = eval_dataset[i:(i + eval_batch_size)]
                    log_p_hat_vals.append(
                        sess.run(log_p_hat_mean,
                                 feed_dict={observations_ph: batch_xs}))

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="log_p_hat/%s" % split,
                                     simple_value=np.mean(log_p_hat_vals))
                ])
                writer.add_summary(summary, cur_step)
                print("curr_step: %g, log_p_hat/%s: %g" %
                      (cur_step, split, np.mean(log_p_hat_vals)))

            while cur_step < FLAGS.max_steps and not sess.should_stop():
                n_epoch += 1

                random.shuffle(indices)

                for i in range(0, train_xs.shape[0], FLAGS.batch_size):
                    if sess.should_stop() or cur_step > FLAGS.max_steps:
                        break

                    # Get a batch, then dynamically binarize
                    ns = indices[i:i + FLAGS.batch_size]
                    # batch_xs = utils.binarize_batch_xs(train_xs[ns])
                    batch_xs = train_xs[ns]

                    _, cur_step, grads_ = \
                        sess.run([train_op, global_step, grads], feed_dict={observations_ph: batch_xs})
                    # grads_ = sess.run([train_op, global_step, model_params, grads], feed_dict={observations_ph: batch_xs})

                if n_epoch % 10 == 0:
                    print("epoch #", n_epoch)
                    run_eval(cur_step, "test")
                    run_eval(cur_step, "valid")

                    # var_names = ["theta", "A    ", "b    "]
                    var_names = ["A    ", "b    "]
                    for m, (i, j) in enumerate(grads_):
                        print(var_names[m], ": grad, val: ", i, j)
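create_logging_hook is used in this example but defined elsewhere in the
project. Assuming it simply wraps TF1's standard tf.train.LoggingTensorHook, a
minimal version could be:

import tensorflow as tf

def create_logging_hook_sketch(tensors, every_n_iter=100):
    # Hypothetical helper: print the named tensors every every_n_iter steps.
    return tf.train.LoggingTensorHook(tensors, every_n_iter=every_n_iter)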
Example 8
def toy_example(num_samples=None, noise=(0, 0)):

    if num_samples is None:
        num_samples = FLAGS.num_samples

    with tf.GradientTape() as tape:

        train_xs, valid_xs, test_xs = utils.load_toy_data()
        batch_xs = train_xs[0:FLAGS.batch_size]  # [batch_size, input_dim]

        # set up your prior model, proposal and likelihood networks
        p_z = ToyPrior(mu_inital_value=2., size=FLAGS.latent_dim, name="toy_prior")

        # returns a callable Normal distribution
        p_x_given_z = ToyConditionalNormalLikelihood()

        # with tf.name_scope('proposal') as scope:
        q_z = ToyConditionalNormal(
            size=FLAGS.latent_dim,
            hidden_layer_sizes=1,
            initializers=None,
            use_bias=True,
            name="proposal")

        # initialise the network parameters to optimal (plus the specified normally distributed noise)
        q_z.initialise_and_fix_network(batch_xs, noise)

        # returns the Normal dist proposal, and the parameters (fixed to optimal A and b)
        proposal, inference_network_params = q_z(batch_xs, stop_gradient=False)

        z = proposal.sample(sample_shape=[num_samples])
        # [num_samples, batch_size, latent_dim]
        print("z samples ", z.shape)

        # returns a Normal dist conditioned on z
        likelihood = p_x_given_z(z)

        # returns the Prior normal (p_z), and the prior parameter mu
        prior, mu = p_z()

        log_p_z = tf.reduce_sum(prior.log_prob(z), axis=-1)   # [num_samples, batch_size]
        log_q_z = tf.reduce_sum(proposal.log_prob(z), axis=-1)   # [num_samples, batch_size]
        log_p_x_given_z = tf.reduce_sum(likelihood.log_prob(batch_xs), axis=-1)  # [num_samples, batch_size]
        log_weights = log_p_z + log_p_x_given_z - log_q_z  # [num_samples, batch_size]

        # This step is crucial for the IWAE bound: take the log of the sum of
        # the weights, NOT the sum of the logs (averaging the logs gives the VAE bound).
        log_sum_weight = tf.reduce_logsumexp(log_weights, axis=0)  # this sums over K samples, and returns us to IWAE estimator land
        log_avg_weight = log_sum_weight - tf.log(tf.to_float(num_samples))
        inference_loss = -tf.reduce_mean(log_avg_weight)
        # print("shapes", log_p_z.shape, log_p_x_given_z.shape, log_q_z.shape, log_weights.shape, log_sum_weight.shape, inference_loss.shape)

        parameters = (inference_network_params[0], inference_network_params[1], mu)
        # print("near optimal parameters: ", parameters)
        grads = tape.gradient(inference_loss, parameters)

        # Build the evidence lower bound (ELBO) or the negative loss
        # kl = tf.reduce_mean(tfd.kl_divergence(proposal, prior), axis=-1)  # analytic KL
        # log_sum_ll = tf.reduce_logsumexp(log_p_x_given_z, axis=0)  # this converts back to IWAE estimator (log of the sum)
        # expected_log_likelihood = log_sum_ll - tf.log(tf.to_float(num_samples))
        # KL_elbo = tf.reduce_mean(expected_log_likelihood - kl)

        if FLAGS.using_BQ:

            def get_log_joint(z):
                return np.reshape(p_x_given_z(z).log_prob(batch_xs).numpy() + prior.log_prob(z).numpy(), (-1, 1))

            kernel = GPy.kern.RBF(1, variance=2, lengthscale=2)
            kernel.variance.constrain_bounded(1e-5, 1e5)
            bq_likelihood = GPy.likelihoods.Gaussian(variance=1e-1)

            bq_prior = Gaussian(mean=proposal._loc.numpy().squeeze(), covariance=proposal._scale.numpy().item())

            initial_x = bq_prior.sample(5)
            initial_y = []
            for point in initial_x:
                initial_y.append(get_log_joint(np.atleast_2d(point)))
            initial_y = np.concatenate(initial_y)
            mean_function = NegativeQuadratic(1)
            gpy_gp = GPy.core.GP(initial_x, initial_y, kernel=kernel, likelihood=bq_likelihood, mean_function=mean_function)
            warped_gp = VanillaGP(gpy_gp)
            bq_model = IntegrandModel(warped_gp, bq_prior)

            for i in range(10):
                if i % 5 == 0:
                    gpy_gp.optimize_restarts(num_restarts=5)
                failed = True
                while failed:
                    try:
                        batch = select_batch(bq_model, 1, KRIGING_BELIEVER)
                        failed = False
                    except FloatingPointError:
                        gpy_gp.optimize_restarts(num_restarts=5)

                X = np.array(batch)
                Y = get_log_joint(X)

                bq_model.update(batch, Y)

            gpy_gp.optimize_restarts(num_restarts=5)

            bq_elbo = bq_model.integral_mean()

            import scipy.integrate
            def integrand(z):
                return get_log_joint(z) * np.exp(bq_prior.logpdf(np.atleast_2d(z)))
            # scipy.integrate.quad returns a (value, abs_error) pair
            brute_force_elbo = scipy.integrate.quad(integrand, -10, 10)[0]

            print("BQ ", bq_elbo)
            print("ACTUAL ELBO ", brute_force_elbo)

    return grads
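To make the log-of-the-sum point concrete, here is a small self-contained
numpy check (illustration only; the weights are arbitrary):

import numpy as np
from scipy.special import logsumexp

log_weights = np.log(np.array([[0.2, 0.5],
                               [0.4, 0.1],
                               [0.3, 0.4]]))  # [K=3 samples, B=2 batch items]
iwae_bound = logsumexp(log_weights, axis=0) - np.log(log_weights.shape[0])
vae_bound = log_weights.mean(axis=0)
print(iwae_bound >= vae_bound)  # Jensen's inequality: [ True  True ]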
Example 9
train_texts, train_labels = zip(*((sample['text'], sample['sentiment'])
                                  for sample in train_data))
val_texts, val_labels = zip(*((sample['text'], sample['sentiment'])
                              for sample in val_data))
test_texts, test_labels = zip(*((sample['text'], sample['sentiment'])
                                for sample in test_data))

dictionary = p1.bag_of_words(train_texts)

train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary)
val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary)
test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary)

#-------------------------------------------------------------------------------
# Calculate theta & theta_0 using each algorithm
#-------------------------------------------------------------------------------

toy_features, toy_labels = utils.load_toy_data(
    cwd + '\\Review-Analyzer\\toy_data.tsv')

T = 10
L = 0.2

thetas_perceptron = p1.perceptron(toy_features, toy_labels, T)
thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T)
thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L)


def plot_toy_results(algo_name, thetas):
    print('theta for', algo_name, 'is', ', '.join(map(str, list(thetas[0]))))
    print('theta_0 for', algo_name, 'is', str(thetas[1]))
    utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas)
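p1.pegasos is not shown in these listings. A common single-sample Pegasos
sketch with regularization parameter L and step size eta = 1/sqrt(t) (an
assumption about the project's exact variant):

import numpy as np

def pegasos_sketch(features, labels, T, L):
    # Hypothetical sketch: hinge-loss-driven updates with L2 shrinkage of theta.
    n, d = features.shape
    theta, theta_0 = np.zeros(d), 0.0
    t = 0
    for _ in range(T):
        for i in range(n):
            t += 1
            eta = 1.0 / np.sqrt(t)
            if labels[i] * (features[i] @ theta + theta_0) <= 1:
                theta = (1 - eta * L) * theta + eta * labels[i] * features[i]
                theta_0 += eta * labels[i]
            else:
                theta = (1 - eta * L) * theta
    return theta, theta_0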