Exemple #1
0
def test_predict_samples():
    """Check the shape and values returned by the predict_samples aggregator.

    An all-ones array is fed through an identity op and sampled with
    ``n_groups=10``; the test asserts the result has shape (100, 100, 1)
    and that every entry is one.
    """
    n_rows, n_cols = 10, 100
    ones = np.ones((n_rows, n_cols, 1), dtype=np.float32)
    ones_ph = tf.placeholder(tf.float32, (n_rows, None, 1))
    passthrough = tf.identity(ones_ph)

    test_case = tf.test.TestCase()
    with test_case.test_session():
        # 10 replicates
        draws = ab.predict_samples(passthrough, {ones_ph: ones}, n_groups=10)
        assert draws.shape == (100, 100, 1)
        assert np.allclose(draws, 1.)
Exemple #2
0
def main():
    """Run the demo.

    Trains ``net`` on the SARCOS training split with a Normal likelihood,
    logs an R-square score on the test split after every epoch, then draws
    predictive samples on the test split and prints the R-square, SMSE
    (reported as ``1 - r-square``) and MSLL scores.

    Checkpoints and summaries are written under ``./sarcos/``.
    """
    checkpoint_dir = "./sarcos/"

    sarcos = fetch_gpml_sarcos_data()
    Xr = sarcos.train.data.astype(np.float32)
    Yr = sarcos.train.targets.astype(np.float32)[:, np.newaxis]
    Xs = sarcos.test.data.astype(np.float32)
    Ys = sarcos.test.targets.astype(np.float32)[:, np.newaxis]
    N, D = Xr.shape

    print("Iterations: {}".format(int(round(N * NEPOCHS / BATCH_SIZE))))

    # Scale and centre the data, as per the original experiment. The scaler
    # and the target mean are fitted on the training split only.
    ss = StandardScaler()
    Xr = ss.fit_transform(Xr)
    Xs = ss.transform(Xs)
    ym = Yr.mean()
    Yr -= ym
    Ys -= ym

    # Training batches
    data_tr = Dataset.from_tensor_slices({'X': Xr, 'Y': Yr}) \
        .shuffle(buffer_size=1000) \
        .batch(BATCH_SIZE)

    # Testing iterators: the whole test split as one repeating element
    data_ts = Dataset.from_tensors({'X': Xs, 'Y': Ys}).repeat()

    # One re-initialisable iterator serves both datasets; running one of the
    # two init ops selects which dataset ``batch`` draws from.
    with tf.name_scope("DataIterators"):
        iterator = Iterator.from_structure(data_tr.output_types,
                                           data_tr.output_shapes)
        batch = iterator.get_next()
        training_init = iterator.make_initializer(data_tr)
        testing_init = iterator.make_initializer(data_ts)

    with tf.name_scope("Deepnet"):
        phi, kl = net(X=batch['X'])
        std = tf.Variable(NOISE, name="noise")
        lkhood = tf.distributions.Normal(phi, scale=ab.pos(std))
        loss = ab.elbo(lkhood, batch['Y'], N, kl)
        tf.summary.scalar('loss', loss)

    with tf.name_scope("Train"):
        optimizer = tf.train.AdamOptimizer()
        global_step = tf.train.create_global_step()
        train = optimizer.minimize(loss, global_step=global_step)

    with tf.name_scope("Test"):
        r2 = rsquare(batch['Y'], phi)

    # Logging
    log = tf.train.LoggingTensorHook(
        {'step': global_step, 'loss': loss},
        every_n_iter=1000
    )

    with tf.train.MonitoredTrainingSession(
            config=CONFIG,
            scaffold=tf.train.Scaffold(local_init_op=training_init),
            checkpoint_dir=checkpoint_dir,
            save_summaries_steps=None,
            save_checkpoint_secs=20,
            save_summaries_secs=20,
            hooks=[log]
    ) as sess:
        # Fetch the summary writer through the public writer cache instead of
        # reaching into ``sess._hooks`` by position and reading a private
        # attribute, which breaks if the hook order changes.
        # NOTE(review): relies on the session's summary hook obtaining its
        # writer for ``checkpoint_dir`` from this same cache -- confirm for
        # the installed TensorFlow version.
        summary_writer = tf.summary.FileWriterCache.get(checkpoint_dir)
        for _ in range(NEPOCHS):

            # Train for one epoch; the training iterator signals the end of
            # the epoch by raising OutOfRangeError.
            try:
                while not sess.should_stop():
                    sess.run(train)
            except tf.errors.OutOfRangeError:
                pass

            # Init testing and assess and log R-square score on test set
            sess.run(testing_init)
            r2_score = sess.run(r2)
            score_sum = tf.Summary(value=[
                tf.Summary.Value(tag='r-square', simple_value=r2_score)
            ])
            summary_writer.add_summary(score_sum, sess.run(global_step))

            # Re-init training
            sess.run(training_init)

        # Prediction
        sess.run(testing_init)
        Ey = ab.predict_samples(phi, feed_dict=None, n_groups=NPREDICTSAMPLES,
                                session=sess)
        sigma = sess.run(std)
        r2_score = sess.run(r2)

    # Score mean standardised log likelihood
    Eymean = Ey.mean(axis=0)
    Eyvar = Ey.var(axis=0) + sigma**2  # add sigma2 for observation noise
    snlp = msll(Ys.flatten(), Eymean, Eyvar, Yr.flatten())

    print("------------")
    print("r-square: {:.4f}, smse: {:.4f}, msll: {:.4f}."
          .format(r2_score, 1 - r2_score, snlp))
Exemple #3
0
def main():
    """Run the imputation demo.

    Masks a random fraction of the standardised covertype features, trains
    a classifier that either learns imputation statistics (``USE_ABOLETH``)
    or works on mean-imputed inputs, and prints accuracy, log loss and the
    confusion matrix on the held-out split.
    """
    # Fetch data, offset the targets by one and standardise the features
    covtype = fetch_covtype()
    X = StandardScaler().fit_transform(covtype.data)
    Y = covtype.target - 1

    # Now fake some missing data with a mask
    rng = np.random.RandomState(RSEED)
    mask = rng.rand(*X.shape) < FRAC_MISSING
    X[mask] = MISSING_VAL

    if USE_ABOLETH:
        # Use Aboleth to learn imputation statistics
        net = ab.LearnedNormalImpute(data_input, mask_input) >> layers
    else:
        # Or just mean impute
        net = data_input >> layers
        mean_imputer = Imputer(missing_values=MISSING_VAL, strategy='mean')
        X = mean_imputer.fit_transform(X)

    # Split the training and testing data (features, targets and mask alike)
    splits = train_test_split(
        X.astype(np.float32),
        Y.astype(np.int32),
        mask,
        test_size=FRAC_TEST,
        random_state=RSEED,
    )
    X_tr, X_ts, Y_tr, Y_ts, M_tr, M_ts = splits
    N_tr, D = X_tr.shape

    # Data
    with tf.name_scope("Input"):
        Xb, Yb, Mb = batch_training(
            X_tr, Y_tr, M_tr, n_epochs=NEPOCHS, batch_size=BSIZE
        )
        X_ = tf.placeholder_with_default(Xb, shape=(None, D))
        # Y_ has to be this dimension for compatibility with Categorical
        Y_ = tf.placeholder_with_default(Yb, shape=(None, ))
        M_ = tf.placeholder_with_default(Mb, shape=(None, D))

    with tf.name_scope("Deepnet"):
        # Only the Aboleth imputation network takes the mask as an input
        if USE_ABOLETH:
            nn, kl = net(X=X_, M=M_)
        else:
            nn, kl = net(X=X_)
        lkhood = tf.distributions.Categorical(logits=nn)
        loss = ab.elbo(lkhood, Y_, N_tr, kl)

    with tf.name_scope("Train"):
        optimizer = tf.train.AdamOptimizer()
        global_step = tf.train.create_global_step()
        train = optimizer.minimize(loss, global_step=global_step)

    # Logging learning progress
    logged_tensors = {'step': global_step, 'loss': loss}
    log = tf.train.LoggingTensorHook(logged_tensors, every_n_iter=1000)

    # This is the main training "loop"
    with tf.train.MonitoredTrainingSession(
            config=CONFIG,
            save_summaries_steps=None,
            save_checkpoint_secs=None,
            hooks=[log]
    ) as sess:
        try:
            while not sess.should_stop():
                sess.run(train)
        except tf.errors.OutOfRangeError:
            print('Input queues have been exhausted!')

        # Prediction: feed the test split in place of the training batches
        feed_dict = {X_: X_ts, Y_: [0]}
        if USE_ABOLETH:
            feed_dict[M_] = M_ts

        prob_samples = ab.predict_samples(
            lkhood.probs,
            feed_dict=feed_dict,
            n_groups=PSAMPLES,
            session=sess,
        )

    # Get mean of samples for prediction, and max probability assignments
    mean_probs = prob_samples.mean(axis=0)
    predicted = mean_probs.argmax(axis=1)

    # Score results
    acc = accuracy_score(Y_ts, predicted)
    ll = log_loss(Y_ts, mean_probs)
    conf = confusion_matrix(Y_ts, predicted)
    print("Final scores:")
    print("\tAccuracy = {}\n\tLog loss = {}\n\tConfusion =\n{}".format(
        acc, ll, conf))
Exemple #4
0
def main():
    """Run the demo.

    Trains the model selected by the module-level ``model`` key on noisy
    samples of sin(x)/x, prints the R-square score of the mean prediction
    over the query points with -10 < x < 10, and plots the training data,
    the true function, the mean prediction and (for probabilistic models)
    the individual predictive samples.
    """
    n_iters = int(round(n_epochs * N / batch_size))
    print("Iterations = {}".format(n_iters))

    # Latent function
    def f(X):
        """Evaluate sin(X) / X elementwise, using the limit 1 at X == 0."""
        # A plain division yields NaN at exactly zero, which the evenly
        # spaced query grid contains whenever it has an odd number of points.
        Y = np.ones_like(X)
        nonzero = X != 0
        Y[nonzero] = np.sin(X[nonzero]) / X[nonzero]
        return Y

    # Get training and testing data
    rnd = np.random.RandomState(RSEED)
    Xr = (rnd.rand(N, 1) * 20 - 10).astype(np.float32)
    Yr = f(Xr) + rnd.randn(N, 1).astype(np.float32) * true_noise
    Xs = np.linspace(-14, 14, Ns, dtype=np.float32)[:, np.newaxis]
    Ys = f(Xs)
    # Only score inside the range covered by the training inputs
    test_bounds = np.logical_and(Xs[:, 0] > -10, Xs[:, 0] < 10)

    _, D = Xr.shape

    # Name the "data" parts of the graph
    with tf.name_scope("Input"):
        # This function will make a TensorFlow queue for shuffling and batching
        # the data, and will run through n_epochs of the data.
        Xb, Yb = batch_training(Xr, Yr, n_epochs=n_epochs,
                                batch_size=batch_size)
        X_ = tf.placeholder_with_default(Xb, shape=(None, D))
        Y_ = tf.placeholder_with_default(Yb, shape=(None, 1))

    # This is where we build the actual GP model
    with tf.name_scope("Model"):
        phi, loss = model_dict[model](X_, Y_)

    # Set up the training graph
    with tf.name_scope("Train"):
        optimizer = tf.train.AdamOptimizer()
        global_step = tf.train.create_global_step()
        train = optimizer.minimize(loss, global_step=global_step)

    # Logging learning progress
    log = tf.train.LoggingTensorHook(
        {'step': global_step, 'loss': loss},
        every_n_iter=1000
    )

    # This is the main training "loop"
    with tf.train.MonitoredTrainingSession(
            config=config,
            save_summaries_steps=None,
            save_checkpoint_secs=None,
            hooks=[log]
    ) as sess:
        try:
            while not sess.should_stop():
                sess.run(train)
        except tf.errors.OutOfRangeError:
            print('Input queues have been exhausted!')

        # Prediction, the [[None]] is to stop the default placeholder queue
        if model in probabilistic:
            Ey = ab.predict_samples(phi, feed_dict={X_: Xs, Y_: [[None]]},
                                    n_groups=n_pred_samples, session=sess)
            Eymean = Ey.mean(axis=0)  # Average samples
        else:
            Eymean = sess.run(phi, feed_dict={X_: Xs, Y_: [[None]]})

    # Score
    r2 = r2_score(Ys.flatten()[test_bounds], Eymean.flatten()[test_bounds])
    print("Score: {:.4f}".format(r2))

    # Plot (the figure gets its own name so it does not shadow the latent
    # function ``f`` defined above)
    fig = bk.figure(sizing_mode='stretch_both',
                    title="{}, R-square = {:.4f}".format(model, r2))
    if model in probabilistic:
        for y in Ey:
            fig.line(Xs.flatten(), y.flatten(), line_color='green',
                     legend='Samples', alpha=0.1)
    fig.circle(Xr.flatten(), Yr.flatten(), fill_color='blue',
               legend='Training')
    fig.line(Xs.flatten(), Ys.flatten(), line_color='blue', line_width=3,
             legend='Truth')
    fig.line(Xs.flatten(), Eymean.flatten(), line_color='green',
             legend='Mean', line_width=3)
    bk.show(fig)
Exemple #5
0
def main():
    """Run the demo.

    Trains ``net`` with a Normal likelihood on draws produced by
    ``gp_draws``, then plots the normalised predictive density image over a
    grid of query points together with the training data, the true
    function, the predictive samples and their mean.
    """
    n_iters = int(round(n_epochs * N / batch_size))
    print("Iterations = {}".format(n_iters))

    # Get training and testing data
    Xr, Yr, Xs, Ys = gp_draws(N, Ns, kern=kernel, noise=true_noise)

    # Prediction points
    Xq = np.linspace(-20, 20, Ns).astype(np.float32)[:, np.newaxis]
    Yq = np.linspace(-4, 4, Ns).astype(np.float32)[:, np.newaxis]

    # Set up the probability image query points: every (x, y) pair on the
    # grid, flattened into two column vectors
    grid_x, grid_y = np.meshgrid(Xq, Yq)
    Xi = grid_x.astype(np.float32).reshape(-1, 1)
    Yi = grid_y.astype(np.float32).reshape(-1, 1)

    D = Xr.shape[1]

    # Name the "data" parts of the graph
    with tf.name_scope("Input"):
        # This function will make a TensorFlow queue for shuffling and batching
        # the data, and will run through n_epochs of the data.
        Xb, Yb = batch_training(
            Xr, Yr, n_epochs=n_epochs, batch_size=batch_size
        )
        X_ = tf.placeholder_with_default(Xb, shape=(None, D))
        Y_ = tf.placeholder_with_default(Yb, shape=(None, 1))

    # This is where we build the actual GP model
    with tf.name_scope("Deepnet"):
        phi, kl = net(X=X_)
        lkhood = tf.distributions.Normal(loc=phi, scale=ab.pos(noise))
        loss = ab.elbo(lkhood, Y_, N, kl)

    # Set up the training graph
    with tf.name_scope("Train"):
        optimizer = tf.train.AdamOptimizer()
        global_step = tf.train.create_global_step()
        train = optimizer.minimize(loss, global_step=global_step)

    # This is used for building the predictive density image
    with tf.name_scope("Predict"):
        logprob = lkhood.log_prob(Y_)

    # Logging learning progress
    logged_tensors = {'step': global_step, 'loss': loss}
    log = tf.train.LoggingTensorHook(logged_tensors, every_n_iter=1000)

    # This is the main training "loop"
    with tf.train.MonitoredTrainingSession(
            config=config,
            save_summaries_steps=None,
            save_checkpoint_secs=None,
            hooks=[log]
    ) as sess:
        try:
            while not sess.should_stop():
                sess.run(train)
        except tf.errors.OutOfRangeError:
            print('Input queues have been exhausted!')

        # Prediction, the [[None]] is to stop the default placeholder queue
        sample_feed = {X_: Xq, Y_: [[None]]}
        Ey = ab.predict_samples(
            phi,
            feed_dict=sample_feed,
            n_groups=n_pred_samples,
            session=sess,
        )
        density_feed = {Y_: Yi, X_: Xi}
        logPY = ab.predict_expected(
            logprob,
            feed_dict=density_feed,
            n_groups=n_pred_samples,
            session=sess,
        )

    # Average samples to get mean predicted function
    Eymean = Ey.mean(axis=0)
    # Turn log-prob into prob
    Py = np.exp(logPY.reshape(Ns, Ns))

    # Plot: rescale the density image to span [0, 1] before drawing
    im_min = np.amin(Py)
    img = (Py - im_min) / (np.amax(Py) - im_min)
    fig = bk.figure(tools='pan,box_zoom,reset', sizing_mode='stretch_both')
    fig.image(image=[img], x=-20., y=-4., dw=40., dh=8, palette=bp.Plasma256)
    fig.circle(Xr.flatten(), Yr.flatten(), fill_color='blue',
               legend='Training')
    fig.line(Xs.flatten(), Ys.flatten(), line_color='blue', legend='Truth')
    for sample in Ey:
        fig.line(Xq.flatten(), sample.flatten(), line_color='red',
                 legend='Samples', alpha=0.2)
    fig.line(Xq.flatten(), Eymean.flatten(), line_color='green',
             legend='Mean')
    bk.show(fig)