Exemple #1
0
def test_fourier_features(kernels, make_data):
    """Check the random Fourier feature layer on a parametrised kernel.

    For every sample slice of the evaluated features the test asserts the
    shape ``(rows of x, 2 * n_features)`` and that the squared row norms are
    close to one (i.e. ``k(x, x) ~ 1``). It also asserts that the KL term
    returned by the layer is non-negative.
    """
    n_features = 100
    n_samples = 3
    kernel_cls, kernel_kwargs = kernels
    kernel = kernel_cls(n_features, **kernel_kwargs)

    x, _, _ = make_data
    x_, X_ = _make_placeholders(x, n_samples)
    n_rows = x.shape[0]
    expected_shape = (n_rows, 2 * n_features)

    Phi, KL = ab.RandomFourier(n_features, kernel)(X_)

    tc = tf.test.TestCase()
    with tc.test_session():
        tf.global_variables_initializer().run()

        # Iterate over the leading axis of the evaluated feature array
        for phi in Phi.eval(feed_dict={x_: x}):
            assert phi.shape == expected_shape
            # Squared row norms of the features stand in for k(x, x) ~ 1.0
            assert np.allclose((phi ** 2).sum(axis=1), np.ones(n_rows))

        # The KL may come back as a tensor or as a plain number
        if isinstance(KL, tf.Tensor):
            kl = KL.eval()
        else:
            kl = KL
        assert kl >= 0
def deep_gaussian_process(X, Y):
    """Deep Gaussian Process Regression.

    Stacks two random-Fourier-feature / variational-dense stages and scores
    the output with a Normal likelihood.

    Returns the network output ``f`` and the ELBO-based ``loss`` computed
    from the log-likelihood of ``Y``, the network's KL term and the
    module-level ``N``.
    """
    noise = ab.pos_variable(.1)

    # Layers are created in the same order in which they are applied
    inputs = ab.InputLayer(name="X", n_samples=n_samples_)
    features_1 = ab.RandomFourier(
        n_features=20,
        kernel=ab.RBF(learn_lenscale=True),
    )
    hidden = ab.DenseVariational(output_dim=5, full=False)
    features_2 = ab.RandomFourier(
        n_features=10,
        kernel=ab.RBF(1., seed=1),
    )
    output = ab.DenseVariational(output_dim=1, full=False, learn_prior=True)
    net = inputs >> features_1 >> hidden >> features_2 >> output

    f, kl = net(X=X)
    log_lik = tf.distributions.Normal(loc=f, scale=noise).log_prob(Y)

    return f, ab.elbo(log_lik, kl, N)
Exemple #3
0
def deep_gaussian_process(X, Y):
    """Deep Gaussian Process Regression.

    Two random-Fourier-feature / variational-dense stages followed by a
    Normal likelihood. The first kernel's length scale and the likelihood
    scale are raw ``tf.Variable``s passed through ``ab.pos``.

    Returns the network output ``f`` and the ``loss`` from ``ab.elbo``.
    """
    prior_std = 0.1  # starting std. dev. of the weight prior (optimised later)
    noise = tf.Variable(.01)  # raw likelihood std. dev., wrapped by ab.pos below
    lenscale = tf.Variable(1.)  # raw length scale of the first kernel, learned

    net = (
        ab.InputLayer(name="X", n_samples=n_samples_) >>
        ab.RandomFourier(n_features=20, kernel=ab.RBF(ab.pos(lenscale))) >>
        ab.DenseVariational(output_dim=5, std=prior_std, full=False) >>
        ab.RandomFourier(n_features=10, kernel=ab.RBF(1.)) >>
        ab.DenseVariational(output_dim=1, std=prior_std, full=False)
    )

    f, kl = net(X=X)
    likelihood = tf.distributions.Normal(loc=f, scale=ab.pos(noise))

    return f, ab.elbo(likelihood, Y, N, kl)
Exemple #4
0
def my_model(features, labels, mode, params):
    """Model function returning a ``tf.estimator.EstimatorSpec`` for ``mode``.

    The network is random Fourier features, a dense SELU layer and a
    variational dense output, scored with a Normal likelihood.

    ``params`` must provide ``"N"`` (passed to ``ab.elbo``) and
    ``'feature_columns'`` (used to build the input layer).

    * PREDICT: returns the sample mean under ``'predictions'`` and the raw
      network output under ``'samples'``.
    * EVAL: returns the loss plus ``'mse'`` and ``'r2'`` metrics.
    * TRAIN: returns the loss and an Adam training op.
    """
    N = params["N"]
    if mode == tf.estimator.ModeKeys.TRAIN:
        n_samples = NSAMPLES
    else:
        n_samples = NPREDICTSAMPLES

    X = tf.feature_column.input_layer(features, params['feature_columns'])

    kernel = ab.RBF(LENSCALE, learn_lenscale=True)
    net = (
        ab.InputLayer(name="X", n_samples=n_samples) >>
        ab.RandomFourier(n_features=NFEATURES, kernel=kernel) >>
        ab.Dense(output_dim=64, init_fn="autonorm") >>
        ab.Activation(tf.nn.selu) >>
        ab.DenseVariational(
            output_dim=1,
            full=False,
            prior_std=1.0,
            learn_prior=True,
        )
    )

    phi, kl = net(X=X)
    std = ab.pos_variable(NOISE, name="noise")
    ll_f = tf.distributions.Normal(loc=phi, scale=std)
    predict_mean = ab.sample_mean(phi)

    # Prediction needs no labels, so return before anything touches them
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode,
            predictions={'predictions': predict_mean, 'samples': phi},
        )

    loss = ab.elbo(ll_f.log_prob(labels), kl, N)
    tf.summary.scalar('loss', loss)

    # Evaluation metrics (built for both EVAL and TRAIN, as before)
    metrics = {
        'mse': tf.metrics.mean_squared_error(
            labels=labels,
            predictions=predict_mean,
            name='mse_op',
        ),
        'r2': r2_metric(labels, predict_mean),
    }

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, eval_metric_ops=metrics)

    # Only training is left
    assert mode == tf.estimator.ModeKeys.TRAIN

    train_op = tf.train.AdamOptimizer().minimize(
        loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
def gaussian_process(X, Y):
    """Gaussian Process Regression.

    Approximates a GP with 50 random Fourier features of an RBF kernel and a
    full-covariance variational dense output layer, scored with a Normal
    likelihood.

    Returns the network output ``f`` and the ELBO-based ``loss``.
    """
    noise = ab.pos_variable(.5)

    # NOTE: learn_lenscale=False, so the kernel length scale is NOT learned
    # here (an earlier comment on this line claimed the opposite).
    kern = ab.RBF(learn_lenscale=False)

    inputs = ab.InputLayer(name="X", n_samples=n_samples_)
    features = ab.RandomFourier(n_features=50, kernel=kern)
    output = ab.DenseVariational(output_dim=1, full=True, learn_prior=True)
    net = inputs >> features >> output

    f, kl = net(X=X)
    log_lik = tf.distributions.Normal(loc=f, scale=noise).log_prob(Y)

    return f, ab.elbo(log_lik, kl, N)
Exemple #6
0
def svr(X, Y):
    """Support vector regressor.

    Fits a MAP dense layer on 50 random Fourier features of an RBF kernel
    using the epsilon-insensitive loss ``max(|Y - f| - eps, 0)`` plus the
    regularisation term returned by the network.

    Args:
        X: input tensor fed to the network's ``"X"`` input layer.
        Y: target tensor compared against the network output.

    Returns:
        A ``(phi, loss)`` pair: the network output and the scalar loss.
    """
    l2_weight = 0.1  # L2 penalty coefficient for the output layer
    eps = 0.01  # half-width of the insensitive tube around the targets
    lenscale = 1.

    kern = ab.RBF(lenscale=lenscale)  # length scale given as a plain constant
    net = (
        ab.InputLayer(name="X", n_samples=1) >>
        ab.RandomFourier(n_features=50, kernel=kern) >>
        ab.DenseMAP(output_dim=1, l2_reg=l2_weight, l1_reg=0.)
    )

    phi, reg = net(X=X)

    # eps must be subtracted AFTER taking the absolute residual. With eps
    # inside the abs() the argument is never negative, so the clamp at zero
    # does nothing and there is no insensitive tube at all.
    hinge = tf.maximum(tf.abs(Y - phi) - eps, 0.)
    loss = tf.reduce_mean(hinge) + reg
    return phi, loss
Exemple #7
0
def gaussian_process(X, Y):
    """Gaussian Process Regression.

    Approximates a GP with 50 random Fourier features of an RBF kernel and a
    full-covariance variational dense output layer. Both the kernel length
    scale and the likelihood scale are raw ``tf.Variable``s passed through
    ``ab.pos``.

    Returns the network output ``f`` and the ``loss`` from ``ab.elbo``.
    """
    prior_std = 0.1  # starting std. dev. of the weight prior (optimised later)
    noise = tf.Variable(.5)  # raw likelihood std. dev., initial value, learned
    lenscale = tf.Variable(1.)  # raw kernel length scale, learned

    # ab.pos keeps the length scale positive.
    # ab.RBFVariational(lenscale=ab.pos(lenscale)) can be swapped in here.
    kern = ab.RBF(lenscale=ab.pos(lenscale))

    net = (
        ab.InputLayer(name="X", n_samples=n_samples_) >>
        ab.RandomFourier(n_features=50, kernel=kern) >>
        ab.DenseVariational(output_dim=1, std=prior_std, full=True)
    )

    f, kl = net(X=X)

    # Alternative likelihood:
    #   tf.distributions.StudentT(df=1., loc=f, scale=ab.pos(noise))
    likelihood = tf.distributions.Normal(loc=f, scale=ab.pos(noise))

    return f, ab.elbo(likelihood, Y, N, kl)
Exemple #8
0
def svr(X, Y):
    """Support vector regressor, kind of...

    A MAP dense layer on 50 random Fourier features of an RBF kernel,
    trained with the epsilon-insensitive loss ``relu(|Y - f| - eps)`` plus
    the regularisation term returned by the network.

    Returns the network output ``f`` and the scalar ``loss``.
    """
    l2_weight = 1e-4
    eps = 0.01
    lenscale = 1.

    # The kernel that the random Fourier features approximate
    kern = ab.RBF(lenscale=lenscale)

    # A single sample is used here; ab.InputLayer(name="X",
    # n_samples=n_samples_) and an ab.DropOut(keep_prob=0.9) stage before the
    # output layer were previously tried as alternatives.
    inputs = ab.InputLayer(name="X", n_samples=1)
    features = ab.RandomFourier(n_features=50, kernel=kern)
    output = ab.DenseMAP(output_dim=1, l2_reg=l2_weight, l1_reg=0.)
    net = inputs >> features >> output

    f, reg = net(X=X)
    abs_residual = tf.abs(Y - f)
    loss = tf.reduce_mean(tf.nn.relu(abs_residual - eps)) + reg
    return f, loss
Exemple #9
0
def main():
    """Run the demo.

    Builds a network with a continuous branch (random Fourier features) and
    a categorical branch (per-column embeddings), concatenates them into a
    variational dense output with a Bernoulli likelihood, trains it with
    Adam on mini-batches, and prints accuracy and log loss on the test rows.
    """
    # Fetch the data and featurise train and test rows together
    df_train, df_test = fetch_data()
    n_train = len(df_train)
    X_con, X_cat, n_cats, Y = input_fn(pd.concat((df_train, df_test)))

    n_samples_ = tf.placeholder_with_default(T_SAMPLES, [])

    # Branch for the continuous inputs
    con_layer = (
        ab.InputLayer(name='con', n_samples=n_samples_) >>
        ab.RandomFourier(100, kernel=ab.RBF(learn_lenscale=True)) >>
        ab.Dense(output_dim=16, init_fn="autonorm")
    )

    # Branch for the categorical inputs: one embedding per column. Every
    # Embed could be configured differently; sharing settings is just lazy.
    embeddings = [
        ab.Embed(EMBED_DIMS, n_categories, init_fn="autonorm")
        for n_categories in n_cats
    ]
    cat_layer = (
        ab.InputLayer(name='cat', n_samples=n_samples_) >>
        ab.PerFeature(*embeddings) >>  # one embedding layer per column
        ab.Activation(tf.nn.selu) >>
        ab.Dense(16, init_fn="autonorm")
    )

    # Join the two branches and add the shared output layers
    net = (
        ab.Concat(con_layer, cat_layer) >>
        ab.Activation(tf.nn.selu) >>
        ab.DenseVariational(output_dim=1)
    )

    # The first n_train rows are training data, the remainder is test data
    Xt_con, Xs_con = np.split(X_con, [n_train], axis=0)
    Xt_cat, Xs_cat = np.split(X_cat, [n_train], axis=0)
    Yt, Ys = np.split(Y, [n_train], axis=0)

    # Graph inputs
    X_con_ = tf.placeholder(tf.float32, [None, Xt_con.shape[1]])
    X_cat_ = tf.placeholder(tf.int32, [None, Xt_cat.shape[1]])
    Y_ = tf.placeholder(tf.float32, [None, 1])

    # What gets fed for training and for prediction
    train_dict = {X_con_: Xt_con, X_cat_: Xt_cat, Y_: Yt}
    test_dict = {X_con_: Xs_con, X_cat_: Xs_cat, n_samples_: P_SAMPLES}

    # Model, loss and optimiser
    N = len(Xt_con)
    logits, kl = net(con=X_con_, cat=X_cat_)
    likelihood = tf.distributions.Bernoulli(logits=logits)
    prob = ab.sample_mean(likelihood.probs)

    loss = ab.elbo(likelihood.log_prob(Y_), kl, N)
    train = tf.train.AdamOptimizer().minimize(loss)
    init = tf.global_variables_initializer()

    with tf.Session(config=CONFIG):
        init.run()

        # Mini-batches are produced as ready-made feed dicts
        batches = ab.batch(train_dict, batch_size=BSIZE, n_iter=NITER)

        for step, batch in enumerate(batches):
            train.run(feed_dict=batch)
            if step % 1000 == 0:
                loss_val = loss.eval(feed_dict=batch)
                print("Iteration {}, loss = {}".format(step, loss_val))

        # Predicted probabilities for the test rows
        p_pos = prob.eval(feed_dict=test_dict)

    y_pred = p_pos > 0.5  # assign the more probable class

    y_true = Ys.flatten()
    acc = accuracy_score(y_true, y_pred.flatten())
    logloss = log_loss(Ys.flatten(), np.hstack((1 - p_pos, p_pos)))

    print("Accuracy = {}, log loss = {}".format(acc, logloss))
Exemple #10
0
NFEATURES = 1500  # How many random bases the kernel approximation uses
NOISE = 3.0  # Starting guess for the observation noise

# Random Fourier features with an anisotropic RBF kernel: LENSCALE is a
# (21, 1) float32 variable, i.e. one length scale per input dimension, every
# entry starting at 5 and wrapped in ab.pos before reaching the kernel.
LENSCALE = tf.Variable(np.full((21, 1), 5, dtype=np.float32))
KERNEL = ab.RBF(ab.pos(LENSCALE))

# Alternative: variational Fourier features, where the length scale given
# here acts as the "prior".
# LENSCALE = 10.
# KERNEL = ab.RBFVariational(lenscale=LENSCALE, lenscale_posterior=LENSCALE)

# The approximate GP itself
net = ab.stack(
    ab.InputLayer(name='X', n_samples=NSAMPLES),
    ab.RandomFourier(n_features=NFEATURES, kernel=KERNEL),
    ab.DenseVariational(output_dim=1, full=True),
)

# Settings for learning and prediction
BATCH_SIZE = 50  # observations in each mini batch
NEPOCHS = 100  # passes over the whole dataset
NPREDICTSAMPLES = 10  # gives NSAMPLES * NPREDICTSAMPLES samples in total

# Session configuration with the 'GPU' device count set to 1
CONFIG = tf.ConfigProto(device_count={'GPU': 1})


def main():
    """Run the demo.

    NOTE(review): only the opening statements of this function are visible
    in this excerpt, so this description covers just those: the data set is
    fetched with ``fetch_gpml_sarcos_data`` and its training inputs are
    taken as a float32 array.
    """
    data = fetch_gpml_sarcos_data()
    # Cast the training inputs to float32 -- presumably to match the float32
    # tensors built at module level; confirm against the rest of main().
    Xr = data.train.data.astype(np.float32)
Exemple #11
0
# RBF kernel with learn_lenscale switched on
kern = ab.RBF(learn_lenscale=True)

# Alternative: variational Fourier features. The length scale given there is
# the "prior", which may or may not be optimised.
# lenscale = 1.
# kern = ab.RBFVariational(lenscale=lenscale)  # the VAR-FIXED kernel from
# Cutajar et al. 2017

# The "latent function" of the Gaussian process. n_features is the number of
# random basis functions in the approximation: more of them is more accurate
# but also more expensive. full=True asks for a full-covariance Gaussian
# posterior over the model weights; it is optional, but greatly improves the
# model uncertainty away from the data.
n_samples_ = tf.placeholder(tf.int32)
net = (
    ab.InputLayer(name="X", n_samples=n_samples_) >>
    ab.RandomFourier(n_features=200, kernel=kern) >>
    ab.DenseVariational(output_dim=1, full=True, learn_prior=True)
)


def main():
    """Run the demo.

    NOTE(review): only the opening statements of this function are visible
    in this excerpt. They compute and print the iteration count, draw
    training and testing data with ``gp_draws``, and build the query grids
    ``Xq`` and ``Yq``.
    """
    # Iteration count derived from n_epochs, N and batch_size (module-level
    # names defined outside this excerpt), rounded to the nearest integer
    n_iters = int(round(n_epochs * N / batch_size))
    print("Iterations = {}".format(n_iters))

    # Get training and testing data
    Xr, Yr, Xs, Ys = gp_draws(N, Ns, kern=kernel, noise=true_noise)

    # Prediction points: Ns evenly spaced float32 values over [-20, 20] and
    # [-4, 4] respectively, each reshaped into a column of shape (Ns, 1)
    Xq = np.linspace(-20, 20, Ns).astype(np.float32)[:, np.newaxis]
    Yq = np.linspace(-4, 4, Ns).astype(np.float32)[:, np.newaxis]
Exemple #12
0
# Variational Fourier features: the length scale set here is the "prior",
# which may or may not be optimised. This is the VAR-FIXED kernel from
# Cutajar et al. 2017.
lenscale = 1.
kern = ab.RBFVariational(lenscale=lenscale)

# The "latent function" of the Gaussian process. n_features is the number of
# random basis functions in the approximation: more of them is more accurate
# but also more expensive. full=True asks for a full-covariance Gaussian
# posterior over the model weights; it is optional, but greatly improves the
# model uncertainty away from the data.
n_samples_ = tf.placeholder(tf.int32)
net = (ab.InputLayer(name="X", n_samples=n_samples_)
       >> ab.RandomFourier(n_features=100, kernel=kern)
       >> ab.DenseVariational(output_dim=1, std=reg, full=True))


def main():
    """Run the demo.

    NOTE(review): this function runs to the end of the visible excerpt and
    may continue beyond it. The visible part computes and prints the
    iteration count, draws training and testing data with ``gp_draws``, and
    builds the query grids ``Xq`` and ``Yq``.
    """
    # Iteration count derived from n_epochs, N and batch_size (module-level
    # names defined outside this excerpt), rounded to the nearest integer
    n_iters = int(round(n_epochs * N / batch_size))
    print("Iterations = {}".format(n_iters))

    # Get training and testing data
    Xr, Yr, Xs, Ys = gp_draws(N, Ns, kern=kernel, noise=true_noise)

    # Prediction points: Ns evenly spaced float32 values over [-20, 20] and
    # [-4, 4] respectively, each reshaped into a column of shape (Ns, 1)
    Xq = np.linspace(-20, 20, Ns).astype(np.float32)[:, np.newaxis]
    Yq = np.linspace(-4, 4, Ns).astype(np.float32)[:, np.newaxis]