def test_fourier_features(kernels, make_data):
    """Test random Fourier kernel approximations."""
    D = 100
    S = 3
    kern, p = kernels
    k = kern(D, **p)
    x, _, _ = make_data
    x_, X_ = _make_placeholders(x, S)
    N = x.shape[0]

    Phi, KL = ab.RandomFourier(D, k)(X_)

    tc = tf.test.TestCase()
    with tc.test_session():
        tf.global_variables_initializer().run()
        P = Phi.eval(feed_dict={x_: x})

        for i in range(P.shape[0]):
            p = P[i]
            assert p.shape == (N, 2 * D)
            # Check the features are normalised such that k(x, x) ~ 1.0
            assert np.allclose((p**2).sum(axis=1), np.ones(N))

        # Make sure we get a valid KL
        kl = KL.eval() if isinstance(KL, tf.Tensor) else KL
        assert kl >= 0
def deep_gaussian_process(X, Y):
    """Deep Gaussian Process Regression."""
    noise = ab.pos_variable(.1)

    net = (
        ab.InputLayer(name="X", n_samples=n_samples_) >>
        ab.RandomFourier(n_features=20, kernel=ab.RBF(learn_lenscale=True)) >>
        ab.DenseVariational(output_dim=5, full=False) >>
        ab.RandomFourier(n_features=10, kernel=ab.RBF(1., seed=1)) >>
        ab.DenseVariational(output_dim=1, full=False, learn_prior=True)
    )

    f, kl = net(X=X)
    lkhood = tf.distributions.Normal(loc=f, scale=noise).log_prob(Y)
    loss = ab.elbo(lkhood, kl, N)
    return f, loss
def deep_gaussian_process(X, Y):
    """Deep Gaussian Process Regression."""
    lambda_ = 0.1  # Initial weight prior std. dev., this is optimised later
    noise = tf.Variable(.01)  # Likelihood std. dev. initialisation
    lenscale = tf.Variable(1.)  # learn the length scale

    net = (
        ab.InputLayer(name="X", n_samples=n_samples_) >>
        ab.RandomFourier(n_features=20, kernel=ab.RBF(ab.pos(lenscale))) >>
        ab.DenseVariational(output_dim=5, std=lambda_, full=False) >>
        ab.RandomFourier(n_features=10, kernel=ab.RBF(1.)) >>
        ab.DenseVariational(output_dim=1, std=lambda_, full=False)
    )

    f, kl = net(X=X)
    lkhood = tf.distributions.Normal(loc=f, scale=ab.pos(noise))
    loss = ab.elbo(lkhood, Y, N, kl)
    return f, loss
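# Usage sketch for either deep_gaussian_process variant above: the returned
# prediction tensor and loss can be trained directly with a standard
# optimiser. The placeholders X_, Y_, the arrays Xr, Yr and the step count
# are assumptions for illustration, not part of the demos themselves.
f, loss = deep_gaussian_process(X_, Y_)
train = tf.train.AdamOptimizer().minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(10000):
        sess.run(train, feed_dict={X_: Xr, Y_: Yr})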
def my_model(features, labels, mode, params):
    """Model function for use with the tf.estimator API."""
    N = params["N"]
    n_samples = (NSAMPLES if mode == tf.estimator.ModeKeys.TRAIN
                 else NPREDICTSAMPLES)

    X = tf.feature_column.input_layer(features, params['feature_columns'])

    kernel = ab.RBF(LENSCALE, learn_lenscale=True)
    net = (
        ab.InputLayer(name="X", n_samples=n_samples) >>
        ab.RandomFourier(n_features=NFEATURES, kernel=kernel) >>
        ab.Dense(output_dim=64, init_fn="autonorm") >>
        ab.Activation(tf.nn.selu) >>
        ab.DenseVariational(output_dim=1, full=False, prior_std=1.0,
                            learn_prior=True)
    )

    phi, kl = net(X=X)
    std = ab.pos_variable(NOISE, name="noise")
    ll_f = tf.distributions.Normal(loc=phi, scale=std)
    predict_mean = ab.sample_mean(phi)

    # Compute predictions.
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {'predictions': predict_mean, 'samples': phi}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    ll = ll_f.log_prob(labels)
    loss = ab.elbo(ll, kl, N)
    tf.summary.scalar('loss', loss)

    # Compute evaluation metrics.
    mse = tf.metrics.mean_squared_error(labels=labels,
                                        predictions=predict_mean,
                                        name='mse_op')
    r2 = r2_metric(labels, predict_mean)
    metrics = {'mse': mse, 'r2': r2}

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, eval_metric_ops=metrics)

    # Create the training op.
    assert mode == tf.estimator.ModeKeys.TRAIN
    optimizer = tf.train.AdamOptimizer()
    train_op = optimizer.minimize(loss,
                                  global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
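# A minimal sketch of plugging my_model into the tf.estimator API. The feature
# column name and shape, N_TRAIN and the input functions are assumptions for
# illustration only.
feature_columns = [tf.feature_column.numeric_column("x", shape=[21])]
estimator = tf.estimator.Estimator(
    model_fn=my_model,
    params={"N": N_TRAIN, "feature_columns": feature_columns})
estimator.train(input_fn=train_input_fn)
predictions = estimator.predict(input_fn=predict_input_fn)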
def gaussian_process(X, Y):
    """Gaussian Process Regression."""
    noise = ab.pos_variable(.5)
    kern = ab.RBF(learn_lenscale=False)  # fixed length scale

    net = (
        ab.InputLayer(name="X", n_samples=n_samples_) >>
        ab.RandomFourier(n_features=50, kernel=kern) >>
        ab.DenseVariational(output_dim=1, full=True, learn_prior=True)
    )

    f, kl = net(X=X)
    lkhood = tf.distributions.Normal(loc=f, scale=noise).log_prob(Y)
    loss = ab.elbo(lkhood, kl, N)
    return f, loss
def svr(X, Y):
    """Support vector regressor."""
    reg = 0.1
    eps = 0.01
    lenscale = 1.

    # Specify which kernel to approximate with the random Fourier features
    kern = ab.RBF(lenscale=lenscale)

    net = (
        ab.InputLayer(name="X", n_samples=1) >>
        ab.RandomFourier(n_features=50, kernel=kern) >>
        ab.DenseMAP(output_dim=1, l2_reg=reg, l1_reg=0.)
    )

    phi, penalty = net(X=X)
    # Epsilon-insensitive loss: max(|Y - phi| - eps, 0)
    loss = tf.reduce_mean(tf.maximum(tf.abs(Y - phi) - eps, 0.)) + penalty
    return phi, loss
def gaussian_process(X, Y):
    """Gaussian Process Regression."""
    lambda_ = 0.1  # Initial weight prior std. dev., this is optimised later
    noise = tf.Variable(.5)  # Likelihood std. dev. initialisation, and learning
    lenscale = tf.Variable(1.)  # learn the length scale
    kern = ab.RBF(lenscale=ab.pos(lenscale))  # keep the length scale positive
    # kern = ab.RBFVariational(lenscale=ab.pos(lenscale))

    net = (
        ab.InputLayer(name="X", n_samples=n_samples_) >>
        ab.RandomFourier(n_features=50, kernel=kern) >>
        ab.DenseVariational(output_dim=1, std=lambda_, full=True)
    )

    f, kl = net(X=X)
    lkhood = tf.distributions.Normal(loc=f, scale=ab.pos(noise))
    # lkhood = tf.distributions.StudentT(df=1., loc=f, scale=ab.pos(noise))
    loss = ab.elbo(lkhood, Y, N, kl)
    return f, loss
def svr(X, Y):
    """Support vector regressor, kind of..."""
    lambda_ = 1e-4
    eps = 0.01
    lenscale = 1.

    # Specify which kernel to approximate with the random Fourier features
    kern = ab.RBF(lenscale=lenscale)

    net = (
        # ab.InputLayer(name="X", n_samples=n_samples_) >>
        ab.InputLayer(name="X", n_samples=1) >>
        ab.RandomFourier(n_features=50, kernel=kern) >>
        # ab.DropOut(keep_prob=0.9) >>
        ab.DenseMAP(output_dim=1, l2_reg=lambda_, l1_reg=0.)
    )

    f, reg = net(X=X)
    loss = tf.reduce_mean(tf.nn.relu(tf.abs(Y - f) - eps)) + reg
    return f, loss
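# Rough usage sketch for the MAP support vector regressors above. Since
# DenseMAP gives point-estimate weights (and n_samples=1), prediction is a
# single evaluation of f. The placeholders X_, Y_ and the arrays Xr, Yr, Xs
# are assumptions for illustration.
f, loss = svr(X_, Y_)
train = tf.train.AdamOptimizer().minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(5000):
        sess.run(train, feed_dict={X_: Xr, Y_: Yr})
    Ey = sess.run(f, feed_dict={X_: Xs})  # point predictions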
def main():
    """Run the demo."""
    # Get continuous and categorical data
    df_train, df_test = fetch_data()
    df = pd.concat((df_train, df_test))
    X_con, X_cat, n_cats, Y = input_fn(df)

    n_samples_ = tf.placeholder_with_default(T_SAMPLES, [])

    # Define the continuous layers
    con_layer = (
        ab.InputLayer(name='con', n_samples=n_samples_) >>
        ab.RandomFourier(100, kernel=ab.RBF(learn_lenscale=True)) >>
        ab.Dense(output_dim=16, init_fn="autonorm")
    )

    # Now define the categorical layers, which we embed
    # Note every Embed call can be different, this is just "lazy"
    cat_layer_list = [ab.Embed(EMBED_DIMS, i, init_fn="autonorm")
                      for i in n_cats]
    cat_layer = (
        ab.InputLayer(name='cat', n_samples=n_samples_) >>
        ab.PerFeature(*cat_layer_list) >>  # Assign columns to embedding layers
        ab.Activation(tf.nn.selu) >>
        ab.Dense(16, init_fn="autonorm")
    )

    # Now we can feed the initial continuous and categorical layers to further
    # "joint" layers after we concatenate them
    net = (
        ab.Concat(con_layer, cat_layer) >>
        ab.Activation(tf.nn.selu) >>
        ab.DenseVariational(output_dim=1)
    )

    # Split data into training and testing
    Xt_con, Xs_con = np.split(X_con, [len(df_train)], axis=0)
    Xt_cat, Xs_cat = np.split(X_cat, [len(df_train)], axis=0)
    Yt, Ys = np.split(Y, [len(df_train)], axis=0)

    # Graph placeholders
    X_con_ = tf.placeholder(tf.float32, [None, Xt_con.shape[1]])
    X_cat_ = tf.placeholder(tf.int32, [None, Xt_cat.shape[1]])
    Y_ = tf.placeholder(tf.float32, [None, 1])

    # Feed dicts
    train_dict = {X_con_: Xt_con, X_cat_: Xt_cat, Y_: Yt}
    test_dict = {X_con_: Xs_con, X_cat_: Xs_cat, n_samples_: P_SAMPLES}

    # Make the model
    N = len(Xt_con)
    nn, kl = net(con=X_con_, cat=X_cat_)
    likelihood = tf.distributions.Bernoulli(logits=nn)
    prob = ab.sample_mean(likelihood.probs)

    loss = ab.elbo(likelihood.log_prob(Y_), kl, N)
    optimizer = tf.train.AdamOptimizer()
    train = optimizer.minimize(loss)
    init = tf.global_variables_initializer()

    with tf.Session(config=CONFIG):
        init.run()

        # We're going to just use a feed_dict to feed in batches, which we
        # generate here
        batches = ab.batch(train_dict, batch_size=BSIZE, n_iter=NITER)
        for i, data in enumerate(batches):
            train.run(feed_dict=data)
            if i % 1000 == 0:
                loss_val = loss.eval(feed_dict=data)
                print("Iteration {}, loss = {}".format(i, loss_val))

        # Predict
        Ep = prob.eval(feed_dict=test_dict)
        Ey = Ep > 0.5  # Max probability assignment

    acc = accuracy_score(Ys.flatten(), Ey.flatten())
    logloss = log_loss(Ys.flatten(), np.hstack((1 - Ep, Ep)))
    print("Accuracy = {}, log loss = {}".format(acc, logloss))
NFEATURES = 1500  # Number of random features/bases to use in the approximation
NOISE = 3.0  # Initial estimate of the observation noise

# Random Fourier Features, this is setting up an anisotropic length scale, or
# one length scale per dimension
LENSCALE = tf.Variable(5 * np.ones((21, 1), dtype=np.float32))
KERNEL = ab.RBF(ab.pos(LENSCALE))

# Variational Fourier Features -- length-scale setting here is the "prior"
# LENSCALE = 10.
# KERNEL = ab.RBFVariational(lenscale=LENSCALE, lenscale_posterior=LENSCALE)

# Build the approximate GP
net = ab.stack(
    ab.InputLayer(name='X', n_samples=NSAMPLES),
    ab.RandomFourier(n_features=NFEATURES, kernel=KERNEL),
    ab.DenseVariational(output_dim=1, full=True)
)

# Learning and prediction settings
BATCH_SIZE = 50  # Number of observations per mini batch
NEPOCHS = 100  # Number of times to iterate through the dataset
NPREDICTSAMPLES = 10  # Results in NSAMPLES * NPREDICTSAMPLES samples

CONFIG = tf.ConfigProto(device_count={'GPU': 1})  # Use GPU?


def main():
    """Run the demo."""
    data = fetch_gpml_sarcos_data()
    Xr = data.train.data.astype(np.float32)
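# Rough prediction-time sketch for the approximate GP above: draw
# NPREDICTSAMPLES batches of NSAMPLES posterior samples (stacked on the
# leading axis) and average them. The placeholder X_, the test array Xs and a
# trained session `sess` are assumptions for illustration.
f, kl = net(X=X_)
samples = np.concatenate(
    [sess.run(f, feed_dict={X_: Xs}) for _ in range(NPREDICTSAMPLES)], axis=0)
Ey = samples.mean(axis=0)  # NSAMPLES * NPREDICTSAMPLES draws in total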
kern = ab.RBF(learn_lenscale=True)  # learn the length scale

# Variational Fourier Features -- length-scale setting here is the "prior", we
# can choose to optimise this or not
# lenscale = 1.
# kern = ab.RBFVariational(lenscale=lenscale)  # This is the VAR-FIXED kernel
# from Cutajar et al. 2017

# This is how we make the "latent function" of a Gaussian process, here
# n_features controls how many random basis functions we use in the
# approximation. The more of these, the more accurate the approximation, but
# the more computationally costly the model. "full" indicates we want a
# full-covariance Gaussian posterior over the model weights. This is optional,
# but it does greatly improve the model uncertainty away from the data.
n_samples_ = tf.placeholder(tf.int32)
net = (
    ab.InputLayer(name="X", n_samples=n_samples_) >>
    ab.RandomFourier(n_features=200, kernel=kern) >>
    ab.DenseVariational(output_dim=1, learn_prior=True, full=True)
)


def main():
    """Run the demo."""
    n_iters = int(round(n_epochs * N / batch_size))
    print("Iterations = {}".format(n_iters))

    # Get training and testing data
    Xr, Yr, Xs, Ys = gp_draws(N, Ns, kern=kernel, noise=true_noise)

    # Prediction points
    Xq = np.linspace(-20, 20, Ns).astype(np.float32)[:, np.newaxis]
    Yq = np.linspace(-4, 4, Ns).astype(np.float32)[:, np.newaxis]
# Variational Fourier Features -- length-scale setting here is the "prior", we
# can choose to optimise this or not
lenscale = 1.
kern = ab.RBFVariational(lenscale=lenscale)  # This is the VAR-FIXED kernel
# from Cutajar et al. 2017

# This is how we make the "latent function" of a Gaussian process, here
# n_features controls how many random basis functions we use in the
# approximation. The more of these, the more accurate the approximation, but
# the more computationally costly the model. "full" indicates we want a
# full-covariance Gaussian posterior over the model weights. This is optional,
# but it does greatly improve the model uncertainty away from the data.
n_samples_ = tf.placeholder(tf.int32)
net = (
    ab.InputLayer(name="X", n_samples=n_samples_) >>
    ab.RandomFourier(n_features=100, kernel=kern) >>
    ab.DenseVariational(output_dim=1, std=reg, full=True)
)


def main():
    """Run the demo."""
    n_iters = int(round(n_epochs * N / batch_size))
    print("Iterations = {}".format(n_iters))

    # Get training and testing data
    Xr, Yr, Xs, Ys = gp_draws(N, Ns, kern=kernel, noise=true_noise)

    # Prediction points
    Xq = np.linspace(-20, 20, Ns).astype(np.float32)[:, np.newaxis]
    Yq = np.linspace(-4, 4, Ns).astype(np.float32)[:, np.newaxis]