def deep_gaussian_process(X, Y):
    """Deep Gaussian Process Regression.

    Stacks two random-Fourier-feature layers, each followed by a variational
    dense layer built with ``full=False``, and scores the output with a
    Normal likelihood.

    Relies on the module-level names ``n_samples_`` and ``N``.

    Returns the network output ``f`` and the value returned by ``ab.elbo``.
    """
    prior_std = 0.1  # Initial weight prior std. dev, this is optimised later
    noise_raw = tf.Variable(.01)  # Likelihood st. dev. initialisation
    lenscale_raw = tf.Variable(1.)  # learn the length scale

    # Build every layer first (in the same order the chained expression would
    # evaluate them), then compose them with ``>>``.
    inputs = ab.InputLayer(name="X", n_samples=n_samples_)
    features_1 = ab.RandomFourier(
        n_features=20,
        kernel=ab.RBF(ab.pos(lenscale_raw)),
    )
    weights_1 = ab.DenseVariational(output_dim=5, std=prior_std, full=False)
    features_2 = ab.RandomFourier(n_features=10, kernel=ab.RBF(1.))
    weights_2 = ab.DenseVariational(output_dim=1, std=prior_std, full=False)
    net = inputs >> features_1 >> weights_1 >> features_2 >> weights_2

    f, kl = net(X=X)

    # ``ab.pos`` is applied to the raw noise variable before it is used as the
    # likelihood scale.
    likelihood = tf.distributions.Normal(loc=f, scale=ab.pos(noise_raw))
    loss = ab.elbo(likelihood, Y, N, kl)
    return f, loss
def deep_gaussian_process(X, Y):
    """Deep Gaussian Process Regression.

    Two random-Fourier-feature layers are chained, each followed by a
    variational dense layer built with ``full=False``. The first kernel is
    created with ``learn_lenscale=True``; the second uses a length scale of
    ``1.`` and ``seed=1``.

    Relies on the module-level names ``n_samples_`` and ``N``.

    Returns the network output ``f`` and the value returned by ``ab.elbo``.
    """
    noise_std = ab.pos_variable(.1)

    # Layers are constructed one by one, in chain order, and then composed.
    inputs = ab.InputLayer(name="X", n_samples=n_samples_)
    features_1 = ab.RandomFourier(
        n_features=20,
        kernel=ab.RBF(learn_lenscale=True),
    )
    weights_1 = ab.DenseVariational(output_dim=5, full=False)
    features_2 = ab.RandomFourier(n_features=10, kernel=ab.RBF(1., seed=1))
    weights_2 = ab.DenseVariational(
        output_dim=1,
        full=False,
        learn_prior=True,
    )
    net = inputs >> features_1 >> weights_1 >> features_2 >> weights_2

    f, kl = net(X=X)

    # Log-likelihood of the targets under a Normal centred on the net output.
    likelihood = tf.distributions.Normal(loc=f, scale=noise_std)
    log_like = likelihood.log_prob(Y)
    loss = ab.elbo(log_like, kl, N)
    return f, loss
def my_model(features, labels, mode, params):
    """Estimator model function for an Aboleth regression network.

    Parameters
    ----------
    features
        Passed to ``tf.feature_column.input_layer`` together with
        ``params['feature_columns']``.
    labels
        Regression targets; used for the log-likelihood and the metrics.
    mode
        One of ``tf.estimator.ModeKeys``; selects the returned spec and the
        number of samples drawn from the network.
    params
        Dictionary providing ``"N"`` and ``'feature_columns'``.

    Returns
    -------
    tf.estimator.EstimatorSpec
        Predictions only in PREDICT mode, loss and metrics in EVAL mode, loss
        and training op in TRAIN mode.
    """
    mode_keys = tf.estimator.ModeKeys
    N = params["N"]

    # NSAMPLES is used for training; every other mode uses NPREDICTSAMPLES.
    if mode == mode_keys.TRAIN:
        n_samples = NSAMPLES
    else:
        n_samples = NPREDICTSAMPLES

    X = tf.feature_column.input_layer(features, params['feature_columns'])

    kernel = ab.RBF(LENSCALE, learn_lenscale=True)
    inputs = ab.InputLayer(name="X", n_samples=n_samples)
    fourier = ab.RandomFourier(n_features=NFEATURES, kernel=kernel)
    hidden = ab.Dense(output_dim=64, init_fn="autonorm")
    nonlinearity = ab.Activation(tf.nn.selu)
    output = ab.DenseVariational(
        output_dim=1,
        full=False,
        prior_std=1.0,
        learn_prior=True,
    )
    net = inputs >> fourier >> hidden >> nonlinearity >> output

    phi, kl = net(X=X)
    std = ab.pos_variable(NOISE, name="noise")
    likelihood = tf.distributions.Normal(loc=phi, scale=std)
    predict_mean = ab.sample_mean(phi)

    # Prediction needs neither labels nor a loss, so return early.
    if mode == mode_keys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode,
            predictions={'predictions': predict_mean, 'samples': phi},
        )

    log_like = likelihood.log_prob(labels)
    loss = ab.elbo(log_like, kl, N)
    tf.summary.scalar('loss', loss)

    # Evaluation metrics.
    mse = tf.metrics.mean_squared_error(
        labels=labels,
        predictions=predict_mean,
        name='mse_op',
    )
    r2 = r2_metric(labels, predict_mean)
    metrics = {'mse': mse, 'r2': r2}

    if mode == mode_keys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, eval_metric_ops=metrics)

    # Only training is left.
    assert mode == mode_keys.TRAIN

    optimizer = tf.train.AdamOptimizer()
    global_step = tf.train.get_global_step()
    train_op = optimizer.minimize(loss, global_step=global_step)
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
def gaussian_process(X, Y):
    """Gaussian Process Regression.

    A single random-Fourier-feature layer followed by a variational dense
    layer built with ``full=True`` and ``learn_prior=True``.

    Relies on the module-level names ``n_samples_`` and ``N``.

    Returns the network output ``f`` and the value returned by ``ab.elbo``.
    """
    noise_std = ab.pos_variable(.5)

    # The kernel is created with ``learn_lenscale=False``.
    # NOTE(review): the previous comment here read "learn lengthscale", which
    # contradicts the flag -- confirm which behaviour is intended.
    kern = ab.RBF(learn_lenscale=False)

    inputs = ab.InputLayer(name="X", n_samples=n_samples_)
    features = ab.RandomFourier(n_features=50, kernel=kern)
    weights = ab.DenseVariational(output_dim=1, full=True, learn_prior=True)
    net = inputs >> features >> weights

    f, kl = net(X=X)

    likelihood = tf.distributions.Normal(loc=f, scale=noise_std)
    log_like = likelihood.log_prob(Y)
    loss = ab.elbo(log_like, kl, N)
    return f, loss
def svr(X, Y):
    """Support vector regressor.

    Random Fourier features feed a MAP dense layer, trained with an
    epsilon-insensitive loss: absolute residuals smaller than ``eps`` cost
    nothing, larger ones are penalised linearly.

    Parameters
    ----------
    X
        Input tensor passed to the ``"X"`` input layer.
    Y
        Regression targets.

    Returns
    -------
    phi
        Network output.
    loss
        Mean epsilon-insensitive loss plus the regulariser term returned by
        the network.
    """
    l2_weight = 0.1  # L2 penalty weight handed to the MAP layer
    eps = 0.01  # half-width of the insensitive tube
    lenscale = 1.  # fixed (not learned) kernel length scale

    kern = ab.RBF(lenscale=lenscale)
    net = (
        ab.InputLayer(name="X", n_samples=1) >>
        ab.RandomFourier(n_features=50, kernel=kern) >>
        ab.DenseMAP(output_dim=1, l2_reg=l2_weight, l1_reg=0.)
    )

    # Keep the regulariser term under its own name rather than overwriting
    # the penalty weight above.
    phi, reg = net(X=X)

    # eps must be subtracted *outside* the absolute value: max(|r| - eps, 0).
    # With eps inside the abs the maximum against 0 never triggers and the
    # loss collapses to a shifted L1 loss.
    loss = tf.reduce_mean(tf.maximum(tf.abs(Y - phi) - eps, 0.)) + reg
    return phi, loss
def gaussian_process(X, Y):
    """Gaussian Process Regression.

    A single random-Fourier-feature layer followed by a variational dense
    layer built with ``full=True``. The noise scale and the kernel length
    scale are TensorFlow variables passed through ``ab.pos`` before use.

    Relies on the module-level names ``n_samples_`` and ``N``.

    Returns the network output ``f`` and the value returned by ``ab.elbo``.
    """
    prior_std = 0.1  # Initial weight prior std. dev, this is optimised later
    noise_raw = tf.Variable(.5)  # Likelihood st. dev. initialisation
    lenscale_raw = tf.Variable(1.)  # learn the length scale

    # keep the length scale positive
    # (alternative kernel: ab.RBFVariational(lenscale=ab.pos(lenscale_raw)))
    kern = ab.RBF(lenscale=ab.pos(lenscale_raw))

    inputs = ab.InputLayer(name="X", n_samples=n_samples_)
    features = ab.RandomFourier(n_features=50, kernel=kern)
    weights = ab.DenseVariational(output_dim=1, std=prior_std, full=True)
    net = inputs >> features >> weights

    f, kl = net(X=X)

    # (alternative likelihood: tf.distributions.StudentT(df=1., loc=f,
    #  scale=ab.pos(noise_raw)))
    likelihood = tf.distributions.Normal(loc=f, scale=ab.pos(noise_raw))
    loss = ab.elbo(likelihood, Y, N, kl)
    return f, loss
def svr(X, Y):
    """Support vector regressor, kind of...

    Random Fourier features feed a MAP dense layer; the loss is the mean of
    ``relu(|Y - f| - eps)`` plus the regulariser term returned by the
    network.

    Returns the network output ``f`` and the loss.
    """
    l2_weight = 1e-4
    eps = 0.01
    lenscale = 1.

    # Specify which kernel to approximate with the random Fourier features
    kern = ab.RBF(lenscale=lenscale)

    # A single sample is requested from the input layer
    # (alternative: n_samples=n_samples_; a DropOut(keep_prob=0.9) layer
    # before the dense layer is another option that is not enabled).
    inputs = ab.InputLayer(name="X", n_samples=1)
    features = ab.RandomFourier(n_features=50, kernel=kern)
    weights = ab.DenseMAP(output_dim=1, l2_reg=l2_weight, l1_reg=0.)
    net = inputs >> features >> weights

    f, reg = net(X=X)

    # Residuals inside the eps tube contribute zero loss.
    abs_residual = tf.abs(Y - f)
    insensitive = tf.nn.relu(abs_residual - eps)
    loss = tf.reduce_mean(insensitive) + reg
    return f, loss
def main():
    """Run the demo.

    Fetches the data, builds a network with separate continuous and
    categorical input branches, trains it with Adam on mini-batches from
    ``ab.batch`` and prints the accuracy and log loss on the test split.
    """
    # Continuous and categorical data, train and test stacked together
    df_train, df_test = fetch_data()
    df_all = pd.concat((df_train, df_test))
    X_con, X_cat, n_cats, Y = input_fn(df_all)

    n_samples_ = tf.placeholder_with_default(T_SAMPLES, [])

    # Continuous branch
    con_input = ab.InputLayer(name='con', n_samples=n_samples_)
    con_features = ab.RandomFourier(100, kernel=ab.RBF(learn_lenscale=True))
    con_dense = ab.Dense(output_dim=16, init_fn="autonorm")
    con_layer = con_input >> con_features >> con_dense

    # Categorical branch: one embedding layer per entry of ``n_cats``. Every
    # Embed call could be configured differently; they are all alike here.
    cat_layer_list = [
        ab.Embed(EMBED_DIMS, n_cat, init_fn="autonorm") for n_cat in n_cats
    ]
    cat_input = ab.InputLayer(name='cat', n_samples=n_samples_)
    cat_embed = ab.PerFeature(*cat_layer_list)  # columns -> embedding layers
    cat_act = ab.Activation(tf.nn.selu)
    cat_dense = ab.Dense(16, init_fn="autonorm")
    cat_layer = cat_input >> cat_embed >> cat_act >> cat_dense

    # Joint layers applied after concatenating the two branches
    joined = ab.Concat(con_layer, cat_layer)
    joint_act = ab.Activation(tf.nn.selu)
    joint_out = ab.DenseVariational(output_dim=1)
    net = joined >> joint_act >> joint_out

    # Undo the concatenation: the training rows come first
    split_at = [len(df_train)]
    Xt_con, Xs_con = np.split(X_con, split_at, axis=0)
    Xt_cat, Xs_cat = np.split(X_cat, split_at, axis=0)
    Yt, Ys = np.split(Y, split_at, axis=0)

    # Graph placeholders
    X_con_ = tf.placeholder(tf.float32, [None, Xt_con.shape[1]])
    X_cat_ = tf.placeholder(tf.int32, [None, Xt_cat.shape[1]])
    Y_ = tf.placeholder(tf.float32, [None, 1])

    # Feed dictionaries; the test one also overrides the number of samples
    train_dict = {X_con_: Xt_con, X_cat_: Xt_cat, Y_: Yt}
    test_dict = {X_con_: Xs_con, X_cat_: Xs_cat, n_samples_: P_SAMPLES}

    # Model, likelihood and loss
    N = len(Xt_con)
    logits, kl = net(con=X_con_, cat=X_cat_)
    likelihood = tf.distributions.Bernoulli(logits=logits)
    prob = ab.sample_mean(likelihood.probs)
    loss = ab.elbo(likelihood.log_prob(Y_), kl, N)

    optimizer = tf.train.AdamOptimizer()
    train = optimizer.minimize(loss)
    init = tf.global_variables_initializer()

    with tf.Session(config=CONFIG):
        init.run()

        # Batches are fed through a feed_dict, generated here
        batches = ab.batch(train_dict, batch_size=BSIZE, n_iter=NITER)
        for step, batch in enumerate(batches):
            train.run(feed_dict=batch)
            if step % 1000 != 0:
                continue
            loss_val = loss.eval(feed_dict=batch)
            print("Iteration {}, loss = {}".format(step, loss_val))

        # Predict on the held-out rows
        p_test = prob.eval(feed_dict=test_dict)

    y_hat = p_test > 0.5  # Max probability assignment
    y_true = Ys.flatten()

    acc = accuracy_score(y_true, y_hat.flatten())
    class_probs = np.hstack((1 - p_test, p_test))
    logloss = log_loss(y_true, class_probs)

    print("Accuracy = {}, log loss = {}".format(acc, logloss))
tf.global_variables_initializer().run() P = Phi.eval(feed_dict={x_: x}) assert P.shape == (S, N, out_height, out_width, D) assert P.dtype == np.float32 assert np.isscalar(KL.eval(feed_dict={x_: x})) @pytest.mark.parametrize('layer_args', [ (SampleLayer, ()), (SampleLayer3, ()), (ab.Dense, (D, )), (ab.DenseVariational, (D, )), (ab.EmbedVariational, (2, D)), (ab.Conv2D, (8, (4, 4))), (ab.Conv2DVariational, (8, (4, 4))), (ab.RandomFourier, (2, ab.RBF())), (ab.RandomArcCosine, (2, )), ]) def test_sample_layer_input_exception(layer_args, make_data): """Make sure sample layers fail when the don't get a rank 3 tensor.""" x, _, _ = make_data layer, args = layer_args with pytest.raises(AssertionError): layer(*args)(x) @pytest.mark.parametrize('kernels', [(ab.RBF, {}), (ab.RBFVariational, {}), (ab.Matern, { 'p': 1 }), (ab.Matern, { 'p': 2
import aboleth as ab
from aboleth.datasets import fetch_gpml_sarcos_data

# Set up a python logger so we can see the output of MonitoredTrainingSession.
# The root logger is configured so INFO-level messages are not filtered out.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

NSAMPLES = 10  # Number of random samples to get from an Aboleth net
NFEATURES = 1500  # Number of random features/bases to use in the approximation
NOISE = 3.0  # Initial estimate of the observation noise

# Random Fourier Features, this is setting up an anisotropic length scale, or
# one length scale per dimension. The variable is initialised to 5 everywhere
# with shape (21, 1) -- presumably 21 input dimensions; confirm against the
# dataset. It is passed through ab.pos before being handed to the kernel.
LENSCALE = tf.Variable(5 * np.ones((21, 1), dtype=np.float32))
KERNEL = ab.RBF(ab.pos(LENSCALE))

# Variational Fourier Features -- length-scale setting here is the "prior"
# LENSCALE = 10.
# KERNEL = ab.RBFVariational(lenscale=LENSCALE, lenscale_posterior=LENSCALE)

# Build the approximate GP: input layer -> random Fourier features ->
# variational dense layer created with full=True.
net = ab.stack(
    ab.InputLayer(name='X', n_samples=NSAMPLES),
    ab.RandomFourier(n_features=NFEATURES, kernel=KERNEL),
    ab.DenseVariational(output_dim=1, full=True)
)

# Learning and prediction settings
BATCH_SIZE = 50  # number of observations per mini batch
NEPOCHS = 100  # Number of times to iterate through the dataset
Ns = 400 # Number of testing points to generate kernel = kern(length_scale=0.5) # Kernel to use for making a random GP draw true_noise = 0.1 # Add noise to the GP draws, to make things a little harder # Model settings n_samples = 5 # Number of random samples to get from an Aboleth net p_samples = 30 # Number of samples for prediction n_epochs = 200 # how many times to see the data for training batch_size = 10 # mini batch size for stochastric gradients config = tf.ConfigProto(device_count={'GPU': 0}) # Use GPU? 0 is no # Model initialisation NOISE = 1. # Likelihood st. dev. initialisation, and learning # Random Fourier Features kern = ab.RBF(learn_lenscale=True) # keep the length scale positive # Variational Fourier Features -- length-scale setting here is the "prior", we # can choose to optimise this or not # lenscale = 1. # kern = ab.RBFVariational(lenscale=lenscale) # This is VAR-FIXED kernel from # Cutjar et. al. 2017 # This is how we make the "latent function" of a Gaussian process, here # n_features controls how many random basis functions we use in the # approximation. The more of these, the more accurate, but more costly # computationally. "full" indicates we want a full-covariance matrix Gaussian # posterior of the model weights. This is optional, but it does greatly improve # the model uncertainty away from the data. n_samples_ = tf.placeholder(tf.int32) net = (ab.InputLayer(name="X", n_samples=n_samples_) >> ab.RandomFourier(