def nnet_bayesian(X, Y):
    """Bayesian neural net."""
    lambda_ = 1e-1  # Weight prior
    noise = tf.Variable(0.01)  # Likelihood st. dev. initialisation

    net = (
        ab.InputLayer(name="X", n_samples=n_samples_) >>
        ab.DenseVariational(output_dim=20, std=lambda_) >>
        ab.Activation(tf.nn.relu) >>
        ab.DenseVariational(output_dim=7, std=lambda_) >>
        ab.Activation(tf.nn.relu) >>
        ab.DenseVariational(output_dim=5, std=lambda_) >>
        ab.Activation(tf.tanh) >>
        ab.DenseVariational(output_dim=1, std=lambda_)
    )

    f, kl = net(X=X)
    lkhood = tf.distributions.Normal(loc=f, scale=ab.pos(noise))
    loss = ab.elbo(lkhood, Y, N, kl)
    return f, loss
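
# A minimal usage sketch (not part of the original demos): every builder above
# and below returns the predictive tensor `f` and the ELBO `loss`, so training
# reduces to minimising `loss` with a standard TensorFlow optimizer. The
# placeholders `X_`, `Y_` and the data arrays `Xr`, `Yr` are assumed to be
# defined by the surrounding script, as in the other demos (if `n_samples_` is
# a plain placeholder rather than a `placeholder_with_default`, it must be fed
# as well).
def train_sketch(X_, Y_, Xr, Yr, n_iters=1000):
    """Hypothetical helper: train any of the model builders above."""
    f, loss = nnet_bayesian(X_, Y_)
    train = tf.train.AdamOptimizer().minimize(loss)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for _ in range(n_iters):
            sess.run(train, feed_dict={X_: Xr, Y_: Yr})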
def deep_gaussian_process(X, Y):
    """Deep Gaussian Process Regression."""
    lambda_ = 0.1  # Initial weight prior std. dev., this is optimised later
    noise = tf.Variable(.01)  # Likelihood st. dev. initialisation
    lenscale = tf.Variable(1.)  # learn the length scale

    net = (
        ab.InputLayer(name="X", n_samples=n_samples_) >>
        ab.RandomFourier(n_features=20, kernel=ab.RBF(ab.pos(lenscale))) >>
        ab.DenseVariational(output_dim=5, std=lambda_, full=False) >>
        ab.RandomFourier(n_features=10, kernel=ab.RBF(1.)) >>
        ab.DenseVariational(output_dim=1, std=lambda_, full=False)
    )

    f, kl = net(X=X)
    lkhood = tf.distributions.Normal(loc=f, scale=ab.pos(noise))
    loss = ab.elbo(lkhood, Y, N, kl)
    return f, loss
def nnet_bayesian(X, Y):
    """Bayesian neural net."""
    noise = 0.01

    net = (
        ab.InputLayer(name="X", n_samples=n_samples_) >>
        ab.DenseVariational(output_dim=5) >>
        ab.Activation(tf.nn.selu) >>
        ab.DenseVariational(output_dim=4) >>
        ab.Activation(tf.nn.selu) >>
        ab.DenseVariational(output_dim=3) >>
        ab.Activation(tf.nn.selu) >>
        ab.DenseVariational(output_dim=1)
    )

    f, kl = net(X=X)
    lkhood = tf.distributions.Normal(loc=f, scale=noise).log_prob(Y)
    loss = ab.elbo(lkhood, kl, N)
    return f, loss
def bayesian_linear(X, Y):
    """Bayesian Linear Regression."""
    lambda_ = 100.
    std = (1 / lambda_) ** .5  # Weight st. dev. prior
    noise = tf.Variable(1.)  # Likelihood st. dev. initialisation, and learning

    net = (
        ab.InputLayer(name="X", n_samples=n_samples_) >>
        ab.DenseVariational(output_dim=1, std=std, full=True)
    )

    f, kl = net(X=X)
    lkhood = tf.distributions.Normal(loc=f, scale=ab.pos(noise))
    loss = ab.elbo(lkhood, Y, N, kl)
    return f, loss
def test_dense_distribution(make_data):
    """Test initialising dense variational layers with distributions."""
    x, _, _ = make_data
    S = 3
    x_, X_ = _make_placeholders(x, S)
    N = x.shape[0]

    Phi, KL = ab.DenseVariational(output_dim=D)(X_)

    tc = tf.test.TestCase()
    with tc.test_session():
        tf.global_variables_initializer().run()
        P = Phi.eval(feed_dict={x_: x})

        assert P.shape == (S, N, D)
        assert KL.eval() >= 0.
def bayesian_linear(X, Y):
    """Bayesian Linear Regression."""
    reg = .01  # Initial weight prior std. dev., this is optimised later
    noise = tf.Variable(.5)  # Likelihood st. dev. initialisation, and learning

    net = (
        ab.InputLayer(name="X", n_samples=n_samples) >>
        ab.DenseVariational(output_dim=1, std=reg, full=True)
    )

    phi, kl = net(X=X)
    lkhood = tf.distributions.Normal(loc=phi, scale=ab.pos(noise))
    loss = ab.elbo(lkhood, Y, N, kl)
    return phi, loss
def gaussian_process(X, Y):
    """Gaussian Process Regression."""
    lambda_ = 0.1  # Initial weight prior std. dev., this is optimised later
    noise = tf.Variable(.5)  # Likelihood st. dev. initialisation, and learning
    lenscale = tf.Variable(1.)  # learn the length scale
    kern = ab.RBF(lenscale=ab.pos(lenscale))  # keep the length scale positive
    # kern = ab.RBFVariational(lenscale=ab.pos(lenscale))

    net = (
        ab.InputLayer(name="X", n_samples=n_samples_) >>
        ab.RandomFourier(n_features=50, kernel=kern) >>
        ab.DenseVariational(output_dim=1, std=lambda_, full=True)
    )

    f, kl = net(X=X)
    lkhood = tf.distributions.Normal(loc=f, scale=ab.pos(noise))
    # lkhood = tf.distributions.StudentT(df=1., loc=f, scale=ab.pos(noise))
    loss = ab.elbo(lkhood, Y, N, kl)
    return f, loss
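
# A prediction sketch (an assumption, mirroring the `n_samples_` pattern used
# throughout these demos): the leading dimension of `f` indexes posterior
# samples, so feeding a larger `n_samples_` at query time and averaging with
# `ab.sample_mean` yields the predictive mean. `Xq` is a hypothetical array of
# query points; for brevity the variables are just initialised here, whereas
# in practice you would predict after training.
f, loss = gaussian_process(X_, Y_)
Ey = ab.sample_mean(f)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    ey = sess.run(Ey, feed_dict={X_: Xq, n_samples_: 50})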
def test_ncp_output(make_data):
    """Test we are making the NCP extra samples correctly, and KL is OK."""
    x, _, X = make_data
    x = x.astype(np.float32)

    net_ncp = (
        ab.InputLayer(name='X', n_samples=1) >>
        ab.NCPContinuousPerturb(input_noise=1.) >>
        ab.DenseNCP(output_dim=1)
    )

    net = (
        ab.InputLayer(name='X', n_samples=1) >>
        ab.DenseVariational(output_dim=1)
    )

    F, KL = net_ncp(X=x)
    F_var, KL_var = net(X=x)

    tc = tf.test.TestCase()
    with tc.test_session():
        tf.global_variables_initializer().run()
        f, f_var = F.eval(), F_var.eval()
        assert f.shape[0] == 1
        assert f.shape == f_var.shape
        assert KL.eval() >= KL_var.eval()
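
# A model-building sketch (an assumption, composed only from the layers
# exercised in the test above): an NCP net is assembled like any other Aboleth
# net, swapping in the perturbing input layer and the NCP output layer.
# `n_samples_` and `X_` are assumed placeholders, as in the regression demos.
net_ncp = (
    ab.InputLayer(name='X', n_samples=n_samples_) >>
    ab.NCPContinuousPerturb(input_noise=1.) >>
    ab.DenseNCP(output_dim=1)
)
f, kl = net_ncp(X=X_)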
def main():
    """Run the imputation demo."""
    # Fetch data, one-hot targets and standardise data
    data = fetch_covtype()
    Xo = data.data[:, :10]
    Xc = data.data[:, 10:]
    Y = (data.target - 1)
    Xo[:, :10] = StandardScaler().fit_transform(Xo[:, :10])

    # Network construction
    n_samples_ = tf.placeholder_with_default(LSAMPLES, [])
    data_input = ab.InputLayer(name='Xo', n_samples=n_samples_)  # Data input

    # Run this with imputation
    if METHOD is not None:
        print("Imputation method {}.".format(METHOD))

        # Fake some missing data
        rnd = np.random.RandomState(RSEED)
        mask = rnd.rand(*Xo.shape) < FRAC_MISSING
        Xo[mask] = MISSING_VAL

        # Use Aboleth to impute
        mask_input = ab.MaskInputLayer(name='M')  # Missing data mask input
        xm = np.ma.array(Xo, mask=mask)
        if METHOD == "LearnedNormalImpute":
            mean = tf.Variable(np.ma.mean(xm, axis=0).data.astype(np.float32))
            std = ab.pos_variable(np.ma.std(xm, axis=0)
                                  .data.astype(np.float32))
            input_layer = ab.NormalImpute(data_input, mask_input, mean, std)
        elif METHOD == "LearnedScalarImpute":
            scalar = tf.Variable(tf.zeros(Xo.shape[-1]))
            input_layer = ab.ScalarImpute(data_input, mask_input, scalar)
        elif METHOD == "FixedNormalImpute":
            mean = np.ma.mean(xm, axis=0).data.astype(np.float32)
            std = np.ma.std(xm, axis=0).data.astype(np.float32)
            input_layer = ab.NormalImpute(data_input, mask_input, mean, std)
        elif METHOD == "FixedScalarImpute":
            mean = np.ma.mean(xm, axis=0).data.astype(np.float32)
            input_layer = ab.ScalarImpute(data_input, mask_input, mean)
        elif METHOD == "MeanImpute":
            input_layer = ab.MeanImpute(data_input, mask_input)
        else:
            raise ValueError("Invalid method!")

    # Run this without imputation
    else:
        print("No missing data")
        input_layer = data_input
        mask = np.zeros_like(Xo)

    cat_layers = (
        ab.InputLayer(name='Xc', n_samples=n_samples_) >>
        ab.DenseVariational(output_dim=8)
    )

    con_layers = (
        input_layer >>
        ab.DenseVariational(output_dim=8)
    )

    net = (
        ab.Concat(cat_layers, con_layers) >>
        ab.Activation(tf.nn.selu) >>
        ab.DenseVariational(output_dim=NCLASSES)
    )

    # Split the training and testing data
    Xo_tr, Xo_ts, Xc_tr, Xc_ts, Y_tr, Y_ts, M_tr, M_ts = train_test_split(
        Xo.astype(np.float32),
        Xc.astype(np.float32),
        Y.astype(np.int32),
        mask,
        test_size=FRAC_TEST,
        random_state=RSEED
    )
    N_tr, Do = Xo_tr.shape
    _, Dc = Xc_tr.shape

    # Data
    with tf.name_scope("Input"):
        Xob, Xcb, Yb, Mb = batch_training(Xo_tr, Xc_tr, Y_tr, M_tr,
                                          n_epochs=NEPOCHS, batch_size=BSIZE)
        Xo_ = tf.placeholder_with_default(Xob, shape=(None, Do))
        Xc_ = tf.placeholder_with_default(Xcb, shape=(None, Dc))
        # Y_ has to be this dimension for compatibility with Categorical
        Y_ = tf.placeholder_with_default(Yb, shape=(None,))
        if METHOD is not None:
            M_ = tf.placeholder_with_default(Mb, shape=(None, Do))

    with tf.name_scope("Deepnet"):
        if METHOD is not None:
            nn, kl = net(Xo=Xo_, Xc=Xc_, M=M_)
        else:
            nn, kl = net(Xo=Xo_, Xc=Xc_)
        lkhood = tf.distributions.Categorical(logits=nn)
        loss = ab.elbo(lkhood.log_prob(Y_), kl, N_tr)
        prob = ab.sample_mean(lkhood.probs)

    with tf.name_scope("Train"):
        optimizer = tf.train.AdamOptimizer()
        global_step = tf.train.create_global_step()
        train = optimizer.minimize(loss, global_step=global_step)

    # Logging learning progress
    log = tf.train.LoggingTensorHook(
        {'step': global_step, 'loss': loss},
        every_n_iter=1000
    )

    # This is the main training "loop"
    with tf.train.MonitoredTrainingSession(
            config=CONFIG,
            save_summaries_steps=None,
            save_checkpoint_secs=None,
            hooks=[log]
    ) as sess:
        try:
            while not sess.should_stop():
                sess.run(train)
        except tf.errors.OutOfRangeError:
            print('Input queues have been exhausted!')
        # Prediction
        feed_dict = {Xo_: Xo_ts, Xc_: Xc_ts, Y_: [0], n_samples_: PSAMPLES}
        if METHOD is not None:
            feed_dict[M_] = M_ts

        # Get mean of samples for prediction, and max probability assignments
        p = sess.run(prob, feed_dict=feed_dict)
        Ey = p.argmax(axis=1)

    # Score results
    acc = accuracy_score(Y_ts, Ey)
    ll = log_loss(Y_ts, p)
    conf = confusion_matrix(Y_ts, Ey)

    print("Final scores: {}".format(METHOD))
    print("\tAccuracy = {}\n\tLog loss = {}\n\tConfusion =\n{}"
          .format(acc, ll, conf))
NOISE = 3.0  # Initial estimate of the observation noise

# Random Fourier Features, this is setting up an anisotropic length scale, or
# one length scale per dimension
LENSCALE = tf.Variable(5 * np.ones((21, 1), dtype=np.float32))
KERNEL = ab.RBF(ab.pos(LENSCALE))

# Variational Fourier Features -- length-scale setting here is the "prior"
# LENSCALE = 10.
# KERNEL = ab.RBFVariational(lenscale=LENSCALE, lenscale_posterior=LENSCALE)

# Build the approximate GP
net = ab.stack(
    ab.InputLayer(name='X', n_samples=NSAMPLES),
    ab.RandomFourier(n_features=NFEATURES, kernel=KERNEL),
    ab.DenseVariational(output_dim=1, full=True)
)

# Learning and prediction settings
BATCH_SIZE = 50  # Number of observations per mini batch
NEPOCHS = 100  # Number of times to iterate through the dataset
NPREDICTSAMPLES = 10  # results in NSAMPLES * NPREDICTSAMPLES samples

CONFIG = tf.ConfigProto(device_count={'GPU': 1})  # Use GPU?


def main():
    """Run the demo."""
    data = fetch_gpml_sarcos_data()
    Xr = data.train.data.astype(np.float32)
    Yr = data.train.targets.astype(np.float32)[:, np.newaxis]
BSIZE = 100  # Mini batch size
CONFIG = tf.ConfigProto(device_count={'GPU': 0})  # Use GPU?
LSAMPLES = 5  # Number of samples the model returns
PSAMPLES = 10  # This will give LSAMPLES * PSAMPLES predictions
NCLASSES = 7  # Number of target classes
NFEATURES = 100  # Number of random features to use

# Network construction
data_input = ab.InputLayer(name='X', n_samples=LSAMPLES)  # Data input
mask_input = ab.MaskInputLayer(name='M')  # Missing data mask input

lenscale = ab.pos(tf.Variable(np.ones((54, 1), dtype=np.float32)))
layers = (
    ab.RandomArcCosine(n_features=NFEATURES, lenscale=lenscale) >>
    ab.DenseVariational(output_dim=NCLASSES)
)


def main():
    """Run the imputation demo."""
    # Fetch data, one-hot targets and standardise data
    data = fetch_covtype()
    X = data.data
    Y = (data.target - 1)
    X = StandardScaler().fit_transform(X)

    # Now fake some missing data with a mask
    rnd = np.random.RandomState(RSEED)
    mask = rnd.rand(*X.shape) < FRAC_MISSING
    X[mask] = MISSING_VAL
# Variational Fourier Features -- length-scale setting here is the "prior", we
# can choose to optimise this or not
# lenscale = 1.
# kern = ab.RBFVariational(lenscale=lenscale)  # This is the VAR-FIXED kernel
#                                              # from Cutajar et al. 2017

# This is how we make the "latent function" of a Gaussian process. Here
# n_features controls how many random basis functions we use in the
# approximation; the more of these, the more accurate the approximation, but
# the more computationally costly. "full" indicates we want a full-covariance
# Gaussian posterior over the model weights. This is optional, but it does
# greatly improve the model uncertainty away from the data.
n_samples_ = tf.placeholder(tf.int32)
net = (
    ab.InputLayer(name="X", n_samples=n_samples_) >>
    ab.RandomFourier(n_features=200, kernel=kern) >>
    ab.DenseVariational(output_dim=1, learn_prior=True, full=True)
)


def main():
    """Run the demo."""
    n_iters = int(round(n_epochs * N / batch_size))
    print("Iterations = {}".format(n_iters))

    # Get training and testing data
    Xr, Yr, Xs, Ys = gp_draws(N, Ns, kern=kernel, noise=true_noise)

    # Prediction points
    Xq = np.linspace(-20, 20, Ns).astype(np.float32)[:, np.newaxis]
    Yq = np.linspace(-4, 4, Ns).astype(np.float32)[:, np.newaxis]

    # Set up the probability image query points
# kern = ab.RBF(lenscale=ab.pos(lenscale))  # keep the length scale positive

# Variational Fourier Features -- length-scale setting here is the "prior", we
# can choose to optimise this or not
lenscale = 1.
kern = ab.RBFVariational(lenscale=lenscale)  # This is the VAR-FIXED kernel
                                             # from Cutajar et al. 2017

# This is how we make the "latent function" of a Gaussian process. Here
# n_features controls how many random basis functions we use in the
# approximation; the more of these, the more accurate the approximation, but
# the more computationally costly. "full" indicates we want a full-covariance
# Gaussian posterior over the model weights. This is optional, but it does
# greatly improve the model uncertainty away from the data.
net = (
    ab.InputLayer(name="X", n_samples=n_samples) >>
    ab.RandomFourier(n_features=100, kernel=kern) >>
    ab.DenseVariational(output_dim=1, std=reg, full=True)
)


def main():
    """Run the demo."""
    n_iters = int(round(n_epochs * N / batch_size))
    print("Iterations = {}".format(n_iters))

    # Get training and testing data
    Xr, Yr, Xs, Ys = gp_draws(N, Ns, kern=kernel, noise=true_noise)

    # Prediction points
    Xq = np.linspace(-20, 20, Ns).astype(np.float32)[:, np.newaxis]
    Yq = np.linspace(-4, 4, Ns).astype(np.float32)[:, np.newaxis]

    # Set up the probability image query points