# Shared imports for the example snippets below (TensorFlow 1.x-era
# ZhuSuan examples).
import os

import numpy as np
import tensorflow as tf
import zhusuan as zs

from examples import conf
from examples.utils import dataset


def load_data(hps):
    from examples.utils import dataset
    from examples import conf
    data_path = os.path.join(conf.data_dir, hps.dataset + '.data')
    data_func = getattr(dataset, 'load_uci_' + hps.dataset)
    x_train, y_train, x_valid, y_valid, x_test, y_test = data_func(data_path)
    x_train = np.vstack([x_train, x_valid])
    y_train = np.hstack([y_train, y_valid])
    n_train, hps.n_x = x_train.shape
    x_train, x_test, _, _ = dataset.standardize(x_train, x_test)
    y_train, y_test, mean_y_train, std_y_train = dataset.standardize(
        y_train, y_test)
    return x_train, y_train, x_test, y_test, n_train, mean_y_train, std_y_train
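# Minimal usage sketch for load_data. Assumptions: `hps` can be any
# namespace with a `dataset` attribute naming one of the UCI loaders in
# examples.utils.dataset (e.g. 'boston_housing'); argparse.Namespace
# stands in here for the real hyperparameter object.
from argparse import Namespace

hps = Namespace(dataset='boston_housing')
(x_train, y_train, x_test, y_test,
 n_train, mean_y_train, std_y_train) = load_data(hps)
print(n_train, hps.n_x)  # load_data also records the input dim on hps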
if __name__ == '__main__':
    tf.set_random_seed(1237)
    np.random.seed(1234)

    # Load UCI Boston housing data
    data_path = os.path.join(conf.data_dir, 'housing.data')
    x_train, y_train, x_valid, y_valid, x_test, y_test = \
        dataset.load_uci_boston_housing(data_path)
    x_train = np.vstack([x_train, x_valid])
    y_train = np.hstack([y_train, y_valid])
    N, n_x = x_train.shape

    # Standardize data
    x_train, x_test, _, _ = dataset.standardize(x_train, x_test)
    y_train, y_test, mean_y_train, std_y_train = dataset.standardize(
        y_train, y_test)

    # Define model parameters
    n_hiddens = [50]

    # Define training/evaluation parameters
    lb_samples = 10
    ll_samples = 5000
    epochs = 500
    batch_size = 10
    iters = int(np.floor(x_train.shape[0] / float(batch_size)))
    test_freq = 10
    learning_rate = 0.01
    anneal_lr_freq = 100
def main():
    np.random.seed(1234)
    tf.set_random_seed(1237)

    # Load UCI Boston housing data
    data_path = os.path.join(conf.data_dir, 'housing.data')
    x_train, y_train, x_valid, y_valid, x_test, y_test = \
        dataset.load_uci_boston_housing(data_path)
    x_train = np.vstack([x_train, x_valid])
    y_train = np.hstack([y_train, y_valid])
    N, n_x = x_train.shape

    # Standardize data
    x_train, x_test, _, _ = dataset.standardize(x_train, x_test)
    y_train, y_test, mean_y_train, std_y_train = dataset.standardize(
        y_train, y_test)

    # Define model parameters
    n_hiddens = [50]

    @zs.reuse('model')
    def bayesianNN(observed, x, n_x, layer_sizes, n_particles):
        with zs.BayesianNet(observed=observed) as model:
            ws = []
            for i, (n_in, n_out) in enumerate(zip(layer_sizes[:-1],
                                                  layer_sizes[1:])):
                w_mu = tf.zeros([1, n_out, n_in + 1])
                ws.append(
                    zs.Normal('w' + str(i), w_mu, std=1.,
                              n_samples=n_particles, group_event_ndims=2))

            # forward
            ly_x = tf.expand_dims(
                tf.tile(tf.expand_dims(x, 0), [n_particles, 1, 1]), 3)
            for i in range(len(ws)):
                w = tf.tile(ws[i], [1, tf.shape(x)[0], 1, 1])
                ly_x = tf.concat(
                    [ly_x, tf.ones([n_particles, tf.shape(x)[0], 1, 1])], 2)
                ly_x = tf.matmul(w, ly_x) / \
                    tf.sqrt(tf.to_float(tf.shape(ly_x)[2]))
                if i < len(ws) - 1:
                    ly_x = tf.nn.relu(ly_x)

            y_mean = tf.squeeze(ly_x, [2, 3])
            y_logstd = tf.get_variable(
                'y_logstd', shape=[],
                initializer=tf.constant_initializer(0.))
            y = zs.Normal('y', y_mean, logstd=y_logstd)

        return model, y_mean

    def mean_field_variational(layer_sizes, n_particles):
        with zs.BayesianNet() as variational:
            ws = []
            for i, (n_in, n_out) in enumerate(zip(layer_sizes[:-1],
                                                  layer_sizes[1:])):
                w_mean = tf.get_variable(
                    'w_mean_' + str(i), shape=[1, n_out, n_in + 1],
                    initializer=tf.constant_initializer(0.))
                w_logstd = tf.get_variable(
                    'w_logstd_' + str(i), shape=[1, n_out, n_in + 1],
                    initializer=tf.constant_initializer(0.))
                ws.append(
                    zs.Normal('w' + str(i), w_mean, logstd=w_logstd,
                              n_samples=n_particles, group_event_ndims=2))
        return variational

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name='n_particles')
    x = tf.placeholder(tf.float32, shape=[None, n_x])
    y = tf.placeholder(tf.float32, shape=[None])
    layer_sizes = [n_x] + n_hiddens + [1]
    w_names = ['w' + str(i) for i in range(len(layer_sizes) - 1)]

    def log_joint(observed):
        model, _ = bayesianNN(observed, x, n_x, layer_sizes, n_particles)
        log_pws = model.local_log_prob(w_names)
        log_py_xw = model.local_log_prob('y')
        return tf.add_n(log_pws) + log_py_xw * N

    variational = mean_field_variational(layer_sizes, n_particles)
    qw_outputs = variational.query(w_names, outputs=True,
                                   local_log_prob=True)
    latent = dict(zip(w_names, qw_outputs))
    y_obs = tf.tile(tf.expand_dims(y, 0), [n_particles, 1])
    lower_bound = tf.reduce_mean(
        zs.sgvb(log_joint, {'y': y_obs}, latent, axis=0))

    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    grads = optimizer.compute_gradients(-lower_bound)
    infer = optimizer.apply_gradients(grads)

    # prediction: rmse & log likelihood
    observed = dict((w_name, latent[w_name][0]) for w_name in w_names)
    observed.update({'y': y_obs})
    model, y_mean = bayesianNN(observed, x, n_x, layer_sizes, n_particles)
    y_pred = tf.reduce_mean(y_mean, 0)
    rmse = tf.sqrt(tf.reduce_mean((y_pred - y) ** 2)) * std_y_train
    log_py_xw = model.local_log_prob('y')
    log_likelihood = tf.reduce_mean(zs.log_mean_exp(log_py_xw, 0)) - \
        tf.log(std_y_train)

    # Define training/evaluation parameters
    lb_samples = 10
    ll_samples = 5000
    epochs = 500
    batch_size = 10
    iters = int(np.floor(x_train.shape[0] / float(batch_size)))
    test_freq = 10

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                y_batch = y_train[t * batch_size:(t + 1) * batch_size]
                _, lb = sess.run([infer, lower_bound],
                                 feed_dict={n_particles: lb_samples,
                                            x: x_batch, y: y_batch})
                lbs.append(lb)
            print('Epoch {}: Lower bound = {}'.format(epoch, np.mean(lbs)))

            if epoch % test_freq == 0:
                test_lb, test_rmse, test_ll = sess.run(
                    [lower_bound, rmse, log_likelihood],
                    feed_dict={n_particles: ll_samples,
                               x: x_test, y: y_test})
                print('>> TEST')
                print('>> lower bound = {}, rmse = {}, log_likelihood = {}'
                      .format(test_lb, test_rmse, test_ll))
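# Standalone numpy sketch of the estimator behind zs.log_mean_exp above:
# a numerically stable log of the mean of exponentials along an axis,
# which turns per-particle log densities into a Monte Carlo predictive
# log-likelihood. Written in plain numpy for clarity.
import numpy as np

def log_mean_exp_np(a, axis):
    a_max = np.max(a, axis=axis, keepdims=True)
    out = np.log(np.mean(np.exp(a - a_max), axis=axis, keepdims=True))
    return np.squeeze(out + a_max, axis=axis)

log_ps = np.log(np.array([[0.1, 0.2], [0.3, 0.4]]))
print(log_mean_exp_np(log_ps, 0))  # == np.log([0.2, 0.3]), the column means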
        n_samples=n_particles, group_ndims=1)
    return variational


if __name__ == '__main__':
    tf.set_random_seed(1234)
    np.random.seed(1234)

    # Load MNIST
    data_path = os.path.join(conf.data_dir, 'mnist.pkl.gz')
    x_train, y_train, x_valid, y_valid, x_test, y_test = \
        dataset.load_mnist_realval(data_path, one_hot=False)
    x_train = np.vstack([x_train, x_valid]).astype('float32')
    y_train = np.concatenate([y_train, y_valid]).astype('int32')
    x_train, x_test, _, _ = dataset.standardize(x_train, x_test)
    n_x = x_train.shape[1]

    # Define training/evaluation parameters
    epochs = 500
    batch_size = 1000
    lb_samples = 10
    ll_samples = 100
    iters = int(np.floor(x_train.shape[0] / float(batch_size)))
    test_freq = 3
    learning_rate = 0.001
    anneal_lr_freq = 100
    anneal_lr_rate = 0.75

    # placeholders
    n_particles = tf.placeholder(tf.int32, shape=[], name='n_particles')
def main():
    tf.set_random_seed(1237)
    np.random.seed(2345)

    # Load UCI protein data
    data_path = os.path.join(conf.data_dir, "protein.data")
    x_train, y_train, x_valid, y_valid, x_test, y_test = \
        dataset.load_uci_protein_data(data_path)
    x_train = np.vstack([x_train, x_valid])
    y_train = np.hstack([y_train, y_valid])
    n_train, x_dim = x_train.shape

    # Standardize data
    x_train, x_test, _, _ = dataset.standardize(x_train, x_test)
    y_train, y_test, mean_y_train, std_y_train = dataset.standardize(
        y_train, y_test)

    # Define model parameters
    n_hiddens = [50]

    # Build the computation graph
    n_particles = 20
    x = tf.placeholder(tf.float32, shape=[None, x_dim])
    y = tf.placeholder(tf.float32, shape=[None])
    layer_sizes = [x_dim] + n_hiddens + [1]
    w_names = ["w" + str(i) for i in range(len(layer_sizes) - 1)]

    wv = []
    logstds = []
    for i, (n_in, n_out) in enumerate(zip(layer_sizes[:-1],
                                          layer_sizes[1:])):
        wv.append(
            tf.Variable(
                tf.random_uniform([n_particles, n_out, n_in + 1]) * 4 - 2))
        logstds.append(tf.Variable(tf.zeros([n_out, n_in + 1])))

    model = build_bnn(x, layer_sizes, logstds, n_particles)

    def log_joint(bn):
        log_pws = bn.cond_log_prob(w_names)
        log_py_xw = bn.cond_log_prob('y')
        return tf.add_n(log_pws) + tf.reduce_mean(log_py_xw, 1) * n_train

    model.log_joint = log_joint

    # sgmcmc = zs.SGLD(learning_rate=4e-6)
    sgmcmc = zs.SGHMC(learning_rate=2e-6, friction=0.2,
                      n_iter_resample_v=1000, second_order=True)
    # sgmcmc = zs.SGNHT(learning_rate=1e-5, variance_extra=0.,
    #                   tune_rate=50., second_order=True)
    latent = dict(zip(w_names, wv))
    observed = {'y': y}

    # E step: Sample the parameters
    sample_op, sgmcmc_info = sgmcmc.sample(model, observed=observed,
                                           latent=latent)
    mean_k = sgmcmc_info.mean_k

    # M step: Update the logstd hyperparameters
    esti_logstds = [0.5 * tf.log(tf.reduce_mean(w * w, axis=0))
                    for w in wv]
    output_logstds = dict(
        zip(w_names, [0.5 * tf.log(tf.reduce_mean(w * w)) for w in wv]))
    assign_ops = [logstds[i].assign(logstd)
                  for (i, logstd) in enumerate(esti_logstds)]
    assign_op = tf.group(assign_ops)

    # prediction: rmse & log likelihood
    bn = model.observe(**merge_dicts(latent, observed))
    y_mean = bn["y_mean"]
    y_pred = tf.reduce_mean(y_mean, 0)

    # Define training/evaluation parameters
    epochs = 500
    batch_size = 100
    iters = (n_train - 1) // batch_size + 1
    preds = []
    epochs_ave_pred = 1

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            perm = np.random.permutation(x_train.shape[0])
            x_train = x_train[perm, :]
            y_train = y_train[perm]
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                y_batch = y_train[t * batch_size:(t + 1) * batch_size]
                _, mean_k_value = sess.run([sample_op, mean_k],
                                           feed_dict={x: x_batch,
                                                      y: y_batch})
            # print("Epoch {} mean_k = {}".format(epoch, mean_k_value))
            sess.run(assign_op)

            test_pred = sess.run(y_pred, feed_dict={x: x_test})
            preds.append(test_pred)
            pred = np.mean(preds[-epochs_ave_pred:], axis=0)
            test_rmse = np.sqrt(np.mean((pred - y_test) ** 2)) * std_y_train
            print('>> Epoch {} Test = {} logstds = {}'.format(
                epoch, test_rmse, sess.run(output_logstds)))
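# merge_dicts is called by the script above but not defined in this
# excerpt. A minimal sketch of the assumed helper: combine several dicts
# into a new one, with later dicts winning on key clashes.
def merge_dicts(*dicts):
    merged = {}
    for d in dicts:
        merged.update(d)
    return merged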
def main():
    # tf.set_random_seed(1237)
    # np.random.seed(1234)

    hps = parser.parse_args()

    # Load data
    data_path = os.path.join(conf.data_dir, hps.dataset + '.data')
    data_func = getattr(dataset, 'load_uci_' + hps.dataset)
    x_train, y_train, x_valid, y_valid, x_test, y_test = data_func(data_path)
    x_train = np.vstack([x_train, x_valid])
    y_train = np.hstack([y_train, y_valid])
    n_train, n_covariates = x_train.shape
    hps.dtype = getattr(tf, hps.dtype)

    # Standardize data
    x_train, x_test, _, _ = dataset.standardize(x_train, x_test)
    y_train, y_test, mean_y_train, std_y_train = dataset.standardize(
        y_train, y_test)

    # Build model
    kernel = RBFKernel(n_covariates)
    x_ph = tf.placeholder(hps.dtype, [None, n_covariates], 'x')
    y_ph = tf.placeholder(hps.dtype, [None], 'y')
    z_pos = tf.get_variable('z/pos', [hps.n_z, n_covariates], hps.dtype,
                            initializer=tf.random_uniform_initializer(-1, 1))
    n_particles_ph = tf.placeholder(tf.int32, [], 'n_particles')
    batch_size = tf.cast(tf.shape(x_ph)[0], hps.dtype)

    model = build_model(hps, kernel, z_pos, x_ph, n_particles_ph)
    variational = build_variational(hps, kernel, z_pos, x_ph, n_particles_ph)

    # ELBO = E_q log [p(y|fx)p(fx|fz)p(fz) / (p(fx|fz)q(fz))],
    # so p(fx|fz) cancels and is removed from both log_joint and latent.
    def log_joint(bn):
        prior, log_py_given_fx = bn.cond_log_prob(['fz', 'y'])
        return prior + log_py_given_fx / batch_size * n_train

    model.log_joint = log_joint

    [var_fz, var_fx] = variational.query(['fz', 'fx'], outputs=True,
                                         local_log_prob=True)
    var_fx = (var_fx[0], tf.zeros_like(var_fx[1]))

    lower_bound = zs.variational.elbo(model, observed={'y': y_ph},
                                      latent={'fz': var_fz, 'fx': var_fx},
                                      axis=0)
    cost = lower_bound.sgvb()

    optimizer = tf.train.AdamOptimizer(learning_rate=hps.lr)
    infer_op = optimizer.minimize(cost)

    # Prediction ops
    model = model.observe(fx=var_fx[0], y=y_ph)
    log_likelihood = model.cond_log_prob('y')
    std_y_train = tf.cast(std_y_train, hps.dtype)
    log_likelihood = zs.log_mean_exp(log_likelihood, 0) / batch_size - \
        tf.log(std_y_train)
    y_pred_mean = tf.reduce_mean(model['y'].distribution.mean, axis=0)
    pred_mse = tf.reduce_mean((y_pred_mean - y_ph) ** 2) * std_y_train ** 2

    def infer_step(sess, x_batch, y_batch):
        fd = {x_ph: x_batch, y_ph: y_batch,
              n_particles_ph: hps.n_particles}
        return sess.run([infer_op, lower_bound], fd)[1]

    def predict_step(sess, x_batch, y_batch):
        fd = {x_ph: x_batch, y_ph: y_batch,
              n_particles_ph: hps.n_particles_test}
        return sess.run([log_likelihood, pred_mse], fd)

    iters = int(np.ceil(x_train.shape[0] / float(hps.batch_size)))
    test_freq = 100

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, hps.n_epoch + 1):
            lbs = []
            indices = np.arange(x_train.shape[0])
            np.random.shuffle(indices)
            x_train = x_train[indices]
            y_train = y_train[indices]
            for t in range(iters):
                lb = infer_step(
                    sess,
                    x_train[t * hps.batch_size:(t + 1) * hps.batch_size],
                    y_train[t * hps.batch_size:(t + 1) * hps.batch_size])
                lbs.append(lb)
            if 10 * epoch % test_freq == 0:
                print('Epoch {}: Lower bound = {}'.format(
                    epoch, np.mean(lbs)))
            if epoch % test_freq == 0:
                test_lls = []
                test_mses = []
                for t in range(0, x_test.shape[0], hps.batch_size):
                    ll, mse = predict_step(sess,
                                           x_test[t:t + hps.batch_size],
                                           y_test[t:t + hps.batch_size])
                    test_lls.append(ll)
                    test_mses.append(mse)
                print('>> TEST')
                print('>> Test log likelihood = {}, rmse = {}'.format(
                    np.mean(test_lls), np.sqrt(np.mean(test_mses))))
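# RBFKernel is referenced above but not defined in this excerpt. Below is
# a minimal sketch of a squared-exponential (RBF) kernel with trainable
# per-dimension lengthscales; the class name and parameter layout are
# assumptions for illustration, not the example's actual implementation.
class RBFKernelSketch(object):
    def __init__(self, n_covariates, dtype=tf.float32):
        self.log_scales = tf.get_variable(
            'kernel/log_scales', [n_covariates], dtype,
            initializer=tf.zeros_initializer())

    def __call__(self, x, z):
        # k(x, z) = exp(-0.5 * sum_d ((x_d - z_d) / l_d) ** 2)
        x = x / tf.exp(self.log_scales)
        z = z / tf.exp(self.log_scales)
        dist = tf.reduce_sum(
            tf.square(x[:, None, :] - z[None, :, :]), axis=-1)
        return tf.exp(-0.5 * dist)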
def main():
    tf.set_random_seed(1237)
    np.random.seed(2345)

    # Load UCI Boston housing data
    data_path = os.path.join(conf.data_dir, "housing.data")
    x_train, y_train, x_valid, y_valid, x_test, y_test = \
        dataset.load_uci_boston_housing(data_path)
    x_train = np.vstack([x_train, x_valid])
    y_train = np.hstack([y_train, y_valid])
    n_train, x_dim = x_train.shape

    # Standardize data
    x_train, x_test, _, _ = dataset.standardize(x_train, x_test)
    y_train, y_test, mean_y_train, std_y_train = dataset.standardize(
        y_train, y_test)

    # Define model parameters
    n_hiddens = [50]

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name="n_particles")
    x = tf.placeholder(tf.float32, shape=[None, x_dim])
    y = tf.placeholder(tf.float32, shape=[None])
    layer_sizes = [x_dim] + n_hiddens + [1]
    w_names = ["w" + str(i) for i in range(len(layer_sizes) - 1)]

    model = build_bnn(x, layer_sizes, n_particles)
    variational = build_mean_field_variational(layer_sizes, n_particles)

    def log_joint(bn):
        log_pws = bn.cond_log_prob(w_names)
        log_py_xw = bn.cond_log_prob('y')
        return tf.add_n(log_pws) + tf.reduce_mean(log_py_xw, 1) * n_train

    model.log_joint = log_joint

    lower_bound = zs.variational.elbo(model, {'y': y},
                                      variational=variational, axis=0)
    cost = lower_bound.sgvb()

    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    infer_op = optimizer.minimize(cost)

    # prediction: rmse & log likelihood
    y_mean = lower_bound.bn["y_mean"]
    y_pred = tf.reduce_mean(y_mean, 0)
    rmse = tf.sqrt(tf.reduce_mean((y_pred - y) ** 2)) * std_y_train
    log_py_xw = lower_bound.bn.cond_log_prob("y")
    log_likelihood = tf.reduce_mean(zs.log_mean_exp(log_py_xw, 0)) - \
        tf.log(std_y_train)

    # Define training/evaluation parameters
    lb_samples = 10
    ll_samples = 5000
    epochs = 500
    batch_size = 10
    iters = (n_train - 1) // batch_size + 1
    test_freq = 10

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            perm = np.random.permutation(x_train.shape[0])
            x_train = x_train[perm, :]
            y_train = y_train[perm]
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                y_batch = y_train[t * batch_size:(t + 1) * batch_size]
                _, lb = sess.run([infer_op, lower_bound],
                                 feed_dict={n_particles: lb_samples,
                                            x: x_batch, y: y_batch})
                lbs.append(lb)
            print('Epoch {}: Lower bound = {}'.format(epoch, np.mean(lbs)))

            if epoch % test_freq == 0:
                test_rmse, test_ll = sess.run(
                    [rmse, log_likelihood],
                    feed_dict={n_particles: ll_samples,
                               x: x_test, y: y_test})
                print('>> TEST')
                print('>> Test rmse = {}, log_likelihood = {}'.format(
                    test_rmse, test_ll))
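# build_bnn and build_mean_field_variational are referenced above but not
# defined in this excerpt. A sketch of the factorized Gaussian variational
# family, written against ZhuSuan's newer BayesianNet API; treat the
# decorator and the bn.normal method as assumptions about that API version.
@zs.reuse_variables(scope="variational")
def build_mean_field_variational(layer_sizes, n_particles):
    bn = zs.BayesianNet()
    for i, (n_in, n_out) in enumerate(zip(layer_sizes[:-1],
                                          layer_sizes[1:])):
        w_mean = tf.get_variable(
            "w_mean_" + str(i), shape=[n_out, n_in + 1],
            initializer=tf.constant_initializer(0.))
        w_logstd = tf.get_variable(
            "w_logstd_" + str(i), shape=[n_out, n_in + 1],
            initializer=tf.constant_initializer(0.))
        # One factorized Gaussian per weight matrix, sampled n_particles times
        bn.normal("w" + str(i), w_mean, logstd=w_logstd,
                  n_samples=n_particles, group_ndims=2)
    return bn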
def main():
    tf.set_random_seed(1237)
    np.random.seed(1234)

    # Load UCI Boston housing data
    data_path = os.path.join(conf.data_dir, 'housing.data')
    x_train, y_train, x_valid, y_valid, x_test, y_test = \
        dataset.load_uci_boston_housing(data_path)
    x_train = np.vstack([x_train, x_valid])
    y_train = np.hstack([y_train, y_valid])
    N, n_x = x_train.shape

    # Standardize data
    x_train, x_test, _, _ = dataset.standardize(x_train, x_test)
    y_train, y_test, mean_y_train, std_y_train = dataset.standardize(
        y_train, y_test)

    # Define model parameters
    n_hiddens = [50]

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name='n_particles')
    x = tf.placeholder(tf.float32, shape=[None, n_x])
    y = tf.placeholder(tf.float32, shape=[None])
    layer_sizes = [n_x] + n_hiddens + [1]
    w_names = ['w' + str(i) for i in range(len(layer_sizes) - 1)]

    def log_joint(observed):
        model, _ = bayesianNN(observed, x, n_x, layer_sizes, n_particles)
        log_pws = model.local_log_prob(w_names)
        log_py_xw = model.local_log_prob('y')
        return tf.add_n(log_pws) + log_py_xw * N

    variational = mean_field_variational(layer_sizes, n_particles)
    qw_outputs = variational.query(w_names, outputs=True,
                                   local_log_prob=True)
    latent = dict(zip(w_names, qw_outputs))

    lower_bound = zs.variational.elbo(log_joint, observed={'y': y},
                                      latent=latent, axis=0)
    cost = tf.reduce_mean(lower_bound.sgvb())
    lower_bound = tf.reduce_mean(lower_bound)

    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    infer_op = optimizer.minimize(cost)

    # prediction: rmse & log likelihood
    observed = dict((w_name, latent[w_name][0]) for w_name in w_names)
    observed.update({'y': y})
    model, y_mean = bayesianNN(observed, x, n_x, layer_sizes, n_particles)
    y_pred = tf.reduce_mean(y_mean, 0)
    rmse = tf.sqrt(tf.reduce_mean((y_pred - y) ** 2)) * std_y_train
    log_py_xw = model.local_log_prob('y')
    log_likelihood = tf.reduce_mean(zs.log_mean_exp(log_py_xw, 0)) - \
        tf.log(std_y_train)

    # Define training/evaluation parameters
    lb_samples = 10
    ll_samples = 5000
    epochs = 500
    batch_size = 10
    iters = int(np.floor(x_train.shape[0] / float(batch_size)))
    test_freq = 10

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                y_batch = y_train[t * batch_size:(t + 1) * batch_size]
                _, lb = sess.run([infer_op, lower_bound],
                                 feed_dict={n_particles: lb_samples,
                                            x: x_batch, y: y_batch})
                lbs.append(lb)
            print('Epoch {}: Lower bound = {}'.format(epoch, np.mean(lbs)))

            if epoch % test_freq == 0:
                test_lb, test_rmse, test_ll = sess.run(
                    [lower_bound, rmse, log_likelihood],
                    feed_dict={n_particles: ll_samples,
                               x: x_test, y: y_test})
                print('>> TEST')
                print('>> Test lower bound = {}, rmse = {}, '
                      'log_likelihood = {}'.format(test_lb, test_rmse,
                                                   test_ll))
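# Plain-numpy check of the de-standardization used for `rmse` above:
# predictions live in standardized y-space, so multiplying the
# standardized RMSE by std_y_train recovers the RMSE on the original
# scale of y.
import numpy as np

std = 3.0
y_true = np.array([1.0, 2.0])   # standardized targets
y_hat = np.array([1.5, 1.0])    # standardized predictions
rmse_std = np.sqrt(np.mean((y_hat - y_true) ** 2))
rmse_orig = np.sqrt(np.mean(((y_hat - y_true) * std) ** 2))
assert np.isclose(rmse_orig, rmse_std * std)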