                'w_logstd_' + str(i), shape=[1, n_out, n_in + 1],
                initializer=tf.constant_initializer(0.))
            ws.append(
                zs.Normal('w' + str(i), w_mean, logstd=w_logstd,
                          n_samples=n_particles, group_ndims=2))
    return variational


if __name__ == '__main__':
    tf.set_random_seed(1237)
    np.random.seed(1234)

    # Load UCI Boston housing data
    data_path = os.path.join(conf.data_dir, 'housing.data')
    x_train, y_train, x_valid, y_valid, x_test, y_test = \
        dataset.load_uci_boston_housing(data_path)
    x_train = np.vstack([x_train, x_valid])
    y_train = np.hstack([y_train, y_valid])
    N, n_x = x_train.shape

    # Standardize data
    x_train, x_test, _, _ = dataset.standardize(x_train, x_test)
    y_train, y_test, mean_y_train, std_y_train = dataset.standardize(
        y_train, y_test)

    # Define model parameters
    n_hiddens = [50]

    # Define training/evaluation parameters
    lb_samples = 10
    ll_samples = 5000
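# --- Illustrative sketch (not part of the original example) ---
# The mean-field posterior completed above samples every weight matrix with
# the reparameterization trick, which is what keeps the SGVB gradient
# estimator differentiable w.r.t. w_mean and w_logstd. A minimal NumPy
# version of that sampling step, assuming the same [1, n_out, n_in + 1]
# parameter shapes (the function name is hypothetical):
import numpy as np


def sample_weights_reparam(w_mean, w_logstd, n_particles, rng=np.random):
    """Draw n_particles weight samples: w = mean + exp(logstd) * eps."""
    eps = rng.standard_normal((n_particles,) + w_mean.shape[1:])
    return w_mean + np.exp(w_logstd) * eps


# Example with the Boston housing sizes (13 inputs + bias, 50 hidden units):
# w0 = sample_weights_reparam(np.zeros([1, 50, 14]), np.zeros([1, 50, 14]), 10)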
def main():
    tf.set_random_seed(1237)
    np.random.seed(2345)

    # Load UCI Boston housing data
    data_path = os.path.join(conf.data_dir, "housing.data")
    x_train, y_train, x_valid, y_valid, x_test, y_test = \
        dataset.load_uci_boston_housing(data_path)
    x_train = np.vstack([x_train, x_valid])
    y_train = np.hstack([y_train, y_valid])
    n_train, x_dim = x_train.shape

    # Standardize data
    x_train, x_test, _, _ = dataset.standardize(x_train, x_test)
    y_train, y_test, mean_y_train, std_y_train = dataset.standardize(
        y_train, y_test)

    # Define model parameters
    n_hiddens = [50]

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name="n_particles")
    x = tf.placeholder(tf.float32, shape=[None, x_dim])
    y = tf.placeholder(tf.float32, shape=[None])
    layer_sizes = [x_dim] + n_hiddens + [1]
    w_names = ["w" + str(i) for i in range(len(layer_sizes) - 1)]

    model = build_bnn(x, layer_sizes, n_particles)
    variational = build_mean_field_variational(layer_sizes, n_particles)

    def log_joint(bn):
        log_pws = bn.cond_log_prob(w_names)
        log_py_xw = bn.cond_log_prob('y')
        return tf.add_n(log_pws) + tf.reduce_mean(log_py_xw, 1) * n_train

    model.log_joint = log_joint

    lower_bound = zs.variational.elbo(
        model, {'y': y}, variational=variational, axis=0)
    cost = lower_bound.sgvb()

    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    infer_op = optimizer.minimize(cost)

    # prediction: rmse & log likelihood
    y_mean = lower_bound.bn["y_mean"]
    y_pred = tf.reduce_mean(y_mean, 0)
    rmse = tf.sqrt(tf.reduce_mean((y_pred - y) ** 2)) * std_y_train
    log_py_xw = lower_bound.bn.cond_log_prob("y")
    log_likelihood = tf.reduce_mean(zs.log_mean_exp(log_py_xw, 0)) - \
        tf.log(std_y_train)

    # Define training/evaluation parameters
    lb_samples = 10
    ll_samples = 5000
    epochs = 500
    batch_size = 10
    iters = (n_train - 1) // batch_size + 1
    test_freq = 10

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            perm = np.random.permutation(x_train.shape[0])
            x_train = x_train[perm, :]
            y_train = y_train[perm]
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                y_batch = y_train[t * batch_size:(t + 1) * batch_size]
                _, lb = sess.run(
                    [infer_op, lower_bound],
                    feed_dict={n_particles: lb_samples,
                               x: x_batch, y: y_batch})
                lbs.append(lb)
            print('Epoch {}: Lower bound = {}'.format(epoch, np.mean(lbs)))

            if epoch % test_freq == 0:
                test_rmse, test_ll = sess.run(
                    [rmse, log_likelihood],
                    feed_dict={n_particles: ll_samples,
                               x: x_test, y: y_test})
                print('>> TEST')
                print('>> Test rmse = {}, log_likelihood = {}'.format(
                    test_rmse, test_ll))
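# --- Illustrative sketch (not part of the original example) ---
# The main() above calls build_bnn(), which is defined elsewhere in this
# file. The sketch below shows one way such a model builder could look in
# the same API style (bn.cond_log_prob and lower_bound.bn are already used
# above); the zs.meta_bayesian_net decorator arguments and the helper name
# build_bnn_sketch are assumptions, not the original definition. tf and zs
# refer to the imports at the top of this file.
@zs.meta_bayesian_net(scope="bnn", reuse_variables=True)
def build_bnn_sketch(x, layer_sizes, n_particles):
    bn = zs.BayesianNet()
    # Replicate the input once per posterior sample of the weights.
    h = tf.tile(x[None, ...], [n_particles, 1, 1])
    for i, (n_in, n_out) in enumerate(zip(layer_sizes[:-1],
                                          layer_sizes[1:])):
        # Standard normal prior over each weight matrix (plus bias column);
        # group_ndims=2 treats the whole matrix as a single event.
        w = bn.normal("w" + str(i), tf.zeros([n_out, n_in + 1]), std=1.,
                      group_ndims=2, n_samples=n_particles)
        # Append the bias feature and apply the layer, scaled by sqrt(fan-in).
        h = tf.concat([h, tf.ones(tf.shape(h)[:-1])[..., None]], -1)
        h = tf.einsum("imk,ijk->ijm", w, h) / tf.sqrt(
            tf.cast(tf.shape(h)[2], tf.float32))
        if i < len(layer_sizes) - 2:
            h = tf.nn.relu(h)
    y_mean = bn.deterministic("y_mean", tf.squeeze(h, 2))
    y_logstd = tf.get_variable("y_logstd", shape=[],
                               initializer=tf.constant_initializer(0.))
    bn.normal("y", y_mean, logstd=y_logstd)
    return bn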
def main():
    np.random.seed(1234)
    tf.set_random_seed(1237)

    # Load UCI Boston housing data
    data_path = os.path.join(conf.data_dir, 'housing.data')
    x_train, y_train, x_valid, y_valid, x_test, y_test = \
        dataset.load_uci_boston_housing(data_path)
    N, n_x = x_train.shape

    # Standardize data
    x_train, x_test, _, _ = dataset.standardize(x_train, x_test)
    y_train, y_test, mean_y_train, std_y_train = dataset.standardize(
        y_train, y_test)

    # Define model parameters
    n_hiddens = [50]

    @zs.reuse('model')
    def bayesianNN(observed, x, n_x, layer_sizes, n_particles):
        with zs.BayesianNet(observed=observed) as model:
            ws = []
            for i, (n_in, n_out) in enumerate(
                    zip(layer_sizes[:-1], layer_sizes[1:])):
                w_mu = tf.zeros([1, n_out, n_in + 1])
                ws.append(
                    zs.Normal('w' + str(i), w_mu, std=1.,
                              n_samples=n_particles, group_event_ndims=2))

            # forward
            ly_x = tf.expand_dims(
                tf.tile(tf.expand_dims(x, 0), [n_particles, 1, 1]), 3)
            for i in range(len(ws)):
                w = tf.tile(ws[i], [1, tf.shape(x)[0], 1, 1])
                ly_x = tf.concat(
                    [ly_x, tf.ones([n_particles, tf.shape(x)[0], 1, 1])], 2)
                ly_x = tf.matmul(w, ly_x) / \
                    tf.sqrt(tf.to_float(tf.shape(ly_x)[2]))
                if i < len(ws) - 1:
                    ly_x = tf.nn.relu(ly_x)

            y_mean = tf.squeeze(ly_x, [2, 3])
            y_logstd = tf.get_variable(
                'y_logstd', shape=[],
                initializer=tf.constant_initializer(0.))
            y = zs.Normal('y', y_mean, logstd=y_logstd)

        return model, y_mean

    def mean_field_variational(layer_sizes, n_particles):
        with zs.BayesianNet() as variational:
            ws = []
            for i, (n_in, n_out) in enumerate(
                    zip(layer_sizes[:-1], layer_sizes[1:])):
                w_mean = tf.get_variable(
                    'w_mean_' + str(i), shape=[1, n_out, n_in + 1],
                    initializer=tf.constant_initializer(0.))
                w_logstd = tf.get_variable(
                    'w_logstd_' + str(i), shape=[1, n_out, n_in + 1],
                    initializer=tf.constant_initializer(0.))
                ws.append(
                    zs.Normal('w' + str(i), w_mean, logstd=w_logstd,
                              n_samples=n_particles, group_event_ndims=2))
        return variational

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name='n_particles')
    x = tf.placeholder(tf.float32, shape=[None, n_x])
    y = tf.placeholder(tf.float32, shape=[None])
    layer_sizes = [n_x] + n_hiddens + [1]
    w_names = ['w' + str(i) for i in range(len(layer_sizes) - 1)]

    def log_joint(observed):
        model, _ = bayesianNN(observed, x, n_x, layer_sizes, n_particles)
        log_pws = model.local_log_prob(w_names)
        log_py_xw = model.local_log_prob('y')
        return tf.add_n(log_pws) + log_py_xw * N

    variational = mean_field_variational(layer_sizes, n_particles)
    qw_outputs = variational.query(w_names, outputs=True,
                                   local_log_prob=True)
    latent = dict(zip(w_names, qw_outputs))
    y_obs = tf.tile(tf.expand_dims(y, 0), [n_particles, 1])
    lower_bound = tf.reduce_mean(
        zs.sgvb(log_joint, {'y': y_obs}, latent, axis=0))

    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    grads = optimizer.compute_gradients(-lower_bound)
    infer = optimizer.apply_gradients(grads)

    # prediction: rmse & log likelihood
    observed = dict((w_name, latent[w_name][0]) for w_name in w_names)
    observed.update({'y': y_obs})
    model, y_mean = bayesianNN(observed, x, n_x, layer_sizes, n_particles)
    y_pred = tf.reduce_mean(y_mean, 0)
    rmse = tf.sqrt(tf.reduce_mean((y_pred - y) ** 2)) * std_y_train
    log_py_xw = model.local_log_prob('y')
    log_likelihood = tf.reduce_mean(zs.log_mean_exp(log_py_xw, 0)) - \
        tf.log(std_y_train)

    # Define training/evaluation parameters
    lb_samples = 10
    ll_samples = 5000
    epochs = 500
    batch_size = 10
    iters = int(np.floor(x_train.shape[0] / float(batch_size)))
    test_freq = 10

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                y_batch = y_train[t * batch_size:(t + 1) * batch_size]
                _, lb = sess.run(
                    [infer, lower_bound],
                    feed_dict={n_particles: lb_samples,
                               x: x_batch, y: y_batch})
                lbs.append(lb)
            print('Epoch {}: Lower bound = {}'.format(epoch, np.mean(lbs)))

            if epoch % test_freq == 0:
                test_lb, test_rmse, test_ll = sess.run(
                    [lower_bound, rmse, log_likelihood],
                    feed_dict={n_particles: ll_samples,
                               x: x_test, y: y_test})
                print('>> TEST')
                print('>> lower bound = {}, rmse = {}, log_likelihood = {}'.
                      format(test_lb, test_rmse, test_ll))
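# --- Illustrative sketch (not part of the original example) ---
# mean_y_train and std_y_train are returned by dataset.standardize() above,
# but only std_y_train is used (to report rmse and log_likelihood on the
# original scale of the targets). If predictions in the original units are
# needed, they can be recovered from the standardized y_pred as below
# (the helper name is hypothetical):
def unstandardize_predictions(y_pred_std, mean_y_train, std_y_train):
    """Map standardized predictions back to the original target scale."""
    return y_pred_std * std_y_train + mean_y_train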
def main():
    tf.set_random_seed(1237)
    np.random.seed(1234)

    # Load UCI Boston housing data
    data_path = os.path.join(conf.data_dir, 'housing.data')
    x_train, y_train, x_valid, y_valid, x_test, y_test = \
        dataset.load_uci_boston_housing(data_path)
    x_train = np.vstack([x_train, x_valid])
    y_train = np.hstack([y_train, y_valid])
    N, n_x = x_train.shape

    # Standardize data
    x_train, x_test, _, _ = dataset.standardize(x_train, x_test)
    y_train, y_test, mean_y_train, std_y_train = dataset.standardize(
        y_train, y_test)

    # Define model parameters
    n_hiddens = [50]

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name='n_particles')
    x = tf.placeholder(tf.float32, shape=[None, n_x])
    y = tf.placeholder(tf.float32, shape=[None])
    layer_sizes = [n_x] + n_hiddens + [1]
    w_names = ['w' + str(i) for i in range(len(layer_sizes) - 1)]

    def log_joint(observed):
        model, _ = bayesianNN(observed, x, n_x, layer_sizes, n_particles)
        log_pws = model.local_log_prob(w_names)
        log_py_xw = model.local_log_prob('y')
        return tf.add_n(log_pws) + log_py_xw * N

    variational = mean_field_variational(layer_sizes, n_particles)
    qw_outputs = variational.query(w_names, outputs=True,
                                   local_log_prob=True)
    latent = dict(zip(w_names, qw_outputs))

    lower_bound = zs.variational.elbo(
        log_joint, observed={'y': y}, latent=latent, axis=0)
    cost = tf.reduce_mean(lower_bound.sgvb())
    lower_bound = tf.reduce_mean(lower_bound)

    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    infer_op = optimizer.minimize(cost)

    # prediction: rmse & log likelihood
    observed = dict((w_name, latent[w_name][0]) for w_name in w_names)
    observed.update({'y': y})
    model, y_mean = bayesianNN(observed, x, n_x, layer_sizes, n_particles)
    y_pred = tf.reduce_mean(y_mean, 0)
    rmse = tf.sqrt(tf.reduce_mean((y_pred - y) ** 2)) * std_y_train
    log_py_xw = model.local_log_prob('y')
    log_likelihood = tf.reduce_mean(zs.log_mean_exp(log_py_xw, 0)) - \
        tf.log(std_y_train)

    # Define training/evaluation parameters
    lb_samples = 10
    ll_samples = 5000
    epochs = 500
    batch_size = 10
    iters = int(np.floor(x_train.shape[0] / float(batch_size)))
    test_freq = 10

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                y_batch = y_train[t * batch_size:(t + 1) * batch_size]
                _, lb = sess.run(
                    [infer_op, lower_bound],
                    feed_dict={n_particles: lb_samples,
                               x: x_batch, y: y_batch})
                lbs.append(lb)
            print('Epoch {}: Lower bound = {}'.format(epoch, np.mean(lbs)))

            if epoch % test_freq == 0:
                test_lb, test_rmse, test_ll = sess.run(
                    [lower_bound, rmse, log_likelihood],
                    feed_dict={n_particles: ll_samples,
                               x: x_test, y: y_test})
                print('>> TEST')
                print('>> Test lower bound = {}, rmse = {}, '
                      'log_likelihood = {}'.format(
                          test_lb, test_rmse, test_ll))
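# --- Illustrative sketch (not part of the original example) ---
# The test log-likelihood above averages over ll_samples posterior samples
# with zs.log_mean_exp, i.e. log((1/S) * sum_s exp(log p(y | x, w_s))), and
# then subtracts log(std_y_train) to undo the target standardization. A
# numerically stable NumPy equivalent of that estimator (names are
# hypothetical):
import numpy as np


def log_mean_exp_np(a, axis=0):
    """Stable log of the mean of exp(a) along an axis."""
    a_max = np.max(a, axis=axis, keepdims=True)
    out = a_max + np.log(np.mean(np.exp(a - a_max), axis=axis,
                                 keepdims=True))
    return np.squeeze(out, axis=axis)


# test_ll is (approximately) np.mean(log_mean_exp_np(log_py_xw_samples, 0))
# minus np.log(std_y_train), with log_py_xw_samples of shape
# [ll_samples, n_test].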