def test_replace_itself(self):
    with self.test_session():
        x = tf.constant(2.0)
        y = tf.constant(3.0)
        x_new = ed.copy(x, {x: y}, replace_itself=False)
        self.assertEqual(x_new.eval(), 2.0)
        x_new = ed.copy(x, {x: y}, replace_itself=True)
        self.assertEqual(x_new.eval(), 3.0)
def test_copy_parent_rvs(self):
    with self.test_session() as sess:
        x = Normal(0.0, 1.0)
        y = tf.constant(3.0)
        z = x * y
        z_new = ed.copy(z, scope='no_copy_parent_rvs', copy_parent_rvs=False)
        self.assertEqual(len(ed.random_variables()), 1)
        z_new = ed.copy(z, scope='copy_parent_rvs', copy_parent_rvs=True)
        self.assertEqual(len(ed.random_variables()), 2)
def test_copy_q(self):
    with self.test_session() as sess:
        x = tf.constant(2.0)
        y = tf.random_normal([])
        x_new = ed.copy(x, {x: y}, replace_itself=True, copy_q=False)
        x_new_val, y_val = sess.run([x_new, y])
        self.assertEqual(x_new_val, y_val)
        x_new = ed.copy(x, {x: y}, replace_itself=True, copy_q=True)
        x_new_val, x_val, y_val = sess.run([x_new, x, y])
        self.assertNotEqual(x_new_val, x_val)
        self.assertNotEqual(x_new_val, y_val)
def test_ordering_rv_tensor(self):
    # Check that random variables are copied correctly in dependency
    # structure.
    with self.test_session() as sess:
        ed.set_seed(12432)
        x = Bernoulli(logits=0.0)
        y = tf.cast(x, tf.float32)
        y_new = ed.copy(y)
        x_new = ed.copy(x)
        x_new_val, y_new_val = sess.run([x_new, y_new])
        self.assertEqual(x_new_val, y_new_val)
def test_ordering_rv_rv(self):
    # Check that random variables are copied correctly in dependency
    # structure.
    with self.test_session() as sess:
        ed.set_seed(21782)
        x = Normal(loc=0.0, scale=10.0)
        x_abs = tf.abs(x)
        y = Normal(loc=x_abs, scale=1e-8)
        y_new = ed.copy(y)
        x_new = ed.copy(x)
        x_new_val, y_new_val = sess.run([x_new, y_new])
        self.assertAllClose(abs(x_new_val), y_new_val)
def train_model(self, games, results, num_train_steps=10000):
    params_post = {p: q for p, q in zip(self.prior, self.var_post)}
    x = tf.placeholder(tf.int32, shape=[None, 3])
    y = self.predict(x)

    print('accuracy, log_likelihood',
          ed.evaluate(['accuracy', 'log_likelihood'],
                      data={y: results, x: games}))

    inference = ed.KLqp(params_post, data={y: results, x: games})
    inference.run(n_samples=32, n_iter=num_train_steps)

    # Get output object dependent on variational posteriors rather than priors
    out_post = ed.copy(y.d2, params_post)

    # Re-evaluate metrics
    print('accuracy, log_likelihood',
          ed.evaluate(['accuracy', 'log_likelihood'],
                      data={out_post: results, x: games}))
def main():
    X_train, y_train, X_test, y_test, train_filenames, test_filenames = \
        prepare_scutfbp5500(feat_layers=["conv4_1", "conv5_1"])
    print('Shape of X_train: {0}'.format(X_train))
    print('Shape of X_test: {0}'.format(X_test))
    print('Shape of y_train: {0}'.format(y_train))
    print('Shape of y_test: {0}'.format(y_test))

    N = 3300
    D = len(X_train[0])

    X = tf.placeholder(tf.float32, [N, D])
    w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
    b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
    y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(N))

    qw = Normal(loc=tf.get_variable("qw/loc", [D]),
                scale=tf.nn.softplus(tf.get_variable("qw/scale", [D])))
    qb = Normal(loc=tf.get_variable("qb/loc", [1]),
                scale=tf.nn.softplus(tf.get_variable("qb/scale", [1])))

    inference = ed.KLqp({w: qw, b: qb}, data={X: X_train, y: y_train})
    inference.run(n_samples=3300, n_iter=250)

    y_post = ed.copy(y, {w: qw, b: qb})

    print("Mean squared error on test data:")
    print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test}))

    print("Mean absolute error on test data:")
    print(ed.evaluate('mean_absolute_error', data={X: X_test, y_post: y_test}))
def predict(samples, outputs, latent_var_dict, input_ph):
    """
    :param samples: Data to score
    :param outputs: Tensor that represents the outputs
    :param latent_var_dict: Dictionary that contains the latent variables in the model
    :param input_ph: Placeholder for the inputs
    :return: Predictions
    """
    x_post = ed.copy(outputs[-1], latent_var_dict)
    sess = ed.get_session()

    predictions = np.zeros((samples.shape[0], 3))
    for i in range(0, samples.shape[0]):
        feed_dict = {}
        feed_dict.update(
            {key: [value] for key, value in zip(input_ph, samples[i, :])})
        quantile_1, quantile_2, mean = sess.run(
            [x_post.quantile(0.025), x_post.quantile(0.975), x_post.mean()],
            feed_dict=feed_dict)
        predictions[i, :] = [quantile_1, mean, quantile_2]

    return predictions
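# A minimal, hypothetical usage sketch for the predict() helper above, assuming a
# toy Edward model; the names input_ph, w, qw, y and the random data are
# illustrative, not from the original codebase.
import numpy as np
import tensorflow as tf
import edward as ed
from edward.models import Normal

# One scalar placeholder per feature, since predict() zips input_ph with each sample row.
input_ph = [tf.placeholder(tf.float32, [1], name="feat%d" % d) for d in range(2)]
features = tf.stack(input_ph, axis=1)                     # shape [1, 2]

w = Normal(loc=tf.zeros(2), scale=tf.ones(2))             # prior weights
y = Normal(loc=tf.reduce_sum(features * w), scale=1.0)    # scalar output RV

qw = Normal(loc=tf.get_variable("qw/loc", [2]),
            scale=tf.nn.softplus(tf.get_variable("qw/scale", [2])))
# ... fit qw here, e.g. with ed.KLqp({w: qw}, data=...), before scoring ...
tf.global_variables_initializer().run(session=ed.get_session())

samples = np.random.randn(5, 2).astype(np.float32)        # toy rows to score
preds = predict(samples, outputs=[y], latent_var_dict={w: qw}, input_ph=input_ph)
print(preds)  # columns: 2.5% quantile, mean, 97.5% quantile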
def test_placeholder(self):
    with self.test_session() as sess:
        x = tf.placeholder(tf.float32, name="CustomName")
        y = tf.constant(3.0)
        z = x * y
        z_new = ed.copy(z)
        self.assertEqual(sess.run(z_new, feed_dict={x: 4.0}), 12.0)
def main(_):
    ed.set_seed(42)

    # DATA
    x_data = np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1])

    # MODEL
    p = Beta(1.0, 1.0)
    x = Bernoulli(probs=p, sample_shape=10)

    # INFERENCE
    qp = Empirical(params=tf.get_variable(
        "qp/params", [1000], initializer=tf.constant_initializer(0.5)))
    proposal_p = Beta(3.0, 9.0)

    inference = ed.MetropolisHastings({p: qp}, {p: proposal_p}, data={x: x_data})
    inference.run()

    # CRITICISM
    # exact posterior has mean 0.25 and std 0.12
    sess = ed.get_session()
    mean, stddev = sess.run([qp.mean(), qp.stddev()])
    print("Inferred posterior mean:")
    print(mean)
    print("Inferred posterior stddev:")
    print(stddev)

    x_post = ed.copy(x, {p: qp})
    tx_rep, tx = ed.ppc(
        lambda xs, zs: tf.reduce_mean(tf.cast(xs[x_post], tf.float32)),
        data={x_post: x_data})
    ed.ppc_stat_hist_plot(
        tx[0], tx_rep, stat_name=r'$T \equiv$mean', bins=10)
    plt.show()
def test(self, testdata, load_path, i, batchsize=400, n_samples=100):
    results = self.net(self.inputs, i)
    results = tf.matmul(results, self.w12)
    sess = self.sess  # session needed below for saver.restore and sampling
    # if i == 0:
    collection = [
        item for item in tf.all_variables() if 'ww' not in item.name
    ]
    saver = tf.train.Saver()
    saver.restore(sess, load_path)
    loc_copy = ed.copy(results, self.diction)
    w = 0
    n = len(testdata)
    result = []
    tk = i
    while w < n:
        temp = testdata[w:w + batchsize]
        seq = [item[0] for item in temp]
        task = [tk for i in seq]
        label = [item[-1] for item in temp]
        feed_dict = {self.inputs: seq, self.task: task}
        probs = [sess.run(loc_copy, feed_dict) for i in range(n_samples)]
        loc = np.mean(probs, 0).tolist()
        var = np.var(probs, 0).tolist()
        res = [[i, j, l] for i, j, l in zip(loc, var, label)]
        result.extend(list(res))
        w += batchsize
    return result
def test_random(self):
    with self.test_session() as sess:
        ed.set_seed(3742)
        x = tf.random_normal([])
        x_copy = ed.copy(x)
        result_copy, result = sess.run([x_copy, x])
        self.assertNotAlmostEquals(result_copy, result)
def test_swap_tensor_tensor(self):
    with self.test_session():
        x = tf.constant(2.0)
        y = tf.constant(3.0)
        z = x * y
        qx = tf.constant(4.0)
        z_new = ed.copy(z, {x: qx})
        self.assertEqual(z_new.eval(), 12.0)
def test_swap_placeholder_tensor(self):
    with self.test_session():
        x = tf.placeholder(tf.float32, name="CustomName")
        y = tf.constant(3.0)
        z = x * y
        qx = tf.constant(4.0)
        z_new = ed.copy(z, {x: qx})
        self.assertEqual(z_new.eval(), 12.0)
def test_variable(self):
    with self.test_session() as sess:
        x = tf.Variable(2.0, name="CustomName")
        y = tf.constant(3.0)
        z = x * y
        z_new = ed.copy(z)
        tf.variables_initializer([x]).run()
        self.assertEqual(z_new.eval(), 6.0)
def test_swap_rv_rv(self):
    with self.test_session():
        ed.set_seed(325135)
        x = Normal(0.0, 0.1)
        y = tf.constant(1.0)
        z = x * y
        qx = Normal(10.0, 0.1)
        z_new = ed.copy(z, {x: qx})
        self.assertGreater(z_new.eval(), 5.0)
def test_list(self):
    with self.test_session() as sess:
        x = Normal(tf.constant(0.0), tf.constant(0.1))
        y = Normal(tf.constant(10.0), tf.constant(0.1))
        cat = Categorical(logits=tf.zeros(5))
        components = [Normal(x, tf.constant(0.1)) for _ in range(5)]
        z = Mixture(cat=cat, components=components)
        z_new = ed.copy(z, {x: y.value()})
        self.assertGreater(z_new.value().eval(), 5.0)
def test_swap_rv_tensor(self):
    with self.test_session():
        ed.set_seed(289362)
        x = Normal(mu=0.0, sigma=0.1)
        y = tf.constant(1.0)
        z = x * y
        qx = Normal(mu=10.0, sigma=0.1)
        z_new = ed.copy(z, {x: qx.value()})
        self.assertGreater(z_new.eval(), 5.0)
def test_swap_tensor_variable(self):
    with self.test_session() as sess:
        x = tf.constant(2.0)
        y = tf.constant(3.0)
        z = x * y
        qx = tf.Variable(4.0, name="CustomName")
        z_new = ed.copy(z, {x: qx})
        tf.variables_initializer([qx]).run()
        self.assertEqual(z_new.eval(), 12.0)
def build_net(x_train, y_train, num_train_steps=10000, x_test=None, y_test=None):
    # Number of stats currently used to predict outcome: 23 per team + variable for side
    inputs = 47
    outputs = 1
    if x_test is None:
        x_test = x_train
    if y_test is None:
        y_test = y_train

    # Input data goes here (via feed_dict or equiv)
    x = tf.placeholder(tf.float32, shape=[None, inputs])

    # widths of fully-connected layers in NN
    layer_widths = [16, 16, 16, 16, 16, 16]
    activations = [Nets.selu for _ in layer_widths] + [tf.identity]
    layer_widths += [outputs]
    net = Nets.SuperDenseNet(inputs, layer_widths, activations)

    # Construct all parameters of NN, set to independent Gaussian priors
    params = [Nets.gauss_prior(shape) for shape in net.param_space()]
    out = ed.models.Bernoulli(logits=net.apply(x, params))

    # Variational 'posteriors' for NN params
    qparams = [Nets.gauss_var_post(w.shape) for w in params]

    # Map from random variables to their variational posterior objects
    params_post = {params[i]: qparams[i] for i in range(len(params))}

    # Evaluate accuracy and likelihood of the model over the dataset before training
    print('accuracy, log_likelihood, crossentropy',
          ed.evaluate(['accuracy', 'log_likelihood', 'crossentropy'],
                      data={out: y_test, x: x_test}))

    # Run variational inference, minimizing KL(q, p) using stochastic gradient
    # descent over variational params
    inference = ed.KLqp(params_post, data={out: y_train, x: x_train})
    # inference.initialize(optimizer=YFOptimizer())
    inference.run(n_samples=32, n_iter=num_train_steps)

    # Get output object dependent on variational posteriors rather than priors
    out_post = ed.copy(out, params_post)

    # Re-evaluate metrics
    print('accuracy, log_likelihood, crossentropy',
          ed.evaluate(['accuracy', 'log_likelihood', 'crossentropy'],
                      data={out_post: y_test, x: x_test}))
def test_swap_tensor_rv(self):
    with self.test_session():
        ed.set_seed(95258)
        x = Normal(0.0, 0.1)
        y = tf.constant(1.0)
        z = x * y
        qx = Normal(10.0, 0.1)
        z_new = ed.copy(z, {x.value(): qx})
        self.assertGreater(z_new.eval(), 5.0)
def test_scan(self):
    with self.test_session() as sess:
        ed.set_seed(42)
        op = tf.scan(lambda a, x: a + x, tf.constant([2.0, 3.0, 1.0]))
        copy_op = ed.copy(op)
        result_copy, result = sess.run([copy_op, op])
        self.assertAllClose(result_copy, [2.0, 5.0, 6.0])
        self.assertAllClose(result, [2.0, 5.0, 6.0])
def train(self, n_iter=1000):
    D = len(self.team_num_map.keys())
    N = self.xs.shape[0]

    with tf.name_scope('model'):
        self.X = tf.placeholder(tf.float32, [N, D])
        self.w1 = Normal(loc=tf.zeros(D), scale=tf.ones(D))
        # self.b1 = Normal(loc=tf.zeros(1), scale=tf.ones(1))
        self.y1 = Poisson(rate=tf.exp(ed.dot(self.X, self.w1)))

    with tf.name_scope('posterior'):
        if self.inf_type == 'Var':
            self.qw1 = Normal(loc=tf.get_variable("qw1_ll/loc", [D]),
                              scale=tf.nn.softplus(
                                  tf.get_variable("qw1_ll/scale", [D])))
            # self.qb1 = Normal(loc=tf.get_variable("qb1/loc", [1]),
            #                   scale=tf.nn.softplus(tf.get_variable("qb1/scale",
            #                                                        [1])))
        elif self.inf_type == 'MAP':
            self.qw1 = PointMass(
                Normal(loc=tf.get_variable("qw1_ll/loc", [D]),
                       scale=tf.nn.softplus(
                           tf.get_variable("qw1_ll/scale", [D]))))

    if self.inf_type == 'Var':
        inference = ed.ReparameterizationKLqp({self.w1: self.qw1},
                                              data={self.X: self.xs,
                                                    self.y1: self.ys})
    elif self.inf_type == 'MAP':
        inference = ed.MAP({self.w1: self.qw1},
                           data={self.X: self.xs, self.y1: self.ys})

    inference.initialize(
        optimizer=tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.9,
                                         beta2=0.999, epsilon=1e-08),
        n_iter=n_iter)
    tf.global_variables_initializer().run()

    self.loss = np.empty(n_iter, dtype=np.float32)
    for i in range(n_iter):
        info_dict = inference.update()
        self.loss[i] = info_dict["loss"]
        inference.print_progress(info_dict)

    self._trained = True

    graph = tf.get_default_graph()
    self.team_skill = graph.get_tensor_by_name("qw1_ll/loc:0").eval()
    self.perf_variance = graph.get_tensor_by_name("qw1_ll/scale:0").eval()
    # self.bias = (graph.get_tensor_by_name("qb1/loc:0").eval(),
    #              graph.get_tensor_by_name("qb2/loc:0").eval())

    self.y_post = ed.copy(self.y1, {self.w1: self.qw1})
    return
def test_scan_gradients(self):
    with self.test_session() as sess:
        a = tf.Variable([1.0, 2.0, 3.0])
        op = tf.scan(lambda a, x: a + x, a)
        copy_op = ed.copy(op)
        gradient = tf.gradients(op, [a])[0]
        copy_gradient = tf.gradients(copy_op, [a])[0]
        tf.variables_initializer([a]).run()
        result_copy, result = sess.run([copy_gradient, gradient])
        self.assertAllClose(result, [3.0, 2.0, 1.0])
        self.assertAllClose(result_copy, [3.0, 2.0, 1.0])
def infer(self):
    print("Running inference..")
    inference = ed.KLqp(
        {
            self.M_U: self.M_qU,
            self.M_V: self.M_qV,
            self.W_0: self.qW_0,
            self.b_0: self.qb_0,
            self.W_1: self.qW_1,
            self.b_1: self.qb_1,
        },
        data={
            self.M_P: self.predictors_zeros,
            self.M_P_observed: self.predictors_zeros,
            self.M_S: self.scores_zeros,
            self.M_I: self.I_train
        })
    inference.run(n_samples=5, n_iter=2000)

    # Posterior predictive distributions
    self.M_qP = ed.copy(
        self.M_P, {
            self.M_U: self.M_qU,
            self.M_V: self.M_qV,
            self.M_I: tf.ones((self.N, self.M + self.P))
        })
    self.M_qS = ed.copy(
        self.M_S, {
            self.M_P_observed: self.predictors_zeros.astype(np.float32),
            self.M_U: self.M_qU,
            self.M_V: self.M_qV,
            self.M_P: self.M_qP,
            self.W_0: self.qW_0,
            self.b_0: self.qb_0,
            self.W_1: self.qW_1,
            self.b_1: self.qb_1,
            self.M_I: tf.ones((self.N, self.M + self.P)),
        })
def test_queue(self):
    with self.test_session() as sess:
        tensor = tf.constant([0.0, 1.0, 2.0, 3.0])
        x = tf.train.batch([tensor], batch_size=2, enqueue_many=True,
                           name='CustomName')
        y = tf.constant(3.0)
        z = x * y
        z_new = ed.copy(z)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        self.assertAllEqual(sess.run(z_new), np.array([0.0, 3.0]))
        self.assertAllEqual(sess.run(z_new), np.array([6.0, 9.0]))
        coord.request_stop()
        coord.join(threads)
def copy(self, swap_dict=None, observed=False):
    """Build a new random variable with the same values.

    The underlying tensors or Edward objects are copied as well.

    Args:
        swap_dict: random variables, variables, tensors, or operations
            to swap with.
        observed: determines whether the new variable is observed or not.
    """
    new_var = getattr(sys.modules[self.__class__.__module__],
                      type(self).__name__)()
    new_var.dist = ed.copy(self.dist, swap_dict)
    new_var.copied_from = self
    new_var.observed = observed
    return new_var
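# A minimal sketch of how the copy() wrapper above might be used, assuming that
# wrapper is available as a plain module-level function and attached to a
# hypothetical node class whose no-argument constructor builds `self.dist`;
# MyNode, prior_scale, and q_scale are illustrative names only.
import sys
import tensorflow as tf
import edward as ed
from edward.models import Normal

prior_scale = tf.constant(1.0)


class MyNode(object):
    def __init__(self):
        # The wrapper expects `self.dist` to hold an Edward random variable.
        self.dist = Normal(loc=0.0, scale=prior_scale)
        self.observed = False


MyNode.copy = copy  # reuse the copy() function defined above as a method

node = MyNode()
q_scale = tf.constant(0.1)
# Copy the node, swapping the prior scale tensor for a narrower one.
node_post = node.copy(swap_dict={prior_scale: q_scale}, observed=True)
print(node_post.copied_from is node)  # True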
def train(self, n_iter=1000):
    D = len(self.team_num_map.keys())
    N = self.xs.shape[0]

    with tf.name_scope('model'):
        self.X = tf.placeholder(tf.float32, [N, D])
        self.w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
        self.b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
        self.y = Normal(loc=ed.dot(self.X, self.w) + self.b, scale=tf.ones(N))

    with tf.name_scope('posterior'):
        self.qw = Normal(loc=tf.get_variable("qw/loc", [D]),
                         scale=tf.nn.softplus(
                             tf.get_variable("qw/scale", [D])))
        self.qb = Normal(loc=tf.get_variable("qb/loc", [1]),
                         scale=tf.nn.softplus(
                             tf.get_variable("qb/scale", [1])))

    inference = ed.ReparameterizationKLqp({self.w: self.qw, self.b: self.qb},
                                          data={self.X: self.xs,
                                                self.y: self.ys})
    inference.initialize(
        optimizer=tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.9,
                                         beta2=0.999, epsilon=1e-08),
        n_iter=n_iter)
    tf.global_variables_initializer().run()

    # inference.run()
    self.loss = np.empty(n_iter, dtype=np.float32)
    for i in range(n_iter):
        info_dict = inference.update()
        self.loss[i] = info_dict["loss"]
        inference.print_progress(info_dict)

    self._trained = True

    graph = tf.get_default_graph()
    self.team_skill = graph.get_tensor_by_name("qw/loc:0").eval()
    self.bias = graph.get_tensor_by_name("qb/loc:0").eval()

    self.y_post = ed.copy(self.y, {self.w: self.qw, self.b: self.qb})
    return
def train_model(x_train, y_train):
    # x = tf.placeholder(tf.float32, shape=[None, 2])
    # widths of fully-connected layers in NN
    inputs = 2
    outputs = 1
    layer_widths = [8, 8, 8, 8, 8]
    activations = [tf.nn.elu for _ in layer_widths] + [tf.identity]
    layer_widths += [outputs]

    # Input data goes here (via feed_dict or equiv)
    x = tf.placeholder(tf.float32, shape=[len(x_train), inputs])
    net = Nets.SuperDenseNet(inputs, layer_widths, activations)

    # Construct all parameters of NN, set to independent Gaussian priors
    weights = [gauss_prior(shape) for shape in net.weight_shapes()]
    biases = [gauss_prior(shape) for shape in net.bias_shapes()]
    out = ed.models.Bernoulli(logits=net.apply(x, weights, biases))

    # Variational 'posteriors' for NN params
    qweights = [gauss_var_post(w.shape) for w in weights]
    qbiases = [gauss_var_post(b.shape) for b in biases]

    # Map from random variables to their variational posterior objects
    weights_post = {weights[i]: qweights[i] for i in range(len(weights))}
    biases_post = {biases[i]: qbiases[i] for i in range(len(biases))}
    var_post = {**weights_post, **biases_post}

    # Evaluate accuracy and likelihood of the model over the dataset before training
    print('accuracy, log_likelihood, crossentropy',
          ed.evaluate(['accuracy', 'log_likelihood', 'crossentropy'],
                      data={out: y_train, x: x_train}))

    # Run variational inference, minimizing KL(q, p) using stochastic gradient
    # descent over variational params
    inference = ed.KLqp(var_post, data={out: y_train, x: x_train})
    inference.run(n_samples=16, n_iter=10000)

    # Get output object dependent on variational posteriors rather than priors
    out_post = ed.copy(out, var_post)

    # Re-evaluate metrics
    print('accuracy, log_likelihood, crossentropy',
          ed.evaluate(['accuracy', 'log_likelihood', 'crossentropy'],
                      data={out_post: y_train, x: x_train}))
def test_nested_scan_gradients(self):
    with self.test_session() as sess:
        a = tf.Variable([1.0, 2.0, 3.0])
        i = tf.constant(0.0)
        tot = tf.constant([0.0, 0.0, 0.0])
        op = tf.while_loop(
            lambda i, tot: i < 5,
            lambda i, tot: (i + 1,
                            tot + tf.scan(lambda x0, x: x0 + i * x, a, 0.0)),
            [i, tot])[1]
        copy_op = ed.copy(op)
        gradient = tf.gradients(op, [a])[0]
        copy_gradient = tf.gradients(copy_op, [a])[0]
        tf.variables_initializer([a]).run()
        result_copy, result = sess.run([copy_gradient, gradient])
        self.assertAllClose(result, [30.0, 20.0, 10.0])
        self.assertAllClose(result_copy, [30.0, 20.0, 10.0])
import numpy as np
import tensorflow as tf
import edward as ed
from edward.models import Bernoulli, Beta

ed.set_seed(42)

# DATA
x_data = np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1])

# MODEL
p = Beta(a=1.0, b=1.0)
x = Bernoulli(p=tf.ones(10) * p)

# INFERENCE
qp_a = tf.nn.softplus(tf.Variable(tf.random_normal([])))
qp_b = tf.nn.softplus(tf.Variable(tf.random_normal([])))
qp = Beta(a=qp_a, b=qp_b)

data = {x: x_data}
inference = ed.KLqp({p: qp}, data)
inference.run(n_iter=500)

# CRITICISM
x_post = ed.copy(x, {p: qp})


def T(xs, zs):
    return tf.reduce_mean(tf.cast(xs[x_post], tf.float32))


print(ed.ppc(T, data={x_post: x_data}))
def main(_):
    ed.set_seed(42)

    # DATA
    X_train, y_train = build_toy_dataset(FLAGS.N)
    X_test, y_test = build_toy_dataset(FLAGS.N)

    # MODEL
    X = tf.placeholder(tf.float32, [FLAGS.N, FLAGS.D])
    w = Normal(loc=tf.zeros(FLAGS.D), scale=tf.ones(FLAGS.D))
    b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
    y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(FLAGS.N))

    # INFERENCE
    qw = Empirical(params=tf.get_variable("qw/params", [FLAGS.T, FLAGS.D]))
    qb = Empirical(params=tf.get_variable("qb/params", [FLAGS.T, 1]))

    inference = ed.SGHMC({w: qw, b: qb}, data={X: X_train, y: y_train})
    inference.run(step_size=1e-3)

    # CRITICISM

    # Plot posterior samples.
    sns.jointplot(qb.params.eval()[FLAGS.nburn:FLAGS.T:FLAGS.stride],
                  qw.params.eval()[FLAGS.nburn:FLAGS.T:FLAGS.stride])
    plt.show()

    # Posterior predictive checks.
    y_post = ed.copy(y, {w: qw, b: qb})
    # This is equivalent to
    # y_post = Normal(loc=ed.dot(X, qw) + qb, scale=tf.ones(FLAGS.N))

    print("Mean squared error on test data:")
    print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test}))

    print("Displaying prior predictive samples.")
    n_prior_samples = 10

    w_prior = w.sample(n_prior_samples).eval()
    b_prior = b.sample(n_prior_samples).eval()

    plt.scatter(X_train, y_train)

    inputs = np.linspace(-1, 10, num=400)
    for ns in range(n_prior_samples):
        output = inputs * w_prior[ns] + b_prior[ns]
        plt.plot(inputs, output)

    plt.show()

    print("Displaying posterior predictive samples.")
    n_posterior_samples = 10

    w_post = qw.sample(n_posterior_samples).eval()
    b_post = qb.sample(n_posterior_samples).eval()

    plt.scatter(X_train, y_train)

    inputs = np.linspace(-1, 10, num=400)
    for ns in range(n_posterior_samples):
        output = inputs * w_post[ns] + b_post[ns]
        plt.plot(inputs, output)

    plt.show()
def main(_):
    ed.set_seed(42)

    # DATA
    (x_train, _), (x_test, _), (x_valid, _) = caltech101_silhouettes(
        FLAGS.data_dir)
    x_train_generator = generator(x_train, FLAGS.batch_size)
    x_ph = tf.placeholder(tf.int32, [None, 28 * 28])

    # MODEL
    zs = [0] * len(FLAGS.hidden_sizes)
    for l in reversed(range(len(FLAGS.hidden_sizes))):
        if l == len(FLAGS.hidden_sizes) - 1:
            logits = tf.zeros([tf.shape(x_ph)[0], FLAGS.hidden_sizes[l]])
        else:
            logits = tf.layers.dense(tf.cast(zs[l + 1], tf.float32),
                                     FLAGS.hidden_sizes[l], activation=None)
        zs[l] = Bernoulli(logits=logits)

    x = Bernoulli(logits=tf.layers.dense(tf.cast(zs[0], tf.float32),
                                         28 * 28, activation=None))

    # INFERENCE
    # Define variational model with reverse ordering as probability model:
    # if p is 15-100-300 from top-down, q is 300-100-15 from bottom-up.
    qzs = [0] * len(FLAGS.hidden_sizes)
    for l in range(len(FLAGS.hidden_sizes)):
        if l == 0:
            logits = tf.layers.dense(tf.cast(x_ph, tf.float32),
                                     FLAGS.hidden_sizes[l], activation=None)
        else:
            logits = tf.layers.dense(tf.cast(qzs[l - 1], tf.float32),
                                     FLAGS.hidden_sizes[l], activation=None)
        qzs[l] = Bernoulli(logits=logits)

    inference = ed.KLqp({z: qz for z, qz in zip(zs, qzs)}, data={x: x_ph})
    optimizer = tf.train.AdamOptimizer(FLAGS.step_size)
    inference.initialize(optimizer=optimizer, n_samples=FLAGS.n_train_samples)

    # Build tensor for log-likelihood given one variational sample to run
    # on test data.
    x_post = ed.copy(x, {z: qz for z, qz in zip(zs, qzs)})
    x_neg_log_prob = (-tf.reduce_sum(x_post.log_prob(x_ph)) /
                      tf.cast(tf.shape(x_ph)[0], tf.float32))

    sess = ed.get_session()
    tf.global_variables_initializer().run()

    for epoch in range(FLAGS.n_epoch):
        print("Epoch {}".format(epoch))
        train_loss = 0.0

        pbar = Progbar(FLAGS.n_iter_per_epoch)
        for t in range(1, FLAGS.n_iter_per_epoch + 1):
            pbar.update(t)
            x_batch = next(x_train_generator)
            info_dict = inference.update(feed_dict={x_ph: x_batch})
            train_loss += info_dict['loss']

        # Print per-data point loss, averaged over training epoch.
        train_loss /= FLAGS.n_iter_per_epoch
        train_loss /= FLAGS.batch_size
        print("Training negative log-likelihood: {:0.3f}".format(train_loss))

        test_loss = [sess.run(x_neg_log_prob, {x_ph: x_test})
                     for _ in range(FLAGS.n_test_samples)]
        test_loss = np.mean(test_loss)
        print("Test negative log-likelihood: {:0.3f}".format(test_loss))

        # Prior predictive check.
        images = sess.run(x, {x_ph: x_batch})  # feed ph to determine sample size
        for m in range(FLAGS.batch_size):
            imsave("{}/{}.png".format(out_dir, m), images[m].reshape(28, 28))
def test_scope(self):
    with self.test_session():
        x = tf.constant(2.0)
        x_new = ed.copy(x, scope='new_scope')
        self.assertTrue(x_new.name.startswith('new_scope'))
D = 10  # number of features

# DATA
coeff = np.random.randn(D)
X_train, y_train = build_toy_dataset(N, coeff)
X_test, y_test = build_toy_dataset(N, coeff)

# MODEL
X = tf.placeholder(tf.float32, [N, D])
w = Normal(mu=tf.zeros(D), sigma=tf.ones(D))
b = Normal(mu=tf.zeros(1), sigma=tf.ones(1))
y = Normal(mu=ed.dot(X, w) + b, sigma=tf.ones(N))

# INFERENCE
qw = Normal(mu=tf.Variable(tf.random_normal([D])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))
qb = Normal(mu=tf.Variable(tf.random_normal([1])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

data = {X: X_train, y: y_train}
inference = ed.KLqp({w: qw, b: qb}, data)
inference.run(n_samples=5, n_iter=250)

# CRITICISM
y_post = ed.copy(y, {w: qw.mean(), b: qb.mean()})
# This is equivalent to
# y_post = Normal(mu=ed.dot(X, qw.mean()) + qb.mean(), sigma=tf.ones(N))

print("Mean squared error on test data:")
print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test}))
# doing this because I don't know how to make the testing work otherwise;
# it seems like the test and training data need to have the same N
X_test, y_test = X_test[:-1, :], y_test[:-1]

# unfortunately not sure how to make the linear kernel work at this moment
N, P = X_train.shape
X_tf = tf.placeholder(tf.float32, [N, P])

# latent stochastic function
# here in the loc position is where we get (x *element-wise* b)
b = Bernoulli(varbvs_prior, dtype=np.float32)  # prior from varbvs
gp_mu = tf.reduce_mean(
    tf.multiply(X_tf, tf.reshape(tf.tile(b, [N]), [N, P])), 1)  # mean for prior over GP

f = MultivariateNormalTriL(
    loc=gp_mu,
    scale_tril=tf.cholesky(rbf(X_tf))  # uses RBF kernel for covariance of GP for now
)

qf = Normal(loc=tf.get_variable("qf/loc", [N]),
            scale=tf.nn.softplus(tf.get_variable("qf/scale", [N])))

# response
y_tf = Bernoulli(logits=f)

# inference
infer = ed.KLqp({f: qf}, data={X_tf: X_train, y_tf: y_train})
infer.run(n_samples=3, n_iter=5000)

# criticism
y_post = ed.copy(y_tf, {f: qf})
ed.evaluate('binary_accuracy', data={X_tf: X_test, y_post: y_test})
qw = Empirical(params=tf.Variable(tf.random_normal([T, D])))
qb = Empirical(params=tf.Variable(tf.random_normal([T, 1])))

inference = ed.SGHMC({w: qw, b: qb}, data={X: X_train, y: y_train})
inference.run(step_size=1e-3)

# CRITICISM

# Plot posterior samples.
sns.jointplot(qb.params.eval()[nburn:T:stride],
              qw.params.eval()[nburn:T:stride])
plt.show()

# Posterior predictive checks.
y_post = ed.copy(y, {w: qw, b: qb})
# This is equivalent to
# y_post = Normal(loc=ed.dot(X, qw) + qb, scale=tf.ones(N))

print("Mean squared error on test data:")
print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test}))

print("Displaying prior predictive samples.")
n_prior_samples = 10

w_prior = w.sample(n_prior_samples).eval()
b_prior = b.sample(n_prior_samples).eval()

plt.scatter(X_train, y_train)

inputs = np.linspace(-1, 10, num=400)
def swap(self, dict_swap, scope="copied", replace_itself=False, copy_q=False):
    """Original implementation: Edward (https://github.com/blei-lab/edward)

    Build a new node in the TensorFlow graph from `org_instance`, where any
    of its ancestors existing in `dict_swap` are replaced with `dict_swap`'s
    corresponding value.

    The copying is done recursively, so any `Operation` whose output is
    required to evaluate `org_instance` is also copied (if it isn't already
    copied within the new scope). This is with the exception of
    `tf.Variable`s, `tf.placeholder`s, and nodes of type `Queue`, which are
    reused and not newly copied.

    Parameters
    ----------
    dict_swap : dict, optional
        Random variables, variables, tensors, or operations to swap with.
        Its keys are what `org_instance` may depend on, and its values are
        the corresponding object (not necessarily of the same class
        instance, but must have the same type, e.g., float32) that is used
        in exchange.
    scope : str, optional
        A scope for the new node(s). This is used to avoid name conflicts
        with the original node(s).
    replace_itself : bool, optional
        Whether to replace `org_instance` itself if it exists in
        `dict_swap`. (This is used for the recursion.)
    copy_q : bool, optional
        Whether to copy the replaced tensors too (if not already copied
        within the new scope). Otherwise will reuse them.

    Returns
    -------
    RandomVariable, tf.Variable, tf.Tensor, or tf.Operation
        The copied node.

    Examples
    --------
    >>> from odin import backend as K
    >>> x = tf.constant(2.0, name='x')
    >>> y = tf.constant(3.0, name='y')
    >>> z = tf.multiply(x, y, name="z")
    ... # define replacement variables
    >>> qx = tf.constant(4.0, name='qx')
    >>> qz = tf.constant(25.0, name='qz')
    ... # The TensorFlow graph is currently
    ... # `x` -> `z` <- `y`, `qx`
    ... # This adds a subgraph with newly copied nodes,
    ... # `copied/qx` -> `copied/z` <- `copied/y`
    >>> z_new = K.ComputationGraph(z).swap(
    ...     dict_swap={x: qx, z: qz},
    ...     replace_itself=False, copy_q=False)
    >>> print([v.name for v in K.ComputationGraph(z_new).variables])
    ... # [u'qx:0', u'copied/y:0', u'copied/z:0', u'copied/w:0']
    >>> sess = tf.Session()
    >>> sess.run(z)  # 6.0
    >>> sess.run(z_new)  # 12.0
    ... # with replace_itself = True
    >>> z_new = K.ComputationGraph(z).swap(
    ...     dict_swap={x: qx, z: qz},
    ...     replace_itself=True, copy_q=False)
    >>> print([v.name for v in K.ComputationGraph(z_new).variables])
    ... # [u'qx:0', u'copied/y:0', u'qz:0', u'copied/w:0']
    >>> sess.run(z_new)  # 25.0
    """
    try:
        from edward import copy
    except ImportError:
        raise RuntimeError("Require Edward library to manipulate the "
                           "ComputationGraph.")
    outputs_new = []
    for o in self.outputs:
        o_new = copy(org_instance=o, dict_swap=dict_swap, scope=scope,
                     replace_itself=replace_itself, copy_q=copy_q)
        dict_swap[o] = o_new
        outputs_new.append(o_new)
    return outputs_new
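# For reference, the docstring example above maps onto plain ed.copy calls on the
# same toy graph; a minimal sketch assuming only TensorFlow and Edward are
# available (no odin wrapper).
import tensorflow as tf
import edward as ed

x = tf.constant(2.0, name='x')
y = tf.constant(3.0, name='y')
z = tf.multiply(x, y, name='z')
qx = tf.constant(4.0, name='qx')
qz = tf.constant(25.0, name='qz')

# Replace the ancestor x with qx but keep z itself: the copy evaluates to 4 * 3.
z_new = ed.copy(z, dict_swap={x: qx, z: qz}, replace_itself=False)
# Replace z itself, so the result is simply qz.
z_itself = ed.copy(z, dict_swap={x: qx, z: qz}, replace_itself=True)

sess = tf.Session()
print(sess.run([z, z_new, z_itself]))  # [6.0, 12.0, 25.0]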