def _test_linear_regression(self, default, dtype):
  def build_toy_dataset(N, w, noise_std=0.1):
    D = len(w)
    x = np.random.randn(N, D)
    y = np.dot(x, w) + np.random.normal(0, noise_std, size=N)
    return x, y

  with self.test_session() as sess:
    N = 40  # number of data points
    D = 10  # number of features
    w_true = np.random.randn(D)
    X_train, y_train = build_toy_dataset(N, w_true)
    X_test, y_test = build_toy_dataset(N, w_true)

    X = tf.placeholder(dtype, [N, D])
    w = Normal(loc=tf.zeros(D, dtype=dtype), scale=tf.ones(D, dtype=dtype))
    b = Normal(loc=tf.zeros(1, dtype=dtype), scale=tf.ones(1, dtype=dtype))
    y = Normal(loc=ed.dot(X, w) + b, scale=0.1 * tf.ones(N, dtype=dtype))

    n_samples = 2000
    if not default:
      qw = Empirical(tf.Variable(tf.zeros([n_samples, D], dtype=dtype)))
      qb = Empirical(tf.Variable(tf.zeros([n_samples, 1], dtype=dtype)))
      inference = ed.HMC({w: qw, b: qb}, data={X: X_train, y: y_train})
    else:
      inference = ed.HMC([w, b], data={X: X_train, y: y_train})
      qw = inference.latent_vars[w]
      qb = inference.latent_vars[b]
    inference.run(step_size=0.01)

    self.assertAllClose(qw.mean().eval(), w_true, rtol=5e-1, atol=5e-1)
    self.assertAllClose(qb.mean().eval(), [0.0], rtol=5e-1, atol=5e-1)

    old_t, old_n_accept = sess.run([inference.t, inference.n_accept])
    if not default:
      self.assertEqual(old_t, n_samples)
    else:
      self.assertEqual(old_t, 1e4)
    self.assertGreater(old_n_accept, 0.1)
    sess.run(inference.reset)
    new_t, new_n_accept = sess.run([inference.t, inference.n_accept])
    self.assertEqual(new_t, 0)
    self.assertEqual(new_n_accept, 0)
def main(_):
  ed.set_seed(42)

  # MODEL
  z = MultivariateNormalTriL(
      loc=tf.ones(2),
      scale_tril=tf.cholesky(tf.constant([[1.0, 0.8], [0.8, 1.0]])))

  # INFERENCE
  qz = Empirical(params=tf.get_variable("qz/params", [1000, 2]))
  inference = ed.HMC({z: qz})
  inference.run()

  # CRITICISM
  sess = ed.get_session()
  mean, stddev = sess.run([qz.mean(), qz.stddev()])
  print("Inferred posterior mean:")
  print(mean)
  print("Inferred posterior stddev:")
  print(stddev)

  fig, ax = plt.subplots()
  trace = sess.run(qz.params)
  ax.scatter(trace[:, 0], trace[:, 1], marker=".")
  mvn_plot_contours(z, ax=ax)
  plt.show()
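# `mvn_plot_contours` above is assumed to be defined elsewhere in the example.
# A minimal sketch of such a helper (an illustrative assumption, not the
# original code): evaluate the density of the 2-D random variable `z` on a
# grid in the current session and draw contours on the given matplotlib axis.
def mvn_plot_contours(z, xlim=(-2, 4), ylim=(-2, 4), ax=None):
  sess = ed.get_session()
  xs = np.linspace(xlim[0], xlim[1], 100)
  ys = np.linspace(ylim[0], ylim[1], 100)
  xx, yy = np.meshgrid(xs, ys)
  grid = np.stack([xx.ravel(), yy.ravel()], axis=1).astype(np.float32)
  density = sess.run(z.prob(grid)).reshape(xx.shape)
  target = ax if ax is not None else plt
  target.contour(xx, yy, density)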
def main(_):
  # data
  J = 8
  data_y = np.array([28, 8, -3, 7, -1, 1, 18, 12])
  data_sigma = np.array([15, 10, 16, 11, 9, 11, 10, 18])

  # model definition
  mu = Normal(0., 10.)
  logtau = Normal(5., 1.)
  theta_prime = Normal(tf.zeros(J), tf.ones(J))
  sigma = tf.placeholder(tf.float32, J)
  y = Normal(mu + tf.exp(logtau) * theta_prime, sigma * tf.ones([J]))
  data = {y: data_y, sigma: data_sigma}

  # ed.KLqp inference
  with tf.variable_scope('q_logtau'):
    q_logtau = Normal(tf.get_variable('loc', []),
                      tf.nn.softplus(tf.get_variable('scale', [])))

  with tf.variable_scope('q_mu'):
    q_mu = Normal(tf.get_variable('loc', []),
                  tf.nn.softplus(tf.get_variable('scale', [])))

  with tf.variable_scope('q_theta_prime'):
    q_theta_prime = Normal(tf.get_variable('loc', [J]),
                           tf.nn.softplus(tf.get_variable('scale', [J])))

  inference = ed.KLqp({logtau: q_logtau, mu: q_mu,
                       theta_prime: q_theta_prime}, data=data)
  inference.run(n_samples=15, n_iter=60000)
  print("==== ed.KLqp inference ====")
  print("E[mu] = %f" % (q_mu.mean().eval()))
  print("E[logtau] = %f" % (q_logtau.mean().eval()))
  print("E[theta_prime]=")
  print((q_theta_prime.mean().eval()))
  print("==== end ed.KLqp inference ====")
  print("")
  print("")

  # HMC inference
  S = 400000
  burn = S // 2
  hq_logtau = Empirical(tf.get_variable('hq_logtau', [S]))
  hq_mu = Empirical(tf.get_variable('hq_mu', [S]))
  hq_theta_prime = Empirical(tf.get_variable('hq_thetaprime', [S, J]))

  inference = ed.HMC({logtau: hq_logtau, mu: hq_mu,
                      theta_prime: hq_theta_prime}, data=data)
  inference.run()
  print("==== ed.HMC inference ====")
  print("E[mu] = %f" % (hq_mu.params.eval()[burn:].mean()))
  print("E[logtau] = %f" % (hq_logtau.params.eval()[burn:].mean()))
  print("E[theta_prime]=")
  print(hq_theta_prime.params.eval()[burn:, ].mean(0))
  print("==== end ed.HMC inference ====")
  print("")
  print("")
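# A small follow-up sketch (an addition, not part of the original script): the
# HMC chain above samples logtau, so a summary on the original scale
# tau = exp(logtau) can be computed from the post-burn-in draws. This helper
# would need to be called inside main(), after inference.run().
def summarize_tau(hq_logtau, burn):
  import numpy as np
  tau_samples = np.exp(hq_logtau.params.eval()[burn:])
  print("E[tau]  = %f" % tau_samples.mean())
  print("sd[tau] = %f" % tau_samples.std())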
def test_hmc_default(self):
  with self.test_session() as sess:
    x = TransformedDistribution(
        distribution=Normal(1.0, 1.0),
        bijector=tf.contrib.distributions.bijectors.Softplus())
    x.support = 'nonnegative'

    inference = ed.HMC([x])
    inference.initialize(auto_transform=True, step_size=0.8)
    tf.global_variables_initializer().run()
    for _ in range(inference.n_iter):
      info_dict = inference.update()
      inference.print_progress(info_dict)

    # Check approximation on constrained space has same moments as
    # target distribution.
    n_samples = 10000
    x_unconstrained = inference.transformations[x]
    qx = inference.latent_vars[x_unconstrained]
    qx_constrained = Empirical(x_unconstrained.bijector.inverse(qx.params))
    x_mean, x_var = tf.nn.moments(x.sample(n_samples), 0)
    qx_mean, qx_var = tf.nn.moments(qx_constrained.params[500:], 0)
    stats = sess.run([x_mean, qx_mean, x_var, qx_var])
    self.assertAllClose(stats[0], stats[1], rtol=1e-1, atol=1e-1)
    self.assertAllClose(stats[2], stats[3], rtol=1e-1, atol=1e-1)
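# The test above hinges on the Softplus bijector: it maps an unconstrained
# value z to the nonnegative value softplus(z) = log(1 + exp(z)), with inverse
# log(exp(x) - 1). A standalone numpy round-trip check of that pair (an
# illustrative addition, not part of the test file):
def _softplus_round_trip_check():
  import numpy as np
  z = np.array([-2.0, 0.0, 3.0])
  x = np.log1p(np.exp(z))        # forward: softplus(z), always nonnegative
  z_back = np.log(np.expm1(x))   # inverse: softplus^{-1}(x)
  assert np.allclose(z_back, z)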
def test_hmc(self):
  with self.test_session():
    N, D, W_1, W_2, W_3, b_1, b_2, X, y, X_train, y_train = self._test()

    T = 1  # number of MCMC samples
    qW_1 = Empirical(params=tf.Variable(tf.random_normal([T, D, 20])))
    qW_2 = Empirical(params=tf.Variable(tf.random_normal([T, 20, 15])))
    qW_3 = Empirical(params=tf.Variable(tf.random_normal([T, 15, 1])))
    qb_1 = Empirical(params=tf.Variable(tf.random_normal([T, 20])))
    qb_2 = Empirical(params=tf.Variable(tf.random_normal([T, 15])))

    inference = ed.HMC(
        {W_1: qW_1, b_1: qb_1, W_2: qW_2, b_2: qb_2, W_3: qW_3},
        data={y: y_train, X: X_train})
    inference.run()
def ed_graph_2(disc=1):
    # Priors
    if str(sys.argv[4]) == 'laplace':
        W_0 = Laplace(loc=tf.zeros([D, n_hidden]),
                      scale=(std**2 / D) * tf.ones([D, n_hidden]))
        W_1 = Laplace(loc=tf.zeros([n_hidden, K]),
                      scale=(std**2 / n_hidden) * tf.ones([n_hidden, K]))
        b_0 = Laplace(loc=tf.zeros(n_hidden),
                      scale=(std**2 / D) * tf.ones(n_hidden))
        b_1 = Laplace(loc=tf.zeros(K),
                      scale=(std**2 / n_hidden) * tf.ones(K))

    if str(sys.argv[4]) == 'normal':
        W_0 = Normal(loc=tf.zeros([D, n_hidden]),
                     scale=std * D**(-.5) * tf.ones([D, n_hidden]))
        W_1 = Normal(loc=tf.zeros([n_hidden, K]),
                     scale=std * n_hidden**(-.5) * tf.ones([n_hidden, K]))
        b_0 = Normal(loc=tf.zeros(n_hidden),
                     scale=std * D**(-.5) * tf.ones(n_hidden))
        b_1 = Normal(loc=tf.zeros(K),
                     scale=std * n_hidden**(-.5) * tf.ones(K))

    if str(sys.argv[4]) == 'T':
        W_0 = StudentT(df=df * tf.ones([D, n_hidden]),
                       loc=tf.zeros([D, n_hidden]),
                       scale=std**2 / D * tf.ones([D, n_hidden]))
        W_1 = StudentT(df=df * tf.ones([n_hidden, K]),
                       loc=tf.zeros([n_hidden, K]),
                       scale=std**2 / n_hidden * tf.ones([n_hidden, K]))
        b_0 = StudentT(df=df * tf.ones([n_hidden]),
                       loc=tf.zeros(n_hidden),
                       scale=std**2 / D * tf.ones(n_hidden))
        b_1 = StudentT(df=df * tf.ones([K]),
                       loc=tf.zeros(K),
                       scale=std**2 / n_hidden * tf.ones(K))

    x = tf.placeholder(tf.float32, [None, None])
    y = Categorical(logits=nn(x, W_0, b_0, W_1, b_1))
    # We use a placeholder for the labels in anticipation of the training data.
    y_ph = tf.placeholder(tf.int32, [None])

    # Use placeholders for the pre-trained posteriors.
    w0 = tf.placeholder(tf.float32, [n_samp, D, n_hidden])
    w1 = tf.placeholder(tf.float32, [n_samp, n_hidden, K])
    b0 = tf.placeholder(tf.float32, [n_samp, n_hidden])
    b1 = tf.placeholder(tf.float32, [n_samp, K])
    # Empirical distribution
    qW_0 = Empirical(params=tf.Variable(w0))
    qW_1 = Empirical(params=tf.Variable(w1))
    qb_0 = Empirical(params=tf.Variable(b0))
    qb_1 = Empirical(params=tf.Variable(b1))

    if str(sys.argv[3]) == 'hmc':
        inference = ed.HMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1},
                           data={y: y_ph})
    if str(sys.argv[3]) == 'sghmc':
        inference = ed.SGHMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1},
                             data={y: y_ph})

    # Initialise the inference variables
    if str(sys.argv[3]) == 'hmc':
        inference.initialize(step_size=disc * leap_size, n_steps=step_no,
                             n_print=100)
    if str(sys.argv[3]) == 'sghmc':
        inference.initialize(step_size=disc * leap_size,
                             friction=disc**2 * 0.1, n_print=100)

    return ((x, y), y_ph, W_0, b_0, W_1, b_1, qW_0, qb_0, qW_1, qb_1,
            inference, w0, w1, b0, b1)
def _test_normal_normal(self, default, dtype):
  with self.test_session() as sess:
    x_data = np.array([0.0] * 50, dtype=np.float32)

    mu = Normal(loc=tf.constant(0.0, dtype=dtype),
                scale=tf.constant(1.0, dtype=dtype))
    x = Normal(loc=mu, scale=tf.constant(1.0, dtype=dtype), sample_shape=50)

    n_samples = 2000
    # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140)
    if not default:
      qmu = Empirical(params=tf.Variable(tf.ones(n_samples, dtype=dtype)))
      inference = ed.HMC({mu: qmu}, data={x: x_data})
    else:
      inference = ed.HMC([mu], data={x: x_data})
      qmu = inference.latent_vars[mu]
    inference.run()

    self.assertAllClose(qmu.mean().eval(), 0, rtol=1e-1, atol=1e-1)
    self.assertAllClose(qmu.stddev().eval(), np.sqrt(1 / 51),
                        rtol=1e-1, atol=1e-1)

    old_t, old_n_accept = sess.run([inference.t, inference.n_accept])
    if not default:
      self.assertEqual(old_t, n_samples)
    else:
      self.assertEqual(old_t, 1e4)
    self.assertGreater(old_n_accept, 0.1)
    sess.run(inference.reset)
    new_t, new_n_accept = sess.run([inference.t, inference.n_accept])
    self.assertEqual(new_t, 0)
    self.assertEqual(new_n_accept, 0)
def test_normalnormal_run(self):
  with self.test_session() as sess:
    x_data = np.array([0.0] * 50, dtype=np.float32)

    mu = Normal(loc=0.0, scale=1.0)
    x = Normal(loc=tf.ones(50) * mu, scale=1.0)

    qmu = Empirical(params=tf.Variable(tf.ones(2000)))

    # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140)
    inference = ed.HMC({mu: qmu}, data={x: x_data})
    inference.run()

    self.assertAllClose(qmu.mean().eval(), 0, rtol=1e-2, atol=1e-2)
    self.assertAllClose(qmu.stddev().eval(), np.sqrt(1 / 51),
                        rtol=1e-2, atol=1e-2)
def test_indexedslices(self):
  """Test that gradients accumulate when tf.gradients doesn't return
  tf.Tensor (IndexedSlices)."""
  with self.test_session() as sess:
    N = 10  # number of data points
    K = 2  # number of clusters
    T = 1  # number of MCMC samples

    x_data = np.zeros(N, dtype=np.float32)

    mu = Normal(0.0, 1.0, sample_shape=K)
    c = Categorical(logits=tf.zeros(N))
    x = Normal(tf.gather(mu, c), tf.ones(N))

    qmu = Empirical(params=tf.Variable(tf.ones([T, K])))
    qc = Empirical(params=tf.Variable(tf.ones([T, N])))

    inference = ed.HMC({mu: qmu}, data={x: x_data})
    inference.initialize()
def main(_):
  # DATA
  trait_true = np.random.normal(size=[FLAGS.nsubj, 1])
  thresh_true = np.random.normal(size=[1, FLAGS.nitem])
  X_data = np.random.binomial(1, expit(trait_true - thresh_true))

  # MODEL
  trait = Normal(loc=0.0, scale=1.0, sample_shape=[FLAGS.nsubj, 1])
  thresh = Normal(loc=0.0, scale=1.0, sample_shape=[1, FLAGS.nitem])
  X = Bernoulli(logits=trait - thresh)

  # INFERENCE
  q_trait = Empirical(params=tf.get_variable("q_trait/params",
                                             [FLAGS.T, FLAGS.nsubj, 1]))
  q_thresh = Empirical(params=tf.get_variable("q_thresh/params",
                                              [FLAGS.T, 1, FLAGS.nitem]))
  inference = ed.HMC({trait: q_trait, thresh: q_thresh}, data={X: X_data})
  inference.run(step_size=0.1)

  # Alternatively, use variational inference.
  # q_trait = Normal(
  #     loc=tf.get_variable("q_trait/loc", [FLAGS.nsubj, 1]),
  #     scale=tf.nn.softplus(
  #         tf.get_variable("q_trait/scale", [FLAGS.nsubj, 1])))
  # q_thresh = Normal(
  #     loc=tf.get_variable("q_thresh/loc", [1, FLAGS.nitem]),
  #     scale=tf.nn.softplus(
  #         tf.get_variable("q_thresh/scale", [1, FLAGS.nitem])))
  # inference = ed.KLqp({trait: q_trait, thresh: q_thresh}, data={X: X_data})
  # inference.run(n_iter=2500, n_samples=10)

  # CRITICISM
  # Check that the inferred posterior mean captures the true traits.
  plt.scatter(trait_true, q_trait.mean().eval())
  plt.show()

  print("MSE between true traits and inferred posterior mean:")
  print(np.mean(np.square(trait_true - q_trait.mean().eval())))
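# Optional extension (a sketch, not part of the original script): a posterior
# predictive check that scores the observed response matrix under the model
# with the priors replaced by the inferred posteriors, using ed.copy and
# ed.evaluate as in other Edward examples. It would be called inside main()
# after inference.run().
def posterior_predictive_check(X, trait, thresh, q_trait, q_thresh, X_data):
  X_post = ed.copy(X, {trait: q_trait, thresh: q_thresh})
  print("Posterior predictive log-likelihood:")
  print(ed.evaluate('log_likelihood', data={X_post: X_data}))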
def run(self, data, method="klqp", **kwargs): if method == "klqp": print(">> Initializing ... ", end="") inference = ed.KLqp(self.unwind_latent_vars(), data=data) inference.initialize(**kwargs) print("ok") # RUNNING THE INFERENCE sess = ed.get_session() init = tf.global_variables_initializer() init.run() losses = [] for _ in tqdm(range(inference.n_iter)): info_dict = inference.update() losses.append(info_dict['loss']) plt.figure(figsize=(7, 3)) plt.title("Loss") plt.semilogy(losses) plt.show() elif method == "hmc": print(">> Initializing ... ", end="") inference = ed.HMC(self.unwind_latent_vars(), data=data) inference.initialize(**kwargs) print("ok") # RUNNING THE INFERENCE sess = ed.get_session() init = tf.global_variables_initializer() init.run() acceptance_rates = [] for _ in tqdm(range(inference.n_iter)): info_dict = inference.update() acceptance_rates.append(info_dict['accept_rate']) plt.figure(figsize=(7, 3)) plt.title("Acceptance Rate") plt.semilogy(acceptance_rates) plt.show()
def main(_):
  ed.set_seed(42)

  # DATA
  x_data = np.array([0.0] * 50)

  # MODEL: Normal-Normal with known variance
  mu = Normal(loc=0.0, scale=1.0)
  x = Normal(loc=mu, scale=1.0, sample_shape=50)

  # INFERENCE
  qmu = Empirical(params=tf.get_variable("qmu/params", [1000],
                                         initializer=tf.zeros_initializer()))

  # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140)
  inference = ed.HMC({mu: qmu}, data={x: x_data})
  inference.run()

  # CRITICISM
  sess = ed.get_session()
  mean, stddev = sess.run([qmu.mean(), qmu.stddev()])
  print("Inferred posterior mean:")
  print(mean)
  print("Inferred posterior stddev:")
  print(stddev)

  # Check convergence with visual diagnostics.
  samples = sess.run(qmu.params)

  # Plot histogram.
  plt.hist(samples, bins='auto')
  plt.show()

  # Trace plot.
  plt.plot(samples)
  plt.show()
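# For reference, the analytic posterior quoted in the comment above follows
# from standard Normal-Normal conjugacy with known unit noise variance. A
# small standalone numpy check (an addition, not part of the original script):
def analytic_normal_normal_posterior():
  import numpy as np
  x = np.zeros(50)                # the observed data above
  prior_var, noise_var = 1.0, 1.0
  post_var = 1.0 / (1.0 / prior_var + len(x) / noise_var)         # = 1/51
  post_mean = post_var * (0.0 / prior_var + x.sum() / noise_var)  # = 0.0
  print(post_mean, np.sqrt(post_var))                             # 0.0, ~0.140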
# tf.select = tf.where
ed.set_seed(42)

# MODEL
# tf_x = tf.Variable(x.T, trainable=False)
tf_x = tf.Variable(x.T, trainable=False, dtype=tf.float32)
# Standard normal prior on coefficients
# beta = ed.models.Normal(mu=tf.zeros(D), sigma=tf.ones(D))
beta = ed.models.Normal(loc=tf.zeros(D), scale=tf.ones(D))
logit_pred = tf.squeeze(tf.matmul(tf.expand_dims(beta, 0), tf_x))
# ed_y = ed.models.BernoulliWithSigmoidP(p=logit_pred)
ed_y = ed.models.Bernoulli(logits=logit_pred)

# INFERENCE
qbeta = ed.models.Empirical(params=tf.Variable(tf.zeros([n_iterations, D])))
# inference = ed.HMC({beta: qbeta}, data={ed_y: y})
inference = ed.HMC({beta: qbeta}, data={ed_y: y.astype(int)})

t0 = time.time()
inference.run(step_size=step_size, n_steps=n_steps)
ed_time = time.time() - t0
print('Edward took %.3f seconds' % ed_time)

sess = ed.get_session()
ed_samples = sess.run(qbeta.params)
# plot(ed_samples[:, 0])
def train(self, X_train, y_train, X_val, is_print=True):
    ''' set up BNN and run HMC inference '''

    def neural_network(X):
        # set up the BNN structure using tf
        if self.activation_fn == 'relu':
            h = tf.maximum(tf.matmul(X, W_0) + b_0, 0)  # relu
        elif self.activation_fn == 'Lrelu':
            a = 0.2
            h = tf.maximum(tf.matmul(X, W_0) + b_0,
                           a * (tf.matmul(X, W_0) + b_0))  # leaky relu
        elif self.activation_fn == 'erf':
            h = tf.erf(tf.matmul(X, W_0) + b_0)
        elif self.activation_fn == 'tanh':
            h = tf.tanh(tf.matmul(X, W_0) + b_0)
            # h = tf.tanh(1.23 * tf.matmul(X, W_0) + b_0)  # add 1.23 for close to GP erf
        elif self.activation_fn == 'sigmoid':
            h = tf.sigmoid(tf.matmul(X, W_0) + b_0)
        elif self.activation_fn == 'softplus':
            self.c = 2.  # if this is bigger -> relu behaviour, but less 'soft'
            h = tf.divide(
                tf.log(tf.exp(tf.multiply(tf.matmul(X, W_0) + b_0, self.c)) + 1),
                self.c)
        elif self.activation_fn == 'rbf':
            self.beta_2 = 1 / (2 * self.g_var)
            h = tf.exp(-self.beta_2 * tf.square(X - W_0))

        h = tf.matmul(h, W_1)  # + b_1
        return tf.reshape(h, [-1])

    def neural_network_deep(X):
        # set up the BNN structure using tf
        if self.activation_fn == 'relu':
            h1 = tf.maximum(tf.matmul(X, W_0) + b_0, 0)  # relu
            h = tf.maximum(tf.matmul(h1, W_1) + b_1, 0)  # relu
        elif self.activation_fn == 'Lrelu':
            a = 0.2
            h1 = tf.maximum(tf.matmul(X, W_0) + b_0,
                            a * (tf.matmul(X, W_0) + b_0))  # leaky relu
            h = tf.maximum(tf.matmul(h1, W_1) + b_1,
                           a * (tf.matmul(h1, W_1) + b_1))  # leaky relu
        elif self.activation_fn == 'erf':
            h1 = tf.erf(tf.matmul(X, W_0) + b_0)
            h = tf.erf(tf.matmul(h1, W_1) + b_1)
        else:
            raise Exception('tp: activation not implemented')

        h = tf.matmul(h, W_2)  # + b_2
        return tf.reshape(h, [-1])

    if self.activation_fn in ('relu', 'softplus', 'Lrelu'):
        init_stddev_0_w = np.sqrt(self.w_0_var)  # /d_in
        init_stddev_0_b = np.sqrt(self.b_0_var)  # /d_in
        init_stddev_1_w = 1.0 / np.sqrt(self.hidden_size)  # *np.sqrt(10)  # 2nd layer init. dist
    elif self.activation_fn in ('tanh', 'erf'):
        init_stddev_0_w = np.sqrt(self.w_0_var)  # 1st layer init. dist for weights
        init_stddev_0_b = np.sqrt(self.b_0_var)  # for bias
        init_stddev_1_w = 1.0 / np.sqrt(self.hidden_size)  # 2nd layer init. dist
    elif self.activation_fn == 'rbf':
        init_stddev_0_w = np.sqrt(self.u_var)  # centres = sig_u
        init_stddev_0_b = np.sqrt(self.g_var)  # fixed /beta
        init_stddev_1_w = 1.0 / np.sqrt(self.hidden_size)  # 2nd layer init. dist

    n = X_train.shape[0]
    X_dim = X_train.shape[1]
    y_dim = 1  # y_train.shape[1]

    with tf.name_scope("model"):
        W_0 = Normal(loc=tf.zeros([X_dim, self.hidden_size]),
                     scale=init_stddev_0_w * tf.ones([X_dim, self.hidden_size]),
                     name="W_0")
        if self.deep_NN == False:
            W_1 = Normal(loc=tf.zeros([self.hidden_size, y_dim]),
                         scale=init_stddev_1_w * tf.ones([self.hidden_size, y_dim]),
                         name="W_1")
            b_0 = Normal(loc=tf.zeros(self.hidden_size),
                         scale=init_stddev_0_b * tf.ones(self.hidden_size),
                         name="b_0")
            b_1 = Normal(loc=tf.zeros(1), scale=tf.ones(1), name="b_1")
        else:
            W_1 = Normal(loc=tf.zeros([self.hidden_size, self.hidden_size]),
                         scale=init_stddev_1_w * tf.ones([self.hidden_size, y_dim]),
                         name="W_1")
            b_0 = Normal(loc=tf.zeros(self.hidden_size),
                         scale=init_stddev_0_b * tf.ones(self.hidden_size),
                         name="b_0")
            W_2 = Normal(loc=tf.zeros([self.hidden_size, y_dim]),
                         scale=init_stddev_1_w * tf.ones([self.hidden_size, y_dim]),
                         name="W_2")
            b_1 = Normal(loc=tf.zeros(self.hidden_size),
                         scale=init_stddev_1_w * tf.ones(self.hidden_size),
                         name="b_1")
            b_2 = Normal(loc=tf.zeros(1), scale=tf.ones(1), name="b_2")

        X = tf.placeholder(tf.float32, [n, X_dim], name="X")
        if self.deep_NN == False:
            y = Normal(loc=neural_network(X),
                       scale=np.sqrt(self.data_noise) * tf.ones(n), name="y")
        else:
            y = Normal(loc=neural_network_deep(X),
                       scale=np.sqrt(self.data_noise) * tf.ones(n), name="y")

    # inference
    if self.deep_NN == False:
        qW_0 = Empirical(tf.Variable(tf.zeros([self.n_samples, X_dim, self.hidden_size])))
        qW_1 = Empirical(tf.Variable(tf.zeros([self.n_samples, self.hidden_size, y_dim])))
        qb_0 = Empirical(tf.Variable(tf.zeros([self.n_samples, self.hidden_size])))
        qb_1 = Empirical(tf.Variable(tf.zeros([self.n_samples, y_dim])))
    else:
        qW_0 = Empirical(tf.Variable(tf.zeros([self.n_samples, X_dim, self.hidden_size])))
        qW_1 = Empirical(tf.Variable(tf.zeros([self.n_samples, self.hidden_size, self.hidden_size])))
        qW_2 = Empirical(tf.Variable(tf.zeros([self.n_samples, self.hidden_size, y_dim])))
        qb_0 = Empirical(tf.Variable(tf.zeros([self.n_samples, self.hidden_size])))
        qb_1 = Empirical(tf.Variable(tf.zeros([self.n_samples, self.hidden_size])))
        qb_2 = Empirical(tf.Variable(tf.zeros([self.n_samples, y_dim])))

    # get some priors ### !!! TODO, turn this into a proper function
    # X_pred = X_val.astype(np.float32).reshape((X_val.shape[0], 1))
    # self.y_priors = tf.stack([nn_predict(X_pred, W_0.sample(), W_1.sample(),
    #                                      b_0.sample(), b_1.sample())
    #                           for _ in range(10)])

    # Neal 2012:
    # Too large a stepsize will result in a very low acceptance rate for states
    # proposed by simulating trajectories. Too small a stepsize will either waste
    # computation time, by the same factor as the stepsize is too small, or (worse)
    # will lead to slow exploration by a random walk.
    # https://stats.stackexchange.com/questions/304942/how-to-set-step-size-in-hamiltonian-monte-carlo
    # If ϵ is too large, then there will be large discretisation error and low
    # acceptance; if ϵ is too small then more expensive leapfrog steps will be
    # required to move large distances. Ideally we want the largest possible value
    # of ϵ that gives reasonable acceptance probability. Unfortunately this may
    # vary for different values of the target variable. A simple heuristic to set
    # this may be to do a preliminary run with fixed L, gradually increasing ϵ
    # until the acceptance probability is at an appropriate level.
    # Setting the trajectory length by trial and error therefore seems necessary.
    # For a problem thought to be fairly difficult, a trajectory with L = 100 might
    # be a suitable starting point. If preliminary runs (with a suitable ε; see
    # above) show that HMC reaches a nearly independent point after only one
    # iteration, a smaller value of L might be tried next. (Unless these
    # "preliminary" runs are actually sufficient, in which case there is of course
    # no need to do more runs.) If instead there is high autocorrelation in the run
    # with L = 100, runs with L = 1000 might be tried next.
    # It may also be advisable to randomly sample ϵ and L from suitable ranges to
    # avoid the possibility of having paths that are close to periodic, as this
    # would slow mixing.

    if self.deep_NN == False:
        inference = ed.HMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1},
                           data={X: X_train, y: y_train.ravel()})
    else:
        inference = ed.HMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1,
                            W_2: qW_2, b_2: qb_2},
                           data={X: X_train, y: y_train.ravel()})

    inference.run(step_size=self.step_size, n_steps=self.n_steps)  # logdir='log'

    # drop first chunk of burn-in samples
    if self.deep_NN == False:
        self.qW_0_keep = qW_0.params[self.burn_in:].eval()
        self.qW_1_keep = qW_1.params[self.burn_in:].eval()
        self.qb_0_keep = qb_0.params[self.burn_in:].eval()
        self.qb_1_keep = qb_1.params[self.burn_in:].eval()
    else:
        self.qW_0_keep = qW_0.params[self.burn_in:].eval()
        self.qW_1_keep = qW_1.params[self.burn_in:].eval()
        self.qb_0_keep = qb_0.params[self.burn_in:].eval()
        self.qW_2_keep = qW_2.params[self.burn_in:].eval()
        self.qb_1_keep = qb_1.params[self.burn_in:].eval()
        self.qb_2_keep = qb_2.params[self.burn_in:].eval()

    return
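# The Neal (2012) comments above suggest preliminary runs to pick the step
# size. A rough standalone sketch of that heuristic (an illustrative addition,
# not part of the original class): run short pilot chains and keep the largest
# step size whose average acceptance rate stays above a target.
# `make_inference` is an assumed factory that builds a fresh ed.HMC for the
# given step size, calls initialize(), and runs the variable initializer.
def pilot_step_size(make_inference, candidates=(1e-4, 1e-3, 1e-2, 1e-1),
                    n_pilot=200, target=0.65):
    best = candidates[0]
    for eps in candidates:
        inference = make_inference(eps)
        rates = [inference.update()['accept_rate'] for _ in range(n_pilot)]
        if np.mean(rates) >= target:
            best = eps  # largest step size that still accepts often enough
    return best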
def ed_graph_init():
    # Graph for prior distributions
    if str(sys.argv[4]) == 'laplace':
        W_0 = Laplace(loc=tf.zeros([D, n_hidden]), scale=tf.ones([D, n_hidden]))
        W_1 = Laplace(loc=tf.zeros([n_hidden, K]),
                      scale=(std**2 / n_hidden) * tf.ones([n_hidden, K]))
        b_0 = Laplace(loc=tf.zeros(n_hidden), scale=tf.ones(n_hidden))
        b_1 = Laplace(loc=tf.zeros(K), scale=(std**2 / n_hidden) * tf.ones(K))

    if str(sys.argv[4]) == 'normal':
        W_0 = Normal(loc=tf.zeros([D, n_hidden]), scale=tf.ones([D, n_hidden]))
        W_1 = Normal(loc=tf.zeros([n_hidden, K]),
                     scale=std * n_hidden**(-.5) * tf.ones([n_hidden, K]))
        b_0 = Normal(loc=tf.zeros(n_hidden), scale=tf.ones(n_hidden))
        b_1 = Normal(loc=tf.zeros(K), scale=std * n_hidden**(-.5) * tf.ones(K))

    if str(sys.argv[4]) == 'T':
        W_0 = StudentT(df=df * tf.ones([D, n_hidden]),
                       loc=tf.zeros([D, n_hidden]),
                       scale=tf.ones([D, n_hidden]))
        W_1 = StudentT(df=df * tf.ones([n_hidden, K]),
                       loc=tf.zeros([n_hidden, K]),
                       scale=std**2 / n_hidden * tf.ones([n_hidden, K]))
        b_0 = StudentT(df=df * tf.ones([n_hidden]),
                       loc=tf.zeros(n_hidden),
                       scale=tf.ones(n_hidden))
        b_1 = StudentT(df=df * tf.ones([K]),
                       loc=tf.zeros(K),
                       scale=std**2 / n_hidden * tf.ones(K))

    # Inputs
    x = tf.placeholder(tf.float32, [None, D])
    # Regression likelihood
    y = Normal(loc=nn(x, W_0, b_0, W_1, b_1),
               scale=std_out * tf.ones([tf.shape(x)[0]]))
    # We use a placeholder for the labels in anticipation of the training data.
    y_ph = tf.placeholder(tf.float32, [None])

    # Graph for posterior distribution
    if str(sys.argv[4]) == 'normal':
        qW_0 = Empirical(params=tf.Variable(tf.random_normal([n_samp, D, n_hidden])))
        qW_1 = Empirical(params=tf.Variable(
            tf.random_normal([n_samp, n_hidden, K], stddev=std * (n_hidden**-.5))))
        qb_0 = Empirical(params=tf.Variable(tf.random_normal([n_samp, n_hidden])))
        qb_1 = Empirical(params=tf.Variable(
            tf.random_normal([n_samp, K], stddev=std * (n_hidden**-.5))))

    if str(sys.argv[4]) == 'laplace' or str(sys.argv[4]) == 'T':
        # Use a placeholder, otherwise we cannot assign a tensor > 2GB
        w0 = tf.placeholder(tf.float32, [n_samp, D, n_hidden])
        w1 = tf.placeholder(tf.float32, [n_samp, n_hidden, K])
        b0 = tf.placeholder(tf.float32, [n_samp, n_hidden])
        b1 = tf.placeholder(tf.float32, [n_samp, K])
        # Empirical distribution
        qW_0 = Empirical(params=tf.Variable(w0))
        qW_1 = Empirical(params=tf.Variable(w1))
        qb_0 = Empirical(params=tf.Variable(b0))
        qb_1 = Empirical(params=tf.Variable(b1))

    # Build inference graph
    if str(sys.argv[3]) == 'hmc':
        inference = ed.HMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1},
                           data={y: y_ph})
    if str(sys.argv[3]) == 'sghmc':
        inference = ed.SGHMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1},
                             data={y: y_ph})

    # Initialise the inference variables
    if str(sys.argv[3]) == 'hmc':
        inference.initialize(step_size=leap_size, n_steps=step_no, n_print=100)
    if str(sys.argv[3]) == 'sghmc':
        inference.initialize(step_size=leap_size, friction=0.4, n_print=100)

    if str(sys.argv[4]) == 'laplace' or str(sys.argv[4]) == 'T':
        return ((x, y), y_ph, W_0, b_0, W_1, b_1, qW_0, qb_0, qW_1, qb_1,
                inference, w0, w1, b0, b1)
    else:
        return (x, y), y_ph, W_0, b_0, W_1, b_1, qW_0, qb_0, qW_1, qb_1, inference
from edward.models import Bernoulli, Normal, Empirical
from scipy.special import expit

# DATA
nsubj = 200
nitem = 25
trait_true = np.random.normal(size=[nsubj, 1])
thresh_true = np.random.normal(size=[1, nitem])
X_data = np.random.binomial(1, expit(trait_true - thresh_true))

# MODEL
trait = Normal(mu=tf.zeros([nsubj, 1]), sigma=tf.ones([nsubj, 1]))
thresh = Normal(mu=tf.zeros([1, nitem]), sigma=tf.ones([1, nitem]))
X = Bernoulli(logits=tf.sub(trait, thresh))

# INFERENCE
T = 5000  # number of posterior samples
q_trait = Empirical(params=tf.Variable(tf.zeros([T, nsubj, 1])))
q_thresh = Empirical(params=tf.Variable(tf.zeros([T, 1, nitem])))

inference = ed.HMC({trait: q_trait, thresh: q_thresh}, data={X: X_data})
inference.run(step_size=0.1)

# CRITICISM
# Check that the inferred posterior mean captures the true traits.
plt.scatter(trait_true, q_trait.mean().eval())
plt.show()

print("MSE between true traits and inferred posterior mean:")
print(np.mean(np.square(trait_true - q_trait.mean().eval())))
"""Correlated normal posterior. Inference with Hamiltonian Monte Carlo. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import edward as ed import tensorflow as tf from edward.models import Empirical, MultivariateNormalFull ed.set_seed(42) # MODEL z = MultivariateNormalFull(mu=tf.ones(2), sigma=tf.constant([[1.0, 0.8], [0.8, 1.0]])) # INFERENCE qz = Empirical(params=tf.Variable(tf.random_normal([1000, 2]))) inference = ed.HMC({z: qz}) inference.run() # CRITICISM sess = ed.get_session() mean, std = sess.run([qz.mean(), qz.std()]) print("Inferred posterior mean:") print(mean) print("Inferred posterior std:") print(std)
from edward.models import Empirical, Normal

ed.set_seed(42)

# DATA
x_data = np.array([0.0] * 50)

# MODEL: Normal-Normal with known variance
mu = Normal(mu=0.0, sigma=1.0)
x = Normal(mu=tf.ones(50) * mu, sigma=1.0)

# INFERENCE
qmu = Empirical(params=tf.Variable(tf.zeros(1000)))

# analytic solution: N(mu=0.0, sigma=\sqrt{1/51}=0.140)
inference = ed.HMC({mu: qmu}, data={x: x_data})
inference.run()

# CRITICISM
sess = ed.get_session()
mean, std = sess.run([qmu.mean(), qmu.std()])
print("Inferred posterior mean:")
print(mean)
print("Inferred posterior std:")
print(std)

# Check convergence with visual diagnostics.
samples = sess.run(qmu.params)

# Plot histogram.
plt.hist(samples, bins='auto')
#data = tf.constant("C", shape=(N,)) #??? #data = tf.constant(0, shape=(N,)) #data = tf.constant(20, shape=(N,)) data = np.ones((N,))*17 ##Infer: T=10000 qtheta = Empirical(params=tf.Variable(0.5+tf.zeros([T]))) #Why need tf.Variable here? tf.summary.scalar('qtheta', qtheta) #proposal_theta = Beta(concentration1=1.0, concentration0=1.0, sample_shape=(1,)) # proposal_theta = Normal(loc=theta,scale=0.5) # inference = ed.MetropolisHastings({theta: qtheta}, {theta: proposal_theta}, {formulas: data}) sess = ed.get_session() inference = ed.HMC({theta: qtheta}, {formulas: data}) inference.initialize() tf.global_variables_initializer().run() for _ in range(inference.n_iter): info_dict = inference.update() inference.print_progress(info_dict) inference.finalize() train_writer = tf.summary.FileWriter('/tmp/tensorflow/',sess.graph) # qtheta = Beta(tf.Variable(1.0), tf.Variable(1.0)) #Why need tf.Variable here? # inference = ed.KLqp({theta: qtheta}, {formulas: data})
w0 = tf.Variable(p0)
w1 = tf.Variable(p1)
b0 = tf.Variable(pp0)
b1 = tf.Variable(pp1)

# Empirical distribution
qW_0 = Empirical(params=w0)
qW_1 = Empirical(params=w1)
qb_0 = Empirical(params=b0)
qb_1 = Empirical(params=b1)

if str(sys.argv[2]) == 'hmc':
    inference = ed.HMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1},
                       data={y: y_ph})
if str(sys.argv[2]) == 'sghmc':
    inference = ed.SGHMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1},
                         data={y: y_ph})

# Initialise the inference variables
if str(sys.argv[2]) == 'hmc':
    inference.initialize(step_size=leap_size, n_steps=step_no, n_print=100)
def main(_):
  ed.set_seed(42)

  # DATA
  X_train, y_train = build_toy_dataset(FLAGS.N)

  # MODEL
  X = tf.placeholder(tf.float32, [FLAGS.N, FLAGS.D])
  w = Normal(loc=tf.zeros(FLAGS.D), scale=3.0 * tf.ones(FLAGS.D))
  b = Normal(loc=tf.zeros([]), scale=3.0 * tf.ones([]))
  y = Bernoulli(logits=ed.dot(X, w) + b)

  # INFERENCE
  qw = Empirical(params=tf.get_variable("qw/params", [FLAGS.T, FLAGS.D]))
  qb = Empirical(params=tf.get_variable("qb/params", [FLAGS.T]))
  inference = ed.HMC({w: qw, b: qb}, data={X: X_train, y: y_train})
  inference.initialize(n_print=10, step_size=0.6)

  # Alternatively, use variational inference.
  # qw_loc = tf.get_variable("qw_loc", [FLAGS.D])
  # qw_scale = tf.nn.softplus(tf.get_variable("qw_scale", [FLAGS.D]))
  # qb_loc = tf.get_variable("qb_loc", []) + 10.0
  # qb_scale = tf.nn.softplus(tf.get_variable("qb_scale", []))
  # qw = Normal(loc=qw_loc, scale=qw_scale)
  # qb = Normal(loc=qb_loc, scale=qb_scale)
  # inference = ed.KLqp({w: qw, b: qb}, data={X: X_train, y: y_train})
  # inference.initialize(n_print=10, n_iter=600)

  tf.global_variables_initializer().run()

  # Set up figure.
  fig = plt.figure(figsize=(8, 8), facecolor='white')
  ax = fig.add_subplot(111, frameon=False)
  plt.ion()
  plt.show(block=False)

  # Build samples from inferred posterior.
  n_samples = 50
  inputs = np.linspace(-5, 3, num=400, dtype=np.float32).reshape((400, 1))
  probs = tf.stack([tf.sigmoid(ed.dot(inputs, qw.sample()) + qb.sample())
                    for _ in range(n_samples)])

  for t in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)

    if t % inference.n_print == 0:
      outputs = probs.eval()

      # Plot data and functions
      plt.cla()
      ax.plot(X_train[:], y_train, 'bx')
      for s in range(n_samples):
        ax.plot(inputs[:], outputs[s], alpha=0.2)
      ax.set_xlim([-5, 3])
      ax.set_ylim([-0.5, 1.5])
      plt.draw()
      plt.pause(1.0 / 60.0)
def ed_graph_2(disc=1):
    # Priors
    if str(sys.argv[4]) == 'laplace':
        W_0 = Laplace(loc=tf.zeros([D, n_hidden]),
                      scale=(std**2 / D) * tf.ones([D, n_hidden]))
        W_1 = Laplace(loc=tf.zeros([n_hidden, n_hidden]),
                      scale=(std**2 / n_hidden) * tf.ones([n_hidden, n_hidden]))
        W_2 = Laplace(loc=tf.zeros([n_hidden, K]),
                      scale=(std**2 / n_hidden) * tf.ones([n_hidden, K]))
        b_0 = Laplace(loc=tf.zeros(n_hidden),
                      scale=(std**2 / D) * tf.ones(n_hidden))
        b_1 = Laplace(loc=tf.zeros(n_hidden),
                      scale=(std**2 / n_hidden) * tf.ones(n_hidden))
        b_2 = Laplace(loc=tf.zeros(K),
                      scale=(std**2 / n_hidden) * tf.ones(K))

    if str(sys.argv[4]) == 'normal':
        W_0 = Normal(loc=tf.zeros([D, n_hidden]),
                     scale=std * D**-.5 * tf.ones([D, n_hidden]))
        W_1 = Normal(loc=tf.zeros([n_hidden, K]),
                     scale=std * n_hidden**-.5 * tf.ones([n_hidden, K]))
        W_2 = Normal(loc=tf.zeros([n_hidden, K]),
                     scale=std * n_hidden**-.5 * tf.ones([n_hidden, K]))
        b_0 = Normal(loc=tf.zeros(n_hidden),
                     scale=std * D**-.5 * tf.ones(n_hidden))
        b_1 = Normal(loc=tf.zeros(n_hidden),
                     scale=10 * n_hidden**(-.5) * tf.ones(n_hidden))
        b_2 = Normal(loc=tf.zeros(K),
                     scale=10 * n_hidden**(-.5) * tf.ones(K))

    if str(sys.argv[4]) == 'T':
        W_0 = StudentT(df=df * tf.ones([D, n_hidden]),
                       loc=tf.zeros([D, n_hidden]),
                       scale=(std**2 / D) * tf.ones([D, n_hidden]))
        W_1 = StudentT(df=df * tf.ones([n_hidden, n_hidden]),
                       loc=tf.zeros([n_hidden, n_hidden]),
                       scale=(std**2 / n_hidden) * tf.ones([n_hidden, n_hidden]))
        W_2 = StudentT(df=df * tf.ones([n_hidden, K]),
                       loc=tf.zeros([n_hidden, K]),
                       scale=(std**2 / n_hidden) * tf.ones([n_hidden, K]))
        b_0 = StudentT(df=df * tf.ones([n_hidden]),
                       loc=tf.zeros(n_hidden),
                       scale=(std**2 / D) * tf.ones(n_hidden))
        b_1 = StudentT(df=df * tf.ones([n_hidden]),
                       loc=tf.zeros(n_hidden),
                       scale=(std**2 / n_hidden) * tf.ones(n_hidden))
        b_2 = StudentT(df=df * tf.ones([K]),
                       loc=tf.zeros(K),
                       scale=(std**2 / n_hidden) * tf.ones(K))

    x = tf.placeholder(tf.float32, [None, None])
    y = Categorical(logits=nn(x, W_0, b_0, W_1, b_1, W_2, b_2))
    # We use a placeholder for the labels in anticipation of the training data.
    y_ph = tf.placeholder(tf.int32, [N])

    # Use placeholders for the pre-trained posteriors.
    p0 = tf.placeholder(tf.float32, [n_samp, D, n_hidden])
    p1 = tf.placeholder(tf.float32, [n_samp, n_hidden, n_hidden])
    p2 = tf.placeholder(tf.float32, [n_samp, n_hidden, K])
    pp0 = tf.placeholder(tf.float32, [n_samp, n_hidden])
    pp1 = tf.placeholder(tf.float32, [n_samp, n_hidden])
    pp2 = tf.placeholder(tf.float32, [n_samp, K])

    w0 = tf.Variable(p0)
    w1 = tf.Variable(p1)
    w2 = tf.Variable(p2)
    b0 = tf.Variable(pp0)
    b1 = tf.Variable(pp1)
    b2 = tf.Variable(pp2)

    # Empirical distribution
    qW_0 = Empirical(params=w0)
    qW_1 = Empirical(params=w1)
    qW_2 = Empirical(params=w2)
    qb_0 = Empirical(params=b0)
    qb_1 = Empirical(params=b1)
    qb_2 = Empirical(params=b2)

    if str(sys.argv[3]) == 'hmc':
        inference = ed.HMC(
            {W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1, W_2: qW_2, b_2: qb_2},
            data={y: y_ph})
    if str(sys.argv[3]) == 'sghmc':
        inference = ed.SGHMC(
            {W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1, W_2: qW_2, b_2: qb_2},
            data={y: y_ph})

    # Initialise the inference variables
    if str(sys.argv[3]) == 'hmc':
        inference.initialize(step_size=leap_size, n_steps=step_no, n_print=100,
                             scale={y: float(mnist.train.num_examples) / N})
    if str(sys.argv[3]) == 'sghmc':
        inference.initialize(step_size=leap_size, friction=0.4, n_print=100,
                             scale={y: float(mnist.train.num_examples) / N})

    return ((x, y), y_ph, W_0, b_0, W_1, b_1, W_2, b_2,
            qW_0, qb_0, qW_1, qb_1, qW_2, qb_2, inference,
            p0, p1, p2, pp0, pp1, pp2, w0, w1, w2, b0, b1, b2)
# for i in range(10):
#     print(x.eval())

## Observations:
# data = tf.ones(10, dtype=tf.int32)  # NOT WORKING!
data = [1, 1, 1, 1, 1, 1, 1, 1, 0, 1]

## Infer:
# Variational
# qtheta = Beta(tf.Variable(1.0), tf.Variable(1.0))  # Why need tf.Variable here?
# inference = ed.KLqp({theta: qtheta}, {x: data})
# inference.run(n_samples=5, n_iter=1000)

# Monte Carlo
T = 10000
qtheta = Empirical(params=tf.Variable(0.5 + tf.zeros([T, 1])))  # Beta(tf.Variable(1.0), tf.Variable(1.0))  # Why need tf.Variable here?
# proposal_theta = Beta(concentration1=1.0, concentration0=1.0, sample_shape=(1,))
# proposal_theta = Normal(loc=theta, scale=0.5)
# inference = ed.MetropolisHastings({theta: qtheta}, {theta: proposal_theta}, {x: data})
inference = ed.HMC({theta: qtheta}, {x: data})
inference.run()

## Results:
qtheta_samples = qtheta.sample(1000).eval()
print(qtheta_samples.mean())
plt.hist(qtheta_samples)
plt.show()
def train(self, filename, total_batches=10, discrete_batch_iters=1000,
          continus_batch_iters=10000):
    sess = tf.Session()
    restorer = tf.train.import_meta_graph(filename, clear_devices=True)
    print("<meta graph imported>")

    [tf.add_to_collection(
        'd_pi_q',
        Empirical(tf.Variable(tf.zeros(tf.shape(var))),
                  name='Empirical_d_pi_q_' +
                  str.split(str.split(var.name, '/')[0], '_')[-2]))
     for var in tf.get_collection('d_pi')]

    for var in tf.get_collection('c_w'):
        idx = str.split(str.split(var.name, '/')[0], '_')[-2]
        tf.add_to_collection(
            'c_w_q',
            Empirical(tf.Variable(tf.zeros(tf.shape(var))),
                      name='Empirical_c_w_q_' + idx))
        print(var.get_shape().as_list())
        tf.add_to_collection(
            'c_b_q',
            Empirical(tf.Variable(tf.zeros(var.get_shape().as_list()[:-1])),
                      name='Empirical_c_b_q_' + idx))
        tf.add_to_collection(
            'c_sigma_q',
            Empirical(tf.Variable(tf.zeros([1])),
                      name='Empirical_c_sigma_q_' + idx))
    print("<variables collected>")

    variable_map = dict(
        zip(tf.get_collection('d') + tf.get_collection('c'),
            self.design_matrix[:, tuple(np.arange(self.num_discrete_variables))]
            .flatten('F').tolist() +
            self.design_matrix[:, self.continus_variable_idxs]
            .flatten('F').tolist()))
    discrete_prior_map = dict(
        zip(tf.get_collection('d_pi'), tf.get_collection('d_pi_q')))
    continus_prior_map = dict(
        zip(tf.get_collection('c_w') + tf.get_collection('c_b') +
            tf.get_collection('c_sigma'),
            tf.get_collection('c_w_q') + tf.get_collection('c_b_q') +
            tf.get_collection('c_sigma_q')))

    print("<running inference>")
    inference_d = ed.Gibbs(
        discrete_prior_map,
        data=dict(variable_map.items() + continus_prior_map.items()))
    inference_c = ed.HMC(
        continus_prior_map,
        data=dict(variable_map.items() + discrete_prior_map.items()))
    inference_d.initialize(n_iter=discrete_batch_iters)
    inference_c.initialize(n_iter=continus_batch_iters)
    sess.run(tf.global_variables_initializer())

    for _ in range(total_batches):
        for _ in range(inference_d.n_iter):
            info_dict = inference_d.update()
            inference_d.print_progress(info_dict)
        inference_d.n_iter += discrete_batch_iters
        inference_d.n_print = int(discrete_batch_iters / 10)
        inference_d.progbar = Progbar(inference_d.n_iter)

        for _ in range(inference_c.n_iter):
            info_dict = inference_c.update()
            inference_c.print_progress(info_dict)
        inference_c.n_iter += continus_batch_iters
        inference_c.n_print = int(continus_batch_iters / 10)
        inference_c.progbar = Progbar(inference_c.n_iter)

    inference_d.finalize()
    inference_c.finalize()

    filename = '.'.join(str.split(filename, '.')[:-1]) + '_trained_model'
    saver = tf.train.Saver()
    saver.save(sess, filename)
    tf.train.export_meta_graph(
        filename + '.meta', as_text=True,
        collection_list=['d_pi', 'd', 'c_w', 'c_b', 'c_sigma', 'c'])
N = 40  # number of data points
D = 1  # number of features

X_train, y_train = build_toy_dataset(N)

X = tf.placeholder(tf.float32, [N, D])
w = Normal(loc=tf.zeros(D), scale=1.0 * tf.ones(D))
b = Normal(loc=tf.zeros([]), scale=1.0 * tf.ones([]))
y = Bernoulli(logits=ed.dot(X, w) + b)

# inference
T = 5000
qw = Empirical(params=tf.Variable(tf.random_normal([T, D])))
qb = Empirical(params=tf.Variable(tf.random_normal([T])))
inference = ed.HMC({w: qw, b: qb}, data={X: X_train, y: y_train})
inference.initialize(n_print=10, step_size=0.6)

tf.global_variables_initializer().run()

# criticism & set up figure
fig = plt.figure(figsize=(8, 8), facecolor='white')
ax = fig.add_subplot(111, frameon=False)
plt.ion()
plt.show(block=False)

n_samples = 50
inputs = np.linspace(-5, 3, num=400, dtype=np.float32).reshape((400, 1))
probs = tf.stack([tf.sigmoid(ed.dot(inputs, qw.sample()) + qb.sample())
                  for _ in range(n_samples)])
def test_monte_carlo(self):
  tf.InteractiveSession()
  ed.set_seed(42)

  # DATA
  X_train = np.zeros([500, 100])
  y_train = np.zeros(500)

  N = X_train.shape[0]  # data points
  D = X_train.shape[1]  # feature
  T = 1  # number of MCMC samples

  # MODEL
  W_1 = Normal(mu=tf.zeros([D, 20]), sigma=tf.ones([D, 20]) * 100)
  W_2 = Normal(mu=tf.zeros([20, 15]), sigma=tf.ones([20, 15]) * 100)
  W_3 = Normal(mu=tf.zeros([15, 1]), sigma=tf.ones([15, 1]) * 100)
  b_1 = Normal(mu=tf.zeros(20), sigma=tf.ones(20) * 100)
  b_2 = Normal(mu=tf.zeros(15), sigma=tf.ones(15) * 100)

  x_ph = tf.placeholder(tf.float32, [N, D])
  y = Bernoulli(logits=four_layer_nn(x_ph, W_1, W_2, W_3, b_1, b_2))

  # INFERENCE
  qW_1 = Empirical(params=tf.Variable(tf.random_normal([T, D, 20])))
  qW_2 = Empirical(params=tf.Variable(tf.random_normal([T, 20, 15])))
  qW_3 = Empirical(params=tf.Variable(tf.random_normal([T, 15, 1])))
  qb_1 = Empirical(params=tf.Variable(tf.random_normal([T, 20])))
  qb_2 = Empirical(params=tf.Variable(tf.random_normal([T, 15])))

  # note ideally these would be separate test methods; there's an
  # issue with the tensorflow graph when re-running the above
  # unfortunately
  inference = ed.HMC(
      {W_1: qW_1, b_1: qb_1, W_2: qW_2, b_2: qb_2, W_3: qW_3},
      data={y: y_train, x_ph: X_train})
  inference.run()

  inference = ed.SGLD(
      {W_1: qW_1, b_1: qb_1, W_2: qW_2, b_2: qb_2, W_3: qW_3},
      data={y: y_train, x_ph: X_train})
  inference.run()

  inference = ed.MetropolisHastings(
      {W_1: qW_1, b_1: qb_1, W_2: qW_2, b_2: qb_2, W_3: qW_3},
      {W_1: W_1, b_1: b_1, W_2: W_2, b_2: b_2, W_3: W_3},
      data={y: y_train, x_ph: X_train})
  inference.run()
import edward as ed
import numpy as np
import tensorflow as tf

from edward.models import Bernoulli, Empirical, Normal
from scipy.special import expit

ed.set_seed(123)
N = 5810  # number of data points
D = 54  # number of features

# DATA
w_true = np.random.randn(D)
X_data = np.random.randn(N, D)
p = expit(np.dot(X_data, w_true))
y_data = np.array([np.random.binomial(1, i) for i in p])

# MODEL
X = tf.Variable(X_data.astype(np.float32), trainable=False)
w = Normal(mu=tf.zeros(D), sigma=tf.ones(D))
y = Bernoulli(logits=ed.dot(X, w))

# INFERENCE
T = 5000
qw = Empirical(params=tf.Variable(tf.zeros([T, D])))
inference = ed.HMC({w: qw}, data={y: y_data})
inference.run(step_size=0.05)

# CRITICISM
print("Mean squared error in true values to inferred posterior mean:")
print(tf.reduce_mean(tf.square(w_true - qw.mean())).eval())
def main(_):
    outdir = setup_outdir()
    ed.set_seed(FLAGS.seed)

    ((Xtrain, ytrain), (Xtest, ytest)) = blr_utils.get_data()
    N, D = Xtrain.shape
    N_test, D_test = Xtest.shape

    print("Xtrain")
    print(Xtrain)
    print(Xtrain.shape)

    if 'synthetic' in FLAGS.exp:
        w = Normal(loc=tf.zeros(D), scale=1.0 * tf.ones(D))
        X = tf.placeholder(tf.float32, [N, D])
        y = Bernoulli(logits=ed.dot(X, w))

        # n_posterior_samples = 100000
        n_posterior_samples = 10
        qw_empirical = Empirical(
            params=tf.get_variable("qw/params", [n_posterior_samples, D]))
        inference = ed.HMC({w: qw_empirical}, data={X: Xtrain, y: ytrain})
        inference.initialize(n_print=10, step_size=0.6)

        tf.global_variables_initializer().run()
        inference.run()

        empirical_samples = qw_empirical.sample(50).eval()
        # fig, ax = plt.subplots()
        # ax.scatter(posterior_samples[:, 0], posterior_samples[:, 1])
        # plt.show()

    weights, q_components = [], []
    (ll_trains, ll_tests, bin_ac_trains, bin_ac_tests,
     elbos, rocs, gaps) = [], [], [], [], [], [], []
    total_time, times = 0., []

    for iter in range(0, FLAGS.n_fw_iter):
        print("iter %d" % iter)
        g = tf.Graph()
        with g.as_default():
            sess = tf.InteractiveSession()
            with sess.as_default():
                tf.set_random_seed(FLAGS.seed)

                # MODEL
                w = Normal(loc=tf.zeros(D), scale=1.0 * tf.ones(D))
                X = tf.placeholder(tf.float32, [N, D])
                y = Bernoulli(logits=ed.dot(X, w))

                X_test = tf.placeholder(tf.float32, [N_test, D_test])
                y_test = Bernoulli(logits=ed.dot(X_test, w))

                qw = construct_base_dist([D], iter, 'qw')
                inference_time_start = time.time()
                inference = relbo.KLqp({w: qw},
                                       fw_iterates=get_fw_iterates(
                                           weights, w, q_components),
                                       data={X: Xtrain, y: ytrain},
                                       fw_iter=iter)
                tf.global_variables_initializer().run()
                inference.run(n_iter=FLAGS.LMO_iter)
                inference_time_end = time.time()
                total_time += float(inference_time_end - inference_time_start)

                joint = Joint(Xtrain, ytrain, sess)
                if iter > 0:
                    qtw_prev = build_mixture(weights, q_components)
                    gap = compute_duality_gap(joint, qtw_prev, qw)
                    gaps.append(gap)
                    np.savetxt(os.path.join(outdir, "gaps.csv"), gaps,
                               delimiter=',')
                    print("duality gap", gap)

                # update weights
                gamma = 2. / (iter + 2.)
                weights = [(1. - gamma) * w for w in weights]
                weights.append(gamma)

                # update components
                q_components = update_iterate(q_components, qw)

                if len(q_components) > 1 and FLAGS.fw_variant == 'fc':
                    print("running fully corrective")
                    # overwrite the weights
                    weights = fully_corrective(
                        build_mixture(weights, q_components), joint)

                    if True:
                        # remove inactive iterates
                        weights = list(weights)
                        for i in reversed(range(len(weights))):
                            if weights[i] == 0:
                                del weights[i]
                                del q_components[i]
                        weights = np.array(weights)  # TODO type acrobatics to make elements deletable
                elif len(q_components) > 1 and FLAGS.fw_variant == 'line_search':
                    print("running line search")
                    weights = line_search(
                        build_mixture(weights[:-1], q_components[:-1]), qw, joint)

                qtw_new = build_mixture(weights, q_components)

                if False:
                    for i, comp in enumerate(qtw_new.components):
                        print("component", i, "\tmean", comp.mean().eval(),
                              "\tstddev", comp.stddev().eval())

                train_lls = [
                    sess.run(y.log_prob(ytrain),
                             feed_dict={X: Xtrain, w: qtw_new.sample().eval()})
                    for _ in range(50)
                ]
                train_lls = np.mean(train_lls, axis=0)
                ll_trains.append((np.mean(train_lls), np.std(train_lls)))

                test_lls = [
                    sess.run(y_test.log_prob(ytest),
                             feed_dict={X_test: Xtest,
                                        w: qtw_new.sample().eval()})
                    for _ in range(50)
                ]
                test_lls = np.mean(test_lls, axis=0)
                ll_tests.append((np.mean(test_lls), np.std(test_lls)))

                logits = np.mean(
                    [np.dot(Xtest, qtw_new.sample().eval()) for _ in range(50)],
                    axis=0)
                ypred = tf.sigmoid(logits).eval()
                roc_score = roc_auc_score(ytest, ypred)
                rocs.append(roc_score)

                print('roc_score', roc_score)
                print('ytrain', np.mean(train_lls), np.std(train_lls))
                print('ytest', np.mean(test_lls), np.std(test_lls))

                order = np.argsort(ytest)
                plt.scatter(range(len(ypred)), ypred[order], c=ytest[order])
                plt.savefig(os.path.join(outdir, 'ypred%d.pdf' % iter))
                plt.close()

                np.savetxt(os.path.join(outdir, "train_lls.csv"), ll_trains,
                           delimiter=',')
                np.savetxt(os.path.join(outdir, "test_lls.csv"), ll_tests,
                           delimiter=',')
                np.savetxt(os.path.join(outdir, "rocs.csv"), rocs, delimiter=',')

                x_post = ed.copy(y, {w: qtw_new})
                x_post_t = ed.copy(y_test, {w: qtw_new})

                print('log lik train',
                      ed.evaluate('log_likelihood',
                                  data={x_post: ytrain, X: Xtrain}))
                print('log lik test',
                      ed.evaluate('log_likelihood',
                                  data={x_post_t: ytest, X_test: Xtest}))

                # ll_train = ed.evaluate('log_likelihood', data={x_post: ytrain, X: Xtrain})
                # ll_test = ed.evaluate('log_likelihood', data={x_post_t: ytest, X_test: Xtest})
                bin_ac_train = ed.evaluate('binary_accuracy',
                                           data={x_post: ytrain, X: Xtrain})
                bin_ac_test = ed.evaluate('binary_accuracy',
                                          data={x_post_t: ytest, X_test: Xtest})
                print('binary accuracy train', bin_ac_train)
                print('binary accuracy test', bin_ac_test)

                # latest_elbo = elbo(qtw_new, joint, w)
                # foo = ed.KLqp({w: qtw_new}, data={X: Xtrain, y: ytrain})
                # op = myloss(foo)
                # print("myloss",
                #       sess.run(op[0], feed_dict={X: Xtrain, y: ytrain}),
                #       sess.run(op[1], feed_dict={X: Xtrain, y: ytrain}))

                # append_and_save(ll_trains, ll_train, "loglik_train.csv", np.savetxt)
                # append_and_save(ll_tests, ll_train, "loglik_test.csv", np.savetxt)
                # append_and_save(bin_ac_trains, bin_ac_train, "bin_acc_train.csv", np.savetxt)
                # append_and_save(bin_ac_tests, bin_ac_test, "bin_acc_test.csv", np.savetxt)
                # append_and_save(elbos, latest_elbo, "elbo.csv", np.savetxt)

                # print('log-likelihood train ', ll_train)
                # print('log-likelihood test ', ll_test)
                # print('binary_accuracy train ', bin_ac_train)
                # print('binary_accuracy test ', bin_ac_test)
                # print('elbo', latest_elbo)

                times.append(total_time)
                np.savetxt(os.path.join(setup_outdir(), 'times.csv'), times)

        tf.reset_default_graph()
)

# Inference arguments
latent_vars = {mu: q_mu, inv_softplus_sigma: q_inv_softplus_sigma}
data = {y: y_train}

# Inference
inference = ed.KLqp(latent_vars, data)
inference.run(n_samples=5, n_iter=2500)

print(q_mu.mean().eval())
print(q_inv_softplus_sigma.mean().eval())

# Empirical Model with Sampler
# Posterior distribution families
q_mu = Empirical(params=tf.Variable(tf.random_normal([2000])))
q_inv_softplus_sigma = Empirical(params=tf.Variable(tf.random_normal([2000])))

# Inference arguments
latent_vars = {mu: q_mu, inv_softplus_sigma: q_inv_softplus_sigma}
data = {y: y_train}

# Inference
inference = ed.HMC(latent_vars, data)
inference.run(step_size=0.003, n_steps=5)

print(tf.reduce_mean(q_mu.params[1000:]).eval())
print(tf.nn.softplus(tf.reduce_mean(q_inv_softplus_sigma.params[1000:])).eval())
os.makedirs(IMG_DIR)

# DATA
mnist = input_data.read_data_sets(DATA_DIR, one_hot=True)
x_train, _ = mnist.train.next_batch(N)

# MODEL
z = Normal(mu=tf.zeros([N, d]), sigma=tf.ones([N, d]))
logits = generative_network(z)
x = Bernoulli(logits=logits)

# INFERENCE
T = int(100 * 1000)
qz = Empirical(params=tf.Variable(tf.random_normal([T, N, d])))

inference_e = ed.HMC({z: qz}, data={x: x_train})
inference_e.initialize()

inference_m = ed.MAP(data={x: x_train, z: tf.gather(qz.params, inference_e.t)})
optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
inference_m.initialize(optimizer=optimizer)

init = tf.global_variables_initializer()
init.run()

n_iter_per_epoch = 100
n_epoch = T // n_iter_per_epoch
for epoch in range(n_epoch):
  avg_loss = 0.0

  widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()]