def test_beta_bernoulli(self):
  with self.test_session() as sess:
    x_data = np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1])

    p = Beta(1.0, 1.0)
    x = Bernoulli(probs=p, sample_shape=10)

    qp = Empirical(tf.Variable(tf.zeros(1000)))
    inference = ed.Gibbs({p: qp}, data={x: x_data})
    inference.run()

    true_posterior = Beta(3.0, 9.0)

    val_est, val_true = sess.run([qp.mean(), true_posterior.mean()])
    self.assertAllClose(val_est, val_true, rtol=1e-2, atol=1e-2)

    val_est, val_true = sess.run([qp.variance(), true_posterior.variance()])
    self.assertAllClose(val_est, val_true, rtol=1e-2, atol=1e-2)

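# A hedged aside (not part of the test above): the reference Beta(3.0, 9.0)
# follows from Beta-Bernoulli conjugacy. With a Beta(1, 1) prior and the ten
# observations above (2 ones, 8 zeros), the posterior is
# Beta(1 + 2, 1 + 8) = Beta(3, 9). Quick standalone check in NumPy:
import numpy as np

x_data = np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1])
a_post = 1.0 + x_data.sum()                  # 1 + number of ones = 3
b_post = 1.0 + (x_data.size - x_data.sum())  # 1 + number of zeros = 9
print(a_post, b_post)  # 3.0 9.0
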
def test_normalnormal_run(self):
  with self.test_session() as sess:
    x_data = np.array([0.0] * 50, dtype=np.float32)

    mu = Normal(loc=0.0, scale=1.0)
    x = Normal(loc=tf.ones(50) * mu, scale=1.0)

    qmu = Empirical(params=tf.Variable(tf.ones(5000)))

    # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140)
    inference = ed.SGLD({mu: qmu}, data={x: x_data})
    inference.run(step_size=0.10)

    self.assertAllClose(qmu.mean().eval(), 0, rtol=1e-2, atol=1.5e-2)
    self.assertAllClose(qmu.stddev().eval(), np.sqrt(1 / 51),
                        rtol=5e-2, atol=5e-2)

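# A hedged aside explaining the "analytic solution" comment above: with a
# Normal(0, 1) prior on mu and a Normal(mu, 1) likelihood over n = 50
# observations that are all 0.0, conjugacy gives posterior precision
# 1 + n = 51 and posterior mean 0, i.e. N(0, 1/51) with standard deviation
# sqrt(1/51) ~= 0.140. Quick standalone check:
import numpy as np

n = 50
post_var = 1.0 / (1.0 + n)  # prior precision 1 + n * likelihood precision 1
print(np.sqrt(post_var))    # ~0.140
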
def test_normalnormal_run(self):
  with self.test_session() as sess:
    x_data = np.array([0.0] * 50, dtype=np.float32)

    mu = Normal(mu=0.0, sigma=1.0)
    x = Normal(mu=tf.ones(50) * mu, sigma=1.0)

    qmu = Empirical(params=tf.Variable(tf.ones(2000)))
    proposal_mu = Normal(mu=0.0, sigma=1.0)

    # analytic solution: N(mu=0.0, sigma=\sqrt{1/51}=0.140)
    inference = ed.MetropolisHastings({mu: qmu}, {mu: proposal_mu},
                                      data={x: x_data})
    inference.run()

    self.assertAllClose(qmu.mean().eval(), 0, rtol=1e-2, atol=1e-2)
    self.assertAllClose(qmu.std().eval(), np.sqrt(1 / 51),
                        rtol=1e-2, atol=1e-2)

def main(_):
  ed.set_seed(42)

  # MODEL
  z = MultivariateNormalTriL(
      loc=tf.ones(2),
      scale_tril=tf.cholesky(tf.constant([[1.0, 0.8], [0.8, 1.0]])))

  # INFERENCE
  qz = Empirical(params=tf.get_variable("qz/params", [2000, 2]))
  inference = ed.SGLD({z: qz})
  inference.run(step_size=5.0)

  # CRITICISM
  sess = ed.get_session()
  mean, stddev = sess.run([qz.mean(), qz.stddev()])
  print("Inferred posterior mean:")
  print(mean)
  print("Inferred posterior stddev:")
  print(stddev)

def _test_normal_normal(self, default, dtype):
  with self.test_session() as sess:
    x_data = np.array([0.0] * 50, dtype=np.float32)

    mu = Normal(loc=tf.constant(0.0, dtype=dtype),
                scale=tf.constant(1.0, dtype=dtype))
    x = Normal(loc=mu, scale=tf.constant(1.0, dtype=dtype), sample_shape=50)

    n_samples = 2000
    # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140)
    if not default:
      qmu = Empirical(params=tf.Variable(tf.ones(n_samples, dtype=dtype)))
      inference = ed.ReplicaExchangeMC({mu: qmu}, {mu: mu}, data={x: x_data})
    else:
      inference = ed.ReplicaExchangeMC([mu], {mu: mu}, data={x: x_data})
      qmu = inference.latent_vars[mu]
    inference.run()

    self.assertAllClose(qmu.mean().eval(), 0, rtol=1e-1, atol=1e-1)
    self.assertAllClose(qmu.stddev().eval(), np.sqrt(1 / 51),
                        rtol=1e-1, atol=1e-1)

    old_t, old_n_accept = sess.run([inference.t, inference.n_accept])
    if not default:
      self.assertEqual(old_t, n_samples)
    else:
      self.assertEqual(old_t, 1e4)
    self.assertGreater(old_n_accept, 0.1)

    sess.run(inference.reset)
    new_t, new_n_accept = sess.run([inference.t, inference.n_accept])
    self.assertEqual(new_t, 0)
    self.assertEqual(new_n_accept, 0)

def test_normalnormal_float32(self):
  with self.test_session() as sess:
    x_data = np.array([0.0] * 50, dtype=np.float32)

    mu = Normal(loc=tf.constant(0.0, dtype=tf.float64),
                scale=tf.constant(1.0, dtype=tf.float64))
    x = Normal(loc=mu, scale=tf.constant(1.0, dtype=tf.float64),
               sample_shape=50)

    n_samples = 2000
    qmu = Empirical(params=tf.Variable(tf.ones(n_samples, dtype=tf.float64)))

    # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140)
    inference = ed.MetropolisHastings({mu: qmu}, {mu: mu}, data={x: x_data})
    inference.run()

    self.assertAllClose(qmu.mean().eval(), 0, rtol=1e-1, atol=1e-1)
    self.assertAllClose(qmu.stddev().eval(), np.sqrt(1 / 51),
                        rtol=1e-1, atol=1e-1)

def __init__(self, latent_vars, proposal_vars, data=None,
             inverse_temperatures=np.logspace(0, -2, 5),
             exchange_freq=0.1):
  """Create an inference algorithm.

  Args:
    proposal_vars: dict of RandomVariable to RandomVariable.
      Collection of random variables to perform inference on; each is
      bound to a proposal distribution $g(z' \mid z)$.
    inverse_temperatures: list of inverse temperatures.
    exchange_freq: frequency of exchanging replicas.
  """
  check_latent_vars(proposal_vars)
  self.proposal_vars = proposal_vars
  super(ReplicaExchangeMC, self).__init__(latent_vars, data)

  self.n_replica = len(inverse_temperatures)
  if inverse_temperatures[0] != 1:
    raise ValueError("inverse_temperatures[0] must be 1.")
  self.inverse_temperatures = tf.cast(
      inverse_temperatures, dtype=list(self.latent_vars)[0].dtype)

  # Make replicas.
  self.replica_vars = []
  for i in range(self.n_replica):
    self.replica_vars.append({
        z: Empirical(params=tf.Variable(tf.zeros(
            qz.params.shape, dtype=self.latent_vars[z].dtype)))
        for z, qz in six.iteritems(self.latent_vars)})

  self.exchange_freq = exchange_freq

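# A hedged usage sketch for the constructor above, modeled on the
# Normal-Normal test earlier in this collection; the model and variable names
# here are illustrative, not part of the original source.
import edward as ed
import numpy as np
import tensorflow as tf
from edward.models import Empirical, Normal

mu = Normal(loc=0.0, scale=1.0)
x = Normal(loc=mu, scale=1.0, sample_shape=50)
qmu = Empirical(params=tf.Variable(tf.ones(2000)))

# Proposal centered at the current state; five replicas with inverse
# temperatures spaced logarithmically from 1 to 0.01 (the defaults above).
inference = ed.ReplicaExchangeMC(
    {mu: qmu}, {mu: mu}, data={x: np.zeros(50, dtype=np.float32)},
    inverse_temperatures=np.logspace(0, -2, 5), exchange_freq=0.1)
inference.run()
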
def __init__(self, latent_vars=None, data=None):
  """Create an inference algorithm.

  Args:
    latent_vars: list or dict.
      Collection of random variables (of type `RandomVariable` or
      `tf.Tensor`) to perform inference on. If list, each random variable
      will be approximated using an `Empirical` random variable that is
      defined internally (with unconstrained support). If dictionary,
      each value in the dictionary must be an `Empirical` random variable.
    data: dict.
      Data dictionary which binds observed variables (of type
      `RandomVariable` or `tf.Tensor`) to their realizations (of type
      `tf.Tensor`). It can also bind placeholders (of type `tf.Tensor`)
      used in the model to their realizations.
  """
  if isinstance(latent_vars, list):
    with tf.variable_scope(None, default_name="posterior"):
      latent_vars = {
          z: Empirical(params=tf.Variable(tf.zeros(
              [1e4] + z.batch_shape.concatenate(z.event_shape).as_list())))
          for z in latent_vars}
  elif isinstance(latent_vars, dict):
    for qz in six.itervalues(latent_vars):
      if not isinstance(qz, Empirical):
        raise TypeError("Posterior approximation must consist of only "
                        "Empirical random variables.")
      elif len(qz.sample_shape) != 0:
        raise ValueError("Empirical posterior approximations must have "
                         "a scalar sample shape.")

  super(MonteCarlo, self).__init__(latent_vars, data)

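# A hedged sketch of the two latent_vars forms described in the docstring
# above, using ed.HMC (a MonteCarlo subclass) for concreteness; the model is
# illustrative only.
import edward as ed
import tensorflow as tf
from edward.models import Empirical, Normal

mu = Normal(loc=0.0, scale=1.0)
x = Normal(loc=mu, scale=1.0, sample_shape=50)
x_data = tf.zeros(50)

# Dict form: the Empirical approximation is supplied explicitly.
qmu = Empirical(params=tf.Variable(tf.zeros(1000)))
inference = ed.HMC({mu: qmu}, data={x: x_data})

# List form: an Empirical with 1e4 samples is built internally and can be
# retrieved from inference.latent_vars.
inference = ed.HMC([mu], data={x: x_data})
qmu_auto = inference.latent_vars[mu]
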
def main(_):
  ed.set_seed(42)

  # DATA
  x_data = np.array([0.0] * 50)

  # MODEL: Normal-Normal with known variance
  mu = Normal(loc=0.0, scale=1.0)
  x = Normal(loc=mu, scale=1.0, sample_shape=50)

  # INFERENCE
  qmu = Empirical(params=tf.get_variable(
      "qmu/params", [1000], initializer=tf.zeros_initializer()))

  # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140)
  inference = ed.HMC({mu: qmu}, data={x: x_data})
  inference.run()

  # CRITICISM
  sess = ed.get_session()
  mean, stddev = sess.run([qmu.mean(), qmu.stddev()])
  print("Inferred posterior mean:")
  print(mean)
  print("Inferred posterior stddev:")
  print(stddev)

  # Check convergence with visual diagnostics.
  samples = sess.run(qmu.params)

  # Plot histogram.
  plt.hist(samples, bins='auto')
  plt.show()

  # Trace plot.
  plt.plot(samples)
  plt.show()

def test_hmc_custom(self):
  with self.test_session() as sess:
    x = TransformedDistribution(
        distribution=Normal(1.0, 1.0),
        bijector=tf.contrib.distributions.bijectors.Softplus())
    x.support = 'nonnegative'
    qx = Empirical(tf.Variable(tf.random_normal([1000])))

    inference = ed.HMC({x: qx})
    inference.initialize(auto_transform=True, step_size=0.8)
    tf.global_variables_initializer().run()
    for _ in range(inference.n_iter):
      info_dict = inference.update()

    # Check approximation on constrained space has same moments as
    # target distribution.
    n_samples = 10000
    x_unconstrained = inference.transformations[x]
    qx_constrained_params = x_unconstrained.bijector.inverse(qx.params)
    x_mean, x_var = tf.nn.moments(x.sample(n_samples), 0)
    qx_mean, qx_var = tf.nn.moments(qx_constrained_params[500:], 0)
    stats = sess.run([x_mean, qx_mean, x_var, qx_var])
    self.assertAllClose(stats[0], stats[1], rtol=1e-1, atol=1e-1)
    self.assertAllClose(stats[2], stats[3], rtol=1e-1, atol=1e-1)

def main(_):
  # Data generation (known mean)
  xn_data = np.random.normal(FLAGS.loc, FLAGS.scale, FLAGS.N)
  print("scale: {}".format(FLAGS.scale))

  # Prior definition
  alpha = 0.5
  beta = 0.7

  # Posterior inference
  # Probabilistic model
  ig = InverseGamma(alpha, beta)
  xn = Normal(FLAGS.loc, tf.sqrt(ig), sample_shape=FLAGS.N)

  # Inference
  qig = Empirical(params=tf.get_variable(
      "qig/params", [1000], initializer=tf.constant_initializer(0.5)))
  proposal_ig = InverseGamma(2.0, 2.0)
  inference = ed.MetropolisHastings({ig: qig}, {ig: proposal_ig},
                                    data={xn: xn_data})
  inference.run()

  sess = ed.get_session()
  print("Inferred scale: {}".format(sess.run(tf.sqrt(qig.mean()))))

def bayes_mult_cmd(table_file, metadata_file, formula, output_file):
  # metadata = _type_cast_to_float(metadata.copy())
  metadata = pd.read_table(metadata_file, index_col=0)
  G_data = dmatrix(formula, metadata, return_type='dataframe')
  table = load_table(table_file)

  # basic filtering parameters
  soil_filter = lambda val, id_, md: id_ in metadata.index
  read_filter = lambda val, id_, md: np.sum(val) > 10
  # sparse_filter = lambda val, id_, md: np.mean(val > 0) > 0.1
  sample_filter = lambda val, id_, md: np.sum(val) > 1000

  table = table.filter(soil_filter, axis='sample')
  table = table.filter(sample_filter, axis='sample')
  table = table.filter(read_filter, axis='observation')
  # table = table.filter(sparse_filter, axis='observation')
  print(table.shape)

  y_data = pd.DataFrame(np.array(table.matrix_data.todense()).T,
                        index=table.ids(axis='sample'),
                        columns=table.ids(axis='observation'))

  y_data, G_data = y_data.align(G_data, axis=0, join='inner')

  psi = _gram_schmidt_basis(y_data.shape[1])
  G_data = G_data.values
  y_data = y_data.values
  N, D = y_data.shape
  p = G_data.shape[1]  # number of covariates
  r = G_data.shape[1]  # rank of covariance matrix

  psi = tf.convert_to_tensor(psi, dtype=tf.float32)
  n = tf.convert_to_tensor(y_data.sum(axis=1), dtype=tf.float32)

  # hack to get multinomial working
  def _sample_n(self, n=1, seed=None):
    # define Python function which returns samples as a Numpy array
    def np_sample(p, n):
      return multinomial.rvs(p=p, n=n, random_state=seed).astype(np.float32)

    # wrap python function as tensorflow op
    val = tf.py_func(np_sample, [self.probs, n], [tf.float32])[0]
    # set shape from unknown shape
    batch_event_shape = self.batch_shape.concatenate(self.event_shape)
    shape = tf.concat(
        [tf.expand_dims(n, 0), tf.convert_to_tensor(batch_event_shape)], 0)
    val = tf.reshape(val, shape)
    return val

  Multinomial._sample_n = _sample_n

  # dummy variable for gradient
  G = tf.placeholder(tf.float32, [N, p])

  b = Exponential(rate=1.0)
  B = Normal(loc=tf.zeros([p, D - 1]),
             scale=tf.ones([p, D - 1]))

  # Factorization of covariance matrix
  # http://edwardlib.org/tutorials/klqp
  l = Exponential(rate=1.0)
  L = Normal(loc=tf.zeros([p, D - 1]),
             scale=tf.ones([p, D - 1]))
  z = Normal(loc=tf.zeros([N, p]),
             scale=tf.ones([N, p]))

  # Cholesky trick to get multivariate normal
  v = tf.matmul(G, B) + tf.matmul(z, L)

  # get clr transformed values
  eta = tf.matmul(v, psi)

  Y = Multinomial(total_count=n, logits=eta)

  T = 100000  # the number of mixin samples from MCMC sampling

  qb = PointMass(params=tf.Variable(tf.random_normal([])))
  qB = PointMass(params=tf.Variable(tf.random_normal([p, D - 1])))
  qz = Empirical(params=tf.Variable(tf.random_normal([T, N, p])))
  ql = PointMass(params=tf.Variable(tf.random_normal([])))
  qL = PointMass(params=tf.Variable(tf.random_normal([p, D - 1])))

  # Imputation
  inference_z = ed.SGLD(
      {z: qz},
      data={G: G_data, Y: y_data, B: qB, L: qL})

  # Maximization
  inference_BL = ed.MAP(
      {B: qB, L: qL, b: qb, l: ql},
      data={G: G_data, Y: y_data, z: qz})

  inference_z.initialize(step_size=1e-10)
  inference_BL.initialize(n_iter=1000)

  sess = ed.get_session()
  saver = tf.train.Saver()
  tf.global_variables_initializer().run()

  for i in range(inference_BL.n_iter):
    inference_z.update()  # e-step
    # will need to compute the expectation of z
    info_dict = inference_BL.update()  # m-step
    inference_BL.print_progress(info_dict)

  save_path = saver.save(sess, output_file)
  print("Model saved in file: %s" % save_path)
  pickle.dump({'qB': sess.run(qB.mean()),
               'qL': sess.run(qL.mean()),
               'qz': sess.run(qz.mean())},
              open(output_file + '.params.pickle', 'wb'))

ed.set_seed(42)

N = 40  # number of data points
D = 1   # number of features

X_train, y_train = build_toy_dataset(N)

X = tf.placeholder(tf.float32, [N, D])
w = Normal(loc=tf.zeros(D), scale=1.0 * tf.ones(D))
b = Normal(loc=tf.zeros([]), scale=1.0 * tf.ones([]))
y = Bernoulli(logits=ed.dot(X, w) + b)

# inference
T = 5000
qw = Empirical(params=tf.Variable(tf.random_normal([T, D])))
qb = Empirical(params=tf.Variable(tf.random_normal([T])))

inference = ed.HMC({w: qw, b: qb}, data={X: X_train, y: y_train})
inference.initialize(n_print=10, step_size=0.6)

tf.global_variables_initializer().run()

# criticism & set up figure
fig = plt.figure(figsize=(8, 8), facecolor='white')
ax = fig.add_subplot(111, frameon=False)
plt.ion()
plt.show(block=False)

n_samples = 50
inputs = np.linspace(-5, 3, num=400, dtype=np.float32).reshape((400, 1))

def ed_graph_2(disc=1):
  # Priors
  if str(sys.argv[4]) == 'laplace':
    W_0 = Laplace(loc=tf.zeros([D, n_hidden]), scale=tf.ones([D, n_hidden]))
    W_1 = Laplace(loc=tf.zeros([n_hidden, K]),
                  scale=(std**2 / n_hidden) * tf.ones([n_hidden, K]))
    b_0 = Laplace(loc=tf.zeros(n_hidden), scale=tf.ones(n_hidden))
    b_1 = Laplace(loc=tf.zeros(K), scale=(std**2 / n_hidden) * tf.ones(K))

  if str(sys.argv[4]) == 'normal':
    W_0 = Normal(loc=tf.zeros([D, n_hidden]), scale=tf.ones([D, n_hidden]))
    W_1 = Normal(loc=tf.zeros([n_hidden, K]),
                 scale=std * n_hidden**(-.5) * tf.ones([n_hidden, K]))
    b_0 = Normal(loc=tf.zeros(n_hidden), scale=tf.ones(n_hidden))
    b_1 = Normal(loc=tf.zeros(K), scale=std * n_hidden**(-.5) * tf.ones(K))

  if str(sys.argv[4]) == 'T':
    W_0 = StudentT(df=df * tf.ones([D, n_hidden]),
                   loc=tf.zeros([D, n_hidden]),
                   scale=tf.ones([D, n_hidden]))
    W_1 = StudentT(df=df * tf.ones([n_hidden, K]),
                   loc=tf.zeros([n_hidden, K]),
                   scale=std**2 / n_hidden * tf.ones([n_hidden, K]))
    b_0 = StudentT(df=df * tf.ones([n_hidden]),
                   loc=tf.zeros(n_hidden),
                   scale=tf.ones(n_hidden))
    b_1 = StudentT(df=df * tf.ones([K]),
                   loc=tf.zeros(K),
                   scale=std**2 / n_hidden * tf.ones(K))

  # Inputs
  x = tf.placeholder(tf.float32, [None, None])
  # Regression output
  y = Normal(loc=nn(x, W_0, b_0, W_1, b_1),
             scale=std_out * tf.ones([tf.shape(x)[0]]))
  # We use a placeholder for the labels in anticipation of the training data.
  y_ph = tf.placeholder(tf.float32, [None])

  # Use a placeholder for the pre-trained posteriors
  w0 = tf.placeholder(tf.float32, [n_samp, D, n_hidden])
  w1 = tf.placeholder(tf.float32, [n_samp, n_hidden, K])
  b0 = tf.placeholder(tf.float32, [n_samp, n_hidden])
  b1 = tf.placeholder(tf.float32, [n_samp, K])

  # Empirical distribution
  qW_0 = Empirical(params=tf.Variable(w0))
  qW_1 = Empirical(params=tf.Variable(w1))
  qb_0 = Empirical(params=tf.Variable(b0))
  qb_1 = Empirical(params=tf.Variable(b1))

  if str(sys.argv[3]) == 'hmc':
    inference = ed.HMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1},
                       data={y: y_ph})
  if str(sys.argv[3]) == 'sghmc':
    inference = ed.SGHMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1},
                         data={y: y_ph})

  # Initialise the inference variables
  if str(sys.argv[3]) == 'hmc':
    inference.initialize(step_size=disc * leap_size, n_steps=step_no,
                         n_print=100)
  if str(sys.argv[3]) == 'sghmc':
    inference.initialize(step_size=disc * leap_size, friction=0.4,
                         n_print=100)

  return ((x, y), y_ph, W_0, b_0, W_1, b_1, qW_0, qb_0, qW_1, qb_1,
          inference, w0, w1, b0, b1)

def ed_graph_init():
  # Graph for prior distributions
  if str(sys.argv[4]) == 'laplace':
    W_0 = Laplace(loc=tf.zeros([D, n_hidden]), scale=tf.ones([D, n_hidden]))
    W_1 = Laplace(loc=tf.zeros([n_hidden, K]),
                  scale=(std**2 / n_hidden) * tf.ones([n_hidden, K]))
    b_0 = Laplace(loc=tf.zeros(n_hidden), scale=tf.ones(n_hidden))
    b_1 = Laplace(loc=tf.zeros(K), scale=(std**2 / n_hidden) * tf.ones(K))

  if str(sys.argv[4]) == 'normal':
    W_0 = Normal(loc=tf.zeros([D, n_hidden]), scale=tf.ones([D, n_hidden]))
    W_1 = Normal(loc=tf.zeros([n_hidden, K]),
                 scale=std * n_hidden**(-.5) * tf.ones([n_hidden, K]))
    b_0 = Normal(loc=tf.zeros(n_hidden), scale=tf.ones(n_hidden))
    b_1 = Normal(loc=tf.zeros(K), scale=std * n_hidden**(-.5) * tf.ones(K))

  if str(sys.argv[4]) == 'T':
    W_0 = StudentT(df=df * tf.ones([D, n_hidden]),
                   loc=tf.zeros([D, n_hidden]),
                   scale=tf.ones([D, n_hidden]))
    W_1 = StudentT(df=df * tf.ones([n_hidden, K]),
                   loc=tf.zeros([n_hidden, K]),
                   scale=std**2 / n_hidden * tf.ones([n_hidden, K]))
    b_0 = StudentT(df=df * tf.ones([n_hidden]),
                   loc=tf.zeros(n_hidden),
                   scale=tf.ones(n_hidden))
    b_1 = StudentT(df=df * tf.ones([K]),
                   loc=tf.zeros(K),
                   scale=std**2 / n_hidden * tf.ones(K))

  # Inputs
  x = tf.placeholder(tf.float32, [None, D])
  # Regression likelihood
  y = Normal(loc=nn(x, W_0, b_0, W_1, b_1),
             scale=std_out * tf.ones([tf.shape(x)[0]]))
  # We use a placeholder for the labels in anticipation of the training data.
  y_ph = tf.placeholder(tf.float32, [None])

  # Graph for posterior distribution
  if str(sys.argv[4]) == 'normal':
    qW_0 = Empirical(
        params=tf.Variable(tf.random_normal([n_samp, D, n_hidden])))
    qW_1 = Empirical(params=tf.Variable(
        tf.random_normal([n_samp, n_hidden, K],
                         stddev=std * (n_hidden**-.5))))
    qb_0 = Empirical(
        params=tf.Variable(tf.random_normal([n_samp, n_hidden])))
    qb_1 = Empirical(params=tf.Variable(
        tf.random_normal([n_samp, K], stddev=std * (n_hidden**-.5))))

  if str(sys.argv[4]) == 'laplace' or str(sys.argv[4]) == 'T':
    # Use a placeholder otherwise cannot assign a tensor > 2GB
    w0 = tf.placeholder(tf.float32, [n_samp, D, n_hidden])
    w1 = tf.placeholder(tf.float32, [n_samp, n_hidden, K])
    b0 = tf.placeholder(tf.float32, [n_samp, n_hidden])
    b1 = tf.placeholder(tf.float32, [n_samp, K])
    # Empirical distribution
    qW_0 = Empirical(params=tf.Variable(w0))
    qW_1 = Empirical(params=tf.Variable(w1))
    qb_0 = Empirical(params=tf.Variable(b0))
    qb_1 = Empirical(params=tf.Variable(b1))

  # Build inference graph
  if str(sys.argv[3]) == 'hmc':
    inference = ed.HMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1},
                       data={y: y_ph})
  if str(sys.argv[3]) == 'sghmc':
    inference = ed.SGHMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1},
                         data={y: y_ph})

  # Initialise the inference variables
  if str(sys.argv[3]) == 'hmc':
    inference.initialize(step_size=leap_size, n_steps=step_no, n_print=100)
  if str(sys.argv[3]) == 'sghmc':
    inference.initialize(step_size=leap_size, friction=0.4, n_print=100)

  if str(sys.argv[4]) == 'laplace' or str(sys.argv[4]) == 'T':
    return ((x, y), y_ph, W_0, b_0, W_1, b_1, qW_0, qb_0, qW_1, qb_1,
            inference, w0, w1, b0, b1)
  else:
    return (x, y), y_ph, W_0, b_0, W_1, b_1, qW_0, qb_0, qW_1, qb_1, inference

Wb = Normal(loc=tf.zeros([Db]), scale=tf.ones([Db]))
Ib = Normal(loc=tf.zeros(1), scale=tf.ones(1))

Xnew = tf.placeholder(tf.float32, shape=(None, D))
Znew = tf.placeholder(tf.float32, shape=(None, Db))
ynew = tf.placeholder(tf.float32, shape=(None, ))

sigma2 = InverseGamma(concentration=tf.ones(1) * .1, rate=tf.ones(1) * .1)
# sigma2 = Normal(loc=tf.zeros([1]), scale=tf.ones([1]) * 100)

y = Normal(loc=ed.dot(x_train, Wf) + ed.dot(z_train, Wb) + Ib,
           scale=tf.log(sigma2))

# INFERENCE
sess = ed.get_session()
T = 10000
qi = Empirical(params=tf.Variable(tf.zeros([T, 1])))
qw = Empirical(params=tf.Variable(tf.zeros([T, D])))
qb = Empirical(params=tf.Variable(tf.zeros([T, Db])))
qsigma2 = Empirical(params=tf.Variable(tf.ones([T, 1])))

inference = ed.SGHMC({Wf: qw, Wb: qb, Ib: qi, sigma2: qsigma2},
                     data={y: y_train})
inference.run(step_size=.0005)

f, (ax1, ax2, ax3, ax4) = plt.subplots(4, sharex=True)
ax1.plot(qi.get_variables()[0].eval())

def ed_graph_2(disc=1):
  # Priors
  if str(sys.argv[4]) == 'laplace':
    W_0 = Laplace(loc=tf.zeros([D, n_hidden]),
                  scale=(std**2 / D) * tf.ones([D, n_hidden]))
    W_1 = Laplace(loc=tf.zeros([n_hidden, n_hidden]),
                  scale=(std**2 / n_hidden) * tf.ones([n_hidden, n_hidden]))
    W_2 = Laplace(loc=tf.zeros([n_hidden, K]),
                  scale=(std**2 / n_hidden) * tf.ones([n_hidden, K]))
    b_0 = Laplace(loc=tf.zeros(n_hidden),
                  scale=(std**2 / D) * tf.ones(n_hidden))
    b_1 = Laplace(loc=tf.zeros(n_hidden),
                  scale=(std**2 / n_hidden) * tf.ones(n_hidden))
    b_2 = Laplace(loc=tf.zeros(K), scale=(std**2 / n_hidden) * tf.ones(K))

  if str(sys.argv[4]) == 'normal':
    W_0 = Normal(loc=tf.zeros([D, n_hidden]),
                 scale=std * D**-.5 * tf.ones([D, n_hidden]))
    W_1 = Normal(loc=tf.zeros([n_hidden, K]),
                 scale=std * n_hidden**-.5 * tf.ones([n_hidden, K]))
    W_2 = Normal(loc=tf.zeros([n_hidden, K]),
                 scale=std * n_hidden**-.5 * tf.ones([n_hidden, K]))
    b_0 = Normal(loc=tf.zeros(n_hidden),
                 scale=std * D**-.5 * tf.ones(n_hidden))
    b_1 = Normal(loc=tf.zeros(n_hidden),
                 scale=10 * n_hidden**(-.5) * tf.ones(n_hidden))
    b_2 = Normal(loc=tf.zeros(K), scale=10 * n_hidden**(-.5) * tf.ones(K))

  if str(sys.argv[4]) == 'T':
    W_0 = StudentT(df=df * tf.ones([D, n_hidden]),
                   loc=tf.zeros([D, n_hidden]),
                   scale=(std**2 / D) * tf.ones([D, n_hidden]))
    W_1 = StudentT(df=df * tf.ones([n_hidden, n_hidden]),
                   loc=tf.zeros([n_hidden, n_hidden]),
                   scale=(std**2 / n_hidden) * tf.ones([n_hidden, n_hidden]))
    W_2 = StudentT(df=df * tf.ones([n_hidden, K]),
                   loc=tf.zeros([n_hidden, K]),
                   scale=(std**2 / n_hidden) * tf.ones([n_hidden, K]))
    b_0 = StudentT(df=df * tf.ones([n_hidden]),
                   loc=tf.zeros(n_hidden),
                   scale=(std**2 / D) * tf.ones(n_hidden))
    b_1 = StudentT(df=df * tf.ones([n_hidden]),
                   loc=tf.zeros(n_hidden),
                   scale=(std**2 / n_hidden) * tf.ones(n_hidden))
    b_2 = StudentT(df=df * tf.ones([K]),
                   loc=tf.zeros(K),
                   scale=(std**2 / n_hidden) * tf.ones(K))

  x = tf.placeholder(tf.float32, [None, None])
  y = Categorical(logits=nn(x, W_0, b_0, W_1, b_1, W_2, b_2))
  # We use a placeholder for the labels in anticipation of the training data.
  y_ph = tf.placeholder(tf.int32, [N])

  # Use a placeholder for the pre-trained posteriors
  p0 = tf.placeholder(tf.float32, [n_samp, D, n_hidden])
  p1 = tf.placeholder(tf.float32, [n_samp, n_hidden, n_hidden])
  p2 = tf.placeholder(tf.float32, [n_samp, n_hidden, K])
  pp0 = tf.placeholder(tf.float32, [n_samp, n_hidden])
  pp1 = tf.placeholder(tf.float32, [n_samp, n_hidden])
  pp2 = tf.placeholder(tf.float32, [n_samp, K])

  w0 = tf.Variable(p0)
  w1 = tf.Variable(p1)
  w2 = tf.Variable(p2)
  b0 = tf.Variable(pp0)
  b1 = tf.Variable(pp1)
  b2 = tf.Variable(pp2)

  # Empirical distribution
  qW_0 = Empirical(params=w0)
  qW_1 = Empirical(params=w1)
  qW_2 = Empirical(params=w2)
  qb_0 = Empirical(params=b0)
  qb_1 = Empirical(params=b1)
  qb_2 = Empirical(params=b2)

  if str(sys.argv[3]) == 'hmc':
    inference = ed.HMC(
        {W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1, W_2: qW_2, b_2: qb_2},
        data={y: y_ph})
  if str(sys.argv[3]) == 'sghmc':
    inference = ed.SGHMC(
        {W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1, W_2: qW_2, b_2: qb_2},
        data={y: y_ph})

  # Initialise the inference variables
  if str(sys.argv[3]) == 'hmc':
    inference.initialize(step_size=leap_size, n_steps=step_no, n_print=100,
                         scale={y: float(mnist.train.num_examples) / N})
  if str(sys.argv[3]) == 'sghmc':
    inference.initialize(step_size=leap_size, friction=0.4, n_print=100,
                         scale={y: float(mnist.train.num_examples) / N})

  return ((x, y), y_ph, W_0, b_0, W_1, b_1, W_2, b_2, qW_0, qb_0, qW_1, qb_1,
          qW_2, qb_2, inference, p0, p1, p2, pp0, pp1, pp2,
          w0, w1, w2, b0, b1, b2)

def __init__(self, latent_vars=None, data=None, model_wrapper=None):
  """Initialization.

  Parameters
  ----------
  latent_vars : list of RandomVariable or
                dict of RandomVariable to RandomVariable
    Collection of random variables to perform inference on. If list,
    each random variable will be implicitly approximated using an
    ``Empirical`` random variable that is defined internally (with
    support matching each random variable). If dictionary, each random
    variable must be an ``Empirical`` random variable.
  data : dict, optional
    Data dictionary which binds observed variables (of type
    `RandomVariable`) to their realizations (of type `tf.Tensor`). It
    can also bind placeholders (of type `tf.Tensor`) used in the model
    to their realizations.
  model_wrapper : ed.Model, optional
    A wrapper for the probability model. If specified, the random
    variables in `latent_vars`' dictionary keys are strings used
    accordingly by the wrapper. `data` is also changed. For TensorFlow,
    Python, and Stan models, the key type is a string; for PyMC3, the
    key type is a Theano shared variable. For TensorFlow, Python, and
    PyMC3 models, the value type is a NumPy array or TensorFlow tensor;
    for Stan, the value type is the type according to the Stan program's
    data block.

  Examples
  --------
  Most explicitly, MonteCarlo is specified via a dictionary:

  >>> qpi = Empirical(params=tf.Variable(tf.zeros([T, K-1])))
  >>> qmu = Empirical(params=tf.Variable(tf.zeros([T, K*D])))
  >>> qsigma = Empirical(params=tf.Variable(tf.zeros([T, K*D])))
  >>> MonteCarlo({pi: qpi, mu: qmu, sigma: qsigma}, data)

  The inferred posterior is comprised of ``Empirical`` random variables
  with ``T`` samples.

  We also automate the specification of ``Empirical`` random variables.
  One can pass in a list of latent variables instead:

  >>> MonteCarlo([beta], data)
  >>> MonteCarlo([pi, mu, sigma], data)

  It defaults to Empirical random variables with 10,000 samples for
  each dimension.

  However, for model wrappers, lists are not supported, e.g.,

  >>> MonteCarlo(['z'], data, model_wrapper)

  This is because internally with model wrappers, we have no way of
  knowing the dimensions in which to infer each latent variable. One
  must explicitly pass in the Empirical random variables.

  Notes
  -----
  The number of Monte Carlo iterations is set according to the minimum
  of all Empirical sizes.

  Initialization is assumed from params[0, :]. This generalizes
  initializing randomly and initializing from user input. Updates are
  along this outer dimension, where iteration t updates params[t, :] in
  each Empirical random variable.

  No warm-up is implemented. Users must run MCMC for a long period of
  time, then manually burn in the Empirical random variable.
  """
  if isinstance(latent_vars, list):
    with tf.variable_scope("posterior"):
      if model_wrapper is None:
        latent_vars = {
            rv: Empirical(params=tf.Variable(
                tf.zeros([1e4] + rv.get_batch_shape().as_list())))
            for rv in latent_vars}
      else:
        raise NotImplementedError("A list is not supported for model "
                                  "wrappers. See documentation.")
  elif isinstance(latent_vars, dict):
    for qz in six.itervalues(latent_vars):
      if not isinstance(qz, Empirical):
        raise TypeError("Posterior approximation must consist of only "
                        "Empirical random variables.")

  super(MonteCarlo, self).__init__(latent_vars, data, model_wrapper)

x_train = build_toy_dataset(N)

pi = Dirichlet(tf.ones(K))
mu = Normal(tf.zeros(D), tf.ones(D), sample_shape=K)
sigmasq = InverseGamma(tf.ones(D), tf.ones(D), sample_shape=K)
x = ParamMixture(pi, {'loc': mu, 'scale_diag': tf.sqrt(sigmasq)},
                 MultivariateNormalDiag,
                 sample_shape=N)
z = x.cat

T = 500  # number of MCMC samples
qpi = Empirical(tf.get_variable(
    "qpi/params", [T, K],
    initializer=tf.constant_initializer(1.0 / K)))
qmu = Empirical(tf.get_variable(
    "qmu/params", [T, K, D],
    initializer=tf.zeros_initializer()))
qsigmasq = Empirical(tf.get_variable(
    "qsigmasq/params", [T, K, D],
    initializer=tf.ones_initializer()))
qz = Empirical(tf.get_variable(
    "qz/params", [T, N],
    initializer=tf.zeros_initializer(),
    dtype=tf.int32))

inference = ed.Gibbs({
    pi: qpi,
    mu: qmu,

import edward as ed
import numpy as np
import tensorflow as tf

from edward.models import Empirical, Normal

ed.set_seed(42)

# DATA
x_data = np.array([0.0] * 50, dtype=np.float32)

# MODEL: Normal-Normal with known variance
mu = Normal(mu=0.0, sigma=1.0)
x = Normal(mu=tf.ones(50) * mu, sigma=1.0)

# INFERENCE
qmu = Empirical(params=tf.Variable(tf.zeros([1000])))
proposal_mu = Normal(mu=0.0, sigma=tf.sqrt(1.0 / 51.0))

# analytic solution: N(mu=0.0, sigma=\sqrt{1/51}=0.140)
inference = ed.MetropolisHastings({mu: qmu}, {mu: proposal_mu},
                                  data={x: x_data})
inference.run()

# CRITICISM
# Check convergence with visual diagnostics.
sess = ed.get_session()
mean, std = sess.run([qmu.mean(), qmu.std()])
print("Inferred posterior mean:")
print(mean)
print("Inferred posterior std:")

print('\n')
# for i in range(max(N, 10)):
#   print(formulas[i].eval())

## Observations
# data = tf.constant("AAAAAAAAAAAAAAAB", shape=(N,))
# data = tf.constant("B", shape=(N,))
# data = tf.constant("C", shape=(N,))  # ???
# data = tf.constant(0, shape=(N,))
# data = tf.constant(20, shape=(N,))
data = np.ones((N,)) * 17

## Infer:
T = 10000
qtheta = Empirical(params=tf.Variable(0.5 + tf.zeros([T])))  # Why need tf.Variable here?
tf.summary.scalar('qtheta', qtheta)

# proposal_theta = Beta(concentration1=1.0, concentration0=1.0, sample_shape=(1,))
# proposal_theta = Normal(loc=theta, scale=0.5)
# inference = ed.MetropolisHastings({theta: qtheta}, {theta: proposal_theta}, {formulas: data})
sess = ed.get_session()
inference = ed.HMC({theta: qtheta}, {formulas: data})
inference.initialize()
tf.global_variables_initializer().run()

for _ in range(inference.n_iter):
  info_dict = inference.update()
  inference.print_progress(info_dict)

  f_W = dic['weights']
  f_b = dic['bias']
  logits = tf.matmul(tf.cast(xtrain, tf.float32), f_W) + f_b
  return tf.nn.softmax(logits)


with tf.name_scope("model"):
  weights = Normal(loc=tf.ones([dim, nb_classes]),
                   scale=tf.ones([dim, nb_classes]), name='weights')
  bias = Normal(loc=tf.zeros([nb_classes]), scale=tf.ones([nb_classes]),
                name='bias')
  dic = {'weights': weights, 'bias': bias}
  X = tf.placeholder(tf.float32, shape=[None, dim])
  y = tf.identity(Categorical(Softmax(X, dic)), name="y")

with tf.name_scope("posterior"):
  Nsamples = 1000
  with tf.name_scope("qweights"):
    qweights = Empirical(params=tf.Variable(
        tf.random_normal([Nsamples, dim, nb_classes])))
  with tf.name_scope("qbias"):
    qbias = Empirical(params=tf.Variable(
        tf.random_normal([Nsamples, nb_classes])))

N = 100
x = tf.placeholder(tf.float32, shape=[None, dim])
y_ph = tf.placeholder(tf.int32, shape=[None])

inference = ed.SGHMC({weights: qweights, bias: qbias}, data={y: y_ph})
inference.initialize(n_iter=1000, n_print=100, step_size=1e-1, friction=1.0)

sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# for i in range(10):
#   print(x.eval())

## Observations:
# data = tf.ones(10, dtype=tf.int32)  # NOT WORKING!
data = [1, 1, 1, 1, 1, 1, 1, 1, 0, 1]

## Infer:
# Variational
# qtheta = Beta(tf.Variable(1.0), tf.Variable(1.0))  # Why need tf.Variable here?
# inference = ed.KLqp({theta: qtheta}, {x: data})
# inference.run(n_samples=5, n_iter=1000)

# MonteCarlo
T = 10000
qtheta = Empirical(
    params=tf.Variable(0.5 + tf.zeros([T, 1])))  # Why need tf.Variable here?
# proposal_theta = Beta(concentration1=1.0, concentration0=1.0, sample_shape=(1,))
# proposal_theta = Normal(loc=theta, scale=0.5)
# inference = ed.MetropolisHastings({theta: qtheta}, {theta: proposal_theta}, {x: data})
inference = ed.HMC({theta: qtheta}, {x: data})
inference.run()

## Results:
qtheta_samples = qtheta.sample(1000).eval()
print(qtheta_samples.mean())
plt.hist(qtheta_samples)
plt.show()

import edward as ed
import numpy as np
import tensorflow as tf

from edward.models import Bernoulli, Beta, Empirical

ed.set_seed(42)

# DATA
x_data = np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1])

# MODEL
p = Beta(1.0, 1.0)
x = Bernoulli(probs=p, sample_shape=10)

# INFERENCE
qp = Empirical(params=tf.Variable(tf.zeros([1000]) + 0.5))
proposal_p = Beta(3.0, 9.0)

inference = ed.MetropolisHastings({p: qp}, {p: proposal_p}, data={x: x_data})
inference.run()

# CRITICISM
# exact posterior has mean 0.25 and std 0.12
sess = ed.get_session()
mean, stddev = sess.run([qp.mean(), qp.stddev()])
print("Inferred posterior mean:")
print(mean)
print("Inferred posterior stddev:")
print(stddev)

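# A hedged numeric check of the criticism comment above: the exact posterior
# Beta(1 + 2, 1 + 8) = Beta(3, 9) has mean 3/12 = 0.25 and standard deviation
# sqrt(3 * 9 / (12**2 * 13)) ~= 0.120, matching the values quoted there.
from scipy.stats import beta

posterior = beta(3.0, 9.0)
print(posterior.mean())  # 0.25
print(posterior.std())   # ~0.120
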
import edward as ed
import numpy as np
import tensorflow as tf

from edward.models import InverseGamma, Normal, Empirical

N = 1000

# Data generation (known mean)
mu = 7.0
sigma = 0.7
xn_data = np.random.normal(mu, sigma, N)
print('sigma={}'.format(sigma))

# Prior definition
alpha = tf.Variable(0.5, dtype=tf.float32, trainable=False)
beta = tf.Variable(0.7, dtype=tf.float32, trainable=False)

# Posterior inference
# Probabilistic model
ig = InverseGamma(alpha=alpha, beta=beta)
xn = Normal(mu=mu, sigma=tf.ones([N]) * tf.sqrt(ig))

# Inference
qig = Empirical(params=tf.Variable(tf.zeros(1000) + 0.5))
proposal_ig = InverseGamma(alpha=2.0, beta=2.0)
inference = ed.MetropolisHastings({ig: qig}, {ig: proposal_ig},
                                  data={xn: xn_data})
inference.run()

sess = ed.get_session()
print('Inferred sigma={}'.format(sess.run(tf.sqrt(qig.mean()))))

import edward as ed
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from edward.models import Bernoulli, Normal, Empirical
from scipy.special import expit

# DATA
nsubj = 200
nitem = 25
trait_true = np.random.normal(size=[nsubj, 1])
thresh_true = np.random.normal(size=[1, nitem])
X_data = np.random.binomial(1, expit(trait_true - thresh_true))

# MODEL
trait = Normal(mu=tf.zeros([nsubj, 1]), sigma=tf.ones([nsubj, 1]))
thresh = Normal(mu=tf.zeros([1, nitem]), sigma=tf.ones([1, nitem]))
X = Bernoulli(logits=tf.sub(trait, thresh))

# INFERENCE
T = 5000  # number of posterior samples
q_trait = Empirical(params=tf.Variable(tf.zeros([T, nsubj, 1])))
q_thresh = Empirical(params=tf.Variable(tf.zeros([T, 1, nitem])))
inference = ed.HMC({trait: q_trait, thresh: q_thresh}, data={X: X_data})
inference.run(step_size=0.1)

# CRITICISM
# Check that the inferred posterior mean captures the true traits.
plt.scatter(trait_true, q_trait.mean().eval())
plt.show()

print("MSE between true traits and inferred posterior mean:")
print(np.mean(np.square(trait_true - q_trait.mean().eval())))

def __init__(self, latent_vars=None, data=None):
  """Initialization.

  Parameters
  ----------
  latent_vars : list or dict, optional
    Collection of random variables (of type ``RandomVariable`` or
    ``tf.Tensor``) to perform inference on. If list, each random
    variable will be approximated using an ``Empirical`` random
    variable that is defined internally (with unconstrained support).
    If dictionary, each value in the dictionary must be an
    ``Empirical`` random variable.
  data : dict, optional
    Data dictionary which binds observed variables (of type
    ``RandomVariable`` or ``tf.Tensor``) to their realizations (of
    type ``tf.Tensor``). It can also bind placeholders (of type
    ``tf.Tensor``) used in the model to their realizations.

  Examples
  --------
  Most explicitly, ``MonteCarlo`` is specified via a dictionary:

  >>> qpi = Empirical(params=tf.Variable(tf.zeros([T, K-1])))
  >>> qmu = Empirical(params=tf.Variable(tf.zeros([T, K*D])))
  >>> qsigma = Empirical(params=tf.Variable(tf.zeros([T, K*D])))
  >>> ed.MonteCarlo({pi: qpi, mu: qmu, sigma: qsigma}, data)

  The inferred posterior is comprised of ``Empirical`` random
  variables with ``T`` samples.

  We also automate the specification of ``Empirical`` random
  variables. One can pass in a list of latent variables instead:

  >>> ed.MonteCarlo([beta], data)
  >>> ed.MonteCarlo([pi, mu, sigma], data)

  It defaults to ``Empirical`` random variables with 10,000 samples
  for each dimension.

  Notes
  -----
  The number of Monte Carlo iterations is set according to the
  minimum of all ``Empirical`` sizes.

  Initialization is assumed from ``params[0, :]``. This generalizes
  initializing randomly and initializing from user input. Updates are
  along this outer dimension, where iteration t updates
  ``params[t, :]`` in each ``Empirical`` random variable.

  No warm-up is implemented. Users must run MCMC for a long period of
  time, then manually burn in the Empirical random variable.
  """
  if isinstance(latent_vars, list):
    with tf.variable_scope("posterior"):
      latent_vars = {
          rv: Empirical(params=tf.Variable(
              tf.zeros([1e4] + rv.get_batch_shape().as_list())))
          for rv in latent_vars}
  elif isinstance(latent_vars, dict):
    for qz in six.itervalues(latent_vars):
      if not isinstance(qz, Empirical):
        raise TypeError("Posterior approximation must consist of only "
                        "Empirical random variables.")

  super(MonteCarlo, self).__init__(latent_vars, data)

  os.makedirs(DATA_DIR)
if not os.path.exists(IMG_DIR):
  os.makedirs(IMG_DIR)

# DATA
mnist = input_data.read_data_sets(DATA_DIR, one_hot=True)
x_train, _ = mnist.train.next_batch(N)

# MODEL
z = Normal(mu=tf.zeros([N, d]), sigma=tf.ones([N, d]))
logits = generative_network(z)
x = Bernoulli(logits=logits)

# INFERENCE
T = int(100 * 1000)
qz = Empirical(params=tf.Variable(tf.random_normal([T, N, d])))

inference_e = ed.HMC({z: qz}, data={x: x_train})
inference_e.initialize()

inference_m = ed.MAP(data={x: x_train, z: tf.gather(qz.params, inference_e.t)})
optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
inference_m.initialize(optimizer=optimizer)

init = tf.global_variables_initializer()
init.run()

n_iter_per_epoch = 100
n_epoch = T // n_iter_per_epoch
for epoch in range(n_epoch):
  avg_loss = 0.0

""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import edward as ed import tensorflow as tf from edward.models import Empirical, MultivariateNormalFull ed.set_seed(42) # MODEL z = MultivariateNormalFull( mu=tf.ones(2), sigma=tf.constant([[1.0, 0.8], [0.8, 1.0]])) # INFERENCE qz = Empirical(params=tf.Variable(tf.random_normal([2000, 2]))) inference = ed.SGLD({z: qz}) inference.run(step_size=5.0) # CRITICISM sess = ed.get_session() mean, std = sess.run([qz.mean(), qz.std()]) print("Inferred posterior mean:") print(mean) print("Inferred posterior std:") print(std)
def build(self, Y):
  self.var = Empirical(Y)
  return self