Example #1
    def _test_linear_regression(self, default, dtype):
        def build_toy_dataset(N, w, noise_std=0.1):
            D = len(w)
            x = np.random.randn(N, D)
            y = np.dot(x, w) + np.random.normal(0, noise_std, size=N)
            return x, y

        with self.test_session() as sess:
            N = 40  # number of data points
            D = 10  # number of features

            w_true = np.random.randn(D)
            X_train, y_train = build_toy_dataset(N, w_true)
            X_test, y_test = build_toy_dataset(N, w_true)

            X = tf.placeholder(dtype, [N, D])
            w = Normal(loc=tf.zeros(D, dtype=dtype),
                       scale=tf.ones(D, dtype=dtype))
            b = Normal(loc=tf.zeros(1, dtype=dtype),
                       scale=tf.ones(1, dtype=dtype))
            y = Normal(loc=ed.dot(X, w) + b,
                       scale=0.1 * tf.ones(N, dtype=dtype))

            n_samples = 2000
            if not default:
                qw = Empirical(
                    tf.Variable(tf.zeros([n_samples, D], dtype=dtype)))
                qb = Empirical(
                    tf.Variable(tf.zeros([n_samples, 1], dtype=dtype)))
                inference = ed.HMC({
                    w: qw,
                    b: qb
                },
                                   data={
                                       X: X_train,
                                       y: y_train
                                   })
            else:
                inference = ed.HMC([w, b], data={X: X_train, y: y_train})
                qw = inference.latent_vars[w]
                qb = inference.latent_vars[b]
            inference.run(step_size=0.01)

            self.assertAllClose(qw.mean().eval(), w_true, rtol=5e-1, atol=5e-1)
            self.assertAllClose(qb.mean().eval(), [0.0], rtol=5e-1, atol=5e-1)

            old_t, old_n_accept = sess.run([inference.t, inference.n_accept])
            if not default:
                self.assertEqual(old_t, n_samples)
            else:
                self.assertEqual(old_t, 1e4)
            self.assertGreater(old_n_accept, 0.1)
            sess.run(inference.reset)
            new_t, new_n_accept = sess.run([inference.t, inference.n_accept])
            self.assertEqual(new_t, 0)
            self.assertEqual(new_n_accept, 0)
Example #2
def main(_):
  ed.set_seed(42)

  # MODEL
  z = MultivariateNormalTriL(
      loc=tf.ones(2),
      scale_tril=tf.cholesky(tf.constant([[1.0, 0.8], [0.8, 1.0]])))

  # INFERENCE
  qz = Empirical(params=tf.get_variable("qz/params", [1000, 2]))

  inference = ed.HMC({z: qz})
  inference.run()

  # CRITICISM
  sess = ed.get_session()
  mean, stddev = sess.run([qz.mean(), qz.stddev()])
  print("Inferred posterior mean:")
  print(mean)
  print("Inferred posterior stddev:")
  print(stddev)

  fig, ax = plt.subplots()
  trace = sess.run(qz.params)
  ax.scatter(trace[:, 0], trace[:, 1], marker=".")
  mvn_plot_contours(z, ax=ax)
  plt.show()
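`mvn_plot_contours` is a plotting helper defined elsewhere in the original script. A minimal sketch of what it might look like (the grid range and resolution below are assumptions, not values from the original) is:

def mvn_plot_contours(z, ax=None, xlim=(-3, 5), ylim=(-3, 5), n=100):
  # hypothetical helper: contour the density of the 2-D random variable z
  sess = ed.get_session()
  xs = np.linspace(xlim[0], xlim[1], n)
  ys = np.linspace(ylim[0], ylim[1], n)
  xx, yy = np.meshgrid(xs, ys)
  grid = np.stack([xx.ravel(), yy.ravel()], axis=1).astype(np.float32)
  zz = sess.run(z.prob(grid)).reshape(xx.shape)
  if ax is None:
    ax = plt.gca()
  ax.contour(xx, yy, zz)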
Example #3
def main(_):
  # data
  J = 8
  data_y = np.array([28, 8, -3, 7, -1, 1, 18, 12])
  data_sigma = np.array([15, 10, 16, 11, 9, 11, 10, 18])

  # model definition
  mu = Normal(0., 10.)
  logtau = Normal(5., 1.)
  theta_prime = Normal(tf.zeros(J), tf.ones(J))
  sigma = tf.placeholder(tf.float32, J)
  y = Normal(mu + tf.exp(logtau) * theta_prime, sigma * tf.ones([J]))

  data = {y: data_y, sigma: data_sigma}

  # ed.KLqp inference
  with tf.variable_scope('q_logtau'):
    q_logtau = Normal(tf.get_variable('loc', []),
                      tf.nn.softplus(tf.get_variable('scale', [])))

  with tf.variable_scope('q_mu'):
    q_mu = Normal(tf.get_variable('loc', []),
                  tf.nn.softplus(tf.get_variable('scale', [])))

  with tf.variable_scope('q_theta_prime'):
    q_theta_prime = Normal(tf.get_variable('loc', [J]),
                           tf.nn.softplus(tf.get_variable('scale', [J])))

  inference = ed.KLqp({logtau: q_logtau, mu: q_mu,
                      theta_prime: q_theta_prime}, data=data)
  inference.run(n_samples=15, n_iter=60000)
  print("====  ed.KLqp inference ====")
  print("E[mu] = %f" % (q_mu.mean().eval()))
  print("E[logtau] = %f" % (q_logtau.mean().eval()))
  print("E[theta_prime]=")
  print((q_theta_prime.mean().eval()))
  print("====  end ed.KLqp inference ====")
  print("")
  print("")

  # HMC inference
  S = 400000
  burn = S // 2

  hq_logtau = Empirical(tf.get_variable('hq_logtau', [S]))
  hq_mu = Empirical(tf.get_variable('hq_mu', [S]))
  hq_theta_prime = Empirical(tf.get_variable('hq_thetaprime', [S, J]))

  inference = ed.HMC({logtau: hq_logtau, mu: hq_mu,
                     theta_prime: hq_theta_prime}, data=data)
  inference.run()

  print("====  ed.HMC inference ====")
  print("E[mu] = %f" % (hq_mu.params.eval()[burn:].mean()))
  print("E[logtau] = %f" % (hq_logtau.params.eval()[burn:].mean()))
  print("E[theta_prime]=")
  print(hq_theta_prime.params.eval()[burn:, ].mean(0))
  print("====  end ed.HMC inference ====")
  print("")
  print("")
Example #4
    def test_hmc_default(self):
        with self.test_session() as sess:
            x = TransformedDistribution(
                distribution=Normal(1.0, 1.0),
                bijector=tf.contrib.distributions.bijectors.Softplus())
            x.support = 'nonnegative'

            inference = ed.HMC([x])
            inference.initialize(auto_transform=True, step_size=0.8)
            tf.global_variables_initializer().run()
            for _ in range(inference.n_iter):
                info_dict = inference.update()
                inference.print_progress(info_dict)

            # Check approximation on constrained space has same moments as
            # target distribution.
            n_samples = 10000
            x_unconstrained = inference.transformations[x]
            qx = inference.latent_vars[x_unconstrained]
            qx_constrained = Empirical(
                x_unconstrained.bijector.inverse(qx.params))
            x_mean, x_var = tf.nn.moments(x.sample(n_samples), 0)
            qx_mean, qx_var = tf.nn.moments(qx_constrained.params[500:], 0)
            stats = sess.run([x_mean, qx_mean, x_var, qx_var])
            self.assertAllClose(stats[0], stats[1], rtol=1e-1, atol=1e-1)
            self.assertAllClose(stats[2], stats[3], rtol=1e-1, atol=1e-1)
Example #5
    def test_hmc(self):
        with self.test_session():
            N, D, W_1, W_2, W_3, b_1, b_2, X, y, X_train, y_train = self._test(
            )

            T = 1  # number of MCMC samples
            qW_1 = Empirical(params=tf.Variable(tf.random_normal([T, D, 20])))
            qW_2 = Empirical(params=tf.Variable(tf.random_normal([T, 20, 15])))
            qW_3 = Empirical(params=tf.Variable(tf.random_normal([T, 15, 1])))
            qb_1 = Empirical(params=tf.Variable(tf.random_normal([T, 20])))
            qb_2 = Empirical(params=tf.Variable(tf.random_normal([T, 15])))

            inference = ed.HMC(
                {
                    W_1: qW_1,
                    b_1: qb_1,
                    W_2: qW_2,
                    b_2: qb_2,
                    W_3: qW_3
                },
                data={
                    y: y_train,
                    X: X_train
                })
            inference.run()
Example #6
def ed_graph_2(disc=1):
	# Priors
	if str(sys.argv[4]) == 'laplace':
		W_0 = Laplace(loc=tf.zeros([D, n_hidden]), scale=(std**2/D)*tf.ones([D, n_hidden]))
		W_1 = Laplace(loc=tf.zeros([n_hidden, K]), scale=(std**2/n_hidden)*tf.ones([n_hidden, K]))
		b_0 = Laplace(loc=tf.zeros(n_hidden), scale=(std**2/D)*tf.ones(n_hidden))
		b_1 = Laplace(loc=tf.zeros(K), scale=(std**2/n_hidden)*tf.ones(K))

	if str(sys.argv[4]) == 'normal':
		W_0 = Normal(loc=tf.zeros([D, n_hidden]), scale=std*D**(-.5)*tf.ones([D, n_hidden]))
		W_1 = Normal(loc=tf.zeros([n_hidden, K]), scale=std*n_hidden**(-.5)*tf.ones([n_hidden, K]))
		b_0 = Normal(loc=tf.zeros(n_hidden), scale=std*D**(-.5)*tf.ones(n_hidden))
		b_1 = Normal(loc=tf.zeros(K), scale=std*n_hidden**(-.5)*tf.ones(K))

	if str(sys.argv[4]) == 'T':
		W_0 = StudentT(df=df*tf.ones([D, n_hidden]), loc=tf.zeros([D, n_hidden]), scale=std**2/D*tf.ones([D, n_hidden]))
		W_1 = StudentT(df=df*tf.ones([n_hidden, K]), loc=tf.zeros([n_hidden, K]), scale=std**2/n_hidden*tf.ones([n_hidden, K]))
		b_0 = StudentT(df=df*tf.ones([n_hidden]), loc=tf.zeros(n_hidden), scale=std**2/D*tf.ones(n_hidden))
		b_1 = StudentT(df=df*tf.ones([K]), loc=tf.zeros(K), scale=std**2/n_hidden*tf.ones(K))

	x = tf.placeholder(tf.float32, [None, None])
	y = Categorical(logits=nn(x, W_0, b_0, W_1, b_1))
	# We use a placeholder for the labels in anticipation of the training data.
	y_ph = tf.placeholder(tf.int32, [None])

	# Use a placeholder for the pre-trained posteriors
	w0 = tf.placeholder(tf.float32, [n_samp, D, n_hidden])
	w1 = tf.placeholder(tf.float32, [n_samp, n_hidden, K])
	b0 = tf.placeholder(tf.float32, [n_samp, n_hidden])
	b1 = tf.placeholder(tf.float32, [n_samp, K])

	# Empirical distribution 
	qW_0 = Empirical(params=tf.Variable(w0))
	qW_1 = Empirical(params=tf.Variable(w1))
	qb_0 = Empirical(params=tf.Variable(b0))
	qb_1 = Empirical(params=tf.Variable(b1))
	
	if str(sys.argv[3]) == 'hmc':	
		inference = ed.HMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1}, data={y: y_ph})
	if str(sys.argv[3]) == 'sghmc':	
		inference = ed.SGHMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1}, data={y: y_ph})

	# Initialise the inference variables
	if str(sys.argv[3]) == 'hmc':
		inference.initialize(step_size = disc*leap_size, n_steps = step_no, n_print=100)
	if str(sys.argv[3]) == 'sghmc':
		inference.initialize(step_size = disc*leap_size, friction=disc**2*0.1, n_print=100)
	
	return ((x, y), y_ph, W_0, b_0, W_1, b_1, qW_0, qb_0, qW_1, qb_1, inference,
		w0, w1, b0, b1)
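The `nn` helper that produces the logits is not part of this snippet. A plausible single-hidden-layer version consistent with the weight shapes above (the tanh nonlinearity is an assumption) is:

def nn(x, W_0, b_0, W_1, b_1):
	# hypothetical forward pass: [N, D] inputs -> [N, K] class logits
	h = tf.tanh(tf.matmul(x, W_0) + b_0)
	return tf.matmul(h, W_1) + b_1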
Example #7
    def _test_normal_normal(self, default, dtype):
        with self.test_session() as sess:
            x_data = np.array([0.0] * 50, dtype=np.float32)

            mu = Normal(loc=tf.constant(0.0, dtype=dtype),
                        scale=tf.constant(1.0, dtype=dtype))
            x = Normal(loc=mu,
                       scale=tf.constant(1.0, dtype=dtype),
                       sample_shape=50)

            n_samples = 2000
            # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140)
            if not default:
                qmu = Empirical(
                    params=tf.Variable(tf.ones(n_samples, dtype=dtype)))
                inference = ed.HMC({mu: qmu}, data={x: x_data})
            else:
                inference = ed.HMC([mu], data={x: x_data})
                qmu = inference.latent_vars[mu]
            inference.run()

            self.assertAllClose(qmu.mean().eval(), 0, rtol=1e-1, atol=1e-1)
            self.assertAllClose(qmu.stddev().eval(),
                                np.sqrt(1 / 51),
                                rtol=1e-1,
                                atol=1e-1)

            old_t, old_n_accept = sess.run([inference.t, inference.n_accept])
            if not default:
                self.assertEqual(old_t, n_samples)
            else:
                self.assertEqual(old_t, 1e4)
            self.assertGreater(old_n_accept, 0.1)
            sess.run(inference.reset)
            new_t, new_n_accept = sess.run([inference.t, inference.n_accept])
            self.assertEqual(new_t, 0)
            self.assertEqual(new_n_accept, 0)
Example #8
  def test_normalnormal_run(self):
    with self.test_session() as sess:
      x_data = np.array([0.0] * 50, dtype=np.float32)

      mu = Normal(loc=0.0, scale=1.0)
      x = Normal(loc=tf.ones(50) * mu, scale=1.0)

      qmu = Empirical(params=tf.Variable(tf.ones(2000)))

      # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140)
      inference = ed.HMC({mu: qmu}, data={x: x_data})
      inference.run()

      self.assertAllClose(qmu.mean().eval(), 0, rtol=1e-2, atol=1e-2)
      self.assertAllClose(qmu.stddev().eval(), np.sqrt(1 / 51),
                          rtol=1e-2, atol=1e-2)
Example #9
    def test_indexedslices(self):
        """Test that gradients accumulate when tf.gradients doesn't return
    tf.Tensor (IndexedSlices)."""
        with self.test_session() as sess:
            N = 10  # number of data points
            K = 2  # number of clusters
            T = 1  # number of MCMC samples

            x_data = np.zeros(N, dtype=np.float32)

            mu = Normal(0.0, 1.0, sample_shape=K)
            c = Categorical(logits=tf.zeros(N))
            x = Normal(tf.gather(mu, c), tf.ones(N))

            qmu = Empirical(params=tf.Variable(tf.ones([T, K])))
            qc = Empirical(params=tf.Variable(tf.ones([T, N])))

            inference = ed.HMC({mu: qmu}, data={x: x_data})
            inference.initialize()
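The test stops after `initialize()`. If one actually wanted to draw the samples, the usual follow-up, matching the update loop used elsewhere on this page, would be:

            tf.global_variables_initializer().run()
            for _ in range(inference.n_iter):
                info_dict = inference.update()
                inference.print_progress(info_dict)
            inference.finalize()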
Example #10
def main(_):
  # DATA
  trait_true = np.random.normal(size=[FLAGS.nsubj, 1])
  thresh_true = np.random.normal(size=[1, FLAGS.nitem])
  X_data = np.random.binomial(1, expit(trait_true - thresh_true))

  # MODEL
  trait = Normal(loc=0.0, scale=1.0, sample_shape=[FLAGS.nsubj, 1])
  thresh = Normal(loc=0.0, scale=1.0, sample_shape=[1, FLAGS.nitem])
  X = Bernoulli(logits=trait - thresh)

  # INFERENCE
  q_trait = Empirical(params=tf.get_variable("q_trait/params",
                                             [FLAGS.T, FLAGS.nsubj, 1]))
  q_thresh = Empirical(params=tf.get_variable("q_thresh/params",
                                              [FLAGS.T, 1, FLAGS.nitem]))

  inference = ed.HMC({trait: q_trait, thresh: q_thresh}, data={X: X_data})
  inference.run(step_size=0.1)

  # Alternatively, use variational inference.
  # q_trait = Normal(
  #     loc=tf.get_variable("q_trait/loc", [FLAGS.nsubj, 1]),
  #     scale=tf.nn.softplus(
  #         tf.get_variable("q_trait/scale", [FLAGS.nsubj, 1])))
  # q_thresh = Normal(
  #     loc=tf.get_variable("q_thresh/loc", [1, FLAGS.nitem]),
  #     scale=tf.nn.softplus(
  #         tf.get_variable("q_thresh/scale", [1, FLAGS.nitem])))

  # inference = ed.KLqp({trait: q_trait, thresh: q_thresh}, data={X: X_data})
  # inference.run(n_iter=2500, n_samples=10)

  # CRITICISM
  # Check that the inferred posterior mean captures the true traits.
  plt.scatter(trait_true, q_trait.mean().eval())
  plt.show()

  print("MSE between true traits and inferred posterior mean:")
  print(np.mean(np.square(trait_true - q_trait.mean().eval())))
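The FLAGS used above are defined elsewhere in the script; a minimal sketch of the missing definitions (the defaults are assumptions, taken from the similar stand-alone script in Example #16) is:

tf.flags.DEFINE_integer("nsubj", 200, "Number of subjects.")
tf.flags.DEFINE_integer("nitem", 25, "Number of items.")
tf.flags.DEFINE_integer("T", 5000, "Number of posterior samples.")

FLAGS = tf.flags.FLAGS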
Example #11
    def run(self, data, method="klqp", **kwargs):

        if method == "klqp":
            print(">> Initializing ... ", end="")
            inference = ed.KLqp(self.unwind_latent_vars(), data=data)
            inference.initialize(**kwargs)
            print("ok")

            # RUNNING THE INFERENCE
            sess = ed.get_session()
            init = tf.global_variables_initializer()
            init.run()
            losses = []
            for _ in tqdm(range(inference.n_iter)):
                info_dict = inference.update()
                losses.append(info_dict['loss'])
            plt.figure(figsize=(7, 3))
            plt.title("Loss")
            plt.semilogy(losses)
            plt.show()

        elif method == "hmc":
            print(">> Initializing ... ", end="")
            inference = ed.HMC(self.unwind_latent_vars(), data=data)
            inference.initialize(**kwargs)
            print("ok")

            # RUNNING THE INFERENCE
            sess = ed.get_session()
            init = tf.global_variables_initializer()
            init.run()
            acceptance_rates = []
            for _ in tqdm(range(inference.n_iter)):
                info_dict = inference.update()
                acceptance_rates.append(info_dict['accept_rate'])
            plt.figure(figsize=(7, 3))
            plt.title("Acceptance Rate")
            plt.semilogy(acceptance_rates)
            plt.show()
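`unwind_latent_vars` is defined elsewhere on this class and is not shown. A hypothetical minimal version, assuming the object keeps its priors and matching posteriors in two dictionaries keyed by name (`self.priors` and `self.posteriors` are assumed names, not from the original), might be:

    def unwind_latent_vars(self):
        # hypothetical helper: build the {prior: posterior} dict that
        # ed.KLqp and ed.HMC expect from per-name dictionaries on the object
        latent_vars = {}
        for name, prior in self.priors.items():
            latent_vars[prior] = self.posteriors[name]
        return latent_vars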
Example #12
def main(_):
  ed.set_seed(42)

  # DATA
  x_data = np.array([0.0] * 50)

  # MODEL: Normal-Normal with known variance
  mu = Normal(loc=0.0, scale=1.0)
  x = Normal(loc=mu, scale=1.0, sample_shape=50)

  # INFERENCE
  qmu = Empirical(params=tf.get_variable("qmu/params", [1000],
                                         initializer=tf.zeros_initializer()))

  # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140)
  inference = ed.HMC({mu: qmu}, data={x: x_data})
  inference.run()

  # CRITICISM
  sess = ed.get_session()
  mean, stddev = sess.run([qmu.mean(), qmu.stddev()])
  print("Inferred posterior mean:")
  print(mean)
  print("Inferred posterior stddev:")
  print(stddev)

  # Check convergence with visual diagnostics.
  samples = sess.run(qmu.params)

  # Plot histogram.
  plt.hist(samples, bins='auto')
  plt.show()

  # Trace plot.
  plt.plot(samples)
  plt.show()
Example #13
# tf.select = tf.where

ed.set_seed(42)

# MODEL
#tf_x = tf.Variable(x.T, trainable=False)
tf_x = tf.Variable(x.T, trainable=False, dtype=tf.float32)

# Standard normal prior on coefficients
#beta = ed.models.Normal(mu=tf.zeros(D), sigma=tf.ones(D))
beta = ed.models.Normal(loc=tf.zeros(D), scale=tf.ones(D))

logit_pred = tf.squeeze(tf.matmul(tf.expand_dims(beta, 0), tf_x))
#ed_y = ed.models.BernoulliWithSigmoidP(p=logit_pred)
ed_y = ed.models.Bernoulli(logits=logit_pred)

# INFERENCE
qbeta = ed.models.Empirical(params=tf.Variable(tf.zeros([n_iterations, D])))
#inference = ed.HMC({beta:qbeta}, data={ed_y:y})
inference = ed.HMC({beta: qbeta}, data={ed_y: y.astype(int)})
t0 = time.time()
inference.run(step_size=step_size, n_steps=n_steps)

ed_time = time.time() - t0

print('Edward took %.3f seconds' % ed_time)

sess = ed.get_session()
ed_samples = sess.run(qbeta.params)
#plot(ed_samples[:, 0])
Example #14
    def train(self, X_train, y_train, X_val, is_print=True):
        ''' set up BNN and run HMC inference '''
        def neural_network(X):
            # set up the BNN structure using tf

            if self.activation_fn == 'relu':
                h = tf.maximum(tf.matmul(X, W_0) + b_0, 0)  # relu
            elif self.activation_fn == 'Lrelu':
                a = 0.2
                h = tf.maximum(
                    tf.matmul(X, W_0) + b_0,
                    a * (tf.matmul(X, W_0) + b_0))  # leaky relu
            elif self.activation_fn == 'erf':
                h = tf.erf(tf.matmul(X, W_0) + b_0)
            elif self.activation_fn == 'tanh':
                h = tf.tanh(tf.matmul(X, W_0) + b_0)
                # h = tf.tanh(1.23*tf.matmul(X, W_0) + b_0) # add 1.23 for close to GP erf
            elif self.activation_fn == 'sigmoid':
                h = tf.sigmoid(tf.matmul(X, W_0) + b_0)
            elif self.activation_fn == 'softplus':
                self.c = 2.  # if this is bigger -> relu behaviour, but less 'soft'
                h = tf.divide(
                    tf.log(
                        tf.exp(tf.multiply(tf.matmul(X, W_0) + b_0, self.c)) + 1),
                    self.c)
            elif self.activation_fn == 'rbf':
                self.beta_2 = 1 / (2 * self.g_var)
                h = tf.exp(-self.beta_2 * tf.square(X - W_0))

            h = tf.matmul(h, W_1)  #+ b_1
            return tf.reshape(h, [-1])

        def neural_network_deep(X):
            # set up the BNN structure using tf

            if self.activation_fn == 'relu':
                h1 = tf.maximum(tf.matmul(X, W_0) + b_0, 0)  # relu
                h = tf.maximum(tf.matmul(h1, W_1) + b_1, 0)  # relu
            elif self.activation_fn == 'Lrelu':
                a = 0.2
                h1 = tf.maximum(
                    tf.matmul(X, W_0) + b_0,
                    a * (tf.matmul(X, W_0) + b_0))  # leaky relu
                h = tf.maximum(
                    tf.matmul(h1, W_1) + b_1,
                    a * (tf.matmul(h1, W_1) + b_1))  # leaky relu
            elif self.activation_fn == 'erf':
                h1 = tf.erf(tf.matmul(X, W_0) + b_0)
                h = tf.erf(tf.matmul(h1, W_1) + b_1)
            else:
                raise Exception('tp: activation not implemented')

            h = tf.matmul(h, W_2)  #+ b_2
            return tf.reshape(h, [-1])

        if self.activation_fn == 'relu' or self.activation_fn == 'softplus' or self.activation_fn == 'Lrelu':
            init_stddev_0_w = np.sqrt(self.w_0_var)  # /d_in
            init_stddev_0_b = np.sqrt(self.b_0_var)  # /d_in
            init_stddev_1_w = 1.0 / np.sqrt(
                self.hidden_size)  #*np.sqrt(10) # 2nd layer init. dist
        elif self.activation_fn == 'tanh' or self.activation_fn == 'erf':
            init_stddev_0_w = np.sqrt(
                self.w_0_var)  # 1st layer init. dist for weights
            init_stddev_0_b = np.sqrt(self.b_0_var)  # for bias
            init_stddev_1_w = 1.0 / np.sqrt(
                self.hidden_size)  # 2nd layer init. dist
        elif self.activation_fn == 'rbf':
            init_stddev_0_w = np.sqrt(self.u_var)  # centres = sig_u
            init_stddev_0_b = np.sqrt(self.g_var)  # fixed /beta
            init_stddev_1_w = 1.0 / np.sqrt(
                self.hidden_size)  # 2nd layer init. dist

        n = X_train.shape[0]
        X_dim = X_train.shape[1]
        y_dim = 1  #y_train.shape[1]

        with tf.name_scope("model"):
            W_0 = Normal(loc=tf.zeros([X_dim, self.hidden_size]),
                         scale=init_stddev_0_w *
                         tf.ones([X_dim, self.hidden_size]),
                         name="W_0")
            if self.deep_NN == False:
                W_1 = Normal(loc=tf.zeros([self.hidden_size, y_dim]),
                             scale=init_stddev_1_w *
                             tf.ones([self.hidden_size, y_dim]),
                             name="W_1")
                b_0 = Normal(loc=tf.zeros(self.hidden_size),
                             scale=init_stddev_0_b * tf.ones(self.hidden_size),
                             name="b_0")
                b_1 = Normal(loc=tf.zeros(1), scale=tf.ones(1), name="b_1")
            else:
                W_1 = Normal(
                    loc=tf.zeros([self.hidden_size, self.hidden_size]),
                    scale=init_stddev_1_w * tf.ones([self.hidden_size, y_dim]),
                    name="W_1")
                b_0 = Normal(loc=tf.zeros(self.hidden_size),
                             scale=init_stddev_0_b * tf.ones(self.hidden_size),
                             name="b_0")
                W_2 = Normal(loc=tf.zeros([self.hidden_size, y_dim]),
                             scale=init_stddev_1_w *
                             tf.ones([self.hidden_size, y_dim]),
                             name="W_2")
                b_1 = Normal(loc=tf.zeros(self.hidden_size),
                             scale=init_stddev_1_w * tf.ones(self.hidden_size),
                             name="b_1")
                b_2 = Normal(loc=tf.zeros(1), scale=tf.ones(1), name="b_2")

            X = tf.placeholder(tf.float32, [n, X_dim], name="X")
            if self.deep_NN == False:
                y = Normal(loc=neural_network(X),
                           scale=np.sqrt(self.data_noise) * tf.ones(n),
                           name="y")
            else:
                y = Normal(loc=neural_network_deep(X),
                           scale=np.sqrt(self.data_noise) * tf.ones(n),
                           name="y")
        # inference
        if self.deep_NN == False:
            qW_0 = Empirical(
                tf.Variable(tf.zeros([self.n_samples, X_dim,
                                      self.hidden_size])))
            qW_1 = Empirical(
                tf.Variable(tf.zeros([self.n_samples, self.hidden_size,
                                      y_dim])))
            qb_0 = Empirical(
                tf.Variable(tf.zeros([self.n_samples, self.hidden_size])))
            qb_1 = Empirical(tf.Variable(tf.zeros([self.n_samples, y_dim])))
        else:
            qW_0 = Empirical(
                tf.Variable(tf.zeros([self.n_samples, X_dim,
                                      self.hidden_size])))
            qW_1 = Empirical(
                tf.Variable(
                    tf.zeros(
                        [self.n_samples, self.hidden_size, self.hidden_size])))
            qW_2 = Empirical(
                tf.Variable(tf.zeros([self.n_samples, self.hidden_size,
                                      y_dim])))
            qb_0 = Empirical(
                tf.Variable(tf.zeros([self.n_samples, self.hidden_size])))
            qb_1 = Empirical(
                tf.Variable(tf.zeros([self.n_samples, self.hidden_size])))
            qb_2 = Empirical(tf.Variable(tf.zeros([self.n_samples, y_dim])))

        # get some priors
        ### !!! TODO, turn this into a proper function
        # X_pred = X_val.astype(np.float32).reshape((X_val.shape[0], 1))
        # self.y_priors = tf.stack([nn_predict(X_pred, W_0.sample(), W_1.sample(),b_0.sample(), b_1.sample())
        # 	for _ in range(10)])

        # Neal 2012
        # Too large a stepsize will result in a very low acceptance rate for states
        # proposed by simulating trajectories. Too small a stepsize will either waste
        # computation time, by the same factor as the stepsize is too small, or (worse)
        # will lead to slow exploration by a random walk,

        # https://stats.stackexchange.com/questions/304942/how-to-set-step-size-in-hamiltonian-monte-carlo
        # If ϵ is too large, then there will be large discretisation error and low acceptance, if ϵ
        # is too small then more expensive leapfrog steps will be required to move large distances.
        # Ideally we want the largest possible value of ϵ
        # that gives reasonable acceptance probability. Unfortunately this may vary for different values of the target variable.
        # A simple heuristic to set this may be to do a preliminary run with fixed L,
        # gradually increasing ϵ until the acceptance probability is at an appropriate level.

        # Setting the trajectory length by trial and error therefore seems necessary.
        # For a problem thought to be fairly difficult, a trajectory with L = 100 might be a
        # suitable starting point. If preliminary runs (with a suitable ε; see above) show that HMC
        # reaches a nearly independent point after only one iteration, a smaller value of L might be
        # tried next. (Unless these “preliminary” runs are actually sufficient, in which case there is
        # of course no need to do more runs.) If instead there is high autocorrelation in the run
        # with L = 100, runs with L = 1000 might be tried next
        # It may also be advisable to randomly sample ϵ and L from suitable ranges
        # to avoid the possibility of having paths that are close to periodic, as this would slow mixing.
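        # A minimal sketch of the preliminary-run heuristic described above
        # (hypothetical; the candidate step sizes and what counts as a
        # "reasonable" acceptance rate are assumptions, not part of this class):
        #
        #   for eps in [1e-3, 3e-3, 1e-2, 3e-2, 1e-1]:
        #       trial = ed.HMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1},
        #                      data={X: X_train, y: y_train.ravel()})
        #       trial.run(step_size=eps, n_steps=self.n_steps)
        #       accept_rate = trial.n_accept.eval() / float(trial.t.eval())
        #       print(eps, accept_rate)  # keep the largest eps with an acceptable rate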

        if self.deep_NN == False:
            inference = ed.HMC({
                W_0: qW_0,
                b_0: qb_0,
                W_1: qW_1,
                b_1: qb_1
            },
                               data={
                                   X: X_train,
                                   y: y_train.ravel()
                               })
        else:
            inference = ed.HMC(
                {
                    W_0: qW_0,
                    b_0: qb_0,
                    W_1: qW_1,
                    b_1: qb_1,
                    W_2: qW_2,
                    b_2: qb_2
                },
                data={
                    X: X_train,
                    y: y_train.ravel()
                })
        inference.run(step_size=self.step_size,
                      n_steps=self.n_steps)  # logdir='log'

        # drop first chunk of burn in samples
        if self.deep_NN == False:
            self.qW_0_keep = qW_0.params[self.burn_in:].eval()
            self.qW_1_keep = qW_1.params[self.burn_in:].eval()
            self.qb_0_keep = qb_0.params[self.burn_in:].eval()
            self.qb_1_keep = qb_1.params[self.burn_in:].eval()
        else:
            self.qW_0_keep = qW_0.params[self.burn_in:].eval()
            self.qW_1_keep = qW_1.params[self.burn_in:].eval()
            self.qb_0_keep = qb_0.params[self.burn_in:].eval()
            self.qW_2_keep = qW_2.params[self.burn_in:].eval()
            self.qb_1_keep = qb_1.params[self.burn_in:].eval()
            self.qb_2_keep = qb_2.params[self.burn_in:].eval()

        return
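    # A hypothetical follow-up (not part of the original class): predictions on new
    # inputs could average the network output over the retained posterior samples.
    # Sketch for the single-hidden-layer relu case, using numpy on the *_keep arrays:
    #
    #   def predict(self, X_test):
    #       preds = []
    #       for i in range(self.qW_0_keep.shape[0]):
    #           h = np.maximum(np.matmul(X_test, self.qW_0_keep[i]) + self.qb_0_keep[i], 0.0)
    #           preds.append(np.matmul(h, self.qW_1_keep[i]).ravel())
    #       return np.mean(preds, axis=0), np.std(preds, axis=0)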
Example #15
def ed_graph_init():
    # Graph for prior distributions
    if str(sys.argv[4]) == 'laplace':
        W_0 = Laplace(loc=tf.zeros([D, n_hidden]),
                      scale=tf.ones([D, n_hidden]))
        W_1 = Laplace(loc=tf.zeros([n_hidden, K]),
                      scale=(std**2 / n_hidden) * tf.ones([n_hidden, K]))
        b_0 = Laplace(loc=tf.zeros(n_hidden), scale=tf.ones(n_hidden))
        b_1 = Laplace(loc=tf.zeros(K), scale=(std**2 / n_hidden) * tf.ones(K))
    if str(sys.argv[4]) == 'normal':
        W_0 = Normal(loc=tf.zeros([D, n_hidden]), scale=tf.ones([D, n_hidden]))
        W_1 = Normal(loc=tf.zeros([n_hidden, K]),
                     scale=std * n_hidden**(-.5) * tf.ones([n_hidden, K]))
        b_0 = Normal(loc=tf.zeros(n_hidden), scale=tf.ones(n_hidden))
        b_1 = Normal(loc=tf.zeros(K), scale=std * n_hidden**(-.5) * tf.ones(K))
    if str(sys.argv[4]) == 'T':
        W_0 = StudentT(df=df * tf.ones([D, n_hidden]),
                       loc=tf.zeros([D, n_hidden]),
                       scale=tf.ones([D, n_hidden]))
        W_1 = StudentT(df=df * tf.ones([n_hidden, K]),
                       loc=tf.zeros([n_hidden, K]),
                       scale=std**2 / n_hidden * tf.ones([n_hidden, K]))
        b_0 = StudentT(df=df * tf.ones([n_hidden]),
                       loc=tf.zeros(n_hidden),
                       scale=tf.ones(n_hidden))
        b_1 = StudentT(df=df * tf.ones([K]),
                       loc=tf.zeros(K),
                       scale=std**2 / n_hidden * tf.ones(K))
    # Inputs
    x = tf.placeholder(tf.float32, [None, D])
    # Regression likelihood
    y = Normal(loc=nn(x, W_0, b_0, W_1, b_1),
               scale=std_out * tf.ones([tf.shape(x)[0]]))
    # We use a placeholder for the labels in anticipation of the training data.
    y_ph = tf.placeholder(tf.float32, [None])

    # Graph for posterior distribution
    if str(sys.argv[4]) == 'normal':
        qW_0 = Empirical(
            params=tf.Variable(tf.random_normal([n_samp, D, n_hidden])))
        qW_1 = Empirical(params=tf.Variable(
            tf.random_normal([n_samp, n_hidden, K],
                             stddev=std * (n_hidden**-.5))))
        qb_0 = Empirical(
            params=tf.Variable(tf.random_normal([n_samp, n_hidden])))
        qb_1 = Empirical(params=tf.Variable(
            tf.random_normal([n_samp, K], stddev=std * (n_hidden**-.5))))
    if str(sys.argv[4]) == 'laplace' or str(sys.argv[4]) == 'T':
        # Use a placeholder otherwise cannot assign a tensor > 2GB
        w0 = tf.placeholder(tf.float32, [n_samp, D, n_hidden])
        w1 = tf.placeholder(tf.float32, [n_samp, n_hidden, K])
        b0 = tf.placeholder(tf.float32, [n_samp, n_hidden])
        b1 = tf.placeholder(tf.float32, [n_samp, K])
        # Empirical distribution
        qW_0 = Empirical(params=tf.Variable(w0))
        qW_1 = Empirical(params=tf.Variable(w1))
        qb_0 = Empirical(params=tf.Variable(b0))
        qb_1 = Empirical(params=tf.Variable(b1))
    # Build inference graph
    if str(sys.argv[3]) == 'hmc':
        inference = ed.HMC({
            W_0: qW_0,
            b_0: qb_0,
            W_1: qW_1,
            b_1: qb_1
        },
                           data={y: y_ph})
    if str(sys.argv[3]) == 'sghmc':
        inference = ed.SGHMC({
            W_0: qW_0,
            b_0: qb_0,
            W_1: qW_1,
            b_1: qb_1
        },
                             data={y: y_ph})

    # Initialise the inference variables
    if str(sys.argv[3]) == 'hmc':
        inference.initialize(step_size=leap_size, n_steps=step_no, n_print=100)
    if str(sys.argv[3]) == 'sghmc':
        inference.initialize(step_size=leap_size, friction=0.4, n_print=100)

    if str(sys.argv[4]) == 'laplace' or str(sys.argv[4]) == 'T':
        return ((x, y), y_ph, W_0, b_0, W_1, b_1, qW_0, qb_0, qW_1, qb_1,
                inference, w0, w1, b0, b1)
    else:
        return (x,
                y), y_ph, W_0, b_0, W_1, b_1, qW_0, qb_0, qW_1, qb_1, inference
Example #16
from edward.models import Bernoulli, Normal, Empirical
from scipy.special import expit

# DATA
nsubj = 200
nitem = 25
trait_true = np.random.normal(size=[nsubj, 1])
thresh_true = np.random.normal(size=[1, nitem])
X_data = np.random.binomial(1, expit(trait_true - thresh_true))

# MODEL
trait = Normal(mu=tf.zeros([nsubj, 1]), sigma=tf.ones([nsubj, 1]))
thresh = Normal(mu=tf.zeros([1, nitem]), sigma=tf.ones([1, nitem]))
X = Bernoulli(logits=tf.sub(trait, thresh))

# INFERENCE
T = 5000  # number of posterior samples
q_trait = Empirical(params=tf.Variable(tf.zeros([T, nsubj, 1])))
q_thresh = Empirical(params=tf.Variable(tf.zeros([T, 1, nitem])))

inference = ed.HMC({trait: q_trait, thresh: q_thresh}, data={X: X_data})
inference.run(step_size=0.1)

# CRITICISM
# Check that the inferred posterior mean captures the true traits.
plt.scatter(trait_true, q_trait.mean().eval())
plt.show()

print("MSE between true traits and inferred posterior mean:")
print(np.mean(np.square(trait_true - q_trait.mean().eval())))
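This snippet targets an older Edward/TensorFlow release (the `mu`/`sigma` keyword arguments and `tf.sub`). With Edward 1.3+ and TensorFlow 1.0+ the model block reads roughly as follows, matching Example #10 above:

trait = Normal(loc=tf.zeros([nsubj, 1]), scale=tf.ones([nsubj, 1]))
thresh = Normal(loc=tf.zeros([1, nitem]), scale=tf.ones([1, nitem]))
X = Bernoulli(logits=trait - thresh)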
Example #17
"""Correlated normal posterior. Inference with Hamiltonian Monte Carlo.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import edward as ed
import tensorflow as tf

from edward.models import Empirical, MultivariateNormalFull

ed.set_seed(42)

# MODEL
z = MultivariateNormalFull(mu=tf.ones(2),
                           sigma=tf.constant([[1.0, 0.8], [0.8, 1.0]]))

# INFERENCE
qz = Empirical(params=tf.Variable(tf.random_normal([1000, 2])))

inference = ed.HMC({z: qz})
inference.run()

# CRITICISM
sess = ed.get_session()
mean, std = sess.run([qz.mean(), qz.std()])
print("Inferred posterior mean:")
print(mean)
print("Inferred posterior std:")
print(std)
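`MultivariateNormalFull`, the `mu`/`sigma` arguments, and `.std()` also belong to an older Edward/TensorFlow release. With newer versions the same example is written with a Cholesky factor, as in Example #2 above:

from edward.models import Empirical, MultivariateNormalTriL

# model with the newer API
z = MultivariateNormalTriL(
    loc=tf.ones(2),
    scale_tril=tf.cholesky(tf.constant([[1.0, 0.8], [0.8, 1.0]])))

# criticism with the newer API
mean, stddev = sess.run([qz.mean(), qz.stddev()])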
Example #18
from edward.models import Empirical, Normal

ed.set_seed(42)

# DATA
x_data = np.array([0.0] * 50)

# MODEL: Normal-Normal with known variance
mu = Normal(mu=0.0, sigma=1.0)
x = Normal(mu=tf.ones(50) * mu, sigma=1.0)

# INFERENCE
qmu = Empirical(params=tf.Variable(tf.zeros(1000)))

# analytic solution: N(mu=0.0, sigma=\sqrt{1/51}=0.140)
inference = ed.HMC({mu: qmu}, data={x: x_data})
inference.run()

# CRITICISM
sess = ed.get_session()
mean, std = sess.run([qmu.mean(), qmu.std()])
print("Inferred posterior mean:")
print(mean)
print("Inferred posterior std:")
print(std)

# Check convergence with visual diagnostics.
samples = sess.run(qmu.params)

# Plot histogram.
plt.hist(samples, bins='auto')
Example #19
#data = tf.constant("C", shape=(N,)) #???
#data = tf.constant(0, shape=(N,))
#data = tf.constant(20, shape=(N,))
data = np.ones((N,))*17

##Infer:
T=10000
qtheta = Empirical(params=tf.Variable(0.5+tf.zeros([T]))) #Why need tf.Variable here?
tf.summary.scalar('qtheta', qtheta)

#proposal_theta = Beta(concentration1=1.0, concentration0=1.0, sample_shape=(1,))
# proposal_theta = Normal(loc=theta,scale=0.5)
# inference = ed.MetropolisHastings({theta: qtheta}, {theta: proposal_theta}, {formulas: data})

sess = ed.get_session()
inference = ed.HMC({theta: qtheta}, {formulas: data})
inference.initialize()

tf.global_variables_initializer().run()

for _ in range(inference.n_iter):
  info_dict = inference.update()
  inference.print_progress(info_dict)

inference.finalize()
train_writer = tf.summary.FileWriter('/tmp/tensorflow/',sess.graph)

# qtheta = Beta(tf.Variable(1.0), tf.Variable(1.0))  #Why need tf.Variable here?
# inference = ed.KLqp({theta: qtheta}, {formulas: data})

Example #20
        w0 = tf.Variable(p0)
        w1 = tf.Variable(p1)
        b0 = tf.Variable(pp0)
        b1 = tf.Variable(pp1)
        # Empirical distribution
        qW_0 = Empirical(params=w0)
        qW_1 = Empirical(params=w1)
        qb_0 = Empirical(params=b0)
        qb_1 = Empirical(params=b1)

    if str(sys.argv[2]) == 'hmc':
        inference = ed.HMC({
            W_0: qW_0,
            b_0: qb_0,
            W_1: qW_1,
            b_1: qb_1
        },
                           data={y: y_ph})
    if str(sys.argv[2]) == 'sghmc':
        inference = ed.SGHMC({
            W_0: qW_0,
            b_0: qb_0,
            W_1: qW_1,
            b_1: qb_1
        },
                             data={y: y_ph})

# Initialise the inference variables
if str(sys.argv[2]) == 'hmc':
    inference.initialize(step_size=leap_size, n_steps=step_no, n_print=100)
Example #21
def main(_):
    ed.set_seed(42)

    # DATA
    X_train, y_train = build_toy_dataset(FLAGS.N)

    # MODEL
    X = tf.placeholder(tf.float32, [FLAGS.N, FLAGS.D])
    w = Normal(loc=tf.zeros(FLAGS.D), scale=3.0 * tf.ones(FLAGS.D))
    b = Normal(loc=tf.zeros([]), scale=3.0 * tf.ones([]))
    y = Bernoulli(logits=ed.dot(X, w) + b)

    # INFERENCE
    qw = Empirical(params=tf.get_variable("qw/params", [FLAGS.T, FLAGS.D]))
    qb = Empirical(params=tf.get_variable("qb/params", [FLAGS.T]))

    inference = ed.HMC({w: qw, b: qb}, data={X: X_train, y: y_train})
    inference.initialize(n_print=10, step_size=0.6)

    # Alternatively, use variational inference.
    # qw_loc = tf.get_variable("qw_loc", [FLAGS.D])
    # qw_scale = tf.nn.softplus(tf.get_variable("qw_scale", [FLAGS.D]))
    # qb_loc = tf.get_variable("qb_loc", []) + 10.0
    # qb_scale = tf.nn.softplus(tf.get_variable("qb_scale", []))

    # qw = Normal(loc=qw_loc, scale=qw_scale)
    # qb = Normal(loc=qb_loc, scale=qb_scale)

    # inference = ed.KLqp({w: qw, b: qb}, data={X: X_train, y: y_train})
    # inference.initialize(n_print=10, n_iter=600)

    tf.global_variables_initializer().run()

    # Set up figure.
    fig = plt.figure(figsize=(8, 8), facecolor='white')
    ax = fig.add_subplot(111, frameon=False)
    plt.ion()
    plt.show(block=False)

    # Build samples from inferred posterior.
    n_samples = 50
    inputs = np.linspace(-5, 3, num=400, dtype=np.float32).reshape((400, 1))
    probs = tf.stack([
        tf.sigmoid(ed.dot(inputs, qw.sample()) + qb.sample())
        for _ in range(n_samples)
    ])

    for t in range(inference.n_iter):
        info_dict = inference.update()
        inference.print_progress(info_dict)

        if t % inference.n_print == 0:
            outputs = probs.eval()

            # Plot data and functions
            plt.cla()
            ax.plot(X_train[:], y_train, 'bx')
            for s in range(n_samples):
                ax.plot(inputs[:], outputs[s], alpha=0.2)

            ax.set_xlim([-5, 3])
            ax.set_ylim([-0.5, 1.5])
            plt.draw()
            plt.pause(1.0 / 60.0)
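`build_toy_dataset` is assumed to be defined earlier in this script. A hypothetical generator consistent with the 1-D plot range used above (the threshold and noise level are assumptions) is:

def build_toy_dataset(N, noise_std=0.1):
    # hypothetical data generator: 1-D inputs with noisily thresholded labels
    X = np.linspace(-5, 3, num=N, dtype=np.float32).reshape((N, 1))
    y = (X[:, 0] + np.random.normal(0, noise_std, size=N) > -1.0).astype(np.int32)
    return X, y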
Example #22
def ed_graph_2(disc=1):
    # Priors
    if str(sys.argv[4]) == 'laplace':
        W_0 = Laplace(loc=tf.zeros([D, n_hidden]),
                      scale=(std**2 / D) * tf.ones([D, n_hidden]))
        W_1 = Laplace(loc=tf.zeros([n_hidden, n_hidden]),
                      scale=(std**2 / n_hidden) *
                      tf.ones([n_hidden, n_hidden]))
        W_2 = Laplace(loc=tf.zeros([n_hidden, K]),
                      scale=(std**2 / n_hidden) * tf.ones([n_hidden, K]))
        b_0 = Laplace(loc=tf.zeros(n_hidden),
                      scale=(std**2 / D) * tf.ones(n_hidden))
        b_1 = Laplace(loc=tf.zeros(n_hidden),
                      scale=(std**2 / n_hidden) * tf.ones(n_hidden))
        b_2 = Laplace(loc=tf.zeros(K), scale=(std**2 / n_hidden) * tf.ones(K))

    if str(sys.argv[4]) == 'normal':
        W_0 = Normal(loc=tf.zeros([D, n_hidden]),
                     scale=std * D**-.5 * tf.ones([D, n_hidden]))
        W_1 = Normal(loc=tf.zeros([n_hidden, K]),
                     scale=std * n_hidden**-.5 * tf.ones([n_hidden, K]))
        W_2 = Normal(loc=tf.zeros([n_hidden, K]),
                     scale=std * n_hidden**-.5 * tf.ones([n_hidden, K]))
        b_0 = Normal(loc=tf.zeros(n_hidden),
                     scale=std * D**-.5 * tf.ones(n_hidden))
        b_1 = Normal(loc=tf.zeros(n_hidden),
                     scale=10 * n_hidden**(-.5) * tf.ones(n_hidden))
        b_2 = Normal(loc=tf.zeros(K), scale=10 * n_hidden**(-.5) * tf.ones(K))

    if str(sys.argv[4]) == 'T':
        W_0 = StudentT(df=df * tf.ones([D, n_hidden]),
                       loc=tf.zeros([D, n_hidden]),
                       scale=(std**2 / D) * tf.ones([D, n_hidden]))
        W_1 = StudentT(df=df * tf.ones([n_hidden, n_hidden]),
                       loc=tf.zeros([n_hidden, n_hidden]),
                       scale=(std**2 / n_hidden) *
                       tf.ones([n_hidden, n_hidden]))
        W_2 = StudentT(df=df * tf.ones([n_hidden, K]),
                       loc=tf.zeros([n_hidden, K]),
                       scale=(std**2 / n_hidden) * tf.ones([n_hidden, K]))
        b_0 = StudentT(df=df * tf.ones([n_hidden]),
                       loc=tf.zeros(n_hidden),
                       scale=(std**2 / D) * tf.ones(n_hidden))
        b_1 = StudentT(df=df * tf.ones([n_hidden]),
                       loc=tf.zeros(n_hidden),
                       scale=(std**2 / n_hidden) * tf.ones(n_hidden))
        b_2 = StudentT(df=df * tf.ones([K]),
                       loc=tf.zeros(K),
                       scale=(std**2 / n_hidden) * tf.ones(K))

    x = tf.placeholder(tf.float32, [None, None])
    y = Categorical(logits=nn(x, W_0, b_0, W_1, b_1, W_2, b_2))
    # We use a placeholder for the labels in anticipation of the training data.
    y_ph = tf.placeholder(tf.int32, [N])

    # Use a placeholder for the pre-trained posteriors
    p0 = tf.placeholder(tf.float32, [n_samp, D, n_hidden])
    p1 = tf.placeholder(tf.float32, [n_samp, n_hidden, n_hidden])
    p2 = tf.placeholder(tf.float32, [n_samp, n_hidden, K])
    pp0 = tf.placeholder(tf.float32, [n_samp, n_hidden])
    pp1 = tf.placeholder(tf.float32, [n_samp, n_hidden])
    pp2 = tf.placeholder(tf.float32, [n_samp, K])

    w0 = tf.Variable(p0)
    w1 = tf.Variable(p1)
    w2 = tf.Variable(p2)
    b0 = tf.Variable(pp0)
    b1 = tf.Variable(pp1)
    b2 = tf.Variable(pp2)
    # Empirical distribution
    qW_0 = Empirical(params=w0)
    qW_1 = Empirical(params=w1)
    qW_2 = Empirical(params=w2)
    qb_0 = Empirical(params=b0)
    qb_1 = Empirical(params=b1)
    qb_2 = Empirical(params=b2)

    if str(sys.argv[3]) == 'hmc':
        inference = ed.HMC(
            {
                W_0: qW_0,
                b_0: qb_0,
                W_1: qW_1,
                b_1: qb_1,
                W_2: qW_2,
                b_2: qb_2
            },
            data={y: y_ph})
    if str(sys.argv[3]) == 'sghmc':
        inference = ed.SGHMC(
            {
                W_0: qW_0,
                b_0: qb_0,
                W_1: qW_1,
                b_1: qb_1,
                W_2: qW_2,
                b_2: qb_2
            },
            data={y: y_ph})

    # Initialise the inference variables
    if str(sys.argv[3]) == 'hmc':
        inference.initialize(step_size=leap_size,
                             n_steps=step_no,
                             n_print=100,
                             scale={y: float(mnist.train.num_examples) / N})
    if str(sys.argv[3]) == 'sghmc':
        inference.initialize(step_size=leap_size,
                             friction=0.4,
                             n_print=100,
                             scale={y: float(mnist.train.num_examples) / N})

    return ((x, y), y_ph, W_0, b_0, W_1, b_1, W_2, b_2, qW_0, qb_0, qW_1, qb_1,
            qW_2, qb_2, inference, p0, p1, p2, pp0, pp1, pp2, w0, w1, w2, b0,
            b1, b2)
Example #23
#     for i in range(10):
#         print(x.eval())

##Observations:
#data=tf.ones(10, dtype=tf.int32) #NOT WORKING!
data = [1, 1, 1, 1, 1, 1, 1, 1, 0, 1]

##Infer:

#Variational
#qtheta = Beta(tf.Variable(1.0), tf.Variable(1.0))  #Why need tf.Variable here?
# inference = ed.KLqp({theta: qtheta}, {x: data})
# inference.run(n_samples=5, n_iter=1000)

#MonteCarlo
T = 10000
qtheta = Empirical(
    params=tf.Variable(0.5 + tf.zeros([T, 1]))
)  #Beta(tf.Variable(1.0), tf.Variable(1.0))  #Why need tf.Variable here?
#proposal_theta = Beta(concentration1=1.0, concentration0=1.0, sample_shape=(1,))
#proposal_theta = Normal(loc=theta,scale=0.5)
#inference = ed.MetropolisHastings({theta: qtheta}, {theta: proposal_theta}, {x: data})
inference = ed.HMC({theta: qtheta}, {x: data})
inference.run()

##Results:
qtheta_samples = qtheta.sample(1000).eval()
print(qtheta_samples.mean())
plt.hist(qtheta_samples)
plt.show()
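The snippet is truncated: `theta` and `x` are defined further up in the original script. A plausible reconstruction of the missing model, a Beta prior on a coin's bias with ten Bernoulli observations (the exact shapes are assumptions), is:

from edward.models import Beta, Bernoulli

theta = Beta(1.0, 1.0, sample_shape=(1,))  # prior over the success probability
x = Bernoulli(probs=tf.ones(10) * theta)   # ten flips sharing that probability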
Example #24
 def train(self,
           filename,
           total_batches=10,
           discrete_batch_iters=1000,
           continus_batch_iters=10000):
     sess = tf.Session()
     restorer = tf.train.import_meta_graph(filename, clear_devices=True)
     print("<meta graph imported>")
     [
         tf.add_to_collection(
             'd_pi_q',
             Empirical(tf.Variable(tf.zeros(tf.shape(var))),
                       name='Empirical_d_pi_q_' +
                       str.split(str.split(var.name, '/')[0], '_')[-2]))
         for var in tf.get_collection('d_pi')
     ]
     for var in tf.get_collection('c_w'):
         idx = str.split(str.split(var.name, '/')[0], '_')[-2]
         tf.add_to_collection(
             'c_w_q',
             Empirical(tf.Variable(tf.zeros(tf.shape(var))),
                       name='Empirical_c_w_q_' + idx))
         print(var.get_shape().as_list())
         tf.add_to_collection(
             'c_b_q',
             Empirical(tf.Variable(tf.zeros(
                 var.get_shape().as_list()[:-1])),
                       name='Empirical_c_b_q_' + idx))
         tf.add_to_collection(
             'c_sigma_q',
             Empirical(tf.Variable(tf.zeros([1])),
                       name='Empirical_c_sigma_q_' + idx))
     print("<variables collected>")
     variable_map = dict(
         zip(
             tf.get_collection('d') + tf.get_collection('c'),
             self.design_matrix[:,
                                tuple(np.arange(self.num_discrete_variables)
                                      )].flatten('F').tolist() +
             self.design_matrix[:, self.continus_variable_idxs].flatten(
                 'F').tolist()))
     discrete_prior_map = dict(
         zip(tf.get_collection('d_pi'), tf.get_collection('d_pi_q')))
     continus_prior_map = dict(
         zip(
             tf.get_collection('c_w') + tf.get_collection('c_b') +
             tf.get_collection('c_sigma'),
             tf.get_collection('c_w_q') + tf.get_collection('c_b_q') +
             tf.get_collection('c_sigma_q')))
     print("<running inference>")
     inference_d = ed.Gibbs(discrete_prior_map,
                            data=dict(variable_map.items() +
                                      continus_prior_map.items()))
     inference_c = ed.HMC(continus_prior_map,
                          data=dict(variable_map.items() +
                                    discrete_prior_map.items()))
     inference_d.initialize(n_iter=discrete_batch_iters)
     inference_c.initialize(n_iter=continus_batch_iters)
     sess.run(tf.global_variables_initializer())
     for _ in range(total_batches):
         for _ in range(inference_d.n_iter):
             info_dict = inference_d.update()
             inference_d.print_progress(info_dict)
         inference_d.n_iter += discrete_batch_iters
         inference_d.n_print = int(discrete_batch_iters / 10)
         inference_d.progbar = Progbar(inference_d.n_iter)
         for _ in range(inference_c.n_iter):
             info_dict = inference_c.update()
             inference_c.print_progress(info_dict)
         inference_c.n_iter += continus_batch_iters
         inference_c.n_print = int(continus_batch_iters / 10)
         inference_c.progbar = Progbar(inference_c.n_iter)
     inference_d.finalize()
     inference_c.finalize()
     filename = '.'.join(str.split(filename, '.')[:-1]) + '_trained_model'
     saver = tf.train.Saver()
     saver.save(sess, filename)
     tf.train.export_meta_graph(
         filename + '.meta',
         as_text=True,
         collection_list=['d_pi', 'd', 'c_w', 'c_b', 'c_sigma', 'c'])
Example #25
N = 40  # number of data points
D = 1  # number of features

X_train, y_train = build_toy_dataset(N)

X = tf.placeholder(tf.float32, [N, D])
w = Normal(loc=tf.zeros(D), scale=1.0 * tf.ones(D))
b = Normal(loc=tf.zeros([]), scale=1.0 * tf.ones([]))
y = Bernoulli(logits=ed.dot(X, w) + b)

# inference
T = 5000
qw = Empirical(params=tf.Variable(tf.random_normal([T, D])))
qb = Empirical(params=tf.Variable(tf.random_normal([T])))

inference = ed.HMC({w: qw, b: qb}, data={X: X_train, y: y_train})
inference.initialize(n_print=10, step_size=0.6)

tf.global_variables_initializer().run()

# criticism & set up figure
fig = plt.figure(figsize=(8, 8), facecolor='white')
ax = fig.add_subplot(111, frameon=False)
plt.ion()
plt.show(block=False)

n_samples = 50
inputs = np.linspace(-5, 3, num=400, dtype=np.float32).reshape((400, 1))
probs = tf.stack([
    tf.sigmoid(ed.dot(inputs, qw.sample()) + qb.sample())
    for _ in range(n_samples)
Example #26
    def test_monte_carlo(self):
        tf.InteractiveSession()
        ed.set_seed(42)

        # DATA
        X_train = np.zeros([500, 100])
        y_train = np.zeros(500)

        N = X_train.shape[0]  # data points
        D = X_train.shape[1]  # feature
        T = 1  # number of MCMC samples

        # MODEL
        W_1 = Normal(mu=tf.zeros([D, 20]), sigma=tf.ones([D, 20]) * 100)
        W_2 = Normal(mu=tf.zeros([20, 15]), sigma=tf.ones([20, 15]) * 100)
        W_3 = Normal(mu=tf.zeros([15, 1]), sigma=tf.ones([15, 1]) * 100)
        b_1 = Normal(mu=tf.zeros(20), sigma=tf.ones(20) * 100)
        b_2 = Normal(mu=tf.zeros(15), sigma=tf.ones(15) * 100)

        x_ph = tf.placeholder(tf.float32, [N, D])
        y = Bernoulli(logits=four_layer_nn(x_ph, W_1, W_2, W_3, b_1, b_2))

        # INFERENCE
        qW_1 = Empirical(params=tf.Variable(tf.random_normal([T, D, 20])))
        qW_2 = Empirical(params=tf.Variable(tf.random_normal([T, 20, 15])))
        qW_3 = Empirical(params=tf.Variable(tf.random_normal([T, 15, 1])))
        qb_1 = Empirical(params=tf.Variable(tf.random_normal([T, 20])))
        qb_2 = Empirical(params=tf.Variable(tf.random_normal([T, 15])))

        # note ideally these would be separate test methods; there's an
        # issue with the tensorflow graph when re-running the above
        # unfortunately
        inference = ed.HMC(
            {
                W_1: qW_1,
                b_1: qb_1,
                W_2: qW_2,
                b_2: qb_2,
                W_3: qW_3
            },
            data={
                y: y_train,
                x_ph: X_train
            })
        inference.run()

        inference = ed.SGLD(
            {
                W_1: qW_1,
                b_1: qb_1,
                W_2: qW_2,
                b_2: qb_2,
                W_3: qW_3
            },
            data={
                y: y_train,
                x_ph: X_train
            })
        inference.run()

        inference = ed.MetropolisHastings(
            {
                W_1: qW_1,
                b_1: qb_1,
                W_2: qW_2,
                b_2: qb_2,
                W_3: qW_3
            }, {
                W_1: W_1,
                b_1: b_1,
                W_2: W_2,
                b_2: b_2,
                W_3: W_3
            },
            data={
                y: y_train,
                x_ph: X_train
            })
        inference.run()
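`four_layer_nn` is not shown in this test; a sketch consistent with the weight shapes above (the tanh activations are an assumption) is:

def four_layer_nn(x, W_1, W_2, W_3, b_1, b_2):
    # hypothetical helper: [N, D] inputs -> [N] logits through two hidden layers
    h = tf.tanh(tf.matmul(x, W_1) + b_1)
    h = tf.tanh(tf.matmul(h, W_2) + b_2)
    return tf.reshape(tf.matmul(h, W_3), [-1])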
Example #27
import edward as ed
import numpy as np
import tensorflow as tf

from edward.models import Bernoulli, Empirical, Normal
from scipy.special import expit

ed.set_seed(123)
N = 5810  # number of data points
D = 54  # number of features

# DATA
w_true = np.random.randn(D)
X_data = np.random.randn(N, D)
p = expit(np.dot(X_data, w_true))
y_data = np.array([np.random.binomial(1, i) for i in p])

# MODEL
X = tf.Variable(X_data.astype(np.float32), trainable=False)
w = Normal(mu=tf.zeros(D), sigma=tf.ones(D))
y = Bernoulli(logits=ed.dot(X, w))

# INFERENCE
T = 5000
qw = Empirical(params=tf.Variable(tf.zeros([T, D])))
inference = ed.HMC({w: qw}, data={y: y_data})
inference.run(step_size=0.05)

# CRITICISM
print("Mean squared error in true values to inferred posterior mean:")
print(tf.reduce_mean(tf.square(w_true - qw.mean())).eval())
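A possible extra criticism step (not in the original snippet, but mirroring the `ed.evaluate` usage in Example #28) would score the posterior predictive on the observed labels:

y_post = ed.copy(y, {w: qw})
print("Binary accuracy on the training data:")
print(ed.evaluate('binary_accuracy', data={y_post: y_data}))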
Example #28
def main(_):
    outdir = setup_outdir()
    ed.set_seed(FLAGS.seed)

    ((Xtrain, ytrain), (Xtest, ytest)) = blr_utils.get_data()
    N, D = Xtrain.shape
    N_test, D_test = Xtest.shape

    print("Xtrain")
    print(Xtrain)
    print(Xtrain.shape)

    if 'synthetic' in FLAGS.exp:
        w = Normal(loc=tf.zeros(D), scale=1.0 * tf.ones(D))
        X = tf.placeholder(tf.float32, [N, D])
        y = Bernoulli(logits=ed.dot(X, w))

        #n_posterior_samples = 100000
        n_posterior_samples = 10
        qw_empirical = Empirical(
            params=tf.get_variable("qw/params", [n_posterior_samples, D]))
        inference = ed.HMC({w: qw_empirical}, data={X: Xtrain, y: ytrain})
        inference.initialize(n_print=10, step_size=0.6)

        tf.global_variables_initializer().run()
        inference.run()

        empirical_samples = qw_empirical.sample(50).eval()
        #fig, ax = plt.subplots()
        #ax.scatter(posterior_samples[:,0], posterior_samples[:,1])
        #plt.show()

    weights, q_components = [], []
    ll_trains, ll_tests, bin_ac_trains, bin_ac_tests, elbos, rocs, gaps = [], [], [], [], [], [], []
    total_time, times = 0., []
    for iter in range(0, FLAGS.n_fw_iter):
        print("iter %d" % iter)
        g = tf.Graph()
        with g.as_default():
            sess = tf.InteractiveSession()
            with sess.as_default():
                tf.set_random_seed(FLAGS.seed)
                # MODEL
                w = Normal(loc=tf.zeros(D), scale=1.0 * tf.ones(D))

                X = tf.placeholder(tf.float32, [N, D])
                y = Bernoulli(logits=ed.dot(X, w))

                X_test = tf.placeholder(tf.float32, [N_test, D_test])
                y_test = Bernoulli(logits=ed.dot(X_test, w))

                qw = construct_base_dist([D], iter, 'qw')
                inference_time_start = time.time()
                inference = relbo.KLqp({w: qw},
                                       fw_iterates=get_fw_iterates(
                                           weights, w, q_components),
                                       data={
                                           X: Xtrain,
                                           y: ytrain
                                       },
                                       fw_iter=iter)
                tf.global_variables_initializer().run()
                inference.run(n_iter=FLAGS.LMO_iter)
                inference_time_end = time.time()
                total_time += float(inference_time_end - inference_time_start)

                joint = Joint(Xtrain, ytrain, sess)
                if iter > 0:
                    qtw_prev = build_mixture(weights, q_components)
                    gap = compute_duality_gap(joint, qtw_prev, qw)
                    gaps.append(gap)
                    np.savetxt(os.path.join(outdir, "gaps.csv"),
                               gaps,
                               delimiter=',')
                    print("duality gap", gap)

                # update weights
                gamma = 2. / (iter + 2.)
                weights = [(1. - gamma) * w for w in weights]
                weights.append(gamma)

                # update components
                q_components = update_iterate(q_components, qw)

                if len(q_components) > 1 and FLAGS.fw_variant == 'fc':
                    print("running fully corrective")
                    # overwrite the weights
                    weights = fully_corrective(
                        build_mixture(weights, q_components), joint)

                    if True:
                        # remove inactive iterates
                        weights = list(weights)
                        for i in reversed(range(len(weights))):
                            if weights[i] == 0:
                                del weights[i]
                                del q_components[i]
                        weights = np.array(
                            weights
                        )  # TODO type acrobatics to make elements deletable
                elif len(q_components
                         ) > 1 and FLAGS.fw_variant == 'line_search':
                    print("running line search")
                    weights = line_search(
                        build_mixture(weights[:-1], q_components[:-1]), qw,
                        joint)

                qtw_new = build_mixture(weights, q_components)

                if False:
                    for i, comp in enumerate(qtw_new.components):
                        print("component", i, "\tmean",
                              comp.mean().eval(), "\tstddev",
                              comp.stddev().eval())

                train_lls = [
                    sess.run(y.log_prob(ytrain),
                             feed_dict={
                                 X: Xtrain,
                                 w: qtw_new.sample().eval()
                             }) for _ in range(50)
                ]
                train_lls = np.mean(train_lls, axis=0)
                ll_trains.append((np.mean(train_lls), np.std(train_lls)))

                test_lls = [
                    sess.run(y_test.log_prob(ytest),
                             feed_dict={
                                 X_test: Xtest,
                                 w: qtw_new.sample().eval()
                             }) for _ in range(50)
                ]
                test_lls = np.mean(test_lls, axis=0)
                ll_tests.append((np.mean(test_lls), np.std(test_lls)))

                logits = np.mean([
                    np.dot(Xtest,
                           qtw_new.sample().eval()) for _ in range(50)
                ],
                                 axis=0)
                ypred = tf.sigmoid(logits).eval()
                roc_score = roc_auc_score(ytest, ypred)
                rocs.append(roc_score)

                print('roc_score', roc_score)
                print('ytrain', np.mean(train_lls), np.std(train_lls))
                print('ytest', np.mean(test_lls), np.std(test_lls))

                order = np.argsort(ytest)
                plt.scatter(range(len(ypred)), ypred[order], c=ytest[order])
                plt.savefig(os.path.join(outdir, 'ypred%d.pdf' % iter))
                plt.close()

                np.savetxt(os.path.join(outdir, "train_lls.csv"),
                           ll_trains,
                           delimiter=',')
                np.savetxt(os.path.join(outdir, "test_lls.csv"),
                           ll_tests,
                           delimiter=',')
                np.savetxt(os.path.join(outdir, "rocs.csv"),
                           rocs,
                           delimiter=',')

                x_post = ed.copy(y, {w: qtw_new})
                x_post_t = ed.copy(y_test, {w: qtw_new})

                print(
                    'log lik train',
                    ed.evaluate('log_likelihood',
                                data={
                                    x_post: ytrain,
                                    X: Xtrain
                                }))
                print(
                    'log lik test',
                    ed.evaluate('log_likelihood',
                                data={
                                    x_post_t: ytest,
                                    X_test: Xtest
                                }))

                #ll_train = ed.evaluate('log_likelihood', data={x_post: ytrain, X:Xtrain})
                #ll_test = ed.evaluate('log_likelihood', data={x_post_t: ytest, X_test:Xtest})
                bin_ac_train = ed.evaluate('binary_accuracy',
                                           data={
                                               x_post: ytrain,
                                               X: Xtrain
                                           })
                bin_ac_test = ed.evaluate('binary_accuracy',
                                          data={
                                              x_post_t: ytest,
                                              X_test: Xtest
                                          })
                print('binary accuracy train', bin_ac_train)
                print('binary accuracy test', bin_ac_test)
                #latest_elbo = elbo(qtw_new, joint, w)

                #foo = ed.KLqp({w: qtw_new}, data={X: Xtrain, y: ytrain})
                #op = myloss(foo)
                #print("myloss", sess.run(op[0], feed_dict={X: Xtrain, y: ytrain}), sess.run(op[1], feed_dict={X: Xtrain, y: ytrain}))

                #append_and_save(ll_trains, ll_train, "loglik_train.csv", np.savetxt)
                #append_and_save(ll_tests, ll_train, "loglik_test.csv", np.savetxt)
                #append_and_save(bin_ac_trains, bin_ac_train, "bin_acc_train.csv", np.savetxt)
                #append_and_save(bin_ac_tests, bin_ac_test, "bin_acc_test.csv", np.savetxt)
                ##append_and_save(elbos, latest_elbo, "elbo.csv", np.savetxt)

                #print('log-likelihood train ', ll_train)
                #print('log-likelihood test ', ll_test)
                #print('binary_accuracy train ', bin_ac_train)
                #print('binary_accuracy test ', bin_ac_test)
                #print('elbo', latest_elbo)
                times.append(total_time)
                np.savetxt(os.path.join(setup_outdir(), 'times.csv'), times)

        tf.reset_default_graph()
Example #29
)

# Inference arguments
latent_vars = {mu: q_mu, inv_softplus_sigma: q_inv_softplus_sigma}
data = {y: y_train}

# Inference
inference = ed.KLqp(latent_vars, data)
inference.run(n_samples=5, n_iter=2500)

print(q_mu.mean().eval())
print(q_inv_softplus_sigma.mean().eval())

# Empirical Model with Sampler

# Posterior distribution families
q_mu = Empirical(params=tf.Variable(tf.random_normal([2000])))
q_inv_softplus_sigma = Empirical(params=tf.Variable(tf.random_normal([2000])))

# Inference arguments
latent_vars = {mu: q_mu, inv_softplus_sigma: q_inv_softplus_sigma}
data = {y: y_train}

# Inference
inference = ed.HMC(latent_vars, data)
inference.run(step_size=0.003, n_steps=5)

print(tf.reduce_mean(q_mu.params[1000:]).eval())
print(
    tf.nn.softplus(tf.reduce_mean(q_inv_softplus_sigma.params[1000:])).eval())
Example #30
    os.makedirs(IMG_DIR)

# DATA
mnist = input_data.read_data_sets(DATA_DIR, one_hot=True)
x_train, _ = mnist.train.next_batch(N)

# MODEL
z = Normal(mu=tf.zeros([N, d]), sigma=tf.ones([N, d]))
logits = generative_network(z)
x = Bernoulli(logits=logits)

# INFERENCE
T = int(100 * 1000)
qz = Empirical(params=tf.Variable(tf.random_normal([T, N, d])))

inference_e = ed.HMC({z: qz}, data={x: x_train})
inference_e.initialize()

inference_m = ed.MAP(data={x: x_train, z: tf.gather(qz.params, inference_e.t)})
optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
inference_m.initialize(optimizer=optimizer)

init = tf.global_variables_initializer()
init.run()

n_iter_per_epoch = 100
n_epoch = T // n_iter_per_epoch
for epoch in range(n_epoch):
    avg_loss = 0.0

    widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()]