Example #1
    def _test(self, n, *args, **kwargs):
        rv = ParamMixture(*args, **kwargs)
        val_est = rv.sample(n).shape
        val_true = tf.TensorShape(n).concatenate(
            rv.cat.batch_shape).concatenate(rv.components.event_shape)
        self.assertEqual(val_est, val_true)

        self.assertEqual(rv.sample_shape, rv.cat.sample_shape)
        self.assertEqual(rv.sample_shape, rv.components.sample_shape)
        self.assertEqual(rv.batch_shape, rv.cat.batch_shape)
        self.assertEqual(rv.event_shape, rv.components.event_shape)
Example #2
  def _test(self, n, *args, **kwargs):
    rv = ParamMixture(*args, **kwargs)
    val_est = rv.sample(n).shape
    val_true = tf.TensorShape(n).concatenate(
        rv.cat.batch_shape).concatenate(rv.components.event_shape)
    self.assertEqual(val_est, val_true)

    self.assertEqual(rv.sample_shape, rv.cat.sample_shape)
    self.assertEqual(rv.sample_shape, rv.components.sample_shape)
    self.assertEqual(rv.batch_shape, rv.cat.batch_shape)
    self.assertEqual(rv.event_shape, rv.components.event_shape)
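A minimal sketch of how this shape-checking helper might be invoked from a sibling test method, assuming Edward's ParamMixture and a three-component Normal mixture; the method name and the parameter values below are illustrative, not taken from the original test:

  def test_normal(self):
    probs = np.array([0.2, 0.3, 0.5], np.float32)
    loc = np.array([1.0, 5.0, 7.0], np.float32)
    scale = np.array([1.5, 1.5, 1.5], np.float32)
    # Draws 50 samples and checks that their shape equals
    # sample_shape + cat.batch_shape + components.event_shape.
    self._test(50, probs, {'loc': loc, 'scale': scale}, Normal)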
Example #3
 def __init__(self, K, D, N, nu, use_param=False):
     self.K = K  # number of topics
     self.D = D  # number of documents
     self.N = N  # number of words of each document
     self.nu = nu
     self.alpha = alpha = tf.zeros([K]) + 0.1
     self.sigmasq = InverseGamma(tf.ones(nu), tf.ones(nu), sample_shape=K)
     self.sigma = sigma = tf.sqrt(self.sigmasq)
     self.mu = mu = Normal(tf.zeros(nu), tf.ones(nu), sample_shape=K)
     self.theta = theta = [None] * D
     self.z = z = [None] * D
     self.w = w = [None] * D
     for d in range(D):
         theta[d] = Dirichlet(alpha)
         if use_param:
             w[d] = ParamMixture(mixing_weights=theta[d],
                                 component_params={
                                     'loc': mu,
                                     'scale_diag': sigma
                                 },
                                 component_dist=MultivariateNormalDiag,
                                 sample_shape=N[d])
             z[d] = w[d].cat
         else:
             z[d] = Categorical(probs=theta[d], sample_shape=N[d])
             components = [
                 MultivariateNormalDiag(loc=tf.gather(mu, k),
                                        scale_diag=tf.gather(self.sigma, k),
                                        sample_shape=N[d]) for k in range(K)
             ]
             w[d] = Mixture(cat=z[d],
                            components=components,
                            sample_shape=N[d])
Example #4
 def __init__(self, num_data, num_cluster, vector_dim, num_mcmc_sample):
     self.K = num_cluster
     self.D = vector_dim
     self.N = num_data
     self.pi = Dirichlet(tf.ones(self.K))
     self.mu = Normal(tf.zeros(self.D),
                      tf.ones(self.D),
                      sample_shape=self.K)
     self.sigmasq = InverseGamma(tf.ones(self.D),
                                 tf.ones(self.D),
                                 sample_shape=self.K)
     self.x = ParamMixture(self.pi, {
         'loc': self.mu,
         'scale_diag': tf.sqrt(self.sigmasq)
     },
                           MultivariateNormalDiag,
                           sample_shape=self.N)
     self.z = self.x.cat
     self.T = num_mcmc_sample  # number of MCMC samples
     self.qpi = Empirical(tf.Variable(tf.ones([self.T, self.K]) / self.K))
     self.qmu = Empirical(tf.Variable(tf.zeros([self.T, self.K, self.D])))
     self.qsigmasq = Empirical(
         tf.Variable(tf.ones([self.T, self.K, self.D])))
     self.qz = Empirical(
         tf.Variable(tf.zeros([self.T, self.N], dtype=tf.int32)))
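A hedged usage sketch of the class above: the class name GaussianMixture, the constructor arguments, and x_train are illustrative assumptions, not part of the original code. The Empirical variables defined in the constructor are the usual inputs to Edward's Gibbs sampler.

model = GaussianMixture(num_data=500, num_cluster=3, vector_dim=2,
                        num_mcmc_sample=1000)
inference = ed.Gibbs({model.pi: model.qpi, model.mu: model.qmu,
                      model.sigmasq: model.qsigmasq, model.z: model.qz},
                     data={model.x: x_train})
inference.run()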
Example #5
 def __init__(self, K, V, D, N):
     self.K = K  # number of topics
     self.V = V  # vocabulary size
     self.D = D  # number of documents
     self.N = N  # number of words of each document
     self.alpha = alpha = tf.zeros([K]) + 0.1
     self.yita = yita = tf.zeros([V]) + 0.01
     self.theta = [None] * D
     self.beta = Dirichlet(yita, sample_shape=K)
     self.z = [None] * D
     self.w = [None] * D
     temp = self.beta
     for d in range(D):
         self.theta[d] = Dirichlet(alpha)
         self.w[d] = ParamMixture(mixing_weights=self.theta[d],
                                  component_params={'probs': temp},
                                  component_dist=Categorical,
                                  sample_shape=N[d],
                                  validate_args=False)
         self.z[d] = self.w[d].cat
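A hedged sketch of how the per-document mixtures above might be bound to data, assuming the class is instantiated as model and w_train holds observed word indices per document (both names are illustrative):

# Observed words enter inference through the data argument; the latent
# variables (beta, theta[d], z[d]) would be paired with approximating
# families in the latent_vars argument of the chosen inference class.
data = {model.w[d]: w_train[d] for d in range(model.D)}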
Example #6
 def __init__(self, K, D, N, nu, use_param=False):
     self.K = K  # number of topics
     self.D = D  # number of documents
     self.N = N  # number of words of each document
     self.nu = nu
     self.alpha = alpha = tf.zeros([K]) + 0.1
     mu0 = tf.constant([0.0] * nu)
     sigma0 = tf.eye(nu)
     self.sigma = sigma = WishartCholesky(
         df=nu,
         scale=sigma0,
         cholesky_input_output_matrices=True,
         sample_shape=K)
     # sigma_inv = tf.matrix_inverse(sigma)
     self.mu = mu = Normal(mu0, tf.ones(nu), sample_shape=K)
     self.theta = theta = [None] * D
     self.z = z = [None] * D
     self.w = w = [None] * D
     for d in range(D):
         theta[d] = Dirichlet(alpha)
         if use_param:
             w[d] = ParamMixture(mixing_weights=theta[d],
                                 component_params={
                                     'loc': mu,
                                     'scale_tril': sigma
                                 },
                                 component_dist=MultivariateNormalTriL,
                                 sample_shape=N[d])
             z[d] = w[d].cat
         else:
             z[d] = Categorical(probs=theta[d], sample_shape=N[d])
             components = [
                 MultivariateNormalTriL(loc=tf.gather(mu, k),
                                        scale_tril=tf.gather(sigma, k),
                                        sample_shape=N[d]) for k in range(K)
             ]
             w[d] = Mixture(cat=z[d],
                            components=components,
                            sample_shape=N[d])
Example #7
true_z = np.random.choice(np.arange(K), size=N, p=true_pi)
x_data = true_mu[true_z] + np.random.randn(N) * np.sqrt(true_sigmasq[true_z])

# Prior hyperparameters
pi_alpha = np.ones(K, dtype=np.float32)
mu_sigma = np.std(true_mu)
sigmasq_alpha = 1.0
sigmasq_beta = 2.0

# Model
pi = Dirichlet(pi_alpha)
mu = Normal(0.0, mu_sigma, sample_shape=K)
sigmasq = InverseGamma(sigmasq_alpha, sigmasq_beta, sample_shape=K)
x = ParamMixture(pi, {
    'loc': mu,
    'scale': tf.sqrt(sigmasq)
},
                 Normal,
                 sample_shape=N)
z = x.cat

# Conditionals
mu_cond = ed.complete_conditional(mu)
sigmasq_cond = ed.complete_conditional(sigmasq)
pi_cond = ed.complete_conditional(pi)
z_cond = ed.complete_conditional(z)

sess = ed.get_session()

# Initialize randomly
pi_est, mu_est, sigmasq_est, z_est = sess.run([pi, mu, sigmasq, z])
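The snippet stops after random initialization; what typically follows is a Gibbs sweep over the complete conditionals, shown in full in Example #10 below and sketched here:

cond_dict = {pi: pi_est, mu: mu_est, sigmasq: sigmasq_est, z: z_est, x: x_data}
for t in range(500):
    z_est = sess.run(z_cond, cond_dict)
    cond_dict[z] = z_est
    pi_est, mu_est = sess.run([pi_cond, mu_cond], cond_dict)
    cond_dict[pi] = pi_est
    cond_dict[mu] = mu_est
    sigmasq_est = sess.run(sigmasq_cond, cond_dict)
    cond_dict[sigmasq] = sigmasq_est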
Example #8
    return x


N = 500  # number of data points
K = 2  # number of components
D = 2  # dimensionality of data
ed.set_seed(42)

x_train = build_toy_dataset(N)

pi = Dirichlet(tf.ones(K))
mu = Normal(tf.zeros(D), tf.ones(D), sample_shape=K)
sigmasq = InverseGamma(tf.ones(D), tf.ones(D), sample_shape=K)
x = ParamMixture(pi, {
    'loc': mu,
    'scale_diag': tf.sqrt(sigmasq)
},
                 MultivariateNormalDiag,
                 sample_shape=N)
z = x.cat
T = 500  # number of MCMC samples
qpi = Empirical(
    tf.get_variable("qpi/params", [T, K],
                    initializer=tf.constant_initializer(1.0 / K)))
qmu = Empirical(
    tf.get_variable("qmu/params", [T, K, D],
                    initializer=tf.zeros_initializer()))
qsigmasq = Empirical(
    tf.get_variable("qsigmasq/params", [T, K, D],
                    initializer=tf.ones_initializer()))
qz = Empirical(
    tf.get_variable("qz/params", [T, N],
                    initializer=tf.zeros_initializer(),
                    dtype=tf.int32))
Example #9
  def _test(self, pi, params, dist):
    g = tf.Graph()
    with g.as_default():
      tf.set_random_seed(10003)

      N = 50000

      x = ParamMixture(pi, params, dist, sample_shape=N)
      cat = x.cat
      components = x.components

      marginal_logp = x.marginal_log_prob(x)
      cond_logp = x.log_prob(x)

      comp_means = components.mean()
      comp_stddevs = components.std()
      marginal_mean = x.mean()
      marginal_stddev = x.std()
      marginal_var = x.variance()

    with self.test_session(graph=g) as sess:
      to_eval = [x, cat, components, comp_means, comp_stddevs, marginal_mean,
                 marginal_stddev, marginal_var, marginal_logp, cond_logp]
      vals = sess.run(to_eval)
      vals = {k: v for k, v in zip(to_eval, vals)}

      # Test that marginal statistics are reasonable
      self.assertAllClose(vals[x].mean(0), vals[marginal_mean],
                          rtol=0.01, atol=0.01)
      self.assertAllClose(vals[x].std(0), vals[marginal_stddev],
                          rtol=0.01, atol=0.01)
      self.assertAllClose(vals[x].var(0), vals[marginal_var],
                          rtol=0.01, atol=0.01)

      # Test that per-component statistics are reasonable
      for k in range(x.num_components):
        selector = (vals[cat] == k)
        self.assertAllClose(selector.mean(), pi[k], rtol=0.01, atol=0.01)
        x_k = vals[x][selector]
        self.assertAllClose(x_k.mean(0), vals[comp_means][k],
                            rtol=0.05, atol=0.05)
        self.assertAllClose(x_k.std(0), vals[comp_stddevs][k],
                            rtol=0.05, atol=0.05)

      n_bins = 100
      x_hists = np.zeros((n_bins,) + vals[x].shape[1:])
      hist_centers = np.zeros_like(x_hists)
      x_axis = np.zeros((N,) + vals[x].shape[1:])
      _make_histograms(vals[x], x_hists, hist_centers, x_axis, n_bins)

      x_marginal_val = sess.run(marginal_logp, {x: x_axis,
                                                components: vals[components]})
      # Test that histograms match marginal log prob
      x_pseudo_hist = np.exp(x_marginal_val[:n_bins])
      self.assertAllClose(x_pseudo_hist.sum(0) * (x_axis[1] - x_axis[0]), 1.,
                          rtol=0.1, atol=0.1)
      x_pseudo_hist /= x_pseudo_hist.sum(0, keepdims=True)
      self.assertLess(abs(x_pseudo_hist - x_hists).sum(0).mean(), 0.1)

      # Test that histograms match conditional log prob
      for k in range(pi.shape[-1]):
        k_cat = k + np.zeros(x_axis.shape, np.int32)
        x_vals_k = sess.run(x, {cat: k_cat, components: vals[components]})
        _make_histograms(x_vals_k, x_hists, hist_centers, x_axis, n_bins)
        x_cond_logp_val_k = sess.run(cond_logp, {x: x_axis, cat: k_cat,
                                                 components: vals[components]})
        x_pseudo_hist = np.exp(x_cond_logp_val_k[:n_bins])
        self.assertAllClose(x_pseudo_hist.sum(0) * (x_axis[1] - x_axis[0]), 1.,
                            rtol=0.1, atol=0.1)
        x_pseudo_hist /= x_pseudo_hist.sum(0, keepdims=True)
        self.assertLess(abs(x_pseudo_hist - x_hists).sum(0).mean(), 0.1)
Example #10
def main(_):
    # Generate data
    true_mu = np.array([-1.0, 0.0, 1.0], np.float32) * 10
    true_sigmasq = np.array([1.0**2, 2.0**2, 3.0**2], np.float32)
    true_pi = np.array([0.2, 0.3, 0.5], np.float32)
    N = 10000
    K = len(true_mu)
    true_z = np.random.choice(np.arange(K), size=N, p=true_pi)
    x_data = true_mu[true_z] + np.random.randn(N) * np.sqrt(
        true_sigmasq[true_z])

    # Prior hyperparameters
    pi_alpha = np.ones(K, dtype=np.float32)
    mu_sigma = np.std(true_mu)
    sigmasq_alpha = 1.0
    sigmasq_beta = 2.0

    # Model
    pi = Dirichlet(pi_alpha)
    mu = Normal(0.0, mu_sigma, sample_shape=K)
    sigmasq = InverseGamma(sigmasq_alpha, sigmasq_beta, sample_shape=K)
    x = ParamMixture(pi, {
        'loc': mu,
        'scale': tf.sqrt(sigmasq)
    },
                     Normal,
                     sample_shape=N)
    z = x.cat

    # Conditionals
    mu_cond = ed.complete_conditional(mu)
    sigmasq_cond = ed.complete_conditional(sigmasq)
    pi_cond = ed.complete_conditional(pi)
    z_cond = ed.complete_conditional(z)

    sess = ed.get_session()

    # Initialize randomly
    pi_est, mu_est, sigmasq_est, z_est = sess.run([pi, mu, sigmasq, z])

    print('Initial parameters:')
    print('pi:', pi_est)
    print('mu:', mu_est)
    print('sigmasq:', sigmasq_est)
    print()

    # Gibbs sampler
    cond_dict = {
        pi: pi_est,
        mu: mu_est,
        sigmasq: sigmasq_est,
        z: z_est,
        x: x_data
    }
    t0 = time()
    T = 500
    for t in range(T):
        z_est = sess.run(z_cond, cond_dict)
        cond_dict[z] = z_est
        pi_est, mu_est = sess.run([pi_cond, mu_cond], cond_dict)
        cond_dict[pi] = pi_est
        cond_dict[mu] = mu_est
        sigmasq_est = sess.run(sigmasq_cond, cond_dict)
        cond_dict[sigmasq] = sigmasq_est
    print('took %.3f seconds to run %d iterations' % (time() - t0, T))

    print()
    print('Final sample for parameters:')
    print('pi:', pi_est)
    print('mu:', mu_est)
    print('sigmasq:', sigmasq_est)
    print()

    print()
    print('True parameters:')
    print('pi:', true_pi)
    print('mu:', true_mu)
    print('sigmasq:', true_sigmasq)
    print()

    plt.figure(figsize=[10, 10])
    plt.subplot(2, 1, 1)
    plt.hist(x_data, 50)
    plt.title('Empirical Distribution of $x$')
    plt.xlabel('$x$')
    plt.ylabel('frequency')
    xl = plt.xlim()
    plt.subplot(2, 1, 2)
    plt.hist(sess.run(x, {pi: pi_est, mu: mu_est, sigmasq: sigmasq_est}), 50)
    plt.title("Predictive distribution $p(x \mid \mathrm{inferred }\ "
              "\pi, \mu, \sigma^2)$")
    plt.xlabel('$x$')
    plt.ylabel('frequency')
    plt.xlim(xl)
    plt.show()
Example #11
    def __init__(self, n, xdim, n_mixtures=5, mc_samples=500):
        # Compute the shape dynamically from placeholders
        self.x_ph = tf.placeholder(tf.float32, [None, xdim])
        self.k = k = n_mixtures
        self.batch_size = n
        self.d = d = xdim
        self.sample_size = tf.placeholder(tf.int32, ())

        # Build the priors over membership probabilities and mixture parameters
        with tf.variable_scope("priors"):
            pi = Dirichlet(tf.ones(k))

            mu = Normal(tf.zeros(d), tf.ones(d), sample_shape=k)
            sigmasq = InverseGamma(tf.ones(d), tf.ones(d), sample_shape=k)

        # Build the conditional mixture model
        with tf.variable_scope("likelihood"):
            x = ParamMixture(pi, {'loc': mu, 'scale_diag': tf.sqrt(sigmasq)},
                             MultivariateNormalDiag,
                             sample_shape=n)
            z = x.cat

        # Build approximate posteriors as Empirical samples
        t = mc_samples
        with tf.variable_scope("posteriors_samples"):
            qpi = Empirical(tf.get_variable(
                "qpi/params", [t, k],
                initializer=tf.constant_initializer(1.0 / k)))
            qmu = Empirical(tf.get_variable(
                "qmu/params", [t, k, d],
                initializer=tf.zeros_initializer()))
            qsigmasq = Empirical(tf.get_variable(
                "qsigmasq/params", [t, k, d],
                initializer=tf.ones_initializer()))
            qz = Empirical(tf.get_variable(
                "qz/params", [t, n],
                initializer=tf.zeros_initializer(),
                dtype=tf.int32))

        # Build inference graph using Gibbs and conditionals
        with tf.variable_scope("inference"):
            self.inference = ed.Gibbs({
                pi: qpi,
                mu: qmu,
                sigmasq: qsigmasq,
                z: qz
            }, data={
                x: self.x_ph
            })
            self.inference.initialize()

        # Build predictive posterior graph by taking samples
        n_samples = self.sample_size
        with tf.variable_scope("posterior"):
            mu_smpl = qmu.sample(n_samples)  # shape: [n_samples, k, d]
            sigmasq_smpl = qsigmasq.sample(n_samples)

            x_post = Normal(
                loc=tf.ones((n, 1, 1, 1)) * mu_smpl,
                scale=tf.ones((n, 1, 1, 1)) * tf.sqrt(sigmasq_smpl)
            )
            # NOTE: x_ph has shape [n, d]
            x_broadcasted = tf.tile(
                tf.reshape(self.x_ph, (n, 1, 1, d)),
                (1, n_samples, k, 1)
            )

            x_ll = x_post.log_prob(x_broadcasted)
            x_ll = tf.reduce_sum(x_ll, axis=3)
            x_ll = tf.reduce_mean(x_ll, axis=1)

        self.sample_t_ph = tf.placeholder(tf.int32, ())
        self.eval_ops = {
            'generative_post': x_post,
            'qmu': qmu,
            'qsigmasq': qsigmasq,
            'post_running_mu': tf.reduce_mean(
                qmu.params[:self.sample_t_ph],
                axis=0
            ),
            'post_log_prob': x_ll
        }
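A hedged driving loop for the object built above; model, x_batch, and the iteration count are assumed names and values, not part of the original class:

sess = ed.get_session()
tf.global_variables_initializer().run()
for _ in range(500):
    # Each update performs one Gibbs step on the latent variables,
    # feeding the observed batch through the placeholder.
    info_dict = model.inference.update(feed_dict={model.x_ph: x_batch})
    model.inference.print_progress(info_dict)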
Example #12
true_z = np.random.choice(np.arange(K), size=N, p=true_pi)
x_data = true_mu[true_z] + np.random.randn(N) * np.sqrt(true_sigmasq[true_z])

# Prior hyperparameters
pi_alpha = np.ones(K, dtype=np.float32)
mu_sigma = np.std(true_mu)
sigmasq_alpha = 1.0
sigmasq_beta = 2.0

# Model
pi = Dirichlet(pi_alpha)
mu = Normal(0.0, mu_sigma, sample_shape=K)
sigmasq = InverseGamma(sigmasq_alpha, sigmasq_beta, sample_shape=K)
x = ParamMixture(pi, {
    'loc': mu,
    'scale': tf.sqrt(sigmasq)
},
                 Normal,
                 sample_shape=N)
z = x.cat

# Conditionals
mu_cond = ed.complete_conditional(mu)
sigmasq_cond = ed.complete_conditional(sigmasq)
pi_cond = ed.complete_conditional(pi)
z_cond = ed.complete_conditional(z)

sess = ed.get_session()

# Initialize randomly
pi_est, mu_est, sigmasq_est, z_est = sess.run([pi, mu, sigmasq, z])