Example #1
    def define_val_model(self, N, P, K):
        # Define new graph
        self.z_test = Gamma(2. * tf.ones([N, K]), 1. * tf.ones([N, K]))
        self.l_test = TransformedDistribution(
            distribution=Normal(self.mean_llib * tf.ones([N, 1]),
                                np.sqrt(self.std_llib) * tf.ones([N, 1])),
            bijector=tf.contrib.distributions.bijectors.Exp())

        rho_test = tf.matmul(self.z_test, self.W0)
        rho_test = rho_test / tf.reshape(tf.reduce_sum(rho_test, axis=1),
                                         (-1, 1))  # NxP

        self.lam_test = Gamma(self.r, self.r / (rho_test * self.l_test))

        if self.zero_inflation:
            logit_pi_test = tf.matmul(self.z_test, self.W1)

            pi_test = tf.minimum(
                tf.maximum(tf.nn.sigmoid(logit_pi_test), 1e-7), 1. - 1e-7)
            cat_test = Categorical(
                probs=tf.stack([pi_test, 1. - pi_test], axis=2))

            components_test = [
                Poisson(rate=1e-30 * tf.ones([N, P])),
                Poisson(rate=self.lam_test)
            ]
            self.likelihood_test = Mixture(cat=cat_test,
                                           components=components_test)
        else:
            self.likelihood_test = Poisson(rate=self.lam_test)
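The Gamma-Poisson construction above (lam drawn from Gamma(r, r / mu) with mu = rho * l, then a Poisson likelihood) marginally gives an overdispersed count model with mean mu and variance mu + mu**2 / r, which is why r acts as a dispersion parameter in the later examples. A minimal NumPy sanity check of those moments, with illustrative values only:

import numpy as np

rng = np.random.default_rng(0)
r, mu, n = 2.0, 5.0, 200000
lam = rng.gamma(shape=r, scale=mu / r, size=n)  # Gamma(concentration=r, rate=r/mu)
x = rng.poisson(lam)
print(x.mean(), mu)                # empirical vs. theoretical mean (~5.0)
print(x.var(), mu + mu ** 2 / r)   # empirical vs. theoretical variance (~17.5)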
Example #2
 def __init__(self, datastore=None, USE_FEEDBACK=USE_FEEDBACK):
     """Set the variables and load model data."""
     self.datastore = datastore
     self.USE_FEEDBACK = convert_string2bool_env(USE_FEEDBACK)
     self.package_id_dict = OrderedDict()
     self.id_package_dict = OrderedDict()
     self.beta = None
     self.theta = None
     self.alpha = None
     self.manifest_id_dict = OrderedDict()
     self.feedback_id_dict = OrderedDict()
     self.manifests = 0
     self.packages = 0
     self.epsilon = Gamma(tf.constant(
         a_c), tf.constant(a_c) / tf.constant(b_c)).\
         prob(tf.constant(K, dtype=tf.float32)).eval(session=tf.Session())
     self.theta_dummy = Poisson(
         np.array([
             self.epsilon * Gamma(tf.constant(a), self.epsilon).prob(
                 tf.constant(K,
                             dtype=tf.float32)).eval(session=tf.Session())
         ] * K,
                  dtype=float))
     if isinstance(datastore, S3DataStore):  # pragma: no-cover
         self.load_s3()
     else:
         self.load_local()
     self.manifests = self.theta.shape[0]
     self.packages = self.beta.shape[0]
     self.dummy_result = self.theta_dummy.prob(
         self.beta).eval(session=tf.Session())
  def test_map_default(self):
    with self.test_session() as sess:
      x = Gamma(2.0, 0.5)

      inference = ed.MAP([x])
      inference.initialize(auto_transform=True, n_iter=500)
      tf.global_variables_initializer().run()
      for _ in range(inference.n_iter):
        info_dict = inference.update()

      # Check approximation on constrained space has same mode as
      # target distribution.
      qx = inference.latent_vars[x]
      stats = sess.run([x.mode(), qx])
      self.assertAllClose(stats[0], stats[1], rtol=1e-5, atol=1e-5)
  def test_map_custom(self):
    with self.test_session() as sess:
      x = Gamma(2.0, 0.5)
      qx = PointMass(tf.nn.softplus(tf.Variable(0.5)))

      inference = ed.MAP({x: qx})
      inference.initialize(auto_transform=True, n_iter=500)
      tf.global_variables_initializer().run()
      for _ in range(inference.n_iter):
        info_dict = inference.update()

      # Check approximation on constrained space has same mode as
      # target distribution.
      stats = sess.run([x.mode(), qx])
      self.assertAllClose(stats[0], stats[1], rtol=1e-5, atol=1e-5)
    def test_map_custom(self):
        with self.test_session() as sess:
            x = Gamma(2.0, 0.5)
            qx = PointMass(tf.nn.softplus(tf.Variable(0.5)))

            inference = ed.MAP({x: qx})
            inference.initialize(auto_transform=True, n_iter=500)
            tf.global_variables_initializer().run()
            for _ in range(inference.n_iter):
                info_dict = inference.update()

            # Check approximation on constrained space has same mode as
            # target distribution.
            stats = sess.run([x.mode(), qx])
            self.assertAllClose(stats[0], stats[1], rtol=1e-5, atol=1e-5)
    def test_map_default(self):
        with self.test_session() as sess:
            x = Gamma(2.0, 0.5)

            inference = ed.MAP([x])
            inference.initialize(auto_transform=True, n_iter=500)
            tf.global_variables_initializer().run()
            for _ in range(inference.n_iter):
                info_dict = inference.update()

            # Check approximation on constrained space has same mode as
            # target distribution.
            qx = inference.latent_vars[x]
            stats = sess.run([x.mode(), qx])
            self.assertAllClose(stats[0], stats[1], rtol=1e-5, atol=1e-5)
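For reference, the target mode that these MAP tests recover has a closed form under the concentration/rate parameterization used here, so the value of x.mode() can be checked by hand:

# Mode of Gamma(concentration, rate) is (concentration - 1) / rate for concentration >= 1.
concentration, rate = 2.0, 0.5
mode = (concentration - 1.0) / rate  # = 2.0, the value x.mode() returns above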
Example #7
 def test_nonnegative(self):
     with self.test_session():
         x = Gamma(1.0, 1.0)
         y = ed.transform(x)
         self.assertIsInstance(y, TransformedDistribution)
         sample = y.sample(10, seed=1).eval()
         self.assertSamplePosNeg(sample)
Example #8
    def define_stochastic_model(self, P, K):
        M = self.minibatch_size

        self.W0 = Gamma(0.1 * tf.ones([K, P]), 0.3 * tf.ones([K, P]))
        if self.zero_inflation:
            self.W1 = Normal(tf.zeros([K, P]), tf.ones([K, P]))

        self.z = Gamma(2. * tf.ones([M, K]), 1. * tf.ones([M, K]))

        self.r = Gamma(2. * tf.ones([
            P,
        ]), 1. * tf.ones([
            P,
        ]))

        self.l = TransformedDistribution(
            distribution=Normal(self.mean_llib * tf.ones([M, 1]),
                                self.std_llib * tf.ones([M, 1])),
            bijector=tf.contrib.distributions.bijectors.Exp())

        self.rho = tf.matmul(self.z, self.W0)
        self.rho = self.rho / tf.reshape(tf.reduce_sum(self.rho, axis=1),
                                         (-1, 1))  # NxP

        self.lam = Gamma(self.r, self.r / (self.rho * self.l))

        if self.zero_inflation:
            self.logit_pi = tf.matmul(self.z, self.W1)
            self.pi = tf.minimum(
                tf.maximum(tf.nn.sigmoid(self.logit_pi), 1e-7), 1. - 1e-7)

            self.cat = Categorical(
                probs=tf.stack([self.pi, 1. - self.pi], axis=2))

            self.components = [
                Poisson(rate=1e-30 * tf.ones([M, P])),
                Poisson(rate=self.lam)
            ]

            self.likelihood = Mixture(cat=self.cat, components=self.components)
        else:
            self.likelihood = Poisson(rate=self.lam)
def gamma_q(shape):
    # Parameterize Gamma q's via shape and scale, mapping unconstrained variables through softplus.
    min_shape = 1e-3
    min_scale = 1e-5
    shape_init = 0.5 + 0.1 * tf.random_normal(shape)
    scale_init = 0.1 * tf.random_normal(shape)
    rv = Gamma(
        tf.maximum(tf.nn.softplus(tf.Variable(shape_init)), min_shape),
        tf.maximum(1.0 / tf.nn.softplus(tf.Variable(scale_init)),
                   1.0 / min_scale))
    return rv
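A minimal usage sketch for gamma_q (Edward 1.x API as used throughout this listing; x_observed is a hypothetical data array): pair each latent Gamma with a q factor of the same shape and fit with ed.KLqp, raising n_samples as the deep exponential family example further down does for Gamma q's.

import edward as ed
import tensorflow as tf
from edward.models import Gamma, Poisson

z = Gamma(0.1 * tf.ones([10, 5]), 0.3 * tf.ones([10, 5]))  # toy latent rates
x = Poisson(rate=z)                                         # toy likelihood
qz = gamma_q(z.shape)                                       # variational factor defined above
inference = ed.KLqp({z: qz}, data={x: x_observed})
inference.run(n_iter=1000, n_samples=30)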
 def __init__(self, datastore=None,
              scoring_region=HPF_SCORING_REGION):
     """Set the variables and load model data."""
     self.datastore = datastore
     self.scoring_region = scoring_region
     self.package_id_dict = None
     self.id_package_dict = None
     self.rating_matrix = None
     self.beta = None
     self.manifest_id_dict = None
     self.manifests = 0
     self.packages = 0
     self.sess = tf.Session()
     self.epsilon = Gamma(tf.constant(
         a_c), tf.constant(a_c) / tf.constant(b_c)).eval(session=self.sess)
     self.theta = np.array([self.epsilon * Gamma(tf.constant(
         a), self.epsilon).eval(session=self.sess)] * K)
     self.loadS3()
     self.dummy_result = Poisson(
         np.dot(self.theta, np.transpose(self.beta))).eval(session=self.sess)
     self.normalize_result()
Example #11
def check(dist):

    q_s = Gamma(a_s, b_s)
    q_v = Gamma(a_v, b_v)

    sess = tf.InteractiveSession()
    init = tf.global_variables_initializer()
    init.run()
    no_sample = 100

    s_sample = q_s.sample(no_sample).eval()
    v_sample = q_v.sample(no_sample).eval()

    n = np.zeros([C_u, C_i])
    result = np.zeros([C_u, C_i])
    n_expected = np.zeros([C_u, C_i])

    for i in range(0, no_sample):
        n = np.add(n, np.matmul(s_sample[i], np.transpose(v_sample[i])))
    n_expected = n / no_sample
    # The mean of a Poisson is its rate parameter, so this is fine.

    # sample response
    # distribution-specific
    result = dist_mean(dist, n_expected)

    return mae(result)
Example #12
    def recommend_new_user(self, input_user_stack, k=config.K):
        """Implement the 'fold-in' logic.

        Calculates user factors for a new user and adds the user to the user matrix to make
        prediction.
        """
        # initializing parameters
        _logger.info(
            "Could not find a match, calculating factors to recommend.")
        k_shp = a_c + k * a
        # TODO: t_shp, Why is it not required here?
        nY = len(input_user_stack)
        Y = np.ones(shape=(nY, ))
        seed = np.random.seed(int(time.time()))
        theta = Gamma(a, 1 / b_c).sample(sample_shape=(k, ),
                                         seed=seed).eval(session=tf.Session())
        k_rte = b_c + np.sum(theta)
        gamma_rte = \
            Gamma(a_c, b_c / a_c).sample(
                    sample_shape=(1,), seed=seed).eval(session=tf.Session()) + self.beta.sum(axis=0)
        gamma_shp = \
            gamma_rte * theta * \
            Uniform(low=.85, high=1.15).sample(
                sample_shape=(k,), seed=seed).eval(session=tf.Session())
        np.nan_to_num(gamma_shp, copy=False)
        np.nan_to_num(gamma_rte, copy=False)
        phi = np.empty((nY, k), dtype='float32')
        add_k_rte = a_c / b_c
        theta_prev = theta.copy()
        # running the loop
        for iter_num in range(iter_score):
            for i in range(nY):
                iid = input_user_stack[i]
                sumphi = 10e-6
                maxval = -10**1
                phi_st = i
                for j in range(k):
                    phi[phi_st,
                        j] = psi(gamma_shp[j]) - log(gamma_rte[j]) + psi(
                            self.lam_shp[iid, j]) - log(self.lam_rte[iid, j])
                    if phi[phi_st, j] > maxval:
                        maxval = phi[phi_st, j]
                for j in range(k):
                    phi[phi_st, j] = exp(phi[phi_st, j] - maxval)
                    sumphi += phi[phi_st, j]
                for j in range(k):
                    phi[phi_st, j] *= Y[i] / sumphi
            gamma_rte = (k_shp / k_rte +
                         self.beta.sum(axis=0, keepdims=True)).reshape(-1)
            gamma_shp = a + phi.sum(axis=0)
            theta = gamma_shp / gamma_rte
            k_rte = add_k_rte + theta.sum()
            # checking for early stop
            if np.linalg.norm(theta - theta_prev) < stop_thr:
                break
            theta_prev = theta.copy()
        rec = np.dot(theta, self.beta.T)
        return self.filter_recommendation(rec, input_user_stack)
def gamma_q(shape, name=None):
    # Parameterize Gamma q's via shape and scale, mapping unconstrained variables through softplus.
    with tf.variable_scope(name, default_name="gamma_q"):
        min_shape = 1e-3
        min_scale = 1e-5
        shape_var = tf.get_variable(
            "shape", shape,
            initializer=tf.random_normal_initializer(mean=0.5, stddev=0.1))
        scale_var = tf.get_variable(
            "scale", shape,
            initializer=tf.random_normal_initializer(stddev=0.1))
        rv = Gamma(tf.maximum(tf.nn.softplus(shape_var), min_shape),
                   tf.maximum(1.0 / tf.nn.softplus(scale_var), 1.0 / min_scale))
        return rv
Example #14
def main(_):
    ed.set_seed(42)

    # Prior on scalar hyperparameter to Dirichlet.
    alpha = Gamma(1.0, 1.0)

    # Prior on size of Dirichlet.
    n = 1 + tf.cast(Exponential(0.5), tf.int32)

    # Build a vector of ones whose size is n; multiply it by alpha.
    p = Dirichlet(tf.ones([n]) * alpha)

    sess = ed.get_session()
    print(sess.run(p))
    # [ 0.01012419  0.02939712  0.05036638  0.51287931  0.31020424  0.0485355
    #   0.0384932 ]
    print(sess.run(p))
Example #15
    def define_model(self, N, P, K, batch_idx=None):
        self.W0 = Gamma(.1 * tf.ones([K + self.n_batches, P]),
                        .3 * tf.ones([K + self.n_batches, P]))
        if self.zero_inflation:
            self.W1 = Normal(tf.zeros([K + self.n_batches, P]),
                             tf.ones([K + self.n_batches, P]))

        self.z = Gamma(2. * tf.ones([N, K]), 1. * tf.ones([N, K]))

        disp_size = 1
        if self.gene_dispersion:
            disp_size = P
        self.r = Gamma(2. * tf.ones([
            disp_size,
        ]), 1. * tf.ones([
            disp_size,
        ]))

        self.l = TransformedDistribution(
            distribution=Normal(self.mean_llib * tf.ones([N, 1]),
                                np.sqrt(self.std_llib) * tf.ones([N, 1])),
            bijector=tf.contrib.distributions.bijectors.Exp())

        if batch_idx is not None and self.n_batches > 0:
            self.rho = tf.matmul(
                tf.concat([
                    self.z,
                    tf.cast(tf.one_hot(batch_idx[:, 0], self.n_batches),
                            tf.float32)
                ],
                          axis=1), self.W0)
        else:
            self.rho = tf.matmul(self.z, self.W0)

        if self.scalings:
            self.rho = self.rho / tf.reshape(tf.reduce_sum(self.rho, axis=1),
                                             (-1, 1))  # NxP
            self.lam = Gamma(self.r, self.r / (self.rho * self.l))
        else:
            self.lam = Gamma(self.r, self.r / self.rho)

        if self.zero_inflation:
            if batch_idx is not None and self.n_batches > 0:
                self.logit_pi = tf.matmul(
                    tf.concat([
                        self.z,
                        tf.cast(tf.one_hot(batch_idx[:, 0], self.n_batches),
                                tf.float32)
                    ],
                              axis=1), self.W1)
            else:
                self.logit_pi = tf.matmul(self.z, self.W1)
            self.pi = tf.minimum(
                tf.maximum(tf.nn.sigmoid(self.logit_pi), 1e-7), 1. - 1e-7)

            self.cat = Categorical(
                probs=tf.stack([self.pi, 1. - self.pi], axis=2))

            self.components = [
                Poisson(rate=1e-30 * tf.ones([N, P])),
                Poisson(rate=self.lam)
            ]

            self.likelihood = Mixture(cat=self.cat, components=self.components)
        else:
            self.likelihood = Poisson(rate=self.lam)
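A quick note on the shapes above (a sketch, not from the original snippet): with batch covariates, z is concatenated with a one-hot batch indicator before the matmul, which is why W0 is declared with K + self.n_batches rows. The bookkeeping can be checked with plain NumPy:

import numpy as np

N, K, n_batches, P = 8, 3, 2, 5
z = np.ones((N, K))
batch_idx = np.zeros(N, dtype=int)
one_hot = np.eye(n_batches)[batch_idx]          # [N, n_batches]
design = np.concatenate([z, one_hot], axis=1)   # [N, K + n_batches]
W0 = np.ones((K + n_batches, P))
rho = design @ W0                               # [N, P]
print(rho.shape)                                # (8, 5)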
        "qhinge_loc",
        shape=X_hinge_grid.shape,
        initializer=tf.random_normal_initializer(
            mean=X_hinge_grid * tf.ones(shape=X_hinge_grid.shape, dtype=tfdt),
            stddev=0.001 * tf.ones(shape=X_hinge_grid.shape, dtype=tfdt))),
                         scale=softplus(
                             tf.get_variable(
                                 "qhinge_scale",
                                 shape=X_hinge_grid.shape,
                                 initializer=tf.random_normal_initializer(
                                     mean=2.0, stddev=0.1))))

    gamma_shape = 1.05
    gamma_rate = 12.0

    gamma = Gamma(gamma_shape, gamma_rate, sample_shape=[1, M - 1])
    qgamma = lognormal_q([1, M - 1])

    # Model definition
    X_plh = tf.placeholder(tfdt, shape=[N, D])
    PHI = rbf_kernel(X=X_plh, Y=hinge_grid, gamma=gamma, tfdt=tfdt)
    w = Normal(loc=tf.zeros((M, 1), dtype=tfdt),
               scale=900 * tf.ones((M, 1), dtype=tfdt))
    y = Bernoulli(logits=tf.matmul(PHI, w))

    # Variational distributions
    qw_loc = tf.get_variable("qw_loc", [M, 1], dtype=tfdt)
    qw_scale = softplus(120 * tf.get_variable("qw_scale", [M, 1], dtype=tfdt))
    qw = Normal(loc=qw_loc, scale=qw_scale)

    # Inference settings
Example #17
    def evaluate_loglikelihood(self, X, batch_idx=None):
        """
		This is the ELBO, which is a lower bound on the marginal log-likelihood.
		We perform some local optimization on the new data points to obtain the ELBO of the new data.
		"""
        N = X.shape[0]
        P = X.shape[1]
        K = self.n_components

        # Define new graph conditioned on the posterior global factors
        z_test = Gamma(2. * tf.ones([N, K]), 1. * tf.ones([N, K]))
        l_test = TransformedDistribution(
            distribution=Normal(self.mean_llib * tf.ones([N, 1]),
                                np.sqrt(self.std_llib) * tf.ones([N, 1])),
            bijector=tf.contrib.distributions.bijectors.Exp())

        if batch_idx is not None and self.n_batches > 0:
            rho_test = tf.matmul(
                tf.concat([
                    z_test,
                    tf.cast(tf.one_hot(batch_idx[:, 0], self.n_batches),
                            tf.float32)
                ],
                          axis=1), self.W0)
        else:
            rho_test = tf.matmul(z_test, self.W0)
        rho_test = rho_test / tf.reshape(tf.reduce_sum(rho_test, axis=1),
                                         (-1, 1))  # NxP

        lam_test = Gamma(self.r, self.r / (rho_test * l_test))

        if self.zero_inflation:
            if batch_idx is not None and self.n_batches > 0:
                logit_pi_test = tf.matmul(
                    tf.concat([
                        z_test,
                        tf.cast(tf.one_hot(batch_idx[:, 0], self.n_batches),
                                tf.float32)
                    ],
                              axis=1), self.W1)
            else:
                logit_pi_test = tf.matmul(z_test, self.W1)

            pi_test = tf.minimum(
                tf.maximum(tf.nn.sigmoid(logit_pi_test), 1e-7), 1. - 1e-7)
            cat_test = Categorical(
                probs=tf.stack([pi_test, 1. - pi_test], axis=2))

            components_test = [
                Poisson(rate=1e-30 * tf.ones([N, P])),
                Poisson(rate=lam_test)
            ]
            likelihood_test = Mixture(cat=cat_test, components=components_test)
        else:
            likelihood_test = Poisson(rate=lam_test)

        qz_test = TransformedDistribution(
            distribution=Normal(
                tf.Variable(tf.ones(z_test.shape)),
                tf.nn.softplus(tf.Variable(1. * tf.ones(z_test.shape)))),
            bijector=tf.contrib.distributions.bijectors.Exp())
        qlam_test = TransformedDistribution(
            distribution=Normal(
                tf.Variable(tf.ones(lam_test.shape)),
                tf.nn.softplus(tf.Variable(0.01 * tf.ones(lam_test.shape)))),
            bijector=tf.contrib.distributions.bijectors.Exp())
        ql_test = TransformedDistribution(
            distribution=Normal(
                tf.Variable(self.mean_llib * tf.ones(l_test.shape)),
                tf.nn.softplus(
                    tf.Variable(
                        np.sqrt(self.std_llib) * tf.ones(l_test.shape)))),
            bijector=tf.contrib.distributions.bijectors.Exp())

        if self.zero_inflation:
            inference_local = ed.ReparameterizationKLqp(
                {
                    z_test: qz_test,
                    lam_test: qlam_test,
                    l_test: ql_test
                },
                data={
                    likelihood_test: tf.cast(X, tf.float32),
                    self.W0: self.est_qW0,
                    self.W1: self.est_qW1,
                    self.r: self.est_qr
                })
        else:
            inference_local = ed.ReparameterizationKLqp(
                {
                    z_test: qz_test,
                    lam_test: qlam_test,
                    l_test: ql_test
                },
                data={
                    likelihood_test: tf.cast(X, tf.float32),
                    self.W0: self.est_qW0,
                    self.r: self.est_qr
                })

        inference_local.run(n_iter=self.test_iterations,
                            n_samples=self.n_mc_samples)

        return -self.sess.run(inference_local.loss,
                              feed_dict={likelihood_test: X.astype('float32')
                                         }) / N
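A hypothetical call sketch for the method above (the instance and array names are assumptions, not from the original): the return value is the held-out ELBO divided by the number of cells, i.e. a per-cell lower bound on the marginal log-likelihood, so higher is better when comparing fitted models.

# `model` is an already-fitted instance; X_test is a cells-by-genes count
# matrix and batch_idx_test an optional [N, 1] integer array of batch labels.
elbo_per_cell = model.evaluate_loglikelihood(X_test, batch_idx=batch_idx_test)
print("held-out ELBO per cell: {:.3f}".format(elbo_per_cell))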
Example #18
def _test(alpha, beta, n):
  x = Gamma(alpha=alpha, beta=beta)
  val_est = get_dims(x.sample(n))
  val_true = n + get_dims(alpha)
  assert val_est == val_true
Example #19
We build a random variable whose size depends on a sample from another
random variable.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import edward as ed
import tensorflow as tf

from edward.models import Exponential, Dirichlet, Gamma

ed.set_seed(42)

# Prior on scalar hyperparameter to Dirichlet.
alpha = Gamma(alpha=1.0, beta=1.0)

# Prior on size of Dirichlet.
n = 1 + tf.cast(Exponential(lam=0.5), tf.int32)

# Build a vector of ones whose size is n; multiply it by alpha.
p = Dirichlet(alpha=tf.ones([n]) * alpha)

sess = ed.get_session()
print(sess.run(p.value()))
# [ 0.01012419  0.02939712  0.05036638  0.51287931  0.31020424  0.0485355
#   0.0384932 ]
print(sess.run(p.value()))
# [ 0.12836078  0.23335715  0.63828212]
word_idx = np.logical_and(
    np.sum(x_train != 0, 1) >= 2,
    np.sum(x_train, 1) >= 10)
words = [word for word, idx in zip(words, word_idx) if idx]
x_train = x_train[word_idx, :]
x_train = x_train.T

N = x_train.shape[0]  # number of documents
D = x_train.shape[1]  # vocabulary size
K = [100, 30, 15]  # number of components per layer
q = 'lognormal'  # choice of q; 'lognormal' or 'gamma'
shape = 0.1  # gamma shape parameter
lr = 1e-4  # learning rate step-size

# MODEL
W2 = Gamma(0.1, 0.3, sample_shape=[K[2], K[1]])
W1 = Gamma(0.1, 0.3, sample_shape=[K[1], K[0]])
W0 = Gamma(0.1, 0.3, sample_shape=[K[0], D])

z3 = Gamma(0.1, 0.1, sample_shape=[N, K[2]])
z2 = Gamma(shape, shape / tf.matmul(z3, W2))
z1 = Gamma(shape, shape / tf.matmul(z2, W1))
x = Poisson(tf.matmul(z1, W0))


# INFERENCE
def pointmass_q(shape):
    min_mean = 1e-3
    mean_init = tf.random_normal(shape)
    rv = PointMass(tf.maximum(tf.nn.softplus(tf.Variable(mean_init)),
                              min_mean))
    return rv
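A minimal sketch of how pointmass_q is paired with ed.MAP for the global weights (mirroring the variational EM setup in the deep exponential family example later in this listing; W0, x, x_train, the qz* factors, and lr are as defined there):

qW0 = pointmass_q(W0.shape)
inference_m = ed.MAP({W0: qW0}, data={x: x_train, z1: qz1, z2: qz2, z3: qz3})
inference_m.initialize(optimizer=tf.train.RMSPropOptimizer(lr))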
Example #21
            # ips_weights = 1. / poisson.pmf(cau.todense(), reconstr_cau_train)

            ips_weights = 1. / 0.25**np.array(4 - train_data.todense())

            # ips_weights = ips_weights / np.sum(ips_weights) * np.sum(cau.todense())

            # ips different end

            tf.reset_default_graph()
            sess = tf.InteractiveSession()

            idx_ph = tf.placeholder(tf.int32, M)
            cau_ph = tf.placeholder(tf.float32, [M, N])
            sd_ph = tf.placeholder(tf.float32, [M, N])

            U = Gamma(0.3 * tf.ones([M, K]), 0.3 * tf.ones([M, K]))
            V = Gamma(0.3 * tf.ones([N, K]), 0.3 * tf.ones([N, K]))
            x = Poisson(tf.matmul(U, V, transpose_b=True))

            qU_variables = [tf.Variable(tf.random_uniform([D, K])), \
                           tf.Variable(tf.random_uniform([D, K]))]

            qU = PointMass(
                params=tf.nn.softplus(tf.gather(qU_variables[0], idx_ph)))


            qV_variables = [tf.Variable(tf.random_uniform([N, K])), \
                           tf.Variable(tf.random_uniform([N, K]))]

            qV = PointMass(params=tf.nn.softplus(qV_variables[0]))
Example #22

# In[21]:

K = 175
train_data = np.array(x_train, dtype=int)
N = train_data.shape[0]
D = train_data.shape[1]

tf.reset_default_graph()
sess = tf.InteractiveSession()

idx_ph = tf.placeholder(tf.int32, M)
x_ph = tf.placeholder(tf.float32, [M, D])

U = Gamma(0.1, 0.5, sample_shape=[M, K])
V = Gamma(0.1, 0.3, sample_shape=[D, K])
x = Poisson(tf.matmul(U, V, transpose_b=True))

min_scale = 1e-5

qV_variables = [
    tf.Variable(tf.random_uniform([D, K])),
    tf.Variable(tf.random_uniform([D, K]))
]

qV = TransformedDistribution(
            distribution=Normal(qV_variables[0],\
                                tf.maximum(tf.nn.softplus(qV_variables[1]), \
                                           min_scale)),
            bijector=tf.contrib.distributions.bijectors.Exp())
Example #23
            # ips_weights = 1. / poisson.pmf(cau.todense(), reconstr_cau)
            ips_weights = 1. / 0.25**np.array(4 - train_data.todense())

            # ips_weights = ips_weights / np.sum(ips_weights) * np.sum(cau.todense())

            # ips different end

            tf.reset_default_graph()
            sess = tf.InteractiveSession()

            idx_ph = tf.placeholder(tf.int32, M)
            cau_ph = tf.placeholder(tf.float32, [M, N])
            sd_ph = tf.placeholder(tf.float32, [M, N])

            U = Gamma(0.3 * tf.ones([M, K]), 0.3 * tf.ones([M, K]))
            V = Gamma(0.3 * tf.ones([N, K]), 0.3 * tf.ones([N, K]))
            x = Poisson(tf.matmul(U, V, transpose_b=True))

            qU_variables = [tf.Variable(tf.random_uniform([D, K])), \
                           tf.Variable(tf.random_uniform([D, K]))]

            qU = PointMass(
                params=tf.nn.softplus(tf.gather(qU_variables[0], idx_ph)))


            qV_variables = [tf.Variable(tf.random_uniform([N, K])), \
                           tf.Variable(tf.random_uniform([N, K]))]

            qV = PointMass(params=tf.nn.softplus(qV_variables[0]))
Example #24
plt.plot(xn, 'go')
plt.title('Simulated dataset')
plt.show()
print('mu=7')
print('sigma=1')

# Priors definition
m = tf.constant([0.])
beta = tf.constant([0.0001])
a = tf.constant([0.001])
b = tf.constant([0.001])

# Posterior inference
# Probabilistic model
mu = Normal(loc=m, scale=beta)
sigma = Gamma(a, b)
x = Normal(loc=tf.tile(mu, [N]), scale=tf.tile(sigma, [N]))

# Variational model
qmu = Normal(loc=tf.Variable(tf.random_normal([1])),
             scale=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))
qsigma = Gamma(tf.nn.softplus(tf.Variable(tf.random_normal([1]))),
               tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

# Inference
inference = ed.KLqp({mu: qmu, sigma: qsigma}, data={x: xn})
inference.run(n_iter=1500, n_samples=30)

sess = ed.get_session()

print('Inferred mu={}'.format(sess.run(qmu.mean())))
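The fitted noise scale can be inspected the same way; a one-line addition (not in the original snippet), assuming the same session:

print('Inferred sigma={}'.format(sess.run(qsigma.mean())))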
Example #25
def _test(alpha, beta, n):
    x = Gamma(alpha=alpha, beta=beta)
    val_est = get_dims(x.sample(n))
    val_true = n + get_dims(alpha)
    assert val_est == val_true
Example #26
We build a random variable whose size depends on a sample from another
random variable.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import edward as ed
import tensorflow as tf

from edward.models import Exponential, Dirichlet, Gamma

ed.set_seed(42)

# Prior on scalar hyperparameter to Dirichlet.
alpha = Gamma(1.0, 1.0)

# Prior on size of Dirichlet.
n = 1 + tf.cast(Exponential(0.5), tf.int32)

# Build a vector of ones whose size is n; multiply it by alpha.
p = Dirichlet(tf.ones([n]) * alpha)

sess = ed.get_session()
print(sess.run(p))
# [ 0.01012419  0.02939712  0.05036638  0.51287931  0.31020424  0.0485355
#   0.0384932 ]
print(sess.run(p))
# [ 0.12836078  0.23335715  0.63828212]
from edward.models import Dirichlet, Categorical, Gamma

# debugging
import sys
sys.path.append("/home/victor/Documents/community_detection/Variational")

from sbm import SBM

# SBM parameters
n_vert = 100
n_comm = 3

# fake a dataset
# sort the ground truth community identities to make it easy to parse them
z_gt = tf.Variable(tf.nn.top_k(Categorical(p=tf.ones([n_vert, n_comm])/n_comm).sample(),k=n_vert).values)
eta_gt = tf.Variable(Gamma(tf.ones([n_comm, n_comm]), tf.ones([n_comm, n_comm])).sample())
g=SBM(zs = z_gt, eta = eta_gt, n_comm = n_comm)
data = SBM(zs = z_gt, eta = eta_gt, n_comm = n_comm).sample()

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    init.run()
    dataset = data.eval()
    z_gt = z_gt.eval()
    eta_gt = eta_gt.eval()

# Model

# higher level parameters
# alpha = tf.Variable(3.0,dtype=tf.float32)
# lam = tf.Variable(1,dtype=tf.float32)
Example #28
    def _build_model_and_approximations(self):
        """ Form likelihood and approximating distributions of Ouija

        """
        N = self.N
        G = self.G
        Q = self.Q

        ds = tf.contrib.distributions


        k = Normal(loc = tf.zeros([G,Q]), scale = 50 * tf.ones([G,Q]), name = "k")
        t0 = Normal(loc = 0.5 * tf.ones(G), scale = 1 * tf.ones(G))

        mu0 = Gamma(concentration = 2 * tf.ones(G), rate = tf.ones(G))

        z = Normal(loc = 0.5 * tf.ones([N,Q]), scale = tf.ones([N,Q]))

        phi = Gamma(concentration = 2 * tf.ones(1), rate = tf.ones(1))
        pbeta = Normal(loc = tf.zeros(2), scale = tf.ones(2))

        cell_mat = tf.stack([tf.reshape(z, [-1]), -tf.ones(N)], 1)
        gene_mat = tf.stack([tf.reshape(k, [-1]), tf.reshape(k, [-1]) * tf.reshape(t0, [-1])], 1)

        factor_mult = tf.matmul(cell_mat, gene_mat, transpose_b = True) 
        mu = mu0 * tf.nn.sigmoid(factor_mult)
        
        prob_dropout = pbeta[0] + pbeta[1] * mu

        Y = DropoutNormal(p_dropout = prob_dropout, loc = mu, scale = tf.sqrt(1 + phi * mu))
        Y._p_dropout = prob_dropout

        self.qk = Normal(loc = tf.Variable(tf.zeros([G, Q])),
           scale = tf.nn.softplus(tf.Variable(tf.zeros([G, Q]))))

        self.qz = ed.models.TransformedDistribution(
            distribution = ed.models.NormalWithSoftplusScale(loc = tf.Variable(tf.zeros([N,Q])),
                                                            scale = tf.Variable(tf.ones([N,Q]))),
            bijector = LogitShiftBijector(a = tf.zeros([N,Q]), b = tf.ones([N,Q])),
            name = "qz"
        )

        self.qmu0 = ed.models.TransformedDistribution(
            distribution = ed.models.NormalWithSoftplusScale(loc = tf.Variable(tf.zeros(G)),
                                                            scale = tf.Variable(tf.ones(G))),
            bijector = ds.bijectors.Exp(),
            name = "qmu0"
        )

        self.qphi = ed.models.TransformedDistribution(
            distribution = ed.models.NormalWithSoftplusScale(loc = tf.Variable(tf.zeros(1)),
                                                            scale = tf.Variable(tf.ones(1))),
            bijector = ds.bijectors.Exp(),
            name = "qphi"
        )

        self.qt0 = ed.models.TransformedDistribution(
            distribution = ed.models.NormalWithSoftplusScale(loc = tf.Variable(tf.zeros(G)),
                                                            scale = tf.Variable(tf.ones(G))),
            bijector = LogitShiftBijector(a = tf.zeros(G), b = tf.ones(G)),
            name = "qt0"
        )

        self.qbeta = Normal(loc = tf.Variable(tf.zeros(2)),
                scale = tf.nn.softplus(tf.Variable(tf.ones(2))))

        approx_dict = {
            k: self.qk,
            z: self.qz,
            mu0: self.qmu0,
            phi: self.qphi,
            t0: self.qt0,
            pbeta: self.qbeta
        }

        return approx_dict, Y
Example #29
def main(_):
    ed.set_seed(42)

    # DATA
    x_train, metadata = nips(FLAGS.data_dir)
    documents = metadata['columns']
    words = metadata['rows']

    # Subset to documents in 2011 and words appearing in at least two
    # documents and have a total word count of at least 10.
    doc_idx = [
        i for i, document in enumerate(documents)
        if document.startswith('2011')
    ]
    documents = [documents[doc] for doc in doc_idx]
    x_train = x_train[:, doc_idx]
    word_idx = np.logical_and(
        np.sum(x_train != 0, 1) >= 2,
        np.sum(x_train, 1) >= 10)
    words = [word for word, idx in zip(words, word_idx) if idx]
    x_train = x_train[word_idx, :]
    x_train = x_train.T

    N = x_train.shape[0]  # number of documents
    D = x_train.shape[1]  # vocabulary size

    # MODEL
    W2 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[2], FLAGS.K[1]])
    W1 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[1], FLAGS.K[0]])
    W0 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[0], D])

    z3 = Gamma(0.1, 0.1, sample_shape=[N, FLAGS.K[2]])
    z2 = Gamma(FLAGS.shape, FLAGS.shape / tf.matmul(z3, W2))
    z1 = Gamma(FLAGS.shape, FLAGS.shape / tf.matmul(z2, W1))
    x = Poisson(tf.matmul(z1, W0))

    # INFERENCE
    qW2 = pointmass_q(W2.shape)
    qW1 = pointmass_q(W1.shape)
    qW0 = pointmass_q(W0.shape)
    if FLAGS.q == 'gamma':
        qz3 = gamma_q(z3.shape)
        qz2 = gamma_q(z2.shape)
        qz1 = gamma_q(z1.shape)
    else:
        qz3 = lognormal_q(z3.shape)
        qz2 = lognormal_q(z2.shape)
        qz1 = lognormal_q(z1.shape)

    # We apply variational EM with E-step over local variables
    # and M-step to point estimate the global weight matrices.
    inference_e = ed.KLqp({
        z1: qz1,
        z2: qz2,
        z3: qz3
    },
                          data={
                              x: x_train,
                              W0: qW0,
                              W1: qW1,
                              W2: qW2
                          })
    inference_m = ed.MAP({
        W0: qW0,
        W1: qW1,
        W2: qW2
    },
                         data={
                             x: x_train,
                             z1: qz1,
                             z2: qz2,
                             z3: qz3
                         })

    optimizer_e = tf.train.RMSPropOptimizer(FLAGS.lr)
    optimizer_m = tf.train.RMSPropOptimizer(FLAGS.lr)
    kwargs = {
        'optimizer': optimizer_e,
        'n_print': 100,
        'logdir': FLAGS.logdir,
        'log_timestamp': False
    }
    if FLAGS.q == 'gamma':
        kwargs['n_samples'] = 30
    inference_e.initialize(**kwargs)
    inference_m.initialize(optimizer=optimizer_m)

    sess = ed.get_session()
    tf.global_variables_initializer().run()

    n_epoch = 20
    n_iter_per_epoch = 10000
    for epoch in range(n_epoch):
        print("Epoch {}".format(epoch))
        nll = 0.0

        pbar = Progbar(n_iter_per_epoch)
        for t in range(1, n_iter_per_epoch + 1):
            pbar.update(t)
            info_dict_e = inference_e.update()
            info_dict_m = inference_m.update()
            nll += info_dict_e['loss']

        # Compute perplexity averaged over a number of training iterations.
        # The model's negative log-likelihood of data is upper bounded by
        # the variational objective.
        nll /= n_iter_per_epoch
        perplexity = np.exp(nll / np.sum(x_train))
        print("Negative log-likelihood <= {:0.3f}".format(nll))
        print("Perplexity <= {:0.3f}".format(perplexity))

        # Print top 10 words for first 10 topics.
        qW0_vals = sess.run(qW0)
        for k in range(10):
            top_words_idx = qW0_vals[k, :].argsort()[-10:][::-1]
            top_words = " ".join([words[i] for i in top_words_idx])
            print("Topic {}: {}".format(k, top_words))
Example #30
def multivariate_bayesian_linear_mixed_model_and_factorization():
  ###########
  # Load/Simulate in data
  ############
  num_genes = 5000
  num_individuals = 100
  cells_per_individual = 100
  K = 3
  num_samples = num_individuals*cells_per_individual
  print('start loading')
  Y_train, Z_train, true_random_effects, true_gene_random_effects_sdevs, true_gene_residual_sdevs, U_true, V_true, gene_mean = generate_multivariate_bayesian_linear_mixed_model_and_factorization_data(num_genes, num_individuals, cells_per_individual, K)
  print('data loaded')
  ###############################
  ## MODEL
  # Y ~ 1 + (1|individual) 
  ###############################
  # Set up placeholders for the data inputs.
  ind_ph = tf.placeholder(tf.int32, [num_samples, 1])
  # Set up fixed effects.
  mu = tf.get_variable("mu", [num_genes])

  sigma_ind = tf.sqrt(tf.exp(tf.get_variable("sigma_ind", [num_genes])))

  sigma_resid = tf.sqrt(tf.exp(tf.get_variable("sigma_resid", [num_genes])))


  # Set up random effects
  eta_ind = Normal(loc=tf.zeros([num_individuals, num_genes]), scale= tf.matmul(tf.ones([num_individuals,num_genes]),tf.matrix_diag(sigma_ind)))

  # Set up factors
  #U = Normal(loc=0.0, scale=1, sample_shape=[num_samples, K])
  #V = Normal(loc=0.0, scale=1.0, sample_shape=[K, num_genes])
  #U = tf.exp(tf.get_variable("U", [num_samples, K]))
  
  # higher values of sparsity parameter result in a more sparse solution
  sparsity_parameter = 1.0
  U = Gamma(concentration=1.0, rate=sparsity_parameter, sample_shape=[num_samples,K])
  V = tf.get_variable("V", [K, num_genes])

  yhat = (tf.matmul(U, V) + tf.gather_nd(eta_ind, ind_ph) + tf.matmul(tf.ones([num_samples, num_genes]), tf.matrix_diag(mu)))
  y = Normal(loc=yhat, scale=tf.matmul(tf.ones([num_samples, num_genes]), tf.matrix_diag(sigma_resid)))

  ###############################
  ## Inference set up
  ###############################
  q_ind_s = Normal(
    loc=tf.get_variable("q_ind_s/loc", [num_individuals, num_genes]),
    scale=tf.nn.softplus(tf.get_variable("q_ind_s/scale", [num_individuals, num_genes])))

  qU = lognormal_q(U.shape)

  #qU = Normal(loc=tf.get_variable("qU/loc", [num_samples, K]),
   #           scale=tf.nn.softplus(
   #               tf.get_variable("qU/scale", [num_samples, K])))
  #qV = Normal(loc=tf.get_variable("qV/loc", [K, num_genes]),
   #           scale=tf.nn.softplus(
    #              tf.get_variable("qV/scale", [K, num_genes])))

  latent_vars = {
    U: qU,
    eta_ind: q_ind_s}
  data = {
    y: Y_train,
    ind_ph: Z_train}
  inference = ed.KLqp(latent_vars, data)
  tf.global_variables_initializer().run()
  inference.run(n_iter=500)
  # qU.distribution.scale.eval()
  pdb.set_trace()
Example #31
            print("K0", dim, "K", K)

            D = train_data.shape[0]
            N = train_data.shape[1]
            weights = train_data * alpha
            cau = exp_to_imp(train_data)

            tf.reset_default_graph()
            sess = tf.InteractiveSession()

            idx_ph = tf.placeholder(tf.int32, M)
            cau_ph = tf.placeholder(tf.float32, [M, N])
            sd_ph = tf.placeholder(tf.float32, [M, N])
            reconstr_cau_ph = tf.placeholder(tf.float32, [M, N])

            U = Gamma(0.3 * tf.ones([M, K]), 0.3 * tf.ones([M, K]))
            V = Gamma(0.3 * tf.ones([N, K]), 0.3 * tf.ones([N, K]))
            gamma = Gamma(tf.ones([M, 1]), tf.ones([M, 1]))
            beta0 = Gamma(0.3 * tf.ones([1, 1]), 0.3 * tf.ones([1, 1]))

            x = Poisson(tf.add(tf.matmul(U, V, transpose_b=True),\
                tf.multiply(tf.matmul(gamma, tf.ones([1, N])), \
                    reconstr_cau_ph)) + beta0)


            qU_variables = [tf.Variable(tf.random_uniform([D, K])), \
                           tf.Variable(tf.random_uniform([D, K]))]

            qU = PointMass(
                params=tf.nn.softplus(tf.gather(qU_variables[0], idx_ph)))
            print("K0", dim, "K", K)

            D = train_data.shape[0]
            N = train_data.shape[1]
            weights = train_data * alpha
            cau = exp_to_imp(train_data)

            tf.reset_default_graph()
            sess = tf.InteractiveSession()

            idx_ph = tf.placeholder(tf.int32, M)
            cau_ph = tf.placeholder(tf.float32, [M, N])
            sd_ph = tf.placeholder(tf.float32, [M, N])
            reconstr_cau_ph = tf.placeholder(tf.float32, [M, N])

            U = Gamma(0.3 * tf.ones([M, K]), 0.3 * tf.ones([M, K]))
            V = Gamma(0.3 * tf.ones([N, K]), 0.3 * tf.ones([N, K]))
            gamma = Gamma(tf.ones([M, 1]), tf.ones([M, 1]))
            beta0 = Gamma(0.3 * tf.ones([1, 1]), 0.3 * tf.ones([1, 1]))

            x = Poisson(tf.add(tf.matmul(U, V, transpose_b=True),\
                tf.multiply(tf.matmul(gamma, tf.ones([1, N])), \
                    reconstr_cau_ph)) + beta0)


            qU_variables = [tf.Variable(tf.random_uniform([D, K])), \
                           tf.Variable(tf.random_uniform([D, K]))]

            qU = PointMass(
                params=tf.nn.softplus(tf.gather(qU_variables[0], idx_ph)))