Example #1
# Imports assumed by this snippet (not shown in the excerpt); the helper
# dirichlet_process() is defined earlier in the original script.
import matplotlib.pyplot as plt
import tensorflow as tf
from edward.models import DirichletProcess, Exponential, Normal

def main(_):
  dp = dirichlet_process(10.0)

  # The number of sticks broken is dynamic, changing across evaluations.
  sess = tf.Session()
  print(sess.run(dp))
  print(sess.run(dp))

  # Demo of the DirichletProcess random variable in Edward.
  base = Normal(0.0, 1.0)

  # Highly concentrated DP.
  alpha = 1.0
  dp = DirichletProcess(alpha, base)
  x = dp.sample(1000)
  samples = sess.run(x)
  plt.hist(samples, bins=100, range=(-3.0, 3.0))
  plt.title("DP({0}, N(0, 1))".format(alpha))
  plt.show()

  # More spread out DP.
  alpha = 50.0
  dp = DirichletProcess(alpha, base)
  x = dp.sample(1000)
  samples = sess.run(x)
  plt.hist(samples, bins=100, range=(-3.0, 3.0))
  plt.title("DP({0}, N(0, 1))".format(alpha))
  plt.show()

  # States persist across calls to sample() in a DP.
  alpha = 1.0
  dp = DirichletProcess(alpha, base)
  x = dp.sample(50)
  y = dp.sample(75)
  samples_x, samples_y = sess.run([x, y])
  plt.subplot(211)
  plt.hist(samples_x, bins=100, range=(-3.0, 3.0))
  plt.title("DP({0}, N(0, 1)) across two calls to sample()".format(alpha))
  plt.subplot(212)
  plt.hist(samples_y, bins=100, range=(-3.0, 3.0))
  plt.show()

  # `theta` is the distribution indirectly returned by the DP.
  # Fetching theta is the same as fetching the Dirichlet process.
  dp = DirichletProcess(alpha, base)
  theta = Normal(0.0, 1.0, value=tf.cast(dp, tf.float32))
  print(sess.run([dp, theta]))
  print(sess.run([dp, theta]))

  # DirichletProcess can also take in non-scalar concentrations and bases.
  alpha = tf.constant([0.1, 0.6, 0.4])
  base = Exponential(rate=tf.ones([5, 2]))
  dp = DirichletProcess(alpha, base)
  print(dp)
Example #2
def main(_):
    ed.set_seed(42)

    # Prior on scalar hyperparameter to Dirichlet.
    alpha = Gamma(1.0, 1.0)

    # Prior on size of Dirichlet.
    n = 1 + tf.cast(Exponential(0.5), tf.int32)

    # Build a vector of ones whose size is n; multiply it by alpha.
    p = Dirichlet(tf.ones([n]) * alpha)

    sess = ed.get_session()
    print(sess.run(p))
    # [ 0.01012419  0.02939712  0.05036638  0.51287931  0.31020424  0.0485355
    #   0.0384932 ]
    print(sess.run(p))
Example #3
# Assumed module-level imports for this snippet: pickle, numpy as np,
# pandas as pd, tensorflow as tf, edward as ed, load_table (biom),
# dmatrix (patsy), multinomial (scipy.stats), the Edward models used below,
# and the helper _gram_schmidt_basis.
def bayes_mult_cmd(table_file, metadata_file, formula, output_file):

    #metadata = _type_cast_to_float(metadata.copy())
    metadata = pd.read_table(metadata_file, index_col=0)
    G_data = dmatrix(formula, metadata, return_type='dataframe')
    table = load_table(table_file)

    # basic filtering parameters
    soil_filter = lambda val, id_, md: id_ in metadata.index
    read_filter = lambda val, id_, md: np.sum(val) > 10
    #sparse_filter = lambda val, id_, md: np.mean(val > 0) > 0.1
    sample_filter = lambda val, id_, md: np.sum(val) > 1000

    table = table.filter(soil_filter, axis='sample')
    table = table.filter(sample_filter, axis='sample')
    table = table.filter(read_filter, axis='observation')
    #table = table.filter(sparse_filter, axis='observation')
    print(table.shape)
    y_data = pd.DataFrame(np.array(table.matrix_data.todense()).T,
                          index=table.ids(axis='sample'),
                          columns=table.ids(axis='observation'))

    y_data, G_data = y_data.align(G_data, axis=0, join='inner')

    psi = _gram_schmidt_basis(y_data.shape[1])
    G_data = G_data.values
    y_data = y_data.values
    N, D = y_data.shape
    p = G_data.shape[1] # number of covariates
    r = G_data.shape[1] # rank of covariance matrix

    psi = tf.convert_to_tensor(psi, dtype=tf.float32)
    n = tf.convert_to_tensor(y_data.sum(axis=1), dtype=tf.float32)

    # hack to get multinomial working
    def _sample_n(self, n=1, seed=None):
        # define Python function which returns samples as a Numpy array
        def np_sample(p, n):
            return multinomial.rvs(p=p, n=n, random_state=seed).astype(np.float32)

        # wrap python function as tensorflow op
        val = tf.py_func(np_sample, [self.probs, n], [tf.float32])[0]
        # set shape from unknown shape
        batch_event_shape = self.batch_shape.concatenate(self.event_shape)
        shape = tf.concat(
            [tf.expand_dims(n, 0), tf.convert_to_tensor(batch_event_shape)], 0)
        val = tf.reshape(val, shape)
        return val
    Multinomial._sample_n = _sample_n


    # dummy variable for gradient
    G = tf.placeholder(tf.float32, [N, p])

    b = Exponential(rate=1.0)
    B = Normal(loc=tf.zeros([p, D-1]), 
               scale=tf.ones([p, D-1]) )

    # Factorization of covariance matrix
    # http://edwardlib.org/tutorials/klqp
    l = Exponential(rate=1.0)
    L = Normal(loc=tf.zeros([p, D-1]), 
               scale=tf.ones([p, D-1]) )
    z = Normal(loc=tf.zeros([N, p]), 
               scale=tf.ones([N, p]))

    # Cholesky trick to get multivariate normal
    v = tf.matmul(G, B) + tf.matmul(z, L)

    # get clr transformed values
    eta = tf.matmul(v, psi)

    Y = Multinomial(total_count=n, logits=eta)


    T = 100000  # number of MCMC samples kept in the Empirical approximation

    qb = PointMass(params=tf.Variable(tf.random_normal([])))
    qB = PointMass(params=tf.Variable(tf.random_normal([p, D-1])))
    qz = Empirical(params=tf.Variable(tf.random_normal([T, N, p])))
    ql = PointMass(params=tf.Variable(tf.random_normal([])))
    qL = PointMass(params=tf.Variable(tf.random_normal([p, D-1])))

    # Imputation
    inference_z = ed.SGLD(
        {z: qz}, 
        data={G: G_data, Y: y_data, B: qB, L: qL}
    )

    # Maximization
    inference_BL = ed.MAP(
        {B: qB, L: qL, b: qb, l: ql}, 
        data={G: G_data, Y: y_data, z: qz}
    )

    inference_z.initialize(step_size=1e-10)
    inference_BL.initialize(n_iter=1000)


    sess = ed.get_session()
    saver = tf.train.Saver()

    tf.global_variables_initializer().run()
    for i in range(inference_BL.n_iter):
        inference_z.update()  # e-step
        # will need to compute the expectation of z

        info_dict = inference_BL.update() # m-step
        inference_BL.print_progress(info_dict)

    save_path = saver.save(sess, output_file)
    print("Model saved in file: %s" % save_path)
    pickle.dump({'qB': sess.run(qB.mean()),
                 'qL': sess.run(qL.mean()),
                 'qz': sess.run(qz.mean())},
                open(output_file + '.params.pickle', 'wb')
    )
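
# Hypothetical invocation sketch (file names and the patsy formula are
# placeholders, not from the original); `formula` is evaluated against the
# metadata columns by dmatrix() above.
# bayes_mult_cmd('table.biom', 'metadata.txt', 'depth + C(treatment)', 'model.ckpt')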
Example #4
# NOTE: this snippet uses the older Edward argument names (mu=/sigma=/lam=)
# and ed.MFVI, which later releases renamed to loc=/scale=/rate= and ed.KLqp.
# It assumes D, Db, and the MixedModel_wn model wrapper are defined earlier
# in the original script.
qi_mu = tf.Variable(tf.random_normal([]))
qi_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([])))
qi = Normal(mu=qi_mu, sigma=qi_sigma)

#qw_mu = tf.expand_dims(tf.convert_to_tensor(beta0[0].astype(np.float32)),1)
qw_mu = tf.Variable(tf.random_normal([D,1]))
qw_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([D,1])))
qw = Normal(mu=qw_mu, sigma=qw_sigma)

qb_mu = tf.Variable(tf.random_normal([Db,1]))
qb_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([Db,1])))
qb = Normal(mu=qb_mu, sigma=qb_sigma)

eps  = tf.nn.softplus(tf.Variable(tf.random_normal([])))
qeps = Exponential(lam=eps)

zs_wn   = {'beta': qw, 'b': qb, 'Intercept': qi, 'eps': qeps}

Xnew = ed.placeholder(tf.float32, shape=(None, D))
Znew = ed.placeholder(tf.float32, shape=(None, Db))
ynew = ed.placeholder(tf.float32, shape=(None))

data = {'X': Xnew, 'y': ynew, 'Z': Znew}

model_wn = MixedModel_wn(lik_std=0.1, prior_std=100.0)

sess = ed.get_session()
inference = ed.MFVI(zs_wn, data, model_wn)
#inference.initialize()
Example #5
def _test(lam, n):
    x = Exponential(lam=lam)
    val_est = get_dims(x.sample(n))
    val_true = n + get_dims(lam)
    assert val_est == val_true
Example #6
"""We build a random variable whose size depends on a sample from another
random variable.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import edward as ed
import tensorflow as tf

from edward.models import Exponential, Dirichlet, Gamma

ed.set_seed(42)

# Prior on scalar hyperparameter to Dirichlet.
alpha = Gamma(alpha=1.0, beta=1.0)

# Prior on size of Dirichlet.
n = 1 + tf.cast(Exponential(lam=0.5), tf.int32)

# Build a vector of ones whose size is n; multiply it by alpha.
p = Dirichlet(alpha=tf.ones([n]) * alpha)

sess = ed.get_session()
print(sess.run(p.value()))
# [ 0.01012419  0.02939712  0.05036638  0.51287931  0.31020424  0.0485355
#   0.0384932 ]
print(sess.run(p.value()))
# [ 0.12836078  0.23335715  0.63828212]
Example #7
samples = sess.run(x)
plt.hist(samples, bins=100, range=(-3.0, 3.0))
plt.title("DP({0}, N(0, 1))".format(alpha))
plt.show()

# States persist across calls to sample() in a DP.
alpha = 1.0
dp = DirichletProcess(alpha, base)
x = dp.sample(50)
y = dp.sample(75)
samples_x, samples_y = sess.run([x, y])
plt.subplot(211)
plt.hist(samples_x, bins=100, range=(-3.0, 3.0))
plt.title("DP({0}, N(0, 1)) across two calls to sample()".format(alpha))
plt.subplot(212)
plt.hist(samples_y, bins=100, range=(-3.0, 3.0))
plt.show()

# ``theta`` is the distribution indirectly returned by the DP.
# Fetching theta is the same as fetching the Dirichlet process.
dp = DirichletProcess(alpha, base)
theta = Normal(0.0, 1.0, value=tf.cast(dp, tf.float32))
print(sess.run([dp, theta]))
print(sess.run([dp, theta]))

# DirichletProcess can also take in non-scalar concentrations and bases.
alpha = tf.constant([0.1, 0.6, 0.4])
base = Exponential(lam=tf.ones([5, 2]))
dp = DirichletProcess(alpha, base)
print(dp)
Example #8
def _test(lam, n):
  x = Exponential(lam=lam)
  val_est = get_dims(x.sample(n))
  val_true = n + get_dims(lam)
  assert val_est == val_true
Example #9
from edward.models import Normal, Exponential
import tensorflow as tf

# All of these classes derive from RandomVariable.
# Univariate distributions
Normal(loc=tf.constant(0.0), scale=tf.constant(1.0))
Normal(loc=tf.zeros(5), scale=tf.ones(5))
Exponential(rate=tf.ones([2, 3]))

# Multivariate distributions
from edward.models import Dirichlet, MultivariateNormalTriL
K = 3
Dirichlet(concentration=tf.constant([0.1] * K))  # a K-dimensional Dirichlet distribution
MultivariateNormalTriL(loc=tf.zeros([5, K]),
                       scale_tril=tf.ones([5, K, K]))  # the last dimension of loc gives the dimensionality
MultivariateNormalTriL(loc=tf.zeros([2, 5, K]),
                       scale_tril=tf.ones([2, 5, K, K]))

# Each RandomVariable has methods log_prob(), mean(), and sample(), and is
# associated with a tensor in the computational graph.
# Random variables support many TensorFlow operations.
from edward.models import Normal

x = Normal(loc=tf.zeros(10), scale=tf.ones(10))
y = tf.constant(5.0)
x + y, x - y, x * y, x / y
tf.tanh(x * y)
tf.gather(x, 2)
print(x[2])
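
# Sketch of the methods mentioned above, applied to x (x has batch shape [10]):
x.log_prob(tf.zeros(10))  # log-density evaluated pointwise, shape [10]
x.mean()                  # distribution mean, shape [10]
x.sample(5)               # five draws, shape [5, 10]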

# Directed graphical models
from edward.models import Bernoulli, Beta
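
# A minimal directed-model sketch using the imports above (illustrative only,
# not part of the original snippet): a Beta prior on a coin's bias feeding
# ten Bernoulli flips that share it.
theta = Beta(1.0, 1.0)                           # prior on the probability of heads
flips = Bernoulli(probs=theta, sample_shape=10)  # ten flips conditioned on theta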
Example #10
samples = sess.run(x)
plt.hist(samples, bins=100, range=(-3.0, 3.0))
plt.title("DP({0}, N(0, 1))".format(alpha))
plt.show()

# States persist across calls to sample() in a DP.
alpha = 1.0
dp = DirichletProcess(alpha, base)
x = dp.sample(50)
y = dp.sample(75)
samples_x, samples_y = sess.run([x, y])
plt.subplot(211)
plt.hist(samples_x, bins=100, range=(-3.0, 3.0))
plt.title("DP({0}, N(0, 1)) across two calls to sample()".format(alpha))
plt.subplot(212)
plt.hist(samples_y, bins=100, range=(-3.0, 3.0))
plt.show()

# ``theta`` is the distribution indirectly returned by the DP.
# Fetching theta is the same as fetching the Dirichlet process.
dp = DirichletProcess(alpha, base)
theta = Normal(0.0, 1.0, value=tf.cast(dp, tf.float32))
print(sess.run([dp, theta]))
print(sess.run([dp, theta]))

# DirichletProcess can also take in non-scalar concentrations and bases.
alpha = tf.constant([0.1, 0.6, 0.4])
base = Exponential(rate=tf.ones([5, 2]))
dp = DirichletProcess(alpha, base)
print(dp)
Example #11
import numpy as np
import tensorflow as tf
import edward as ed
from tensorflow.python import debug as tf_debug
from edward.models import Normal, Poisson, PointMass, Exponential, Uniform, Empirical

count_data = np.loadtxt("data/txtdata.csv")
n_count_data = len(count_data)

sess = tf.Session()

alpha_f = 1.0 / count_data.mean()

with tf.name_scope('model'):
    alpha = tf.Variable(alpha_f, name="alpha", dtype=tf.float32)

    # initialize the model: exponential priors on two rates and a uniform switchpoint
    lambda_1 = Exponential(alpha, name="lambda1")
    lambda_2 = Exponential(alpha, name="lambda2")
    tau = Uniform(low=0.0, high=float(n_count_data - 1), name="tau")
    idx = np.arange(n_count_data)
    lambda_ = tf.where(
        tau >= idx,
        tf.ones(shape=[
            n_count_data,
        ], dtype=tf.float32) * lambda_1,
        tf.ones(shape=[
            n_count_data,
        ], dtype=tf.float32) * lambda_2)

    # error
    z = Poisson(lambda_, value=tf.Variable(tf.ones(n_count_data)), name="poi")