Example 1
    def test_metrics_classification(self):
        with self.test_session():
            x = Bernoulli(probs=0.51)
            x_data = tf.constant(1)
            self.assertAllClose(
                1.0, ed.evaluate('binary_accuracy', {x: x_data}, n_samples=1))
            x = Bernoulli(probs=0.51, sample_shape=5)
            x_data = tf.constant([1, 1, 1, 0, 0])
            self.assertAllClose(
                0.6, ed.evaluate('binary_accuracy', {x: x_data}, n_samples=1))
            x = Bernoulli(probs=tf.constant([0.51, 0.49, 0.49]))
            x_data = tf.constant([1, 0, 1])
            self.assertAllClose(
                2.0 / 3,
                ed.evaluate('binary_accuracy', {x: x_data}, n_samples=1))

            x = Categorical(probs=tf.constant([0.48, 0.51, 0.01]))
            x_data = tf.constant(1)
            self.assertAllClose(
                1.0,
                ed.evaluate('sparse_categorical_accuracy', {x: x_data},
                            n_samples=1))
            x = Categorical(probs=tf.constant([0.48, 0.51, 0.01]),
                            sample_shape=5)
            x_data = tf.constant([1, 1, 1, 0, 2])
            self.assertAllClose(
                0.6,
                ed.evaluate('sparse_categorical_accuracy', {x: x_data},
                            n_samples=1))
            x = Categorical(
                probs=tf.constant([[0.48, 0.51, 0.01], [0.51, 0.48, 0.01]]))
            x_data = tf.constant([1, 2])
            self.assertAllClose(
                0.5,
                ed.evaluate('sparse_categorical_accuracy', {x: x_data},
                            n_samples=1))

            x = Multinomial(total_count=1.0,
                            probs=tf.constant([0.48, 0.51, 0.01]))
            x_data = tf.constant([0, 1, 0], dtype=x.dtype.as_numpy_dtype)
            self.assertAllClose(
                1.0,
                ed.evaluate('categorical_accuracy', {x: x_data}, n_samples=1))
            x = Multinomial(total_count=1.0,
                            probs=tf.constant([0.48, 0.51, 0.01]),
                            sample_shape=5)
            x_data = tf.constant(
                [[0, 1, 0], [0, 1, 0], [0, 1, 0], [1, 0, 0], [0, 0, 1]],
                dtype=x.dtype.as_numpy_dtype)
            self.assertAllClose(
                0.6,
                ed.evaluate('categorical_accuracy', {x: x_data}, n_samples=1))

            x = Multinomial(total_count=5.0,
                            probs=tf.constant([0.4, 0.6, 0.0]))
            x_data = tf.constant([2, 3, 0], dtype=x.dtype.as_numpy_dtype)
            self.assertAllClose(
                1.0,
                ed.evaluate('multinomial_accuracy', {x: x_data}, n_samples=1))
Example 2
def _test(shape, n_minibatch):
    K = shape[-1]
    multinomial = Multinomial(shape, pi=tf.constant(1.0/K, shape=shape))
    with sess.as_default():
        pi = multinomial.pi.eval()
        z = np.zeros((n_minibatch, ) + tuple(shape))
        for i in range(shape[0]):
            z[:, i, :] = np.random.multinomial(1, pi[i, :], size=n_minibatch)

        z_tf = tf.constant(z, dtype=tf.float32)
        for i in range(shape[0]):
            assert np.allclose(
                multinomial.log_prob_idx((i, ), z_tf).eval(),
                multinomial_logpmf_vec(z[:, i, :], 1, pi[i, :]))
Example 3
    def run(self, adj_mat, n_iter=1000):
        assert adj_mat.shape[0] == adj_mat.shape[1]
        n_node = adj_mat.shape[0]

        # model
        gamma = Dirichlet(concentration=tf.ones([self.n_cluster]))
        Pi = Beta(concentration0=tf.ones([self.n_cluster, self.n_cluster]),
                  concentration1=tf.ones([self.n_cluster, self.n_cluster]))
        Z = Multinomial(total_count=1., probs=gamma, sample_shape=n_node)
        X = Bernoulli(probs=tf.matmul(Z, tf.matmul(Pi, tf.transpose(Z))))

        # inference (point estimation)
        qgamma = PointMass(params=tf.nn.softmax(
            tf.Variable(tf.random_normal([self.n_cluster]))))
        qPi = PointMass(params=tf.nn.sigmoid(
            tf.Variable(tf.random_normal([self.n_cluster, self.n_cluster]))))
        qZ = PointMass(params=tf.nn.softmax(
            tf.Variable(tf.random_normal([n_node, self.n_cluster]))))

        # map estimation
        inference = ed.MAP({gamma: qgamma, Pi: qPi, Z: qZ}, data={X: adj_mat})
        inference.initialize(n_iter=n_iter)

        tf.global_variables_initializer().run()

        for _ in range(inference.n_iter):
            info_dict = inference.update()
            inference.print_progress(info_dict)
        inference.finalize()
        return qZ.mean().eval().argmax(axis=1)
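
A minimal driver for the method above might look like the following sketch. The class name SBM and its constructor are placeholders for whatever class defines run (only a self.n_cluster attribute is assumed from the code), and the adjacency matrix is a toy two-block graph.

import numpy as np

adj = np.zeros((10, 10), dtype=np.int32)   # toy graph: two fully connected blocks
adj[:5, :5] = 1
adj[5:, 5:] = 1
np.fill_diagonal(adj, 0)                   # no self-loops

model = SBM(n_cluster=2)                   # hypothetical class holding run()
labels = model.run(adj, n_iter=500)        # one cluster index per node
print(labels)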
Example 4
def _test_log_prob_i(n_minibatch, num_factors, K):
    multinomial = Multinomial([num_factors, K],
                               pi=tf.constant(1.0/K, shape=[num_factors, K]))
    with sess.as_default():
        pi = multinomial.pi.eval()
        z = np.zeros((n_minibatch, K*num_factors))
        for i in range(num_factors):
            z[:, (i*K):((i+1)*K)] = np.random.multinomial(1, pi[i, :], size=n_minibatch)

        z_tf = tf.constant(z, dtype=tf.float32)
        for i in range(num_factors):
            # NOTE: since TensorFlow lacks the needed special functions, the
            # values here are only an approximation
            assert np.allclose(
                multinomial.log_prob_i(i, z_tf).eval(),
                multinomial_logpmf_vec(z[:, (i*K):((i+1)*K)], 1, pi[i, :]),
                atol=1e-4)
Example 5
def _test(shape, n):
    K = shape[-1]
    rv = Multinomial(shape, pi=tf.constant(1.0/K, shape=shape))
    rv_sample = rv.sample(n)
    x = rv_sample.eval()
    x_tf = tf.constant(x, dtype=tf.float32)
    pi = rv.pi.eval()
    if len(shape) == 1:
        assert np.allclose(
            rv.log_prob_idx((), x_tf).eval(),
            multinomial_logpmf_vec(x[:, :], 1, pi[:]))
    elif len(shape) == 2:
        for i in range(shape[0]):
            assert np.allclose(
                rv.log_prob_idx((i, ), x_tf).eval(),
                multinomial_logpmf_vec(x[:, i, :], 1, pi[i, :]))
    else:
        assert False
Example 6
def _test_log_prob_zi(n_minibatch, num_factors, K):
    multinomial = Multinomial(num_factors, K)
    multinomial.pi = tf.constant(1.0 / K, shape=[num_factors, K])

    with sess.as_default():
        pi = multinomial.pi.eval()
        z = np.zeros((n_minibatch, K * num_factors))
        for i in range(num_factors):
            z[:, (i * K):((i + 1) * K)] = np.random.multinomial(
                1, pi[i, :], size=n_minibatch)

        z_tf = tf.constant(z, dtype=tf.float32)
        for i in range(num_factors):
            # NOTE: since TensorFlow lacks the needed special functions, the
            # values here are only an approximation
            assert np.allclose(multinomial.log_prob_zi(i, z_tf).eval(),
                               multinomial_logpmf_vec(
                                   z[:, (i * K):((i + 1) * K)], 1, pi[i, :]),
                               atol=1e-4)
Example 7
def mmsb(N, K, data):
    # sparsity
    rho = 0.3
    # MODEL
    # probability of belonging to each of K blocks for each node
    gamma = Dirichlet(concentration=tf.ones([K]))
    # block connectivity
    Pi = Beta(concentration0=tf.ones([K, K]), concentration1=tf.ones([K, K]))
    # probability of belonging to each of K blocks for all nodes
    Z = Multinomial(total_count=1.0, probs=gamma, sample_shape=N)
    # adjacency
    X = Bernoulli(probs=(1 - rho) *
                  tf.matmul(Z, tf.matmul(Pi, tf.transpose(Z))))

    # INFERENCE (EM algorithm)
    qgamma = PointMass(
        params=tf.nn.softmax(tf.Variable(tf.random_normal([K]))))
    qPi = PointMass(
        params=tf.nn.sigmoid(tf.Variable(tf.random_normal([K, K]))))
    qZ = PointMass(params=tf.nn.softmax(tf.Variable(tf.random_normal([N, K]))))

    #qgamma = Normal(loc=tf.get_variable("qgamma/loc", [K]),
    #                scale=tf.nn.softplus(
    #                        tf.get_variable("qgamma/scale", [K])))
    #qPi = Normal(loc=tf.get_variable("qPi/loc", [K, K]),
    #                scale=tf.nn.softplus(
    #                        tf.get_variable("qPi/scale", [K, K])))
    #qZ = Normal(loc=tf.get_variable("qZ/loc", [N, K]),
    #                scale=tf.nn.softplus(
    #                        tf.get_variable("qZ/scale", [N, K])))

    #inference = ed.KLqp({gamma: qgamma, Pi: qPi, Z: qZ}, data={X: data})
    inference = ed.MAP({gamma: qgamma, Pi: qPi, Z: qZ}, data={X: data})

    #inference.run()
    n_iter = 6000
    inference.initialize(optimizer=tf.train.AdamOptimizer(learning_rate=0.01),
                         n_iter=n_iter)

    tf.global_variables_initializer().run()

    for _ in range(inference.n_iter):
        info_dict = inference.update()
        inference.print_progress(info_dict)

    inference.finalize()
    print('qgamma after: ', qgamma.mean().eval())
    return qZ.mean().eval(), qPi.eval()
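
For smoke-testing mmsb, a toy adjacency matrix can be generated from a hard-assignment version of the generative story above. Everything in this sketch (the helper name, the within/between-block probabilities) is illustrative and not part of the original example.

import numpy as np

def toy_adjacency(N=20, K=2, within=0.8, between=0.05, seed=0):
    rng = np.random.RandomState(seed)
    z = rng.randint(K, size=N)            # one hard block label per node
    P = np.full((K, K), between)          # block connectivity matrix
    np.fill_diagonal(P, within)
    return rng.binomial(1, P[z][:, z]).astype(np.int32), z

adj, z_true = toy_adjacency()
Z_est, Pi_est = mmsb(N=20, K=2, data=adj)
print(Z_est.argmax(axis=1))               # recovered blocks, up to label flips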
Example 8
def model_stationary_dirichlet_multinomial(n_states, chain_len,
                                           total_counts_per_month):
    """ Models a stationary Dirichlet-Multinomial Markov Chain in Edward """
    tf.reset_default_graph()

    # a Dirichlet prior over the state probability vector for each step of the chain
    pi_list = [Dirichlet(tf.ones(n_states)) for _ in range(chain_len)]
    # instead of sample_shape we use total_count, i.e. the number of draws
    # taken from each categorical (here, the number of accounts)
    counts = [
        Multinomial(probs=pi, total_count=float(total_counts_per_month))
        for pi in pi_list
    ]

    return pi_list, counts
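
A hedged usage sketch for the function above: build a 3-state chain over 12 months and draw one prior sample of the monthly count vectors. The concrete numbers are illustrative, and the session handling assumes Edward's TF1-style graph mode.

import edward as ed

pi_list, counts = model_stationary_dirichlet_multinomial(
    n_states=3, chain_len=12, total_counts_per_month=1000)

sess = ed.get_session()
sampled_counts = sess.run([c.value() for c in counts])  # 12 vectors, each summing to 1000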
Example 9
def main(_):
    ed.set_seed(42)

    # DATA
    X_data, Z_true = karate("~/data")
    N = X_data.shape[0]  # number of vertices
    K = 2  # number of clusters

    # MODEL
    gamma = Dirichlet(concentration=tf.ones([K]))
    Pi = Beta(concentration0=tf.ones([K, K]), concentration1=tf.ones([K, K]))
    Z = Multinomial(total_count=1.0, probs=gamma, sample_shape=N)
    X = Bernoulli(probs=tf.matmul(Z, tf.matmul(Pi, tf.transpose(Z))))

    # INFERENCE (EM algorithm)
    qgamma = PointMass(tf.nn.softmax(tf.get_variable("qgamma/params", [K])))
    qPi = PointMass(tf.nn.sigmoid(tf.get_variable("qPi/params", [K, K])))
    qZ = PointMass(tf.nn.softmax(tf.get_variable("qZ/params", [N, K])))

    inference = ed.MAP({gamma: qgamma, Pi: qPi, Z: qZ}, data={X: X_data})
    inference.initialize(n_iter=250)

    tf.global_variables_initializer().run()

    for _ in range(inference.n_iter):
        info_dict = inference.update()
        inference.print_progress(info_dict)

    # CRITICISM
    Z_pred = qZ.mean().eval().argmax(axis=1)
    print("Result (label flip can happen):")
    print("Predicted")
    print(Z_pred)
    print("True")
    print(Z_true)
    print("Adjusted Rand Index =", adjusted_rand_score(Z_pred, Z_true))
Example 10
def bayes_mult_cmd(table_file, metadata_file, formula, output_file):

    #metadata = _type_cast_to_float(metadata.copy())
    metadata = pd.read_table(metadata_file, index_col=0)
    G_data = dmatrix(formula, metadata, return_type='dataframe')
    table = load_table(table_file)

    # basic filtering parameters
    soil_filter = lambda val, id_, md: id_ in metadata.index
    read_filter = lambda val, id_, md: np.sum(val) > 10
    #sparse_filter = lambda val, id_, md: np.mean(val > 0) > 0.1
    sample_filter = lambda val, id_, md: np.sum(val) > 1000

    table = table.filter(soil_filter, axis='sample')
    table = table.filter(sample_filter, axis='sample')
    table = table.filter(read_filter, axis='observation')
    #table = table.filter(sparse_filter, axis='observation')
    print(table.shape)
    y_data = pd.DataFrame(np.array(table.matrix_data.todense()).T,
                          index=table.ids(axis='sample'),
                          columns=table.ids(axis='observation'))

    y_data, G_data = y_data.align(G_data, axis=0, join='inner')

    psi = _gram_schmidt_basis(y_data.shape[1])
    G_data = G_data.values
    y_data = y_data.values
    N, D = y_data.shape
    p = G_data.shape[1] # number of covariates
    r = G_data.shape[1] # rank of covariance matrix

    psi = tf.convert_to_tensor(psi, dtype=tf.float32)
    n = tf.convert_to_tensor(y_data.sum(axis=1), dtype=tf.float32)

    # hack to get multinomial working
    def _sample_n(self, n=1, seed=None):
        # define Python function which returns samples as a Numpy array
        def np_sample(p, n):
            return multinomial.rvs(p=p, n=n, random_state=seed).astype(np.float32)

        # wrap python function as tensorflow op
        val = tf.py_func(np_sample, [self.probs, n], [tf.float32])[0]
        # set shape from unknown shape
        batch_event_shape = self.batch_shape.concatenate(self.event_shape)
        shape = tf.concat(
            [tf.expand_dims(n, 0), tf.convert_to_tensor(batch_event_shape)], 0)
        val = tf.reshape(val, shape)
        return val
    Multinomial._sample_n = _sample_n
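    # (The patch above works around the Multinomial random variable apparently
    # not being directly sampleable here: a scipy-based sampler is wrapped with
    # tf.py_func and attached as _sample_n, and the final tf.reshape restores
    # the static batch/event shape that tf.py_func discards.)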


    # dummy variable for gradient
    G = tf.placeholder(tf.float32, [N, p])

    b = Exponential(rate=1.0)
    B = Normal(loc=tf.zeros([p, D-1]),
               scale=tf.ones([p, D-1]))

    # Factorization of covariance matrix
    # http://edwardlib.org/tutorials/klqp
    l = Exponential(rate=1.0)
    L = Normal(loc=tf.zeros([p, D-1]),
               scale=tf.ones([p, D-1]))
    z = Normal(loc=tf.zeros([N, p]),
               scale=tf.ones([N, p]))

    # Cholesky trick to get multivariate normal
    v = tf.matmul(G, B) + tf.matmul(z, L)

    # get clr transformed values
    eta = tf.matmul(v, psi)

    Y = Multinomial(total_count=n, logits=eta)


    T = 100000  # number of MCMC samples kept in the Empirical approximation of z

    qb = PointMass(params=tf.Variable(tf.random_normal([])))
    qB = PointMass(params=tf.Variable(tf.random_normal([p, D-1])))
    qz = Empirical(params=tf.Variable(tf.random_normal([T, N, p])))
    ql = PointMass(params=tf.Variable(tf.random_normal([])))
    qL = PointMass(params=tf.Variable(tf.random_normal([p, D-1])))

    # Imputation
    inference_z = ed.SGLD(
        {z: qz}, 
        data={G: G_data, Y: y_data, B: qB, L: qL}
    )

    # Maximization
    inference_BL = ed.MAP(
        {B: qB, L: qL, b: qb, l: ql}, 
        data={G: G_data, Y: y_data, z: qz}
    )

    inference_z.initialize(step_size=1e-10)
    inference_BL.initialize(n_iter=1000)


    sess = ed.get_session()
    saver = tf.train.Saver()

    tf.global_variables_initializer().run()
    for i in range(inference_BL.n_iter):
        inference_z.update()  # e-step
        # will need to compute the expectation of z

        info_dict = inference_BL.update() # m-step
        inference_BL.print_progress(info_dict)

    save_path = saver.save(sess, output_file)
    print("Model saved in file: %s" % save_path)
    pickle.dump({'qB': sess.run(qB.mean()),
                 'qL': sess.run(qL.mean()),
                 'qz': sess.run(qz.mean())},
                open(output_file + '.params.pickle', 'wb')
    )
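
A hypothetical invocation of the function above; the file names and the patsy-style design formula below are placeholders, so this only illustrates the expected argument types (a BIOM feature table, a tab-separated metadata file, a formula, and an output checkpoint path).

bayes_mult_cmd('otu_table.biom', 'metadata.txt', 'C(treatment)', 'fit/model.ckpt')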
Example 11
def _test(shape, p, n):
    x = Multinomial(shape, p)
    val_est = tuple(get_dims(x.sample(n)))
    val_true = (n, ) + shape
    assert val_est == val_true
Example 12
    def test_metrics_with_binary_averaging(self):
        x = Multinomial(total_count=10.0, probs=tf.constant([0.2, 0.7, 0.1]))
        x_data = tf.constant([5, 4, 1], dtype=x.dtype.as_numpy_dtype)
        self.assertAllEqual(
            np.array([9.0, 4.0, 1.0], dtype=np.float32),
            ed.evaluate([('mean_squared_error', {
                'average': None
            })], {x: x_data},
                        n_samples=1,
                        seed=self.RANDOM_SEED))
        x = Multinomial(total_count=10.0, probs=tf.constant([0.2, 0.7, 0.1]))
        x_data = tf.constant([5, 4, 1], dtype=x.dtype.as_numpy_dtype)
        self.assertAllClose(
            4.6666665,
            ed.evaluate([('mean_squared_error', {
                'average': 'macro'
            })], {x: x_data},
                        n_samples=1,
                        seed=self.RANDOM_SEED))
        x = Multinomial(total_count=10.0, probs=tf.constant([0.2, 0.7, 0.1]))
        x_data = tf.constant([5, 4, 1], dtype=x.dtype.as_numpy_dtype)
        self.assertAllClose(
            4.6666665,
            ed.evaluate([('mean_squared_error', {
                'average': 'micro'
            })], {x: x_data},
                        n_samples=1,
                        seed=self.RANDOM_SEED))

        x = Multinomial(total_count=10.0,
                        probs=tf.constant([0.2, 0.7, 0.1]),
                        sample_shape=5)
        x_data = tf.constant(
            [[2, 7, 1], [3, 6, 1], [3, 5, 2], [4, 4, 2], [2, 7, 1]],
            dtype=x.dtype.as_numpy_dtype)
        self.assertAllEqual(
            np.array([1.2, 1.4, 0.6], dtype=np.float32),
            ed.evaluate([('mean_squared_error', {
                'average': None
            })], {x: x_data},
                        n_samples=1,
                        seed=self.RANDOM_SEED))
        x = Multinomial(total_count=10.0,
                        probs=tf.constant([0.2, 0.7, 0.1]),
                        sample_shape=5)
        x_data = tf.constant(
            [[2, 7, 1], [3, 6, 1], [3, 5, 2], [4, 4, 2], [2, 7, 1]],
            dtype=x.dtype.as_numpy_dtype)
        self.assertAllClose(
            1.066666603088379,
            ed.evaluate([('mean_squared_error', {
                'average': 'macro'
            })], {x: x_data},
                        n_samples=1,
                        seed=self.RANDOM_SEED))
        x = Multinomial(total_count=10.0,
                        probs=tf.constant([0.2, 0.7, 0.1]),
                        sample_shape=5)
        x_data = tf.constant(
            [[2, 7, 1], [3, 6, 1], [3, 5, 2], [4, 4, 2], [2, 7, 1]],
            dtype=x.dtype.as_numpy_dtype)
        self.assertAllClose(
            1.0666667222976685,
            ed.evaluate([('mean_squared_error', {
                'average': 'micro'
            })], {x: x_data},
                        n_samples=1,
                        seed=self.RANDOM_SEED))
Example 13
  return X, Z


ed.set_seed(42)

# DATA
label_filepath = 'data/karate_labels.txt'
graph_filepath = 'data/karate_edgelist.txt'
X_data, Z_true = build_dataset(label_filepath, graph_filepath)
N = X_data.shape[0]  # number of vertices
K = 2  # number of clusters

# MODEL
gamma = Dirichlet(concentration=tf.ones([K]))
Pi = Beta(concentration0=tf.ones([K, K]), concentration1=tf.ones([K, K]))
Z = Multinomial(total_count=1., probs=gamma, sample_shape=N)
X = Bernoulli(probs=tf.matmul(Z, tf.matmul(Pi, tf.transpose(Z))))

# INFERENCE (EM algorithm)
qgamma = PointMass(params=tf.nn.softmax(tf.Variable(tf.random_normal([K]))))
qPi = PointMass(params=tf.nn.sigmoid(tf.Variable(tf.random_normal([K, K]))))
qZ = PointMass(params=tf.nn.softmax(tf.Variable(tf.random_normal([N, K]))))

inference = ed.MAP({gamma: qgamma, Pi: qPi, Z: qZ}, data={X: X_data})

n_iter = 100
inference.initialize(n_iter=n_iter)

tf.global_variables_initializer().run()

for _ in range(inference.n_iter):
  info_dict = inference.update()
  inference.print_progress(info_dict)
Example 14

# data
N = 50
K = 10

t_true = np.random.randint(0, 2, size=[N])
t_true_2D = np.array([t_true, 1-t_true])
alpha_true = np.random.beta(a=1, b=1, size=[K, 2])

x_data = np.random.rand(K, N) < np.dot(alpha_true, t_true_2D)
x_data = x_data + 0  # cast booleans to 0/1 integers

# model
pi = Dirichlet(concentration=tf.ones(2))
t = Multinomial(total_count=1., probs=pi, sample_shape=N)
alpha = Beta(concentration0=tf.ones([K, 2]), concentration1=tf.ones([K, 2]))
X = Bernoulli(probs=tf.matmul(alpha, tf.transpose(t)))

# inference
qpi = PointMass(params=tf.nn.softmax(tf.Variable(tf.random_normal([2]))))
qt = PointMass(params=tf.nn.softmax(tf.Variable(tf.random_normal([N, 2]))))
qalpha = PointMass(params=tf.nn.sigmoid(tf.Variable(tf.random_normal([K, 2]))))

inference = ed.MAP({pi: qpi, t: qt, alpha: qalpha}, data={X: x_data})

inference.run(n_iter=5000)


# criticism
t_pred = qt.mean().eval().argmax(axis=1)
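
A short check one might add here against the simulated ground truth; since MAP can recover the two components in either order, both labelings are scored. This follow-up is a sketch, not part of the original example.

acc = np.mean(t_pred == t_true)
print('accuracy (up to label flip):', max(acc, 1.0 - acc))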
Example 15
def _test(shape, p, size):
    x = Multinomial(shape, p)
    val_est = tuple(get_dims(x.sample(size=size)))
    val_true = (size, ) + shape
    assert val_est == val_true