Example #1
 def test_log_likelihood2(self):
     d = 100
     data = np.tile(np.eye(d)[None], [10, 1, 1])
     S = np.diag(np.exp(np.random.normal(size=d)))
     sigma = IW([T.to_float(S), d + 1])
     np.testing.assert_almost_equal(
         self.session.run(sigma.log_likelihood(T.to_float(data))),
         invwishart(scale=S, df=d + 1).logpdf(data.T), 3)
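A quick standalone check of the SciPy convention these tests rely on (a minimal sketch using only NumPy and SciPy; the variable names below are local to this sketch): scipy.stats.invwishart.logpdf stacks a batch of quantile matrices along the last axis, which is why the tests pass data.T rather than data.

import numpy as np
from scipy.stats import invwishart

d = 3
S = np.eye(d)
data = np.tile(np.eye(d)[None], [5, 1, 1])   # batch of 5 (d x d) matrices

# SciPy expects the batch axis last, i.e. shape (d, d, 5), hence the transpose.
log_probs = invwishart(scale=S, df=d + 1).logpdf(data.T)
print(log_probs.shape)   # (5,)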
Example #2
 def test_log_likelihood2(self):
     d = 100
     data = invwishart(scale=np.eye(d), df=d + 1).rvs(size=100)
     S = np.eye(d)
     sigma = IW([T.to_float(S), d + 1])
     np.testing.assert_almost_equal(
         self.session.run(sigma.log_likelihood(T.to_float(data))),
         invwishart(scale=S, df=d + 1).logpdf(data.T), -1)
Example #3
File: vmp.py Project: sharadmv/nvmp
def initialize_node(node, children):
    if isinstance(node, Gaussian):
        d = T.shape(node)
        return Gaussian([T.eye(d[-1], batch_shape=d[:-1]), T.random_normal(d)])
    elif isinstance(node, IW):
        d = T.shape(node)
        return IW([(T.to_float(d[-1]) + 1) * T.eye(d[-1], batch_shape=d[:-2]),
                   T.to_float(d[-1]) + 1])
Example #4
 def initialize_objective(self):
     self.C, self.c = (
         T.variable(T.random_normal([self.ds, self.ds])),
         T.variable(T.random_normal([self.ds])),
     )
     if self.learn_stdev:
         self.stdev = T.variable(T.to_float(self.cost_stdev))
     else:
         self.stdev = T.to_float(self.cost_stdev)
Example #5
 def test_log_likelihood1(self):
     d = 2
     data = np.tile(np.eye(d)[None], [10, 1, 1])
     sigma = IW([T.eye(d), d + 1])
     np.testing.assert_almost_equal(
         self.session.run(sigma.log_likelihood(T.to_float(data))),
         invwishart(scale=np.eye(2), df=d + 1).logpdf(data.T), 5)
Example #6
File: vmp.py Project: sharadmv/nvmp
def vmp(graph, data, max_iter=100, tol=1e-4):
    # Observed nodes become fixed evidence; every latent node gets an
    # initial variational factor.
    q, visible = {}, {}
    for node in top_sort(graph)[::-1]:
        if node in data:
            visible[node] = T.to_float(data[node])
        else:
            q[node] = initialize_node(node, {})

    ordering = list(q.keys())
    params = [q[var].get_parameters('natural') for var in ordering]
    prev_elbo = T.constant(float('inf'))

    def cond(i, elbo, prev_elbo, q):
        return T.logical_and(i < max_iter, abs(elbo - prev_elbo) > tol)

    def step(i, elbo, prev_elbo, q):
        # Rebuild the variational factors from their natural parameters, run
        # one round of message passing, and return the updated parameters.
        prev_elbo = elbo
        q_vars = {
            var: var.__class__(param, 'natural')
            for var, param in zip(ordering, q)
        }
        q, elbo = message_passing(q_vars, visible)
        return i + 1, elbo, prev_elbo, [
            q[var].get_parameters('natural') for var in ordering
        ]

    i, elbo, prev_elbo, q = T.while_loop(cond, step,
                                         [0, float('inf'), 0.0, params])
    return {
        var: var.__class__(param, 'natural')
        for var, param in zip(ordering, q)
    }, elbo
Example #7
 def test_log_z(self):
     d = 100
     data = invwishart(scale=np.eye(d), df=d + 1).rvs(size=100)
     S = np.eye(d)
     sigma = IW([T.to_float(S), d + 1])
     np.testing.assert_almost_equal(self.session.run(sigma.log_z()),
                                    self.log_z(S, d + 1), 3)
     np.testing.assert_almost_equal(
         self.session.run(sigma.log_z('natural')), self.log_z(S, d + 1), 3)
Example #8
 def log_z(self, parameter_type='regular', stop_gradient=False):
     if parameter_type == 'regular':
         sigma, mu = self.get_parameters('regular', stop_gradient=stop_gradient)
         d = T.to_float(self.shape()[-1])
         hsi, hlds = Stats.HSI(sigma), Stats.HLDS(sigma)
         mmT = Stats.XXT(mu)
         return (
             - T.sum(hsi * mmT, [-1, -2]) - hlds
             + d / 2. * np.log(2 * np.pi)
         )
     else:
         natparam = self.get_parameters('natural', stop_gradient=stop_gradient)
         d = T.to_float(self.shape()[-1])
         J, m = natparam[Stats.XXT], natparam[Stats.X]
         return (
             - 0.25 * (m[..., None, :]@T.matrix_inverse(J)@m[..., None])[..., 0, 0]
             - 0.5 * T.logdet(-2 * J)
             + d / 2. * np.log(2 * np.pi)
         )
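Both branches above compute the same Gaussian log-partition function, once from the regular parameters (sigma, mu) and once from the natural parameters, assuming the usual convention J = -1/2 * inv(sigma) and m = inv(sigma) @ mu (and, correspondingly, that Stats.HSI and Stats.HLDS denote -1/2 * inv(sigma) and -1/2 * logdet(sigma)). A minimal NumPy check of that identity, independent of the backend:

import numpy as np

d = 3
A = np.random.randn(d, d)
sigma = A @ A.T + d * np.eye(d)          # a well-conditioned SPD covariance
mu = np.random.randn(d)

# Regular parameterization: 1/2 mu' inv(sigma) mu + 1/2 log|sigma| + d/2 log(2 pi)
log_z_regular = (0.5 * mu @ np.linalg.solve(sigma, mu)
                 + 0.5 * np.linalg.slogdet(sigma)[1]
                 + d / 2. * np.log(2 * np.pi))

# Natural parameterization, matching the second branch above.
J = -0.5 * np.linalg.inv(sigma)
m = np.linalg.solve(sigma, mu)
log_z_natural = (-0.25 * m @ np.linalg.solve(J, m)
                 - 0.5 * np.linalg.slogdet(-2 * J)[1]
                 + d / 2. * np.log(2 * np.pi))

np.testing.assert_allclose(log_z_regular, log_z_natural)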
Example #9
 def test_stats1(self):
     d = 2
     S = np.eye(d)
     sigma = IW([T.to_float(S), d + 1])
     stats = self.stats(S, d + 1)
     stats_ = self.session.run(sigma.expected_sufficient_statistics())
     for s in sigma.statistics():
         np.testing.assert_almost_equal(stats_[s], stats[s])
Example #10
 def initialize_objective(self):
     H, ds, da = self.horizon, self.ds, self.da
     if self.time_varying:
         A = T.concatenate(
             [T.eye(ds, batch_shape=[H - 1]),
              T.zeros([H - 1, ds, da])], -1)
         self.A_prior = stats.MNIW([
             2 * T.eye(ds, batch_shape=[H - 1]), A,
             T.eye(ds + da, batch_shape=[H - 1]),
             T.to_float(ds + 2) * T.ones([H - 1])
         ],
                                   parameter_type='regular')
         self.A_variational = stats.MNIW(list(
             map(
                 T.variable,
                 stats.MNIW.regular_to_natural([
                     2 * T.eye(ds, batch_shape=[H - 1]),
                     A + 1e-2 * T.random_normal([H - 1, ds, ds + da]),
                     T.eye(ds + da, batch_shape=[H - 1]),
                     T.to_float(ds + 2) * T.ones([H - 1])
                 ]))),
                                         parameter_type='natural')
     else:
         A = T.concatenate([T.eye(ds), T.zeros([ds, da])], -1)
         self.A_prior = stats.MNIW(
             [2 * T.eye(ds), A,
              T.eye(ds + da),
              T.to_float(ds + 2)],
             parameter_type='regular')
         self.A_variational = stats.MNIW(list(
             map(
                 T.variable,
                 stats.MNIW.regular_to_natural([
                     2 * T.eye(ds),
                     A + 1e-2 * T.random_normal([ds, ds + da]),
                     T.eye(ds + da),
                     T.to_float(ds + 2)
                 ]))),
                                         parameter_type='natural')
Example #11
 def kl_divergence(self, q_X, q_A, num_data):
     if (q_X, q_A) not in self.cache:
         if self.smooth:
             state_prior = stats.GaussianScaleDiag(
                 [T.ones(self.ds), T.zeros(self.ds)])
             self.p_X = stats.LDS(
                 (self.sufficient_statistics(), state_prior, None,
                  q_A.expected_value(), self.horizon), 'internal')
             local_kl = stats.kl_divergence(q_X, self.p_X)
             if self.time_varying:
                 global_kl = T.sum(
                     stats.kl_divergence(self.A_variational, self.A_prior))
             else:
                 global_kl = stats.kl_divergence(self.A_variational,
                                                 self.A_prior)
             prior_kl = T.mean(local_kl,
                               axis=0) + global_kl / T.to_float(num_data)
             A, Q = self.get_dynamics()
             model_stdev = T.sqrt(T.matrix_diag_part(Q))
             self.cache[(q_X, q_A)] = prior_kl, {
                 'local-kl': local_kl,
                 'global-kl': global_kl,
                 'model-stdev': model_stdev,
             }
         else:
             q_Xt = q_X.__class__([
                 q_X.get_parameters('regular')[0][:, :-1],
                 q_X.get_parameters('regular')[1][:, :-1],
             ])
             q_At = q_A.__class__([
                 q_A.get_parameters('regular')[0][:, :-1],
                 q_A.get_parameters('regular')[1][:, :-1],
             ])
             p_Xt1 = self.forward(q_Xt, q_At)
             q_Xt1 = q_X.__class__([
                 q_X.get_parameters('regular')[0][:, 1:],
                 q_X.get_parameters('regular')[1][:, 1:],
             ])
             num_data = T.to_float(num_data)
             rmse = T.sqrt(
                 T.sum(T.square(
                     q_Xt1.get_parameters('regular')[1] -
                     p_Xt1.get_parameters('regular')[1]),
                       axis=-1))
             A, Q = self.get_dynamics()
             model_stdev = T.sqrt(T.matrix_diag_part(Q))
             local_kl = T.sum(stats.kl_divergence(q_Xt1, p_Xt1), axis=1)
             if self.time_varying:
                 global_kl = T.sum(
                     stats.kl_divergence(self.A_variational, self.A_prior))
             else:
                 global_kl = stats.kl_divergence(self.A_variational,
                                                 self.A_prior)
             self.cache[(q_X, q_A)] = (T.mean(local_kl, axis=0) +
                                       global_kl / T.to_float(num_data), {
                                           'rmse': rmse,
                                           'model-stdev': model_stdev,
                                           'local-kl': local_kl,
                                           'global-kl': global_kl
                                       })
     return self.cache[(q_X, q_A)]
Example #12
 def kl_gradients(self, q_X, q_A, _, num_data):
     if self.smooth:
         ds = self.ds
         ess = q_X.expected_sufficient_statistics()
         yyT = ess[..., :-1, ds:2 * ds, ds:2 * ds]
         xxT = ess[..., :-1, :ds, :ds]
         yxT = ess[..., :-1, ds:2 * ds, :ds]
         aaT, a = stats.Gaussian.unpack(
             q_A.expected_sufficient_statistics())
         aaT, a = aaT[:, :-1], a[:, :-1]
         x = ess[..., :-1, -1, :ds]
         y = ess[..., :-1, -1, ds:2 * ds]
         xaT = T.outer(x, a)
         yaT = T.outer(y, a)
         xaxaT = T.concatenate([
             T.concatenate([xxT, xaT], -1),
             T.concatenate([T.matrix_transpose(xaT), aaT], -1),
         ], -2)
         batch_size = T.shape(ess)[0]
         num_batches = T.to_float(num_data) / T.to_float(batch_size)
         ess = [
             yyT,
             T.concatenate([yxT, yaT], -1), xaxaT,
             T.ones([batch_size, self.horizon - 1])
         ]
     else:
         q_Xt = q_X.__class__([
             q_X.get_parameters('regular')[0][:, :-1],
             q_X.get_parameters('regular')[1][:, :-1],
         ])
         q_At = q_A.__class__([
             q_A.get_parameters('regular')[0][:, :-1],
             q_A.get_parameters('regular')[1][:, :-1],
         ])
         q_Xt1 = q_X.__class__([
             q_X.get_parameters('regular')[0][:, 1:],
             q_X.get_parameters('regular')[1][:, 1:],
         ])
         (XtAt_XtAtT, XtAt), (Xt1_Xt1T,
                              Xt1) = self.get_statistics(q_Xt, q_At, q_Xt1)
         batch_size = T.shape(XtAt)[0]
         num_batches = T.to_float(num_data) / T.to_float(batch_size)
         ess = [
             Xt1_Xt1T,
             T.einsum('nha,nhb->nhba', XtAt, Xt1), XtAt_XtAtT,
             T.ones([batch_size, self.horizon - 1])
         ]
     if self.time_varying:
         ess = [
             T.sum(ess[0], [0]),
             T.sum(ess[1], [0]),
             T.sum(ess[2], [0]),
             T.sum(ess[3], [0]),
         ]
     else:
         ess = [
             T.sum(ess[0], [0, 1]),
             T.sum(ess[1], [0, 1]),
             T.sum(ess[2], [0, 1]),
             T.sum(ess[3], [0, 1]),
         ]
     return [
         -(a + num_batches * b - c) / T.to_float(num_data)
         for a, b, c in zip(
             self.A_prior.get_parameters('natural'),
             ess,
             self.A_variational.get_parameters('natural'),
         )
     ]
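The returned list is a stochastic natural gradient for the MNIW dynamics posterior: the minibatch sufficient statistics are scaled up by num_batches, added to the prior's natural parameters, and compared against the current variational parameters. A minimal framework-free sketch of the same pattern (the function name and arguments here are illustrative, not part of the original code):

import numpy as np

def natural_gradient(prior_nat, batch_stats, current_nat, num_data, batch_size):
    # Scale minibatch statistics up to the full data set, add the prior, and
    # take the (negated, N-normalised) gap to the current natural parameters.
    num_batches = num_data / batch_size
    return [-(p + num_batches * s - c) / num_data
            for p, s, c in zip(prior_nat, batch_stats, current_nat)]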
Example #13
                    parameter_type='natural').get_parameters('regular')

pi_cmessage = q_pi.expected_sufficient_statistics()
x_tmessage = NIW.pack([
    T.outer(X, X),
    X,
    T.ones([batch_size]),
    T.ones([batch_size]),
])
x_stats = Gaussian.pack([
    T.outer(X, X),
    X,
])
theta_cmessage = q_theta.expected_sufficient_statistics()

num_batches = N / T.to_float(batch_size)
nat_scale = 10.0

parent_z = q_pi.expected_sufficient_statistics()[None]
new_z = T.einsum('iab,jab->ij', x_tmessage, theta_cmessage) + parent_z
q_z = Categorical(new_z - T.logsumexp(new_z, -1)[..., None],
                  parameter_type='natural')
p_z = Categorical(parent_z - T.logsumexp(parent_z, -1),
                  parameter_type='natural')
l_z = T.sum(kl_divergence(q_z, p_z))
z_pmessage = q_z.expected_sufficient_statistics()

pi_stats = T.sum(z_pmessage, 0)
parent_pi = p_pi.get_parameters('natural')
current_pi = q_pi.get_parameters('natural')
pi_gradient = nat_scale / N * (parent_pi + num_batches * pi_stats - current_pi)
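For q_z above, the unnormalised natural parameters are turned into log-probabilities by subtracting their log-normaliser. A tiny NumPy/SciPy check of that step (local names only, no framework code):

import numpy as np
from scipy.special import logsumexp

logits = np.random.randn(4, 3)   # unnormalised log-weights for 4 points, 3 clusters
log_probs = logits - logsumexp(logits, -1, keepdims=True)
np.testing.assert_allclose(np.exp(log_probs).sum(-1), 1.0)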
Example #14
data = generate_data(1000)
N = data.shape[0]
yt, yt1 = data[:, :-1], data[:, 1:]
yt, yt1 = yt.reshape([-1, D]), yt1.reshape([-1, D])

transition_net = Tanh(D, 500) >> Tanh(500) >> nn.Gaussian(D)
transition_net.initialize()

rec_net = Tanh(D, 500) >> Tanh(500) >> nn.Gaussian(D)
rec_net.initialize()

Yt = T.placeholder(T.floatx(), [None, D])
Yt1 = T.placeholder(T.floatx(), [None, D])
batch_size = T.shape(Yt)[0]
num_batches = N / T.to_float(batch_size)

Yt_message = Gaussian.pack([
    T.tile(T.eye(D)[None] * noise, [batch_size, 1, 1]),
    T.einsum('ab,ib->ia',
             T.eye(D) * noise, Yt)
])
Yt1_message = Gaussian.pack([
    T.tile(T.eye(D)[None] * noise, [batch_size, 1, 1]),
    T.einsum('ab,ib->ia',
             T.eye(D) * noise, Yt1)
])
transition = Gaussian(transition_net(Yt)).expected_value()

max_iter = 1000
tol = 1e-5
Example #15
def make_variable(dist):
    return dist.__class__(T.variable(T.to_float(
        dist.get_parameters('natural'))),
                          parameter_type='natural')
Example #16

def make_variable(dist):
    return dist.__class__(T.variable(T.to_float(
        dist.get_parameters('natural'))),
                          parameter_type='natural')


(X, Y) = generate_data(N, D, seed=3)
cf = LogisticRegression(fit_intercept=False)
cf.fit(X, Y)
coef_ = cf.coef_
score_ = cf.score(X, Y)

q_w = make_variable(
    Gaussian([T.to_float(np.eye(D))[None],
              T.to_float(np.zeros(D))[None]]))

x, y = T.matrix(), T.vector()

lr = 1e-4
batch_size = T.shape(x)[0]
num_batches = T.to_float(N / batch_size)

with T.initialization('xavier'):
    # stats_net = Relu(D + 1, 20) >> Relu(20) >> GaussianLayer(D)
    stats_net = GaussianLayer(D + 1, D)
net_out = stats_net(T.concat([x, y[..., None]], -1))
stats = T.sum(net_out.get_parameters('natural'), 0)[None]

natural_gradient = (p_w.get_parameters('natural') + num_batches * stats -