Beispiel #1
0
 def map_fn(data):
     """Apply ``network`` over the last axis of ``data``, keeping leading axes.

     ``data`` is flattened to two dimensions before ``network`` (a name taken
     from the enclosing scope) is called. The parameters of the distribution
     it returns are then reshaped so the original leading dimensions are
     restored.

     Returns:
         A distribution of the same class as the network output
         (``GaussianScaleDiag``, ``Gaussian`` or ``Bernoulli``).

     Raises:
         Exception: if the network output is none of the supported classes.
     """
     full_shape = T.shape(data)
     leading = full_shape[:-1]
     flat_out = network(T.reshape(data, [-1, full_shape[-1]]))

     def restore(tensor, trailing):
         # Replace the flattened batch axis with the original leading axes.
         return T.reshape(tensor, T.concatenate([leading, trailing]))

     # GaussianScaleDiag is tested before Gaussian, as in the original chain.
     if isinstance(flat_out, stats.GaussianScaleDiag):
         scale_diag, mu = flat_out.get_parameters('regular')
         dim_out = T.shape(mu)[-1]
         return stats.GaussianScaleDiag([
             restore(scale_diag, [dim_out]),
             restore(mu, [dim_out]),
         ])
     if isinstance(flat_out, stats.Gaussian):
         sigma, mu = flat_out.get_parameters('regular')
         dim_out = T.shape(mu)[-1]
         return stats.Gaussian([
             restore(sigma, [dim_out, dim_out]),
             restore(mu, [dim_out]),
         ])
     if isinstance(flat_out, stats.Bernoulli):
         natural = flat_out.get_parameters('natural')
         dim_out = T.shape(natural)[-1]
         return stats.Bernoulli(restore(natural, [dim_out]), 'natural')
     raise Exception("Unimplemented distribution")
Beispiel #2
0
def initialize_node(node, children):
    """Build an initial distribution for ``node`` based on its class.

    A ``Gaussian`` node gets identity matrices paired with standard-normal
    draws; an ``IW`` node gets ``(d + 1) * I`` paired with ``d + 1``, where
    ``d`` is the last entry of ``T.shape(node)``. ``children`` is accepted
    but not used. Any other node type falls through and yields ``None``.
    """
    if isinstance(node, Gaussian):
        shape = T.shape(node)
        identity = T.eye(shape[-1], batch_shape=shape[:-1])
        draws = T.random_normal(shape)
        return Gaussian([identity, draws])
    if isinstance(node, IW):
        shape = T.shape(node)
        scaled_identity = (T.to_float(shape[-1]) + 1) * T.eye(
            shape[-1], batch_shape=shape[:-2])
        dof = T.to_float(shape[-1]) + 1
        return IW([scaled_identity, dof])
    # Unsupported node types are not initialised.
    return None
Beispiel #3
0
 def em(i, q_dyn_natparam, q_X_natparam, _, curr_elbo):
     """Run one update of the dynamics and state posteriors.

     Relies on names from the enclosing scope (``ds``, ``a``, ``aaT``,
     ``data_strength``, ``initial_dyn_natparam``, ``state_prior``, ``q_X``,
     ``q_A``, ``prior_dyn`` and ``self``).

     Returns:
         ``(i + 1, dynamics natural parameters, state natural parameters,
         curr_elbo, elbo)``. The incoming ``curr_elbo`` moves to the
         "previous" slot and the new ``elbo`` (the sum of the two KL terms
         below) takes its place.
     """
     cur, nxt = slice(None, ds), slice(ds, 2 * ds)
     state_ess = stats.LDS(q_X_natparam,
                           'natural').expected_sufficient_statistics()
     batch_size = T.shape(state_ess)[0]

     # Blocks of the sufficient statistics, dropping the final time step.
     yyT = state_ess[..., :-1, nxt, nxt]
     xxT = state_ess[..., :-1, cur, cur]
     yxT = state_ess[..., :-1, nxt, cur]
     x = state_ess[..., :-1, -1, cur]
     y = state_ess[..., :-1, -1, nxt]
     xaT = T.outer(x, a)
     yaT = T.outer(y, a)

     top_rows = T.concatenate([xxT, xaT], -1)
     bottom_rows = T.concatenate([T.matrix_transpose(xaT), aaT], -1)
     xaxaT = T.concatenate([top_rows, bottom_rows], -2)

     dyn_ess = [
         yyT,
         T.concatenate([yxT, yaT], -1),
         xaxaT,
         T.ones([batch_size, self.horizon - 1]),
     ]
     # Sum statistics over the batch axis and add the starting parameters.
     updated_natparam = [
         T.sum(stat, [0]) * data_strength + start
         for stat, start in zip(dyn_ess, initial_dyn_natparam)
     ]
     q_dyn_ = stats.MNIW(updated_natparam, 'natural')
     q_stats = q_dyn_.expected_sufficient_statistics()

     p_X = stats.LDS((q_stats, state_prior, None,
                      q_A.expected_value(), self.horizon))
     q_X_ = stats.LDS((q_stats, state_prior, q_X,
                       q_A.expected_value(), self.horizon))

     state_kl = T.sum(stats.kl_divergence(q_X_, p_X))
     dyn_kl = T.sum(stats.kl_divergence(q_dyn_, prior_dyn))
     elbo = state_kl + dyn_kl
     return (i + 1, q_dyn_.get_parameters('natural'),
             q_X_.get_parameters('natural'), curr_elbo, elbo)
Beispiel #4
0
    def _sample(self, num_samples):
        """Draw ``num_samples`` samples as ``mu + chol(sigma) @ noise``.

        Standard-normal noise is generated with a leading ``num_samples``
        axis, multiplied by the Cholesky factor of ``sigma`` and shifted by
        ``mu``.
        """
        # The regular parameters are round-tripped through the natural
        # parameterisation before use.
        # NOTE(review): the reason for the round trip is not visible here.
        regular = self.get_parameters('regular')
        sigma, mu = self.natural_to_regular(self.regular_to_natural(regular))

        chol = T.cholesky(sigma)
        noise_shape = T.concat([[num_samples], T.shape(mu)], 0)
        noise = T.random_normal(noise_shape)
        # Repeat the factor once per sample; every other axis is tiled once.
        unit_multiples = T.ones([T.rank(sigma)], dtype=np.int32)
        tiled_chol = T.tile(chol[None],
                            T.concat([[num_samples], unit_multiples]))
        perturbation = T.matmul(tiled_chol, noise[..., None])[..., 0]
        return mu[None] + perturbation
Beispiel #5
0
 def next_state(self, state, action, t):
     """Return the Gaussian over the next state at time index ``t``.

     The second parameter is ``A[t]`` applied to the concatenated
     ``[state, action]`` vectors; the first is ``Q[t]`` tiled across the
     leading dimensions of ``state``.
     """
     A, Q = self.get_dynamics()
     batch_dims = T.shape(state)[:-1]
     joint = T.concatenate([state, action], -1)
     tiled_noise = T.tile(Q[t][None], T.concatenate([batch_dims, [1, 1]]))
     predicted = T.einsum('ab,nb->na', A[t], joint)
     return stats.Gaussian([tiled_noise, predicted])
Beispiel #6
0
 def forward(self, q_Xt, q_At):
     """Push the expected state/action through the dynamics.

     Takes the expected values of ``q_Xt`` and ``q_At``, concatenates them on
     the last axis and contracts the result with ``A`` per time step. ``Q`` is
     tiled along a new leading batch axis. Returns the resulting
     ``stats.Gaussian``.
     """
     state_mean = q_Xt.expected_value()
     action_mean = q_At.expected_value()
     num_batch = T.shape(state_mean)[0]
     joint_mean = T.concatenate([state_mean, action_mean], -1)
     A, Q = self.get_dynamics()
     tiled_noise = T.tile(Q[None], [num_batch, 1, 1, 1])
     predicted = T.einsum('nhs,hxs->nhx', joint_mean, A)
     return stats.Gaussian([tiled_noise, predicted])
Beispiel #7
0
    def __init__(self,
                 sensor_models,
                 calibration_model,
                 lr=1e-4,
                 batch_size=20,
                 log_dir=None,
                 **kwargs):
        """Build the calibration graph and open a session on it.

        Args:
            sensor_models: mapping from board id to a callable sensor model.
            calibration_model: callable applied to the concatenated
                representation and environment inputs.
            lr: stored as ``self.lr``.
            batch_size: stored as ``self.batch_size``.
            log_dir: stored as ``self.log_dir``.
            **kwargs: accepted but not used here.
        """
        self.log_dir = log_dir
        self.graph = T.core.Graph()
        with self.graph.as_default():
            # Plain Python bookkeeping.
            self.calibration_model = calibration_model
            self.board_ids = list(sensor_models.keys())
            self.board_map = {
                board_id: index
                for index, board_id in enumerate(self.board_ids)
            }
            self.sensor_map = sensor_models
            self.sensor_models = [sensor_models[b] for b in self.board_ids]
            self.architecture = pickle.dumps(
                [sensor_models, calibration_model])
            self.batch_size = batch_size
            self.lr = lr

            # Graph inputs.
            self.learning_rate = T.placeholder(T.floatx(), [])
            self.sensors = T.placeholder(T.floatx(), [None, 3])
            self.env = T.placeholder(T.floatx(), [None, 3])
            self.board = T.placeholder(T.core.int32, [None])

            # (board index, row index) pairs used to pick, for every row,
            # the output of the sensor model that belongs to its board.
            row_index = T.range(T.shape(self.board)[0])
            self.boards = T.transpose(T.pack([self.board, row_index]))
            per_model_outputs = []
            for sensor_model in self.sensor_models:
                per_model_outputs.append(sensor_model(self.sensors))
            self.rep = T.gather_nd(T.pack(per_model_outputs), self.boards)

            # Placeholder that lets a representation be fed in directly.
            rep_width = self.rep.get_shape()[-1]
            self.rep_ = T.placeholder(T.floatx(), [None, rep_width])
            rep_env = T.concat([self.rep, self.env], -1)
            rep_env_ = T.concat([self.rep_, self.env], -1)
            self.y_ = self.calibration_model(rep_env)
            self.y_rep = self.calibration_model(rep_env_)

            # Targets, losses and summaries.
            self.y = T.placeholder(T.floatx(), [None, 2])
            residual = self.y - self.y_
            self.loss = T.mean(residual**2)
            self.mae = T.mean(T.abs(self.y - self.y_))
            T.core.summary.scalar('MSE', self.loss)
            T.core.summary.scalar('MAE', self.mae)
            self.summary = T.core.summary.merge_all()

            optimizer = T.core.train.AdamOptimizer(self.learning_rate)
            self.train_op = optimizer.minimize(self.loss)

        self.session = T.interactive_session(graph=self.graph)
Beispiel #8
0
 def kl_divergence(self, q_X, q_A, num_data):
     """KL from ``q_X`` to a ``GaussianScaleDiag`` built from ones and zeros.

     The divergence is summed over the last axis and averaged over the first.
     ``q_A`` and ``num_data`` are not used. Returns the reduced divergence
     together with an empty info dict.
     """
     regular = q_X.get_parameters('regular')
     mu_shape = T.shape(regular[1])
     p_X = stats.GaussianScaleDiag([T.ones(mu_shape), T.zeros(mu_shape)])
     per_step_kl = stats.kl_divergence(q_X, p_X)
     return T.mean(T.sum(per_step_kl, -1), 0), {}
Beispiel #9
0
# NOTE: N and K are used below but are not defined in this fragment.
D = 2

sigma = 0.5
sigma0 = 100
data = generate_data(N, D, K, sigma=sigma, sigma0=sigma0, seed=None)

# Priors over the mixture weights and the per-component parameters.
p_pi = Dirichlet(T.constant(10.0 * np.ones([K], dtype=T.floatx())))
p_theta = NIW([
    T.constant(np.array(value).astype(T.floatx()))
    for value in [np.eye(D) * sigma, np.zeros(D), 1, D + 1]
])
prior = (p_pi, p_theta)

np.random.seed(None)

X = T.placeholder(T.floatx(), [None, D])
batch_size = T.shape(X)[0]

# Variational factors.
q_pi = make_variable(Dirichlet(np.ones([K], dtype=T.floatx())))
# The parameters are passed as a concrete list, matching p_theta above:
# a bare map() is a one-shot iterator under Python 3.
q_theta = make_variable(
    NIW([
        np.array(value).astype(T.floatx())
        for value in [
            np.tile(np.eye(D)[None] * 100, [K, 1, 1]),
            np.random.multivariate_normal(
                mean=np.zeros([D]), cov=np.eye(D) * 20, size=[K]),
            np.ones(K),
            np.ones(K) * (D + 1),
        ]
    ]))

# NOTE: this rebinds `sigma` (previously the scalar 0.5 defined above).
sigma, mu = Gaussian(q_theta.expected_sufficient_statistics(),
                     parameter_type='natural').get_parameters('regular')
alpha = Categorical(q_pi.expected_sufficient_statistics(),
Beispiel #10
0
    def posterior_dynamics(self,
                           q_X,
                           q_A,
                           data_strength=1.0,
                           max_iter=200,
                           tol=1e-3):
        """Compute a posterior over the dynamics from ``q_X`` and ``q_A``.

        When ``self.smooth`` is set, the dynamics and state posteriors are
        updated alternately inside a ``T.while_loop`` until the tracked value
        changes by at most ``tol`` or ``max_iter`` iterations have run.
        Otherwise a single closed-form update from the statistics returned by
        ``self.get_statistics`` is used.

        Args:
            q_X: distribution over states.
            q_A: distribution over actions.
            data_strength: multiplier applied to the batch-summed statistics.
            max_iter: iteration cap for the loop (smooth branch only).
            tol: convergence threshold for the loop (smooth branch only).

        Returns:
            The two branches return differently shaped results:
            smooth: ``((mu, sigma), expected sufficient statistics),
            (q_X, q_A)`` where ``q_X`` is the LDS rebuilt from the loop
            result; otherwise: ``(A, Q), q_X``.
        """
        if self.smooth:
            if self.time_varying:
                prior_dyn = stats.MNIW(
                    self.A_variational.get_parameters('natural'), 'natural')
            else:
                # Repeat the single set of parameters for each of the
                # horizon - 1 transitions.
                natparam = self.A_variational.get_parameters('natural')
                prior_dyn = stats.MNIW([
                    T.tile(natparam[0][None], [self.horizon - 1, 1, 1]),
                    T.tile(natparam[1][None], [self.horizon - 1, 1, 1]),
                    T.tile(natparam[2][None], [self.horizon - 1, 1, 1]),
                    T.tile(natparam[3][None], [self.horizon - 1]),
                ], 'natural')
            state_prior = stats.Gaussian([T.eye(self.ds), T.zeros(self.ds)])
            aaT, a = stats.Gaussian.unpack(
                q_A.expected_sufficient_statistics())
            # Drop the last time step of the action statistics.
            aaT, a = aaT[:, :-1], a[:, :-1]
            ds, da = self.ds, self.da

            # Loop state starts from the prior dynamics and from the LDS
            # built with the current sufficient statistics.
            initial_dyn_natparam = prior_dyn.get_parameters('natural')
            initial_X_natparam = stats.LDS(
                (self.sufficient_statistics(), state_prior, q_X,
                 q_A.expected_value(), self.horizon),
                'internal').get_parameters('natural')

            def em(i, q_dyn_natparam, q_X_natparam, _, curr_elbo):
                # Loop body: refresh the dynamics posterior from the current
                # state posterior, then rebuild the state posterior.
                q_X_ = stats.LDS(q_X_natparam, 'natural')
                ess = q_X_.expected_sufficient_statistics()
                batch_size = T.shape(ess)[0]
                yyT = ess[..., :-1, ds:2 * ds, ds:2 * ds]
                xxT = ess[..., :-1, :ds, :ds]
                yxT = ess[..., :-1, ds:2 * ds, :ds]
                x = ess[..., :-1, -1, :ds]
                y = ess[..., :-1, -1, ds:2 * ds]
                xaT = T.outer(x, a)
                yaT = T.outer(y, a)
                xaxaT = T.concatenate([
                    T.concatenate([xxT, xaT], -1),
                    T.concatenate([T.matrix_transpose(xaT), aaT], -1),
                ], -2)
                ess = [
                    yyT,
                    T.concatenate([yxT, yaT], -1), xaxaT,
                    T.ones([batch_size, self.horizon - 1])
                ]
                # The comprehension variables `a`, `b` are local to the
                # comprehension (Python 3) and do not rebind the outer `a`.
                q_dyn_natparam = [
                    T.sum(a, [0]) * data_strength + b
                    for a, b in zip(ess, initial_dyn_natparam)
                ]
                q_dyn_ = stats.MNIW(q_dyn_natparam, 'natural')
                q_stats = q_dyn_.expected_sufficient_statistics()
                p_X = stats.LDS((q_stats, state_prior, None,
                                 q_A.expected_value(), self.horizon))
                q_X_ = stats.LDS((q_stats, state_prior, q_X,
                                  q_A.expected_value(), self.horizon))
                # The tracked value is the sum of the two KL terms.
                elbo = (T.sum(stats.kl_divergence(q_X_, p_X)) +
                        T.sum(stats.kl_divergence(q_dyn_, prior_dyn)))
                return i + 1, q_dyn_.get_parameters(
                    'natural'), q_X_.get_parameters('natural'), curr_elbo, elbo

            def cond(i, _, __, prev_elbo, curr_elbo):
                # Print the current value as a side effect of evaluating the
                # loop condition.
                with T.core.control_dependencies([T.core.print(curr_elbo)]):
                    prev_elbo = T.core.identity(prev_elbo)
                return T.logical_and(
                    T.abs(curr_elbo - prev_elbo) > tol, i < max_iter)

            # Starting with previous = -inf and current = 0 makes the first
            # difference infinite, so the tolerance test cannot stop the loop
            # before the first iteration.
            result = T.while_loop(
                cond,
                em, [
                    0, initial_dyn_natparam, initial_X_natparam,
                    T.constant(-np.inf),
                    T.constant(0.)
                ],
                back_prop=False)
            pd = stats.MNIW(result[1], 'natural')
            sigma, mu = pd.expected_value()
            q_X = stats.LDS(result[2], 'natural')
            return ((mu, sigma), pd.expected_sufficient_statistics()), (q_X,
                                                                        q_A)
        else:
            # Split the sequences into time steps [0, T-1) and [1, T).
            q_Xt = q_X.__class__([
                q_X.get_parameters('regular')[0][:, :-1],
                q_X.get_parameters('regular')[1][:, :-1],
            ])
            q_At = q_A.__class__([
                q_A.get_parameters('regular')[0][:, :-1],
                q_A.get_parameters('regular')[1][:, :-1],
            ])
            q_Xt1 = q_X.__class__([
                q_X.get_parameters('regular')[0][:, 1:],
                q_X.get_parameters('regular')[1][:, 1:],
            ])
            (XtAt_XtAtT, XtAt), (Xt1_Xt1T,
                                 Xt1) = self.get_statistics(q_Xt, q_At, q_Xt1)
            batch_size = T.shape(XtAt)[0]
            ess = [
                Xt1_Xt1T,
                T.einsum('nha,nhb->nhba', XtAt, Xt1), XtAt_XtAtT,
                T.ones([batch_size, self.horizon - 1])
            ]
            if self.time_varying:
                posterior = stats.MNIW([
                    T.sum(a, [0]) * data_strength + b for a, b in zip(
                        ess, self.A_variational.get_parameters('natural'))
                ], 'natural')
            else:
                # b[None] adds a leading axis so the shared parameters
                # broadcast against the batch-summed statistics.
                posterior = stats.MNIW([
                    T.sum(a, [0]) * data_strength + b[None] for a, b in zip(
                        ess, self.A_variational.get_parameters('natural'))
                ], 'natural')
            Q, A = posterior.expected_value()
            return (A, Q), q_X
Beispiel #11
0
 def shape(self):
     """Return ``T.shape`` of ``Stats.X(self.m)``."""
     x_statistic = Stats.X(self.m)
     return T.shape(x_statistic)
Beispiel #12
0
 def shape(self):
     """Return ``T.shape`` of the wrapped ``self.value``."""
     wrapped = self.value
     return T.shape(wrapped)
Beispiel #13
0

# NOTE: D and noise are used below but are not defined in this fragment.
data = generate_data(1000)
N = data.shape[0]
# Pair every time step with its successor, then merge the leading axes.
yt = data[:, :-1].reshape([-1, D])
yt1 = data[:, 1:].reshape([-1, D])

transition_net = Tanh(D, 500) >> Tanh(500) >> nn.Gaussian(D)
transition_net.initialize()

rec_net = Tanh(D, 500) >> Tanh(500) >> nn.Gaussian(D)
rec_net.initialize()

Yt = T.placeholder(T.floatx(), [None, D])
Yt1 = T.placeholder(T.floatx(), [None, D])
batch_size = T.shape(Yt)[0]
num_batches = N / T.to_float(batch_size)


def _observation_message(observations):
    """Pack the ``noise``-scaled identity and ``observations`` times it."""
    return Gaussian.pack([
        T.tile(T.eye(D)[None] * noise, [batch_size, 1, 1]),
        T.einsum('ab,ib->ia', T.eye(D) * noise, observations),
    ])


Yt_message = _observation_message(Yt)
Yt1_message = _observation_message(Yt1)
transition = Gaussian(transition_net(Yt)).expected_value()

max_iter = 1000
Beispiel #14
0
    def initialize(self):
        """Build the full training graph and open a session on it.

        Creates the prior and cost objects from ``self.prior_params`` and
        ``self.cost_params``, the input placeholders, the encoder/decoder
        outputs, the ELBO and its summaries, and the three training ops
        (neural, dynamics, cost), which are grouped into ``self.train_op``.
        All TensorFlow calls go through ``T.core``.
        """
        self.graph = T.core.Graph()
        with self.graph.as_default():
            # Copies are popped so the stored parameter dicts stay intact.
            prior_params = self.prior_params.copy()
            prior_type = prior_params.pop('prior_type')
            self.prior = PRIOR_MAP[prior_type](
                self.ds, self.da, self.horizon, **prior_params)

            cost_params = self.cost_params.copy()
            cost_type = cost_params.pop('cost_type')
            self.cost = COST_MAP[cost_type](self.ds, self.da, **cost_params)

            # Sequence inputs: observations, controls, costs, states, actions.
            self.O = T.placeholder(T.floatx(), [None, None, self.do])
            self.U = T.placeholder(T.floatx(), [None, None, self.du])
            self.C = T.placeholder(T.floatx(), [None, None])
            self.S = T.placeholder(T.floatx(), [None, None, self.ds])
            self.A = T.placeholder(T.floatx(), [None, None, self.da])

            # Single-step inputs used for one-step prediction.
            self.t = T.placeholder(T.int32, [])
            self.state = T.placeholder(T.floatx(), [None, self.ds])
            self.action = T.placeholder(T.floatx(), [None, self.da])
            if self.prior.has_dynamics():
                self.next_state = self.prior.next_state(
                    self.state, self.action, self.t)
                self.prior_dynamics = self.prior.get_dynamics()

            self.num_data = T.scalar()
            self.beta = T.placeholder(T.floatx(), [])
            self.learning_rate = T.placeholder(T.floatx(), [])
            self.model_learning_rate = T.placeholder(T.floatx(), [])

            self.S_potentials = util.map_network(self.state_encoder)(self.O)
            self.A_potentials = util.map_network(self.action_encoder)(self.U)

            if self.prior.is_dynamics_prior():
                self.data_strength = T.placeholder(T.floatx(), [])
                self.max_iter = T.placeholder(T.int32, [])
                posterior_dynamics, (encodings, actions) = \
                    self.prior.posterior_dynamics(
                        self.S_potentials, self.A_potentials,
                        data_strength=self.data_strength,
                        max_iter=self.max_iter)
                self.posterior_dynamics_ = posterior_dynamics, (
                    encodings.expected_value(), actions.expected_value())

            if self.prior.is_filtering_prior():
                self.prior_dynamics_stats = self.prior.sufficient_statistics()
                self.dynamics_stats = (
                    T.placeholder(T.floatx(), [None, self.ds, self.ds]),
                    T.placeholder(T.floatx(),
                                  [None, self.ds, self.ds + self.da]),
                    T.placeholder(
                        T.floatx(),
                        [None, self.ds + self.da, self.ds + self.da]),
                    T.placeholder(T.floatx(), [None]),
                )
                S_natparam = self.S_potentials.get_parameters('natural')
                num_steps = T.shape(S_natparam)[1]

                # Pad the time axis (axis 1) up to the full horizon.
                self.padded_S = stats.Gaussian(T.core.pad(
                    self.S_potentials.get_parameters('natural'),
                    [[0, 0], [0, self.horizon - num_steps], [0, 0], [0, 0]]
                ), 'natural')
                self.padded_A = stats.GaussianScaleDiag([
                    T.core.pad(self.A_potentials.get_parameters('regular')[0],
                               [[0, 0], [0, self.horizon - num_steps], [0, 0]]),
                    T.core.pad(self.A_potentials.get_parameters('regular')[1],
                               [[0, 0], [0, self.horizon - num_steps], [0, 0]])
                ], 'regular')
                self.q_S_padded, self.q_A_padded = self.prior.encode(
                    self.padded_S, self.padded_A,
                    dynamics_stats=self.dynamics_stats
                )
                # Cut the padded results back to the steps actually supplied.
                self.q_S_filter = self.q_S_padded.filter(max_steps=num_steps)
                self.q_A_filter = self.q_A_padded.__class__(
                    self.q_A_padded.get_parameters('natural')[:, :num_steps],
                    'natural')
                self.e_q_S_filter = self.q_S_filter.expected_value()
                self.e_q_A_filter = self.q_A_filter.expected_value()

            ((self.q_S, self.q_A), self.prior_kl, self.kl_grads,
             self.info) = self.prior.posterior_kl_grads(
                 self.S_potentials, self.A_potentials, self.num_data)

            self.q_S_sample = self.q_S.sample()[0]
            self.q_A_sample = self.q_A.sample()[0]

            # Decoders applied to posterior samples ...
            self.q_O = util.map_network(self.state_decoder)(self.q_S_sample)
            self.q_U = util.map_network(self.action_decoder)(self.q_A_sample)
            self.q_O_sample = self.q_O.sample()[0]
            self.q_U_sample = self.q_U.sample()[0]

            # ... and to states/actions fed in directly.
            self.q_O_ = util.map_network(self.state_decoder)(self.S)
            self.q_U_ = util.map_network(self.action_decoder)(self.A)
            self.q_O__sample = self.q_O_.sample()[0]
            self.q_U__sample = self.q_U_.sample()[0]

            self.cost_likelihood = self.cost.log_likelihood(
                self.q_S_sample, self.C)
            if self.cost.is_cost_function():
                self.evaluated_cost = self.cost.evaluate(self.S)
            self.log_likelihood = T.sum(
                self.q_O.log_likelihood(self.O), axis=1)

            self.elbo = T.mean(
                self.log_likelihood + self.cost_likelihood - self.prior_kl)
            # Training objective: beta scales the cost and KL terms.
            train_elbo = T.mean(
                self.log_likelihood
                + self.beta * (self.cost_likelihood - self.prior_kl))
            T.core.summary.scalar(
                "encoder-stdev",
                T.mean(self.S_potentials.get_parameters('regular')[0]))
            T.core.summary.scalar(
                "log-likelihood", T.mean(self.log_likelihood))
            T.core.summary.scalar(
                "cost-likelihood", T.mean(self.cost_likelihood))
            T.core.summary.scalar("prior-kl", T.mean(self.prior_kl))
            T.core.summary.scalar("beta", self.beta)
            T.core.summary.scalar("elbo", self.elbo)
            T.core.summary.scalar("beta-elbo", train_elbo)
            for k, v in self.info.items():
                T.core.summary.scalar(k, T.mean(v))
            self.summary = T.core.summary.merge_all()

            neural_params = (
                self.state_encoder.get_parameters()
                + self.state_decoder.get_parameters()
                + self.action_encoder.get_parameters()
                + self.action_decoder.get_parameters()
            )
            cost_params = self.cost.get_parameters()
            if len(neural_params) > 0:
                optimizer = T.core.train.AdamOptimizer(self.learning_rate)
                gradients, variables = zip(*optimizer.compute_gradients(
                    -train_elbo, var_list=neural_params))
                # Reached through T.core like every other TensorFlow call in
                # this method, instead of a separate bare `tf` name.
                gradients, _ = T.core.clip_by_global_norm(gradients, 5.0)
                self.neural_op = optimizer.apply_gradients(
                    zip(gradients, variables))
            else:
                self.neural_op = T.core.no_op()
            if len(cost_params) > 0:
                self.cost_op = T.core.train.AdamOptimizer(
                    self.learning_rate).minimize(
                        -self.elbo, var_list=cost_params)
            else:
                self.cost_op = T.core.no_op()
            if len(self.kl_grads) > 0:
                if self.prior.is_dynamics_prior():
                    opt = T.core.train.GradientDescentOptimizer
                else:
                    opt = T.core.train.AdamOptimizer
                # kl_grads pairs are swapped into the element order that
                # apply_gradients receives.
                self.dynamics_op = opt(
                    self.model_learning_rate).apply_gradients([
                        (second, first) for first, second in self.kl_grads
                    ])
            else:
                self.dynamics_op = T.core.no_op()
            self.train_op = T.core.group(
                self.neural_op, self.dynamics_op, self.cost_op)
        self.session = T.interactive_session(
            graph=self.graph,
            allow_soft_placement=True,
            log_device_placement=False)
Beispiel #15
0
 def activate(self, X):
     """Pack ``X`` into NIW form via ``stats.NIW.pack``.

     The packed list is the outer product of ``X`` with itself, ``X`` itself,
     and two tensors of ones shaped like ``X`` without its last axis.
     """
     leading_shape = T.shape(X)[:-1]
     outer_product = T.outer(X, X)
     first_ones = T.ones(leading_shape)
     second_ones = T.ones(leading_shape)
     return stats.NIW.pack([outer_product, X, first_ones, second_ones])
Beispiel #16
0
 def kl_gradients(self, q_X, q_A, _, num_data):
     """Return one gradient term per natural parameter of the dynamics.

     Statistics are gathered from ``q_X``/``q_A`` (directly when
     ``self.smooth`` is set, otherwise through ``self.get_statistics``) and
     summed over the batch axis, plus the time axis unless
     ``self.time_varying``. Each returned entry is
     ``-(prior + num_batches * statistic - variational) / num_data``.
     The third positional argument is ignored.
     """
     if self.smooth:
         ds = self.ds
         cur, nxt = slice(None, ds), slice(ds, 2 * ds)
         joint_ess = q_X.expected_sufficient_statistics()
         # Blocks of the statistics, dropping the final time step.
         yyT = joint_ess[..., :-1, nxt, nxt]
         xxT = joint_ess[..., :-1, cur, cur]
         yxT = joint_ess[..., :-1, nxt, cur]
         aaT, a = stats.Gaussian.unpack(
             q_A.expected_sufficient_statistics())
         aaT, a = aaT[:, :-1], a[:, :-1]
         x = joint_ess[..., :-1, -1, cur]
         y = joint_ess[..., :-1, -1, nxt]
         xaT = T.outer(x, a)
         yaT = T.outer(y, a)
         top_rows = T.concatenate([xxT, xaT], -1)
         bottom_rows = T.concatenate([T.matrix_transpose(xaT), aaT], -1)
         xaxaT = T.concatenate([top_rows, bottom_rows], -2)
         batch_size = T.shape(joint_ess)[0]
         num_batches = T.to_float(num_data) / T.to_float(batch_size)
         ess = [
             yyT,
             T.concatenate([yxT, yaT], -1),
             xaxaT,
             T.ones([batch_size, self.horizon - 1]),
         ]
     else:

         def time_window(dist, window):
             # Rebuild `dist` from its regular parameters restricted to
             # `window` along axis 1.
             return dist.__class__([
                 dist.get_parameters('regular')[0][:, window],
                 dist.get_parameters('regular')[1][:, window],
             ])

         q_Xt = time_window(q_X, slice(None, -1))
         q_At = time_window(q_A, slice(None, -1))
         q_Xt1 = time_window(q_X, slice(1, None))
         (XtAt_XtAtT, XtAt), (Xt1_Xt1T,
                              Xt1) = self.get_statistics(q_Xt, q_At, q_Xt1)
         batch_size = T.shape(XtAt)[0]
         num_batches = T.to_float(num_data) / T.to_float(batch_size)
         ess = [
             Xt1_Xt1T,
             T.einsum('nha,nhb->nhba', XtAt, Xt1),
             XtAt_XtAtT,
             T.ones([batch_size, self.horizon - 1]),
         ]

     # Time-varying dynamics keep the time axis; otherwise it is summed too.
     reduce_axes = [0] if self.time_varying else [0, 1]
     ess = [T.sum(statistic, reduce_axes) for statistic in ess]

     return [
         -(prior + num_batches * statistic - variational)
         / T.to_float(num_data)
         for prior, statistic, variational in zip(
             self.A_prior.get_parameters('natural'),
             ess,
             self.A_variational.get_parameters('natural'),
         )
     ]
Beispiel #17
0
 def kl_divergence(self, q_X, q_A, num_data):
     """Return zeros with one entry per leading element of ``q_X``'s mean.

     ``q_A`` and ``num_data`` are not used. The second return value is an
     empty info dict.
     """
     expected_state = q_X.expected_value()
     batch_size = T.shape(expected_state)[0]
     return T.zeros(batch_size), {}
Beispiel #18
0
 def shape(self):
     """Return ``T.shape`` of the ``Stats.LogX`` natural parameter."""
     natural = self.get_parameters('natural')
     return T.shape(natural[Stats.LogX])
Beispiel #19
0

# NOTE: N, D and p_w are used below but are not defined in this fragment.
(X, Y) = generate_data(N, D, seed=3)
# scikit-learn style fit, kept for its coefficients and score.
cf = LogisticRegression(fit_intercept=False)
cf.fit(X, Y)
coef_ = cf.coef_
score_ = cf.score(X, Y)

q_w = make_variable(
    Gaussian([T.to_float(np.eye(D))[None],
              T.to_float(np.zeros(D))[None]]))

x, y = T.matrix(), T.vector()

lr = 1e-4
batch_size = T.shape(x)[0]
# Cast both operands before dividing: `N / batch_size` divides a Python int
# by an integer tensor, which truncates under integer-division semantics.
num_batches = T.to_float(N) / T.to_float(batch_size)

with T.initialization('xavier'):
    # stats_net = Relu(D + 1, 20) >> Relu(20) >> GaussianLayer(D)
    stats_net = GaussianLayer(D + 1, D)
net_out = stats_net(T.concat([x, y[..., None]], -1))
# Sum the per-example natural parameters over the batch axis.
stats = T.sum(net_out.get_parameters('natural'), 0)[None]

natural_gradient = (p_w.get_parameters('natural') + num_batches * stats -
                    q_w.get_parameters('natural')) / N
next_w = Gaussian(q_w.get_parameters('natural') + lr * natural_gradient,
                  parameter_type='natural')

l_w = kl_divergence(q_w, p_w)[0]