Example #1
def _make_grad_hmm_normalizer(argnum, ans, log_pi0, log_Ps, ll):
    # Unbox the inputs if necessary
    log_pi0 = getval(log_pi0)
    log_Ps = getval(log_Ps)
    ll = getval(ll)

    # Make sure everything is C contiguous
    to_c = lambda arr: np.copy(arr, 'C') if not arr.flags['C_CONTIGUOUS'] else arr
    log_pi0 = to_c(log_pi0)
    log_Ps = to_c(log_Ps)
    ll = to_c(ll)

    dlog_pi0 = np.zeros_like(log_pi0)
    dlog_Ps = np.zeros_like(log_Ps)
    dll = np.zeros_like(ll)
    T, K = ll.shape
    
    # Forward pass to get alphas
    alphas = np.zeros((T, K))
    forward_pass(log_pi0, log_Ps, ll, alphas)
    grad_hmm_normalizer(log_Ps, alphas, dlog_pi0, dlog_Ps, dll)
    
    if argnum == 0:
        return lambda g: g * dlog_pi0
    if argnum == 1:
        return lambda g: g * dlog_Ps
    if argnum == 2:
        return lambda g: g * dll
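
Example #20 defines the hmm_normalizer primitive whose gradients this VJP maker provides. A minimal sketch of how such a maker is typically registered with autograd's defvjp (assuming hmm_normalizer and _make_grad_hmm_normalizer are in scope):

from functools import partial
from autograd.extend import defvjp

# One VJP maker per positional argument of hmm_normalizer
defvjp(hmm_normalizer,
       partial(_make_grad_hmm_normalizer, 0),   # gradient w.r.t. log_pi0
       partial(_make_grad_hmm_normalizer, 1),   # gradient w.r.t. log_Ps
       partial(_make_grad_hmm_normalizer, 2))   # gradient w.r.t. ll
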
Example #2
def hmm_expected_states(log_pi0, log_Ps, ll):
    T, K = ll.shape

    # Make sure everything is C contiguous
    to_c = lambda arr: np.copy(arr, 'C') if not arr.flags['C_CONTIGUOUS'] else arr
    log_pi0 = to_c(getval(log_pi0))
    log_Ps = to_c(getval(log_Ps))
    ll = to_c(getval(ll))

    alphas = np.zeros((T, K))
    forward_pass(log_pi0, log_Ps, ll, alphas)
    normalizer = logsumexp(alphas[-1])

    betas = np.zeros((T, K))
    backward_pass(log_Ps, ll, betas)    

    expected_states = alphas + betas
    expected_states -= logsumexp(expected_states, axis=1, keepdims=True)
    expected_states = np.exp(expected_states)
    
    expected_joints = alphas[:-1,:,None] + betas[1:,None,:] + ll[1:,None,:] + log_Ps
    expected_joints -= expected_joints.max((1,2))[:,None, None]
    expected_joints = np.exp(expected_joints)
    expected_joints /= expected_joints.sum((1,2))[:,None,None]
    
    return expected_states, expected_joints, normalizer
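
A minimal usage sketch for hmm_expected_states, assuming the ssm imports from Example #20 are available and that log_Ps holds time-varying transition matrices of shape (T-1, K, K); the random potentials below are purely illustrative:

import numpy as np
import numpy.random as npr
from scipy.special import logsumexp

T, K = 100, 3
log_pi0 = np.log(np.ones(K) / K)                      # uniform initial state distribution
log_Ps = npr.randn(T - 1, K, K)
log_Ps -= logsumexp(log_Ps, axis=-1, keepdims=True)   # make each row a log-probability vector
ll = npr.randn(T, K)                                  # per-state log-likelihoods

Ez, Ezzp1, logZ = hmm_expected_states(log_pi0, log_Ps, ll)
assert np.allclose(Ez.sum(axis=1), 1.0)    # single-state marginals normalize at each time step
assert Ezzp1.shape == (T - 1, K, K)        # pairwise marginals, one per transition
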
Example #3
 def store_args(self, params, X, Y, param_encoding_pairs):
     arg_dict = {
         'features': getval(X),
         'targets': getval(Y)}
     for param in params:
         arg_dict[param.name] = getval(param.data())
     fname = self._filename()
     np.save(fname + '_args.npy', arg_dict)
     with open(fname + '_args.txt', 'w') as f:
         self._write_meta(f)
         for param, encoding in param_encoding_pairs:
             f.write(param_to_pretty_string(param, encoding) + '\n')
Example #4
    def forward(self, X1, X2):
        d1 = self.kernel1.dimension
        X1_shape = getval(X1.shape)
        X2_shape = getval(X2.shape)
        X1_1 = anp.take(X1, range(0, d1), axis=1)
        X1_2 = anp.take(X1, range(d1, X1_shape[1]), axis=1)
        X2_1 = anp.take(X2, range(0, d1), axis=1)
        X2_2 = anp.take(X2, range(d1, X2_shape[1]), axis=1)

        kmat1 = self.kernel1(X1_1, X2_1)
        kmat2 = self.kernel2(X1_2, X2_2)
        return kmat1 * kmat2
Example #5
def sample_posterior_joint(features,
                           mean,
                           kernel,
                           chol_fact,
                           pred_mat,
                           test_features,
                           num_samples=1):
    """
    Draws num_samples samples from the joint posterior distribution over inputs
    test_features. This is done by computing mean and covariance matrix of
    this posterior, and using the Cholesky decomposition of the latter. If
    pred_mat is a matrix with m columns, the samples returned have shape
    (n_test, m, num_samples).

    :param features: Training inputs
    :param mean: Mean function
    :param kernel: Kernel function
    :param chol_fact: Part L of posterior state
    :param pred_mat: Part P of posterior state
    :param test_features: Test inputs
    :param num_samples: Number of samples to draw
    :return: Samples, shape (n_test, num_samples) or (n_test, m, num_samples)
    """
    k_tr_te = kernel(features, test_features)
    linv_k_tr_te = aspl.solve_triangular(chol_fact, k_tr_te, lower=True)
    posterior_mean = anp.matmul(anp.transpose(linv_k_tr_te), pred_mat) + \
                     anp.reshape(mean(test_features), (-1, 1))
    posterior_cov = kernel(test_features, test_features) - anp.dot(
        anp.transpose(linv_k_tr_te), linv_k_tr_te)
    jitter_init = anp.ones((1, )) * (1e-5)
    sys_mat = AddJitterOp(flatten_and_concat(posterior_cov, jitter_init),
                          initial_jitter_factor=NOISE_VARIANCE_LOWER_BOUND)
    lfact = cholesky_factorization(sys_mat)
    # Draw samples
    # posterior_mean.shape = (n_test, m), where m is number of cols of pred_mat
    # Reshape to (n_test, m, 1)
    n_test = getval(posterior_mean.shape)[0]
    posterior_mean = anp.expand_dims(posterior_mean, axis=-1)
    n01_vecs = [
        anp.random.normal(size=getval(posterior_mean.shape))
        for _ in range(num_samples)
    ]
    n01_mat = anp.reshape(anp.concatenate(n01_vecs, axis=-1), (n_test, -1))
    samples = anp.reshape(anp.dot(lfact, n01_mat), (n_test, -1, num_samples))
    samples = samples + posterior_mean

    if samples.shape[1] == 1:
        samples = anp.reshape(samples, (n_test, -1))  # (n_test, num_samples)

    return samples
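
Assuming pred_mat = L^{-1}(y - m(X)) (the convention used throughout this code base), the posterior_mean and posterior_cov lines above implement the standard GP posterior identities. A plain-numpy sketch of that algebra on hypothetical toy matrices (no mean, kernel, or AddJitterOp objects needed):

import numpy as np

rng = np.random.default_rng(0)
n, n_test = 6, 3
A = rng.standard_normal((n, n))
K_tr = A @ A.T + np.eye(n)                          # k(X, X) + noise term, SPD
K_tr_te = 0.1 * rng.standard_normal((n, n_test))    # k(X, X_*)
K_te = np.eye(n_test)                               # k(X_*, X_*) (toy choice)
y = rng.standard_normal((n, 1))                     # centered targets, y - m(X)

L = np.linalg.cholesky(K_tr)                        # plays the role of chol_fact
P = np.linalg.solve(L, y)                           # plays the role of pred_mat
Linv_k = np.linalg.solve(L, K_tr_te)                # linv_k_tr_te
post_mean = Linv_k.T @ P
post_cov = K_te - Linv_k.T @ Linv_k

# Same quantities via the direct formulas k(X_*,X) (K + s^2 I)^{-1} (y - m(X)) and
# k(X_*,X_*) - k(X_*,X) (K + s^2 I)^{-1} k(X,X_*)
assert np.allclose(post_mean, K_tr_te.T @ np.linalg.solve(K_tr, y))
assert np.allclose(post_cov, K_te - K_tr_te.T @ np.linalg.solve(K_tr, K_tr_te))
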
Example #6
    def cost(self, x, u, u_last, a):
        c = 0.

        if self.slew_rate:
            c += (u - u_last).T @ np.diag(self.uw) @ (u - u_last)
        else:
            c += u.T @ np.diag(self.uw) @ u

        if a:
            y = np.hstack((wrap_angle(x[0]), x[1])) if self.periodic else x
            J, j = self.features_jacobian(getval(y))
            z = J(getval(y)) @ y + j
            c += a * (z - self.g).T @ np.diag(self.gw) @ (z - self.g)

        return c
Example #7
def sample_and_cholesky_update(features, chol_fact, pred_mat, mean, kernel,
                               noise_variance, feature):
    # Draw sample target. Also, lvec is reused below
    lvec = _compute_lvec(features, chol_fact, kernel, feature)
    pred_mean = anp.dot(lvec, pred_mat) + anp.reshape(mean(feature), (1, 1))
    # Note: We do not add noise_variance to the predictive variance
    pred_std = anp.reshape(
        anp.sqrt(
            anp.maximum(
                kernel.diagonal(feature) - anp.sum(anp.square(lvec)),
                MIN_POSTERIOR_VARIANCE)), (1, 1))
    n01mat = anp.random.normal(size=getval(pred_mean.shape))
    target = pred_mean + anp.multiply(n01mat, pred_std)
    chol_fact_new, pred_mat_new = cholesky_update(features,
                                                  chol_fact,
                                                  pred_mat,
                                                  mean,
                                                  kernel,
                                                  noise_variance,
                                                  feature,
                                                  target,
                                                  lvec=lvec)
    features_new = anp.concatenate([features, feature], axis=0)

    return chol_fact_new, pred_mat_new, features_new, target
Example #8
    def diagonal(self, X):
        X = self._check_input_shape(X)
        covariance_scale = self._covariance_scale()
        covariance_scale_times_ones = anp.multiply(
            anp.ones((getval(X.shape[0]), 1)), covariance_scale)

        return anp.reshape(covariance_scale_times_ones, (-1, ))
Example #9
 def diagonal(self, X):
     d1 = self.kernel1.dimension
     X_shape = getval(X.shape)
     X1 = anp.take(X, range(0, d1), axis=1)
     X2 = anp.take(X, range(d1, X_shape[1]), axis=1)
     diag1 = self.kernel1.diagonal(X1)
     diag2 = self.kernel2.diagonal(X2)
     return diag1 * diag2
Example #10
 def store_value(self, value):
     fname = self._filename()
     with open(fname + '_value.txt', 'w') as f:
         f.write('value = {}\n'.format(getval(value)))
         self._write_meta(f)
     # Advance counters
     self.global_counter += 1
     self.local_counter += 1
Example #11
    def forward(self, X):
        """
        Actual computation of the scalar mean function
        We compute mean_value * vector_of_ones, whose dimensions are given by
        the first column of X

        :param X: input data of size (n,d) for which we want to compute the
            mean (here, only useful to extract the right dimension)
        """
        mean_value = encode_unwrap_parameter(
            self.mean_value_internal, self.encoding)
        return anp.multiply(anp.ones((getval(X.shape[0]), 1)), mean_value)
Example #12
def negative_log_marginal_likelihood(chol_fact, pred_mat):
    """
    The marginal likelihood is only computed if pred_mat has a single column
    (not for fantasy sample case).
    """
    assert pred_mat.ndim == 1 or pred_mat.shape[1] == 1,\
        "Multiple target vectors are not supported"
    sqnorm_predmat = anp.sum(anp.square(pred_mat))
    logdet_cholfact = 2.0 * anp.sum(anp.log(anp.abs(anp.diag(chol_fact))))
    n_samples = getval(pred_mat.size)

    return 0.5 * (sqnorm_predmat + n_samples * anp.log(2 * anp.pi) +
                  logdet_cholfact)
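
In this code base pred_mat appears to play the role of L^{-1}(y - m(X)), with chol_fact = L the Cholesky factor of the kernel-plus-noise matrix (consistent with cholesky_update and sample_posterior_joint above), so the value returned is the usual Gaussian negative log marginal likelihood. A small sanity-check sketch against scipy on a hypothetical toy matrix:

import numpy as np
from scipy.stats import multivariate_normal

rng = np.random.default_rng(0)
n = 5
A = rng.standard_normal((n, n))
K = A @ A.T + np.eye(n)              # toy kernel-plus-noise matrix
y = rng.standard_normal((n, 1))      # centered targets, y - m(X)

L = np.linalg.cholesky(K)            # chol_fact
P = np.linalg.solve(L, y)            # pred_mat = L^{-1}(y - m(X))

nll = negative_log_marginal_likelihood(L, P)
ref = -multivariate_normal(mean=np.zeros(n), cov=K).logpdf(y.ravel())
assert np.isclose(nll, ref)
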
Example #13
def cholesky_update(features,
                    chol_fact,
                    pred_mat,
                    mean,
                    kernel,
                    noise_variance,
                    feature,
                    target,
                    lvec=None):
    """
    Incremental update of posterior state (Cholesky factor, prediction
    matrix), given one datapoint (feature, target).

    Note: noise_variance is the initial value, before any jitter may have
    been added to compute chol_fact. Here, we add the minimum amount of
    jitter such that the new diagonal entry of the Cholesky factor is
    >= MIN_CHOLESKY_DIAGONAL_VALUE. This means that if cholesky_update is
    used several times, we in fact add a diagonal (but not spherical)
    jitter matrix.

    :param features: Shape (n, d)
    :param chol_fact: Shape (n, n)
    :param pred_mat: Shape (n, m)
    :param mean:
    :param kernel:
    :param noise_variance:
    :param feature: Shape (1, d)
    :param target: Shape (1, m)
    :param lvec: If given, this is the new column of the Cholesky factor
        except the diagonal entry. If not, this is computed here
    :return: chol_fact_new (n+1, n+1), pred_mat_new (n+1, m)
    """
    if lvec is None:
        lvec = _compute_lvec(features, chol_fact, kernel, feature)
    kscal = anp.reshape(kernel.diagonal(feature), (1, ))
    noise_variance = anp.reshape(noise_variance, (1, ))
    lsqscal = anp.maximum(kscal + noise_variance - anp.sum(anp.square(lvec)),
                          MIN_CHOLESKY_DIAGONAL_VALUE**2)
    lscal = anp.reshape(anp.sqrt(lsqscal), (1, 1))
    mscal = anp.reshape(mean(feature), (1, 1))
    pvec = target - mscal
    pvec = anp.divide(pvec - anp.matmul(lvec, pred_mat), lscal)
    pred_mat_new = anp.concatenate([pred_mat, pvec], axis=0)
    zerovec = anp.zeros((getval(lvec.size), 1))
    chol_fact_new = anp.concatenate([
        anp.concatenate([chol_fact, lvec], axis=0),
        anp.concatenate([zerovec, lscal], axis=0)
    ], axis=1)

    return chol_fact_new, pred_mat_new
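
The final concatenations assemble the bordered Cholesky factor [[L, 0], [lvec, lscal]]. A plain-numpy sketch of the block identity this relies on, using hypothetical toy matrices (no mean or kernel objects needed):

import numpy as np

rng = np.random.default_rng(0)
n = 4
A = rng.standard_normal((n, n))
K = A @ A.T + np.eye(n)                     # existing kernel-plus-noise matrix
k_new = 0.1 * rng.standard_normal((n, 1))   # cross-covariances to the new point
k_scal = 2.0                                # k(x_*, x_*) + noise_variance

L = np.linalg.cholesky(K)
lvec = np.linalg.solve(L, k_new).T          # shape (1, n), as in _compute_lvec
lscal = np.sqrt(k_scal - np.sum(lvec ** 2))

L_new = np.block([[L, np.zeros((n, 1))], [lvec, lscal]])
K_new = np.block([[K, k_new], [k_new.T, k_scal]])
assert np.allclose(L_new @ L_new.T, K_new)  # bordered factor reproduces the extended matrix
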
Example #14
def sample_posterior_marginals(features,
                               mean,
                               kernel,
                               chol_fact,
                               pred_mat,
                               test_features,
                               num_samples=1):
    """
    Draws num_samples samples from the product of marginals of the posterior
    over input points test_features. If pred_mat is a matrix with m columns,
    the samples returned have shape (n_test, m, num_samples).

    :param features: Training inputs
    :param mean: Mean function
    :param kernel: Kernel function
    :param chol_fact: Part L of posterior state
    :param pred_mat: Part P of posterior state
    :param test_features: Test inputs
    :param num_samples: Number of samples to draw
    :return: Samples, shape (n_test, num_samples) or (n_test, m, num_samples)
    """
    post_means, post_vars = predict_posterior_marginals(
        features, mean, kernel, chol_fact, pred_mat, test_features)
    post_means = anp.expand_dims(post_means, axis=-1)  # (n_test, m, 1)
    post_stds = anp.sqrt(anp.reshape(post_vars, (-1, 1, 1)))  # (n_test, 1, 1)
    n01_vecs = [
        anp.random.normal(size=getval(post_means.shape))
        for _ in range(num_samples)
    ]
    n01_mat = anp.concatenate(n01_vecs, axis=-1)
    samples = anp.multiply(n01_mat, post_stds) + post_means

    if samples.shape[1] == 1:
        n_test = getval(samples.shape)[0]
        samples = anp.reshape(samples, (n_test, -1))  # (n_test, num_samples)

    return samples
Example #15
    def forward(self, features, param_internal):
        """Returns constant positive vector

        If features.shape = (n, d), the shape of the vector returned is
        (d, 1) if size_cols = True, (n, 1) otherwise.

        :param features: Matrix for shape, dtype, ctx
        :param param_internal: Unwrapped parameter
        :return: Constant positive vector
        """
        # Shape, dtype, ctx is determined by extracting column or row from
        # features, then use ones_like
        axis = 0 if self.size_cols else 1
        ones_vec = anp.ones((features.size//getval(features.shape)[axis], 1))
        param = anp.reshape(self.encoding.get(param_internal), (1, 1))
        return anp.multiply(ones_vec, param)
Example #16
def param_to_pretty_string(gluon_param, encoding):
    """
    Take a gluon parameter and transform it to a string amenable to plotting
    If need be, the gluon parameter is appropriately encoded (e.g., log-exp transform).

    :param gluon_param: gluon parameter
    :param encoding: object in charge of encoding/decoding the gluon_param
    """
    assert isinstance(gluon_param, Parameter)
    assert encoding is not None, "encoding of param {} should not be None".format(
        gluon_param.name)
    param_as_numpy = encoding.get(getval(gluon_param.data()))

    return "{}: {}".format(
        gluon_param.name,
        ";".join("{:.6f}".format(value) for value in param_as_numpy))
Example #17
    def __init__(self,
                 features: np.ndarray,
                 targets: Optional[np.ndarray],
                 mean: MeanFunction,
                 kernel: KernelFunction,
                 noise_variance: np.ndarray,
                 debug_log: bool = False,
                 test_intermediates: Optional[dict] = None,
                 **kwargs):
        """
        If targets has m > 1 columns, they correspond to fantasy samples.

        If targets is None, this is an internal (copy) constructor, where
        kwargs contains chol_fact, pred_mat.

        :param features: Input points X, shape (n, d)
        :param targets: Targets Y, shape (n, m)
        :param mean: Mean function m(X)
        :param kernel: Kernel function k(X, X')
        :param noise_variance: Noise variance sigsq, shape (1,)
        :param test_intermediates: See cholesky_computations
        """
        self.mean = mean
        self.kernel = kernel
        self.noise_variance = anp.array(noise_variance, copy=True)
        if targets is not None:
            targets_shape = getval(targets.shape)
            targets = anp.reshape(targets, (targets_shape[0], -1))

            chol_fact, pred_mat = cholesky_computations(
                features,
                targets,
                mean,
                kernel,
                noise_variance,
                debug_log=debug_log,
                test_intermediates=test_intermediates)

            self.features = anp.array(features, copy=True)
            self.chol_fact = chol_fact
            self.pred_mat = pred_mat
            self._test_intermediates = test_intermediates
        else:
            # Internal (copy) constructor
            self.features = features
            self.chol_fact = kwargs['chol_fact']
            self.pred_mat = kwargs['pred_mat']
Example #18
    def _compute_terms(self, X, alpha, mean_lam, gamma, delta, ret_mean=False):
        dim = self.kernel_x.dimension
        X_shape = getval(X.shape)
        cfg = anp.take(X, range(0, dim), axis=1)
        res = anp.take(X, range(dim, X_shape[1]), axis=1)
        kappa = self._compute_kappa(res, alpha, mean_lam)
        kr_pref = anp.reshape(gamma, (1, 1))

        if ret_mean or (self.encoding_delta is not None) or delta > 0.0:
            mean = self.mean_x(cfg)
        else:
            mean = None
        if self.encoding_delta is not None:
            kr_pref = anp.subtract(kr_pref, anp.multiply(delta, mean))
        elif delta > 0.0:
            kr_pref = anp.subtract(kr_pref, mean * delta)

        return cfg, res, kappa, kr_pref, mean
Example #19
 def _diagonal_normal_policy(state):
     return rng.normal(getval(policy(state)), scale)
Example #20
import autograd.numpy as np
import autograd.numpy.random as npr
from autograd.scipy.misc import logsumexp
from autograd.scipy.linalg import cholesky_banded, solve_banded, solveh_banded
from autograd.extend import primitive, defvjp
from autograd.tracer import getval
from functools import partial

from ssm.cstats import _blocks_to_bands_lower, _blocks_to_bands_upper, \
                       _bands_to_blocks_lower, _bands_to_blocks_upper, \
                       _transpose_banded, vjp_cholesky_banded_lower, \
                       _vjp_solve_banded_A, _vjp_solveh_banded_A

from ssm.messages import forward_pass, backward_pass, backward_sample, grad_hmm_normalizer

to_c = lambda arr: np.copy(getval(arr), 'C') if not arr.flags['C_CONTIGUOUS'] else getval(arr)

@primitive
def hmm_normalizer(log_pi0, log_Ps, ll):
    T, K = ll.shape
    alphas = np.zeros((T, K))

    # Make sure everything is C contiguous
    log_pi0 = to_c(log_pi0)
    log_Ps = to_c(log_Ps)
    ll = to_c(ll)

    forward_pass(log_pi0, log_Ps, ll, alphas)    
    return logsumexp(alphas[-1])
    
def _make_grad_hmm_normalizer(argnum, ans, log_pi0, log_Ps, ll):
Example #21
 def cost(self, x, u, u_lst):
     _J, _j = self.features_jacobian(getval(x))
     _x = _J(getval(x)) @ x + _j
     return self.dt * (
         (_x - self._g).T @ np.diag(self._gw) @ (_x - self._g) +
         (u - u_lst).T @ np.diag(self._uw) @ (u - u_lst))
Example #22
 def cost(self, x, u, a):
     _J, _j = self.features_jacobian(getval(x))
     _x = _J(getval(x)) @ x + _j
     return a * (_x - self._g).T @ np.diag(self._gw) @ (_x - self._g)\
            + u.T @ np.diag(self._uw) @ u
Example #23
 def _check_input_shape(self, X):
     return anp.reshape(X, (getval(X.shape[0]), self._dimension))
Example #24
 def forward(self, X):
     return anp.zeros((getval(X.shape[0]), 1))
Example #25
File: lds.py Project: yahmadian/ssm
    def _surrogate_elbo(self,
                        variational_posterior,
                        datas,
                        inputs=None,
                        masks=None,
                        tags=None,
                        alpha=0.75,
                        **kwargs):
        """
        Lower bound on the marginal likelihood p(y | gamma)
        using variational posterior q(x; phi) where phi = variational_params
        and gamma = emission parameters.  As part of computing this objective,
        we optimize q(z | x) and take a natural gradient step wrt theta, the
        parameters of the dynamics model.

        Note that the surrogate ELBO is a lower bound on the ELBO above.
           E_p(z | x, y)[log p(z, x, y)]
           = E_p(z | x, y)[log p(z, x, y) - log p(z | x, y) + log p(z | x, y)]
           = E_p(z | x, y)[log p(x, y) + log p(z | x, y)]
           = log p(x, y) + E_p(z | x, y)[log p(z | x, y)]
           = log p(x, y) -H[p(z | x, y)]
          <= log p(x, y)
        with equality only when p(z | x, y) is atomic.  The gap equals the
        entropy of the posterior on z.
        """
        # log p(theta)
        elbo = self.log_prior()

        # Sample x from the variational posterior
        xs = variational_posterior.sample()

        # Inner optimization: find the true posterior p(z | x, y; theta).
        # Then maximize the inner ELBO wrt theta,
        #
        #    E_p(z | x, y; theta_fixed)[log p(z, x, y; theta)].
        #
        # This can be seen as a natural gradient step in theta
        # space.  Note: we do not want to compute gradients wrt x or the
        # emissions parameters backward through this optimization step,
        # so we unbox them first.
        xs_unboxed = [getval(x) for x in xs]
        emission_params_boxed = self.emissions.params
        flat_emission_params_boxed, unflatten = flatten(emission_params_boxed)
        self.emissions.params = unflatten(getval(flat_emission_params_boxed))

        # E step: compute the true posterior p(z | x, y, theta_fixed) and
        # the necessary expectations under this posterior.
        expectations = [
            self.expected_states(x, data, input, mask,
                                 tag) for x, data, input, mask, tag in zip(
                                     xs_unboxed, datas, inputs, masks, tags)
        ]

        # M step: maximize expected log joint wrt parameters
        # Note: Only do a partial update toward the M step for this sample of xs
        x_masks = [np.ones_like(x, dtype=bool) for x in xs_unboxed]
        for distn in [self.init_state_distn, self.transitions, self.dynamics]:
            curr_prms = copy.deepcopy(distn.params)
            distn.m_step(expectations, xs_unboxed, inputs, x_masks, tags,
                         **kwargs)
            distn.params = convex_combination(curr_prms, distn.params, alpha)

        # Box up the emission parameters again before computing the ELBO
        self.emissions.params = emission_params_boxed

        # Compute expected log likelihood E_q(z | x, y) [log p(z, x, y; theta)]
        for (Ez, Ezzp1, _), x, x_mask, data, mask, input, tag in \
            zip(expectations, xs, x_masks, datas, masks, inputs, tags):

            # Compute expected log likelihood (inner ELBO)
            log_pi0 = self.init_state_distn.log_initial_state_distn(
                x, input, x_mask, tag)
            log_Ps = self.transitions.log_transition_matrices(
                x, input, x_mask, tag)
            log_likes = self.dynamics.log_likelihoods(x, input, x_mask, tag)
            log_likes += self.emissions.log_likelihoods(
                data, input, mask, tag, x)

            elbo += np.sum(Ez[0] * log_pi0)
            elbo += np.sum(Ezzp1 * log_Ps)
            elbo += np.sum(Ez * log_likes)

        # -log q(x)
        elbo -= variational_posterior.log_density(xs)
        assert np.isfinite(elbo)

        return elbo