Example #1
    def get_dist(self, timesteps, samples=1, batch_size=1, fixed=False):
        locs = []
        scales = []
        sample_list = []

        # Add a time dimension
        e_c = tf.expand_dims(self._e_c, 0)
        e_scale = tf.expand_dims(self._e_scale, 0)
        p_scale = tf.expand_dims(self._p_scale, 0)

        sample = tf.expand_dims(tf.expand_dims(tf.zeros_like(e_c), 0), 0)
        sample = tf.tile(sample, [samples, batch_size, 1, 1])
        for t in range(timesteps):
            loc = e_c + self._phi * sample
            scale = e_scale if t == 0 else p_scale
            locs.append(loc)
            scales.append(scale)
            if self._offdiag:
                dist = tfd.MultivariateNormalTriL(loc=loc, scale_tril=scale)
            else:
                dist = tfd.MultivariateNormalDiag(loc=loc, scale_diag=scale)
            sample = dist.sample()
            sample_list.append(sample)

        sample = tf.concat(sample_list, axis=2)
        loc = tf.concat(locs, axis=2)
        scale = tf.concat(scales, axis=-2)
        if self._offdiag:
            dist = tfd.MultivariateNormalTriL(loc=loc, scale_tril=scale)
        else:
            dist = tfd.MultivariateNormalDiag(loc=loc, scale_diag=scale)

        dist = tfd.Independent(dist, reinterpreted_batch_ndims=1)
        return sample, dist
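A minimal sketch of the final step above (shapes are illustrative assumptions, not taken from the source): the per-timestep MVN parameters are stacked and wrapped in tfd.Independent so the time axis is treated as part of the event rather than the batch.

import tensorflow as tf
from tensorflow_probability import distributions as tfd

loc = tf.zeros([4, 3, 2])    # (batch, timesteps, latent_dim), made-up sizes
scale = tf.ones([4, 3, 2])
dist = tfd.Independent(
    tfd.MultivariateNormalDiag(loc=loc, scale_diag=scale),
    reinterpreted_batch_ndims=1)
print(dist.batch_shape, dist.event_shape)  # (4,) and (3, 2)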
Example #2
def generate_fa_data(n_sample, n_factor, n_item, 
                     ld, psi = None, rho = None, 
                     dtype = tf.float64):
    if (n_item % n_factor) != 0:
        n_item = n_factor * (n_item // n_factor)
    loading = np.zeros((n_item, n_factor))
    item_per_factor = (n_item // n_factor)
    for i in range(n_factor):
        for j in range(i * item_per_factor,
                       (i + 1) * item_per_factor):
            loading[j, i] = ld
    loading = tf.constant(loading, dtype = dtype)
    if rho is None:
        cor = tf.eye(n_factor, dtype = dtype)
    else:
        unit = tf.ones((n_factor, 1), dtype = dtype)
        identity = tf.eye(n_factor, dtype = dtype)
        cor = rho * (unit @ tf.transpose(unit)) + (1 - rho) * identity
    if psi is None:
        uniqueness = 1 - tf.linalg.diag_part(loading @ cor @ tf.transpose(loading))
    else:
        uniqueness = psi * tf.ones((n_item, ), dtype = dtype)
    
    mean = tf.zeros(n_item, dtype = dtype)
    cov = loading @ cor @ tf.transpose(loading) + tf.linalg.diag(uniqueness)
    dist_x = tfd.MultivariateNormalTriL(
        loc = mean, scale_tril = tf.linalg.cholesky(cov))
    x = dist_x.sample(n_sample)
    return x
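A hedged usage sketch for generate_fa_data above (argument values are illustrative assumptions; the snippet's module is assumed to import numpy as np, tensorflow as tf, and tensorflow_probability's distributions as tfd):

# 500 draws from a 3-factor, 12-item model with loadings 0.7 and factor
# correlation 0.3; uniquenesses default to 1 minus the communalities.
x = generate_fa_data(n_sample=500, n_factor=3, n_item=12, ld=0.7, rho=0.3)
print(x.shape)  # (500, 12)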
Example #3
def generate_2pl_data(n_sample, n_factor, n_item, 
                      alpha, beta, rho, 
                      dtype = tf.float64):
    if (n_item % n_factor) != 0:
        n_item = n_factor * (n_item // n_factor)
    item_per_factor = (n_item // n_factor)
    intercept = tf.fill((n_item,), value = tf.constant(alpha, dtype = dtype))
    loading = np.zeros((n_item, n_factor))
    for i in range(n_factor):
        for j in range(i * item_per_factor,
                       (i + 1) * item_per_factor):
            loading[j, i] = beta  # common loading for the items on factor i
    loading = tf.constant(loading, dtype = dtype)
    if rho is None:
        cor = tf.eye(n_factor, dtype = dtype)
    else:
        unit = tf.ones((n_factor, 1), dtype = dtype)
        identity = tf.eye(n_factor, dtype = dtype)
        cor = rho * (unit @ tf.transpose(unit)) + (1 - rho) * identity
    dist_eta = tfd.MultivariateNormalTriL(
        loc = tf.zeros(n_factor, dtype = dtype), scale_tril = tf.linalg.cholesky(cor))
    eta = dist_eta.sample(n_sample)
    logits = intercept + eta @ tf.transpose(loading)
    x = tfd.Bernoulli(logits=logits, dtype=dtype).sample()
    return x
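A hedged usage sketch for generate_2pl_data above (values are illustrative assumptions; alpha is the common intercept, beta the common loading, rho the factor correlation):

x = generate_2pl_data(n_sample=1000, n_factor=2, n_item=10,
                      alpha=0.0, beta=1.2, rho=0.25)
print(x.shape)  # (1000, 10), binary responses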
Example #4
    def _build(self, inputs):
        mean, covariance, scale, L = self.create_mean_n_cov_layers(inputs)

        mean_t = mean
        covariance_t = covariance

        self.set_contractive_regularizer(
            mean, covariance_t, self._contractive_regularizer_inputs,
            self._contractive_regularizer_tuple,
            self._contractive_collection_network_str)

        # You might wonder why we use the Cholesky factor here if we already have the covariance matrix.
        # The reason is that the matrix is often ill conditioned: after inverting the precision matrix,
        # the covariance can become asymmetric because of numerical issues, and validate_args then
        # raises an error. If we pass its Cholesky decomposition instead, it stays symmetric.

        output_distribution = tfd.MultivariateNormalTriL(loc=mean_t,
                                                         scale_tril=L,
                                                         validate_args=True)

        # add reconstruction_node method (needed to get some sort of mean or median as a reconstruction without sampling)
        def reconstruction_node(self):
            return self.mean()

        output_distribution.reconstruction_node = types.MethodType(
            reconstruction_node, output_distribution)

        self.mean = mean
        return output_distribution
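A small standalone sketch of the numerical issue the comment above describes (the matrices are made-up examples): inverting a precision matrix can leave a covariance that is not exactly symmetric, whereas handing the distribution a Cholesky factor keeps the implied covariance symmetric and validate_args happy.

import tensorflow as tf
from tensorflow_probability import distributions as tfd

prec = tf.constant([[4.0, 1.0], [1.0, 3.0]])
cov = tf.linalg.inv(prec)
cov = 0.5 * (cov + tf.transpose(cov))      # explicit re-symmetrization
L = tf.linalg.cholesky(cov)
dist = tfd.MultivariateNormalTriL(loc=tf.zeros(2), scale_tril=L,
                                  validate_args=True)
print(dist.covariance())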
Example #5
    def _build(self, inputs):
        mean, covariance, scale, L = self.create_mean_n_cov_layers(inputs)

        mean_t = mean
        covariance_t = covariance

        self.set_contractive_regularizer(
            mean, covariance, self._contractive_regularizer_inputs,
            self._contractive_regularizer_tuple,
            self._contractive_collection_network_str)

        # output_distribution = MultivariateNormalTriLChannelFlipped(loc=mean_t, scale_tril=L, validate_args=True)
        output_distribution = tfd.MultivariateNormalTriL(loc=mean_t,
                                                         scale_tril=L,
                                                         validate_args=True)

        # add reconstruction_node method (needed to get some sort of mean or median as a reconstruction without sampling)
        def reconstruction_node(self):
            return self.mean()

        output_distribution.reconstruction_node = types.MethodType(
            reconstruction_node, output_distribution)

        self.mean = mean
        return output_distribution
Example #6
def generate_grm_data(n_sample, n_factor, n_item,
                      nu, ld, rho,
                      dtype = tf.float64):
    if (n_item % n_factor) != 0:
        n_item = n_factor * (n_item // n_factor)
    item_per_factor = (n_item // n_factor)
    n_category = len(nu) + 1
    intercept = tf.tile(tf.constant([nu], dtype = dtype),
                        multiples = [n_item, 1])
    loading = np.zeros((n_item, n_factor))
    for i in range(n_factor):
        for j in range(i * item_per_factor,
                       (i + 1) * item_per_factor):
            loading[j, i] = ld
    loading = tf.constant(loading, dtype = dtype)
    if rho is None:
        cor = tf.eye(n_factor, dtype = dtype)
    else:
        unit = tf.ones((n_factor, 1), dtype = dtype)
        identity = tf.eye(n_factor, dtype = dtype)
        cor = rho * (unit @ tf.transpose(unit)) + (1 - rho) * identity
    dist_eta = tfd.MultivariateNormalTriL(
        loc = tf.zeros(n_factor, dtype = dtype),
        scale_tril = tf.linalg.cholesky(cor))
    eta = dist_eta.sample(n_sample)
    c, d = create_cd(n_category, dtype)
    probs = grm_irf(eta, intercept, loading, c, d)
    x = tfd.Categorical(probs=probs, dtype=dtype).sample()
    return x
Example #7
 def fbar_prior(self, fbar, v, l2):
     m, K = self.fbar_prior_params(v, l2)
     prob = 0
     for r in range(self.num_replicates):
         prob += tfd.MultivariateNormalTriL(
             loc=m, scale_tril=tf.linalg.cholesky(K)).log_prob(fbar[r, 0])
     return prob
Example #8
 def _init_distribution(conditions):
     loc, covariance_matrix = conditions["loc"], conditions["covariance_matrix"]
     try:
         chol_cov_matrix = tf.linalg.cholesky(covariance_matrix)
     except tf.errors.InvalidArgumentError:
         raise ValueError("Cholesky decomposition failed! Check your `covariance_matrix`.")
     return tfd.MultivariateNormalTriL(loc=loc, scale_tril=chol_cov_matrix)
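A self-contained sketch of what this helper does (the covariance values are assumptions for illustration): take the Cholesky factor of a supplied covariance matrix, build the distribution from it, and check that the implied covariance round-trips.

import tensorflow as tf
from tensorflow_probability import distributions as tfd

loc = tf.constant([0.0, 1.0])
covariance_matrix = tf.constant([[1.0, 0.5], [0.5, 2.0]])
dist = tfd.MultivariateNormalTriL(
    loc=loc, scale_tril=tf.linalg.cholesky(covariance_matrix))
print(dist.covariance())       # ~[[1.0, 0.5], [0.5, 2.0]]
print(dist.sample(3).shape)    # (3, 2)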
Example #9
    def _build_cross_ent(self, weights, means, chol_covars, kernel_chol):
        """Construct the cross-entropy.

        Args:
            weights: shape: (num_components)
            means: shape: (num_components, num_latents, num_inducing)
            chol_covars: shape: (num_components, num_latents, num_inducing[, num_inducing])
            kernel_chol: shape: (num_latents, num_inducing, num_inducing)
        Returns:
            Cross entropy as scalar
        """
        if self.args['diag_post']:
            # TODO(karl): this is a bit inefficient since we're not making use of the fact
            # that chol_covars is diagonal. A solution most likely involves a custom tf op.

            # shape of trace: (num_components, num_latents)
            trace = tfl.trace(
                util.cholesky_solve_br(kernel_chol, tfl.diag(chol_covars)))
        else:
            trace = tf.reduce_sum(input_tensor=util.mul_sum(
                util.cholesky_solve_br(kernel_chol, chol_covars), chol_covars),
                                  axis=-1)

        # sum_val has the same shape as weights
        gaussian = tfd.MultivariateNormalTriL(means, kernel_chol)
        sum_val = tf.reduce_sum(input_tensor=gaussian.log_prob([0.0]) -
                                0.5 * trace,
                                axis=-1)

        # weighted sum of weights and sum_val
        cross_ent = util.mul_sum(weights, sum_val)

        return cross_ent
Example #10
def get_pdf(param_vec, vehicle_type):
    # see https://ericmjl.github.io/blog/2019/5/29/reasoning-about-shapes-and-probability-distributions/
    # for info on shapes
    if vehicle_type == 'other_vehicle':
        alpha, mus, sigmas = slice_pvector(param_vec, vehicle_type) # Unpack parameter vectors
        mvn = tfd.MixtureSameFamily(
            mixture_distribution=tfd.Categorical(probs=alpha),
            components_distribution=tfd.Normal(
                loc=mus,
                scale=sigmas))

    if vehicle_type == 'merge_vehicle':
        alphas, mus_long, sigmas_long, mus_lat, \
                            sigmas_lat, rhos = slice_pvector(param_vec, vehicle_type)


        cov = get_CovMatrix(rhos, sigmas_long, sigmas_lat)
        mus = tf.stack([mus_long, mus_lat], axis=3, name='mus')
        mvn = tfd.MixtureSameFamily(
            mixture_distribution=tfd.Categorical(
                probs=alphas),
            components_distribution=tfd.MultivariateNormalTriL(
                loc=mus,
                scale_tril=tf.linalg.cholesky(cov), name='MultivariateNormalTriL'))
    # print('mus shape: ', mus.shape)
    return mvn
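A shape-focused sketch of the mixture construction above (all sizes are made-up assumptions, in the spirit of the shape-reasoning post linked in the snippet): K mixture weights select among K bivariate normal components for each batch element.

import tensorflow as tf
from tensorflow_probability import distributions as tfd

batch, K = 5, 3
alphas = tf.fill([batch, K], 1.0 / K)        # mixture weights
mus = tf.zeros([batch, K, 2])                # component means
covs = tf.eye(2, batch_shape=[batch, K])     # component covariances
mvn = tfd.MixtureSameFamily(
    mixture_distribution=tfd.Categorical(probs=alphas),
    components_distribution=tfd.MultivariateNormalTriL(
        loc=mus, scale_tril=tf.linalg.cholesky(covs)))
print(mvn.batch_shape, mvn.event_shape)      # (5,) and (2,)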
Example #11
 def get_prior_mu(beta0, m0, lambdas):
     precisions = (lambdas*beta0)
     covs = tf.linalg.inv(precisions)
     covs = 0.5*(covs + tf.transpose(covs, [0, 1, 3, 2])) # numerical stability workaround
     d = tfd.MultivariateNormalTriL(loc=m0, 
                                    scale_tril=tf.linalg.cholesky(covs))
     return d
Example #12
 def get_norm_log_probs(mus, lambdas, X):
     ilambdas = tf.linalg.inv(lambdas)
     ilambdas = 0.5*(ilambdas + tf.transpose(ilambdas, [0, 1, 3, 2])) # numerical stability workaround
     d = tfd.MultivariateNormalTriL(loc=mus, 
                     scale_tril=tf.linalg.cholesky(ilambdas))
     x_log_probs = [d.log_prob(X[n]) for n in range(X.shape[0])]
     return tf.stack(x_log_probs, 1)     
Example #13
 def get_posterior_mu(beta, mu, lambdas):
     locations = np.broadcast_to(mu, lambdas.shape[0:1]+mu.shape)
     precisions = (lambdas*beta[None,:,None,None])
     covs = tf.linalg.inv(precisions) #!
     covs = 0.5*(covs + tf.transpose(covs, [0, 1, 3, 2])) # numerical stability workaround
     d = tfd.MultivariateNormalTriL(loc=locations, 
                                    scale_tril=tf.linalg.cholesky(covs))
     return d
Example #14
 def fbar_prior(self, fbar, param_0bar, param_1bar):
     m, K = self.kernel_selector()(param_0bar, param_1bar)
     jitter = tf.linalg.diag(1e-8 *tf.ones(self.N_p, dtype='float64'))
     prob = 0
     for r in range(self.num_replicates):
         for i in range(self.num_tfs):
             prob += tfd.MultivariateNormalTriL(loc=m, scale_tril=tf.linalg.cholesky(K[i]+jitter)).log_prob(fbar[r, i])
     return prob
Example #15
    def get_dist(self, timesteps, samples=1, batch_size=1, fixed=True):
        """
        Samples from self.cell `timesteps` times.
        On each step, the previous (sample, state) is fed back into the cell
        (zero_state used for 0th step).

        The cell returns a multivariate normal diagonal distribution for each timestep.
        We collect each timestep-dist's params (loc and scale), then use them to create
        the return value: a single MVN diag dist that has a dimension for timesteps.

        The cell returns a full dist for each timestep so that we can 'sample' it.
        If our sample size is 1, and our cell is an RNN cell, then this is roughly equivalent
        to doing a generative RNN (init state = zeros, return_sequences=True) then passing
        those values through a pair of Dense layers to parameterize a single MVNDiag.

        Args:
            timesteps: Number of times to sample from the dynamic_prior_cell. Output will have
                this many timesteps.
            samples: Number of samples to draw from the latent distribution.
            batch_size: Number of sequences to sample.
            fixed: Boolean for whether or not to share the same random
                    sample across all sequences in batch. See
                    https://github.com/tensorflow/probability/blob/698e0101aecf46c42858db7952ee3024e091c291/tensorflow_probability/examples/disentangled_vae.py#L887

        Returns:
            A (sample, dist) tuple: the stacked samples and a single distribution
            whose event shape includes the timestep dimension.
        """
        if fixed:
            sample_batch_size = 1
        else:
            sample_batch_size = batch_size

        sample, state = self.cell.zero_state([samples, sample_batch_size])
        locs = []
        scales = []
        sample_list = []
        scale_parm_name = "scale_tril" if self.cell.offdiag else "scale_diag"  # TODO: Check this for offdiag
        for _ in range(timesteps):
            dist, state = self.cell(sample, state)
            sample = dist.sample()
            locs.append(dist.parameters["loc"])
            scales.append(dist.parameters[scale_parm_name])
            sample_list.append(sample)

        sample = tf.stack(sample_list, axis=2)
        loc = tf.stack(locs, axis=2)
        scale = tf.stack(scales, axis=2)

        if fixed:  # tile along the batch axis
            sample = sample + tf.zeros([batch_size, 1, 1])

        if self.cell.offdiag:
            dist = tfd.MultivariateNormalTriL(loc=loc, scale_tril=scale)
        else:
            dist = tfd.MultivariateNormalDiag(loc=loc, scale_diag=scale)

        dist = tfd.Independent(dist, reinterpreted_batch_ndims=1)
        return sample, dist
Example #16
 def __call__(self):
     """Get the distribution object from the backend"""
     if get_backend() == 'pytorch':
         import torch.distributions as tod
         return tod.multivariate_normal.MultivariateNormal(
             self['loc'], covariance_matrix=self['cov'])
     else:
         import tensorflow as tf
         from tensorflow_probability import distributions as tfd
         tril = tf.linalg.cholesky(self['cov'])
         return tfd.MultivariateNormalTriL(loc=self['loc'], scale_tril=tril)
Example #17
    def log_likelihood_tf(data):
        data_ph = tf.placeholder(dtype=tf.float32, shape=data.shape)
        mean_ph = tf.placeholder(dtype=tf.float32, shape=(data.shape[1], ))
        cov_chol_ph = tf.placeholder(dtype=tf.float32,
                                     shape=(data.shape[1], data.shape[1]))

        dist = tpd.MultivariateNormalTriL(loc=mean_ph, scale_tril=cov_chol_ph)
        return dist.log_prob(data_ph), {
            'mean': mean_ph,
            'cov_chol': cov_chol_ph,
            'data': data_ph
        }
Example #18
 def normal_sampler_fn(seed):
     p1, p2 = all_states[self.state_indices['kernel_params']]
     m, K = self.kernel_selector()(logit(p1), logit(p2))
     m = tf.zeros((self.num_replicates, self.num_tfs, self.N_p),
                  dtype='float64')
     K = tf.stack([K for _ in range(3)], axis=0)
     jitter = tf.linalg.diag(1e-8 * tf.ones(self.N_p, dtype='float64'))
     z = tfd.MultivariateNormalTriL(
         loc=m,
         scale_tril=tf.linalg.cholesky(K + jitter)).sample(seed=seed)
     # tf.print(z)
     return z
Example #19
 def qx(self):
     if self._qx is None:
         if self.Xchol.shape.ndims == 1:
             self._qx = tfd.MultivariateNormalDiag(loc=tf.reshape(
                 self.X, [-1]),
                                                   scale_diag=self.Xchol)
         else:
             self._qx = tfd.MultivariateNormalTriL(
                 loc=self.X if self.Xchol.shape.ndims == 3 else tf.reshape(
                     self.X, [-1]),
                 scale_tril=self.Xchol)
     return self._qx
Example #20
def loss(y_est, y):
    y = tf.cast(y, dtype=tf.float32)
    ######### Your code starts here #########
    # We want to compute the negative log-likelihood loss between y_est and y where
    # - y_est is the output of the network for a batch of observations,
    # - y is the actions the expert took for the corresponding batch of observations
    # At the end your code should return the scalar loss value.
    # HINT: You may find the classes of tensorflow_probability.distributions (imported as tfd) useful.
    #       In particular, we used MultivariateNormalTriL, but it is not the only way.
    mvn = tfd.MultivariateNormalTriL(loc=y_est[:, :2],
                                     scale_tril=tfp.math.fill_triangular(
                                         y_est[:, 2:]))
    return -tf.math.reduce_mean(mvn.log_prob(y))
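A hedged usage sketch for the loss above (tensor values and shapes are made-up assumptions; the snippet's module is assumed to already import tensorflow_probability as tfp with tfd as its distributions): the network output packs a 2-D mean followed by the three entries that tfp.math.fill_triangular turns into a 2x2 lower-triangular scale.

import tensorflow as tf

y_est = tf.constant([[0.5, -0.3, 1.0, 0.0, 1.0]])  # [mean_1, mean_2, scale-tril entries]
y = tf.constant([[0.4, -0.2]])                      # expert actions for the batch
print(loss(y_est, y))                               # scalar negative log-likelihood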
Example #21
 def qx(self):
     if self._qx is None:
         self._qx = []
         for s in range(self.n_seq):
             if self.Xchol[s].shape.ndims == 1:
                 self._qx.append(
                     tfd.MultivariateNormalDiag(loc=tf.reshape(
                         self.X[s], [-1]),
                                                scale_diag=self.Xchol[s]))
             else:
                 self._qx.append(
                     tfd.MultivariateNormalTriL(
                         loc=self.X[s] if self.Xchol[s].shape.ndims == 3
                         else tf.reshape(self.X[s], [-1]),
                         scale_tril=self.Xchol[s]))
     return self._qx
Example #22
    def density(self, xi, t=0):
        x, dx = xi[:, :self._u_dim], xi[:, self._u_dim:]

        ys = [f(x) for f in self.fs]  # transform state
        js = [j(x) for j in self.js]  # get jacobians
        dys = [j.matvec(dx) for j in js]  # get velocities in transformed space

        # "forces" in transformed space from the different policies
        fys_locs_covs = [
            self.pis[i](ys[i], dys[i], t) for i in range(self.n_experts)
        ]

        # separate locs and covs
        fys_locs = [_y[0] for _y in fys_locs_covs]
        fys_covs = [_y[1] for _y in fys_locs_covs]

        # "forces" in original space
        fxs = [
            js[i].matvec(fys_locs[i], adjoint=True)
            for i in range(self.n_experts)
        ]

        # covariances "forces" in original space
        fxs_covs = [
            matquad(js[i], fys_covs[i], adjoint=True)
            for i in range(self.n_experts)
        ]

        # precisions with regularization
        fxs_precs = [
            tf.linalg.inv(cov + self._reg**2 * tf.eye(self.experts_size[i]))
            for i, cov in enumerate(fxs_covs)
        ]

        # compute product of Gaussian policies
        precs = tf.reduce_sum(fxs_precs, axis=0)
        covs = tf.linalg.inv(precs)
        locs = [
            tf.linalg.LinearOperatorFullMatrix(fxs_precs[i]).matvec(fxs[i])
            for i in range(self.n_experts)
        ]
        locs = tf.linalg.LinearOperatorFullMatrix(covs).matvec(
            tf.reduce_sum(locs, axis=0))

        return ds.MultivariateNormalTriL(locs, tf.linalg.cholesky(covs))
Example #23
    def __call__(self, x):
        mapped = self.net(x)

        batch_size = mapped.shape.as_list()[0]
        time_length = mapped.shape.as_list()[1]

        # Obtain mean and precision matrix components
        num_dim = len(mapped.shape.as_list())
        perm = list(range(num_dim - 2)) + [num_dim - 1, num_dim - 2]
        mapped_transposed = tf.transpose(mapped, perm=perm)
        mapped_mean = mapped_transposed[:, :self.z_size]
        mapped_covar = mapped_transposed[:, self.z_size:]

        # tf.nn.sigmoid provides more stable performance on Physionet dataset
        if self.data_type == 'physionet':
            mapped_covar = tf.nn.sigmoid(mapped_covar)
        else:
            mapped_covar = tf.nn.softplus(mapped_covar)

        mapped_reshaped = tf.reshape(mapped_covar, [batch_size, self.z_size, 2*time_length])

        dense_shape = [batch_size, self.z_size, time_length, time_length]
        idxs_1 = np.repeat(np.arange(batch_size), self.z_size*(2*time_length-1))
        idxs_2 = np.tile(np.repeat(np.arange(self.z_size), (2*time_length-1)), batch_size)
        idxs_3 = np.tile(np.concatenate([np.arange(time_length), np.arange(time_length-1)]), batch_size*self.z_size)
        idxs_4 = np.tile(np.concatenate([np.arange(time_length), np.arange(1,time_length)]), batch_size*self.z_size)
        idxs_all = np.stack([idxs_1, idxs_2, idxs_3, idxs_4], axis=1)

        # ~10x faster on CPU than on GPU
        with tf.device('/cpu:0'):
            # Obtain covariance matrix from precision one
            mapped_values = tf.reshape(mapped_reshaped[:, :, :-1], [-1])
            prec_sparse = tf.sparse.SparseTensor(indices=idxs_all, values=mapped_values, dense_shape=dense_shape)
            prec_sparse = tf.sparse.reorder(prec_sparse)
            prec_tril = tf.sparse_add(tf.zeros(prec_sparse.dense_shape, dtype=tf.float32), prec_sparse)
            eye = tf.eye(num_rows=prec_tril.shape.as_list()[-1], batch_shape=prec_tril.shape.as_list()[:-2])
            prec_tril = prec_tril + eye
            cov_tril = tf.linalg.triangular_solve(matrix=prec_tril, rhs=eye, lower=False)
            cov_tril = tf.where(tf.math.is_finite(cov_tril), cov_tril, tf.zeros_like(cov_tril))

        num_dim = len(cov_tril.shape)
        perm = list(range(num_dim - 2)) + [num_dim - 1, num_dim - 2]
        cov_tril_lower = tf.transpose(cov_tril, perm=perm)
        z_dist = tfd.MultivariateNormalTriL(loc=mapped_mean, scale_tril=cov_tril_lower)
        return z_dist
Example #24
 def get_dist(self, timesteps, samples=1, batch_size=1):
     """
     Tiles the saved loc and scale to the same shape as `posterior`, then uses them to
     create an MVN dist with the appropriate shape. Each timestep has the same loc and
     scale, but samples drawn from the distribution still differ across timesteps.
     Args:
         timesteps:
         samples:
         batch_size:
     Returns:
         MVNDiag distribution of the same shape as `posterior`
     """
     loc = tf.tile(tf.expand_dims(self._loc, 0), [timesteps, 1])
     scale = tf.expand_dims(self._scale, 0)
     if self._offdiag:
         scale = tf.tile(scale, [timesteps, 1, 1])
         dist = tfd.MultivariateNormalTriL(loc=loc, scale_tril=scale)
     else:
         scale = tf.tile(scale, [timesteps, 1])
         dist = tfd.MultivariateNormalDiag(loc=loc, scale_diag=scale)
     dist = tfd.Independent(dist, reinterpreted_batch_ndims=1)
     return dist.sample([samples, batch_size]), dist
Example #25
def pdf_2D(z, density_name=''):
    assert density_name in AVAILABLE_2D_DISTRIBUTIONS, "Incorrect density name."
    if density_name == '':
        return 1
    elif density_name == 'banana':
        z1, z2 = z[:, 0], z[:, 1]
        mu = np.array([0.5, 0.5], dtype='float32')
        cov = np.array([[0.06, 0.055], [0.055, 0.06]], dtype='float32')
        scale = tf.linalg.cholesky(cov)
        p = tfd.MultivariateNormalTriL(loc=mu, scale_tril=scale)
        z2 = z1**2 + z2
        z1, z2 = tf.expand_dims(z1, 1), tf.expand_dims(z2, 1)
        z = tf.concat([z1, z2], axis=1)
        return p.prob(z)
    elif density_name == 'circle':
        z1, z2 = z[:, 0], z[:, 1]
        norm = (z1**2 + z2**2)**0.5
        exp1 = math.exp(-0.2 * ((z1 - 2) / 0.8)**2)
        exp2 = math.exp(-0.2 * ((z1 + 2) / 0.8)**2)
        u = 0.5 * ((norm - 4) / 0.4)**2 - math.log(exp1 + exp2)
        return math.exp(-u)
    elif density_name == 'eight_schools':
        y_i = 0
        sigma_i = 10
        thetas, mu, log_tau = z[:, 0], z[:, 1], z[:, 2]
        likelihood = tfd.Normal(loc=thetas, scale=sigma_i)
        prior_theta = tfd.Normal(loc=mu, scale=math.exp(log_tau))
        prior_mu = tfd.Normal(loc=0, scale=5)
        prior_tau = tfd.HalfCauchy(loc=0, scale=5)
        return likelihood.prob(y_i) * prior_theta.prob(thetas) * prior_mu.prob(
            mu) * prior_tau.prob(math.exp(log_tau)) * math.exp(log_tau)
    elif density_name == 'figure_eight':
        mu1 = 1 * np.array([-1, -1], dtype='float32')
        mu2 = 1 * np.array([1, 1], dtype='float32')
        scale = 0.45 * np.array([1, 1], dtype='float32')
        pi = 0.5
        comp1 = tfd.MultivariateNormalDiag(loc=mu1, scale_diag=scale)
        comp2 = tfd.MultivariateNormalDiag(loc=mu2, scale_diag=scale)
        return (1 - pi) * comp1.prob(z) + pi * comp2.prob(z)
Example #26
    def _build(self, inputs):

        inputs = tf.layers.flatten(inputs)

        self.dense_loc = snt.Linear(self._output_size, **self._extra_kwargs)
        self.dense_diag_params = snt.Linear(self._output_size, **self._extra_kwargs)
        n_out_of_diag_elems = int(self._output_size * (self._output_size - 1) / 2)
        self.dense_out_of_diag_params = snt.Linear(n_out_of_diag_elems, **self._extra_kwargs)


        loc = self.dense_loc(inputs)
        diag_params = self.dense_diag_params(inputs)
        out_of_diag_params = self.dense_out_of_diag_params(inputs)

        lower_triangle = tf.contrib.distributions.fill_triangular(out_of_diag_params)
        lower_triangle = tf.pad(lower_triangle, [[0, 0], [1, 0], [0, 1]])

        diag_positive = self._minimal_covariance + tf.nn.softplus(diag_params)

        scale_tril = tf.linalg.set_diag(lower_triangle, diag_positive)

        dtype = inputs.dtype
        n_tril = n_out_of_diag_elems + self._output_size
        self._calibration_tril_params = tf.get_variable("calibration_tril_params",
                                                        shape=(n_tril,),
                                                        dtype=dtype,
                                                        trainable=False,
                                                        initializer=tf.initializers.constant(value=1.))

        self.calibration_tril = tf.contrib.distributions.fill_triangular(self._calibration_tril_params, name="calibration_tril")


        output_params = {"loc" : loc, "scale_tril" : tf.multiply(self.calibration_tril, scale_tril)}

        distr = tfd.MultivariateNormalTriL(**output_params)

        return distr
Example #27
    def density(self, xi, t=0):
        ys = [f(xi) for f in self.fs]  # transform state
        js = [j(xi) for j in self.js]  # get jacobians

        # "velocities" in transformed space from the different policies
        fys_locs_covs = [self.pis[i](ys[i], t) for i in range(self.n_experts)]

        # separate locs and covs
        fys_locs = [_y[0] for _y in fys_locs_covs]
        fys_covs = [_y[1] for _y in fys_locs_covs]

        # precisions with regularization J^T Lambda
        fys_precs = [
            tf.linalg.inv(fys_covs[i] +
                          self._reg**2 * tf.eye(self.experts_size[i]))
            for i in range(self.n_experts)
        ]

        fxs_eta = [
            tf.linalg.LinearOperatorFullMatrix(js[i].matmul(
                fys_precs[i], adjoint=True)).matvec(fys_locs[i])
            for i in range(self.n_experts)
        ]

        fxs_precs = [
            matquad(js[i], fys_precs[i]) for i in range(self.n_experts)
        ]

        # compute product of Gaussian policies
        precs = tf.reduce_sum(fxs_precs, axis=0)

        covs = tf.linalg.inv(precs)

        etas = tf.reduce_sum(fxs_eta, axis=0)
        locs = tf.linalg.LinearOperatorFullMatrix(covs).matvec(etas)

        return ds.MultivariateNormalTriL(locs, tf.linalg.cholesky(covs))
Example #28
 def _init_distribution(conditions, **kwargs):
     loc, scale_tril = conditions["loc"], conditions["scale_tril"]
     return tfd.MultivariateNormalTriL(loc=loc, scale_tril=scale_tril, **kwargs)
Example #29
    def _build_entropy(self, weights, means, chol_covars):
        """Construct entropy.

        Args:
            weights: shape: (num_components)
            means: shape: (num_components, num_latents, num_inducing)
            chol_covars: shape: (num_components, num_latents, num_inducing[, num_inducing])
        Returns:
            Entropy (scalar)
        """

        # This part is to compute the product of the pdf of normal distributions
        """
        chol_component_covar = []
        component_mean = []
        component_covar =[]
        covar_shape = tf.shape(chol_covars)[-2:]
        mean_shape = tf.shape(means)[-1:]

        # \Sigma_new = (\sum_{i=1}^{num_latents}( \Sigma_i^-1) )^{-1}
        # \Mu_new = \Sigma_new * (\sum_{i=1}^{num_latents} \Sigma_i^{-1} * \mu_i)
        for i in range(self.num_components):
            temp_cov = tf.zeros(covar_shape)
            temp_mean = tf.zeros(mean_shape)[..., tf.newaxis]

            for k in range(self.num_latents):
                # Compute the sum of (\Sigma_i)^{-1}
                temp_cov += tf.cholesky_solve(chol_covars[i, k, :, :], tf.eye(covar_shape[0]))
                # Compute the sum of (\Sigma_i)^{-1} * \mu_i
                temp_mean += tf.cholesky_solve(chol_covars[i, k, :, :],
                                               means[i, k, :, tf.newaxis])

            # Compute \Sigma_new = temp_cov^{-1}
            temp_chol_covar = tf.cholesky(temp_cov)
            temp_component_covar = tf.cholesky_solve(temp_chol_covar, tf.eye(covar_shape[0]))
            component_covar.append(temp_component_covar)
            # Compute \Mu_new = \Sigma_new * (\sum_{i=1}^{num_latents} \Sigma_i^{-1} * \mu_i)
            temp_component_mean = temp_component_covar @ temp_mean
            component_mean.append(temp_component_mean)

            # Some functions need cholesky of \Sigma_new
            chol_component_covar.append(tf.cholesky(temp_component_covar))

        chol_component_covar = tf.stack(chol_component_covar, 0)
        component_covar = tf.stack(component_covar, 0)
        component_mean = tf.squeeze(tf.stack(component_mean, 0), -1)
        """
        # First build a square matrix of normals.
        if self.args['diag_post']:
            # construct normal distributions for all combinations of components
            variational_dist = tfd.MultivariateNormalDiag(
                means,
                tf.sqrt(chol_covars[tf.newaxis, ...] +
                        chol_covars[:, tf.newaxis, ...]))
        else:
            if self.args['num_components'] == 1:
                # Use the fact that chol(S + S) = sqrt(2) * chol(S)
                chol_covars_sum = tf.sqrt(2.) * chol_covars[tf.newaxis, ...]
            else:
                # Here we use the original component_covar directly
                # TODO: Can we just stay in cholesky space somehow?
                component_covar = util.mat_square(chol_covars)
                chol_covars_sum = tfl.cholesky(
                    component_covar[tf.newaxis, ...] +
                    component_covar[:, tf.newaxis, ...])
            # The class MultivariateNormalTriL only accepts cholesky decompositions of covariances
            variational_dist = tfd.MultivariateNormalTriL(
                means[tf.newaxis, ...], chol_covars_sum)

        # compute log probability of all means in all normal distributions
        # then sum over all latent functions
        # shape of log_normal_probs: (num_components, num_components)
        log_normal_probs = tf.reduce_sum(
            input_tensor=variational_dist.log_prob(means[:, tf.newaxis, ...]),
            axis=-1)

        # Now compute the entropy.
        # broadcast `weights` into dimension 1, then do `logsumexp` in that dimension
        weighted_logsumexp_probs = tf.reduce_logsumexp(
            input_tensor=tfm.log(weights) + log_normal_probs, axis=1)
        # multiply with weights again and then sum over it all
        return -util.mul_sum(weights, weighted_logsumexp_probs)
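A quick check of the identity used in the single-component branch above (the matrix is an arbitrary positive-definite example): if S = L L^T then S + S = (sqrt(2) L)(sqrt(2) L)^T, so chol(S + S) = sqrt(2) * chol(S).

import tensorflow as tf

S = tf.constant([[2.0, 0.3], [0.3, 1.0]])
L = tf.linalg.cholesky(S)
print(tf.linalg.cholesky(S + S) - tf.sqrt(2.0) * L)  # ~zeros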
Example #30
    def run(self, its=None, samples=100, threshold=0.001):
        """ Run the VI optimisation.
        
        its: Number of iterations. Set its to None to stop automatically
        once the ELBO has changed by less than `threshold` percent
        (comparing the rolling average of the last 50 iterations
        with that of the 50 before).
        samples: Number of samples for the stochastic estimate of the
        gradient.
        threshold: if its is None, the percentage change between the
        rolling averages, over 50 iterations, below which to stop.
        Default: 0.001 (0.1%).
        """
        elbo_record = []
        it = 0
        print("Starting Run")
        try:
            while (its is None) or (it < its):
                it += 1
                with tf.GradientTape() as tape:
                    qu = tfd.MultivariateNormalTriL(self.mu[:, 0], self.scale)
                    samps = self.sm.get_samples(self.mu, self.scale, samples)
                    scaled = tf.concat([
                        self.transform_fn(samps[:, :, ::2], self.Y[:, 0:1],
                                          self.sideY),
                        self.transform_fn(samps[:, :, 1::2], self.Y[:, 1:2],
                                          self.sideY)
                    ], 2)
                    scaled = (scaled * (1 - self.ref)) + (self.Y * self.ref)

                    if self.mulike is not None:  #if we have non-stationary likelihood variance...
                        qulike = tfd.MultivariateNormalTriL(
                            self.mulike[:, 0], self.scalelike)
                        like = self.smlike.get_samples(self.mulike,
                                                       self.scalelike, samples)
                        ell = tf.reduce_mean(
                            tf.reduce_sum(
                                self.likelihoodfn_nonstationary(
                                    scaled[:, :, 0], scaled[:, :, 1],
                                    like[:, :, 0] * (1 - self.ref[:, 0]) -
                                    1000 * self.ref[:, 0],
                                    like[:, :, 1] * (1 - self.ref[:, 1]) -
                                    1000 * self.ref[:, 1]), 1))
                    else:  #stationary likelihood variance
                        ell = tf.reduce_mean(
                            tf.reduce_sum(
                                self.likelihoodfn(scaled[:, :, 0],
                                                  scaled[:, :, 1]), 1))

                    elbo_loss = -ell + tfd.kl_divergence(qu, self.pu)

                    if self.likemodel == 'process':
                        assert self.mulike is not None
                        assert self.scalelike is not None
                        elbo_loss += tfd.kl_divergence(qulike, self.pulike)
                    if self.likemodel == 'distribution':
                        assert self.mulike is not None
                        elbo_loss -= self.pulike.log_prob(self.mulike[:, 0])

                    if it % 20 == 0: print("%d (ELBO=%0.4f)" % (it, elbo_loss))

                    if (self.mulike is None) or (it % 50 <
                                                 25):  #optimise latent fns
                        gradients = tape.gradient(elbo_loss,
                                                  [self.mu, self.scale])
                        self.optimizer.apply_gradients(
                            zip(gradients, [self.mu, self.scale]))
                    else:  #this optimises the likelihood...
                        if self.likemodel == 'distribution':
                            gradients = tape.gradient(elbo_loss, [self.mulike])
                            self.likeoptimizer.apply_gradients(
                                zip(gradients, [self.mulike]))
                        if self.likemodel == 'process':
                            gradients = tape.gradient(
                                elbo_loss, [self.mulike, self.scalelike])
                            self.likeoptimizer.apply_gradients(
                                zip(gradients, [self.mulike, self.scalelike]))

                    elbo_record.append(elbo_loss)
                if its is None:
                    if it > 100:
                        oldm = np.median(elbo_record[-100:-50])
                        m = np.median(elbo_record[-50:])
                        if np.abs((oldm - m) / ((oldm + m) / 2)) < threshold:
                            #check that nothing weird's happened!
                            if np.std(elbo_record[-50:]) < np.std(
                                    elbo_record[-100:-50]):
                                break
        except KeyboardInterrupt:
            pass
        return np.array(elbo_record)