def _create_gaussian(self, gaussian_type):
    mu = tf.random_normal([3])
    if gaussian_type == tfp.distributions.MultivariateNormalDiag:
        scale_diag = tf.random_normal([3])
        dist = tfp.distributions.MultivariateNormalDiag(mu, scale_diag)
    elif gaussian_type == tfp.distributions.MultivariateNormalDiagPlusLowRank:
        scale_diag = tf.random_normal([3])
        perturb_factor = tf.random_normal([3, 2])
        scale_perturb_diag = tf.random_normal([2])
        dist = tfp.distributions.MultivariateNormalDiagPlusLowRank(
            mu,
            scale_diag,
            scale_perturb_factor=perturb_factor,
            scale_perturb_diag=scale_perturb_diag)
    elif gaussian_type == tfp.distributions.MultivariateNormalTriL:
        cov = tf.random_uniform([3, 3], minval=0, maxval=1.0)
        # Symmetrize and shift the spectrum to create a PSD matrix.
        cov = 0.5 * (cov + tf.transpose(cov)) + 3 * tf.eye(3)
        scale = tf.cholesky(cov)
        dist = tfp.distributions.MultivariateNormalTriL(mu, scale)
    elif gaussian_type == tfp.distributions.MultivariateNormalFullCovariance:
        cov = tf.random_uniform([3, 3], minval=0, maxval=1.0)
        # Symmetrize and shift the spectrum to create a PSD matrix.
        cov = 0.5 * (cov + tf.transpose(cov)) + 3 * tf.eye(3)
        dist = tfp.distributions.MultivariateNormalFullCovariance(mu, cov)
    else:
        raise ValueError("Unsupported gaussian_type: %s" % gaussian_type)
    return (dist, mu, dist.covariance())
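
A minimal usage sketch (an illustration, not from the original; it assumes TF1 graph mode, tensorflow_probability imported as tfp, and a hypothetical instance `helper` of the class this method belongs to):

import tensorflow as tf
import tensorflow_probability as tfp

# `helper` is a hypothetical instance of the (unshown) test-helper class.
dist, mu, cov = helper._create_gaussian(tfp.distributions.MultivariateNormalTriL)
samples = dist.sample(10)  # [10, 3]
with tf.Session() as sess:
    sample_vals, cov_val = sess.run([samples, cov])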
Example #2
def sample_activations(acts, n_sample):
    """
    take n_sample samples from acts
    input: acts: GaussianVar [batch_size (b), hidden size (h)]
    """
    sigma_sqr = acts.var  # [b, h, h]
    sigma = tf.transpose(tf.cholesky(sigma_sqr), [0, 2, 1])  # [b, h, h]
    standard_samples = tf.random_normal(
        [tf.shape(sigma)[0], n_sample,
         tf.shape(sigma)[-1]])  # [b, n_sample, h]
    samples = tf.matmul(standard_samples, sigma) + tf.expand_dims(
        acts.mean, 1)  # [b, n_sample, h]
    return samples
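
The matmul above uses the standard affine-transform identity: if z ~ N(0, I) and L = cholesky(Sigma), then z @ L.T has covariance L @ L.T = Sigma. A quick NumPy check of that identity (an illustration, not part of the original):

import numpy as np

rng = np.random.RandomState(0)
sigma = np.array([[2.0, 0.5], [0.5, 1.0]])
L = np.linalg.cholesky(sigma)
z = rng.randn(100000, 2)
x = z @ L.T                     # rows are samples with covariance L @ L.T
print(np.cov(x, rowvar=False))  # ~= sigma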
Example #3
def orthonorm_op(x, epsilon=1e-7):
    '''
    Computes a matrix that orthogonalizes the input matrix x

    x:        an n x d input matrix
    epsilon:  small value added to the diagonal of x^T x to keep it
              positive definite (and hence Cholesky-decomposable)

    returns:  a d x d matrix, ortho_weights, which orthogonalizes x by
              right multiplication
    '''
    x_2 = K.dot(K.transpose(x), x)
    x_2 += K.eye(K.int_shape(x)[1]) * epsilon
    L = tf.cholesky(x_2)
    ortho_weights = tf.transpose(tf.matrix_inverse(L)) * tf.sqrt(
        tf.cast(tf.shape(x)[0], dtype=K.floatx()))
    return ortho_weights
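
A quick check of the orthogonalization property (an illustration, assuming `K` is the Keras backend as the snippet suggests): with W = orthonorm_op(x), the product x @ W satisfies (xW)^T (xW) ~= n * I.

import numpy as np
import tensorflow as tf
from keras import backend as K

x = tf.constant(np.random.randn(100, 5).astype('float32'))
W = orthonorm_op(x)
xW = tf.matmul(x, W)
check = tf.matmul(xW, xW, transpose_a=True)
with tf.Session() as sess:
    print(sess.run(check))  # ~= 100 * identity (n * I)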
Example #4
    def __init__(self, posts, **kwargs):
        FactorisedPosterior.__init__(self, posts, **kwargs)

        # The full covariance matrix is formed from the Cholesky decomposition
        # to ensure that it remains positive definite.
        #
        # To achieve this, we have to create PxP tensor variables for
        # each parameter vertex, but we then extract only the lower triangular
        # elements and train only on these. The diagonal elements
        # are constructed by the FactorisedPosterior
        if kwargs.get("init", None):
            # We are initializing from an existing posterior.
            # The FactorisedPosterior will already have extracted the mean and
            # diagonal of the covariance matrix - we need the Cholesky decomposition
            # of the covariance to initialize the off-diagonal terms.
            self.log.info(" - Initializing posterior covariance from input posterior")
            _mean, cov = kwargs["init"]
            covar_init = tf.cholesky(cov)
        else:
            covar_init = tf.zeros([self.nvertices, self.nparams, self.nparams], dtype=tf.float32)

        self.off_diag_vars_base = self.log_tf(tf.Variable(covar_init, validate_shape=False,
                                                     name='%s_off_diag_vars' % self.name))
        if kwargs.get("suppress_nan", True):
            self.off_diag_vars = tf.where(tf.is_nan(self.off_diag_vars_base), tf.zeros_like(self.off_diag_vars_base), self.off_diag_vars_base)
        else:
            self.off_diag_vars = self.off_diag_vars_base
        self.off_diag_cov_chol = tf.matrix_set_diag(tf.matrix_band_part(self.off_diag_vars, -1, 0),
                                                    tf.zeros([self.nvertices, self.nparams]),
                                                    name='%s_off_diag_cov_chol' % self.name)

        # Combine diagonal and off-diagonal elements into full matrix
        self.cov_chol = tf.add(tf.matrix_diag(self.std), self.off_diag_cov_chol,
                               name='%s_cov_chol' % self.name)

        # Form the covariance matrix from the chol decomposition
        self.cov = tf.matmul(tf.transpose(self.cov_chol, perm=(0, 2, 1)), self.cov_chol,
                             name='%s_cov' % self.name)

        self.cov_chol = self.log_tf(self.cov_chol)
        self.cov = self.log_tf(self.cov)
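
For reference, a standalone sketch of the triangular bookkeeping used above (an illustration, not from the original): tf.matrix_band_part(., -1, 0) keeps the lower triangle, tf.matrix_set_diag zeroes the diagonal so only the strictly lower entries come from the free variable, and the trained diagonal is added back via tf.matrix_diag.

import numpy as np
import tensorflow as tf

raw = tf.constant(np.arange(9.0, dtype=np.float32).reshape(1, 3, 3))
std = tf.constant([[1.0, 2.0, 3.0]])
lower = tf.matrix_band_part(raw, -1, 0)               # lower triangle, diag included
strict = tf.matrix_set_diag(lower, tf.zeros([1, 3]))  # strictly lower triangle
chol = strict + tf.matrix_diag(std)                   # positive diagonal added back
with tf.Session() as sess:
    print(sess.run(chol))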
Example #5
  def get_y_values(self, x_values, num_total_points):
    """Samples y values for the given x values from the GP prior."""
    # Set kernel parameters
    l1 = (
        tf.ones(shape=[self._batch_size, self._y_size, self._x_size]) *
        self._l1_scale)
    sigma_f = tf.ones(
        shape=[self._batch_size, self._y_size]) * self._sigma_scale

    # Pass the x_values through the Gaussian kernel
    # [batch_size, y_size, num_total_points, num_total_points]
    kernel = self._gaussian_kernel(x_values, l1, sigma_f)

    # Calculate Cholesky, using double precision for better stability:
    cholesky = tf.cast(tf.cholesky(tf.cast(kernel, tf.float64)), tf.float32)

    # Sample a curve
    # [batch_size, y_size, num_total_points, 1]
    y_values = tf.matmul(
        cholesky,
        tf.random_normal([self._batch_size, self._y_size, num_total_points, 1]))

    # [batch_size, num_total_points, y_size]
    y_values = tf.transpose(tf.squeeze(y_values, 3), [0, 2, 1])
    return y_values
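
The Cholesky-and-matmul pair is the standard recipe for sampling from a zero-mean multivariate normal with covariance `kernel`: if $K = LL^\top$ and $z \sim \mathcal{N}(0, I)$, then $y = Lz \sim \mathcal{N}(0, K)$, since

$\operatorname{Cov}(Lz) = L\,\mathbb{E}[zz^\top]\,L^\top = LL^\top = K.$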
Example #6
  def generate_curves(self, num_context=None):
    """Builds the op delivering the data.

    Generated functions are `float32` with x values between -2 and 2.

    Args:
      num_context: Number of context points. If None, chosen randomly.

    Returns:
      An `NPRegressionDescription` namedtuple.
    """
    if num_context is None:
      num_context = tf.random_uniform(
          shape=[], minval=3, maxval=self._max_num_context, dtype=tf.int32)

    # If we are testing, we want more target points, evenly distributed over
    # the x-axis, in order to plot the function.
    if self._testing:
      num_target = 400
      num_total_points = num_target
      x_values = tf.tile(
          tf.expand_dims(tf.range(-2., 2., 1. / 100, dtype=tf.float32), axis=0),
          [self._batch_size, 1])
      x_values = tf.expand_dims(x_values, axis=-1)
    # During training the number of target points and their x-positions are
    # selected at random
    else:
      num_target = tf.random_uniform(shape=(), minval=0,
                                     maxval=self._max_num_context - num_context,
                                     dtype=tf.int32)
      num_total_points = num_context + num_target
      x_values = tf.random_uniform(
          [self._batch_size, num_total_points, self._x_size], -2, 2)

    # Set kernel parameters
    # Either choose a set of random parameters for the mini-batch
    if self._random_kernel_parameters:
      l1 = tf.random_uniform([self._batch_size, self._y_size,
                              self._x_size], 0.1, self._l1_scale)
      sigma_f = tf.random_uniform([self._batch_size, self._y_size],
                                  0.1, self._sigma_scale)
    # Or use the same fixed parameters for all mini-batches
    else:
      l1 = tf.ones(shape=[self._batch_size, self._y_size,
                          self._x_size]) * self._l1_scale
      sigma_f = tf.ones(shape=[self._batch_size,
                               self._y_size]) * self._sigma_scale

    # Pass the x_values through the Gaussian kernel
    # [batch_size, y_size, num_total_points, num_total_points]
    kernel = self._gaussian_kernel(x_values, l1, sigma_f)

    # Calculate Cholesky, using double precision for better stability:
    cholesky = tf.cast(tf.cholesky(tf.cast(kernel, tf.float64)), tf.float32)

    # Sample a curve
    # [batch_size, y_size, num_total_points, 1]
    y_values = tf.matmul(
        cholesky,
        tf.random_normal([self._batch_size, self._y_size, num_total_points, 1]))

    # [batch_size, num_total_points, y_size]
    y_values = tf.transpose(tf.squeeze(y_values, 3), [0, 2, 1])

    if self._testing:
      # Select the targets
      target_x = x_values
      target_y = y_values

      # Select the observations
      idx = tf.random_shuffle(tf.range(num_target))
      context_x = tf.gather(x_values, idx[:num_context], axis=1)
      context_y = tf.gather(y_values, idx[:num_context], axis=1)

    else:
      # Select the targets which will consist of the context points as well as
      # some new target points
      target_x = x_values[:, :num_target + num_context, :]
      target_y = y_values[:, :num_target + num_context, :]

      # Select the observations
      context_x = x_values[:, :num_context, :]
      context_y = y_values[:, :num_context, :]

    return NPRegressionDescription(
        context_x=context_x,
        context_y=context_y,
        target_x=target_x,
        target_y=target_y)
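
NPRegressionDescription is not defined in the snippet; a minimal definition consistent with the four fields used here would be (an assumption - the original container may carry additional fields):

import collections

# Hypothetical reconstruction of the container returned above.
NPRegressionDescription = collections.namedtuple(
    "NPRegressionDescription",
    ("context_x", "context_y", "target_x", "target_y"))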
Example #7
    def create_model(self, x, y, *args):

        if self.process_y:

            self.f_mu = Regression().fit(x, y)
            self.Ymu = self.f_mu(x)
            self.Ys2 = np.std(y - self.Ymu)

            y = (y - self.Ymu) / self.Ys2

        self.t_X = tf.constant(x, dtype=self.dtype)
        self.t_Y = tf.constant(y, dtype=self.dtype)

        self.t_N = tf.shape(self.t_Y)[0]
        self.t_D = tf.shape(self.t_Y)[1]
        self.t_Q = tf.shape(self.t_X)[0]
        self.t_M = tf.shape(self.t_X)[1]

        self.M = x.shape[1]

        if self.kernel == 'Squared Exponential':

            self.kernel_function = self.sq_exp_kernel

            self.signal_var = self.init_variable(args[0][0], positive=True)
            self.lengthscale = self.init_variable([args[0][1]] * self.M,
                                                  positive=True,
                                                  multi=self.variable_l)
            self.noise_var = self.init_variable(args[0][2], positive=True)

            self.hparamd = ['Signal Variance', 'Lengthscale']
            self.hparams = [self.signal_var, self.lengthscale]

        if self.kernel == 'Periodic':

            self.kernel_function = self.sq_exp_kernel

            self.signal_var = self.init_variable(args[0][0], positive=True)
            self.gamma = self.init_variable(args[0][1], positive=True)
            self.period = self.init_variable(args[0][2], positive=True)
            self.noise_var = self.init_variable(args[0][3], positive=True)

            self.p_mu = self.init_variable(tf.log(self.t_Y), False)
            self.p_s2 = self.init_variable(1.0, True)

            self.hparamd = ['Signal Variance', 'Gamma', 'Period']
            self.hparams = [self.signal_var, self.gamma, self.period]

        self.create_kernel = lambda t_x1, t_x2: self.kernel_function(
            t_x1, t_x2, self.hparams)

        ### CREATING THE TRAINING MATRICES ###

        self.K_xx = self.create_kernel(self.t_X, self.t_X) + (
            self.noise_var + self.jitter) * tf.eye(self.t_N, dtype=self.dtype)

        self.L_xx = tf.cholesky(self.K_xx)

        self.logdet = 2.0 * tf.reduce_sum(tf.log(tf.diag_part(self.L_xx)))

        self.Kinv_YYt = 0.5 * tf.reduce_sum(
            tf.square(
                tf.matrix_triangular_solve(self.L_xx, self.t_Y, lower=True)))

        ### Initialising loose priors ###

        self.hprior = 0

        if self.variable_l:

            self.hprior += 0.5 * tf.square(tf.log(self.hparams[0]))

            self.hprior += tf.reduce_sum(0.5 *
                                         tf.square(tf.log(self.hparams[1])))

        else:

            for i in self.hparams:

                self.hprior += 0.5 * tf.square(tf.log(i))

        self.noise_prior = 0.5 * tf.square(tf.log(self.noise_var))

        ### Negative marginal log likelihood under Gaussian assumption ###

        if self.distribution == 'Gaussian':

            pi_term = tf.constant(0.5 * np.log(2.0 * np.pi), dtype=self.dtype)

            self.term1 = (
                pi_term * tf.cast(self.t_D, dtype=self.dtype) * tf.cast(self.t_N, dtype=self.dtype)
                + 0.5 * tf.cast(self.t_D, dtype=self.dtype) * self.logdet
                + self.Kinv_YYt)

        if self.distribution == 'Poisson' and self.kernel == 'Periodic':

            self.Kinv = tf.cholesky_solve(self.L_xx,
                                          tf.eye(self.t_N, dtype=self.dtype))

            self.term1 = (
                -tf.reduce_sum(self.t_Y * self.p_mu - tf.exp(self.p_mu + self.p_s2 / 2))
                + 0.5 * (tf.trace(self.Kinv @ (self.p_s2 * tf.eye(self.t_N, dtype=self.dtype)
                                               + self.p_mu @ tf.transpose(self.p_mu)))
                         - tf.cast(self.t_N, dtype=self.dtype) + self.logdet
                         - tf.cast(self.t_N, dtype=self.dtype) * tf.log(self.p_s2)))

        self.objective = self.term1 + self.hprior + self.noise_prior
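
For reference, the Gaussian branch assembles the standard negative log marginal likelihood of a GP with $D$ independent output dimensions, computing both the log-determinant and the quadratic term from the Cholesky factor $L$:

$-\log p(Y \mid X) = \frac{ND}{2}\log 2\pi + \frac{D}{2}\log\lvert K\rvert + \frac{1}{2}\operatorname{tr}\!\left(Y^\top K^{-1} Y\right), \qquad \log\lvert K\rvert = 2\sum_i \log L_{ii}, \qquad \frac{1}{2}\operatorname{tr}\!\left(Y^\top K^{-1} Y\right) = \frac{1}{2}\lVert L^{-1}Y\rVert_F^2.$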
Example #8
def base_conditional(Kmn,
                     Kmm,
                     Knn,
                     f,
                     *,
                     full_cov=False,
                     q_sqrt=None,
                     white=False):
    """
    Given g1 and g2, and distributions p and q such that
      p(g2)    = N(g2; 0, Kmm)
      p(g1)    = N(g1; 0, Knn)
      p(g1|g2) = N(g1; Knm Kmm^{-1} g2, Knn - Knm Kmm^{-1} Kmn)
    and
      q(g2) = N(g2; f, q_sqrt q_sqrt^T),
    this method computes the mean and (co)variance of
      q(g1) = \int p(g1|g2) q(g2) dg2
    :param Kmn: M x N
    :param Kmm: M x M
    :param Knn: N x N  or  N
    :param f: M x R
    :param full_cov: bool
    :param q_sqrt: None or R x M x M (lower triangular)
    :param white: bool
    :return: mean N x R, (co)variance N x R  or  R x N x N
    """
    # compute kernel stuff
    num_func = tf.shape(f)[1]  # R
    Lm = tf.cholesky(Kmm)

    # Compute the projection matrix A
    A = tf.matrix_triangular_solve(Lm, Kmn, lower=True)

    # compute the covariance due to the conditioning
    if full_cov:
        fvar = Knn - tf.matmul(A, A, transpose_a=True)
        fvar = tf.tile(fvar[None, :, :], [num_func, 1, 1])  # R x N x N
    else:
        fvar = Knn - tf.reduce_sum(tf.square(A), 0)
        fvar = tf.tile(fvar[None, :], [num_func, 1])  # R x N

    # another backsubstitution in the unwhitened case
    if not white:
        A = tf.matrix_triangular_solve(tf.transpose(Lm), A, lower=False)

    # construct the conditional mean
    fmean = tf.matmul(A, f, transpose_a=True)

    if q_sqrt is not None:
        if q_sqrt.get_shape().ndims == 2:
            LTA = A * tf.expand_dims(tf.transpose(q_sqrt), 2)  # R x M x N
        elif q_sqrt.get_shape().ndims == 3:
            L = q_sqrt
            A_tiled = tf.tile(tf.expand_dims(A, 0), tf.stack([num_func, 1, 1]))
            LTA = tf.matmul(L, A_tiled, transpose_a=True)  # R x M x N
        else:  # pragma: no cover
            raise ValueError("Bad dimension for q_sqrt: %s" %
                             str(q_sqrt.get_shape().ndims))
        if full_cov:
            fvar = fvar + tf.matmul(LTA, LTA, transpose_a=True)  # R x N x N
        else:
            fvar = fvar + tf.reduce_sum(tf.square(LTA), 1)  # R x N

    if not full_cov:
        fvar = tf.transpose(fvar)  # N x R

    return fmean, fvar  # N x R, R x N x N or N x R
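
A toy invocation (an illustration, not from the original): M = 4 inducing inputs, N = 6 test inputs, R = 2 output functions, with an RBF kernel built in NumPy.

import numpy as np
import tensorflow as tf

def rbf(a, b):
    # Squared-exponential kernel on 1-D inputs.
    return np.exp(-0.5 * (a - b.T) ** 2).astype(np.float32)

rng = np.random.RandomState(0)
Z, X = rng.randn(4, 1), rng.randn(6, 1)              # inducing / test inputs
Kmn = tf.constant(rbf(Z, X))                         # M x N
Kmm = tf.constant(rbf(Z, Z) + 1e-6 * np.eye(4, dtype=np.float32))
Knn = tf.constant(np.diag(rbf(X, X)))                # N (marginal variances only)
f = tf.constant(rng.randn(4, 2).astype(np.float32))  # M x R
fmean, fvar = base_conditional(Kmn, Kmm, Knn, f)
with tf.Session() as sess:
    mean_val, var_val = sess.run([fmean, fvar])      # both N x R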