def test_q_sqrt_constraints(inducing_points, kernel, mu, white):
    """ Test that sending in an unconstrained q_sqrt returns the same conditional
    evaluation and gradients. This is important to match the behaviour of the KL, which
    enforces q_sqrt is triangular.
    """

    tril = np.tril(rng.randn(Ln, Nn, Nn))

    q_sqrt_constrained = Parameter(tril, transform=triangular())
    q_sqrt_unconstrained = Parameter(tril)

    diff_before_gradient_step = (q_sqrt_constrained - q_sqrt_unconstrained).numpy()
    assert_allclose(diff_before_gradient_step, 0)

    kls = []
    for q_sqrt in [q_sqrt_constrained, q_sqrt_unconstrained]:

        with tf.GradientTape() as tape:
            kl = prior_kl(inducing_points, kernel, mu, q_sqrt, whiten=white)

        grad = tape.gradient(kl, q_sqrt.unconstrained_variable)
        q_sqrt.unconstrained_variable.assign_sub(grad)
        kls.append(kl)

    diff_kls_before_gradient_step = kls[0] - kls[1]

    assert_allclose(diff_kls_before_gradient_step, 0)

    diff_after_gradient_step = (q_sqrt_constrained - q_sqrt_unconstrained).numpy()
    assert_allclose(diff_after_gradient_step, 0)
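The test above relies on module-level globals (rng, Ln, Nn) and pytest fixtures (inducing_points, kernel, mu, white) that are not shown. Below is a minimal sketch of what that setup could look like; the fixture bodies, kernel choice and shapes are illustrative assumptions, not the original test module.

# Illustrative setup only: fixture names follow the test signature above,
# but the concrete kernel, shapes and values are assumptions.
import numpy as np
import pytest
import tensorflow as tf
from numpy.testing import assert_allclose

from gpflow import Parameter
from gpflow.inducing_variables import InducingPoints
from gpflow.kernels import SquaredExponential
from gpflow.kullback_leiblers import prior_kl
from gpflow.utilities import triangular

rng = np.random.RandomState(0)
Nn, Ln = 5, 2  # number of inducing points, number of latent GPs


@pytest.fixture
def kernel():
    return SquaredExponential()


@pytest.fixture
def inducing_points():
    return InducingPoints(rng.randn(Nn, 1))


@pytest.fixture
def mu():
    return tf.convert_to_tensor(rng.randn(Nn, Ln))


@pytest.fixture(params=[True, False])
def white(request):
    return request.param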
Example #2
def test_q_sqrt_constraints(Xdata, Xnew, kernel, mu, white):
    """ Test that sending in an unconstrained q_sqrt returns the same conditional
    evaluation and gradients. This is important to match the behaviour of the KL, which
    enforces q_sqrt is triangular.
    """

    tril = np.tril(rng.randn(Ln, Nn, Nn))

    q_sqrt_constrained = Parameter(tril, transform=triangular())
    q_sqrt_unconstrained = Parameter(tril)

    diff_before_gradient_step = (q_sqrt_constrained -
                                 q_sqrt_unconstrained).numpy()
    assert_allclose(diff_before_gradient_step, 0)

    Fstars = []
    for q_sqrt in [q_sqrt_constrained, q_sqrt_unconstrained]:

        with tf.GradientTape() as tape:
            _, Fstar_var = conditional(Xnew,
                                       Xdata,
                                       kernel,
                                       mu,
                                       q_sqrt=q_sqrt,
                                       white=white)

        grad = tape.gradient(Fstar_var, q_sqrt.unconstrained_variable)
        q_sqrt.unconstrained_variable.assign_sub(grad)
        Fstars.append(Fstar_var)

    diff_Fstar_before_gradient_step = Fstars[0] - Fstars[1]
    assert_allclose(diff_Fstar_before_gradient_step, 0)

    diff_after_gradient_step = (q_sqrt_constrained -
                                q_sqrt_unconstrained).numpy()
    assert_allclose(diff_after_gradient_step, 0)
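Both tests exercise GPflow's triangular transform: a Parameter built with transform=triangular() stores only the packed lower-triangular entries in its unconstrained variable, while a plain Parameter stores the full matrix, yet both expose the same value. A small standalone sketch of that behaviour (the shapes below are illustrative, not taken from the tests):

# Standalone illustration of constrained vs. unconstrained q_sqrt parameters;
# L and N are illustrative stand-ins for the Ln/Nn globals used above.
import numpy as np
from gpflow import Parameter
from gpflow.utilities import triangular

L, N = 2, 5
tril = np.tril(np.random.randn(L, N, N))

p_constrained = Parameter(tril, transform=triangular())
p_unconstrained = Parameter(tril)

# Both parameters expose the same lower-triangular value ...
np.testing.assert_allclose(p_constrained.numpy(), p_unconstrained.numpy())

# ... but gradients in the tests are taken with respect to the underlying
# unconstrained variables, which have different shapes:
print(p_constrained.unconstrained_variable.shape)    # (2, 15): packed lower triangle
print(p_unconstrained.unconstrained_variable.shape)  # (2, 5, 5): full matrix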
Example #3
    def __init__(
            self,
            input_dim: int,
            output_dim: int,
            num_data: int,
            w_mu: Optional[np.ndarray] = None,
            w_sqrt: Optional[np.ndarray] = None,
            activation: Optional[Callable] = None,
            is_mean_field: bool = True,
            temperature: float = 1e-4,  # TODO is this intentional?
    ):
        """
        :param input_dim: The input dimension (excluding bias) of this layer.
        :param output_dim: The output dimension of this layer.
        :param num_data: The number of points in the training dataset (used for
            scaling the KL regulariser).
        :param w_mu: Initial value of the variational mean for weights + bias.
            If not specified, this defaults to `xavier_initialization_numpy`
            for the weights and zero for the bias.
        :param w_sqrt: Initial value of the variational Cholesky of the
            (co)variance for weights + bias. If not specified, this defaults to
            1e-5 * Identity.
        :param activation: The activation function. If not specified, this defaults to the identity.
        :param is_mean_field: Determines whether the approximation to the
            weight posterior is mean field. Must be consistent with the shape
            of ``w_sqrt``, if specified.
        :param temperature: For cooling (< 1.0) or heating (> 1.0) the posterior.
        """

        super().__init__(dtype=default_float())

        assert input_dim >= 1
        assert output_dim >= 1
        assert num_data >= 1

        if w_mu is not None:  # add + 1 for the bias
            assert w_mu.shape == ((input_dim + 1) * output_dim, )
        if w_sqrt is not None:
            if not is_mean_field:
                assert w_sqrt.shape == (
                    (input_dim + 1) * output_dim,
                    (input_dim + 1) * output_dim,
                )
            else:
                assert w_sqrt.shape == ((input_dim + 1) * output_dim, )
        assert temperature > 0.0

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_data = num_data

        self.w_mu_ini = w_mu
        self.w_sqrt_ini = w_sqrt

        self.activation = activation
        self.is_mean_field = is_mean_field
        self.temperature = temperature

        self.dim = (input_dim + 1) * output_dim
        self.full_output_cov = False
        self.full_cov = False

        self.w_mu = Parameter(np.zeros((self.dim,)), dtype=default_float(), name="w_mu")  # [dim]

        self.w_sqrt = Parameter(
            np.zeros((self.dim, self.dim)) if not self.is_mean_field else np.ones((self.dim,)),
            transform=triangular() if not self.is_mean_field else positive(),
            dtype=default_float(),
            name="w_sqrt",
        )  # [dim, dim] or [dim]
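A minimal construction sketch for this layer. The enclosing class is not shown above, so the class name and import path (gpflux.layers.BayesianDenseLayer) are assumptions; the dimensions and activation are illustrative.

# Illustrative only: the import path is an assumption; values are arbitrary.
import tensorflow as tf
from gpflux.layers import BayesianDenseLayer

input_dim, output_dim, num_data = 3, 2, 100

layer = BayesianDenseLayer(
    input_dim=input_dim,
    output_dim=output_dim,
    num_data=num_data,
    activation=tf.nn.relu,  # defaults to the identity if omitted
)

# Mean-field by default, so w_mu and w_sqrt both have shape [(input_dim + 1) * output_dim].
print(layer.w_mu.shape, layer.w_sqrt.shape)  # (8,) (8,)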
Example #4
    def __init__(
        self,
        kernel: MultioutputKernel,
        inducing_variable: MultioutputInducingVariables,
        num_data: int,
        mean_function: Optional[MeanFunction] = None,
        *,
        num_samples: Optional[int] = None,
        full_cov: bool = False,
        full_output_cov: bool = False,
        num_latent_gps: Optional[int] = None,
        whiten: bool = True,
        name: Optional[str] = None,
        verbose: bool = False,
    ):
        """
        :param kernel: The multioutput kernel for this layer.
        :param inducing_variable: The inducing features for this layer.
        :param num_data: The number of points in the training dataset (see :attr:`num_data`).
        :param mean_function: The mean function that will be applied to the
            inputs. Default: :class:`~gpflow.mean_functions.Identity`.

            .. note:: The Identity mean function requires the input and output
                dimensionality of this layer to be the same. If you want to
                change the dimensionality in a layer, you may want to provide a
                :class:`~gpflow.mean_functions.Linear` mean function instead.

        :param num_samples: The number of samples to draw when converting the
            :class:`~tfp.layers.DistributionLambda` into a `tf.Tensor`, see
            :meth:`_convert_to_tensor_fn`. Will be stored in the
            :attr:`num_samples` attribute.  If `None` (the default), draw a
            single sample without prefixing the sample shape (see
            :class:`tfp.distributions.Distribution`'s `sample()
            <https://www.tensorflow.org/probability/api_docs/python/tfp/distributions/Distribution#sample>`_
            method).
        :param full_cov: Sets default behaviour of calling this layer
            (:attr:`full_cov` attribute):
            If `False` (the default), only predict marginals (diagonal
            of covariance) with respect to inputs.
            If `True`, predict full covariance over inputs.
        :param full_output_cov: Sets default behaviour of calling this layer
            (:attr:`full_output_cov` attribute):
            If `False` (the default), only predict marginals (diagonal
            of covariance) with respect to outputs.
            If `True`, predict full covariance over outputs.
        :param num_latent_gps: The number of (latent) GPs in the layer
            (which can be different from the number of outputs, e.g. with a
            :class:`~gpflow.kernels.LinearCoregionalization` kernel).
            This is used to determine the size of the
            variational parameters :attr:`q_mu` and :attr:`q_sqrt`.
            If possible, it is inferred from the *kernel* and *inducing_variable*.
        :param whiten: If `True` (the default), uses the whitened parameterisation
            of the inducing variables; see :attr:`whiten`.
        :param name: The name of this layer.
        :param verbose: The verbosity mode. Set this parameter to `True`
            to show debug information.
        """

        super().__init__(
            make_distribution_fn=self._make_distribution_fn,
            convert_to_tensor_fn=self._convert_to_tensor_fn,
            dtype=default_float(),
            name=name,
        )

        self.kernel = kernel
        self.inducing_variable = inducing_variable

        self.num_data = num_data

        if mean_function is None:
            mean_function = Identity()
        self.mean_function = mean_function

        self.full_output_cov = full_output_cov
        self.full_cov = full_cov
        self.whiten = whiten
        self.verbose = verbose

        try:
            num_inducing, self.num_latent_gps = verify_compatibility(
                kernel, mean_function, inducing_variable)
            # TODO: if num_latent_gps is not None, verify it is equal to self.num_latent_gps
        except GPLayerIncompatibilityException as e:
            if num_latent_gps is None:
                raise e

            if self.verbose:
                warnings.warn(
                    "Could not verify the compatibility of the `kernel`, `inducing_variable` "
                    "and `mean_function`. We advise using `gpflux.helpers.construct_*` to create "
                    "compatible kernels and inducing variables. As "
                    f"`num_latent_gps={num_latent_gps}` has been specified explicitly, this will "
                    "be used to create the `q_mu` and `q_sqrt` parameters.")

            num_inducing, self.num_latent_gps = (
                len(inducing_variable),
                num_latent_gps,
            )

        self.q_mu = Parameter(
            np.zeros((num_inducing, self.num_latent_gps)),
            dtype=default_float(),
            name=f"{self.name}_q_mu" if self.name else "q_mu",
        )  # [num_inducing, num_latent_gps]

        self.q_sqrt = Parameter(
            np.stack(
                [np.eye(num_inducing) for _ in range(self.num_latent_gps)]),
            transform=triangular(),
            dtype=default_float(),
            name=f"{self.name}_q_sqrt" if self.name else "q_sqrt",
        )  # [num_latent_gps, num_inducing, num_inducing]

        self.num_samples = num_samples
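A minimal construction sketch for this layer, assuming the enclosing class is gpflux.layers.GPLayer. The shared-independent kernel and inducing variables below are illustrative; the warning above points to gpflux.helpers.construct_* as the recommended way to build compatible pairs.

# Illustrative only: class/import names, shapes and values are assumptions.
import numpy as np
from gpflow.inducing_variables import InducingPoints, SharedIndependentInducingVariables
from gpflow.kernels import SharedIndependent, SquaredExponential
from gpflux.layers import GPLayer

num_inducing, input_dim, output_dim, num_data = 20, 1, 1, 100

# The default Identity mean function requires input_dim == output_dim, as here.
kernel = SharedIndependent(SquaredExponential(), output_dim=output_dim)
Z = np.linspace(-1.0, 1.0, num_inducing).reshape(-1, input_dim)
inducing_variable = SharedIndependentInducingVariables(InducingPoints(Z))

layer = GPLayer(
    kernel,
    inducing_variable,
    num_data=num_data,
    num_latent_gps=output_dim,  # may also be inferred from the kernel and inducing variable
)

# q_mu: [num_inducing, num_latent_gps]; q_sqrt: [num_latent_gps, num_inducing, num_inducing]
print(layer.q_mu.shape, layer.q_sqrt.shape)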
Example #5
    def __init__(
        self,
        input_dim: int,
        output_dim: int,
        num_data: int,
        w_mu: Optional[np.ndarray] = None,
        w_sqrt: Optional[np.ndarray] = None,
        activation: Optional[Callable] = None,
        is_mean_field: bool = True,
        temperature: float = 1e-4,
        returns_samples: bool = True,
    ):
        """
        A Bayesian dense layer for variational Bayesian neural nets. This layer holds the
        weight mean and sqrt as well as the temperature for cooling (or heating) the posterior.

        :param input_dim: The layer's input dimension (excluding bias)
        :param output_dim: The layer's output dimension
        :param num_data: number of data points
        :param w_mu: Initial value of the variational mean (weights + bias)
        :param w_sqrt: Initial value of the variational Cholesky (covering weights + bias)
        :param activation: The type of activation function (None is linear)
        :param is_mean_field: Determines mean field approximation of the weight posterior
        :param temperature: For cooling or heating the posterior
        :param returns_samples: If True, return samples on calling the layer,
             Else return mean and variance
        """

        super().__init__(dtype=default_float())

        assert input_dim >= 1
        assert output_dim >= 1
        assert num_data >= 1
        if w_mu is not None:  # add + 1 for the bias
            assert w_mu.shape == ((input_dim + 1) * output_dim,)
        if w_sqrt is not None:
            if not is_mean_field:
                assert w_sqrt.shape == (
                    (input_dim + 1) * output_dim,
                    (input_dim + 1) * output_dim,
                )
            else:
                assert w_sqrt.shape == ((input_dim + 1) * output_dim,)
        assert temperature > 0.0

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_data = num_data

        self.w_mu_ini = w_mu
        self.w_sqrt_ini = w_sqrt

        self.activation = activation
        self.is_mean_field = is_mean_field
        self.temperature = temperature
        self.returns_samples = returns_samples

        self.dim = (input_dim + 1) * output_dim
        self.full_output_cov = False
        self.full_cov = False

        self.w_mu = Parameter(np.zeros((self.dim,)), dtype=default_float(), name="w_mu")  # [dim]

        self.w_sqrt = Parameter(
            np.zeros((self.dim, self.dim)) if not self.is_mean_field else np.ones((self.dim,)),
            transform=triangular() if not self.is_mean_field else positive(),
            dtype=default_float(),
            name="w_sqrt",
        )  # [dim, dim] or [dim]
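A minimal sketch of initialising this older, returns_samples variant with an explicit full-covariance Cholesky. The class name and import path are assumptions (they must match whichever GPflux version ships this constructor); the shapes follow the assertions above.

# Illustrative only: the import path is an assumption for this older variant.
import numpy as np
from gpflux.layers import BayesianDenseLayer

input_dim, output_dim, num_data = 3, 2, 100
dim = (input_dim + 1) * output_dim  # weights + bias, flattened

w_mu = np.zeros((dim,))
w_sqrt = 1e-5 * np.eye(dim)  # full Cholesky, so is_mean_field must be False

layer = BayesianDenseLayer(
    input_dim=input_dim,
    output_dim=output_dim,
    num_data=num_data,
    w_mu=w_mu,
    w_sqrt=w_sqrt,
    is_mean_field=False,
    returns_samples=True,  # per the docstring: calling the layer returns samples
)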