class LinearController(gpflow.Module):
    def __init__(self, state_dim, control_dim, max_action=1.0):
        gpflow.Module.__init__(self)
        self.W = Parameter(np.random.rand(control_dim, state_dim))
        self.b = Parameter(np.random.rand(1, control_dim))
        self.max_action = max_action

    def compute_action(self, m, s, squash=True):
        '''
        Simple affine action:  M <- m Wᵀ + b
        IN: mean (m) and variance (s) of the state
        OUT: mean (M) and variance (S) of the action, plus input-output covariance (V)
        '''
        M = m @ tf.transpose(self.W) + self.b  # mean output
        S = self.W @ s @ tf.transpose(self.W)  # output variance
        V = tf.transpose(self.W)  # input-output covariance
        if squash:
            M, S, V2 = squash_sin(M, S, self.max_action)
            V = V @ V2
        return M, S, V

    def randomize(self):
        mean = 0
        sigma = 1
        self.W.assign(mean + sigma * np.random.normal(size=self.W.shape))
        self.b.assign(mean + sigma * np.random.normal(size=self.b.shape))
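# A minimal usage sketch for LinearController (illustrative, not part of the
# original example). It assumes numpy/tensorflow/gpflow are imported as
# np/tf/gpflow; squash=False is passed so the sketch does not depend on the
# squash_sin helper used above.
controller = LinearController(state_dim=2, control_dim=1, max_action=1.0)
m = tf.constant(np.zeros((1, 2)))       # state mean, shape [1, state_dim]
s = tf.constant(0.1 * np.eye(2))        # state covariance, [state_dim, state_dim]
M, S, V = controller.compute_action(m, s, squash=False)
print(M.shape, S.shape, V.shape)        # (1, 1), (1, 1), (2, 1)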
Example #2
class BayesianDenseLayer(TrackableLayer):
    """
    A dense (fully-connected) layer for variational Bayesian neural networks.

    This layer holds the mean and the square root (Cholesky factor) of the
    (co)variance of the Gaussian distribution over the weights and bias. It
    also carries a temperature for cooling (or heating) the posterior.
    """
    def __init__(
            self,
            input_dim: int,
            output_dim: int,
            num_data: int,
            w_mu: Optional[np.ndarray] = None,
            w_sqrt: Optional[np.ndarray] = None,
            activation: Optional[Callable] = None,
            is_mean_field: bool = True,
            temperature: float = 1e-4,  # TODO is this intentional?
    ):
        """
        :param input_dim: The input dimension (excluding bias) of this layer.
        :param output_dim: The output dimension of this layer.
        :param num_data: The number of points in the training dataset (used for
            scaling the KL regulariser).
        :param w_mu: Initial value of the variational mean for weights + bias.
            If not specified, this defaults to `xavier_initialization_numpy`
            for the weights and zero for the bias.
        :param w_sqrt: Initial value of the variational Cholesky of the
            (co)variance for weights + bias. If not specified, this defaults to
            1e-5 * Identity.
        :param activation: The activation function. If not specified, this defaults to the identity.
        :param is_mean_field: Determines whether the approximation to the
            weight posterior is mean field. Must be consistent with the shape
            of ``w_sqrt``, if specified.
        :param temperature: For cooling (< 1.0) or heating (> 1.0) the posterior.
        """

        super().__init__(dtype=default_float())

        assert input_dim >= 1
        assert output_dim >= 1
        assert num_data >= 1

        if w_mu is not None:  # add + 1 for the bias
            assert w_mu.shape == ((input_dim + 1) * output_dim, )
        if w_sqrt is not None:
            if not is_mean_field:
                assert w_sqrt.shape == (
                    (input_dim + 1) * output_dim,
                    (input_dim + 1) * output_dim,
                )
            else:
                assert w_sqrt.shape == ((input_dim + 1) * output_dim, )
        assert temperature > 0.0

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_data = num_data

        self.w_mu_ini = w_mu
        self.w_sqrt_ini = w_sqrt

        self.activation = activation
        self.is_mean_field = is_mean_field
        self.temperature = temperature

        self.dim = (input_dim + 1) * output_dim
        self.full_output_cov = False
        self.full_cov = False

        self.w_mu = Parameter(np.zeros((self.dim, )),
                              dtype=default_float(),
                              name="w_mu")  # [dim]

        self.w_sqrt = Parameter(
            np.zeros(
                (self.dim, self.dim)) if not self.is_mean_field else np.ones(
                    (self.dim, )),
            transform=triangular() if not self.is_mean_field else positive(),
            dtype=default_float(),
            name="w_sqrt",
        )  # [dim, dim] or [dim]

    def initialize_variational_distribution(self) -> None:
        if self.w_mu_ini is None:
            w = xavier_initialization_numpy(self.input_dim, self.output_dim)
            b = np.zeros((1, self.output_dim))
            self.w_mu_ini = np.concatenate((w, b), axis=0).reshape(
                (self.dim, ))
        self.w_mu.assign(self.w_mu_ini)

        if self.w_sqrt_ini is None:
            if not self.is_mean_field:
                self.w_sqrt_ini = 1e-5 * np.eye(self.dim)
            else:
                self.w_sqrt_ini = 1e-5 * np.ones((self.dim, ))
        self.w_sqrt.assign(self.w_sqrt_ini)

    def build(self, input_shape: ShapeType) -> None:
        """Build the variables necessary on first call"""
        super().build(input_shape)
        self.initialize_variational_distribution()

    def predict_samples(
        self,
        inputs: TensorType,
        *,
        num_samples: Optional[int] = None,
    ) -> tf.Tensor:
        """
        Samples from the approximate posterior at N test inputs, with input_dim = D, output_dim = Q.

        :param inputs: The inputs to predict at; shape ``[N, D]``.
        :param num_samples: The number of samples, S, to draw.
        :returns: Samples, shape ``[S, N, Q]`` if S is not None else ``[N, Q]``.
        """
        _num_samples = num_samples or 1
        z = tf.random.normal((self.dim, _num_samples),
                             dtype=default_float())  # [dim, S]
        if not self.is_mean_field:
            w = self.w_mu[:, None] + tf.matmul(self.w_sqrt, z)  # [dim, S]
        else:
            w = self.w_mu[:, None] + self.w_sqrt[:, None] * z  # [dim, S]

        N = tf.shape(inputs)[0]
        inputs_concat_1 = tf.concat(
            (inputs, tf.ones(
                (N, 1), dtype=default_float())), axis=-1)  # [N, D+1]
        samples = tf.tensordot(
            inputs_concat_1,
            tf.reshape(tf.transpose(w),
                       (_num_samples, self.input_dim + 1, self.output_dim)),
            [[-1], [1]],
        )  # [N, S, Q]
        if num_samples is None:
            samples = tf.squeeze(samples, axis=-2)  # [N, Q]
        else:
            samples = tf.transpose(samples, perm=[1, 0, 2])  # [S, N, Q]

        if self.activation is not None:
            samples = self.activation(samples)

        return samples

    def call(
            self,
            inputs: TensorType,
            training: Optional[bool] = False
    ) -> Union[tf.Tensor, MeanAndVariance]:
        """
        The default behaviour upon calling this layer.
        """
        sample = self.predict_samples(
            inputs,
            num_samples=None,
        )

        # TF quirk: add_loss must add a tensor to compile
        if training:
            # Wenzel et al. 2020: How good is the Bayes posterior in DNNs really?
            loss = self.temperature * self.prior_kl()
        else:
            loss = tf.constant(0.0, dtype=default_float())
        loss_per_datapoint = loss / self.num_data

        self.add_loss(loss_per_datapoint)

        return sample  # [N, Q]

    def prior_kl(self) -> tf.Tensor:
        """
        Returns the KL divergence ``KL[q(u) ∥ p(u)]`` from the variational distribution
        ``q(u) = N(w_mu, w_sqrt²)`` to the prior ``p(u) = N(0, I)``.
        """
        return gauss_kl(
            self.w_mu[:, None],
            self.w_sqrt[None] if not self.is_mean_field else self.w_sqrt[:, None],
        )
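# A minimal usage sketch for the layer above (illustrative, not part of the
# original example). It assumes numpy/tensorflow are imported as np/tf and
# that the helpers referenced above (TrackableLayer, default_float,
# xavier_initialization_numpy, gauss_kl, ...) are importable.
layer = BayesianDenseLayer(input_dim=3, output_dim=2, num_data=100)
X = np.random.randn(5, 3)                            # N=5 inputs with D=3 features
y = layer(X)                                         # one posterior sample, shape [5, 2]
samples = layer.predict_samples(X, num_samples=10)   # shape [10, 5, 2]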
Example #3
class BayesianDenseLayer(TrackableLayer):
    """A Bayesian dense layer for variational Bayesian neural networks"""

    def __init__(
        self,
        input_dim: int,
        output_dim: int,
        num_data: int,
        w_mu: Optional[np.ndarray] = None,
        w_sqrt: Optional[np.ndarray] = None,
        activation: Optional[Callable] = None,
        is_mean_field: bool = True,
        temperature: float = 1e-4,
        returns_samples: bool = True,
    ):
        """
        A Bayesian dense layer for variational Bayesian neural nets. This layer holds the
        mean and square root of the variational distribution over the weights (and bias),
        as well as the temperature for cooling (or heating) the posterior.

        :param input_dim: The layer's input dimension (excluding bias)
        :param output_dim: The layer's output dimension
        :param num_data: The number of data points (used for scaling the KL regulariser)
        :param w_mu: Initial value of the variational mean (weights + bias)
        :param w_sqrt: Initial value of the variational Cholesky (covering weights + bias)
        :param activation: The activation function (None means linear)
        :param is_mean_field: Determines whether the weight-posterior approximation is mean field
        :param temperature: For cooling (< 1.0) or heating (> 1.0) the posterior
        :param returns_samples: If True, return samples on calling the layer;
             else return mean and variance
        """

        super().__init__(dtype=default_float())

        assert input_dim >= 1
        assert output_dim >= 1
        assert num_data >= 1
        if w_mu is not None:  # add + 1 for the bias
            assert w_mu.shape == ((input_dim + 1) * output_dim,)
        if w_sqrt is not None:
            if not is_mean_field:
                assert w_sqrt.shape == (
                    (input_dim + 1) * output_dim,
                    (input_dim + 1) * output_dim,
                )
            else:
                assert w_sqrt.shape == ((input_dim + 1) * output_dim,)
        assert temperature > 0.0

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_data = num_data

        self.w_mu_ini = w_mu
        self.w_sqrt_ini = w_sqrt

        self.activation = activation
        self.is_mean_field = is_mean_field
        self.temperature = temperature
        self.returns_samples = returns_samples

        self.dim = (input_dim + 1) * output_dim
        self.full_output_cov = False
        self.full_cov = False

        self.w_mu = Parameter(np.zeros((self.dim,)), dtype=default_float(), name="w_mu")  # [dim]

        self.w_sqrt = Parameter(
            np.zeros((self.dim, self.dim)) if not self.is_mean_field else np.ones((self.dim,)),
            transform=triangular() if not self.is_mean_field else positive(),
            dtype=default_float(),
            name="w_sqrt",
        )  # [dim, dim] or [dim]

    def initialize_variational_distribution(self) -> None:
        if self.w_mu_ini is None:
            w = xavier_initialization_numpy(self.input_dim, self.output_dim)
            b = np.zeros((1, self.output_dim))
            self.w_mu_ini = np.concatenate((w, b), axis=0).reshape((self.dim,))
        self.w_mu.assign(self.w_mu_ini)

        if self.w_sqrt_ini is None:
            if not self.is_mean_field:
                self.w_sqrt_ini = 1e-5 * np.eye(self.dim)
            else:
                self.w_sqrt_ini = 1e-5 * np.ones((self.dim,))
        self.w_sqrt.assign(self.w_sqrt_ini)

    def build(self, input_shape: ShapeType) -> None:
        """Build the variables necessary on first call"""
        super().build(input_shape)
        self.initialize_variational_distribution()

    def predict_samples(
        self,
        inputs: TensorType,
        *,
        num_samples: Optional[int] = None,
        full_output_cov: bool = False,
        full_cov: bool = False,
        whiten: bool = False,
    ) -> tf.Tensor:
        """
        Draw sample predictions at N test inputs, with input_dim = D, output_dim = Q.

        :param inputs: the inputs to predict at; shape [N, D]
        :param num_samples: the number of samples S to draw
        :param full_output_cov: must be False (not supported for now)
        :param full_cov: must be False (not supported for now)
        :param whiten: must be False (not sensible for Bayesian neural nets)
        :returns: samples; shape [S, N, Q] if S is not None else [N, Q]
        """
        assert full_output_cov is False
        assert full_cov is False
        assert whiten is False

        _num_samples = num_samples or 1
        z = tf.random.normal((self.dim, _num_samples), dtype=default_float())  # [dim, S]
        if not self.is_mean_field:
            w = self.w_mu[:, None] + tf.matmul(self.w_sqrt, z)  # [dim, S]
        else:
            w = self.w_mu[:, None] + self.w_sqrt[:, None] * z  # [dim, S]

        N = tf.shape(inputs)[0]
        inputs_concat_1 = tf.concat(
            (inputs, tf.ones((N, 1), dtype=default_float())), axis=-1
        )  # [N, D+1]
        samples = tf.tensordot(
            inputs_concat_1,
            tf.reshape(tf.transpose(w), (_num_samples, self.input_dim + 1, self.output_dim)),
            [[-1], [1]],
        )  # [N, S, Q]
        if num_samples is None:
            samples = tf.squeeze(samples, axis=-2)  # [N, Q]
        else:
            samples = tf.transpose(samples, perm=[1, 0, 2])  # [S, N, Q]

        if self.activation is not None:
            samples = self.activation(samples)

        return samples

    def call(
        self, inputs: TensorType, training: Optional[bool] = False
    ) -> Union[tf.Tensor, MeanAndVariance]:
        """The default behaviour upon calling the BayesianDenseLayer()(X)"""
        sample = self.predict_samples(
            inputs,
            num_samples=None,
            full_output_cov=self.full_output_cov,
            full_cov=self.full_cov,
        )

        # TF quirk: add_loss must add a tensor to compile
        if training:
            # Wenzel et al. 2020: How good is the Bayes posterior in DNNs really?
            loss = self.temperature * self.prior_kl()
        else:
            loss = tf.constant(0.0, dtype=default_float())
        loss_per_datapoint = loss / self.num_data

        self.add_loss(loss_per_datapoint)

        # for latent layers, return samples
        if self.returns_samples:
            return sample  # [N, Q]
        # for output layers, return samples as mean with 0 cov
        return sample, tf.ones_like(sample) * 1e-10  # [N, Q], [N, Q]

    def prior_kl(self) -> tf.Tensor:
        """
        The KL divergence from the variational distribution to the prior.

        :return: KL divergence from N(w_mu, w_sqrt²) to N(0, I)
        """
        return gauss_kl(
            self.w_mu[:, None],
            self.w_sqrt[None] if not self.is_mean_field else self.w_sqrt[:, None],
        )
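# A minimal usage sketch for this variant (illustrative, not part of the
# original example), assuming the same imports as above. With
# returns_samples=False the layer acts as an output layer and returns a
# (mean, variance) pair instead of a bare sample; training=True adds the
# temperature-scaled KL (divided by num_data) via add_loss.
out_layer = BayesianDenseLayer(
    input_dim=2, output_dim=1, num_data=50, returns_samples=False
)
X = np.random.randn(4, 2)
mean, var = out_layer(X, training=True)   # shapes [4, 1] and [4, 1]
kl_per_datapoint = out_layer.losses[0]    # temperature * KL[q ∥ p] / num_data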