Example 1
    def _build(self, X):
        """Build the graph of this layer."""
        n_samples, input_dim = self._get_X_dims(X)
        W_shape, b_shape = self._weight_shapes(input_dim)

        # Layer weights
        self.pW = _make_prior(self.std, self.pW, W_shape)
        self.qW = _make_posterior(self.std, self.qW, W_shape, self.full)

        # Regularizers
        KL = kl_sum(self.qW, self.pW)

        # Linear layer
        Wsamples = _sample_W(self.qW, n_samples)
        Net = tf.matmul(X, Wsamples)

        # Optional bias
        if self.use_bias or not (self.prior_b is None and self.post_b is None):
            # Layer intercepts
            self.pb = _make_prior(self.std, self.pb, b_shape)
            self.qb = _make_posterior(self.std, self.qb, b_shape, False)

            # Regularizers
            KL += kl_sum(self.qb, self.pb)

            # Linear layer
            bsamples = tf.expand_dims(_sample_W(self.qb, n_samples), 1)
            Net += bsamples

        return Net, KL
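
A note on shapes (assuming the usual (n_samples, batch, input_dim) stacking of
X and the matching (n_samples, input_dim, output_dim) stacking of Wsamples):
tf.matmul treats the leading axis as a batch dimension, so each weight sample
multiplies its own copy of the inputs. A minimal NumPy sketch of the same
broadcasting, with illustrative shapes only:

import numpy as np

X = np.ones((3, 7, 4), dtype=np.float32)   # (n_samples, batch, input_dim)
W = np.ones((3, 4, 2), dtype=np.float32)   # (n_samples, input_dim, output_dim)

# np.matmul, like tf.matmul, maps the product over the leading sample axis.
Net = np.matmul(X, W)
assert Net.shape == (3, 7, 2)              # (n_samples, batch, output_dim)
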
Example 2
def test_kl_normal_normal():
    """Test Normal/Normal KL."""
    dim = (5, 10)
    mu = np.zeros(dim, dtype=np.float32)
    std = 1.0

    q = tf.distributions.Normal(mu, std)

    # Test 0 KL
    p = tf.distributions.Normal(mu, std)
    KL0 = kl_sum(q, p)

    # Test diff var
    std1 = 2.0
    p = tf.distributions.Normal(mu, std1)
    KL1 = kl_sum(q, p)
    rKL1 = 0.5 * ((std / std1)**2 - 1 + np.log((std1 / std)**2)) * np.prod(dim)

    # Test diff mu
    mu1 = np.ones(dim, dtype=np.float32)
    p = tf.distributions.Normal(mu1, std)
    KL2 = kl_sum(q, p)
    rKL2 = 0.5 * (np.sum((mu1 - mu)**2) / std**2)

    tc = tf.test.TestCase()
    with tc.test_session():
        kl0 = KL0.eval()
        assert np.isscalar(kl0)
        assert kl0 == 0.
        assert np.allclose(KL1.eval(), rKL1)
        assert np.allclose(KL2.eval(), rKL2)
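
The reference values rKL1 and rKL2 follow from the standard closed form for
the KL divergence between two univariate normals, summed elementwise:

    \mathrm{KL}\left(\mathcal{N}(\mu_0, \sigma_0^2) \,\|\, \mathcal{N}(\mu_1, \sigma_1^2)\right)
        = \log\frac{\sigma_1}{\sigma_0}
        + \frac{\sigma_0^2 + (\mu_0 - \mu_1)^2}{2\sigma_1^2}
        - \frac{1}{2}.

With equal means this reduces to the rKL1 expression (times np.prod(dim) for
the elementwise sum), and with equal scales it reduces to rKL2.
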
Example 3
    def _build(self, X):
        """Build the graph of this layer."""
        n_samples, (height, width, channels) = self._get_X_dims(X)
        W_shape, b_shape = self._weight_shapes(channels)

        # Layer weights
        self.pW = _make_prior(self.std, self.pW, W_shape)
        self.qW = _make_posterior(self.std, self.qW, W_shape, False)

        # Regularizers
        KL = kl_sum(self.qW, self.pW)

        # Linear layer
        Wsamples = _sample_W(self.qW, n_samples, False)
        Net = tf.map_fn(
            lambda args: tf.nn.conv2d(*args,
                                      padding=self.padding,
                                      strides=self.strides),
            elems=(X, Wsamples), dtype=tf.float32)

        # Optional bias
        if self.use_bias or not (self.prior_b is None and self.post_b is None):
            # Layer intercepts
            self.pb = _make_prior(self.std, self.pb, b_shape)
            self.qb = _make_posterior(self.std, self.qb, b_shape, False)

            # Regularizers
            KL += kl_sum(self.qb, self.pb)

            # Linear layer
            bsamples = tf.reshape(_sample_W(self.qb, n_samples, False),
                                  [n_samples, 1, 1, 1, self.filters])
            Net += bsamples

        return Net, KL
Example 4
    def _build(self, X):
        """Build the graph of this layer."""
        n_samples, (input_dim,) = self._get_X_dims(X)
        W_shp, b_shp = self._weight_shapes(input_dim)

        self.pstd, self.qstd = initialise_stds(input_dim, self.output_dim,
                                               self.prior_std0,
                                               self.learn_prior, "dense")

        # Layer weights
        self.pW = _make_prior(self.pstd, W_shp)
        self.qW = _make_posterior(self.qstd, W_shp, self.full, "dense")

        # Regularizers
        KL = kl_sum(self.qW, self.pW)

        # Linear layer
        Wsamples = _sample_W(self.qW, n_samples)
        Net = tf.matmul(X, Wsamples)

        # Optional bias
        if self.use_bias:
            # Layer intercepts
            self.pb = _make_prior(self.pstd, b_shp)
            self.qb = _make_posterior(self.qstd, b_shp, False, "dense_bias")

            # Regularizers
            KL += kl_sum(self.qb, self.pb)

            # Linear layer
            bsamples = tf.expand_dims(_sample_W(self.qb, n_samples), 1)
            Net += bsamples

        return Net, KL
Example 5
    def _build(self, X):
        """Build the graph of this layer."""
        n_samples, (input_dim,) = self._get_X_dims(X)
        W_shape, _ = self._weight_shapes(self.n_categories)
        n_batch = tf.shape(X)[1]

        self.pstd, self.qstd = initialise_stds(input_dim, self.output_dim,
                                               self.prior_std0,
                                               self.learn_prior, "embed")

        # Layer weights
        self.pW = _make_prior(self.pstd, W_shape)
        self.qW = _make_posterior(self.qstd, W_shape, self.full, "embed")

        # Index into the relevant weights rather than using sparse matmul
        Wsamples = _sample_W(self.qW, n_samples)
        features = tf.map_fn(lambda wx: tf.gather(*wx, axis=0), (Wsamples, X),
                             dtype=Wsamples.dtype)

        # Now concatenate the resulting features on the last axis
        f_dims = int(np.prod(features.shape[2:]))  # need this for placeholders
        Net = tf.reshape(features, [n_samples, n_batch, f_dims])

        # Regularizers
        KL = kl_sum(self.qW, self.pW)

        return Net, KL
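
The gather above is the dense-lookup equivalent of a one-hot encoding followed
by a matrix product, without materialising the sparse one-hot matrix. A small
NumPy illustration with made-up values:

import numpy as np

W = np.arange(12, dtype=np.float32).reshape(4, 3)  # (n_categories, output_dim)
x = np.array([2, 0, 3])                            # integer category indices
onehot = np.eye(4, dtype=np.float32)[x]            # (batch, n_categories)

# Row lookup (as done by tf.gather) equals the one-hot matrix product.
assert np.allclose(W[x], onehot @ W)
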
Example 6
    def _build(self, X):
        """Build the graph of this layer."""
        n_samples, (height, width, channels) = self._get_X_dims(X)
        W_shp, b_shp = self._weight_shapes(channels)

        # Effective input/output sizes for initialising the stds
        receptive_field = np.prod(W_shp[:-2])
        n_inputs = receptive_field * channels
        n_outputs = receptive_field * self.filters

        self.pstd, self.qstd = initialise_stds(n_inputs, n_outputs,
                                               self.prior_std0,
                                               self.learn_prior, "conv2d")
        # Layer weights
        self.pW = _make_prior(self.pstd, W_shp)
        self.qW = _make_posterior(self.qstd, W_shp, False, "conv")

        # Regularizers
        KL = kl_sum(self.qW, self.pW)

        # Linear layer
        Wsamples = _sample_W(self.qW, n_samples, False)
        Net = tf.map_fn(
            lambda args: tf.nn.conv2d(*args,
                                      padding=self.padding,
                                      strides=self.strides),
            elems=(X, Wsamples), dtype=tf.float32)

        # Optional bias
        if self.use_bias:
            # Layer intercepts
            self.pb = _make_prior(self.pstd, b_shp)
            self.qb = _make_posterior(self.qstd, b_shp, False, "conv_bias")

            # Regularizers
            KL += kl_sum(self.qb, self.pb)

            # Linear layer
            bsamples = tf.reshape(_sample_W(self.qb, n_samples, False),
                                  [n_samples, 1, 1, 1, self.filters])
            Net += bsamples

        return Net, KL
Example 7
    def weights(self, input_dim, n_features, dtype=np.float32):
        """Generate the random fourier weights for this kernel.

        Parameters
        ----------
        input_dim : int
            the input dimension to this layer.
        n_features : int
            the number of unique random features; the actual output dimension
            of this layer will be ``2 * n_features``.
        dtype : np.dtype
            the dtype of the features to draw; this should match the
            observations.

        Returns
        -------
        P : ndarray
            the random weights of the Fourier features of shape
            ``(input_dim, n_features)``.
        KL : Tensor, float
            the KL penalty associated with the parameters in this kernel.

        """
        self.lenscale, self.lenscale_post = _init_lenscale(self.given_lenscale,
                                                           self.learn_lenscale,
                                                           input_dim)
        dim = (input_dim, n_features)

        # Set up the prior; lenscale may be a variable, so don't use prior_normal
        pP_scale = self.__len2std(self.lenscale, n_features)
        pP = tf.distributions.Normal(
            loc=tf.zeros(dim),
            scale=pP_scale)
        # Initialise the posterior
        qP_scale = 1.0 / self.lenscale_post
        if qP_scale.ndim > 0:
            qP_scale = np.repeat(qP_scale[:, np.newaxis], n_features, axis=1)
        qP = norm_posterior(dim=dim, std0=qP_scale, suffix="kernel")

        KL = kl_sum(qP, pP)

        # We implement the VAR-FIXED method from Cutajar et al. (2017), so we
        # pre-generate and fix the standard normal samples
        e = self._random_state.randn(*dim).astype(dtype)
        P = qP.mean() + qP.stddev() * e

        return P, KL
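
In other words, the fixed standard-normal draw e makes the feature weights a
deterministic, differentiable function of the variational parameters, i.e. the
reparameterisation

    P = \mu_q + \sigma_q \odot \varepsilon, \qquad
    \varepsilon \sim \mathcal{N}(0, I) \text{ (drawn once and held fixed)},

so gradients of the KL and of the downstream loss flow only through the mean
and standard deviation of qP.
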
Example 8
def test_kl_gaussian_gaussian(random):
    """Test Gaussian/Gaussian KL."""
    dim = (5, 10)
    Dim = (5, 10, 10)

    mu0 = random.randn(*dim).astype(np.float32)
    L0 = random_chol(Dim)
    q = tfp.distributions.MultivariateNormalTriL(mu0, L0)

    mu1 = random.randn(*dim).astype(np.float32)
    L1 = random_chol(Dim)
    p = tfp.distributions.MultivariateNormalTriL(mu1, L1)

    KL = kl_sum(q, p)
    KLr = KLdiv(mu0, L0, mu1, L1)

    tc = tf.test.TestCase()
    with tc.test_session():
        assert np.allclose(KL.eval(), KLr)
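
The reference value KLr comes from the test helper KLdiv (not shown here),
which is assumed to implement the standard closed form for two multivariate
Gaussians with covariances \Sigma_i = L_i L_i^\top, summed over the five
batched distributions:

    \mathrm{KL}\left(\mathcal{N}(\mu_0, \Sigma_0) \,\|\, \mathcal{N}(\mu_1, \Sigma_1)\right)
        = \frac{1}{2}\left[\operatorname{tr}(\Sigma_1^{-1}\Sigma_0)
        + (\mu_1 - \mu_0)^\top \Sigma_1^{-1} (\mu_1 - \mu_0)
        - k + \log\frac{\det\Sigma_1}{\det\Sigma_0}\right],

with event dimension k = 10 here.
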
Example 9
    def weights(self, input_dim, n_features, dtype=np.float32):
        """Generate the random fourier weights for this kernel.

        Parameters
        ----------
        input_dim : int
            the input dimension to this layer.
        n_features : int
            the number of unique random features; the actual output dimension
            of this layer will be ``2 * n_features``.
        dtype : np.dtype
            the dtype of the features to draw; this should match the
            observations.

        Returns
        -------
        P : ndarray
            the random weights of the Fourier features of shape
            ``(input_dim, n_features)``.
        KL : Tensor, float
            the KL penalty associated with the parameters in this kernel.

        """
        dim = (input_dim, n_features)

        # Set up the prior; lenscale may be a variable, so don't use prior_normal
        pP = tf.distributions.Normal(loc=tf.zeros(dim),
                                     scale=self.__len2std(self.lenscale))

        # Initialise the posterior
        if self.lenscale_post is None:
            self.lenscale_post = np.sqrt(1 / input_dim)
        qP = norm_posterior(dim=dim, std0=self.__len2std(self.lenscale_post))

        KL = kl_sum(qP, pP)

        # We implement the VAR-FIXED method from Cutajar et al. (2017), so we
        # pre-generate and fix the standard normal samples
        rand = np.random.RandomState(next(seedgen))
        e = rand.randn(*dim).astype(dtype)
        P = qP.mean() + qP.stddev() * e

        return P, KL
Example 10
    def _build(self, X):
        """Build the graph of this layer."""
        n_samples, input_dim = self._get_X_dims(X)
        W_shape, _ = self._weight_shapes(self.n_categories)

        assert input_dim == 1, "X must be a *column* of indices!"

        # Layer weights
        self.pW = self._make_prior(self.pW, W_shape)
        self.qW = self._make_posterior(self.qW, W_shape)

        # Index into the relevant weights rather than using sparse matmul
        Wsamples = self._sample_W(self.qW, n_samples)
        Net = tf.gather(Wsamples, X[0, :, 0], axis=1)

        # Regularizers
        KL = kl_sum(self.qW, self.pW)

        return Net, KL
Example 11
    def _build(self, X):
        # Extract perturbed predictions
        n_samples = tf.shape(X)[0] // 2
        X_orig, X_pert = X[:n_samples], X[n_samples:]

        # Build Dense Layer
        F, KL = super()._build(X_orig)

        # Build a latent function density
        qWmean = _tile2samples(n_samples, tf.transpose(self.qW.mean()))
        qWvar = _tile2samples(n_samples, tf.transpose(self.qW.variance()))
        f_loc = tf.matmul(X_pert, qWmean)
        if self.use_bias:
            f_loc += self.qb.mean()
        f_scale = tf.sqrt(tf.matmul(X_pert ** 2, qWvar))
        f_post = tf.distributions.Normal(f_loc, f_scale)

        # Calculate NCP loss
        KL += kl_sum(f_post, self.f_prior) / tf.to_float(n_samples)

        return F, KL
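
The f_loc/f_scale construction gives the exact first two moments of the latent
function when the weight posterior factorises over entries: for a weight
column w ~ N(m, diag(s)) and an input row x,

    \mathbb{E}[x^\top w] = x^\top m, \qquad
    \operatorname{Var}[x^\top w] = \sum_j x_j^2\, s_j,

which is exactly f_scale = tf.sqrt(tf.matmul(X_pert ** 2, qWvar)) above. With
a full-covariance posterior over self.qW, the variance term would also need
the off-diagonal contributions.
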
Example 12
def test_kl_gaussian_normal(random):
    """Test Gaussian/Normal KL."""
    dim = (5, 10)
    Dim = (5, 10, 10)

    mu0 = random.randn(*dim).astype(np.float32)
    L0 = random_chol(Dim)
    q = tfp.distributions.MultivariateNormalTriL(mu0, L0)

    mu1 = random.randn(*dim).astype(np.float32)
    std1 = 1.0
    L1 = [(std1 * np.eye(dim[1])).astype(np.float32) for _ in range(dim[0])]
    p = tf.distributions.Normal(mu1, std1)

    KL = kl_sum(q, p)
    KLr = KLdiv(mu0, L0, mu1, L1)

    tc = tf.test.TestCase()
    with tc.test_session():
        kl = KL.eval()
        assert np.isscalar(kl)
        assert np.allclose(kl, KLr)