Example 1
class DGP_Base(Model):
    """
    The base class for Deep Gaussian process models.

    Implements a Monte-Carlo variational bound and convenience functions.

    """
    def __init__(self, X, Y, likelihood, layers,
                 minibatch_size=None,
                 num_samples=1):
        Model.__init__(self)
        self.num_samples = num_samples

        self.num_data = X.shape[0]
        if minibatch_size:
            self.X = Minibatch(X, minibatch_size, seed=0)
            self.Y = Minibatch(Y, minibatch_size, seed=0)
        else:
            self.X = DataHolder(X)
            self.Y = DataHolder(Y)

        self.likelihood = BroadcastingLikelihood(likelihood)

        self.layers = ParamList(layers)

    @params_as_tensors
    def propagate(self, X, full_cov=False, S=1, zs=None):
        sX = tf.tile(tf.expand_dims(X, 0), [S, 1, 1])

        Fs, Fmeans, Fvars = [], [], []

        F = sX
        zs = zs or [None, ] * len(self.layers)
        for layer, z in zip(self.layers, zs):
            F, Fmean, Fvar = layer.sample_from_conditional(F, z=z, full_cov=full_cov)

            Fs.append(F)
            Fmeans.append(Fmean)
            Fvars.append(Fvar)

        return Fs, Fmeans, Fvars

    @params_as_tensors
    def _build_predict(self, X, full_cov=False, S=1):
        Fs, Fmeans, Fvars = self.propagate(X, full_cov=full_cov, S=S)
        return Fmeans[-1], Fvars[-1]

    def E_log_p_Y(self, X, Y):
        """
        Calculate the expectation of the data log likelihood under the variational distribution
         with MC samples
        """
        Fmean, Fvar = self._build_predict(X, full_cov=False, S=self.num_samples)
        var_exp = self.likelihood.variational_expectations(Fmean, Fvar, Y)  # S, N, D
        return tf.reduce_mean(var_exp, 0)  # N, D

    @params_as_tensors
    def _build_likelihood(self):
        L = tf.reduce_sum(self.E_log_p_Y(self.X, self.Y))
        KL = tf.reduce_sum([layer.KL() for layer in self.layers])
        scale = tf.cast(self.num_data, float_type)
        scale /= tf.cast(tf.shape(self.X)[0], float_type)  # minibatch size
        return L * scale - KL

    @autoflow((float_type, [None, None]), (tf.int32, []))
    def predict_f(self, Xnew, num_samples):
        return self._build_predict(Xnew, full_cov=False, S=num_samples)

    @autoflow((float_type, [None, None]), (tf.int32, []))
    def predict_f_full_cov(self, Xnew, num_samples):
        return self._build_predict(Xnew, full_cov=True, S=num_samples)

    @autoflow((float_type, [None, None]), (tf.int32, []))
    def predict_all_layers(self, Xnew, num_samples):
        return self.propagate(Xnew, full_cov=False, S=num_samples)

    @autoflow((float_type, [None, None]), (tf.int32, []))
    def predict_all_layers_full_cov(self, Xnew, num_samples):
        return self.propagate(Xnew, full_cov=True, S=num_samples)

    @autoflow((float_type, [None, None]), (tf.int32, []))
    def predict_y(self, Xnew, num_samples):
        Fmean, Fvar = self._build_predict(Xnew, full_cov=False, S=num_samples)
        return self.likelihood.predict_mean_and_var(Fmean, Fvar)

    @autoflow((float_type, [None, None]), (float_type, [None, None]), (tf.int32, []))
    def predict_density(self, Xnew, Ynew, num_samples):
        Fmean, Fvar = self._build_predict(Xnew, full_cov=False, S=num_samples)
        l = self.likelihood.predict_density(Fmean, Fvar, Ynew)
        log_num_samples = tf.log(tf.cast(num_samples, float_type))
        return tf.reduce_logsumexp(l - log_num_samples, axis=0)
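
A minimal usage sketch (not part of the original source) of how this class might be driven. It assumes GPflow 1.x and that init_layers_linear from the doubly_stochastic_dgp package builds a list of Layer objects compatible with DGP_Base; adjust the names to your setup.

# Usage sketch under the assumptions stated above.
import numpy as np
from gpflow.kernels import RBF
from gpflow.likelihoods import Gaussian
from gpflow.training import AdamOptimizer
from doubly_stochastic_dgp.layer_initializations import init_layers_linear

X = np.random.randn(200, 2)
Y = np.sin(X[:, :1]) + 0.05 * np.random.randn(200, 1)
Z = X[::10].copy()  # inducing inputs

# Two-layer DGP: one hidden GP layer, one output layer.
layers = init_layers_linear(X, Y, Z, kernels=[RBF(2), RBF(2)])
model = DGP_Base(X, Y, Gaussian(), layers, num_samples=5)

AdamOptimizer(0.01).minimize(model, maxiter=2000)
mean, var = model.predict_y(X, 25)  # Monte-Carlo predictions, shape [25, N, 1]
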
Example 2
class LikelihoodTester(Model):
    def __init__(self, likelihood):
        Model.__init__(self)
        self.wrapped_likelihood = BroadcastingLikelihood(likelihood)
        self.likelihood = likelihood

    def _build_likelihood(self):
        return tf.cast(0., dtype=settings.float_type)

    @params_as_tensors
    @autoflow((settings.float_type, [None, None, None]),
              (settings.float_type, [None, None]))
    def logp1(self, F, Y):
        return self.wrapped_likelihood.logp(F, Y)

    @params_as_tensors
    @autoflow((settings.float_type, [None, None, None]),
              (settings.float_type, [None, None]))
    def logp2(self, F, Y):
        f = lambda a: self.likelihood.logp(a, Y)
        return tf.stack(tf.map_fn(f, F, dtype=settings.float_type))

    @params_as_tensors
    @autoflow((settings.float_type, [None, None, None]))
    def conditional_mean1(self, F):
        return self.wrapped_likelihood.conditional_mean(F)

    @params_as_tensors
    @autoflow((settings.float_type, [None, None, None]))
    def conditional_mean2(self, F):
        f = lambda a: tf.cast(self.likelihood.conditional_mean(a),
                              dtype=settings.float_type)
        return tf.stack(tf.map_fn(f, F, dtype=settings.float_type))

    @params_as_tensors
    @autoflow((settings.float_type, [None, None, None]))
    def conditional_variance1(self, F):
        return self.wrapped_likelihood.conditional_variance(F)

    @params_as_tensors
    @autoflow((settings.float_type, [None, None, None]))
    def conditional_variance2(self, F):
        f = lambda a: tf.cast(self.likelihood.conditional_variance(a),
                              dtype=settings.float_type)
        return tf.stack(tf.map_fn(f, F, dtype=settings.float_type))

    @params_as_tensors
    @autoflow((settings.float_type, [None, None, None]),
              (settings.float_type, [None, None, None]))
    def predict_mean_and_var1(self, Fmu, Fvar):
        return self.wrapped_likelihood.predict_mean_and_var(Fmu, Fvar)

    @params_as_tensors
    @autoflow((settings.float_type, [None, None, None]),
              (settings.float_type, [None, None, None]))
    def predict_mean_and_var2(self, Fmu, Fvar):
        f = lambda a: list(self.likelihood.predict_mean_and_var(a[0], a[1]))
        m, v = tf.map_fn(f, [Fmu, Fvar],
                         dtype=[settings.float_type, settings.float_type])
        return tf.stack(m), tf.stack(v)

    @params_as_tensors
    @autoflow((settings.float_type, [None, None, None]),
              (settings.float_type, [None, None, None]),
              (settings.float_type, [None, None]))
    def predict_density1(self, Fmu, Fvar, Y):
        return self.wrapped_likelihood.predict_density(Fmu, Fvar, Y)

    @params_as_tensors
    @autoflow((settings.float_type, [None, None, None]),
              (settings.float_type, [None, None, None]),
              (settings.float_type, [None, None]))
    def predict_density2(self, Fmu, Fvar, Y):
        f = lambda a: self.likelihood.predict_density(a[0], a[1], Y)
        return tf.stack(tf.map_fn(f, [Fmu, Fvar], dtype=settings.float_type))

    @params_as_tensors
    @autoflow((settings.float_type, [None, None, None]),
              (settings.float_type, [None, None, None]),
              (settings.float_type, [None, None]))
    def variational_expectations1(self, Fmu, Fvar, Y):
        return self.wrapped_likelihood.variational_expectations(Fmu, Fvar, Y)

    @params_as_tensors
    @autoflow((settings.float_type, [None, None, None]),
              (settings.float_type, [None, None, None]),
              (settings.float_type, [None, None]))
    def variational_expectations2(self, Fmu, Fvar, Y):
        f = lambda a: self.likelihood.variational_expectations(a[0], a[1], Y)
        return tf.stack(tf.map_fn(f, [Fmu, Fvar], dtype=settings.float_type))
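
A small check (not from the original test file) showing how LikelihoodTester is meant to be used: each *1/*2 pair evaluates the same quantity through the BroadcastingLikelihood wrapper and through an explicit per-sample tf.map_fn, so the two results should agree. Assumes GPflow 1.x and a Gaussian likelihood.

import numpy as np
from gpflow.likelihoods import Gaussian

S, N, D = 3, 10, 1
F = np.random.randn(S, N, D)  # S Monte-Carlo samples of the latent function
Y = np.random.randn(N, D)

tester = LikelihoodTester(Gaussian())
np.testing.assert_allclose(tester.logp1(F, Y), tester.logp2(F, Y))
np.testing.assert_allclose(tester.conditional_mean1(F),
                           tester.conditional_mean2(F))
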
Example 3
class DGP_Base(Model):
    """
    The base class for Deep Gaussian process models.

    Implements a Monte-Carlo variational bound and convenience functions.
    """
    def __init__(self,
                 X,
                 Y,
                 likelihood,
                 layers,
                 minibatch_size=None,
                 num_samples=1,
                 **kwargs):
        """

        :param X: List of training inputs where each element of the list is a numpy array corresponding to the inputs of one fidelity.
        :param Y: List of training targets where each element of the list is a numpy array corresponding to the targets of one fidelity.
        :param likelihood: gpflow likelihood object for use at the final layer
        :param layers: List of doubly_stochastic_dgp.layers.Layer objects
        :param minibatch_size: Minibatch size if using minibatch training
        :param num_samples: Number of samples when propagating predictions through layers
        :param kwargs: kwarg inputs to gpflow.models.Model
        """

        Model.__init__(self, **kwargs)

        self.Y_list = Y
        self.X_list = X
        self.minibatch_size = minibatch_size

        self.num_samples = num_samples

        # This allows a training regime where the first layer is trained first by itself, then the subsequent layer
        # and so on.
        self._train_upto_fidelity = -1

        if minibatch_size:
            for i, (x, y) in enumerate(zip(X, Y)):
                setattr(self, "num_data" + str(i), x.shape[0])
                setattr(self, "X" + str(i), Minibatch(x,
                                                      minibatch_size,
                                                      seed=0))
                setattr(self, "Y" + str(i), Minibatch(y,
                                                      minibatch_size,
                                                      seed=0))
        else:
            for i, (x, y) in enumerate(zip(X, Y)):
                setattr(self, "num_data" + str(i), x.shape[0])
                setattr(self, "X" + str(i), DataHolder(x))
                setattr(self, "Y" + str(i), DataHolder(y))

        self.num_layers = len(layers)
        self.layers = ParamList(layers)

        self.likelihood = BroadcastingLikelihood(likelihood)

    @params_as_tensors
    def propagate(self, X, full_cov=False, S=1, zs=None):
        """
        Propagate some prediction to the final layer and return predictions at each intermediate layer

        :param X: Input(s) at which to predict at
        :param full_cov: Whether the predict with the full covariance matrix
        :param S: Number of samples to use for sampling at intermediate layers
        :param zs: ??
        :return:
        """
        sX = tf.tile(tf.expand_dims(X, 0), [S, 1, 1])

        Fs, Fmeans, Fvars = [], [], []

        F = sX
        zs = zs or [None] * len(self.layers)

        for i, (layer, z) in enumerate(zip(self.layers, zs)):
            if i == 0:
                F, Fmean, Fvar = layer.sample_from_conditional(
                    F, z=z, full_cov=full_cov)
            else:
                """

                KC - At all layers 1..L, the input to the next layer is original input augmented with
                the realisation of the function at the previous layer at that input.

                """
                F_aug = tf.concat([sX, F], 2)
                F, Fmean, Fvar = layer.sample_from_conditional(
                    F_aug, z=z, full_cov=full_cov)

            Fs.append(F)
            Fmeans.append(Fmean)
            Fvars.append(Fvar)

        return Fs, Fmeans, Fvars

    @params_as_tensors
    def _build_predict(self, X, full_cov=False, S=1, fidelity=None):
        """
        Predicts from the fidelity level specified. If fidelity is not specified, return prediction at highest fidelity.

        :param X: Location at which to predict
        :param full_cov: Whether to predict full covariance matrix
        :param S: Number of samples to use for MC sampling between layers
        :param fidelity: zero based fidelity index at which to predict
        :return: (mean, variance) where each is [S, N, 1] where S is number of samples and N is number of predicted points.
        """

        if fidelity is None:
            fidelity = -1

        _, Fmeans, Fvars = self.propagate(X, full_cov=full_cov, S=S)
        return Fmeans[fidelity], Fvars[fidelity]

    def _likelihood_at_fidelity(self, Fmu, Fvar, Y, variance):
        """
        Calculate likelihood term for observations corresponding to one fidelity

        :param Fmu: Posterior mean
        :param Fvar: Posterior variance
        :param Y: training observations
        :param variance: likelihood variance
        :return: Expected Gaussian log density under q(f) = N(Fmu, Fvar), evaluated in closed form
        """
        return -0.5 * np.log(2 * np.pi) - 0.5 * tf.log(variance) - 0.5 * (
            tf.square(Y - Fmu) + Fvar) / variance

    def E_log_p_Y(self, X_f, Y_f, fidelity=None):
        """
        Calculate the expectation of the data log likelihood under the variational distribution with MC samples

        :param X_f: Training inputs at the given fidelity
        :param Y_f: Training targets at the given fidelity
        :param fidelity: Zero-based fidelity index of the observations
        :return: Expected log likelihood with shape [N, D]
        """

        Fmean, Fvar = self._build_predict(X_f,
                                          full_cov=False,
                                          S=self.num_samples,
                                          fidelity=fidelity)

        if fidelity == (self.num_layers - 1):
            """
            KC - The likelihood of the observations at the last layer is computed using the model's 'likelihood' object
            """
            var_exp = self.likelihood.variational_expectations(
                Fmean, Fvar, Y_f)  # S, N, D
        else:
            """
            KC - The Gaussian likelihood of the observations at the intermediate layers is computed using the noise
            parameter pertaining to the White noise kernel.

            This assumes that a White kernel should be added to all layers except for the last!
            If no noise is desired, the variance parameter in the White kernel should be set to zero and fixed.
            """
            variance = self.layers[fidelity].kern.kernels[-1].variance

            var_exp = self._likelihood_at_fidelity(Fmean, Fvar,
                                                   tf.expand_dims(Y_f, 0),
                                                   variance)

        return tf.reduce_mean(var_exp, 0)  # N, D

    @params_as_tensors
    def _build_likelihood(self):
        """
        ELBO calculation
        :return: MC estimate of lower bound
        """
        L = 0.0
        KL = 0.0
        for fidelity in range(self.num_layers):

            if (self._train_upto_fidelity !=
                    -1) and (fidelity > self._train_upto_fidelity):
                continue

            X_l = getattr(self, "X" + str(fidelity))
            Y_l = getattr(self, "Y" + str(fidelity))

            n_data = getattr(self, "num_data" + str(fidelity))
            scale = tf.cast(n_data, float_type) / tf.cast(
                tf.shape(X_l)[0], float_type)

            L += tf.reduce_sum(self.E_log_p_Y(X_l, Y_l, fidelity)) * scale
            KL += tf.reduce_sum(self.layers[fidelity].KL())

        self.L = L
        self.KL = KL

        return self.L - self.KL

    @autoflow((float_type, [None, None]), (tf.int32, []))
    def predict_f(self, Xnew, num_samples, fidelity=None):
        return self._build_predict(Xnew,
                                   full_cov=False,
                                   S=num_samples,
                                   fidelity=fidelity)

    @autoflow((float_type, [None, None]), (tf.int32, []))
    def predict_f_full_cov(self, Xnew, num_samples, fidelity=None):
        return self._build_predict(Xnew,
                                   full_cov=True,
                                   S=num_samples,
                                   fidelity=fidelity)

    @autoflow((float_type, [None, None]), (tf.int32, []))
    def predict_all_layers(self, Xnew, num_samples):
        return self.propagate(Xnew, full_cov=False, S=num_samples)

    @autoflow((float_type, [None, None]), (tf.int32, []))
    def predict_all_layers_full_cov(self, Xnew, num_samples):
        return self.propagate(Xnew, full_cov=True, S=num_samples)

    @autoflow((float_type, [None, None]), (tf.int32, []))
    def predict_y(self, Xnew, num_samples):
        Fmean, Fvar = self._build_predict(Xnew, full_cov=False, S=num_samples)
        return self.likelihood.predict_mean_and_var(Fmean, Fvar)

    @autoflow((float_type, [None, None]), (float_type, [None, None]),
              (tf.int32, []))
    def predict_density(self, Xnew, Ynew, num_samples):
        Fmean, Fvar = self._build_predict(Xnew, full_cov=False, S=num_samples)
        l = self.likelihood.predict_density(Fmean, Fvar, Ynew)
        log_num_samples = tf.log(tf.cast(num_samples, float_type))
        return tf.reduce_logsumexp(l - log_num_samples, axis=0)

    @classmethod
    def make_mf_dgp(cls, X, Y, Z, add_linear=True, minibatch_size=None):
        """
        Constructor for convenience. Constructs a mf-dgp model from training data and inducing point locations

        :param X: List of target
        :param Y:
        :param Z:
        :param add_linear:
        :return:
        """

        n_fidelities = len(X)

        Din = X[0].shape[1]
        Dout = Y[0].shape[1]

        kernels = [
            RBF(Din,
                active_dims=list(range(Din)),
                variance=1.0,
                lengthscales=1,
                ARD=True)
        ]
        for l in range(1, n_fidelities):
            D = Din + Dout
            D_range = list(range(D))
            k_corr = RBF(Din,
                         active_dims=D_range[:Din],
                         lengthscales=1,
                         variance=1.0,
                         ARD=True)
            k_prev = RBF(Dout,
                         active_dims=D_range[Din:],
                         variance=1.0,
                         lengthscales=1.0)
            k_in = RBF(Din,
                       active_dims=D_range[:Din],
                       variance=1.0,
                       lengthscales=1,
                       ARD=True)
            if add_linear:
                k_l = k_corr * (k_prev + Linear(
                    Dout, active_dims=D_range[Din:], variance=1.0)) + k_in
            else:
                k_l = k_corr * k_prev + k_in
            kernels.append(k_l)
        """
        A White noise kernel is currently expected by Mf-DGP at all layers except the last.
        In cases where no noise is desired, this should be set to 0 and fixed, as follows:

            white = White(1, variance=0.)
            white.variance.trainable = False
            kernels[i] += white
        """
        for i, kernel in enumerate(kernels[:-1]):
            kernels[i] += White(1, variance=1e-6)

        num_data = 0
        for i in range(len(X)):
            _log.info("\nData at Fidelity {}".format(i + 1))
            _log.info("X - {}".format(X[i].shape))
            _log.info("Y - {}".format(Y[i].shape))
            _log.info("Z - {}".format(Z[i].shape))
            num_data += X[i].shape[0]

        layers = init_layers_mf(Y, Z, kernels, num_outputs=Dout)

        model = DGP_Base(X,
                         Y,
                         Gaussian(),
                         layers,
                         num_samples=10,
                         minibatch_size=minibatch_size)

        return model

    def multi_step_training(self, n_iter=5000, n_iter_2=15000):
        """
        Train with variational covariance fixed to be small first, then free up and train covariance alongside other
        parameters. Inducing point locations are fixed throughout.
        """
        for layer in self.layers[:-1]:
            layer.q_sqrt = layer.q_sqrt.value * 1e-8
            layer.q_sqrt.trainable = False
        self.layers[-1].q_sqrt = self.layers[-1].q_sqrt.value * self.Y_list[
            -1].var() * 0.01
        self.layers[-1].q_sqrt.trainable = False
        self.likelihood.likelihood.variance = self.Y_list[-1].var() * 0.01
        self.likelihood.likelihood.variance.trainable = False

        # Run with covariance fixed
        self.run_adam(3e-3, n_iter)

        # Run with covariance free
        self.likelihood.likelihood.variance.trainable = True

        for layer in self.layers:
            layer.q_sqrt.trainable = True

        self.run_adam(1e-3, n_iter_2)

    def fix_inducing_point_locations(self):
        """
        Fix all inducing point locations
        """
        for layer in self.layers:
            layer.feature.Z.trainable = False

    def run_adam(self, lr, iterations):
        adam = AdamOptimizer(lr).make_optimize_action(self)
        actions = [adam, PrintAction(self, "MF-DGP with Adam")]
        loop = Loop(actions, stop=iterations)()
        self.anchor(self.enquire_session())
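
An end-to-end sketch (not from the original source) of how the convenience constructor and the two-stage training routine above might be combined on toy two-fidelity data. It assumes, as the augmented kernels suggest, that inducing inputs for fidelities above the lowest live in the augmented space of dimension Din + Dout; the iteration counts are placeholders.

import numpy as np

# Toy data: many cheap low-fidelity points, a few expensive high-fidelity ones.
x_lo = np.random.rand(80, 1)
y_lo = np.sin(8.0 * x_lo)
x_hi = x_lo[::10].copy()
y_hi = np.sin(8.0 * x_hi) + 0.1 * x_hi

X = [x_lo, x_hi]
Y = [y_lo, y_hi]
# Inducing inputs per fidelity; the high-fidelity set is assumed to be augmented with outputs.
Z = [x_lo[::4].copy(), np.hstack([x_hi, y_hi])]

model = DGP_Base.make_mf_dgp(X, Y, Z)
model.fix_inducing_point_locations()
model.multi_step_training(n_iter=500, n_iter_2=1500)

x_test = np.linspace(0.0, 1.0, 50)[:, None]
mean, var = model.predict_y(x_test, 25)  # predictions at the highest fidelity
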
Example 4
class DGP_Base(Model):
    """
    The base class for Deep Gaussian process models.

    Implements a Monte-Carlo variational bound and convenience functions.

    """
    def __init__(self,
                 X,
                 Y,
                 likelihood,
                 layers,
                 minibatch_size=None,
                 num_samples=1,
                 num_data=None,
                 div_weights=None,
                 **kwargs):
        Model.__init__(self, **kwargs)
        self.num_samples = num_samples

        self.num_data = num_data or X.shape[0]
        if minibatch_size:
            self.X = Minibatch(X, minibatch_size, seed=0)
            self.Y = Minibatch(Y, minibatch_size, seed=0)
        else:
            self.X = DataHolder(X)
            self.Y = DataHolder(Y)

        self.likelihood = BroadcastingLikelihood(likelihood)

        self.layers = ParamList(layers)
        """CHANGES START"""
        """Weights for the uncertainty quantifiers (per layer)"""
        if div_weights is None:
            div_weights = [1.0] * len(
                layers)  #multiply by 1, i.e. don't change
        elif type(div_weights) == list and len(div_weights) != len(layers):
            print(
                "WARNING! You specified a list of weights for the " +
                "uncertainty quantifiers, but your DGP has more/less layers " +
                "than the number of weights you specified! " +
                "We set all weights to 1.0")
            div_weights = [1.0] * len(layers)
        elif type(div_weights) == list and len(div_weights) == len(layers):
            div_weights = div_weights
        """Distribute the weights into the layers"""
        for layer, weight in zip(layers, div_weights):
            layer.set_weight(weight)
        """CHANGES EEND"""

    @params_as_tensors
    def propagate(self, X, full_cov=False, S=1, zs=None):
        sX = tf.tile(tf.expand_dims(X, 0), [S, 1, 1])

        Fs, Fmeans, Fvars = [], [], []

        F = sX
        zs = zs or [None] * len(self.layers)
        for layer, z in zip(self.layers, zs):
            F, Fmean, Fvar = layer.sample_from_conditional(F,
                                                           z=z,
                                                           full_cov=full_cov)

            Fs.append(F)
            Fmeans.append(Fmean)
            Fvars.append(Fvar)

        return Fs, Fmeans, Fvars

    @params_as_tensors
    def _build_predict(self, X, full_cov=False, S=1):
        Fs, Fmeans, Fvars = self.propagate(X, full_cov=full_cov, S=S)
        return Fmeans[-1], Fvars[-1]

    def E_log_p_Y(self, X, Y):
        """
        Calculate the expectation of the data log likelihood under the variational distribution
         with MC samples
        """

        Fmean, Fvar = self._build_predict(X,
                                          full_cov=False,
                                          S=self.num_samples)
        """
        Below function:
             
        Compute the expected log density of the data, given a Gaussian
        distribution for the function values.

        if
            q(f) = N(Fmu, Fvar)

        and this object represents

            p(y|f)

        then this method computes

           \int (\log p(y|f)) q(f) df.


        Here, we implement a default Gauss-Hermite quadrature routine, but some
        likelihoods (Gaussian, Poisson) will implement specific cases.
        """
        """CHANGES START"""
        # convert from S, N, D => N, D
        if (isinstance(self.likelihood.likelihood, gammaDivGaussian)):
            #For the gamma-div, the loss is strictly > 0 so we can log everything
            return tf.reduce_logsumexp(
                tf.cast(self.likelihood.variational_expectations(
                    Fmean, Fvar, Y),
                        dtype=tf.float64), 0) - np.log(self.num_samples)
        elif (isinstance(self.likelihood.likelihood, betaDivGaussian)):
            #For the beta-div, we need to treat both terms separately
            log_tempered, log_integral = self.likelihood.variational_expectations(
                Fmean, Fvar, Y)
            log_tempered_avg = tf.reduce_logsumexp(
                tf.cast(log_tempered, dtype=tf.float64), 0) - np.log(
                    self.num_samples)
            return log_tempered_avg, log_integral

        else:
            #Standard procedure of original code
            var_exp = self.likelihood.variational_expectations(Fmean, Fvar,
                                                               Y)  # S, N, D
            return tf.reduce_mean(var_exp, 0)  # N, D
        """CHANGES END"""

    @params_as_tensors
    def _build_likelihood(self):
        """CHANGES START"""
        if isinstance(self.likelihood.likelihood, gammaDivGaussian):
            L = tf.exp(tf.reduce_logsumexp(self.E_log_p_Y(self.X, self.Y)))
        elif isinstance(self.likelihood.likelihood, betaDivGaussian):
            L1, L2 = self.E_log_p_Y(self.X, self.Y)
            L = tf.exp(tf.reduce_logsumexp(L1)) + self.num_data * tf.cast(
                tf.shape(self.X)[0], float_type)
        else:
            L = tf.reduce_sum(self.E_log_p_Y(self.X, self.Y))
        """CHANGES END"""

        KL = tf.reduce_sum([layer.KL() for layer in self.layers])
        scale = tf.cast(self.num_data, float_type)
        scale /= tf.cast(tf.shape(self.X)[0], float_type)  # minibatch size
        return L * scale - KL

    @autoflow((float_type, [None, None]), (tf.int32, []))
    def predict_f(self, Xnew, num_samples):
        return self._build_predict(Xnew, full_cov=False, S=num_samples)

    @autoflow((float_type, [None, None]), (tf.int32, []))
    def predict_f_full_cov(self, Xnew, num_samples):
        return self._build_predict(Xnew, full_cov=True, S=num_samples)

    @autoflow((float_type, [None, None]), (tf.int32, []))
    def predict_all_layers(self, Xnew, num_samples):
        return self.propagate(Xnew, full_cov=False, S=num_samples)

    @autoflow((float_type, [None, None]), (tf.int32, []))
    def predict_all_layers_full_cov(self, Xnew, num_samples):
        return self.propagate(Xnew, full_cov=True, S=num_samples)

    @autoflow((float_type, [None, None]), (tf.int32, []))
    def predict_y(self, Xnew, num_samples):
        Fmean, Fvar = self._build_predict(Xnew, full_cov=False, S=num_samples)
        return self.likelihood.predict_mean_and_var(Fmean, Fvar)

    @autoflow((float_type, [None, None]), (float_type, [None, None]),
              (tf.int32, []))
    def predict_density(self, Xnew, Ynew, num_samples):
        Fmean, Fvar = self._build_predict(Xnew, full_cov=False, S=num_samples)
        l = self.likelihood.predict_density(Fmean, Fvar, Ynew)
        log_num_samples = tf.log(tf.cast(num_samples, float_type))
        return tf.reduce_logsumexp(l - log_num_samples, axis=0)
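
A brief construction sketch for this variant (not from the original source). It mirrors the earlier DGP_Base example but additionally passes div_weights, one weight per layer; it assumes GPflow 1.x, the init_layers_linear helper from doubly_stochastic_dgp, and, crucially, that the Layer objects in this modified code base implement the set_weight method that __init__ calls.

import numpy as np
from gpflow.kernels import RBF
from gpflow.likelihoods import Gaussian
from doubly_stochastic_dgp.layer_initializations import init_layers_linear

X = np.random.randn(200, 2)
Y = np.sin(X[:, :1]) + 0.05 * np.random.randn(200, 1)
Z = X[::10].copy()

# Assumed: these layers expose set_weight() in this variant of the code base.
layers = init_layers_linear(X, Y, Z, kernels=[RBF(2), RBF(2)])

model = DGP_Base(X, Y, Gaussian(), layers,
                 num_samples=5,
                 div_weights=[1.0, 0.5])  # down-weight the second layer's uncertainty quantifier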