Example #1
    def compute_log_ei(self, x, incumbent):

        # Cavity approximation to the posterior over the inducing values
        Kzz = compute_kernel(self.lls, self.lsf, self.z, self.z) + \
            T.eye(self.z.shape[0]) * self.jitter * T.exp(self.lsf)
        KzzInv = T.nlinalg.MatrixInversePSD()(Kzz)
        LLt = T.dot(self.LParamPost, T.transpose(self.LParamPost))
        covCavityInv = KzzInv + LLt * \
            casting(self.n_points - self.set_for_training) / \
            casting(self.n_points)
        covCavity = T.nlinalg.MatrixInversePSD()(covCavityInv)
        meanCavity = T.dot(
            covCavity,
            casting(self.n_points - self.set_for_training) /
            casting(self.n_points) * self.mParamPost)
        KzzInvcovCavity = T.dot(KzzInv, covCavity)
        KzzInvmeanCavity = T.dot(KzzInv, meanCavity)

        # Predictive mean and variance at the candidate points x
        Kxz = compute_kernel(self.lls, self.lsf, x, self.z)
        B = T.dot(KzzInvcovCavity, KzzInv) - KzzInv
        v_out = T.exp(self.lsf) + T.dot(Kxz * T.dot(Kxz, B),
                                        T.ones_like(self.z[:, 0:1]))
        m_out = T.dot(Kxz, KzzInvmeanCavity)

        # Log expected improvement with respect to the incumbent
        s = (incumbent - m_out) / T.sqrt(v_out)

        log_ei = T.log((incumbent - m_out) * ratio(s) +
                       T.sqrt(v_out)) + log_n_pdf(s)

        return log_ei
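For reference, this is the standard closed-form expected improvement rewritten for numerical stability, assuming m_out and v_out are the predictive mean and variance at x, incumbent is the best observed value, and ratio(s) = \Phi(s)/\phi(s) (see Example #9):

    EI(x) = (\eta - m(x))\,\Phi(s) + \sqrt{v(x)}\,\phi(s), \quad s = \frac{\eta - m(x)}{\sqrt{v(x)}}

    \log EI(x) = \log\Big[(\eta - m(x))\,\frac{\Phi(s)}{\phi(s)} + \sqrt{v(x)}\Big] + \log\phi(s)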
Example #2
    def setForTraining(self):

        # We only do something if the node was set for prediction instead of
        # training

        if self.set_for_training == casting(0.0):
            self.set_for_training = casting(1.0)
Example #3
    def getLogNormalizerCavity(self):

        assert self.covCavity is not None and self.meanCavity is not None and \
            self.covCavityInv is not None

        return casting(0.5 * self.n_inducing_points * np.log(2 * np.pi)) + \
            casting(0.5) * T.nlinalg.LogDetPSD()(self.covCavity) + \
            casting(0.5) * T.dot(T.dot(T.transpose(self.meanCavity),
                                       self.covCavityInv), self.meanCavity)
Example #4
    def getLogNormalizerPosterior(self):

        assert self.covPosterior is not None \
            and self.meanPosterior is not None \
            and self.covPosteriorInv is not None

        return casting(0.5 * self.n_inducing_points * np.log(2 * np.pi)) + \
            casting(0.5) * T.nlinalg.LogDetPSD()(self.covPosterior) + \
            casting(0.5) * T.dot(T.dot(T.transpose(self.meanPosterior),
                                       self.covPosteriorInv),
                                 self.meanPosterior)
Example #5
    def __init__(self, n_inducing_points, n_points, input_d, input_means,
                 input_vars, training_targets):

        self.ignore_variances = True
        self.n_inducing_points = n_inducing_points
        self.n_points = n_points
        self.input_d = input_d
        self.training_targets = training_targets
        self.input_means = input_means
        self.input_vars = input_vars

        # These are the actual parameters of the posterior distribution being
        # optimized:
        # covCavity = (Kzz^-1 + LParamPost LParamPost^T * (n - 1) / n)^-1 and
        # meanCavity = covCavity mParamPost * (n - 1) / n

        initial_value = np.zeros((n_inducing_points, n_inducing_points))
        self.LParamPost = theano.shared(value=initial_value.astype(
            theano.config.floatX),
                                        name='LParamPost',
                                        borrow=True)
        self.mParamPost = theano.shared(value=initial_value[:, 0:1].astype(
            theano.config.floatX),
                                        name='mParamPost',
                                        borrow=True)
        self.lls = theano.shared(value=np.zeros(input_d).astype(
            theano.config.floatX),
                                 name='lls',
                                 borrow=True)
        self.lsf = theano.shared(value=np.zeros(1).astype(
            theano.config.floatX)[0],
                                 name='lsf',
                                 borrow=True)
        self.z = theano.shared(value=np.zeros(
            (n_inducing_points, input_d)).astype(theano.config.floatX),
                               name='z',
                               borrow=True)
        self.lvar_noise = theano.shared(
            value=casting(0) * np.ones(1).astype(theano.config.floatX)[0],
            name='lvar_noise',
            borrow=True)

        self.set_for_training = casting(1.0)

        # We set the level of jitter to use (added to the diagonal of Kzz)

        self.jitter = casting(1e-3)
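The helper casting used throughout these snippets is not defined here. A minimal sketch consistent with how it is used (an assumption, not the original implementation) is a plain cast to Theano's configured float type:

import numpy as np
import theano

def casting(x):
    # Assumed helper: cast Python/NumPy scalars to the configured float dtype
    return np.asarray(x, dtype=theano.config.floatX)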
Example #6
    def getContributionToEnergy(self):

        assert self.n_points is not None \
            and self.covCavity is not None \
            and self.covPosterior is not None \
            and self.input_means is not None

        logZpost = self.getLogNormalizerPosterior()
        logZprior = self.getLogNormalizerPrior()
        logZcav = self.getLogNormalizerCavity()

        # We multiply by the minibatch size and normalize terms according to
        # the total number of points (n_points)

        return ((logZcav - logZpost) + logZpost / casting(self.n_points) -
                logZprior / casting(self.n_points)) * \
            T.cast(self.input_means.shape[0],
                   'float32') + T.sum(self.getLogZ())
Example #7
    def compute_log_averaged_ei(self, x, X, randomness, incumbent):

        # We compute the old predictive mean at x

        Kzz = compute_kernel(self.lls, self.lsf, self.z, self.z) + \
            T.eye(self.z.shape[0]) * self.jitter * T.exp(self.lsf)
        KzzInv = T.nlinalg.MatrixInversePSD()(Kzz)
        LLt = T.dot(self.LParamPost, T.transpose(self.LParamPost))
        covCavityInv = KzzInv + LLt * \
            casting(self.n_points - self.set_for_training) / \
            casting(self.n_points)
        covCavity = T.nlinalg.MatrixInversePSD()(covCavityInv)
        meanCavity = T.dot(
            covCavity,
            casting(self.n_points - self.set_for_training) /
            casting(self.n_points) * self.mParamPost)
        KzzInvmeanCavity = T.dot(KzzInv, meanCavity)
        Kxz = compute_kernel(self.lls, self.lsf, x, self.z)
        m_old_x = T.dot(Kxz, KzzInvmeanCavity)

        # We compute the old predictive mean at X

        KXz = compute_kernel(self.lls, self.lsf, X, self.z)
        m_old_X = T.dot(KXz, KzzInvmeanCavity)

        # We compute the required cross covariance matrices

        KXX = compute_kernel(self.lls, self.lsf, X, X) - \
            T.dot(T.dot(KXz, KzzInv),
                  KXz.T) + T.eye(X.shape[0]) * self.jitter * T.exp(self.lsf)
        KXXInv = T.nlinalg.MatrixInversePSD()(KXX)

        KxX = compute_kernel(self.lls, self.lsf, x, X)
        xX = T.concatenate([x, X], 0)
        KxXz = compute_kernel(self.lls, self.lsf, xX, self.z)
        KxX = KxX - T.dot(T.dot(KxXz[0:x.shape[0], :], KzzInv),
                          KxXz[x.shape[0]:xX.shape[0], :].T)

        # We compute the new posterior mean

        samples_internal = T.dot(MatrixChol()(KXX), randomness)

        new_predictive_mean = T.tile(
            m_old_x, [1, randomness.shape[1]]) + \
            T.dot(KxX, T.dot(KXXInv, samples_internal))

        # We compute the new posterior variance

        z_expanded = T.concatenate([self.z, X], 0)
        Kxz_expanded = compute_kernel(self.lls, self.lsf, x, z_expanded)
        Kzz_expanded = compute_kernel(
            self.lls, self.lsf, z_expanded, z_expanded) + T.eye(
                z_expanded.shape[0]) * self.jitter * T.exp(self.lsf)
        Kzz_expandedInv = T.nlinalg.MatrixInversePSD()(Kzz_expanded)
        v_out = T.exp(self.lsf) - T.dot(
            Kxz_expanded * T.dot(Kxz_expanded, Kzz_expandedInv),
            T.ones_like(z_expanded[:, 0:1]))
        new_predictive_var = T.tile(v_out, [1, randomness.shape[1]])

        s = (incumbent - new_predictive_mean) / T.sqrt(new_predictive_var)

        log_ei = T.log((incumbent - new_predictive_mean) * ratio(s) +
                       T.sqrt(new_predictive_var)) + log_n_pdf(s)

        return T.mean(LogSumExp(log_ei, 1), 1)
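LogSumExp is another helper that is not shown. The call T.mean(LogSumExp(log_ei, 1), 1) suggests a numerically stable log-sum-exp reduction that keeps the reduced axis; a minimal sketch under that assumption is:

import theano.tensor as T

def LogSumExp(x, axis=None):
    # Numerically stable log(sum(exp(x))) along `axis`, keeping dimensions
    x_max = T.max(x, axis=axis, keepdims=True)
    return T.log(T.sum(T.exp(x - x_max), axis=axis, keepdims=True)) + x_max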
Example #8
    def setForPrediction(self):

        if self.set_for_training == casting(1.0):
            self.set_for_training = casting(0.0)
Example #9
def ratio(x):
    x = T.switch(
        T.lt(x, casting(-10)), -(casting(1.0) / x - casting(1.0) / x**3 +
                                 casting(3.0) / x**5 - casting(15.0) / x**7),
        n_cdf(x) / n_pdf(x))
    return x
Example #10
def log_n_cdf(x):
    x = T.switch(T.lt(x, casting(-10)), log_n_cdf_approx(x), T.log(n_cdf(x)))
    return x
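The Gaussian helpers n_pdf, n_cdf, log_n_pdf and log_n_cdf_approx are likewise assumed from context. One sketch consistent with the asymptotic series used in ratio() above (an assumption, not the original code) is:

import numpy as np
import theano.tensor as T

def n_pdf(x):
    # Standard normal density
    return T.exp(-casting(0.5) * x**2) / casting(np.sqrt(2 * np.pi))

def n_cdf(x):
    # Standard normal cdf via the error function
    return casting(0.5) * (casting(1.0) + T.erf(x / casting(np.sqrt(2.0))))

def log_n_pdf(x):
    # Log of the standard normal density
    return -casting(0.5) * x**2 - casting(0.5 * np.log(2 * np.pi))

def log_n_cdf_approx(x):
    # Asymptotic (Mills-ratio) expansion of log Phi(x) for x << 0,
    # matching the polynomial used in ratio()
    return log_n_pdf(x) - T.log(-x) + \
        T.log(casting(1.0) - casting(1.0) / x**2 +
              casting(3.0) / x**4 - casting(15.0) / x**6)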
Example #11
    def getLogNormalizerPrior(self):

        assert self.KzzInv is not None

        return casting(0.5 * self.n_inducing_points * np.log(2 * np.pi)) - \
            casting(0.5) * T.nlinalg.LogDetPSD()(self.KzzInv)
Example #12
    def compute_output(self):

        # We compute the output mean

        self.Kzz = compute_kernel(self.lls, self.lsf, self.z, self.z) + \
            T.eye(self.z.shape[0]) * self.jitter * T.exp(self.lsf)
        self.KzzInv = T.nlinalg.MatrixInversePSD()(self.Kzz)
        LLt = T.dot(self.LParamPost, T.transpose(self.LParamPost))
        self.covCavityInv = self.KzzInv + LLt * \
            casting(self.n_points - self.set_for_training) / \
            casting(self.n_points)
        self.covCavity = T.nlinalg.MatrixInversePSD()(self.covCavityInv)
        self.meanCavity = T.dot(
            self.covCavity,
            casting(self.n_points - self.set_for_training) /
            casting(self.n_points) * self.mParamPost)
        self.KzzInvcovCavity = T.dot(self.KzzInv, self.covCavity)
        self.KzzInvmeanCavity = T.dot(self.KzzInv, self.meanCavity)
        self.covPosteriorInv = self.KzzInv + LLt
        self.covPosterior = T.nlinalg.MatrixInversePSD()(self.covPosteriorInv)
        self.meanPosterior = T.dot(self.covPosterior, self.mParamPost)
        self.Kxz = compute_kernel(self.lls, self.lsf, self.input_means, self.z)
        self.B = T.dot(self.KzzInvcovCavity, self.KzzInv) - self.KzzInv
        v_out = T.exp(self.lsf) + T.dot(self.Kxz * T.dot(self.Kxz, self.B),
                                        T.ones_like(self.z[:, 0:1]))

        if self.ignore_variances:

            self.output_means = T.dot(self.Kxz, self.KzzInvmeanCavity)
            self.output_vars = abs(v_out) + casting(0) * T.sum(self.input_vars)

        else:

            self.EKxz = compute_psi1(self.lls, self.lsf, self.input_means,
                                     self.input_vars, self.z)
            self.output_means = T.dot(self.EKxz, self.KzzInvmeanCavity)

            # In other layers we have to compute the expected variance

            self.B2 = T.outer(T.dot(self.KzzInv, self.meanCavity),
                              T.dot(self.KzzInv, self.meanCavity))

            exact_output_vars = True

            if exact_output_vars:

                # We compute the exact output variance

                self.psi2 = compute_psi2(self.lls, self.lsf, self.z,
                                         self.input_means, self.input_vars)
                ll = T.transpose(self.EKxz[:, None, :] * self.EKxz[:, :, None],
                                 [1, 2, 0])
                kk = T.transpose(self.Kxz[:, None, :] * self.Kxz[:, :, None],
                                 [1, 2, 0])
                v1 = T.transpose(
                    T.sum(T.sum(
                        T.shape_padaxis(self.B2, 2) * (self.psi2 - ll), 0),
                          0,
                          keepdims=True))
                v2 = T.transpose(
                    T.sum(T.sum(
                        T.shape_padaxis(self.B, 2) * (self.psi2 - kk), 0),
                          0,
                          keepdims=True))

            else:

                # We compute the approximate output variance using the
                # unscented Kalman filter

                v1 = 0
                v2 = 0

                n = self.input_d
                for j in range(1, n + 1):
                    mask = T.zeros_like(self.input_vars)
                    mask = T.set_subtensor(mask[:, j - 1], 1)
                    inc = mask * T.sqrt(casting(n) * self.input_vars)
                    self.kplus = T.sqrt(casting(1.0) / casting(2 * n)) * \
                        compute_kernel(
                        self.lls, self.lsf, self.input_means + inc, self.z)
                    self.kminus = T.sqrt(casting(1.0) / casting(2 * n)) *\
                        compute_kernel(
                        self.lls, self.lsf, self.input_means - inc, self.z)

                    v1 += T.dot(self.kplus * T.dot(self.kplus, self.B2),
                                T.ones_like(self.z[:, 0:1]))
                    v1 += T.dot(self.kminus * T.dot(self.kminus, self.B2),
                                T.ones_like(self.z[:, 0:1]))
                    v2 += T.dot(self.kplus * T.dot(self.kplus, self.B),
                                T.ones_like(self.z[:, 0:1]))
                    v2 += T.dot(self.kminus * T.dot(self.kminus, self.B),
                                T.ones_like(self.z[:, 0:1]))

                v1 -= T.dot(self.EKxz * T.dot(self.EKxz, self.B2),
                            T.ones_like(self.z[:, 0:1]))
                v2 -= T.dot(self.Kxz * T.dot(self.Kxz, self.B),
                            T.ones_like(self.z[:, 0:1]))

            self.output_vars = abs(v_out) + abs(v2) + abs(v1)

        self.output_vars = self.output_vars + T.exp(self.lvar_noise)

        return
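Finally, compute_kernel itself is not part of these examples. Given that T.exp(self.lsf) appears as the prior variance and self.lls holds one value per input dimension, a plausible sketch is a squared-exponential ARD kernel with log-lengthscales lls and log signal variance lsf; this is an assumption about the original helper, not a copy of it:

import theano.tensor as T

def compute_kernel(lls, lsf, x, z):
    # Assumed SE-ARD kernel: lls = per-dimension log (squared) lengthscales,
    # lsf = log signal variance, so the prior variance is T.exp(lsf)
    x_scaled = x / T.sqrt(T.exp(lls))
    z_scaled = z / T.sqrt(T.exp(lls))
    sq_dist = T.sum(x_scaled**2, 1)[:, None] + \
        T.sum(z_scaled**2, 1)[None, :] - \
        casting(2.0) * T.dot(x_scaled, T.transpose(z_scaled))
    return T.exp(lsf) * T.exp(-casting(0.5) * sq_dist)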