예제 #1
0
    def __init__(self, input_dim, output_dim, kern, Z, beta=10.0, natgrads=False, S_param='chol', name='layer'):
        """Set up a layer holding inducing inputs Z, a kernel, beta and q(U).

        :param input_dim: number of input dimensions; must equal Z.shape[1]
        :param output_dim: number of output dimensions (columns of the q(U) mean)
        :param kern: kernel object, linked as a parameter of this layer
        :param Z: array of inducing inputs; Z.shape[0] is taken as the number
            of inducing points
        :param beta: initial value of the 'beta' parameter (Logexp-transformed)
        :param natgrads: when true, q(U) is initialised through
            ``set_vb_param`` instead of being stored as linked parameters
        :param S_param: 'chol' or 'diag', the parameterisation of the q(U)
            covariance; only consulted when ``natgrads`` is false
        :param name: name handed to the parent constructor
        :raises NotImplementedError: if ``natgrads`` is false and ``S_param``
            is neither 'chol' nor 'diag'
        """
        super(Layer, self).__init__(name)
        self.input_dim, self.output_dim = input_dim, output_dim
        self.num_inducing = Z.shape[0]

        # multiplier for the KL term (only used in parallel implementations)
        self.KL_scaling = 1.

        # Z, kern and beta are all kept in this Parameterized object
        assert Z.shape[1] == self.input_dim
        self.kern = kern
        make_param = GPy.core.Param
        positive = GPy.core.parameterization.transformations.Logexp
        self.Z = make_param('Z', Z)
        self.beta = make_param('beta', beta, positive())
        self.link_parameters(self.Z, self.kern, self.beta)

        self.natgrads = natgrads
        num_inducing, num_outputs = self.num_inducing, self.output_dim

        # initialise q(U)
        if self.natgrads:
            # natural-gradient route: a standard-normal mean and a precision of
            # 10*I per output, handed over as one flat vector
            mean = np.random.randn(num_inducing, num_outputs)
            precision = np.dstack([np.eye(num_inducing)*10 for _ in range(num_outputs)])
            # precision[:, :, k] times mean[:, k], for every output k
            precision_times_mean = np.einsum('ijk,jk->ik', precision, mean)
            flat_first = precision_times_mean.flatten()
            flat_second = -0.5*precision.flatten()
            self.set_vb_param(np.hstack((flat_first, flat_second)))
        else:
            # the mean is a standard-normal draw (an alternative that was tried
            # is a draw from a zero-mean Gaussian with covariance K(Z, Z))
            self.q_of_U_mean = make_param('q(U)_mean', np.random.randn(num_inducing, num_outputs))
            self.link_parameter(self.q_of_U_mean)

            self.S_param = S_param
            if S_param == 'diag':
                # one positive entry per inducing point and output, starting at 1
                unit_diags = np.ones((num_inducing, num_outputs))
                self.q_of_U_diags = make_param('q(U)_diag', unit_diags, positive())
                self.link_parameter(self.q_of_U_diags)
            elif S_param == 'chol':
                # one 0.1*I triangular factor per output, stored flattened
                stacked = np.dstack([np.eye(num_inducing)*0.1 for _ in range(num_outputs)])
                self.q_of_U_choleskies = make_param('q(U)_chol', choleskies.triang_to_flat(stacked))
                self.link_parameter(self.q_of_U_choleskies)
            else:
                raise NotImplementedError

        # starts empty; will contain the lower layers
        self.lower_layers = []
예제 #2
0
    def gradient_updates(self):
        """Set the gradients of the kernel, of Z and (without natgrads) of q(U).

        Uses the partial derivatives already stored on this object
        (``dL_dKmm``, ``dL_dpsi0``, ``dL_dpsi1``, ``dL_dpsi2``, ``dL_dEu`` and
        ``dL_duuT``) together with ``q_of_X_in``. Nothing is returned: the
        results are written to the ``gradient`` attributes of the kernel, of
        ``Z`` and, when ``natgrads`` is false, of the q(U) parameters.
        """
        # Kernel parameters: the Kmm contribution is computed first, stashed,
        # and added back after the psi-statistics update. Presumably each
        # update_* call overwrites the kernel's gradient array -- confirm
        # against the GPy kernel implementation.
        self.kern.update_gradients_full(self.dL_dKmm, self.Z)
        kmm_part = self.kern._gradient_array_.copy()
        self.kern.update_gradients_expectations(Z=self.Z, variational_posterior=self.q_of_X_in, dL_dpsi0=self.dL_dpsi0, dL_dpsi1=self.dL_dpsi1, dL_dpsi2=self.dL_dpsi2)
        self.kern._gradient_array_ += kmm_part

        # Inducing inputs: contribution through Kmm plus the one through the
        # psi statistics.
        self.Z.gradient = self.kern.gradients_X(self.dL_dKmm, self.Z)
        self.Z.gradient += self.kern.gradients_Z_expectations(Z=self.Z, variational_posterior=self.q_of_X_in, dL_dpsi1=self.dL_dpsi1, dL_dpsi2=self.dL_dpsi2, dL_dpsi0=self.dL_dpsi0)

        if self.natgrads:
            # q(U) has no linked parameters to update in this mode.
            return

        # Mean of q(U): dL_dEu plus, for each output k,
        # 2 * dL_duuT[:, :, k] times mean[:, k].
        self.q_of_U_mean.gradient = self.dL_dEu + 2.*np.einsum('ijk,jk->ik', self.dL_duuT, self.q_of_U_mean)

        # Compare by value: an identity test ('is') against a string literal
        # only works when the interpreter happens to intern both strings.
        if self.S_param == 'chol':
            L = choleskies.flat_to_triang(self.q_of_U_choleskies)
            # for each output k: 2 * dL_duuT[:, :, k] times L[:, :, k]
            dL_dchol = 2.*np.einsum('ijk,jlk->ilk', self.dL_duuT, L)
            self.q_of_U_choleskies.gradient = choleskies.triang_to_flat(dL_dchol)
        else:
            # one column per output, holding the diagonal of dL_duuT[:, :, k]
            self.q_of_U_diags.gradient = np.vstack([np.diag(self.dL_duuT[:,:,i]) for i in range(self.output_dim)]).T
예제 #3
0
    def gradient_updates(self):
        """Set the gradients of the kernel, of Z and of q(U) for a fixed input X.

        Uses the partial derivatives already stored on this object
        (``dL_dKmm``, ``dL_dpsi0``, ``dL_dpsi1``, ``dL_dpsi2``, ``dL_dEu`` and
        ``dL_duuT``) together with ``psi1`` and ``X``. Nothing is returned: the
        results are written to the ``gradient`` attributes of the kernel, of
        ``Z`` and of the q(U) parameters.
        """
        # Note that the kernel gradients are a little different here because
        # there's no q(X), just a fixed X. Three contributions are summed: the
        # first two are accumulated in kernel_grad and added back after the
        # third. Presumably each update_* call overwrites the kernel's
        # gradient array -- confirm against the GPy kernel implementation.
        self.kern.update_gradients_full(self.dL_dKmm, self.Z)
        kernel_grad = self.kern._gradient_array_.copy()
        dL_dKnm = self.dL_dpsi1 + 2.*self.psi1.dot(self.dL_dpsi2)
        self.kern.update_gradients_full(dL_dKnm, self.X, self.Z)
        kernel_grad += self.kern._gradient_array_
        self.kern.update_gradients_diag(self.dL_dpsi0, self.X)
        self.kern._gradient_array_ += kernel_grad

        # Inducing inputs: contribution through Kmm plus the one through the
        # cross term between Z and X (hence the transposed dL_dKnm).
        self.Z.gradient = self.kern.gradients_X(self.dL_dKmm, self.Z)
        self.Z.gradient += self.kern.gradients_X(dL_dKnm.T, self.Z, self.X)

        # Mean of q(U): dL_dEu plus, for each output k,
        # 2 * dL_duuT[:, :, k] times mean[:, k].
        self.q_of_U_mean.gradient = self.dL_dEu + 2.*np.einsum('ijk,jk->ik',self.dL_duuT, self.q_of_U_mean)

        # Compare by value: an identity test ('is') against a string literal
        # only works when the interpreter happens to intern both strings.
        if self.S_param == 'chol':
            L = choleskies.flat_to_triang(self.q_of_U_choleskies)
            # for each output k: 2 * dL_duuT[:, :, k] times L[:, :, k]
            dL_dchol = 2.*np.einsum('ijk,jlk->ilk', self.dL_duuT, L)
            self.q_of_U_choleskies.gradient = choleskies.triang_to_flat(dL_dchol)
        else:
            # one column per output, holding the diagonal of dL_duuT[:, :, k]
            self.q_of_U_diags.gradient = np.vstack([np.diag(self.dL_duuT[:,:,i]) for i in range(self.output_dim)]).T