    def logLikelihood(self,X,y,noise=None,alpha=None,variance=None,mu=None,gradient=False):
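        """
        Computes the log-likelihood of the GP and, optionally, its gradient
        with respect to log(alpha**2), log(variance), and mu.
        """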
        if alpha is None:
            alpha=self.alpha
        if variance is None:
            variance=self.variance
        if mu is None:
            mu=self.mu
        if noise is None:
            K=self.A(X,alpha=alpha,variance=variance)
        else:
            K=self.A(X,alpha=alpha,variance=variance,noise=noise)
        y2=y-mu
        N=X.shape[0]
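        ### Marginal log-likelihood: -0.5*(y-mu)'A^{-1}(y-mu) - 0.5*log|A| - 0.5*N*log(2*pi),
        ### where A=self.A(X,...) is the covariance matrix of the observations.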
        try:
            L=np.linalg.cholesky(K)
            alp=inverseComp(L,y2)
            logLike=-0.5*np.dot(y2,alp)-np.sum(np.log(np.diag(L)))-0.5*N*np.log(2.0*np.pi)
            if gradient==False:
                return logLike
            gradient=np.zeros(self.dimension+2)
            
            ### 0 to n-1: gradient with respect to log(alpha**2)
            ### n: gradient with respect to log(variance)
            ### n+1: gradient with respect to mu
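            ### Each entry uses the identity dlogL/dtheta = 0.5*trace((alp*alp' - A^{-1})*dA/dtheta),
            ### with alp = A^{-1}*(y-mu).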
            temp=np.dot(alp[:,None],alp[None,:])
            K2=self.A(X,alpha=alpha,variance=variance)
            for i in range(self.dimension):
                derivative=K2*(-0.5*(alpha[i]**2)*((X[:,i][:,None]-X[:,i][None,:])**2))
                temp3=inverseComp(L,derivative)
                gradient[i]=0.5*np.trace(np.dot(temp,derivative)-temp3)
            
            der=self.K(X,alpha=alpha,variance=variance)
            temp3=inverseComp(L,der)
            gradient[self.dimension]=0.5*np.trace(np.dot(temp,der)-temp3)

            der=np.ones((N,N))
            temp3=inverseComp(L,der)
            gradient[self.dimension+1]=0.5*np.trace(np.dot(temp,der)-temp3)
            return logLike,gradient
        except np.linalg.LinAlgError:
            ### Cholesky factorization failed; fall back to an explicit inverse.
            L=np.linalg.inv(K)
            det=np.linalg.det(K)
            logLike=-0.5*np.dot(y2,np.dot(L,y2))-0.5*N*np.log(2*np.pi)-0.5*np.log(det)
            if gradient==False:
                return logLike
            gradient=np.zeros(self.dimension+2)
            
            alp=np.dot(L,y2)
            temp=np.dot(alp[:,None],alp[None,:])
            K2=self.A(X,alpha=alpha,variance=variance)
            for i in range(self.dimension):
                derivative=K2*(-0.5*(alpha[i]**2)*((X[:,i][:,None]-X[:,i][None,:])**2))
                temp2=np.dot(temp-L,derivative)
                gradient[i]=0.5*np.trace(temp2)
            
            der=self.K(X,alpha=alpha,variance=variance)
            temp2=np.dot(temp-L,der)
            gradient[self.dimension]=0.5*np.trace(temp2)
            
            der=np.ones((N,N))
            temp2=np.dot(temp-L,der)
            gradient[self.dimension+1]=0.5*np.trace(temp2)
            return logLike,gradient
    def logLikelihood(self,X,y,noise=None,alpha=None,variance=None,mu=None,gradient=False):
        """
        Computes the log-likelihood of the GP and, optionally, its gradient.
        The gradient is taken with respect to log(alpha**2) (entries 0 to
        dimension-1), log(variance) (entry dimension), and mu (entry dimension+1).
        
        Args:
            -X: Matrix with the training data.
            -y: Output of the training data.
            -noise: Noise of the outputs.
            -alpha: Hyperparameters of the kernel.
            -variance: Hyperparameter of the kernel.
            -mu: Mean parameter of the GP.
            -gradient: True if we want the gradient; False otherwise.
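
        Example (a minimal usage sketch; gp, Xtrain and ytrain are illustrative
        names for an instance of this class and its training data):
            ll = gp.logLikelihood(Xtrain, ytrain)
            ll, grad = gp.logLikelihood(Xtrain, ytrain, gradient=True)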
        """
        if alpha is None:
            alpha=self.alpha
        if variance is None:
            variance=self.variance
        if mu is None:
            mu=self.mu
        if noise is None:
            K=self.A(X,alpha=alpha,variance=variance)
        else:
            K=self.A(X,alpha=alpha,variance=variance,noise=noise)
        y2=y-mu
        N=X.shape[0]
        try:
            L=np.linalg.cholesky(K)
            alp=inverseComp(L,y2)
            logLike=-0.5*np.dot(y2,alp)-np.sum(np.log(np.diag(L)))-0.5*N*np.log(2.0*np.pi)
            if gradient==False:
                return logLike
            gradient=np.zeros(self.dimension+2)
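            ### Gradient layout: entries 0 to dimension-1 correspond to log(alpha**2),
            ### entry dimension to log(variance), and entry dimension+1 to mu.
            ### Each entry uses dlogL/dtheta = 0.5*trace((alp*alp' - A^{-1})*dA/dtheta).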
            
            temp=np.dot(alp[:,None],alp[None,:])
            K2=self.A(X,alpha=alpha,variance=variance)
            for i in range(self.dimension):
                derivative=K2*(-(0.5/(self.scaleAlpha**2))*(alpha[i]**2)*((X[:,i][:,None]-X[:,i][None,:])**2))
                temp3=inverseComp(L,derivative)
                gradient[i]=0.5*np.trace(np.dot(temp,derivative)-temp3)
            
            der=self.K(X,alpha=alpha,variance=variance)
            temp3=inverseComp(L,der)
            gradient[self.dimension]=0.5*np.trace(np.dot(temp,der)-temp3)

            der=np.ones((N,N))
            temp3=inverseComp(L,der)
            gradient[self.dimension+1]=0.5*np.trace(np.dot(temp,der)-temp3)
            return logLike,gradient
        except np.linalg.LinAlgError:
            ### Cholesky factorization failed; fall back to an explicit inverse.
            print("Cholesky decomposition failed; using np.linalg.inv instead.")
            L=np.linalg.inv(K)
            det=np.linalg.det(K)
            logLike=-0.5*np.dot(y2,np.dot(L,y2))-0.5*N*np.log(2*np.pi)-0.5*np.log(det)
            if gradient==False:
                return logLike
            gradient=np.zeros(self.dimension+2)
            
            alp=np.dot(L,y2)
            temp=np.dot(alp[:,None],alp[None,:])
            K2=self.A(X,alpha=alpha,variance=variance)
            for i in range(self.dimension):
                derivative=K2*(-(0.5/(self.scaleAlpha**2))*(alpha[i]**2)*((X[:,i][:,None]-X[:,i][None,:])**2))
                temp2=np.dot(temp-L,derivative)
                gradient[i]=0.5*np.trace(temp2)
            
            der=self.K(X,alpha=alpha,variance=variance)
            temp2=np.dot(temp-L,der)
            gradient[self.dimension]=0.5*np.trace(temp2)
            
            der=np.ones((N,N))
            temp2=np.dot(temp-L,der)
            gradient[self.dimension+1]=0.5*np.trace(temp2)
            return logLike,gradient