def logLikelihood(self, X, y, noise=None, alpha=None, variance=None, mu=None, gradient=False):
    """Compute the marginal log-likelihood of the GP and, optionally, its gradient."""
    if alpha is None:
        alpha = self.alpha
    if variance is None:
        variance = self.variance
    if mu is None:
        mu = self.mu
    if noise is None:
        K = self.A(X, alpha=alpha, variance=variance)
    else:
        K = self.A(X, alpha=alpha, variance=variance, noise=noise)
    y2 = y - mu
    N = X.shape[0]
    try:
        L = np.linalg.cholesky(K)                # lower-triangular Cholesky factor of K
        alp = inverseComp(L, y2)                 # alp = K^{-1} (y - mu)
        logLike = -0.5*np.dot(y2, alp) - np.sum(np.log(np.diag(L))) - 0.5*N*np.log(2.0*np.pi)
        if not gradient:
            return logLike
        gradient = np.zeros(self.dimension+2)
        ### entries 0 to n-1: gradient with respect to log(alpha**2)
        ### entry n: gradient with respect to log(variance)
        ### entry n+1: gradient with respect to mu
        temp = np.dot(alp[:, None], alp[None, :])
        K2 = self.A(X, alpha=alpha, variance=variance)
        for i in range(self.dimension):
            # dK/d log(alpha_i**2)
            derivative = K2*(-0.5*(alpha[i]**2)*((X[:, i][:, None] - X[:, i][None, :])**2))
            temp3 = inverseComp(L, derivative)
            gradient[i] = 0.5*np.trace(np.dot(temp, derivative) - temp3)
        der = self.K(X, alpha=alpha, variance=variance)
        temp3 = inverseComp(L, der)
        gradient[self.dimension] = 0.5*np.trace(np.dot(temp, der) - temp3)
        der = np.ones((N, N))
        temp3 = inverseComp(L, der)
        gradient[self.dimension+1] = 0.5*np.trace(np.dot(temp, der) - temp3)
        return logLike, gradient
    except np.linalg.LinAlgError:
        # Fall back to an explicit inverse when the Cholesky factorization fails.
        L = np.linalg.inv(K)
        det = np.linalg.det(K)
        logLike = -0.5*np.dot(y2, np.dot(L, y2)) - 0.5*N*np.log(2*np.pi) - 0.5*np.log(det)
        if not gradient:
            return logLike
        gradient = np.zeros(self.dimension+2)
        alp = np.dot(L, y2)
        temp = np.dot(alp[:, None], alp[None, :])
        K2 = self.A(X, alpha=alpha, variance=variance)
        for i in range(self.dimension):
            # dK/d log(alpha_i**2), same parameterization as the Cholesky branch
            derivative = K2*(-0.5*(alpha[i]**2)*((X[:, i][:, None] - X[:, i][None, :])**2))
            temp2 = np.dot(temp - L, derivative)
            gradient[i] = 0.5*np.trace(temp2)
        temp2 = np.dot(temp - L, K2)
        gradient[self.dimension] = 0.5*np.trace(temp2)
        der = np.ones((N, N))
        temp2 = np.dot(temp - L, der)
        gradient[self.dimension+1] = 0.5*np.trace(temp2)
        return logLike, gradient
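# --- Illustrative sketch (assumption, not part of the original source) ---
# `inverseComp` is defined elsewhere in this repository. From its use above it
# appears to solve K x = b given the lower-triangular Cholesky factor L of K,
# where b may be a vector or a matrix. A minimal helper consistent with that
# usage could look like this (the name `inverseComp_sketch` is hypothetical):

import numpy as np
from scipy.linalg import solve_triangular

def inverseComp_sketch(L, b):
    # Solve (L L^T) x = b with two triangular solves: L z = b, then L^T x = z.
    z = solve_triangular(L, b, lower=True)
    return solve_triangular(L.T, z, lower=False)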
def logLikelihood(self, X, y, noise=None, alpha=None, variance=None, mu=None, gradient=False):
    """
    Computes the log-likelihood and its gradient. The gradient is taken with
    respect to log(variance) and log(alpha**2).

    Args:
        -X: Matrix with the training data.
        -y: Outputs of the training data.
        -noise: Noise of the outputs.
        -alpha: Hyperparameters of the kernel.
        -variance: Hyperparameter of the kernel.
        -mu: Mean parameter of the GP.
        -gradient: True if we want the gradient; False otherwise.
    """
    if alpha is None:
        alpha = self.alpha
    if variance is None:
        variance = self.variance
    if mu is None:
        mu = self.mu
    if noise is None:
        K = self.A(X, alpha=alpha, variance=variance)
    else:
        K = self.A(X, alpha=alpha, variance=variance, noise=noise)
    y2 = y - mu
    N = X.shape[0]
    try:
        L = np.linalg.cholesky(K)                # lower-triangular Cholesky factor of K
        alp = inverseComp(L, y2)                 # alp = K^{-1} (y - mu)
        logLike = -0.5*np.dot(y2, alp) - np.sum(np.log(np.diag(L))) - 0.5*N*np.log(2.0*np.pi)
        if not gradient:
            return logLike
        gradient = np.zeros(self.dimension+2)
        temp = np.dot(alp[:, None], alp[None, :])
        K2 = self.A(X, alpha=alpha, variance=variance)
        for i in range(self.dimension):
            # dK/d log(alpha_i**2)
            derivative = K2*(-(0.5/(self.scaleAlpha**2))*(alpha[i]**2)*((X[:, i][:, None] - X[:, i][None, :])**2))
            temp3 = inverseComp(L, derivative)
            gradient[i] = 0.5*np.trace(np.dot(temp, derivative) - temp3)
        der = self.K(X, alpha=alpha, variance=variance)
        temp3 = inverseComp(L, der)
        gradient[self.dimension] = 0.5*np.trace(np.dot(temp, der) - temp3)
        der = np.ones((N, N))
        temp3 = inverseComp(L, der)
        gradient[self.dimension+1] = 0.5*np.trace(np.dot(temp, der) - temp3)
        return logLike, gradient
    except np.linalg.LinAlgError:
        print("Cholesky decomposition failed; falling back to a direct inverse.")
        L = np.linalg.inv(K)
        det = np.linalg.det(K)
        logLike = -0.5*np.dot(y2, np.dot(L, y2)) - 0.5*N*np.log(2*np.pi) - 0.5*np.log(det)
        if not gradient:
            return logLike
        gradient = np.zeros(self.dimension+2)
        alp = np.dot(L, y2)
        temp = np.dot(alp[:, None], alp[None, :])
        K2 = self.A(X, alpha=alpha, variance=variance)
        for i in range(self.dimension):
            # dK/d log(alpha_i**2), same parameterization as the Cholesky branch
            derivative = K2*(-(0.5/(self.scaleAlpha**2))*(alpha[i]**2)*((X[:, i][:, None] - X[:, i][None, :])**2))
            temp2 = np.dot(temp - L, derivative)
            gradient[i] = 0.5*np.trace(temp2)
        temp2 = np.dot(temp - L, K2)
        gradient[self.dimension] = 0.5*np.trace(temp2)
        der = np.ones((N, N))
        temp2 = np.dot(temp - L, der)
        gradient[self.dimension+1] = 0.5*np.trace(temp2)
        return logLike, gradient
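# --- Illustrative sketch (assumption, not part of the original source) ---
# The gradient entries above use the standard identity
#   d logLike / d theta = 0.5 * tr( (a a^T - K^{-1}) dK/dtheta ),   a = K^{-1} (y - mu),
# with dK/d log(alpha_i**2) = K2 * (-0.5 * alpha_i**2 * (x_i - x_i')**2). The snippet
# below checks that identity by finite differences for a toy squared-exponential
# kernel of the form those derivatives imply (scaleAlpha = 1),
#   K = variance * exp(-0.5 * sum_i alpha_i**2 (x_i - x_i')**2) + noise * I.
# All names here are hypothetical and independent of the class above.

import numpy as np

def toy_loglike(X, y, alpha, variance, noise, mu=0.0):
    diff2 = (X[:, None, :] - X[None, :, :]) ** 2                      # pairwise squared differences
    K = variance * np.exp(-0.5 * np.sum((alpha ** 2) * diff2, axis=2)) + noise * np.eye(len(X))
    y2 = y - mu
    a = np.linalg.solve(K, y2)                                        # a = K^{-1} (y - mu)
    L = np.linalg.cholesky(K)
    logLike = -0.5 * np.dot(y2, a) - np.sum(np.log(np.diag(L))) - 0.5 * len(X) * np.log(2 * np.pi)
    return logLike, K, a

rng = np.random.default_rng(0)
X = rng.normal(size=(6, 2))
y = rng.normal(size=6)
alpha, variance, noise = np.array([0.7, 1.3]), 2.0, 0.1

logLike0, K, a = toy_loglike(X, y, alpha, variance, noise)

# Analytic gradient with respect to log(alpha_0**2), matching the loop above.
diff2_0 = (X[:, 0][:, None] - X[:, 0][None, :]) ** 2
dK = (K - noise * np.eye(len(X))) * (-0.5 * alpha[0] ** 2 * diff2_0)
analytic = 0.5 * np.trace(np.dot(np.outer(a, a) - np.linalg.inv(K), dK))

# Finite-difference check: a step eps in log(alpha_0**2) means alpha_0 -> alpha_0 * exp(eps/2).
eps = 1e-6
alpha_p = alpha.copy()
alpha_p[0] *= np.exp(eps / 2.0)
logLike1, _, _ = toy_loglike(X, y, alpha_p, variance, noise)
print(analytic, (logLike1 - logLike0) / eps)   # the two numbers should agree closely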