def predict_components(self, Xnew):
    """The predictive density under each component"""
    tmp = [dtrtrs(L, self.Sy_chol_inv, lower=1)[0] for L in self._C_chols]
    B_invs = [phi_hat_i * np.dot(tmp_i.T, tmp_i)
              for phi_hat_i, tmp_i in zip(self.phi_hat, tmp)]
    kx = self.kernF.K(self.X, Xnew)
    try:
        kxx = self.kernF.K(Xnew) + self.kernY.K(Xnew)
    except TypeError:
        # kernY has a hierarchical structure that we should deal with
        con = np.ones((Xnew.shape[0], self.kernY.connections.shape[1]))
        kxx = self.kernF.K(Xnew) + self.kernY.K(Xnew, con)
    # prediction as per my notes
    tmp = [np.eye(self.D) - np.dot(Bi, self.Sf) for Bi in B_invs]
    mu = [mdot(kx.T, tmpi, self.Sy_inv, ybark)
          for tmpi, ybark in zip(tmp, self.ybark.T)]
    var = [kxx - mdot(kx.T, Bi, kx) for Bi in B_invs]
    return mu, var

def dKdiag_dtheta(self, dL_dKdiag, X, target):
    """Derivative of the diagonal of the covariance matrix with respect to the parameters."""
    FX = self._cos(self.basis_alpha[None, :], self.basis_omega[None, :], self.basis_phi[None, :])(X)
    La = np.column_stack((self.a[0] * np.ones((self.n_basis, 1)), self.a[1] * self.basis_omega))
    Lo = np.column_stack((self.basis_omega, self.basis_omega))
    Lp = np.column_stack((self.basis_phi, self.basis_phi + np.pi / 2))
    r, omega, phi = self._cos_factorization(La, Lo, Lp)
    Gint = self._int_computation(r, omega, phi, r, omega, phi)
    Flower = np.array(self._cos(self.basis_alpha, self.basis_omega, self.basis_phi)(self.lower))[:, None]

    # dK_dvar
    dK_dvar = 1. / self.variance * mdot(FX, self.Gi, FX.T)

    # dK_dlen
    da_dlen = [-1. / self.lengthscale**2, 0.]
    dLa_dlen = np.column_stack((da_dlen[0] * np.ones((self.n_basis, 1)), da_dlen[1] * self.basis_omega))
    r1, omega1, phi1 = self._cos_factorization(dLa_dlen, Lo, Lp)
    dGint_dlen = self._int_computation(r1, omega1, phi1, r, omega, phi)
    dGint_dlen = dGint_dlen + dGint_dlen.T
    dG_dlen = 1. / 2 * Gint + self.lengthscale / 2 * dGint_dlen
    dK_dlen = -mdot(FX, self.Gi, dG_dlen / self.variance, self.Gi, FX.T)

    # dK_dper
    dFX_dper = self._cos(-self.basis_alpha[None, :] * self.basis_omega[None, :] / self.period * X, self.basis_omega[None, :], self.basis_phi[None, :] + np.pi / 2)(X)
    dLa_dper = np.column_stack((-self.a[0] * self.basis_omega / self.period, -self.a[1] * self.basis_omega**2 / self.period))
    dLp_dper = np.column_stack((self.basis_phi + np.pi / 2, self.basis_phi + np.pi))
    r1, omega1, phi1 = self._cos_factorization(dLa_dper, Lo, dLp_dper)

    IPPprim1 = self.upper * (1. / (omega + omega1.T) * np.cos((omega + omega1.T) * self.upper + phi + phi1.T - np.pi / 2) + 1. / (omega - omega1.T) * np.cos((omega - omega1.T) * self.upper + phi - phi1.T - np.pi / 2))
    IPPprim1 -= self.lower * (1. / (omega + omega1.T) * np.cos((omega + omega1.T) * self.lower + phi + phi1.T - np.pi / 2) + 1. / (omega - omega1.T) * np.cos((omega - omega1.T) * self.lower + phi - phi1.T - np.pi / 2))
    IPPprim2 = self.upper * (1. / (omega + omega1.T) * np.cos((omega + omega1.T) * self.upper + phi + phi1.T - np.pi / 2) + self.upper * np.cos(phi - phi1.T))
    IPPprim2 -= self.lower * (1. / (omega + omega1.T) * np.cos((omega + omega1.T) * self.lower + phi + phi1.T - np.pi / 2) + self.lower * np.cos(phi - phi1.T))
    IPPprim = np.where(np.isnan(IPPprim1), IPPprim2, IPPprim1)

    IPPint1 = 1. / (omega + omega1.T)**2 * np.cos((omega + omega1.T) * self.upper + phi + phi1.T - np.pi) + 1. / (omega - omega1.T)**2 * np.cos((omega - omega1.T) * self.upper + phi - phi1.T - np.pi)
    IPPint1 -= 1. / (omega + omega1.T)**2 * np.cos((omega + omega1.T) * self.lower + phi + phi1.T - np.pi) + 1. / (omega - omega1.T)**2 * np.cos((omega - omega1.T) * self.lower + phi - phi1.T - np.pi)
    IPPint2 = 1. / (omega + omega1.T)**2 * np.cos((omega + omega1.T) * self.upper + phi + phi1.T - np.pi) + 1. / 2 * self.upper**2 * np.cos(phi - phi1.T)
    IPPint2 -= 1. / (omega + omega1.T)**2 * np.cos((omega + omega1.T) * self.lower + phi + phi1.T - np.pi) + 1. / 2 * self.lower**2 * np.cos(phi - phi1.T)
    IPPint = np.where(np.isnan(IPPint1), IPPint2, IPPint1)

    dLa_dper2 = np.column_stack((-self.a[1] * self.basis_omega / self.period))
    dLp_dper2 = np.column_stack((self.basis_phi + np.pi / 2))
    r2, omega2, phi2 = dLa_dper2.T, Lo[:, 0:1], dLp_dper2.T
    dGint_dper = np.dot(r, r1.T) / 2 * (IPPprim - IPPint) + self._int_computation(r2, omega2, phi2, r, omega, phi)
    dGint_dper = dGint_dper + dGint_dper.T
    dFlower_dper = np.array(self._cos(-self.lower * self.basis_alpha * self.basis_omega / self.period, self.basis_omega, self.basis_phi + np.pi / 2)(self.lower))[:, None]
    dG_dper = 1. / self.variance * (self.lengthscale / 2 * dGint_dper + self.b[0] * (np.dot(dFlower_dper, Flower.T) + np.dot(Flower, dFlower_dper.T)))
    dK_dper = 2 * mdot(dFX_dper, self.Gi, FX.T) - mdot(FX, self.Gi, dG_dper, self.Gi, FX.T)

    target[0] += np.sum(np.diag(dK_dvar) * dL_dKdiag)
    target[1] += np.sum(np.diag(dK_dlen) * dL_dKdiag)
    target[2] += np.sum(np.diag(dK_dper) * dL_dKdiag)

def A_n_analytical(Z_f):
    Z_shaped = Z_f.reshape((M, Dim))
    An = mdot(kernel.K(X[np.newaxis, n, :], Z_shaped), inv(kernel.K(Z_shaped, Z_shaped)))
    # return kernel.gradients_X_(mdot(inv(kernel.K(Z_shaped, Z_shaped)), O), Z_shaped, X[np.newaxis, n, :]) - \
    return kernel.gradients_X(mdot(An.T, mdot(inv(kernel.K(Z_shaped, Z_shaped)), O).T), Z_shaped)

def KL_normal(m1, sigma1, m2, sigma2):
    """
    Calculates the KL divergence between two normal distributions specified by
    N(``m1``, ``sigma1``) and N(``m2``, ``sigma2``).
    """
    return 1. / 2. * (math.log(det(sigma2) / det(sigma1)) - len(m1)
                      + trace(mdot(inv(sigma2), sigma1))
                      + mdot((m2 - m1).T, inv(sigma2), m2 - m1))

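# A minimal standalone sketch of the closed form KL_normal implements above,
# using numpy equivalents of mdot/det/inv/trace as a sanity check. The helper
# name and test values here are hypothetical, not part of the original module.
import numpy as np

def _kl_normal_check(m1, s1, m2, s2):
    s2i = np.linalg.inv(s2)
    d = m2 - m1
    return 0.5 * (np.log(np.linalg.det(s2) / np.linalg.det(s1))
                  - len(m1)
                  + np.trace(s2i.dot(s1))
                  + d.dot(s2i).dot(d))

_m = np.zeros(2)
assert np.isclose(_kl_normal_check(_m, np.eye(2), _m, np.eye(2)), 0.0)  # KL(p||p) = 0
assert _kl_normal_check(_m, np.eye(2), _m, 2 * np.eye(2)) > 0.0         # KL is non-negative
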
def A_n_analytical_vec(Z_f):
    Z_shaped = Z_f.reshape((M, Dim))
    A = mdot(kernel.K(X, Z_shaped), inv(kernel.K(Z_shaped, Z_shaped)))
    An = mdot(kernel.K(X[np.newaxis, n, :], Z_shaped), inv(kernel.K(Z_shaped, Z_shaped)))
    # return (kernel.get_gradients_X_AK(mdot(inv(kernel.K(Z_shaped, Z_shaped)), O).repeat(N, 1), Z_shaped, X)) - \
    return kernel.get_gradients_X_SKD(A, mdot(inv(kernel.K(Z_shaped, Z_shaped)), O).repeat(N, 1), Z_shaped)[1, :, :]

def ll_F_Y(self, F, Y):
    W = F[:, :, :self.P * self.Q].reshape(F.shape[0], F.shape[1], self.P, self.Q)
    f = F[:, :, self.P * self.Q:]
    Wf = np.einsum('ijlk,ijk->ijl', W, f)
    c = 1.0 / 2 * (mdot((Y - Wf), self.sigma_inv) * (Y - Wf)).sum(axis=2)
    return (self.const + -c), (self.const_grad * self.sigma_y + c)

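# The einsum 'ijlk,ijk->ijl' in ll_F_Y is a batched matrix-vector product: for
# every sample i and data point j, Wf[i, j] = W[i, j] @ f[i, j]. A small shape
# check (sketch; the dimensions S, N, P, Q below are hypothetical):
import numpy as np

_S, _N, _P, _Q = 2, 3, 4, 5
_W = np.random.randn(_S, _N, _P, _Q)
_f = np.random.randn(_S, _N, _Q)
_Wf = np.einsum('ijlk,ijk->ijl', _W, _f)
assert _Wf.shape == (_S, _N, _P)
assert np.allclose(_Wf[0, 0], _W[0, 0].dot(_f[0, 0]))
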
def _dcross_K(self, j):
    dc_dK = np.zeros((self.num_inducing, self.num_inducing))
    for k in range(self.num_mog_comp):
        dc_dK += -0.5 * self.MoG.pi[k] * (self.invZ[j]
                                          + mdot(self.MoG.m[k, j, :, np.newaxis], self.MoG.m[k, j, :, np.newaxis].T)
                                          + self.MoG.s[k, j, :, :])
    return dc_dK

def _compute_dL_dR(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf,
                   DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, data_fit, num_data,
                   output_dim, trYYT, Y, VVT_factr=None):
    # the partial derivative vector for the likelihood
    if likelihood.size == 0:
        # save computation here
        dL_dR = None
    elif het_noise:
        if uncertain_inputs:
            raise NotImplementedError("heteroscedastic derivatives with uncertain inputs not implemented")
        else:
            # from ...util.linalg import chol_inv
            # LBi = chol_inv(LB)
            LBi, _ = dtrtrs(LB, np.eye(LB.shape[0]))
            Lmi_psi1, nil = dtrtrs(Lm, psi1.T, lower=1, trans=0)
            _LBi_Lmi_psi1, _ = dtrtrs(LB, Lmi_psi1, lower=1, trans=0)
            dL_dR = -0.5 * beta + 0.5 * VVT_factr**2
            dL_dR += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2, 0))[:, None] * beta**2
            dL_dR += 0.5 * np.sum(mdot(LBi.T, LBi, Lmi_psi1) * Lmi_psi1, 0)[:, None] * beta**2
            dL_dR += -np.dot(_LBi_Lmi_psi1Vf.T, _LBi_Lmi_psi1).T * Y * beta**2
            dL_dR += 0.5 * np.dot(_LBi_Lmi_psi1Vf.T, _LBi_Lmi_psi1).T**2 * beta**2
    else:
        # likelihood is not heteroscedastic
        dL_dR = -0.5 * num_data * output_dim * beta + 0.5 * trYYT * beta**2
        dL_dR += 0.5 * output_dim * (psi0.sum() * beta**2 - np.trace(A) * beta)
        dL_dR += beta * (0.5 * np.sum(A * DBi_plus_BiPBi) - data_fit)
    return dL_dR

def update_kern_grads(self):
    """
    Set the derivative of the lower bound wrt the (kernel) parameters
    """
    tmp = [dtrtrs(L, self.Sy_chol_inv, lower=1)[0] for L in self._C_chols]
    B_invs = [phi_hat_i * np.dot(tmp_i.T, tmp_i)
              for phi_hat_i, tmp_i in zip(self.phi_hat, tmp)]
    # B_invs = [phi_hat_i*mdot(self.Sy_chol_inv.T,Ci,self.Sy_chol_inv) for phi_hat_i, Ci in zip(self.phi_hat,self.C_invs)]

    # here's the mukmukT*Lambda term
    LiSfi = [np.eye(self.D) - np.dot(self.Sf, Bi) for Bi in B_invs]  # seems okay
    tmp1 = [np.dot(LiSfik.T, Sy_inv_ybark_k)
            for LiSfik, Sy_inv_ybark_k in zip(LiSfi, self.Syi_ybark.T)]
    tmp = 0.5 * sum([np.dot(tmpi[:, None], tmpi[None, :]) for tmpi in tmp1])

    # here's the difference in log determinants term
    tmp += -0.5 * sum(B_invs)

    # kernF_grads = np.array([np.sum(tmp*g) for g in self.kernF.extract_gradients()])  # OKAY!
    self.kernF.update_gradients_full(dL_dK=tmp, X=self.X)

    # gradient wrt Sigma_Y
    ybarkybarkT = self.ybark.T[:, None, :] * self.ybark.T[:, :, None]
    Byks = [np.dot(Bi, yk) for Bi, yk in zip(B_invs, self.ybark.T)]
    tmp = sum([np.dot(Byk[:, None], Byk[None, :]) / np.power(ph_k, 3)
               - Syi_ybarkybarkT_Syi / ph_k
               - Bi / ph_k
               for Bi, Byk, yyT, ph_k, Syi_ybarkybarkT_Syi
               in zip(B_invs, Byks, ybarkybarkT, self.phi_hat, self.Syi_ybarkybarkT_Syi)
               if ph_k > 1e-6])
    tmp += (self.K - self.N) * self.Sy_inv
    tmp += mdot(self.Sy_inv, self.YTY, self.Sy_inv)
    tmp /= 2.
    # kernY_grads = np.array([np.sum(tmp*g) for g in self.kernY.extract_gradients()])
    self.kernY.update_gradients_full(dL_dK=tmp, X=self.X)

def predict(self, mu, sigma, Ys, model=None):
    # calculating var
    s = sigma + self.sigma
    alpha = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
    q = np.outer(np.sqrt(2 * s), erfinv(2 * alpha - 1)) + mu
    z = self.warp(model.Y)[0]
    I = argsort(z, axis=0)
    sortz = sort(z, axis=0)
    sortt = model.Y[I]
    quant = self.warpinv(q, self._get_initial_points(q, sortz, sortt), 100)
    var = np.square((quant[:, 8] - quant[:, 0]) / 4)

    # calculating mu via 10-point Gauss-Hermite quadrature
    H = np.array([7.6e-07, 0.0013436, 0.0338744, 0.2401386, 0.6108626,
                  0.6108626, 0.2401386, 0.0338744, 0.0013436, 7.6e-07])
    quard = np.array([-3.4361591, -2.5327317, -1.7566836, -1.0366108, -0.3429013,
                      0.3429013, 1.0366108, 1.7566836, 2.5327317, 3.4361591])
    mu_quad = np.outer(np.sqrt(2 * s), quard) + mu
    mean = self.warpinv(mu_quad, self._get_initial_points(mu_quad, sortz, sortt), 100)
    mean = mdot(mean, H[:, np.newaxis]) / np.sqrt(math.pi)

    lpd = None
    if Ys is not None:
        ts, w = self.warp(Ys)
        lpd = -0.5 * np.log(2 * math.pi * s) - 0.5 * np.square(ts - mu) / s + np.log(w)
        lpd = lpd[:, 0][:, np.newaxis]
    # lpd stays None when no test targets are supplied (indexing None would crash)
    return mean, var[:, np.newaxis], lpd

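# The hard-coded H and quard arrays in predict above are (rounded) 10-point
# Gauss-Hermite quadrature weights and nodes; a sketch regenerating them with
# numpy. Note hermgauss gives a smallest weight of ~7.64e-06, which suggests
# the 7.6e-07 printed for H[0] above may be a truncation.
import numpy as np

_nodes, _weights = np.polynomial.hermite.hermgauss(10)
assert abs(_nodes[-1] - 3.4361591) < 1e-6   # largest node matches quard[-1]
assert abs(_weights[0] - 7.64e-06) < 1e-7   # compare with H[0] above
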
def get_gradients_SKD(self, S, D, X, X2=None):
    r"""
    Assume we have a function Ln whose gradient wrt the hyper-parameters (H) is:

        dLn\\dH = S[n, :] * dK(X, X2)\\dH * D[:, n]

    then this function calculates dLn\\dH for all n.

    Parameters
    ----------
    S : ndarray
        dim(S) = N * M
    D : ndarray
        dim(D) = M * N
    X : ndarray
        dim(X) = M * d, where d is the input dimensionality
    X2 : ndarray
        dim(X2) = M * d

    Returns
    -------
    dL_dH : ndarray
        dL\\dH, a matrix of dimensions N * dim(H), where dim(H) is the
        number of hyper-parameters.
    """
    kernel = self.kernel(X, X2)
    dk_dr = self.grad_kernel_over_dist(X, X2)
    if self.ARD:
        inv_dist = self._inverse_distances(X, X2)
        if X2 is None:
            X2 = X
        variance_gradient, lengthscale_gradient = self._theano_get_gradients_SKD_ARD(
            S, D, X, X2, kernel, inv_dist,
            self.lengthscale.astype(np.float32),
            self.variance[0].astype(np.float32),
            dk_dr)
    else:
        scaled_dist = self._scaled_dist(X, X2)
        variance_gradient = mdot(S, kernel, D) * 1.0 / self.variance
        lengthscale_gradient = np.diagonal(
            -mdot(S, (scaled_dist * dk_dr).T, D) / self.lengthscale)[:, np.newaxis]
    return np.hstack((np.diagonal(variance_gradient)[:, np.newaxis],
                      lengthscale_gradient)).astype(np.float32)

def set_covars(self, raw_covars):
    raw_covars = raw_covars.reshape([self.num_latent, self.get_covar_size()])
    for j in xrange(self.num_latent):
        cholesky = np.zeros([self.num_dim, self.num_dim], dtype=np.float32)
        cholesky[np.tril_indices_from(cholesky)] = raw_covars[j]
        cholesky[np.diag_indices_from(cholesky)] = np.exp(cholesky[np.diag_indices_from(cholesky)])
        self.covars_cholesky[j] = cholesky
        self.covars[j] = mdot(self.covars_cholesky[j], self.covars_cholesky[j].T)

def _b(self, k, j, Aj, Kzx):
    """
    Calculating [b_k]j for latent process ``j`` for all ``k``.

    :returns: an ndarray of dimension N * 1
    """
    return mdot(Aj, self.MoG.m[k, j, :].T)

def _dcorss_dm(self):
    """
    Calculating d cross / dm.
    """
    dcdm = np.empty((self.num_mog_comp, self.num_latent_proc, self.num_inducing))
    for j in range(self.num_latent_proc):
        dcdm[:, j, :] = -mdot(self.Kzz[j, :, :], self.MoG.m[:, j, :].T).T * self.MoG.pi[:, np.newaxis]
    return dcdm

def K(self, X, X2, target):
    """Compute the covariance matrix between X and X2."""
    FX = self._cos(self.basis_alpha[None, :], self.basis_omega[None, :], self.basis_phi[None, :])(X)
    if X2 is None:
        FX2 = FX
    else:
        FX2 = self._cos(self.basis_alpha[None, :], self.basis_omega[None, :], self.basis_phi[None, :])(X2)
    np.add(mdot(FX, self.Gi, FX2.T), target, target)

def set_covars(self, raw_covars):
    raw_covars = raw_covars.reshape([self.num_latent, self.get_covar_size()])
    for j in range(self.num_latent):
        cholesky = np.zeros([self.num_dim, self.num_dim], dtype=util.PRECISION)
        cholesky[np.tril_indices_from(cholesky)] = raw_covars[j]
        cholesky[np.diag_indices_from(cholesky)] = np.exp(cholesky[np.diag_indices_from(cholesky)])
        self.covars_cholesky[j] = cholesky
        self.covars[j] = mdot(self.covars_cholesky[j], self.covars_cholesky[j].T)

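# Sketch of the parameterisation used by set_covars above: the raw vector
# holds the lower triangle of a Cholesky factor, with the diagonal stored in
# log space so the optimiser works with unconstrained values. num_dim and the
# raw vector below are hypothetical stand-ins.
import numpy as np

_num_dim = 3
_raw = np.random.randn(_num_dim * (_num_dim + 1) // 2)  # get_covar_size() per latent process

_L = np.zeros((_num_dim, _num_dim))
_L[np.tril_indices_from(_L)] = _raw
_L[np.diag_indices_from(_L)] = np.exp(_L[np.diag_indices_from(_L)])
_S = _L.dot(_L.T)                          # always a valid covariance matrix
assert np.all(np.linalg.eigvalsh(_S) > 0)  # positive definite by construction
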
def _update(self):
    self.parameters = self.get_parameters()
    for k in range(self.num_comp):
        for j in range(self.num_process):
            temp = np.zeros((self.num_dim, self.num_dim))
            temp[np.tril_indices_from(temp)] = self.L_flatten[k, j, :].copy()
            temp[np.diag_indices_from(temp)] = np.exp(temp[np.diag_indices_from(temp)])
            # temp[np.diag_indices_from(temp)] = temp[np.diag_indices_from(temp)] ** 2
            self.L[k, j, :, :] = temp
            self.s[k, j] = mdot(self.L[k, j, :, :], self.L[k, j, :, :].T)

def _calc_nlpd(self, Ys, Wf):
    lpd = np.empty((Ys.shape[0], Ys.shape[1] + 1))
    c = 1.0 / 2 * (mdot((Ys - Wf), self.sigma_inv) * (Ys - Wf)).sum(axis=2)
    lpd[:, 0] = np.log(np.exp(self.const + -c).mean(axis=0))
    for i in range(Ys.shape[1]):
        c = 1.0 / 2 * (np.square(Ys[:, i] - Wf[:, :, i]) * self.sigma_inv[i, i])
        const = -1.0 / 2 * np.log(self.sigma[i, i]) - 1.0 / 2 * np.log(2 * math.pi)
        lpd[:, i + 1] = np.log(np.exp(const + -c).mean(axis=0))
    return lpd

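# _calc_nlpd computes log(mean(exp(a))) directly; for very negative log
# densities np.exp underflows to zero and the log becomes -inf. A stabler
# alternative (sketch, using scipy's logsumexp, not part of the original code):
import numpy as np
from scipy.special import logsumexp

_a = np.array([-1000.0, -1001.0])
_stable = logsumexp(_a) - np.log(len(_a))           # log-mean-exp, stays finite
assert np.isfinite(_stable)
assert not np.isfinite(np.log(np.exp(_a).mean()))   # direct form underflows to -inf
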
def get_gradients_X_SKD(self, S, D, X):
    r"""
    Assume we have a function Ln whose gradient wrt the location of X is:

        dLn\\dX = S[n, :] * dK(X)\\dX * D[:, n]

    then this function calculates dLn\\dX for all n.

    Parameters
    ----------
    S : ndarray
        dim(S) = N * M
    D : ndarray
        dim(D) = M * N
    X : ndarray
        dim(X) = M * d, where d is the input dimensionality

    Returns
    -------
    dL_dX : ndarray
        dL\\dX, a matrix of dimensions N * d
    """
    X2 = X
    invdist = self._inv_dist(X, X2)
    dL_dr = self.dK_dr_via_X(X, X2)
    tmp = invdist * dL_dr
    if X2 is None:
        tmp = tmp + tmp.T
        X2 = X

    # The high-memory numpy way:
    # d = X[:, None, :] - X2[None, :, :]
    # ret = np.sum(tmp[:,:,None]*d,1)/self.lengthscale**2
    # the lower memory way with a loop
    ret = np.empty(S.shape + (self.input_dim,))
    for q in xrange(self.input_dim):
        ret[:, :, q] = mdot(tmp * (X[:, q][:, None] - X2[:, q][None, :]), D).T * S \
            + mdot(tmp * (X[:, q][:, None] - X2[:, q][None, :]), S.T).T * D.T
    ret /= self.lengthscale**2
    return ret

def get_gradients_SKD(self, S, D, X, X2=None):
    r"""
    Assume we have a function Ln whose gradient wrt the hyper-parameters (H) is:

        dLn\\dH = S[n, :] * dK(X, X2)\\dH * D[:, n]

    then this function calculates dLn\\dH for all n.

    Parameters
    ----------
    S : ndarray
        dim(S) = N * M
    D : ndarray
        dim(D) = M * N
    X : ndarray
        dim(X) = M * d, where d is the input dimensionality
    X2 : ndarray
        dim(X2) = M * d

    Returns
    -------
    dL_dH : ndarray
        dL\\dH, a matrix of dimensions N * dim(H), where dim(H) is the
        number of hyper-parameters.
    """
    variance_gradient = mdot(S, self.K(X, X2), D) * 1. / self.variance
    if X2 is None:
        X2 = X
    if self.ARD:
        rinv = self._inv_dist(X, X2)
        d = X[:, None, :] - X2[None, :, :]
        x_xl3 = np.square(d) * (rinv * self.dK_dr_via_X(X, X2))[:, :, None]
        lengthscale_gradient = -np.tensordot(D, np.tensordot(S, x_xl3, (1, 0)), (0, 1)) / self.lengthscale**3
        lengthscale_gradient = np.diagonal(lengthscale_gradient).T
    else:
        lengthscale_gradient = np.diagonal(
            -mdot(S, (self._scaled_dist(X, X2) * self.dK_dr_via_X(X, X2)).T, D) / self.lengthscale)[:, np.newaxis]
    return np.hstack((np.diagonal(variance_gradient)[:, np.newaxis], lengthscale_gradient))

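# Finite-difference sanity check of the variance gradient computed by
# get_gradients_SKD above: diag(S dK/dvar D) with dK/dvar = K / variance.
# Sketched with a standalone isotropic RBF helper; _rbf and all names below
# are hypothetical (the real code uses self.K and self.dK_dr_via_X).
import numpy as np

def _rbf(X, var, ls):
    r2 = np.square(X[:, None, :] - X[None, :, :]).sum(-1) / ls**2
    return var * np.exp(-0.5 * r2)

_N, _M, _d = 4, 5, 2
_X = np.random.randn(_M, _d)
_S = np.random.randn(_N, _M)
_D = np.random.randn(_M, _N)
_v, _ls, _eps = 1.3, 0.7, 1e-6

_analytic = np.diagonal(_S.dot(_rbf(_X, _v, _ls)).dot(_D)) / _v
_numeric = np.diagonal(_S.dot(_rbf(_X, _v + _eps, _ls) - _rbf(_X, _v - _eps, _ls)).dot(_D)) / (2 * _eps)
assert np.allclose(_analytic, _numeric)
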
def transform_pi_grad(self, p):
    """
    Returns the gradient of the ``p`` array wrt the untransformed parameters,
    i.e., the parameters that will be exposed to the optimiser.

    Parameters
    ----------
    p : ndarray
        input array to calculate its gradient
    """
    return mdot(p, self.dpi_dx())

def _grad_ell_over_covars(self, component_index, conditional_ll, kernel_products, sample_vars, normal_samples):
    grad = np.empty([self.num_latent] + self.gaussian_mixture.get_covar_shape())
    for i in range(self.num_latent):
        s = weighted_average(conditional_ll,
                             (np.square(normal_samples[i]) - 1) / sample_vars[i],
                             self.num_samples)
        grad[i] = (mdot(s, np.square(kernel_products[i]))
                   * self.gaussian_mixture.weights[component_index] / 2.)
    return grad

def _dcross_K(self, j):
    r"""
    Gradient of the cross term of ELBO wrt the kernel of latent process ``j``.

    Returns
    -------
    :returns: dcross \\ dK(Z[j], Z[j]). Dimensions: M * M
    """
    dc_dK = np.zeros((self.num_inducing, self.num_inducing))
    for k in range(self.num_mog_comp):
        dc_dK += -0.5 * self.MoG.pi[k] * (self.invZ[j]
                                          - mdot(self.invZ[j], self.MoG.mmTS(k, j), self.invZ[j]))
    return dc_dK

def chol_grad(L, dM_dx):
    """
    Given that ``L`` is the Cholesky decomposition of x, and ``dM_dx`` is the
    gradient of M wrt x, this function calculates dM \\ dL, where
    L = cholesky(x).

    Returns
    -------
    dM_dL : ndarray
        dM \\ dL
    """
    return mdot(dM_dx + dM_dx.T, L)

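# Numerical check of the identity chol_grad implements: for x = L L^T and a
# scalar function M with gradient dM/dx = A, the gradient wrt the free (lower
# triangular) entries of L is (A + A^T) L. Sketch with M(x) = sum(A * x); the
# test matrices below are hypothetical.
import numpy as np

_n = 3
_A = np.random.randn(_n, _n)
_L = np.tril(np.random.randn(_n, _n)) + 2 * np.eye(_n)

_analytic = np.tril((_A + _A.T).dot(_L))   # chol_grad(L, A), lower triangle

_eps = 1e-6
_numeric = np.zeros((_n, _n))
for _i in range(_n):
    for _j in range(_i + 1):
        _Lp = _L.copy(); _Lp[_i, _j] += _eps
        _Lm = _L.copy(); _Lm[_i, _j] -= _eps
        _numeric[_i, _j] = (np.sum(_A * _Lp.dot(_Lp.T)) - np.sum(_A * _Lm.dot(_Lm.T))) / (2 * _eps)
assert np.allclose(_analytic, _numeric, atol=1e-4)
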
def transform_weights_grad(self, internal_grad):
    """
    Transform a gradient with respect to the internal weights into the
    gradient with respect to the raw weights.

    Parameters
    ----------
    internal_grad : ndarray
        The gradient with respect to the internal weights.
        Dimension: num_components.

    Returns
    -------
    ndarray
        The gradient with respect to the raw weights.
        Dimension: num_components.
    """
    pit = np.repeat(np.array([self.weights.T], dtype=np.float32), self.num_components, 0)
    dpi_dx = pit * (-pit.T + np.eye(self.num_components, dtype=np.float32))
    return mdot(internal_grad, dpi_dx)

def transform_weights_grad(self, internal_grad):
    """
    Transform a gradient with respect to the internal weights into the
    gradient with respect to the raw weights.

    Parameters
    ----------
    internal_grad : ndarray
        The gradient with respect to the internal weights.
        Dimension: num_components.

    Returns
    -------
    ndarray
        The gradient with respect to the raw weights.
        Dimension: num_components.
    """
    pit = np.repeat(np.array([self.weights.T], dtype=util.PRECISION), self.num_components, 0)
    dpi_dx = pit * (-pit.T + np.eye(self.num_components, dtype=util.PRECISION))
    return mdot(internal_grad, dpi_dx)

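# The dpi_dx matrix built in transform_weights_grad is the softmax Jacobian:
# with w = softmax(x), dw_j/dx_i = w_j * (delta_ij - w_i), which is symmetric,
# so mdot(g, dpi_dx) maps a gradient wrt w to a gradient wrt x. A standalone
# finite-difference check (sketch; values below are hypothetical):
import numpy as np

_x = np.random.randn(4)
_w = np.exp(_x) / np.exp(_x).sum()
_g = np.random.randn(4)
_J = _w[None, :] * (np.eye(4) - _w[:, None])   # J[i, j] = dw_j/dx_i

_eps = 1e-6
_numeric = np.empty(4)
for _i in range(4):
    _xp = _x.copy(); _xp[_i] += _eps
    _xm = _x.copy(); _xm[_i] -= _eps
    _numeric[_i] = _g.dot(np.exp(_xp) / np.exp(_xp).sum()
                          - np.exp(_xm) / np.exp(_xm).sum()) / (2 * _eps)
assert np.allclose(_g.dot(_J), _numeric, atol=1e-6)
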
def _cross_dcorss_dpi(self, N):
    """
    Calculating d cross / d pi_k, and also calculating the cross term itself.

    :returns: d cross / d pi, cross
    """
    cross = 0
    d_pi = np.zeros(self.num_mog_comp)
    for j in range(self.num_latent_proc):
        for k in range(self.num_mog_comp):
            d_pi[k] += \
                N * math.log(2 * math.pi) + \
                self.log_detZ[j] + \
                mdot(self.MoG.m[k, j, :].T, self.Kzz[j, :, :], self.MoG.m[k, j, :].T) + \
                self.MoG.tr_AS(self.Kzz[j, :, :], k, j)
    for k in range(self.num_mog_comp):
        cross += self.MoG.pi[k] * d_pi[k]
    d_pi *= -1. / 2
    cross *= -1. / 2
    return cross, d_pi

def _log_likelihood_gradients(self):
    """
    The derivative of the lower bound wrt the (kernel) parameters
    """
    tmp = [dtrtrs(L, self.Sy_chol_inv, lower=1)[0] for L in self._C_chols]
    B_invs = [phi_hat_i * np.dot(tmp_i.T, tmp_i)
              for phi_hat_i, tmp_i in zip(self.phi_hat, tmp)]
    # B_invs = [phi_hat_i*mdot(self.Sy_chol_inv.T,Ci,self.Sy_chol_inv) for phi_hat_i, Ci in zip(self.phi_hat,self.C_invs)]

    # here's the mukmukT*Lambda term
    LiSfi = [np.eye(self.D) - np.dot(self.Sf, Bi) for Bi in B_invs]  # seems okay
    tmp1 = [np.dot(LiSfik.T, Sy_inv_ybark_k)
            for LiSfik, Sy_inv_ybark_k in zip(LiSfi, self.Syi_ybark.T)]
    tmp = 0.5 * sum([np.dot(tmpi[:, None], tmpi[None, :]) for tmpi in tmp1])

    # here's the difference in log determinants term
    tmp += -0.5 * sum(B_invs)

    # kernF_grads = np.array([np.sum(tmp*g) for g in self.kernF.extract_gradients()])  # OKAY!
    kernF_grads = self.kernF.dK_dtheta(tmp, self.X)

    # gradient wrt Sigma_Y
    ybarkybarkT = self.ybark.T[:, None, :] * self.ybark.T[:, :, None]
    Byks = [np.dot(Bi, yk) for Bi, yk in zip(B_invs, self.ybark.T)]
    tmp = sum([np.dot(Byk[:, None], Byk[None, :]) / np.power(ph_k, 3)
               - Syi_ybarkybarkT_Syi / ph_k
               - Bi / ph_k
               for Bi, Byk, yyT, ph_k, Syi_ybarkybarkT_Syi
               in zip(B_invs, Byks, ybarkybarkT, self.phi_hat, self.Syi_ybarkybarkT_Syi)
               if ph_k > 1e-6])
    tmp += (self.K - self.N) * self.Sy_inv
    tmp += mdot(self.Sy_inv, self.YTY, self.Sy_inv)
    tmp /= 2.
    # kernY_grads = np.array([np.sum(tmp*g) for g in self.kernY.extract_gradients()])
    kernY_grads = self.kernY.dK_dtheta(tmp, self.X)

    return np.hstack((kernF_grads, kernY_grads))

def dA_dhyper_mult_x(self, j, X, Aj, m):
    r"""
    Assume:

        dfn \\ dH = dAn \\ dH * m

    where:

        dAn \\ dH = (dK(X[n, :], Z[j]) \\ dH - An dK(Z[j], Z[j]) \\ dH) K(Z[j], Z[j]) ^ -1

    and An = A[n, :]; then this function returns dfn \\ dH for all n:

    :returns: dF \\ dH, where (dF \\ dH)[n] = dfn \\ dH
    """
    w = mdot(self.invZ[j], m)
    return self.kernels[j].get_gradients_AK(w.T, X, self.Z[j]) - \
        self.kernels[j].get_gradients_SKD(Aj, w, self.Z[j])

def dA_dinduc_mult_x(self, j, X, Aj, m):
    r"""
    Assume:

        dfn \\ dZ[j] = dAn \\ dZ[j] * m

    where:

        dAn \\ dZ[j] = (dK(X[n, :], Z[j]) \\ dZ[j] - An dK(Z[j], Z[j]) \\ dZ[j]) K(Z[j], Z[j]) ^ -1

    and An = A[n, :]; then this function returns dfn \\ dZ[j] for all n:

    :returns: dF \\ dZ[j], where (dF \\ dZ[j])[n] = dfn \\ dZ[j]
    """
    w = mdot(self.invZ[j], m)
    return self.kernels[j].get_gradients_X_AK(w, self.Z[j], X) - \
        self.kernels[j].get_gradients_X_SKD(Aj, w, self.Z[j])

def dK_dtheta(self, dL_dK, X, X2, target):
    """Derivative of the covariance matrix with respect to the parameters
    (shape is N x num_inducing x Nparam)."""
    if X2 is None:
        X2 = X
    FX = self._cos(self.basis_alpha[None, :], self.basis_omega[None, :], self.basis_phi[None, :])(X)
    FX2 = self._cos(self.basis_alpha[None, :], self.basis_omega[None, :], self.basis_phi[None, :])(X2)

    La = np.column_stack((self.a[0] * np.ones((self.n_basis, 1)), self.a[1] * self.basis_omega, self.a[2] * self.basis_omega**2))
    Lo = np.column_stack((self.basis_omega, self.basis_omega, self.basis_omega))
    Lp = np.column_stack((self.basis_phi, self.basis_phi + np.pi / 2, self.basis_phi + np.pi))
    r, omega, phi = self._cos_factorization(La, Lo, Lp)
    Gint = self._int_computation(r, omega, phi, r, omega, phi)
    Flower = np.array(self._cos(self.basis_alpha, self.basis_omega, self.basis_phi)(self.lower))[:, None]
    F1lower = np.array(self._cos(self.basis_alpha * self.basis_omega, self.basis_omega, self.basis_phi + np.pi / 2)(self.lower))[:, None]

    # dK_dvar
    dK_dvar = 1. / self.variance * mdot(FX, self.Gi, FX2.T)

    # dK_dlen
    da_dlen = [-6 / self.lengthscale**3, -2 * np.sqrt(3) / self.lengthscale**2, 0.]
    db_dlen = [0., 2 * self.lengthscale / 3.]
    dLa_dlen = np.column_stack((da_dlen[0] * np.ones((self.n_basis, 1)), da_dlen[1] * self.basis_omega, da_dlen[2] * self.basis_omega**2))
    r1, omega1, phi1 = self._cos_factorization(dLa_dlen, Lo, Lp)
    dGint_dlen = self._int_computation(r1, omega1, phi1, r, omega, phi)
    dGint_dlen = dGint_dlen + dGint_dlen.T
    dG_dlen = self.lengthscale**2 / (4 * np.sqrt(3)) * Gint + self.lengthscale**3 / (12 * np.sqrt(3)) * dGint_dlen \
        + db_dlen[0] * np.dot(Flower, Flower.T) + db_dlen[1] * np.dot(F1lower, F1lower.T)
    dK_dlen = -mdot(FX, self.Gi, dG_dlen / self.variance, self.Gi, FX2.T)

    # dK_dper
    dFX_dper = self._cos(-self.basis_alpha[None, :] * self.basis_omega[None, :] / self.period * X, self.basis_omega[None, :], self.basis_phi[None, :] + np.pi / 2)(X)
    dFX2_dper = self._cos(-self.basis_alpha[None, :] * self.basis_omega[None, :] / self.period * X2, self.basis_omega[None, :], self.basis_phi[None, :] + np.pi / 2)(X2)
    dLa_dper = np.column_stack((-self.a[0] * self.basis_omega / self.period, -self.a[1] * self.basis_omega**2 / self.period, -self.a[2] * self.basis_omega**3 / self.period))
    dLp_dper = np.column_stack((self.basis_phi + np.pi / 2, self.basis_phi + np.pi, self.basis_phi + np.pi * 3 / 2))
    r1, omega1, phi1 = self._cos_factorization(dLa_dper, Lo, dLp_dper)

    IPPprim1 = self.upper * (1. / (omega + omega1.T) * np.cos((omega + omega1.T) * self.upper + phi + phi1.T - np.pi / 2) + 1. / (omega - omega1.T) * np.cos((omega - omega1.T) * self.upper + phi - phi1.T - np.pi / 2))
    IPPprim1 -= self.lower * (1. / (omega + omega1.T) * np.cos((omega + omega1.T) * self.lower + phi + phi1.T - np.pi / 2) + 1. / (omega - omega1.T) * np.cos((omega - omega1.T) * self.lower + phi - phi1.T - np.pi / 2))
    IPPprim2 = self.upper * (1. / (omega + omega1.T) * np.cos((omega + omega1.T) * self.upper + phi + phi1.T - np.pi / 2) + self.upper * np.cos(phi - phi1.T))
    IPPprim2 -= self.lower * (1. / (omega + omega1.T) * np.cos((omega + omega1.T) * self.lower + phi + phi1.T - np.pi / 2) + self.lower * np.cos(phi - phi1.T))
    # IPPprim2[0,0] = 2*(self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
    IPPprim = np.where(np.isnan(IPPprim1), IPPprim2, IPPprim1)

    IPPint1 = 1. / (omega + omega1.T)**2 * np.cos((omega + omega1.T) * self.upper + phi + phi1.T - np.pi) + 1. / (omega - omega1.T)**2 * np.cos((omega - omega1.T) * self.upper + phi - phi1.T - np.pi)
    IPPint1 -= 1. / (omega + omega1.T)**2 * np.cos((omega + omega1.T) * self.lower + phi + phi1.T - np.pi) + 1. / (omega - omega1.T)**2 * np.cos((omega - omega1.T) * self.lower + phi - phi1.T - np.pi)
    IPPint2 = 1. / (omega + omega1.T)**2 * np.cos((omega + omega1.T) * self.upper + phi + phi1.T - np.pi) + 1. / 2 * self.upper**2 * np.cos(phi - phi1.T)
    IPPint2 -= 1. / (omega + omega1.T)**2 * np.cos((omega + omega1.T) * self.lower + phi + phi1.T - np.pi) + 1. / 2 * self.lower**2 * np.cos(phi - phi1.T)
    # IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
    IPPint = np.where(np.isnan(IPPint1), IPPint2, IPPint1)

    dLa_dper2 = np.column_stack((-self.a[1] * self.basis_omega / self.period, -2 * self.a[2] * self.basis_omega**2 / self.period))
    dLp_dper2 = np.column_stack((self.basis_phi + np.pi / 2, self.basis_phi + np.pi))
    r2, omega2, phi2 = self._cos_factorization(dLa_dper2, Lo[:, 0:2], dLp_dper2)
    dGint_dper = np.dot(r, r1.T) / 2 * (IPPprim - IPPint) + self._int_computation(r2, omega2, phi2, r, omega, phi)
    dGint_dper = dGint_dper + dGint_dper.T
    dFlower_dper = np.array(self._cos(-self.lower * self.basis_alpha * self.basis_omega / self.period, self.basis_omega, self.basis_phi + np.pi / 2)(self.lower))[:, None]
    dF1lower_dper = np.array(self._cos(-self.lower * self.basis_alpha * self.basis_omega**2 / self.period, self.basis_omega, self.basis_phi + np.pi)(self.lower)
                             + self._cos(-self.basis_alpha * self.basis_omega / self.period, self.basis_omega, self.basis_phi + np.pi / 2)(self.lower))[:, None]
    dG_dper = 1. / self.variance * (self.lengthscale**3 / (12 * np.sqrt(3)) * dGint_dper
                                    + self.b[0] * (np.dot(dFlower_dper, Flower.T) + np.dot(Flower, dFlower_dper.T))
                                    + self.b[1] * (np.dot(dF1lower_dper, F1lower.T) + np.dot(F1lower, dF1lower_dper.T)))
    dK_dper = mdot(dFX_dper, self.Gi, FX2.T) - mdot(FX, self.Gi, dG_dper, self.Gi, FX2.T) + mdot(FX, self.Gi, dFX2_dper.T)

    # np.add(target[:,:,0],dK_dvar, target[:,:,0])
    target[0] += np.sum(dK_dvar * dL_dK)
    # np.add(target[:,:,1],dK_dlen, target[:,:,1])
    target[1] += np.sum(dK_dlen * dL_dK)
    # np.add(target[:,:,2],dK_dper, target[:,:,2])
    target[2] += np.sum(dK_dper * dL_dK)

def Kdiag(self, X, target):
    """Compute the diagonal of the covariance matrix associated to X."""
    FX = self._cos(self.basis_alpha[None, :], self.basis_omega[None, :], self.basis_phi[None, :])(X)
    np.add(target, np.diag(mdot(FX, self.Gi, FX.T)), target)

def mdot_Aj(self, Ajn, Kxnz):
    # note: Kxnz is unused in this implementation
    return mdot(Ajn.T, Ajn)

def A_n(Z_f):
    Z_shaped = Z_f.reshape((M, Dim))
    return mdot(kernel.K(X[np.newaxis, n, :], Z_shaped), inv(kernel.K(Z_shaped, Z_shaped)), O)[0, 0]

# numerical-vs-analytical gradient check for A_n and its vectorised variant
print get_d1(A_n, Z.flatten()).reshape((M, Dim))
print
print A_n_analytical(Z.flatten())
print A_n_analytical_vec(Z.flatten())

O1 = np.random.normal(0, 1, M).reshape((M, 1))
O2 = np.random.normal(0, 1, M).reshape((M, 1))
OO = np.random.normal(0, 1, M * M).reshape((M, M))

print (mdot(O1, O2.T) * OO).sum(axis=1)
print mdot(OO, O2).T * O1.T

def covar_dot_a(self, a, k, j):
    return mdot(np.diag(self.covars[k, j]), a)

def mean_prod_sum_covar(self, component_index, latent_index):
    assert component_index == 0
    return (mdot(self.means[0, latent_index, :, np.newaxis],
                 self.means[0, latent_index, :, np.newaxis].T)
            + self.covars[latent_index])

def ll_F_Y(self, F, Y):
    c = 1.0 / 2 * (mdot((F - Y), self.sigma_inv) * (F - Y)).sum(axis=2)
    return (self.const + -c), None

def covar_dot_a(self, a, component_index, latent_index):
    assert component_index == 0
    return mdot(self.covars[latent_index], a)

def _dell_ds(self, k, j, cond_ll, A, sigma_kj, norm_samples):
    return mdot(A[j].T * self._average(cond_ll, (norm_samples**2 - 1) / sigma_kj[k, j], True), A[j]) \
        * self.MoG.pi[k] / 2.