Example #1
    def finalize(self):
        """Finalize the fit, utilizing the already inverted Sigma matrix"""
        # Calculate the prediction quantities
        if len(self.dtp) > 0:
            self.MNt_n = np.dot(self.Mp_n.T, sl.cho_solve(self.Np_cf, self.dtp))
            dtNdt = np.dot(self.dtp, sl.cho_solve(self.Np_cf, self.dtp))
        else:
            self.MNt_n = np.zeros(self.Mp_n.shape[1])
            dtNdt = 0.0
        self.dpars_n = np.dot(self.Sigma_n, self.MNt_n + self.phipar_n)

        # TODO: should use dpars, instead of MNt below here???
        self.rp = np.dot(self.Mtot_n, np.dot(self.Sigma_n, self.MNt_n))   # Should be approx~0.0
        self.rr = np.dot(self.Mtot_n, np.dot(self.Sigma_n, self.Mtot_n.T))

        # Calculate the log-likelihood
        logdetN2 = np.sum(np.log(np.diag(self.Np_cf[0])))
        logdetphi2 = 0.5*np.sum(np.log(self.Phivec_n))
        chi2dt = 0.5*dtNdt
        chi2phi = 0.5*np.sum(self.prpars_delta_n**2/self.Phivec_n)
        chi2phi1 = 0.5*np.dot(self.dpars_n, np.dot(self.Sigma_inv_n, self.dpars_n))
        chi2_active = 0.5*np.dot(self.dpars_n, np.dot(self.Sigma_inv_n, self.dpars_n))

        # NOTE: chi2_active is zero _if_ we move to ML solution. We are dpars
        #       away from there. That's why we subtract it from loglik.
        #       Note also that, now, chi2phi1 and chi2_active are the same in
        #       this rescaling
        self.loglik = -logdetN2-logdetphi2-chi2dt-chi2phi+chi2phi1-chi2_active
        self.loglik_ml = -logdetN2-logdetphi2-chi2dt-chi2phi+chi2phi1
Example #2
    def predict(self, y, t):
        """
        Compute the conditional predictive distribution of the model.

        :param y: ``(nsamples,)``
            The observations to condition the model on.

        :param t: ``(ntest,)`` or ``(ntest, ndim)``
            The coordinates where the predictive distribution should be
            computed.

        Returns a tuple ``(mu, cov)`` where

        * **mu** ``(ntest,)`` is the mean of the predictive distribution, and
        * **cov** ``(ntest, ntest)`` is the predictive covariance.

        """
        self.recompute()
        r = self._check_dimensions(y)[self.inds] - self.mean(self._x)
        xs, i = self.parse_samples(t, False)
        alpha = cho_solve(self._factor, r)

        # Compute the predictive mean.
        Kxs = self.kernel(self._x[None, :], xs[:, None])
        mu = np.dot(Kxs, alpha) + self.mean(xs)

        # Compute the predictive covariance.
        cov = self.kernel(xs[:, None], xs[None, :])
        cov -= np.dot(Kxs, cho_solve(self._factor, Kxs.T))

        return mu, cov
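Several of the GP `predict` examples in this listing follow the same Rasmussen & Williams pattern: factor the training covariance once, then reuse the factor for both the predictive mean and covariance. Below is a minimal self-contained sketch of that pattern; the RBF kernel and the toy data are illustrative, not taken from the example's project.

import numpy as np
from scipy.linalg import cho_factor, cho_solve

def rbf(a, b, length=1.0):
    # Squared-exponential kernel between two sets of 1-D points.
    d = a[:, None] - b[None, :]
    return np.exp(-0.5 * (d / length) ** 2)

x = np.linspace(0, 5, 20)          # training inputs (illustrative)
y = np.sin(x)                      # training targets (illustrative)
xs = np.linspace(0, 5, 100)        # test inputs

K = rbf(x, x) + 1e-8 * np.eye(len(x))    # jitter keeps the factorization stable
factor = cho_factor(K)
alpha = cho_solve(factor, y)             # K^{-1} y, reused for the mean

Kxs = rbf(xs, x)
mu = Kxs @ alpha                                      # predictive mean
cov = rbf(xs, xs) - Kxs @ cho_solve(factor, Kxs.T)    # predictive covariance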
Example #3
File: segdetrender.py Project: Cadair/k2sc
    def predict(self, pv, flux=None, inputs=None, inputs_pred=None, mean_only=True, splits=None):
        flux = flux if flux is not None else self.data.masked_flux
        iptr = inputs if inputs is not None else self.data.masked_inputs
        ippr = inputs_pred if inputs_pred is not None else iptr

        K0 = self.compute_cmat(pv, iptr, iptr, add_wn=False, splits=splits)
        K  = K0 + self._pv[-1]**2 * identity(K0.shape[0])
        if inputs_pred is None:
            Ks  = K0.copy()
            Kss = K.copy()
        else:
            Ks  = self.compute_cmat(pv, ippr, ippr, add_wn=False, splits=splits)
            Kss = self.compute_cmat(pv, ippr, ippr, add_wn=True, splits=splits)

        L = sla.cho_factor(K)
        b = sla.cho_solve(L, flux)
        mu = dot(Ks, b)

        if mean_only:
            return mu
        else:
            b = sla.cho_solve(L, Ks.T)
            cov = Kss - dot(Ks, b)
            err = np.sqrt(diag(cov))
            return mu, err
Example #4
File: linalg.py Project: bwohlberg/sporco
def cho_solve_ATAI(A, rho, b, c, lwr, check_finite=True):
    r"""
    Solve the linear system :math:`(A^T A + \rho I)\mathbf{x} = \mathbf{b}`
    or :math:`(A^T A + \rho I)X = B` using :func:`scipy.linalg.cho_solve`.

    Parameters
    ----------
    A : array_like
      Matrix :math:`A`
    rho : float
      Scalar :math:`\rho`
    b : array_like
      Vector :math:`\mathbf{b}` or matrix :math:`B`
    c : array_like
      Matrix containing lower or upper triangular Cholesky factor,
      as returned by :func:`scipy.linalg.cho_factor`
    lwr : bool
      Flag indicating whether the factor is lower or upper triangular

    Returns
    -------
    x : ndarray
      Solution to the linear system
    """

    N, M = A.shape
    if N >= M:
        x = linalg.cho_solve((c, lwr), b, check_finite=check_finite)
    else:
        x = (b - A.T.dot(linalg.cho_solve((c, lwr), A.dot(b),
                                          check_finite=check_finite))) / rho
    return x
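The `c, lwr` pair expected above is exactly what `scipy.linalg.cho_factor` returns. A minimal sketch of how a caller might prepare the factor for the over-determined case (N >= M); the random `A` and `b` are made up, and the function `cho_solve_ATAI` defined above is assumed to be in scope:

import numpy as np
from scipy import linalg

rng = np.random.default_rng(0)
A = rng.standard_normal((50, 10))        # N >= M, so A^T A + rho*I is factored directly
b = rng.standard_normal(10)
rho = 0.5

c, lwr = linalg.cho_factor(A.T @ A + rho * np.eye(A.shape[1]))
x = cho_solve_ATAI(A, rho, b, c, lwr)

# sanity check against a dense solve
assert np.allclose(x, np.linalg.solve(A.T @ A + rho * np.eye(A.shape[1]), b))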
Example #5
    def rakeDistortionlessFilters(self, source, interferer, R_n, delay=0.03, epsilon=5e-3):
        '''
        Compute time-domain filters of a beamformer minimizing noise and interference
        while forcing a distortionless response towards the source.
        '''

        H = buildRIRMatrix(self.R, (source, interferer), self.Lg, self.Fs, epsilon=epsilon, unit_damping=True)
        L = H.shape[1] // 2

        # We first assume the samples are uncorrelated
        K_nq = np.dot(H[:,L:], H[:,L:].T) + R_n

        # constraint
        kappa = int(delay*self.Fs)
        A = H[:,:L]
        b = np.zeros((L,1))
        b[kappa,0] = 1

        # filter computation
        C = la.cho_factor(K_nq, overwrite_a=True, check_finite=False)
        B = la.cho_solve(C, A)
        D = np.dot(A.T, B)
        C = la.cho_factor(D, overwrite_a=True, check_finite=False)
        x = la.cho_solve(C, b)
        g_val = np.dot(B, x)

        # reshape and store
        self.filters = g_val.reshape((self.M, self.Lg))

        # compute and return SNR
        A = np.dot(g_val.T, H[:,:L])
        num = np.dot(A, A.T)
        denom =  np.dot(np.dot(g_val.T, K_nq), g_val)

        return num/denom
Example #6
 def _calculate_log_likelihood(self):
     #if self.m == None:
     #    Give error message
     R = zeros((self.n, self.n))
     X,Y = array(self.X), array(self.Y)
     thetas = 10.**self.thetas
     for i in range(self.n):
         for j in arange(i+1,self.n):
             R[i,j] = (1-self.nugget)*e**(-sum(thetas*(X[i]-X[j])**2.)) #weighted distance formula
     R = R + R.T + eye(self.n)
     self.R = R
     one = ones(self.n)
     try:
         self.R_fact = cho_factor(R)
         rhs = vstack([Y, one]).T
         R_fact = (self.R_fact[0].T,not self.R_fact[1])
         cho = cho_solve(R_fact, rhs).T
         
         self.mu = dot(one,cho[0])/dot(one,cho[1])
         self.sig2 = dot(Y-dot(one,self.mu),cho_solve(self.R_fact,(Y-dot(one,self.mu))))/self.n
         #self.log_likelihood = -self.n/2.*log(self.sig2)-1./2.*log(abs(det(self.R)+1.e-16))-sum(thetas)
         self.log_likelihood = -self.n/2.*log(self.sig2)-1./2.*log(abs(det(self.R)+1.e-16))
     except (linalg.LinAlgError,ValueError):
         #------LSTSQ---------
         self.R_fact = None #reset this to none, so we know not to use cholesky
         #self.R = self.R+diag([10e-6]*self.n) #improve conditioning[Booker et al., 1999]
         rhs = vstack([Y, one]).T
         lsq = lstsq(self.R.T,rhs)[0].T
         self.mu = dot(one,lsq[0])/dot(one,lsq[1])
         self.sig2 = dot(Y-dot(one,self.mu),lstsq(self.R,Y-dot(one,self.mu))[0])/self.n
         self.log_likelihood = -self.n/2.*log(self.sig2)-1./2.*log(abs(det(self.R)+1.e-16))
Example #7
    def loglik_full(self, l_a, l_rho, Agw, gammagw):
        """
        Given all these parameters, calculate the full likelihood

        @param l_a:     List of Fourier coefficient arrays for all pulsars
        @param l_rho:   List of arrays of log10(PSD) amplitudes for all pulsars
        @param Agw:     log10(GW amplitude)
        @param gammagw: GWB spectral index

        @return:        Log-likelihood
        """
        # Transform the GWB parameters to PSD coefficients (pc)
        pc_gw = self.gwPSD(Agw, gammagw)
        
        rv = 0.0
        for ii, freq in enumerate(self.freqs):
            a_cos = l_a[:,2*ii]         # Cosine modes for f=freq
            a_sin = l_a[:,2*ii+1]       # Sine modes for f=freq
            rho = l_rho[:,ii]           # PSD amp for f=freq

            # Covariance matrix is the same for sine and cosine modes
            cov = np.diag(10**rho) + self.hdmat * pc_gw[ii]
            cf = sl.cho_factor(cov)
            logdet = 2*np.sum(np.log(np.diag(cf[0])))
            
            # Add the log-likelihood for the cosine and the sine modes
            rv += -0.5 * np.dot(a_cos, sl.cho_solve(cf, a_cos)) - \
                   0.5 * np.dot(a_sin, sl.cho_solve(cf, a_sin)) - \
                   2*self.Npsr*np.log(2*np.pi) - logdet

        return rv
Example #8
File: gp_base.py Project: jeffhsu3/limix
    def _update_cache(self):
        """
        INPUT:
        hyperparams:  dictionary
        OUTPUT: dictionary with the fields
        K:     kernel
        Kinv:  inverse of the kernel
        L:     chol(K)
        alpha: solve(K,y)
        W:     D*Kinv * alpha*alpha^T
        """
        cov_params_have_changed = self.covar.params_have_changed

        if cov_params_have_changed or self.Y_has_changed:
            K = self.covar.K()
            L = LA.cholesky(K).T# lower triangular
            Kinv = LA.cho_solve((L,True),SP.eye(L.shape[0]))
            alpha = LA.cho_solve((L,True),self.Y)
            W = self.t*Kinv - SP.dot(alpha,alpha.T)
            self._covar_cache = {}
            self._covar_cache['K'] = K
            self._covar_cache['Kinv'] = Kinv
            self._covar_cache['L'] = L
            self._covar_cache['alpha'] = alpha
            self._covar_cache['W'] = W

        return self._covar_cache
Example #9
File: pyBLP.py Project: joonro/BLP-Python
    def cal_varcov(self, θ2_vec):
        """calculate variance covariance matrix"""
        θ2, ix_θ2_T, Z, LinvW, X1 = self.θ2, self.ix_θ2_T, self.Z, self.LinvW, self.X1

        θ2.T[ix_θ2_T] = θ2_vec

        # update δ
        δ = self.cal_δ(θ2)

        jacob = self.cal_jacobian(θ2, δ)

        θ1, ξ = self.cal_θ1_and_ξ(δ)

        Zres = Z * ξ.reshape(-1, 1)
        Ω = Zres.T @ Zres  # covariance of the momconds

        G = (np.c_[X1, jacob].T @ Z).T  # gradient of the momconds

        WG = cho_solve(LinvW, G)
        WΩ = cho_solve(LinvW, Ω)

        tmp = solve(G.T @ WG, G.T @ WΩ @ WG).T  # G'WΩWG(G'WG)^(-1) part

        varcov = solve((G.T @ WG), tmp)

        return varcov
Example #10
    def get_covariances(self,hyperparams):
        """
        INPUT:
        hyperparams:  dictionary
        OUTPUT: dictionary with the fields
        K:     kernel
        Kinv:  inverse of the kernel
        L:     chol(K)
        alpha: solve(K,y)
        W:     D*Kinv * alpha*alpha^T
        """
        if self._is_cached(hyperparams):
            return self._covar_cache

        K = self.covar.K(hyperparams['covar'])
        
        if self.likelihood is not None:
            Knoise = self.likelihood.K(hyperparams['lik'],self.n)
            K += Knoise
            
        L = LA.cholesky(K).T# lower triangular
        alpha = LA.cho_solve((L,True),self.Y)
        Kinv = LA.cho_solve((L,True),SP.eye(L.shape[0]))
        W = self.t*Kinv - SP.dot(alpha,alpha.T)
        self._covar_cache = {}
        self._covar_cache['K'] = K
        self._covar_cache['Kinv'] = Kinv
        self._covar_cache['L'] = L
        self._covar_cache['alpha'] = alpha
        self._covar_cache['W'] = W
        self._covar_cache['hyperparams'] = copy.deepcopy(hyperparams) 
        return self._covar_cache
Example #11
File: gp2kronSum.py Project: PMBio/mtSet
    def _LMLgrad_covar_debug(self,covar):

        assert self.N*self.P<2000, 'gp2kronSum:: N*P>=2000'

        y  = SP.reshape(self.Y,(self.N*self.P), order='F') 

        K  = SP.kron(self.Cg.K(),self.XX)
        K += SP.kron(self.Cn.K()+self.offset*SP.eye(self.P),SP.eye(self.N))

        cholK = LA.cholesky(K).T
        Ki  = LA.cho_solve((cholK,True),SP.eye(y.shape[0]))
        Kiy   = LA.cho_solve((cholK,True),y)

        if covar=='Cr':     n_params = self.Cr.getNumberParams()
        elif covar=='Cg':   n_params = self.Cg.getNumberParams()
        elif covar=='Cn':   n_params = self.Cn.getNumberParams()

        RV = SP.zeros(n_params)

        for i in range(n_params):
            #0. calc grad_i
            if covar=='Cg':
                C   = self.Cg.Kgrad_param(i)
                Kgrad  = SP.kron(C,self.XX)
            elif covar=='Cn':
                C   = self.Cn.Kgrad_param(i)
                Kgrad  = SP.kron(C,SP.eye(self.N))

            #1. der of log det
            RV[i]  = 0.5*(Ki*Kgrad).sum()
            
            #2. der of quad form
            RV[i] -= 0.5*(Kiy*SP.dot(Kgrad,Kiy)).sum()

        return RV
Example #12
File: gp.py Project: catniplab/vLGP
def elbo(params, mask, *args):
    """ELBO with full posterior covariance matrix"""
    t, mu, post_cov = args
    K, dK = kernel(t, params)
    dK *= mask[np.newaxis, np.newaxis, :]
    try:
        L = cholesky(K, lower=True)
    except LinAlgError:
        return -np.inf, np.zeros_like(params)

    Kinv = cho_solve((L, True), np.eye(K.shape[0]))  # K inverse

    if mu.ndim == 1:
        mu = mu[:, np.newaxis]

    alpha = cho_solve((L, True), mu)
    ll_dims = -0.5 * np.einsum("ik,ik->k", mu, alpha)
    tmp = np.einsum("ik,jk->ijk", alpha, alpha)
    tmp -= Kinv[:, :, np.newaxis]

    for i in range(post_cov.shape[-1]):
        KinvSigma = cho_solve((L, True), post_cov[:, :, i])
        ll_dims[i] -= 0.5 * np.trace(KinvSigma)
        tmp[:, :, i] += KinvSigma @ Kinv

    ll_dims -= np.log(np.diag(L)).sum()
    ll = ll_dims.sum(-1)

    dll_dims = 0.5 * np.einsum("ijl,ijk->kl", tmp, dK)
    dll = dll_dims.sum(-1)

    return ll, dll
Example #13
    def predict(self, y, t):
        """
        Compute the conditional predictive distribution of the model.

        :param y: ``(nsamples, )``
            The observations to condition the model on.

        :param t: ``(ntest, )``
            The coordinates where the predictive distribution should be
            computed.

        :returns mu: ``(ntest, )``
            The mean of the predictive distribution.

        :returns cov: ``(ntest, ntest)``
            The predictive covariance.

        """
        r = self._check_dimensions(y)
        xs, i = self._parse_samples(t, False)
        alpha = cho_solve(self._factor, r)

        # Compute the predictive mean.
        Kxs = self._kernel(self._x[None, :], xs[:, None])
        mu = np.dot(Kxs, alpha)

        # Compute the predictive covariance.
        cov = self._kernel(xs[:, None], xs[None, :])
        cov -= np.dot(Kxs, cho_solve(self._factor, Kxs.T))

        return mu, cov
Example #14
File: gp.py Project: ninjin/spearmint-lite
        def grad_nlogprob(hypers):
            amp2  = np.exp(hypers[0])
            noise = np.exp(hypers[1])
            ls    = np.exp(hypers[2:])

            chol, corr, grad_corr = memoize(amp2, noise, ls)
            solve   = spla.cho_solve((chol, True), diffs)
            inv_cov = spla.cho_solve((chol, True), np.eye(chol.shape[0]))

            jacobian = np.outer(solve, solve) - inv_cov

            grad = np.zeros(self.D + 2)

            # Log amplitude gradient.
            grad[0] = 0.5 * np.trace(np.dot( jacobian, corr + 1e-6*np.eye(chol.shape[0]))) * amp2

            # Log noise gradient.
            grad[1] = 0.5 * np.trace(np.dot( jacobian, np.eye(chol.shape[0]))) * noise

            # Log length scale gradients.
            for dd in range(self.D):
                grad[dd+2] = 1 * np.trace(np.dot( jacobian, -amp2*grad_corr[:,:,dd]*comp[:,dd][:,np.newaxis]/(np.exp(ls[dd]))))*np.exp(ls[dd])

            # Roll in the prior variance.
            #grad -= 2*hypers/self.hyper_prior

            return -grad
Example #15
def compute_logprod_derivative(Alup, dA, B, dB):
    """ I = logdet(A)+Tr(inv(A)*B)
        dI/dx = Tr(inv(A)*(dA - dA*inv(A)*B + dB) """

    tmp = lalg.cho_solve(Alup, B, check_finite=False)
    tmp2 = dA + dB - dA.dot(tmp)
    return np.trace(lalg.cho_solve(Alup, tmp2, check_finite=False))
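The identity in the docstring follows from d(logdet A)/dx = Tr(A^{-1} dA) and d(Tr(A^{-1} B))/dx = Tr(-A^{-1} dA A^{-1} B + A^{-1} dB). A small finite-difference check of the helper, with made-up symmetric positive-definite matrices and assuming the function above (and its `lalg`/`np` imports) is in scope:

import numpy as np
import scipy.linalg as lalg

rng = np.random.default_rng(1)
M = rng.standard_normal((5, 5))
A0 = M @ M.T + 5 * np.eye(5)             # SPD base matrix
dA = np.eye(5)                           # dA/dx
B0 = rng.standard_normal((5, 5))
dB = rng.standard_normal((5, 5))

def obj(x):
    # I(x) = logdet(A0 + x*dA) + Tr((A0 + x*dA)^{-1} (B0 + x*dB))
    A = A0 + x * dA
    B = B0 + x * dB
    sign, logdet = np.linalg.slogdet(A)
    return logdet + np.trace(np.linalg.solve(A, B))

Alup = lalg.cho_factor(A0)
analytic = compute_logprod_derivative(Alup, dA, B0, dB)
eps = 1e-6
numeric = (obj(eps) - obj(-eps)) / (2 * eps)   # central difference at x = 0
assert np.isclose(analytic, numeric, rtol=1e-4)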
Example #16
File: gp2kronSumLR.py Project: PMBio/mtSet
    def LMLdebug(self):
        """
        LML function for debug
        """
        assert self.N*self.P<5000, 'gp2kronSum:: N*P>=5000'

        y = SP.reshape(self.Y,(self.N*self.P), order='F') 
        V = SP.kron(SP.eye(self.P),self.F)

        XX = SP.dot(self.Xr,self.Xr.T)
        K  = SP.kron(self.Cr.K(),XX)
        K += SP.kron(self.Cn.K()+self.offset*SP.eye(self.P),SP.eye(self.N))

        # inverse of K
        cholK = LA.cholesky(K)
        Ki = LA.cho_solve((cholK,False),SP.eye(self.N*self.P))

        # Areml and inverse
        Areml = SP.dot(V.T,SP.dot(Ki,V))
        cholAreml = LA.cholesky(Areml)
        Areml_i = LA.cho_solve((cholAreml,False),SP.eye(self.K*self.P))

        # effect sizes and z
        b = SP.dot(Areml_i,SP.dot(V.T,SP.dot(Ki,y)))
        z = y-SP.dot(V,b)
        Kiz = SP.dot(Ki,z)

        # lml
        lml  = y.shape[0]*SP.log(2*SP.pi)
        lml += 2*SP.log(SP.diag(cholK)).sum()
        lml += 2*SP.log(SP.diag(cholAreml)).sum()
        lml += SP.dot(z,Kiz)
        lml *= 0.5

        return lml
Example #17
    def find_likelihood_der(self, X, y):
        """
        Find the negative log likelihood and its partial derivatives.

        Parameters
        ----------

        Returns
        -------
        """
        n = len(X)
        K = self.cf.eval(X)

        #if len(self.krnds)!=K.shape[0]:
        #    print "Created new self.krnds!"
        #    self.krnds = np.random.randn(K.shape[0])*10**-6
        #K = K + np.eye(K.shape[0])*self.krnds        

        L = np.linalg.cholesky(K) # Problems using this on the cluster - bad scaling! Running time becomes really bad with large N. Solution: Update ATLAS
        #L = la.cholesky(K)
        #print np.linalg.solve(L.T, np.linalg.solve(L, y))
        #a = np.linalg.solve(L.T, np.linalg.solve(L, y))
        a = la.cho_solve((L, True), y)

        nll = 0.5*np.dot(y.T, a) + np.sum(np.log(np.diag(L))) + 0.5*n*np.log(2*np.pi)
        ders = np.zeros(len(self.cf.get_params()))
        #W = np.linalg.solve(L.T, np.linalg.solve(L, np.eye(n))) - a*a.T

        W = la.cho_solve((L, True), np.eye(n))  - a*a.T
        
        for i in range(len(self.cf.get_params())):
            ders[i] = np.sum(W*self.cf.derivative(X, i))/2
        return nll[0,0], ders
Example #18
File: pygp_model.py Project: sfalkner/RoBO
 def predict_variance(self, X1, X2):
     if self.m == None:
         print("ERROR: Model has to be trained first.")
         return None
     LX1 = spla.cho_solve((self.m.posterior.L, True), self.kernel.getCovMatrix(self.X, X1, "cross"))
     LX2 = spla.cho_solve((self.m.posterior.L, True), self.kernel.getCovMatrix(self.X, X2, "cross"))
     var = self.kernel.getCovMatrix(X1, X2, "cross") - np.dot(LX1.T, LX2)
     return var
Example #19
	def E_step(self):
		M = np.dot(self.W.T,self.W) + np.eye(self.q)*self.sigma2
		#M_inv = np.linalg.inv(M)
		#self.m_Z = np.dot(M_inv,np.dot(self.W.T,self.X2.T)).T
		#self.S_z = M_inv*self.sigma2
		M_chol = linalg.cholesky(M)
		M_inv = linalg.cho_solve((M_chol,1),np.eye(self.q))
		self.m_Z = linalg.cho_solve((M_chol,1),np.dot(self.W.T,self.X2.T)).T
		self.S_z = M_inv*self.sigma2
Example #20
def remove_affine(p, q, q_factor=None, skip_factorization=False):
    """Removes an (unknown) affine transform between two matrixes.

    Given two arrays of the same size, `p` and `q`, finds a matrix `A` and
    column vector `t` such that

    `p = A * q + t`

    in the least-squares sense, and then computes `qnew = A * q + t`. (Notation:
    `matrix + vector` implies the vector is added to each column of the matrix.)

    NB: `p` and the returned `qnew` will be equal if and only if `p` is
    generated from `q` via an affine transform (no noise).

    Returns `(qnew, q_factor, Ahat, that)`. `q_factor` is a matrix factorization
    that can greatly speed up subsequent calls to remove_affine *with the same
    `q`*. If your `q` stays the same for multiple calls, cache `q_factor` and
    pass it in as a keyword argument; `q_factor` won't change from call to call.
    However, if your `q` changes from call to call, ignore `q_factor` and pass in
    `skip_factorization=True` to avoid even calculating it. `Ahat` and `that`
    are the estimated values of `A` and `t`.

    NB2: the default `q_factor=None` will trigger computation of the
    factorization unless `skip_factorization=True`. Non-`None` `q_factor` will
    be trusted: no checks will be performed to make sure the given `q_factor` is
    indeed generated by the `q` you pass in. (Example: for `q.shape` of (2, 22),
    the speedup from using `q_factor` is 1.4x with skip_factorization=False, and
    1.3x the case with skip_factorization=True, on a 2009 Mac Book Pro.)

    Implements the algorithm described in H. Spath, "Fitting affine and
    orthogonal transformations between two sets of points" in *Mathematical
    Communications*, vol. 9 (2004), pp. 27--34. http://hrcak.srce.hr/file/1425

    """

    qaug = np.vstack([q, np.ones_like(q[0, :])])
    if q_factor is None:
        Q = np.dot(qaug, qaug.T)

        if skip_factorization:
            sol = la.lstsq(Q, np.dot(qaug, p.T))[0]
            q_factor = None

        else:
            q_factor = scila.cho_factor(Q)
            sol = scila.cho_solve(q_factor, np.dot(qaug, p.T))

    else:
        sol = scila.cho_solve(q_factor, np.dot(qaug, p.T))

    # sol.shape is (n+1, n), for n=p.shape[0]
    Ahat = sol[:-1, :].T  # top square matrix of sol, transposed
    that = sol[-1:, :].T  # bottom row vector of sol, transposed
    qnew = np.dot(Ahat, q) + that
    return (qnew, lambda x: Ahat @ x + that,
            lambda t: np.linalg.lstsq(Ahat, t - that, rcond=None)[0], Ahat, that)
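A quick usage sketch, unpacking the five values the code above actually returns (the docstring mentions four). The 2-D points and the affine transform below are made up to exercise the function, and `remove_affine` with its imports is assumed to be in scope:

import numpy as np

rng = np.random.default_rng(2)
A_true = np.array([[2.0, 0.3], [-0.1, 1.5]])
t_true = np.array([[1.0], [-2.0]])

q = rng.standard_normal((2, 30))                  # 2-D points, one per column
p = A_true @ q + t_true                           # exact affine image of q

qnew, fwd, inv, Ahat, that = remove_affine(p, q)
assert np.allclose(qnew, p)                       # exact recovery when there is no noise
assert np.allclose(Ahat, A_true) and np.allclose(that, t_true)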
Example #21
    def multivariate_t_pdf(self, nu, cov_det, d, scaleT, centered, L):
        L *= scaleT
        linalg.cho_solve((L, True), centered, overwrite_b=True,
                 check_finite=False)
        inv = centered.T.dot(centered)  # (L^-1b)^T(L^-1b)

        # Log Multivariate T - PDF
        return gammaln((nu + d) / 2.) - \
            (gammaln(nu / 2.) + (d / 2.) * (log(nu) + log(pi))
            + (0.5 * cov_det) + ((nu + d) / 2.) * log(1. + inv/nu))
Example #22
File: mogpr.py Project: danieljtait/pydygp
    def log_marginal_likelihood(self, theta=None, eval_gradient=False):
        if theta is None:
            if eval_gradient:
                raise ValueError(
                    "Gradient can only be evaluated for theta!=None")
            return self.log_marginal_likelihood_value_
        kernel = self.kernel_.clone_with_theta(theta)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            if eval_gradient:
                K, K_gradient = kernel(self.X_train_, eval_gradient=True)
            else:
                K = kernel(self.X_train_)

        #check finite
        if np.isnan(K).any() or np.isinf(K).any():
            return (-np.inf, np.zeros_like(theta)) \
                   if eval_gradient else -np.inf

        K[np.diag_indices_from(K)] += self.alpha

        try:
            L = cholesky(K, lower=True)
        except np.linalg.LinAlgError:
            return (-np.inf, np.zeros_like(theta)) \
                if eval_gradient else -np.inf

        # Support multi-dimensional output of self.y_train_
        y_train = self.y_train_
        if y_train.ndim == 1:
            y_train = y_train[:, np.newaxis]

        alpha = cho_solve((L, True), y_train)  # Line 3

        # Compute log-likelihood (compare line 7)
        log_likelihood_dims = -0.5 * np.einsum("ik,ik->k", y_train, alpha)
        log_likelihood_dims -= np.log(np.diag(L)).sum()
        log_likelihood_dims -= K.shape[0] / 2 * np.log(2 * np.pi)
        log_likelihood = log_likelihood_dims.sum(-1)  # sum over dimensions

        if eval_gradient:  # compare Equation 5.9 from GPML
            tmp = np.einsum("ik,jk->ijk", alpha, alpha)  # k: output-dimension
            tmp -= cho_solve((L, True), np.eye(K.shape[0]))[:, :, np.newaxis]
            # Compute "0.5 * trace(tmp.dot(K_gradient))" without
            # constructing the full matrix tmp.dot(K_gradient) since only
            # its diagonal is required
            log_likelihood_gradient_dims = \
                0.5 * np.einsum("ijl,ijk->kl", tmp, K_gradient)
            log_likelihood_gradient = log_likelihood_gradient_dims.sum(-1)

        if eval_gradient:
            return log_likelihood, log_likelihood_gradient
        else:
            return log_likelihood
Example #23
	def likelihood_prior(self, mu, Sigma, k, R_S_mu = None, log_det_Q = None, R_S = None, switchprior = False):
			"""
					Computes the prior that is 
					\pi( \mu | \theta[k], \Sigma[k]) \pi(\Sigma| Q[k], \nu[k]) = 
					N(\mu; \theta[k], \Sigma[k]) IW(\Sigma; Q[k], \nu[k]) 

					If switchprior = True, special values of nu and Sigma_mu
					are used if the parameters nu_sw and Sigma_mu_sw are set
					respectively. This enables use of "relaxed" priors
					facilitating label switch. NB! This makes the kernel
					non-symmetric, hence it cannot be used in a stationary state.
			"""

			if switchprior:			
				try:
					nu = self.nu_sw
				except:
					nu = self.prior[k]['sigma']['nu']
				try:
					Sigma_mu = self.Sigma_mu_sw
				except:
					Sigma_mu = self.prior[k]['mu']['Sigma']
				Q = self.prior[k]['sigma']['Q']*nu/self.prior[k]['sigma']['nu']
			else:
				nu = self.prior[k]['sigma']['nu']
				Sigma_mu = self.prior[k]['mu']['Sigma']
				Q = self.prior[k]['sigma']['Q']

			if np.isnan(mu[0]) == 1:
					return 0, None, None, None
			
			if R_S_mu is None:
					R_S_mu = sla.cho_factor(Sigma_mu,check_finite = False)
			log_det_Sigma_mu = 2 * np.sum(np.log(np.diag(R_S_mu[0])))
			
			if log_det_Q is None:
					R_Q = sla.cho_factor(Q,check_finite = False)
					log_det_Q = 2 * np.sum(np.log(np.diag(R_Q[0])))
			
			if R_S is None:
					R_S = sla.cho_factor(Sigma,check_finite = False)
			log_det_Sigma	= 2 * np.sum(np.log(np.diag(R_S[0])))
			
			
			
			mu_theta = mu - self.prior[k]['mu']['theta'].reshape(self.d)
			# N(\mu; \theta[k], \Sigma[k])
			
			lik = - np.dot(mu_theta.T, sla.cho_solve(R_S_mu, mu_theta, check_finite = False))  /2
			lik = lik - 0.5 * (nu + self.d + 1.) * log_det_Sigma
			lik = lik +  (nu * 0.5) * log_det_Q
			lik = lik - 0.5 * log_det_Sigma_mu
			lik = lik - self.ln_gamma_d(0.5 * nu) - 0.5 * np.log(2) * (nu * self.d)
			lik = lik - 0.5 * np.sum(np.diag(sla.cho_solve(R_S, Q)))
			return lik, R_S_mu, log_det_Q, R_S
Example #24
    def draw_new_wt_assgns(self, word, topic_id, new_doc=False, wvmodel=None):
        """
        Log of the probability density function for the Student-T Distribution

        Provides a PDF for a word (really a word-vector) in a given topic distribution.

        :param word: string of the word to find probability of word-topic assignment
        :param topic_id: Integer, a topic id to reference a topic distribution and its params
        :param new_doc: False (default), optional.  True if predicting topics from unseen document/not currently training
        :param wvmodel: None by default.  If predicting topics from an unseen document, requires a loaded word2vec model
        from GenSim
        :type wvmodel: gensim.models.word2vec.Word2Vec
        :return: log of PDF from t-distribution for a given word.  Type: Float
        """

        if not new_doc:
            # Getting params for calculating PDF of T-Dist for a word
            cov_det = self.topic_params[topic_id]["Chol Det"]
            Nk = self.topic_params[topic_id]["Topic Count"]
            # Precalculating some terms (V_di - Mu)
            centered = np.copy(self.word_vecs[word] - self.topic_params[topic_id]["Topic Mean"])
            # (L^-1b)^T(L^-1b) _
            if np.isnan(centered).any() or np.isinf(centered).any():
                print(centered)
                print(topic_id)
                print(Nk)
                print(word)
                print(self.word_vecs[word])
                print(self.topic_params[topic_id]["Topic Mean"])
            linalg.cho_solve((self.topic_params[topic_id]["Lower Triangle"], True), centered, overwrite_b=True,
                check_finite=True)
            LLcomp = centered.T.dot(centered)
            # SHOULD THIS BE CENTERED.DOT(INV_COV).DOT(CENTERED.T)????
            d = self.word_vec_size   # dimensionality of word vector
            nu = self.priors.nu + Nk - d + 1.

            # Log PDF of multivariate student-T distribution
            log_prob = gammaln((nu + d) / 2.) - \
                       (gammaln(nu / 2.) + d/2. * (log(nu) + log(pi)) +0.5 * cov_det + ((nu + d) / 2.) * log((1. + LLcomp ) / nu))

            return log_prob

        if new_doc:
            cov_det = self.topic_params[topic_id]["Chol Det"]
            Nk = self.topic_params[topic_id]["Topic Count"]
            centered = self.word_vecs[word] - self.topic_params[topic_id]["Topic Mean"]

            cholesky_solution = linalg.cho_solve((self.topic_params[topic_id]["Lower Triangle"], True), centered)
            LLcomp = cholesky_solution.T.dot(cholesky_solution) # TODO: update to be like loop above
            d = wvmodel.vector_size
            nu = self.priors.nu + Nk - d + 1.
            log_prob = gammaln((nu + d) / 2.) - \
                       (gammaln(nu / 2.) + d/2. * (log(nu) + log(pi)) +0.5 * np.log(cov_det) + ((nu + d) / 2.) * log((1. + LLcomp )/ nu))
            return log_prob
Example #25
File: gaussianprocess.py Project: exord/gp
    def prediction(self, data=None):
        """
        Evaluates the posterior GP mean and covariance functions.

        This method computes the mean and covariance matrix of the posterior
        predictive distribution of the GP. The mean and covariance matrix are
        incorporated as attributes of the class and can be subsequently used to
        draw samples of the function values corresponding to the input values.

        If no data array is passed as argument, then the data attribute is used.

        :param np.array data: a `(N x 2)` or `(N x 3)` array of N data inputs:
         (data coordinate, data value, data error (optional)).

        :return: mean and covariance matrix of posterior predictive.
        """

        if data is None and self.data is None:
            raise TypeError('Data array cannot be None, unless you want your'
                            'predictions to look like your prior. In that'
                            'case, better use the `sample` method.')

        elif data is not None:

            if self.data is not None:
                print('Data given. Overriding previous data.')
            self.data = data

            # Compute covariance matrices
            cov_test_data, cov_data = self.computecovariances(self.data)
            self.covariance_test_data = cov_test_data
            self.covariance_data = cov_data

        # If errors are provided for data, add them to the covariance diagonal
        if self.data.shape[0] > 2:
            dataerror = np.diag(np.atleast_1d(self.data[2] ** 2))
        else:
            dataerror = np.diag(np.zeros_like(self.data[0]))

        # Use Cholesky decomposition on covariance of data inputs.
        factor, flag = cho_factor(self.covariance_data + dataerror)

        # Compute posterior mean (eq. 2.23 Rasmussen)
        a = cho_solve((factor, flag), self.data[1])
        self.predmean = np.dot(self.covariance_test_data, np.array(a))

        # Compute posterior covariance (eq. 2.24 Rasmussen)
        alpha = cho_solve((factor, flag), self.covariance_test_data.T)
        beta = np.dot(self.covariance_test_data, np.array(alpha))
        self.predcov = self.covariance - beta

        return self.predmean, self.predcov
Example #26
File: gpnarx.py Project: RJT1990/pyflux
    def _alpha(self, L):
        """ Covariance-derived term to construct expectations. See Rasmussen & Williams.
        
        Parameters
        ----------
        L : np.ndarray
            Cholesky triangular

        Returns
        ----------
        np.ndarray (alpha)
        """
        return la.cho_solve((L.T, True), la.cho_solve((L, True), np.transpose(self.data)))
Example #27
File: blp.py Project: mindis/BLP-Python
    def _GMM(self, theta_vec):
        """GMM objective function"""
        _blp, theta, delta, v, D, x2, nmkt, nsimind, nbrand = self.set_aliases()

        theta[self.ix_theta] = theta_vec
        theta_v = theta[:, 0]
        theta_D = theta[:, 1:]

        # adaptive etol
        if self.GMM_diff < 1e-6:
            etol = self.etol = 1e-13
        elif self.GMM_diff < 1e-3:
            etol = self.etol = 1e-12
        else:
            etol = self.etol = 1e-9

        if self.cython:
            _blp.cal_delta(delta,
                           theta_v, theta_D,
                           self.ln_s_jt,
                           v, D, x2, nmkt, nsimind, nbrand,
                           etol, self.iter_limit)
        else:
            self.cal_delta(theta)

        if np.isnan(delta).sum():
            return(1e+10)

        Z_x1 = self.Z_x1
        LW = self.LW

        # Z'delta
        Z_delta = self.Z.T.dot(delta)

        #\[ \theta_1 = (\tilde{X}'ZW^{-1}Z'\tilde{X})^{-1}\tilde{X}'ZW^{-1}Z'\delta \]
        theta1 = solve(Z_x1.T.dot(cho_solve(LW, Z_x1)),
                       Z_x1.T.dot(cho_solve(LW, Z_delta)))

        xi = self.xi = delta - self.x1.dot(theta1)

        # Z'xi
        Z_xi = self.Z.T.dot(xi)

        # \[ (\delta - \tilde{X}\theta_1)'ZW^{-1}Z'(\delta-\tilde{X}\theta_1) \]
        GMM = Z_xi.T.dot(cho_solve(LW, Z_xi))

        self.GMM_diff = abs(self.GMM_old - GMM)
        self.GMM_old = GMM

        print('GMM value: {}'.format(GMM))
        return(GMM)
Example #28
def mark2loglikelihood(psr, Aw, Ar, Si):
    """
    Log-likelihood for our pulsar
    
    This likelihood does marginalize over the timing model. Calculate
    covariance matrix in the time-domain with:
    
    ll = -0.5 * res^{T} (C^{-1} - C^{-1} M (M^{T} C^{-1} M)^{-1} M^{T} C^{-1} ) res - \
         0.5 * log(det(C)) - 0.5 * log(det(M^{T} C^{-1} M))
         
    In relation to 'mark1loglikelihood', this likelihood has but a simple addition:
    res' = res - M xi
    where M is a (n x m) matrix, with m < n, and xi is a vector of length m. The xi
    are analytically marginalised over, yielding the above equation (up to constants)
    
    :param psr:
        pulsar object, containing the data and stuff

    :param Aw:
        White noise amplitude, model parameter

    :param Ar:
        Red noise amplitude, model parameter

    :param Si:
        Spectral index of red noise, model parameter
    """
    Mmat = psr.Mmat

    Cov = Aw ** 2 * np.eye(len(psr.toas)) + PL_covmat(psr.toas, Ar, alpha=0.5 * (3 - Si), fL=1.0 / (year * 20))

    cfC = sl.cho_factor(Cov)
    Cinv = sl.cho_solve(cfC, np.eye(len(psr.toas)))
    ldetC = 2 * np.sum(np.log(np.diag(cfC[0])))

    MCM = np.dot(Mmat.T, np.dot(Cinv, Mmat))
    cfM = sl.cho_factor(MCM)
    ldetM = 2 * np.sum(np.log(np.diag(cfM[0])))

    wr = np.dot(Cinv, psr.residuals)
    rCr = np.dot(psr.residuals, wr)
    MCr = np.dot(Mmat.T, wr)

    return (
        -0.5 * rCr
        + 0.5 * np.dot(MCr, sl.cho_solve(cfM, MCr))
        - 0.5 * ldetC
        - 0.5 * ldetM
        - 0.5 * len(psr.residuals) * np.log(2 * np.pi)
    )
Example #29
    def solve(self, other):
        if other.ndim == 1:
            Nx = np.array(other / self.N)
        elif other.ndim == 2:
            Nx = np.array(other / self.N[:,None])
        UNx = np.dot(self.U.T, Nx)

        Sigma = np.diag(1/self.J) + np.dot(self.U.T, self.U/self.N[:,None])
        cf = sl.cho_factor(Sigma)
        if UNx.ndim == 1:
            tmp = np.dot(self.U, sl.cho_solve(cf, UNx)) / self.N
        else:
            tmp = np.dot(self.U, sl.cho_solve(cf, UNx)) / self.N[:,None]
        return Nx - tmp
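This `solve` applies the Woodbury identity to a covariance of the form N + U diag(J) U^T with diagonal N: only the small matrix Sigma = diag(1/J) + U^T N^{-1} U needs a Cholesky factorization. A standalone check of the same algebra, with illustrative arrays:

import numpy as np
import scipy.linalg as sl

rng = np.random.default_rng(3)
n, k = 200, 5
Nd = rng.uniform(1.0, 2.0, n)            # diagonal of N
J = rng.uniform(0.5, 1.5, k)             # diagonal of J
U = rng.standard_normal((n, k))
x = rng.standard_normal(n)

# Woodbury: (N + U diag(J) U^T)^{-1} x, factoring only a k x k matrix
Sigma = np.diag(1 / J) + U.T @ (U / Nd[:, None])
cf = sl.cho_factor(Sigma)
y = x / Nd - U @ sl.cho_solve(cf, U.T @ (x / Nd)) / Nd

# dense reference solve for comparison
C = np.diag(Nd) + U @ np.diag(J) @ U.T
assert np.allclose(y, np.linalg.solve(C, x))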
Example #30
    def testGetGradients(self):
        '''
        Compares the gradients computed as done originally in spear-mint with our implementation.
        '''
        xstar = scale * npr.random((1,d))
        (mg,vg) = self.gp.getGradients(xstar[0])
        
        ######################################################################################
        #Spearmint Code
        #The code below is taken from GPEIOptChooser and adapted to the variables here.
        cand_cross_grad = self.amp2 * self.cov_grad_func(self.ls, self.X, xstar)
        
        comp_cov   = cov(self, self.X)
        cand_cross = cov(self, self.X, xstar)

        # Compute the required Cholesky.
        obsv_cov = comp_cov + self.noise * np.eye(self.X.shape[0])
        obsv_chol = spla.cholesky(obsv_cov, lower=True)
        # Predictive things.
        # Solve the linear systems.
        alpha = spla.cho_solve((obsv_chol, True), self.y - self.mean)

        # Apply covariance function
        grad_cross = np.squeeze(cand_cross_grad)
        grad_xp_m = np.dot(alpha.transpose(), grad_cross)
        grad_xp_v = -2 * np.dot(spla.cho_solve(
                (obsv_chol, True), cand_cross).transpose(), grad_cross)
        
        ######################################################################################
        #End of Spearmint Code
        
        #it seems the gradient of the spearmint code is already optimized and therefore differs by sign
        #however, the gradient of our implementation agrees with the first order approximation
        grad_xp_m = -grad_xp_m
        grad_xp_v = -grad_xp_v
        assert(spla.norm(mg - grad_xp_m) < 1e-50)
        assert(spla.norm(vg[0] - grad_xp_v[0]) < 1e-50)
        
        #Test against first order approximation
        epsilon = 1e-6
        vg = np.array([vg]) #needs to be in the format [[d0,...,dn]]
        def get_variance(x):
            return self.gp.predict(x, True)[1]
        self.assert_first_order_gradient_approximation(get_variance, xstar, vg, epsilon)
        
        mg = np.array([np.array([mg])]) #we need mg in the format [[d0, d1, ..., dn]]
        def get_mean(x):
            return np.array([self.gp.predict(x)])
        self.assert_first_order_gradient_approximation(self.gp.predict, xstar, mg, epsilon)
Example #31
def krr(descriptors,
        labels,
        training_size=1500,
        test_size=None,
        sigma=1000.0,
        opt=True,
        identifier=None,
        kernel='gaussian',
        use_tf=True,
        show_msgs=True):
    """
    Basic krr methodology for a single descriptor type.
    descriptors: array of descriptors.
    labels: array of labels.
    training_size: size of the training set to use.
    test_size: size of the test set to use. If no size is given,
        the last remaining molecules are used.
    sigma: depth of the kernel.
    opt: if the optimized algorithm should be used. For benchmarking purposes.
    identifier: string with the name of the descriptor used.
    kernel: which kernel to use.
    use_tf: if tensorflow should be used.
    show_msgs: if debug messages should be shown.
    NOTE: identifier is just a string and is only for identification purposes.
    Also, training is done with the first part of the data and
        testing with the ending part of the data.
    """
    tic = time.perf_counter()
    # Initial calculations for later use.
    data_size = descriptors.shape[0]

    if not identifier:
        identifier = 'NOT SPECIFIED'

    if not data_size == labels.shape[0]:
        raise ValueError('Labels size is different than descriptors size.')

    if training_size >= data_size:
        raise ValueError('Training size is greater or equal to the data size.')

    # If tf is to be used but couldn't be imported, don't try to use it.
    if use_tf and not TF_AV:
        use_tf = False

    # If test_size is not set, it is set to a maximum size of 1500.
    # Also, no overlapping with training data is achieved.
    if not test_size:
        test_size = data_size - training_size
        if test_size > 1500:
            test_size = 1500

    if show_msgs:
        printc(f'{identifier} ML started.', 'GREEN')
        printc(f'\tTraining size: {training_size}', 'CYAN')
        printc(f'\tTest size: {test_size}', 'CYAN')
        printc(f'\tSigma: {sigma}', 'CYAN')
        printc(f'\tKernel: {kernel}', 'CYAN')
        printc(f'\tUse tf: {use_tf}', 'CYAN')

    if use_tf:
        if tf.config.experimental.list_physical_devices('GPU'):
            with tf.device('GPU:0'):
                X_tr = descriptors[:training_size]
                Y_tr = labels[:training_size]
                if kernel == 'gaussian':
                    K_tr = gaussian_kernel(X_tr,
                                           X_tr,
                                           sigma,
                                           use_tf=use_tf)

                elif kernel == 'laplacian':
                    K_tr = laplacian_kernel(X_tr,
                                            X_tr,
                                            sigma,
                                            use_tf=use_tf)

                elif kernel == 'wasserstein':
                    K_tr = wasserstein_kernel(X_tr,
                                              X_tr,
                                              sigma,
                                              use_tf=use_tf)

                else:
                    raise TypeError(f'{kernel} kernel not found.')

                # Adding a small value on the diagonal for cho_solve.
                dv = tf.linalg.tensor_diag(tf.constant(1e-8,
                                                       shape=(training_size),
                                                       dtype=tf.float64))
                K_tr += dv
                Y_tr = tf.expand_dims(Y_tr, 1)
                alpha = tf.linalg.cholesky_solve(tf.linalg.cholesky(K_tr),
                                                 Y_tr)

                X_te = descriptors[-test_size:]
                Y_te = labels[-test_size:]
                if kernel == 'gaussian':
                    K_te = gaussian_kernel(X_te,
                                           X_tr,
                                           sigma,
                                           use_tf=use_tf)

                elif kernel == 'laplacian':
                    K_te = laplacian_kernel(X_te,
                                            X_tr,
                                            sigma,
                                            use_tf=use_tf)

                elif kernel == 'wasserstein':
                    K_te = wasserstein_kernel(X_te,
                                              X_tr,
                                              sigma,
                                              use_tf=use_tf)

                else:
                    raise TypeError(f'{kernel} kernel not found.')

                Y_te = tf.expand_dims(Y_te, 1)
                Y_pr = tf.tensordot(K_te, alpha, 1)

                mae = tf.reduce_mean(tf.abs(Y_pr - Y_te))
        else:
            raise TypeError('No GPU found, could not create Tensor objects.')
    else:
        X_tr = descriptors[:training_size]
        Y_tr = labels[:training_size]
        if kernel == 'gaussian':
            K_tr = gaussian_kernel(X_tr,
                                   X_tr,
                                   sigma,
                                   use_tf=use_tf)

        elif kernel == 'laplacian':
            K_tr = laplacian_kernel(X_tr,
                                    X_tr,
                                    sigma,
                                    use_tf=use_tf)

        elif kernel == 'wasserstein':
            K_tr = wasserstein_kernel(X_tr,
                                      X_tr,
                                      sigma,
                                      use_tf=use_tf)

        else:
            raise TypeError(f'{kernel} kernel not found.')

        # Adding a small value on the diagonal for cho_solve.
        K_tr[np.diag_indices_from(K_tr)] += 1e-8
        alpha = LA.cho_solve(LA.cho_factor(K_tr),
                             Y_tr)

        X_te = descriptors[-test_size:]
        Y_te = labels[-test_size:]
        if kernel == 'gaussian':
            K_te = gaussian_kernel(X_te,
                                   X_tr,
                                   sigma,
                                   use_tf=use_tf)

        elif kernel == 'laplacian':
            K_te = laplacian_kernel(X_te,
                                    X_tr,
                                    sigma,
                                    use_tf=use_tf)

        elif kernel == 'wasserstein':
            K_te = wasserstein_kernel(X_te,
                                      X_tr,
                                      sigma,
                                      use_tf=use_tf)

        else:
            raise TypeError(f'{kernel} kernel not found.')
        Y_pr = np.dot(K_te, alpha)

        mae = np.mean(np.abs(Y_pr - Y_te))

    toc = time.perf_counter()
    tictoc = toc - tic
    if show_msgs:
        printc(f'\tMAE for {identifier}: {mae:.4f}', 'GREEN')
        printc(f'\t{identifier} ML took {tictoc:.4f} seconds.', 'GREEN')

    return mae, tictoc
Example #32
    def predict(self,
                X,
                return_std=False,
                return_cov=False,
                return_mean_grad=False,
                return_std_grad=False):
        """
        In addition to the mean of the predictive distribution, also its
        standard deviation (return_std=True) or covariance (return_cov=True),
        the gradient of the mean and the standard-deviation with respect to X
        can be optionally provided.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Query points where the GP is evaluated

        return_std : bool, default: False
            If True, the standard-deviation of the predictive distribution at
            the query points is returned along with the mean.

        return_cov : bool, default: False
            If True, the covariance of the joint predictive distribution at
            the query points is returned along with the mean

        return_mean_grad: bool, default: False
            Whether or not to return the gradient of the mean.
            Only valid when X is a single point.

        return_std_grad: bool, default: False
            Whether or not to return the gradient of the std.
            Only valid when X is a single point.

        Returns
        -------
        y_mean : array, shape = (n_samples, [n_output_dims])
            Mean of predictive distribution a query points
        y_std : array, shape = (n_samples,), optional
            Standard deviation of predictive distribution at query points.
            Only returned when return_std is True.
        y_cov : array, shape = (n_samples, n_samples), optional
            Covariance of joint predictive distribution a query points.
            Only returned when return_cov is True.
        y_mean_grad: shape = (n_samples, n_features)
            The gradient of the predicted mean
        y_std_grad: shape = (n_samples, n_features)
            The gradient of the predicted std.
        """
        if return_std and return_cov:
            raise RuntimeError(
                "Not returning standard deviation of predictions when "
                "returning full covariance.")
        if return_std_grad and not return_std:
            raise ValueError("Not returning std_gradient without returning "
                             "the std.")
        X = check_array(X)
        if X.shape[0] != 1 and (return_mean_grad or return_std_grad):
            raise ValueError("Not implemented for n_samples > 1")

        if not hasattr(self, "X_train_"):  # Unfitted;predict based on GP prior
            y_mean = np.zeros(X.shape[0])
            if return_cov:
                y_cov = self.kernel(X)
                return y_mean, y_cov
            elif return_std:
                y_var = self.kernel.diag(X)
                return y_mean, np.sqrt(y_var)
            else:
                return y_mean
        else:  # Predict based on GP posterior
            K_trans = self.kernel_(X, self.X_train_)
            y_mean = K_trans.dot(self.alpha_)  # Line 4 (y_mean = f_star)
            y_mean = self.y_train_mean + y_mean  # undo normal.

            if return_cov:
                v = cho_solve((self.L_, True), K_trans.T)  # Line 5
                y_cov = self.kernel_(X) - K_trans.dot(v)  # Line 6
                return y_mean, y_cov

            elif return_std:
                # compute inverse K_inv of K based on its Cholesky
                # decomposition L and its inverse L_inv
                L_inv = solve_triangular(self.L_.T, np.eye(self.L_.shape[0]))
                K_inv = L_inv.dot(L_inv.T)
                # Compute variance of predictive distribution
                y_var = self.kernel_.diag(X)
                y_var -= np.einsum("ki,kj,ij->k", K_trans, K_trans, K_inv)

                # Check if any of the variances is negative because of
                # numerical issues. If yes: set the variance to 0.
                y_var_negative = y_var < 0
                if np.any(y_var_negative):
                    warnings.warn("Predicted variances smaller than 0. "
                                  "Setting those variances to 0.")
                    y_var[y_var_negative] = 0.0
                y_std = np.sqrt(y_var)

            if return_mean_grad:
                grad = self.kernel_.gradient_x(X[0], self.X_train_)
                grad_mean = np.dot(grad.T, self.alpha_)

                if return_std_grad:
                    # XXX: Cache np.dot(K_trans, K_inv) from above
                    grad_std = np.zeros(X.shape[1])
                    if not np.allclose(y_std, grad_std):
                        grad_std = -np.dot(K_trans, np.dot(K_inv,
                                                           grad))[0] / y_std
                    return y_mean, y_std, grad_mean, grad_std

                if return_std:
                    return y_mean, y_std, grad_mean
                else:
                    return y_mean, grad_mean
            else:
                if return_std:
                    return y_mean, y_std
                else:
                    return y_mean
Example #33
 def test_cho_solve(self):
     x = array([[2,-1,0], [-1,2,-1], [0,-1,2]])
     xcho = cho_factor(x)
     assert_no_overwrite(lambda b: cho_solve(xcho, b), [(3,)])
Example #34
def get_d(hessian, grad: np.ndarray):
    df2_i = cho_solve(cho_factor(hessian), np.eye(len(hessian)))
    d = np.matmul(grad, df2_i)
    return d
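Forming the explicit inverse with `cho_solve` against the identity works, but for a symmetric positive-definite Hessian the same direction comes out of a single solve against the gradient, as Example #37 below also does. A minimal equivalent sketch (`hessian` and `grad` are assumed to be an SPD matrix and a 1-D array):

from scipy.linalg import cho_factor, cho_solve

def get_d_direct(hessian, grad):
    # Same direction as get_d above, without building the explicit inverse.
    return cho_solve(cho_factor(hessian), grad)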
Example #35
sigma_values = np.logspace(min_sigma, max_sigma, num_parameters)
lam = 1.0
lam_values = np.logspace(-7, 2, num_parameters)

# construct kernel matrices
K_train = rbf_kernel(X=x_train, gamma=mean_sigma)
K_test = rbf_kernel(X=x_train, Y=x_test, gamma=mean_sigma)

# slow method: solve problem
t0 = time()
alpha = scio.linalg.solve(K_train + lam * np.eye(x_train.shape[0]), y_train)
t1 = time() - t0
print('Time taken for solve: {}'.format(t1))

# fast method: cholesky decomposition manually
t0 = time()
R = cholesky(K_train + lam * np.eye(x_train.shape[0]))
alpha = scio.linalg.solve(R, scio.linalg.solve(R.T, y_train))
t1 = time() - t0
print('Time taken for cholesky manually: {}'.format(t1))

# fast method: cholesky decomposition with functions
t0 = time()
R, lower = cho_factor(K_train + lam * np.eye(x_train.shape[0]))
alpha = cho_solve((R, lower), y_train)
t1 = time() - t0
print('\nTime taken for cholesky with functions: {:.4f} secs\n'.format(t1))

# project the data
y_pred = (K_test.T @ alpha).squeeze()
Example #36
File: sr3.py Project: chen-lin/pysindy
 def _update_full_coef(self, cho, x_transpose_y, coef_sparse):
     """Update the unregularized weight vector"""
     b = x_transpose_y + coef_sparse / self.nu
     coef_full = cho_solve(cho, b)
     self.iters += 1
     return coef_full
Example #37
 def get_d(x, grad, oracle):
     upper_triangle, _ = cho_factor(oracle.hess(x), lower=False, overwrite_a=True, check_finite=True)
     direction = cho_solve((upper_triangle, False), -grad, overwrite_b=False, check_finite=True)
     return direction
Example #38
    def log_marginal_likeli(self, theta=None, eval_gradient=False,
                                clone_kernel=True):
        """Returns log-marginal likelihood of theta for training data.
        Parameters
        ----------
        theta : array-like of shape (n_kernel_params,) default=None
            Kernel hyperparameters for which the log-marginal likelihood is
            evaluated. If None, the precomputed log_marginal_likelihood
            of ``self.kernel_.theta`` is returned.
        eval_gradient : bool, default=False
            If True, the gradient of the log-marginal likelihood with respect
            to the kernel hyperparameters at position theta is returned
            additionally. If True, theta must not be None.
        clone_kernel : bool, default=True
            If True, the kernel attribute is copied. If False, the kernel
            attribute is modified, but may result in a performance improvement.
        Returns
        -------
        log_likelihood : float
            Log-marginal likelihood of theta for training data.
        log_likelihood_gradient : ndarray of shape (n_kernel_params,), optional
            Gradient of the log-marginal likelihood with respect to the kernel
            hyperparameters at position theta.
            Only returned when eval_gradient is True.
        """
        if theta is None:
            if eval_gradient:
                raise ValueError(
                    "Gradient can only be evaluated for theta!=None")
            return self.log_marginal_likelihood_value_

        if clone_kernel:
            kernel = self.kernel_.clone_with_theta(theta)
        else:
            kernel = self.kernel_
            kernel.theta = theta

        if eval_gradient:
            K, K_gradient = kernel(self.X_train_, eval_gradient=True)
        else:
            K = kernel(self.X_train_)

        K[np.diag_indices_from(K)] += self.alpha
        try:
            L = cholesky(K, lower=True)  # Line 2
        except np.linalg.LinAlgError:
            return (-np.inf, np.zeros_like(theta)) \
                if eval_gradient else -np.inf

        # Support multi-dimensional output of self.y_train_
        y_train = self.y_train_
        if y_train.ndim == 1:
            y_train = y_train[:, np.newaxis]

        alpha = cho_solve((L, True), y_train)  # Line 3

        # Compute log-likelihood (compare line 7)
        log_likelihood_dims = -0.5 * np.einsum("ik,ik->k", y_train, alpha)
        log_likelihood_dims -= np.log(np.diag(L)).sum()
        log_likelihood_dims -= K.shape[0] / 2 * np.log(2 * np.pi)
        log_likelihood = log_likelihood_dims.sum(-1)  # sum over dimensions
        
        
        # Precompute quantities required for predictions which are independent
        # of actual query points
        K = self.kernel_(self.X_train_)
        K[np.diag_indices_from(K)] += self.alpha
        try:
            self.L_ = cholesky(K, lower=True)  # Line 2
            # self.L_ changed, self._K_inv needs to be recomputed
            self._K_inv = None
        except np.linalg.LinAlgError as exc:
            exc.args = ("The kernel, %s, is not returning a "
                        "positive definite matrix. Try gradually "
                        "increasing the 'alpha' parameter of your "
                        "GaussianProcessRegressor estimator."
                        % self.kernel_,) + exc.args
            raise
        self.alpha_ = cho_solve((self.L_, True), self.y_train_)  # Line 3

        pred1 = self.predict(x_unlabeled)
        pred1 = scaler_y.inverse_transform(pred1)
        phyloss = np.mean(poros(init_poro, pred1))
        log_likelihood -= 500000*phyloss
        #print(log_likelihood)
        
        if eval_gradient:  # compare Equation 5.9 from GPML
            tmp = np.einsum("ik,jk->ijk", alpha, alpha)  # k: output-dimension
            tmp -= cho_solve((L, True), np.eye(K.shape[0]))[:, :, np.newaxis]
            # Compute "0.5 * trace(tmp.dot(K_gradient))" without
            # constructing the full matrix tmp.dot(K_gradient) since only
            # its diagonal is required
            log_likelihood_gradient_dims = \
                0.5 * np.einsum("ijl,jik->kl", tmp, K_gradient)
            log_likelihood_gradient = log_likelihood_gradient_dims.sum(-1)

        if eval_gradient:
            return log_likelihood, log_likelihood_gradient
        else:
            return log_likelihood
Example #39
    def log_marginal_likelihood(self,
                                theta=None,
                                eval_gradient=False,
                                clone_kernel=True):
        """Returns log-marginal likelihood of theta for training data.

        Parameters
        ----------
        theta : array-like of shape (n_kernel_params,), default=None
            Kernel hyperparameters for which the log-marginal likelihood is
            evaluated. If None, the precomputed log_marginal_likelihood
            of ``self.kernel_.theta`` is returned.

        eval_gradient : bool, default=False
            If True, the gradient of the log-marginal likelihood with respect
            to the kernel hyperparameters at position theta is returned
            additionally. If True, theta must not be None.

        clone_kernel : bool, default=True
            If True, the kernel attribute is copied. If False, the kernel
            attribute is modified in place, which may give a performance
            improvement.

        Returns
        -------
        log_likelihood : float
            Log-marginal likelihood of theta for training data.

        log_likelihood_gradient : ndarray of shape (n_kernel_params,), \
                optional
            Gradient of the log-marginal likelihood with respect to the kernel
            hyperparameters at position theta.
            Only returned when `eval_gradient` is True.
        """
        if theta is None:
            if eval_gradient:
                raise ValueError(
                    "Gradient can only be evaluated for theta!=None")
            return self.log_marginal_likelihood_value_

        if clone_kernel:
            kernel = self.kernel_.clone_with_theta(theta)
        else:
            kernel = self.kernel_
            kernel.theta = theta

        if eval_gradient:
            K, K_gradient = kernel(self.X_train_, eval_gradient=True)
        else:
            K = kernel(self.X_train_)

        # Compute log-marginal-likelihood Z and also store some temporaries
        # which can be reused for computing Z's gradient
        Z, (pi, W_sr, L, b, a) = \
            self._posterior_mode(K, return_temporaries=True)

        if not eval_gradient:
            return Z

        # Compute gradient based on Algorithm 5.1 of GPML
        d_Z = np.empty(theta.shape[0])
        # XXX: Get rid of the np.diag() in the next line
        R = W_sr[:, np.newaxis] * cho_solve((L, True), np.diag(W_sr))  # Line 7
        C = solve(L, W_sr[:, np.newaxis] * K)  # Line 8
        # Line 9: (use einsum to compute np.diag(C.T.dot(C))))
        s_2 = -0.5 * (np.diag(K) - np.einsum('ij, ij -> j', C, C)) \
            * (pi * (1 - pi) * (1 - 2 * pi))  # third derivative

        for j in range(d_Z.shape[0]):
            C = K_gradient[:, :, j]  # Line 11
            # Line 12: (R.T.ravel().dot(C.ravel()) = np.trace(R.dot(C)))
            s_1 = .5 * a.T.dot(C).dot(a) - .5 * R.T.ravel().dot(C.ravel())

            b = C.dot(self.y_train_ - pi)  # Line 13
            s_3 = b - K.dot(R.dot(b))  # Line 14

            d_Z[j] = s_1 + s_2.T.dot(s_3)  # Line 15

        return Z, d_Z
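# Quick numerical check (toy matrices, not from the example above) of the trace
# trick noted at "Line 12": R.T.ravel().dot(C.ravel()) equals np.trace(R.dot(C)).
import numpy as np

rng = np.random.default_rng(1)
R = rng.normal(size=(5, 5))
C = rng.normal(size=(5, 5))
assert np.isclose(R.T.ravel().dot(C.ravel()), np.trace(R.dot(C)))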
예제 #40
0
    def fit(self, X, y):
        """Fit Gaussian process regression model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features) or list of object
            Feature vectors or other representations of training data.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values

        Returns
        -------
        self : returns an instance of self.
        """
        if self.kernel is None:  # Use an RBF kernel as default
            self.kernel_ = C(1.0, constant_value_bounds="fixed") \
                * RBF(1.0, length_scale_bounds="fixed")
        else:
            self.kernel_ = clone(self.kernel)

        self._rng = check_random_state(self.random_state)

        if self.kernel_.requires_vector_input:
            X, y = self._validate_data(X,
                                       y,
                                       multi_output=True,
                                       y_numeric=True,
                                       ensure_2d=True,
                                       dtype="numeric")
        else:
            X, y = self._validate_data(X,
                                       y,
                                       multi_output=True,
                                       y_numeric=True,
                                       ensure_2d=False,
                                       dtype=None)

        # Normalize target value
        if self.normalize_y:
            self._y_train_mean = np.mean(y, axis=0)
            self._y_train_std = np.std(y, axis=0)

            # Remove mean and make unit variance
            y = (y - self._y_train_mean) / self._y_train_std

        else:
            self._y_train_mean = np.zeros(1)
            self._y_train_std = 1

        if np.iterable(self.alpha) \
           and self.alpha.shape[0] != y.shape[0]:
            if self.alpha.shape[0] == 1:
                self.alpha = self.alpha[0]
            else:
                raise ValueError(
                    "alpha must be a scalar or an array"
                    " with same number of entries as y.(%d != %d)" %
                    (self.alpha.shape[0], y.shape[0]))

        self.X_train_ = np.copy(X) if self.copy_X_train else X
        self.y_train_ = np.copy(y) if self.copy_X_train else y

        if self.optimizer is not None and self.kernel_.n_dims > 0:
            # Choose hyperparameters based on maximizing the log-marginal
            # likelihood (potentially starting from several initial values)
            def obj_func(theta, eval_gradient=True):
                if eval_gradient:
                    lml, grad = self.log_marginal_likelihood(
                        theta, eval_gradient=True, clone_kernel=False)
                    return -lml, -grad
                else:
                    return -self.log_marginal_likelihood(theta,
                                                         clone_kernel=False)

            # First optimize starting from theta specified in kernel
            optima = [(self._constrained_optimization(obj_func,
                                                      self.kernel_.theta,
                                                      self.kernel_.bounds))]

            # Additional runs are performed from log-uniform chosen initial
            # theta
            if self.n_restarts_optimizer > 0:
                if not np.isfinite(self.kernel_.bounds).all():
                    raise ValueError(
                        "Multiple optimizer restarts (n_restarts_optimizer>0) "
                        "requires that all bounds are finite.")
                bounds = self.kernel_.bounds
                for iteration in range(self.n_restarts_optimizer):
                    theta_initial = \
                        self._rng.uniform(bounds[:, 0], bounds[:, 1])
                    optima.append(
                        self._constrained_optimization(obj_func, theta_initial,
                                                       bounds))
            # Select result from run with minimal (negative) log-marginal
            # likelihood
            lml_values = list(map(itemgetter(1), optima))
            self.kernel_.theta = optima[np.argmin(lml_values)][0]
            self.kernel_._check_bounds_params()

            self.log_marginal_likelihood_value_ = -np.min(lml_values)
        else:
            self.log_marginal_likelihood_value_ = \
                self.log_marginal_likelihood(self.kernel_.theta,
                                             clone_kernel=False)

        # Precompute quantities required for predictions which are independent
        # of actual query points
        K = self.kernel_(self.X_train_)
        K[np.diag_indices_from(K)] += self.alpha
        try:
            self.L_ = cholesky(K, lower=True)  # Line 2
            # self.L_ changed, self._K_inv needs to be recomputed
            self._K_inv = None
        except np.linalg.LinAlgError as exc:
            exc.args = ("The kernel, %s, is not returning a "
                        "positive definite matrix. Try gradually "
                        "increasing the 'alpha' parameter of your "
                        "GaussianProcessRegressor estimator." %
                        self.kernel_, ) + exc.args
            raise
        self.alpha_ = cho_solve((self.L_, True), self.y_train_)  # Line 3
        return self
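# Hedged usage sketch (assumes scikit-learn is available): fit the regressor on
# toy 1-d data, then read off the optimized kernel and the cached log-marginal
# likelihood that the fit above stores.
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

X = np.linspace(0.0, 1.0, 25)[:, None]
y = np.sin(4.0 * np.pi * X).ravel()

gpr = GaussianProcessRegressor(kernel=C(1.0) * RBF(0.2), alpha=1e-6,
                               n_restarts_optimizer=2, random_state=0).fit(X, y)
print(gpr.kernel_, gpr.log_marginal_likelihood_value_)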
예제 #41
0
    def predict(self, xnew, merrorsnew=False, derivs=0, addnoise=False):
        """
        Determines the predicted mean latent function (.f) and its variance (.fvar), and optionally the predicted mean first derivative (.df) and its variance (.dfvar) and the predicted mean second derivative (.ddf) and its variance (.ddfvar). The attribute .mnp is the combined array of the predicted mean latent function and its mean derivatives, and .covp is the corresponding covariance matrix.

        Arguments
        --
        xnew: abscissa values for which predicted ordinate values are desired
        merrorsnew: if specified, the expected measurement errors at xnew (need not be specified if xnew = x)
        derivs: if 0, only the latent function is inferred; if 1, the latent function and the first derivative are inferred; if 2, the latent function and the first and second derivatives are inferred
        addnoise: if True, add measurement noise to the predicted variance
        """
        if len(self.x) == len(xnew) and (self.x == xnew).all():
            xold = True
        else:
            xold = False
        if np.any(self.merrors) and not np.any(merrorsnew) and not xold:
            print('Length of xnew is different from x.')
            raise gaussianprocessException(
                'Measurement errors were used to find the hyperparameters and measurement errors are therefore required for any predictions.'
            )
        elif not hasattr(self, 'lth_opt'):
            raise gaussianprocessException(
                ' Run gp.findhyperparameters() first before making predictions.'
            )
        else:
            # set up
            self.xnew = xnew
            lth, x, y = self.lth_opt, self.x, self.y
            # work with an array of length 3*N: the first N values being the function,
            # the second N values being the first derivative, and the last N values being the second derivative
            Knewold = np.empty((len(xnew), len(x)))
            Knewnew = np.empty((len(xnew), len(xnew)))
            if derivs > 0:
                d1Knewold = np.empty((len(xnew), len(x)))
                d1Knewnew = np.empty((len(xnew), len(xnew)))
                d1d2Knewnew = np.empty((len(xnew), len(xnew)))
            if derivs > 1:
                d12Knewold = np.empty((len(xnew), len(x)))
                d12Knewnew = np.empty((len(xnew), len(xnew)))
                d12d2Knewnew = np.empty((len(xnew), len(xnew)))
                d12d22Knewnew = np.empty((len(xnew), len(xnew)))
            for i in range(len(xnew)):
                Knewold[i, :] = self.covfn(xnew[i], x, lth)[0]
                Knewnew[i, :] = self.covfn(xnew[i], xnew, lth)[0]
                if derivs > 0:
                    d1Knewold[i, :] = self.d1covfn(xnew[i], x, lth)[0]
                    d1Knewnew[i, :] = self.d1covfn(xnew[i], xnew, lth)[0]
                    d1d2Knewnew[i, :] = self.d1d2covfn(xnew[i], xnew, lth)[0]
                if derivs > 1:
                    d12Knewold[i, :] = self.d12covfn(xnew[i], x, lth)[0]
                    d12Knewnew[i, :] = self.d12covfn(xnew[i], xnew, lth)[0]
                    d12d2Knewnew[i, :] = self.d12d2covfn(xnew[i], xnew, lth)[0]
                    d12d22Knewnew[i, :] = self.d12d22covfn(xnew[i], xnew,
                                                           lth)[0]
            if derivs == 0:
                kv = Knewold
                km = Knewnew
            elif derivs == 1:
                kv = np.bmat([[Knewold], [d1Knewold]])
                km = np.bmat([[Knewnew, np.transpose(d1Knewnew)],
                              [d1Knewnew, d1d2Knewnew]])
            elif derivs == 2:
                kv = np.bmat([[Knewold], [d1Knewold], [d12Knewold]])
                km = np.bmat([[
                    Knewnew,
                    np.transpose(d1Knewnew),
                    np.transpose(d12Knewnew)
                ], [d1Knewnew, d1d2Knewnew,
                    np.transpose(d12d2Knewnew)],
                              [d12Knewnew, d12d2Knewnew, d12d22Knewnew]])
            # find mean prediction
            k, L = self.kernelmatrix(lth, x)
            m = np.dot(kv, linalg.cho_solve(L, y))
            mnp = np.reshape(np.array(m), np.size(m))
            self.mnp = mnp
            # find variance of prediction
            covp = km - np.dot(kv, linalg.cho_solve(L, np.transpose(kv)))
            self.covp = covp
            varp = np.diag(covp)
            # for user
            self.f = mnp[:len(xnew)]
            self.fvar = varp[:len(xnew)]
            fvar = varp[:len(xnew)]
            if addnoise:
                # add measurement error to the variance of the latent function
                if np.any(self.merrors):
                    if xold:
                        self.fvar = fvar + np.exp(lth[-1]) * np.diag(
                            self.merrors)
                    else:
                        self.fvar = fvar + merrorsnew
                else:
                    self.fvar = fvar + np.exp(lth[-1]) * np.identity(len(xnew))
            else:
                # just take the variance of the latent function
                self.fvar = fvar
            if derivs > 0:
                self.df = mnp[len(xnew):2 * len(xnew)]
                self.dfvar = varp[len(xnew):2 * len(xnew)]
            if derivs > 1:
                self.ddf = mnp[2 * len(xnew):]
                self.ddfvar = varp[2 * len(xnew):]
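# Minimal sketch (a hypothetical `rbf` helper standing in for self.covfn) of the
# predictive equations used above: mean = K_* K^{-1} y and
# cov = K_** - K_* K^{-1} K_*^T, reusing a single Cholesky factor of K.
import numpy as np
from scipy import linalg

def rbf(a, b, ell=0.3):
    return np.exp(-0.5 * (a[:, None] - b[None, :]) ** 2 / ell ** 2)

x = np.linspace(0.0, 1.0, 15)
y = np.sin(2.0 * np.pi * x)
xnew = np.linspace(0.0, 1.0, 50)

K = rbf(x, x) + 1e-8 * np.eye(x.size)
L = linalg.cho_factor(K)
kv = rbf(xnew, x)
mean = kv @ linalg.cho_solve(L, y)
cov = rbf(xnew, xnew) - kv @ linalg.cho_solve(L, kv.T)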
예제 #42
0
    def predict(self, X, return_std=False, return_cov=False):
        """Predict using the Gaussian process regression model

        We can also predict based on an unfitted model by using the GP prior.
        In addition to the mean of the predictive distribution, its standard
        deviation (return_std=True) or covariance (return_cov=True) can also
        be returned.
        Note that at most one of the two can be requested.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features) or list of object
            Query points where the GP is evaluated.

        return_std : bool, default=False
            If True, the standard-deviation of the predictive distribution at
            the query points is returned along with the mean.

        return_cov : bool, default=False
            If True, the covariance of the joint predictive distribution at
            the query points is returned along with the mean.

        Returns
        -------
        y_mean : ndarray of shape (n_samples, [n_output_dims])
            Mean of the predictive distribution at the query points.

        y_std : ndarray of shape (n_samples,), optional
            Standard deviation of predictive distribution at query points.
            Only returned when `return_std` is True.

        y_cov : ndarray of shape (n_samples, n_samples), optional
            Covariance of the joint predictive distribution at the query points.
            Only returned when `return_cov` is True.
        """
        if return_std and return_cov:
            raise RuntimeError(
                "Not returning standard deviation of predictions when "
                "returning full covariance.")

        if self.kernel is None or self.kernel.requires_vector_input:
            X = self._validate_data(X,
                                    ensure_2d=True,
                                    dtype="numeric",
                                    reset=False)
        else:
            X = self._validate_data(X,
                                    ensure_2d=False,
                                    dtype=None,
                                    reset=False)

        if not hasattr(self, "X_train_"):  # Unfitted; predict based on GP prior
            if self.kernel is None:
                kernel = (C(1.0, constant_value_bounds="fixed") *
                          RBF(1.0, length_scale_bounds="fixed"))
            else:
                kernel = self.kernel
            y_mean = np.zeros(X.shape[0])
            if return_cov:
                y_cov = kernel(X)
                return y_mean, y_cov
            elif return_std:
                y_var = kernel.diag(X)
                return y_mean, np.sqrt(y_var)
            else:
                return y_mean
        else:  # Predict based on GP posterior
            K_trans = self.kernel_(X, self.X_train_)
            y_mean = K_trans.dot(self.alpha_)  # Line 4 (y_mean = f_star)

            # undo normalisation
            y_mean = self._y_train_std * y_mean + self._y_train_mean

            if return_cov:
                v = cho_solve((self.L_, True), K_trans.T)  # Line 5
                y_cov = self.kernel_(X) - K_trans.dot(v)  # Line 6

                # undo normalisation
                y_cov = y_cov * self._y_train_std**2

                return y_mean, y_cov
            elif return_std:
                # cache result of K_inv computation
                if self._K_inv is None:
                    # compute inverse K_inv of K based on its Cholesky
                    # decomposition L and its inverse L_inv
                    L_inv = solve_triangular(self.L_.T,
                                             np.eye(self.L_.shape[0]))
                    self._K_inv = L_inv.dot(L_inv.T)

                # Compute variance of predictive distribution
                y_var = self.kernel_.diag(X)
                y_var -= np.einsum("ij,ij->i", np.dot(K_trans, self._K_inv),
                                   K_trans)

                # Check if any of the variances is negative because of
                # numerical issues. If yes: set the variance to 0.
                y_var_negative = y_var < 0
                if np.any(y_var_negative):
                    warnings.warn("Predicted variances smaller than 0. "
                                  "Setting those variances to 0.")
                    y_var[y_var_negative] = 0.0

                # undo normalisation
                y_var = y_var * self._y_train_std**2

                return y_mean, np.sqrt(y_var)
            else:
                return y_mean
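# Hedged usage sketch: querying the posterior with either the standard deviation
# or the full covariance (requesting both at once raises, as coded above).
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor

X = np.linspace(0.0, 1.0, 30)[:, None]
y = np.cos(3.0 * X).ravel()
gpr = GaussianProcessRegressor(alpha=1e-4, random_state=0).fit(X, y)

Xq = np.linspace(0.0, 1.0, 100)[:, None]
mean, std = gpr.predict(Xq, return_std=True)
mean, cov = gpr.predict(Xq, return_cov=True)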
예제 #43
0
    def log_marginal_likelihood(self,
                                theta=None,
                                eval_gradient=False,
                                clone_kernel=True):
        """Returns log-marginal likelihood of theta for training data.

        Parameters
        ----------
        theta : array-like of shape (n_kernel_params,), default=None
            Kernel hyperparameters for which the log-marginal likelihood is
            evaluated. If None, the precomputed log_marginal_likelihood
            of ``self.kernel_.theta`` is returned.

        eval_gradient : bool, default=False
            If True, the gradient of the log-marginal likelihood with respect
            to the kernel hyperparameters at position theta is returned
            additionally. If True, theta must not be None.

        clone_kernel : bool, default=True
            If True, the kernel attribute is copied. If False, the kernel
            attribute is modified in place, which may give a performance
            improvement.

        Returns
        -------
        log_likelihood : float
            Log-marginal likelihood of theta for training data.

        log_likelihood_gradient : ndarray of shape (n_kernel_params,), optional
            Gradient of the log-marginal likelihood with respect to the kernel
            hyperparameters at position theta.
            Only returned when eval_gradient is True.
        """
        if theta is None:
            if eval_gradient:
                raise ValueError(
                    "Gradient can only be evaluated for theta!=None")
            return self.log_marginal_likelihood_value_

        if clone_kernel:
            kernel = self.kernel_.clone_with_theta(theta)
        else:
            kernel = self.kernel_
            kernel.theta = theta

        if eval_gradient:
            K, K_gradient = kernel(self.X_train_, eval_gradient=True)
        else:
            K = kernel(self.X_train_)

        K[np.diag_indices_from(K)] += self.alpha
        try:
            L = cholesky(K, lower=True)  # Line 2
        except np.linalg.LinAlgError:
            return (-np.inf, np.zeros_like(theta)) \
                if eval_gradient else -np.inf

        # Support multi-dimensional output of self.y_train_
        y_train = self.y_train_
        if y_train.ndim == 1:
            y_train = y_train[:, np.newaxis]

        alpha = cho_solve((L, True), y_train)  # Line 3

        # Compute log-likelihood (compare line 7)
        log_likelihood_dims = -0.5 * np.einsum("ik,ik->k", y_train, alpha)
        log_likelihood_dims -= np.log(np.diag(L)).sum()
        log_likelihood_dims -= K.shape[0] / 2 * np.log(2 * np.pi)
        log_likelihood = log_likelihood_dims.sum(-1)  # sum over dimensions

        if eval_gradient:  # compare Equation 5.9 from GPML
            tmp = np.einsum("ik,jk->ijk", alpha, alpha)  # k: output-dimension
            tmp -= cho_solve((L, True), np.eye(K.shape[0]))[:, :, np.newaxis]
            # Compute "0.5 * trace(tmp.dot(K_gradient))" without
            # constructing the full matrix tmp.dot(K_gradient) since only
            # its diagonal is required
            log_likelihood_gradient_dims = \
                0.5 * np.einsum("ijl,jik->kl", tmp, K_gradient)
            log_likelihood_gradient = log_likelihood_gradient_dims.sum(-1)

        if eval_gradient:
            return log_likelihood, log_likelihood_gradient
        else:
            return log_likelihood
예제 #44
0
    def _update_cache(self):
        """
        Update cache
        """
        cov_params_have_changed = self.Cr.params_have_changed or self.Cg.params_have_changed or self.Cn.params_have_changed

        if self.XX_has_changed:
            start = TIME.time()
            """ Row SVD Bg + Noise """
            self.cache['Srstar'],Urstar  = LA.eigh(self.XX)
            self.cache['Lr']   = Urstar.T
            self.mean.setRowRotation(Lr=self.cache['Lr'])

            smartSum(self.time,'cache_XXchanged',TIME.time()-start)
            smartSum(self.count,'cache_XXchanged',1)
        
        if self.Xr_has_changed or self.XX_has_changed:
            start = TIME.time()
            """ rotate Xr and XrXr """
            self.cache['LXr']    = SP.dot(self.cache['Lr'],self.Xr)
            smartSum(self.time,'cache_Xrchanged',TIME.time()-start)
            smartSum(self.count,'cache_Xrchanged',1)

        if cov_params_have_changed:
            start = TIME.time()
            """ Col SVD Bg + Noise """
            S2,U2 = LA.eigh(self.Cn.K()+self.offset*SP.eye(self.P))
            self.cache['Sc2'] = S2
            US2   = SP.dot(U2,SP.diag(SP.sqrt(S2)))
            USi2  = SP.dot(U2,SP.diag(SP.sqrt(1./S2)))
            Cstar = SP.dot(USi2.T,SP.dot(self.Cg.K(),USi2))
            self.cache['Scstar'],Ucstar = LA.eigh(Cstar)
            self.cache['Lc'] = SP.dot(Ucstar.T,USi2.T)

            """ pheno """
            self.mean.setColRotation(self.cache['Lc'])

            """ region part """
            self.cache['A']   = SP.reshape(self.Cr.getParams(),(self.P,self.rank),order='F')
            self.cache['LAc'] = SP.dot(self.cache['Lc'],self.cache['A'])

        if cov_params_have_changed or self.XX_has_changed:
            """ S """
            self.cache['s'] = SP.kron(self.cache['Scstar'],self.cache['Srstar'])+1
            self.cache['d'] = 1./self.cache['s']
            self.cache['D'] = SP.reshape(self.cache['d'],(self.N,self.P), order='F')

            """ pheno """
            self.cache['LY']  = self.mean.evaluate()
            self.cache['DLY'] = self.cache['D']*self.cache['LY']

            smartSum(self.time,'cache_colSVDpRot',TIME.time()-start)
            smartSum(self.count,'cache_colSVDpRot',1)

        if cov_params_have_changed or self.XX_has_changed or self.Xr_has_changed:

            """ calculate B = I + kron(LAc, LXr).T * D * kron(LAc, LXr) """
            start = TIME.time()
            W                = SP.kron(self.cache['LAc'],self.cache['LXr'])
            self.cache['DW']  = W*self.cache['d'][:,SP.newaxis]
            self.cache['DWt'] = self.cache['DW'].reshape((self.N,self.P,self.rank*self.S),order='F')
            #B  = NP.einsum('ijk,jl->ilk',self.cache['DWt'],self.cache['LAc'])
            #B  = NP.einsum('ji,jlk->ilk',self.cache['LXr'],B)
            B = SP.tensordot(self.cache['DWt'],self.cache['LAc'],axes=(1,0)) 
            B = NP.transpose(B, (0, 2, 1))
            B = SP.tensordot(self.cache['LXr'],B,axes=(0,0))
            B = B.reshape((self.rank*self.S,self.rank*self.S),order='F')
            B+= SP.eye(self.rank*self.S)
            smartSum(self.time,'cache_calcB',TIME.time()-start)
            smartSum(self.count,'cache_calcB',1)

            """ invert B """
            start = TIME.time()
            self.cache['cholB'] = LA.cholesky(B).T
            self.cache['Bi']    = LA.cho_solve((self.cache['cholB'],True),SP.eye(self.S*self.rank))
            smartSum(self.time,'cache_invB',TIME.time()-start)
            smartSum(self.count,'cache_invB',1)
            
            """ pheno """
            start = TIME.time()
            Z = SP.dot(self.cache['LXr'].T,SP.dot(self.cache['DLY'],self.cache['LAc']))
            self.cache['z']           = SP.reshape(Z,(self.S*self.rank), order='F')
            self.cache['Biz']         = LA.cho_solve((self.cache['cholB'],True),self.cache['z'])
            BiZ = SP.reshape(self.cache['Biz'],(self.S,self.rank), order='F')
            self.cache['DLYpDLXBiz']  = SP.dot(self.cache['LXr'],SP.dot(BiZ,self.cache['LAc'].T))
            self.cache['DLYpDLXBiz'] *= -self.cache['D']
            self.cache['DLYpDLXBiz'] += self.cache['DLY']
            smartSum(self.time,'cache_phenoCalc',TIME.time()-start)
            smartSum(self.count,'cache_phenoCalc',1)

        self.XX_has_changed = False
        self.Xr_has_changed = False
        self.Y_has_changed  = False
        self.Cr.params_have_changed = False
        self.Cg.params_have_changed = False
        self.Cn.params_have_changed = False
예제 #45
0
File: lobpcg.py  Project: chenusc11/CS231n
def _applyConstraints(blockVectorV, factYBY, blockVectorBY, blockVectorY):
    """Changes blockVectorV in place."""
    gramYBV = np.dot(blockVectorBY.T, blockVectorV)
    tmp = cho_solve(factYBY, gramYBV)
    blockVectorV -= np.dot(blockVectorY, tmp)
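# Illustrative check (B = I, toy sizes) of what _applyConstraints does: subtract
# from V its components along the constraint block Y, leaving V B-orthogonal to Y.
import numpy as np
from scipy.linalg import cho_factor, cho_solve

rng = np.random.default_rng(2)
Y = rng.normal(size=(20, 3))      # constraints (blockVectorY, with B = I)
V = rng.normal(size=(20, 5))      # block to purify (blockVectorV)

factYBY = cho_factor(Y.T @ Y)     # factorization of Y.T @ B @ Y
V -= Y @ cho_solve(factYBY, Y.T @ V)
assert np.allclose(Y.T @ V, 0.0)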
예제 #46
0
    def kinetic_energy(self, pos, mom, cache={}):
        return 0.5 * mom.dot(la.cho_solve((self.mass_matrix_chol, True), mom))
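# Small standalone check (toy mass matrix) of the quadratic form above:
# cho_solve with the mass-matrix Cholesky factor gives 0.5 * p.T M^{-1} p.
import numpy as np
from scipy import linalg as la

M = np.array([[2.0, 0.3], [0.3, 1.0]])
mass_matrix_chol = la.cholesky(M, lower=True)
mom = np.array([0.5, -1.2])
ke = 0.5 * mom.dot(la.cho_solve((mass_matrix_chol, True), mom))
assert np.isclose(ke, 0.5 * mom @ np.linalg.solve(M, mom))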
예제 #47
0
def newton(oracle,
           x_0,
           tolerance=1e-5,
           max_iter=100,
           line_search_options=None,
           trace=False,
           display=False):
    """
    Newton's optimization method.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess() methods implemented for computing
        function value, its gradient and Hessian respectively. If the Hessian
        returned by the oracle is not positive-definite, the method stops with message="newton_direction_error".
    x_0 : np.array
        Starting point for optimization algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    trace : bool
        If True, the progress information is appended into history dictionary during training.
        Otherwise None is returned instead of history.
    display : bool
        If True, debug information is displayed during optimization.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method x_k still doesn't satisfy
                the stopping criterion.
            - 'newton_direction_error': in case of failure of solving linear system with Hessian matrix (e.g. non-invertible matrix).
            - 'computational_error': in case of getting Infinity or None value during the computations.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['time'] : list of floats, containing time passed from the start of the method
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['grad_norm'] : list of Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2

    Example:
    --------
    >> oracle = QuadraticOracle(np.eye(5), np.arange(5))
    >> x_opt, message, history = newton(oracle, np.zeros(5), line_search_options={'method': 'Constant', 'c': 1.0})
    >> print('Found optimal point: {}'.format(x_opt))
       Found optimal point: [ 0.  1.  2.  3.  4.]
    """
    history = defaultdict(list) if trace else None
    line_search_tool = LineSearchTool(**line_search_options)
    x_k = np.copy(x_0)

    # TODO: Implement Newton's method.
    # Use line_search_tool.line_search() for adaptive step size.
    def fill_history():
        if not trace:
            return
        history['time'].append(datetime.now() - t_0)
        history['func'].append(func_k)
        history['grad_norm'].append(grad_k_norm)
        if len(x_k) <= 2:
            history['x'].append(np.copy(x_k))

    def get_alpha(x_concat, d_concat):
        x, u = np.array_split(x_concat, 2)
        grad_x, grad_u = np.array_split(d_concat, 2)
        alphas = [1.]
        THETA = 0.99
        for i in range(len(grad_x)):
            if grad_x[i] > grad_u[i]:
                alphas.append(THETA * (u[i] - x[i]) / (grad_x[i] - grad_u[i]))
            if grad_x[i] < -grad_u[i]:
                alphas.append(THETA * (x[i] + u[i]) / (-grad_x[i] - grad_u[i]))
        return min(alphas)

    t_0 = datetime.now()
    func_k = oracle.func(x_k)
    grad_k = oracle.grad(x_k)
    hess_k = oracle.hess(x_k)
    grad_0_norm = grad_k_norm = np.linalg.norm(grad_k)
    fill_history()
    if display:
        print('Begin new NM')

    for i in range(max_iter):
        if display:
            print('i = {} grad_norm = {} func = {} x = {} grad = {}'.format(
                i, grad_k_norm, func_k, x_k, grad_k))
        if grad_k_norm**2 <= tolerance * grad_0_norm**2:
            break
        try:
            d_k = cho_solve(cho_factor(hess_k), -grad_k)
        except LinAlgError:
            return x_k, 'newton_direction_error', history

        a_k = line_search_tool.line_search(oracle,
                                           x_k,
                                           d_k,
                                           previous_alpha=get_alpha(x_k, d_k))
        x_k += a_k * d_k
        func_k = oracle.func(x_k)
        grad_k = oracle.grad(x_k)
        hess_k = oracle.hess(x_k)
        grad_k_norm = np.linalg.norm(grad_k)
        fill_history()

    if grad_k_norm**2 <= tolerance * grad_0_norm**2:
        return x_k, 'success', history
    else:
        return x_k, 'iterations_exceeded', history
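# Hedged sketch of the core step above on a convex quadratic: the Newton
# direction d = -H^{-1} g computed via cho_factor/cho_solve reaches the
# minimizer of 0.5 x.T A x - b.T x in a single full step.
import numpy as np
from scipy.linalg import cho_factor, cho_solve

A = np.array([[3.0, 1.0], [1.0, 2.0]])
b = np.array([1.0, -1.0])

x = np.zeros(2)
grad = A @ x - b
d = cho_solve(cho_factor(A), -grad)   # Newton direction (Hessian is A)
x = x + d
assert np.allclose(A @ x, b)          # x is the exact minimizer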
예제 #48
0
def submatrix_inv_mult(M, Minv, imask, Y, MinvY, pad=True, bruteforce=False):
    """
    Returns (inverse of submatrix of M) * Y

        Parameters:
            M (np.ndarray) N x N: symmetric and positive semi-definite matrix
            Minv (np.ndarray) N x N: inverse of M
            imask (np.ndarray) N x N: mask of rows/columns to use 
                                      (1 == keep, 0 == remove); contains Nk ones and Nr zeros 
            Y (np.ndarray) N x Nspec:  matrix to multiply Ainv by; assumed to be zero-padded
            MinvY (np.ndarray) N x Nspec:  matrix Minv * Y 
            pad (bool)        : flag for zero-padding
            bruteforce (bool) : flag for using bruteforce approach

        Returns:
            AinvY (np.ndarray): inverse of A (submatrix of M) times Y, of shape
                                (N - Nr) x Nspec, where Nr = number of removed rows.
                                - If pad is True, then zero-padded to N x Nspec with
                                zeros at each removed row

        Comments:
            Let M be block matrix given by
                |A   B|              |P   Q| 
            M = |     | and M^{-1} = |     |
                |B.T D|              |Q.T U|

            Then inverse of A is Schur complement of U
            A^{-1} = P - Q U^{-1} Q.T

            U and M must be invertible and positive semi-definite.
            This returns,
            A^{-1} Y = P Y - (Q U^{-1} Q.T) Y

            Y is assumed to be zero-padded at bad rows    
    """

    #verify proper dimensionalities
    assert M.shape[0] == M.shape[1], "M must be a square matrix."
    assert imask.shape[0] == M.shape[
        0], "M and imask have incompatible dimensions."
    assert Y.ndim == 2, "Y must be a 2-d array (N x Nspec)."
    assert MinvY.ndim == 2, "MinvY must be a 2-d array (N x Nspec)."

    #rows/columns to keep (k) and remove (r)
    k = np.where(imask.any(axis=1))[0]  #?? assume imask symmetric
    nk = len(k)

    r = np.where(~imask.any(
        axis=1))[0]  #must convert to bool since ~ is bitwise complement
    nr = len(r)

    if bruteforce:
        A = (M[k, :])[:, k]
        Ainv = cholesky_inv(A)
        Ainvy = Ainv.T @ Y[k, :]
        return Ainvy

    if (nr == 0):
        print("imask does not remove any rows or columns.")
        return MinvY

    #Q.T Y_k can be read off from Minv Y: rows r of Minv @ Y equal Q.T Y_k + U Y_r,
    #so Q.T Y_k = MinvY[r, :] - U @ Y[r, :] and Q itself never has to be formed.
    U = (Minv[r, :])[:, r]
    Yr = Y[r, :]
    Qty = MinvY[r, :] - np.dot(U, Yr)
    Qt = (Minv[r, :])[:, k]

    #evaluate A^{-1} Y = P Y - Q U^{-1} Q.T Y

    #Faster for big U (and fast enough for small U)
    if (U.shape[0] == 1):
        UinvQtY = Qty / U[0]
    else:
        L = linalg.cho_factor(U, lower=False, check_finite=False)
        UinvQtY = linalg.cho_solve(L, Qty, overwrite_b=False)

    #Evaluate A^{-1} Y = P Y - Q U^{-1} Q^T Y
    #using P Y = Minv Y - Q Y

    if (pad):
        AinvY0 = deepcopy(MinvY)
        AinvY0[k, :] -= Qt.T @ (UinvQtY + Yr)
        AinvY0[r, :] = 0
        return AinvY0
    else:
        AinvY = MinvY[k, :] - ((UinvQtY + Yr).T @ Qt).T
        return AinvY
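# Numerical check (random SPD toy matrix, not from the snippet) of the identity
# the function relies on: if M = [[A, B], [B.T, D]] and Minv = [[P, Q], [Q.T, U]],
# then inv(A) = P - Q @ inv(U) @ Q.T.
import numpy as np

rng = np.random.default_rng(3)
G = rng.normal(size=(6, 6))
M = G @ G.T + 6.0 * np.eye(6)
Minv = np.linalg.inv(M)

k, r = np.arange(4), np.arange(4, 6)            # kept / removed indices
A = M[np.ix_(k, k)]
P = Minv[np.ix_(k, k)]
Q = Minv[np.ix_(k, r)]
U = Minv[np.ix_(r, r)]
assert np.allclose(np.linalg.inv(A), P - Q @ np.linalg.solve(U, Q.T))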
예제 #49
0
def multi_harmonic_fit(time,
                       data,
                       error,
                       freq,
                       nharm=4,
                       return_model=False,
                       freq_sep=0.01,
                       fit_mean=True,
                       fit_slope=False):
    """
    Simultaneous fit of a sum of sinusoids by weighted, linear least squares.
       model(t) = C0 + C1*(t-t0) + Sum_i Sum_j Aij sin(2*pi*j*fi*(t-t0)+phij), i=[1,nfreq], j=[1,nharm]
         [t0 defined such that ph11=0]

    Input:
        time: x vector
        data: y vector
        error: uncertainty on data
        freq: one or more frequencies freq_i to fit
        nharm: number of harmonics of each frequency to fit (nharm=1 is just fundamental)
              fij = fi, 2*fi, ... nharm*fi
        freq_sep: freq_ij separated by less than this are ignored (should be the search grid spacing)
        fit_slope: if False, then C1=0
        fit_mean: if False, then C0=0

    Output:
        A dictionary containing the model evaluated on the time grid (if return_model==True) and 
        the model amplitudes Aij, phases phij, and their uncertainties.
    """
    t = time.astype('float64')
    r = data.astype('float64')
    dr = error.astype('float64')

    numt = len(t)

    wt = 1. / dr**2
    s0 = wt.sum()
    t0 = (t * wt).sum() / s0
    t -= t0

    dr *= sqrt(s0)
    r0 = (r * wt).sum() / s0
    r -= r0

    nfit = 0
    if (fit_mean == True):
        nfit = 1

    if (fit_slope == True):
        fit_mean = True
        nfit = 2
        tm = t.max()
        s1 = ((t / tm)**2 * wt).sum()
        sb = ((t / tm) * r * wt).sum()
        slope = sb / s1
        s1 /= s0
        r -= slope * t / tm
        tt = t / tm / dr

    rr = r / dr
    chi0 = dot(rr, rr) * s0

    matr = empty((nfit + 2 * nharm, nfit + 2 * nharm), dtype='float64')
    vec = empty(nfit + 2 * nharm, dtype='float64')

    sx = empty((nharm, numt), dtype='float64')
    cx = empty((nharm, numt), dtype='float64')

    #
    # We will solve matr*res = vec, for res.  Define matr and vec.
    #
    sx0, cx0 = sin(2 * pi * t * freq), cos(2 * pi * t * freq)
    sx[0, :] = sx0 / dr
    cx[0, :] = cx0 / dr
    for i in range(nharm - 1):
        sx[i + 1, :] = cx0 * sx[i, :] + sx0 * cx[i, :]
        cx[i + 1, :] = -sx0 * sx[i, :] + cx0 * cx[i, :]

    if (nfit > 0):
        vec[0] = 0.
        matr[0, 0] = 1.
    if (nfit > 1):
        vec[1] = matr[0, 1] = matr[1, 0] = 0.
        matr[1, 1] = s1

    for i in range(nharm):
        vec[i + nfit] = dot(sx[i, :], rr)
        vec[nharm + i + nfit] = dot(cx[i, :], rr)
        if (nfit > 0):
            matr[0, i + nfit] = matr[i + nfit, 0] = dot(sx[i, :], 1. / dr)
            matr[0, nharm + i + nfit] = matr[nharm + i + nfit,
                                             0] = dot(cx[i, :], 1. / dr)
        if (nfit > 1):
            matr[1, i + nfit] = matr[i + nfit, 1] = dot(sx[i, :], tt)
            matr[1, nharm + i + nfit] = matr[nharm + i + nfit,
                                             1] = dot(cx[i, :], tt)
        for j in range(i + 1):
            matr[j + nfit,
                 i + nfit] = matr[i + nfit,
                                  j + nfit] = dot(sx[i, :], sx[j, :])
            matr[j + nfit,
                 nharm + i + nfit] = matr[nharm + i + nfit,
                                          j + nfit] = dot(cx[i, :], sx[j, :])
            matr[nharm + j + nfit,
                 i + nfit] = matr[i + nfit,
                                  nharm + j + nfit] = dot(sx[i, :], cx[j, :])
            matr[nharm + j + nfit,
                 nharm + i + nfit] = matr[nharm + i + nfit,
                                          nharm + j + nfit] = dot(
                                              cx[i, :], cx[j, :])

    out_dict = {}

    #
    # Convert to amplitudes and phases and propagate errors
    #
    out_dict['cn0'] = r0
    out_dict['cn0_error'] = 1. / sqrt(s0)
    out_dict['trend'] = 0.
    out_dict['trend_error'] = 0.

    A0, B0, vA0, vB0, covA0B0 = zeros((5, nharm), dtype='float64')
    amp, phase, rel_phase = zeros((3, nharm), dtype='float64')
    damp, dphase = zeros((2, nharm), dtype='float64')
    covA0B0 = zeros(nharm, dtype='float64')
    res = zeros(nfit + 2 * nharm, dtype='float64')
    err2 = zeros(nfit + 2 * nharm, dtype='float64')

    out_dict['bayes_factor'] = 0.

    try:
        #
        # solve the equation and replace matr with its inverse
        #
        m0 = cho_factor(matr, lower=False)
        out_dict['bayes_factor'] = -log(trace(m0[0]))
        res = cho_solve(m0, vec)
        CholeskyInverse(m0[0], matr)

        A0, B0 = res[nfit:nharm + nfit], res[nharm + nfit:]
        amp = sqrt(A0**2 + B0**2)
        phase = arctan2(B0, A0)

        err2 = diag(matr) / s0
        vA0, vB0 = err2[nfit:nharm + nfit], err2[nharm + nfit:]
        for i in range(nharm):
            covA0B0[i] = matr[nfit + i, nharm + nfit + i] / s0

        damp = sqrt(A0**2 * vA0 + B0**2 * vB0 + 2. * A0 * B0 * covA0B0) / amp
        dphase = sqrt(A0**2 * vB0 + B0**2 * vA0 -
                      2. * A0 * B0 * covA0B0) / amp**2
        rel_phase = phase - phase[0] * (1. + arange(nharm))
        rel_phase = arctan2(sin(rel_phase), cos(rel_phase))

    except:
        print(
            "Failed: singular matrix! (Are your frequencies unique/non-harmonic?)"
        )

    out_dict['time0'] = t0 - phase[0] / (2 * pi * freq)
    out_dict["amplitude"] = amp
    out_dict["amplitude_error"] = damp
    out_dict["rel_phase"] = rel_phase
    out_dict["rel_phase_error"] = dphase

    modl = r0 + dot(A0, sx * dr) + dot(B0, cx * dr)
    if (nfit > 0):
        out_dict['cn0'] += res[0]
        out_dict['cn0_error'] = sqrt(err2[0])
        modl += res[0]
    if (nfit > 1):
        out_dict['trend'] = (res[1] + slope) / tm
        out_dict['trend_error'] = sqrt(err2[1]) / tm
        modl += out_dict['trend'] * t
        ###
        #import os
        #import matplotlib.pyplot as pyplot
        #t_folded = t % (1./freq)
        #pyplot.title("nfit=%d After modl += res[0] and modl += out_dict['trend']*t" % (nfit))

        #pyplot.plot(t_folded, data, 'bo', ms=3)
        #pyplot.plot(t_folded, modl, 'ro', ms=3)
        #pyplot.plot(t_folded, modl - out_dict['trend']*t, 'mo', ms=3)
        #pyplot.plot(t_folded, out_dict['trend']*t, 'go', ms=3)
        ##pyplot.plot(t, data, 'bo', ms=3)
        ##pyplot.plot(t, modl, 'ro', ms=3)
        ##pyplot.plot(t, modl - out_dict['trend']*t, 'mo', ms=3)
        ##pyplot.plot(t, out_dict['trend']*t, 'go', ms=3)
        ###pyplot.show()
        ##fpath = '/tmp/multiharmonic.ps'
        ##pyplot.savefig(fpath)
        ##os.system('gv %s &' % (fpath))
        #import pdb; pdb.set_trace()
        ###
        resid = (modl - r - r0 - slope * tt * dr) / dr
        out_dict['chi2'] = dot(resid, resid) * s0
        out_dict['cn0'] += out_dict['trend'] * (out_dict['time0'] - t0)
    else:
        resid = (modl - r - r0) / dr
        out_dict['chi2'] = dot(resid, resid) * s0

    ###
    #import os
    #import matplotlib.pyplot as pyplot
    #t_folded = t % (1./freq)
    #pyplot.title("nfit=%d freq=%f End" % (nfit, freq))

    #pyplot.plot(t_folded, data, 'bo', ms=3)
    #pyplot.plot(t_folded, modl, 'ro', ms=3)
    #pyplot.plot(t_folded, modl - out_dict['trend']*t, 'mo', ms=3)
    #pyplot.plot(t_folded, out_dict['trend']*t, 'go', ms=3)
    ##pyplot.plot(t, data, 'bo', ms=3)
    ##pyplot.plot(t, modl, 'ro', ms=3)
    ##pyplot.plot(t, modl - out_dict['trend']*t, 'mo', ms=3)
    ##pyplot.plot(t, out_dict['trend']*t, 'go', ms=3)
    ###pyplot.show()
    #fpath = '/tmp/multiharmonic.ps'
    #pyplot.savefig(fpath)
    #os.system('gv %s &' % (fpath))
    #import pdb; pdb.set_trace()
    #pyplot.clf()
    ###

    out_dict['nu'] = numt - 2 * nharm - nfit
    out_dict['signif'] = chi2sigma(chi0, out_dict['chi2'], numt - nfit, nharm)
    if (return_model):
        out_dict['model'] = modl

    return out_dict
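# Hedged sketch of the linear algebra at the heart of the fit above: a weighted
# least-squares solve of the normal equations with cho_factor/cho_solve for a
# single frequency and one harmonic (toy data; no mean or slope terms).
import numpy as np
from scipy.linalg import cho_factor, cho_solve

rng = np.random.default_rng(4)
t = np.sort(rng.uniform(0.0, 10.0, 200))
err = 0.1 * np.ones_like(t)
y = 0.7 * np.sin(np.pi * t) + 0.3 * np.cos(np.pi * t) + err * rng.normal(size=t.size)

D = np.column_stack([np.sin(np.pi * t), np.cos(np.pi * t)]) / err[:, None]
matr = D.T @ D                        # normal-equation matrix
vec = D.T @ (y / err)                 # weighted right-hand side
A0, B0 = cho_solve(cho_factor(matr), vec)
amp, phase = np.hypot(A0, B0), np.arctan2(B0, A0)   # same conversion as above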
예제 #50
0
def _precond(LorU, lower, x):
    y = cho_solve((LorU, lower), x)
    return _as2d(y)
예제 #51
0
    def solve(self, tr_radius):
        """Solve quadratic subproblem"""

        lambda_current, lambda_lb, lambda_ub = self._initial_values(tr_radius)
        n = self.dimension
        hits_boundary = True
        already_factorized = False
        self.niter = 0

        while True:

            # Compute Cholesky factorization
            if already_factorized:
                already_factorized = False
            else:
                H = self.hess + lambda_current * np.eye(n)
                U, info = self.cholesky(H,
                                        lower=False,
                                        overwrite_a=False,
                                        clean=True)

            self.niter += 1

            # Check if factorization succeeded
            if info == 0 and self.jac_mag > self.CLOSE_TO_ZERO:
                # Successful factorization

                # Solve `U.T U p = -jac`
                p = cho_solve((U, False), -self.jac)

                p_norm = norm(p)

                # Check for interior convergence
                if p_norm <= tr_radius and lambda_current == 0:
                    hits_boundary = False
                    break

                # Solve `U.T w = p`
                w = solve_triangular(U, p, trans='T')

                w_norm = norm(w)

                # Compute the Newton step according to
                # formula (4.44), p. 87 of ref [2]_.
                delta_lambda = (p_norm / w_norm)**2 * (p_norm -
                                                       tr_radius) / tr_radius
                lambda_new = lambda_current + delta_lambda

                if p_norm < tr_radius:  # Inside boundary
                    s_min, z_min = estimate_smallest_singular_value(U)

                    ta, tb = self.get_boundaries_intersections(
                        p, z_min, tr_radius)

                    # Choose `step_len` with the smallest magnitude.
                    # The reason for this choice is explained at
                    # ref [3]_, p. 6 (Immediately before the formula
                    # for `tau`).
                    step_len = min([ta, tb], key=abs)

                    # Compute the quadratic term  (p.T*H*p)
                    quadratic_term = np.dot(p, np.dot(H, p))

                    # Check stop criteria
                    relative_error = (step_len**2 * s_min**2) / (
                        quadratic_term + lambda_current * tr_radius**2)
                    if relative_error <= self.k_hard:
                        p += step_len * z_min
                        break

                    # Update uncertainty bounds
                    lambda_ub = lambda_current
                    lambda_lb = max(lambda_lb, lambda_current - s_min**2)

                    # Compute Cholesky factorization
                    H = self.hess + lambda_new * np.eye(n)
                    c, info = self.cholesky(H,
                                            lower=False,
                                            overwrite_a=False,
                                            clean=True)

                    # Check if the factorization has succeeded
                    if info == 0:  # Successful factorization
                        # Update damping factor
                        lambda_current = lambda_new
                        already_factorized = True
                    else:  # Unsuccessful factorization
                        # Update uncertainty bounds
                        lambda_lb = max(lambda_lb, lambda_new)

                        # Update damping factor
                        lambda_current = max(
                            np.sqrt(lambda_lb * lambda_ub), lambda_lb +
                            self.UPDATE_COEFF * (lambda_ub - lambda_lb))

                else:  # Outside boundary
                    # Check stop criteria
                    relative_error = abs(p_norm - tr_radius) / tr_radius
                    if relative_error <= self.k_easy:
                        break

                    # Update uncertainty bounds
                    lambda_lb = lambda_current

                    # Update damping factor
                    lambda_current = lambda_new

            elif info == 0 and self.jac_mag <= self.CLOSE_TO_ZERO:
                # jac_mag very close to zero

                # Check for interior convergence
                if lambda_current == 0:
                    p = np.zeros(n)
                    hits_boundary = False
                    break

                s_min, z_min = estimate_smallest_singular_value(U)
                step_len = tr_radius

                # Check stop criteria
                if step_len**2 * s_min**2 <= self.k_hard * lambda_current * tr_radius**2:
                    p = step_len * z_min
                    break

                # Update uncertainty bounds
                lambda_ub = lambda_current
                lambda_lb = max(lambda_lb, lambda_current - s_min**2)

                # Update damping factor
                lambda_current = max(
                    np.sqrt(lambda_lb * lambda_ub),
                    lambda_lb + self.UPDATE_COEFF * (lambda_ub - lambda_lb))

            else:  # Unsuccessful factorization

                # Compute auxiliary terms
                delta, v = singular_leading_submatrix(H, U, info)
                v_norm = norm(v)

                # Update uncertainty interval
                lambda_lb = max(lambda_lb, lambda_current + delta / v_norm**2)

                # Update damping factor
                lambda_current = max(
                    np.sqrt(lambda_lb * lambda_ub),
                    lambda_lb + self.UPDATE_COEFF * (lambda_ub - lambda_lb))

        self.lambda_lb = lambda_lb
        self.lambda_current = lambda_current
        self.previous_tr_radius = tr_radius

        return p, hits_boundary
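# Small check of the "Solve U.T U p = ..." step above: with an upper Cholesky
# factor U of H, cho_solve((U, False), -g) returns p = -inv(H) @ g.
import numpy as np
from scipy.linalg import cholesky, cho_solve

H = np.array([[4.0, 1.0], [1.0, 3.0]])
g = np.array([1.0, 2.0])
U = cholesky(H, lower=False)
p = cho_solve((U, False), -g)
assert np.allclose(H @ p, -g)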
예제 #52
0
def gp_fit_demo(f, pars, xrng=(-1, 1, 50), save_figs=False, alpha=1.0, el=1.0):
    xs = np.linspace(*xrng)  # test set
    fx = np.apply_along_axis(f, 0, xs[na, :], pars).squeeze()
    xtr = np.sqrt(3) * np.array([-1, 1], dtype=float)  # train set
    ytr = np.apply_along_axis(f, 0, xtr[na, :], pars).squeeze(
    )  # function observations + np.random.randn(xtr.shape[0])
    dtr = np.apply_along_axis(f, 0, xtr[na, :], pars,
                              dx=True).squeeze()  # derivative observations
    y = np.hstack((ytr, dtr))
    m, n = len(xs), len(xtr)  # train and test points
    jitter = 1e-8
    # evaluate kernel matrices
    kss, kfd, kdd = kern_rbf_der(xs, xs, alpha=alpha, el=el)
    kff, kfd, kdd = kern_rbf_der(xs, xtr, alpha=alpha, el=el)
    kfy = np.hstack((kff, kfd))
    Kff, Kfd, Kdd = kern_rbf_der(xtr, xtr, alpha=alpha, el=el)
    K = np.vstack((np.hstack((Kff, Kfd)), np.hstack((Kfd.T, Kdd))))
    # GP fit w/ function values only
    kff_iK = cho_solve(cho_factor(Kff + jitter * np.eye(n)), kff.T).T
    gp_mean = kff_iK.dot(ytr)
    gp_var = np.diag(kss - kff_iK.dot(kff.T))
    gp_std = np.sqrt(gp_var)
    # GP fit w/ function values and derivatives
    kfy_iK = cho_solve(cho_factor(K + jitter * np.eye(n + n * 1)),
                       kfy.T).T  # kx.dot(inv(K))
    gp_mean_d = kfy_iK.dot(y)
    gp_var_d = np.diag(kss - kfy_iK.dot(kfy.T))
    gp_std_d = np.sqrt(gp_var_d)

    # setup plotting
    fmin, fmax, fp2p = np.min(fx), np.max(fx), np.ptp(fx)
    axis_limits = [-3, 3, fmin - 0.2 * fp2p, fmax + 0.2 * fp2p]
    tick_settings = {
        'which': 'both',
        'bottom': 'off',
        'top': 'off',
        'left': 'off',
        'right': 'off',
        'labelleft': 'off',
        'labelbottom': 'off'
    }
    # use tex to render text in the figure
    mpl.rc('text', usetex=True)
    # use lmodern font package which is also used in the paper
    mpl.rc('text.latex', preamble=[r'\usepackage{lmodern}'])
    # sans serif font for figure, size 10pt
    mpl.rc('font', family='sans-serif', size=10)
    plt.style.use('seaborn-paper')
    # set figure width to fit the column width of the article
    pti = 1.0 / 72.0  # 1 inch = 72 points
    fig_width_pt = 244  # obtained from latex using \the\columnwidth
    golden_mean = (np.sqrt(5.0) - 1.0) / 2.0
    fig_w = fig_width_pt * pti * 1.0
    fig_h = fig_w * golden_mean
    plt.figure(figsize=(fig_w, fig_h))

    # # plot ordinary GP regression fit
    # plt.subplot(211)
    # plt.axis(axis_limits)
    # plt.tick_params(**tick_settings)
    # plt.title('GP regression')
    # plt.plot(xs, fx, 'r--', label='true')
    # plt.plot(xtr, ytr, 'ko', ms=8, label='observed fcn values')
    # plt.plot(xs, gp_mean, 'k-', lw=2, label='GP mean')
    # plt.fill_between(xs, gp_mean - 2 * gp_std, gp_mean + 2 * gp_std, color='k', alpha=0.15)
    # # plot GP regression fit w/ derivative observations
    # plt.subplot(212)
    # plt.axis(axis_limits)
    # plt.tick_params(**tick_settings)
    # plt.title('GP regression with gradient observations')
    # plt.plot(xs, fx, 'r--', label='true')
    # plt.plot(xtr, ytr, 'ko', ms=8, label='observed fcn values')
    # plt.plot(xs, gp_mean_d, 'k-', lw=2, label='GP mean')
    # plt.fill_between(xs, gp_mean_d - 2 * gp_std_d, gp_mean_d + 2 * gp_std_d, color='k', alpha=0.15)
    # # plot line segments to indicate derivative observations
    # h = 0.15
    # for i in range(len(dtr)):
    #     x0, x1 = xtr[i] - h, xtr[i] + h
    #     y0 = dtr[i] * (x0 - xtr[i]) + ytr[i]
    #     y1 = dtr[i] * (x1 - xtr[i]) + ytr[i]
    #     plt.gca().add_line(Line2D([x0, x1], [y0, y1], linewidth=6, color='k'))
    # plt.tight_layout()
    # if save_figs:
    #     plt.savefig('{}_gpr_grad_compar.pdf'.format(f.__name__), format='pdf')
    # else:
    #     plt.show()

    # two figure version
    scale = 0.5
    fig_width_pt = 244 / 2
    fig_w = fig_width_pt * pti
    fig_h = fig_w * golden_mean * 1
    # plot ordinary GP regression fit
    plt.figure(figsize=(fig_w, fig_h))
    plt.axis(axis_limits)
    plt.tick_params(**tick_settings)
    plt.plot(xs, fx, 'r--', label='true')
    plt.plot(xtr, ytr, 'ko', ms=8, label='observed fcn values')
    plt.plot(xs, gp_mean, 'k-', lw=2, label='GP mean')
    plt.fill_between(xs,
                     gp_mean - 2 * gp_std,
                     gp_mean + 2 * gp_std,
                     color='k',
                     alpha=0.15)
    plt.tight_layout(pad=0.5)
    if save_figs:
        plt.savefig('{}_gpr_fcn_obs_small.pdf'.format(f.__name__),
                    format='pdf')
    else:
        plt.show()
    # plot GP regression fit w/ derivative observations
    plt.figure(figsize=(fig_w, fig_h))
    plt.axis(axis_limits)
    plt.tick_params(**tick_settings)
    plt.plot(xs, fx, 'r--', label='true')
    plt.plot(xtr, ytr, 'ko', ms=8, label='observed fcn values')
    plt.plot(xs, gp_mean_d, 'k-', lw=2, label='GP mean')
    plt.fill_between(xs,
                     gp_mean_d - 2 * gp_std_d,
                     gp_mean_d + 2 * gp_std_d,
                     color='k',
                     alpha=0.15)
    # plot line segments to indicate derivative observations
    h = 0.15
    for i in range(len(dtr)):
        x0, x1 = xtr[i] - h, xtr[i] + h
        y0 = dtr[i] * (x0 - xtr[i]) + ytr[i]
        y1 = dtr[i] * (x1 - xtr[i]) + ytr[i]
        plt.gca().add_line(Line2D([x0, x1], [y0, y1], linewidth=6, color='k'))
    plt.tight_layout(pad=0.5)
    if save_figs:
        plt.savefig('{}_gpr_grad_obs_small.pdf'.format(f.__name__),
                    format='pdf')
    else:
        plt.show()
예제 #53
0
    def compute_ei(self, comp, pend, cand, vals):
        if pend.shape[0] == 0:
            # If there are no pending, don't do anything fancy.

            # Current best.
            best = np.min(vals)

            # The primary covariances for prediction.
            comp_cov = self.cov(comp)
            cand_cross = self.cov(comp, cand)

            # Compute the required Cholesky.
            obsv_cov = comp_cov + self.noise * np.eye(comp.shape[0])
            obsv_chol = spla.cholesky(obsv_cov, lower=True)

            # Solve the linear systems.
            alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta = spla.solve_triangular(obsv_chol, cand_cross, lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v)
            u = (best - func_m) / func_s
            ncdf = sps.norm.cdf(u)
            npdf = sps.norm.pdf(u)
            ei = func_s * (u * ncdf + npdf)

            return ei
        else:
            # If there are pending experiments, fantasize their outcomes.

            # Create a composite vector of complete and pending.
            comp_pend = np.concatenate((comp, pend))

            # Compute the covariance and Cholesky decomposition.
            comp_pend_cov = self.cov(comp_pend) + self.noise * np.eye(
                comp_pend.shape[0])
            comp_pend_chol = spla.cholesky(comp_pend_cov, lower=True)

            # Compute submatrices.
            pend_cross = self.cov(comp, pend)
            pend_kappa = self.cov(pend)

            # Use the sub-Cholesky.
            obsv_chol = comp_pend_chol[:comp.shape[0], :comp.shape[0]]

            # Solve the linear systems.
            alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta = spla.cho_solve((obsv_chol, True), pend_cross)

            # Finding predictive means and variances.
            pend_m = np.dot(pend_cross.T, alpha) + self.mean
            pend_K = pend_kappa - np.dot(pend_cross.T, beta)

            # Take the Cholesky of the predictive covariance.
            pend_chol = spla.cholesky(pend_K, lower=True)

            # Make predictions.
            pend_fant = (np.dot(
                pend_chol, npr.randn(pend.shape[0], self.pending_samples)) +
                         self.mean)

            # Include the fantasies.
            fant_vals = np.concatenate(
                (np.tile(vals[:, np.newaxis],
                         (1, self.pending_samples)), pend_fant))

            # Compute bests over the fantasies.
            bests = np.min(fant_vals, axis=0)

            # Now generalize from these fantasies.
            cand_cross = self.cov(comp_pend, cand)

            # Solve the linear systems.
            alpha = spla.cho_solve((comp_pend_chol, True),
                                   fant_vals - self.mean)
            beta = spla.solve_triangular(comp_pend_chol,
                                         cand_cross,
                                         lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v[:, np.newaxis])
            u = (bests[np.newaxis, :] - func_m) / func_s
            ncdf = sps.norm.cdf(u)
            npdf = sps.norm.pdf(u)
            ei = func_s * (u * ncdf + npdf)

            return np.mean(ei, axis=1)
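A minimal, self-contained sketch of the closed-form expected improvement used above (for minimization), assuming the predictive mean and standard deviation at the candidate points are already in hand; the numbers below are made up for illustration.

import numpy as np
import scipy.stats as sps

def expected_improvement(best, func_m, func_s):
    # EI = s * (u * Phi(u) + phi(u)) with u = (best - m) / s
    u = (best - func_m) / func_s
    return func_s * (u * sps.norm.cdf(u) + sps.norm.pdf(u))

ei = expected_improvement(best=0.2,
                          func_m=np.array([0.30, 0.10, 0.50]),
                          func_s=np.array([0.20, 0.05, 0.40]))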
Example #54
def calculate_continuous_ancestral_states(tree,
                                          char_mtx,
                                          sum_to_one=False,
                                          calc_std_err=False):
    """Calculates the continuous ancestral states for the nodes in a tree.

    Args:
        tree (Tree): A dendropy tree or TreeWrapper object.
        char_mtx (Matrix): A Matrix object with character information.  Each
            row should represent a tip in the tree and each column should be a
            variable to calculate ancestral state for.
        sum_to_one (:obj:`bool`, optional): If True, standardize the character
            matrix so that the values in a row sum to one. Defaults to False.
        calc_std_err (:obj:`bool`, optional): If True, calculate standard error
            for each variable. Defaults to False.

    Raises:
        ValueError: Raised if none of the tree tips were found in the character data.

    Returns:
        tuple: The (possibly pruned) tree and a Matrix of character data with
        the following dimensions:
            * rows: nodes / tips in the tree
            * columns: character variables
            * depth: the first layer is the calculated value; the second layer
                is the standard error, if requested

    Todo:
        * Add function for consistent label handling.
    """
    # Wrap tree if dendropy tree
    if not isinstance(tree, TreeWrapper):
        tree = TreeWrapper.from_base_tree(tree)

    # Assign labels to nodes that don't have them
    tree.add_node_labels()

    # Synchronize tree and character data
    # Prune tree
    prune_taxa = []
    keep_taxon_labels = []
    init_row_headers = char_mtx.get_row_headers()
    for taxon in tree.taxon_namespace:
        label = taxon.label.replace(' ', '_')
        if label not in init_row_headers:
            prune_taxa.append(taxon)
            print(
                'Could not find {} in character matrix, pruning'.format(label))
        else:
            keep_taxon_labels.append(label)

    if len(keep_taxon_labels) == 0:
        raise ValueError(
            'None of the tree tips were found in the character data')

    tree.prune_taxa(prune_taxa)
    tree.purge_taxon_namespace()

    # Prune character data
    keep_rows = []
    i = 0
    for label in init_row_headers:
        if label in keep_taxon_labels:
            keep_rows.append(i)
        else:
            print('Could not find {} in tree tips, pruning'.format(label))
        i += 1
    char_mtx = char_mtx.slice(keep_rows)

    # Standardize character matrix if requested
    tip_count, num_vars = char_mtx.shape
    if sum_to_one:
        for i in range(tip_count):
            sc = float(1.0) / np.sum(char_mtx[i])
            for j in range(num_vars):
                char_mtx[i, j] *= sc

    # Initialize data matrix
    num_nodes = len(tree.nodes())
    data_shape = (num_nodes, num_vars, 2 if calc_std_err else 1)
    data = np.zeros(data_shape, dtype=float)

    # Initialize headers
    row_headers = []

    tip_col_headers = char_mtx.get_column_headers()
    tip_row_headers = char_mtx.get_row_headers()
    tip_lookup = dict([(tip_row_headers[i].replace('_', ' '), i)
                       for i in range(tip_count)])

    # Get the number of internal nodes in the tree
    internal_node_count = num_nodes - tip_count
    # Loop through the tree and set the matrix index for each node
    # Also set data values
    node_headers = []
    node_i = tip_count
    tip_i = 0
    node_index_lookup = {}
    for node in tree.nodes():
        label = _get_node_label(node)
        if len(node.child_nodes()) == 0:
            # Tip
            node_index_lookup[label] = tip_i
            row_headers.append(label)
            data[tip_i, :, 0] = char_mtx[tip_lookup[label]]
            tip_i += 1
        else:
            node_index_lookup[label] = node_i
            node_headers.append(label)
            # Internal node
            data[node_i, :, 0] = np.zeros((1, num_vars), dtype=float)
            node_i += 1

    # Row headers should be extended with node headers
    row_headers.extend(node_headers)

    # For each variable
    for x in range(num_vars):
        # Compute the ML estimate of the root
        full_mcp = np.zeros((internal_node_count, internal_node_count),
                            dtype=float)
        full_vcp = np.zeros(internal_node_count, dtype=float)

        for k in tree.postorder_edge_iter():
            i = k.head_node
            if len(i.child_nodes()) != 0:
                node_num_i = node_index_lookup[_get_node_label(i)] - tip_count
                for j in i.child_nodes():
                    tbl = 2. / j.edge_length
                    full_mcp[node_num_i][node_num_i] += tbl
                    node_num_j = node_index_lookup[_get_node_label(j)]

                    if len(j.child_nodes()) == 0:
                        full_vcp[node_num_i] += (data[node_num_j, x, 0] * tbl)
                    else:
                        node_num_j -= tip_count
                        full_mcp[node_num_i][node_num_j] -= tbl
                        full_mcp[node_num_j][node_num_i] -= tbl
                        full_mcp[node_num_j][node_num_j] += tbl

        b = la.cho_factor(full_mcp)

        # these are the ML estimates for the ancestral states
        ml_est = la.cho_solve(b, full_vcp)
        sos = 0
        for k in tree.postorder_edge_iter():
            i = k.head_node
            node_num_i = node_index_lookup[_get_node_label(i)]
            if len(i.child_nodes()) != 0:
                data[node_num_i, x, 0] = ml_est[node_num_i - tip_count]

                if calc_std_err:
                    for j in i.child_nodes():
                        node_num_j = node_index_lookup[_get_node_label(j)]
                        temp = data[node_num_i, x, 0] - data[node_num_j, x, 0]
                        sos += temp * temp / j.edge_length

                    # nni is node_num_i adjusted for only nodes
                    nni = node_num_i - tip_count
                    qpq = full_mcp[nni][nni]
                    tm1 = np.delete(full_mcp, (nni), axis=0)
                    tm = np.delete(tm1, (nni), axis=1)
                    b = la.cho_factor(tm)
                    sol = la.cho_solve(b, tm1[:, nni])
                    temp_std_err = qpq - np.inner(tm1[:, nni], sol)
                    data[node_num_i, x, 1] = math.sqrt(
                        2.0 * sos / ((internal_node_count - 1) * temp_std_err))

    depth_headers = ['maximum_likelihood']
    if calc_std_err:
        depth_headers.append('standard_error')

    mtx_headers = {'0': row_headers, '1': tip_col_headers, '2': depth_headers}
    return tree, Matrix(data, headers=mtx_headers)
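A hypothetical usage sketch: the tree, trait values, and labels below are invented for illustration, and the Matrix constructor is assumed to accept a headers dict of the form built at the end of the function above.

import dendropy
import numpy as np

newick = '((A:1.0,B:1.0):1.0,C:2.0);'
tree = dendropy.Tree.get(data=newick, schema='newick')

# One continuous trait per column, one tip per row (labels match the tree tips).
char_mtx = Matrix(np.array([[0.1], [0.4], [0.9]]),
                  headers={'0': ['A', 'B', 'C'], '1': ['trait_1']})

out_tree, anc_states = calculate_continuous_ancestral_states(
    tree, char_mtx, calc_std_err=True)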
Example #55
    def log_marginal_likelihood(self,
                                theta=None,
                                eval_gradient=False,
                                clone_kernel=True):
        """Returns log-marginal likelihood of theta for training data.

        Parameters
        ----------
        theta : array-like of shape (n_kernel_params,), default=None
            Kernel hyperparameters for which the log-marginal likelihood is
            evaluated. If None, the precomputed log_marginal_likelihood
            of ``self.kernel_.theta`` is returned.

        eval_gradient : bool, default=False
            If True, the gradient of the log-marginal likelihood with respect
            to the kernel hyperparameters at position theta is returned
            additionally. If True, theta must not be None.

        clone_kernel : bool, default=True
            If True, the kernel attribute is copied. If False, the kernel
            attribute is modified, but may result in a performance improvement.

        Returns
        -------
        log_likelihood : float
            Log-marginal likelihood of theta for training data.

        log_likelihood_gradient : ndarray of shape (n_kernel_params,), optional
            Gradient of the log-marginal likelihood with respect to the kernel
            hyperparameters at position theta.
            Only returned when eval_gradient is True.
        """
        if theta is None:
            if eval_gradient:
                raise ValueError(
                    "Gradient can only be evaluated for theta!=None")
            return self.log_marginal_likelihood_value_

        if clone_kernel:
            kernel = self.kernel_.clone_with_theta(theta)
        else:
            kernel = self.kernel_
            kernel.theta = theta

        if eval_gradient:
            K, K_gradient = kernel(self.X_train_, eval_gradient=True)
        else:
            K = kernel(self.X_train_)

        # Alg. 2.1, page 19, line 2 -> L = cholesky(K + sigma^2 I)
        K[np.diag_indices_from(K)] += self.alpha
        try:
            L = cholesky(K, lower=GPR_CHOLESKY_LOWER, check_finite=False)
        except np.linalg.LinAlgError:
            return (-np.inf,
                    np.zeros_like(theta)) if eval_gradient else -np.inf

        # Support multi-dimensional output of self.y_train_
        y_train = self.y_train_
        if y_train.ndim == 1:
            y_train = y_train[:, np.newaxis]

        # Alg 2.1, page 19, line 3 -> alpha = L^T \ (L \ y)
        alpha = cho_solve((L, GPR_CHOLESKY_LOWER), y_train, check_finite=False)

        # Alg 2.1, page 19, line 7
        # -0.5 . y^T . alpha - sum(log(diag(L))) - n_samples / 2 log(2*pi)
        # y is originally thought to be a (1, n_samples) row vector. However,
        # in multioutputs, y is of shape (n_samples, 2) and we need to compute
        # y^T . alpha for each output, independently using einsum. Thus, it
        # is equivalent to:
        # for output_idx in range(n_outputs):
        #     log_likelihood_dims[output_idx] = (
        #         y_train[:, [output_idx]] @ alpha[:, [output_idx]]
        #     )
        log_likelihood_dims = -0.5 * np.einsum("ik,ik->k", y_train, alpha)
        log_likelihood_dims -= np.log(np.diag(L)).sum()
        log_likelihood_dims -= K.shape[0] / 2 * np.log(2 * np.pi)
        # the log likelihood is summed up across the outputs
        log_likelihood = log_likelihood_dims.sum(axis=-1)

        if eval_gradient:
            # Eq. 5.9, p. 114, and footnote 5 in p. 114
            # 0.5 * trace((alpha . alpha^T - K^-1) . K_gradient)
            # alpha is supposed to be a vector of (n_samples,) elements. With
            # multioutputs, alpha is a matrix of size (n_samples, n_outputs).
            # Therefore, we want to construct a matrix of
            # (n_samples, n_samples, n_outputs) equivalent to
            # for output_idx in range(n_outputs):
            #     output_alpha = alpha[:, [output_idx]]
            #     inner_term[..., output_idx] = output_alpha @ output_alpha.T
            inner_term = np.einsum("ik,jk->ijk", alpha, alpha)
            # compute K^-1 of shape (n_samples, n_samples)
            K_inv = cho_solve((L, GPR_CHOLESKY_LOWER),
                              np.eye(K.shape[0]),
                              check_finite=False)
            # create a new axis to use broadcasting between inner_term and
            # K_inv
            inner_term -= K_inv[..., np.newaxis]
            # Since we are interested about the trace of
            # inner_term @ K_gradient, we don't explicitly compute the
            # matrix-by-matrix operation and instead use an einsum. Therefore
            # it is equivalent to:
            # for param_idx in range(n_kernel_params):
            #     for output_idx in range(n_output):
            #         log_likelihood_gradient_dims[param_idx, output_idx] = (
            #             inner_term[..., output_idx] @
            #             K_gradient[..., param_idx]
            #         )
            log_likelihood_gradient_dims = 0.5 * np.einsum(
                "ijl,jik->kl", inner_term, K_gradient)
            # the log likelihood gradient is summed up across the outputs
            log_likelihood_gradient = log_likelihood_gradient_dims.sum(axis=-1)

        if eval_gradient:
            return log_likelihood, log_likelihood_gradient
        else:
            return log_likelihood
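A minimal usage sketch with scikit-learn's public GaussianProcessRegressor API; the training data is synthetic and only for illustration.

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

X = np.linspace(0.0, 5.0, 20).reshape(-1, 1)
y = np.sin(X).ravel()
gpr = GaussianProcessRegressor(kernel=RBF(length_scale=1.0), alpha=1e-6).fit(X, y)

# Cached value at the fitted hyperparameters ...
lml = gpr.log_marginal_likelihood_value_
# ... or re-evaluated (with gradient) at an arbitrary theta.
lml_theta, grad = gpr.log_marginal_likelihood(gpr.kernel_.theta, eval_gradient=True)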
Example #56
def chol_solve(U, b, out=None):
    if isinstance(U, np.ndarray):
        if sparse.issparse(b):
            b = b.toarray()

        # Allocate memory
        U = np.atleast_2d(U)
        B = np.atleast_1d(b)
        sh_u = U.shape[:-2]
        sh_b = B.shape[:-1]
        l_u = len(sh_u)
        l_b = len(sh_b)

        # Check which axis are iterated over with B along with U
        ind_b = [Ellipsis] * l_b
        l_min = min(l_u, l_b)
        jnd_b = tuple(i for i in range(-l_min, 0) if sh_b[i] == sh_u[i])

        # Shape of the result (broadcasting rules)
        sh = broadcasted_shape(sh_u, sh_b)
        if out is None:
            out = np.zeros(sh + B.shape[-1:])
        for i in nested_iterator(np.shape(U)[:-2]):

            # The goal is to run Cholesky solver once for all vectors of B
            # for which the matrices of U are the same (according to the
            # broadcasting rules). Thus, we collect all the axes of B for
            # which U is singleton and form them as a 2-D matrix and then
            # run the solver once.

            # Select those axes of B for which U and B are not singleton
            for j in jnd_b:
                ind_b[j] = i[j]

            # Collect all the axes for which U is singleton
            b = B[tuple(ind_b) + (Ellipsis, )]

            # Reshape it to a 2-D (or 1-D) array
            orig_shape = b.shape
            if b.ndim > 1:
                b = b.reshape((-1, b.shape[-1]))

            # Ellipsis to all preceding axes and ellipsis for the last
            # axis:
            if len(ind_b) < len(sh):
                ind_out = (Ellipsis, ) + tuple(ind_b) + (Ellipsis, )
            else:
                ind_out = tuple(ind_b) + (Ellipsis, )

            out[ind_out] = linalg.cho_solve((U[i], False),
                                            b.T).T.reshape(orig_shape)

        return out

    elif isinstance(U, cholmod.Factor):
        if sparse.issparse(b):
            b = b.toarray()
        return U.solve_A(b)
    else:
        raise ValueError("Unknown type of Cholesky factor")
Example #57
    def Neglikelihood(self, theta):
        """Negative log-likelihood function

        Input
        -----
        theta (array): correlation lengths for different dimensions

        Output
        ------
        NegLnLike: Negative log-likelihood value"""

        theta = 10**theta  # Correlation length
        n = self.X.shape[0]  # Number of training instances
        k = self.X.shape[1]  # Number of dimensions

        if self.trend == 'Const':
            F = np.vstack((np.ones((n, 1)), np.zeros((n * k, 1))))
        else:
            print('Other trends are currently not available; '
                  'falling back to "Const"')
            F = np.vstack((np.ones((n, 1)), np.zeros((n * k, 1))))

        # Construct correlation matrix
        PsiDot = np.zeros(((k + 1) * n, (k + 1) * n))

        # 1-Build normal Psi matrix
        Psi = np.zeros((n, n))
        for i in range(n):
            Psi[i, :] = np.exp(
                -np.sum(theta * (self.X[i, :] - self.X)**2, axis=1))
        Psi = Psi + np.eye(n) * self.nugget
        # To avoid duplicate addition
        PsiDot[:n, :n] = Psi / 2

        # 2-Build dPsidX
        for i in range(k):
            PsiDot[:n, (i + 1) * n:(i + 2) *
                   n] = 2 * theta[i] * self.diff_list[i] * Psi

        # 3-Build d2PsidX2
        for i in range(k):
            # To avoid duplicate addition
            PsiDot[(i+1)*n:(i+2)*n, (i+1)*n:(i+2)*n] = \
            (2*theta[i]-4*theta[i]**2*self.diff_list[i]**2)*Psi/2

        # 4-Build d2PsidXdX
        for i in range(k - 1):
            for j in range(i + 1, k):
                PsiDot[(i+1)*n:(i+2)*n, (j+1)*n:(j+2)*n] = \
                -4*theta[i]*theta[j]*self.diff_list[i]*self.diff_list[j]*Psi

        # 5-Compile PsiDot
        PsiDot = PsiDot + PsiDot.T
        L = np.linalg.cholesky(PsiDot)

        # Mean estimation
        mu = np.linalg.solve(
            F.T @ (cho_solve((L, True), F)), F.T @ (cho_solve(
                (L, True), np.vstack((self.y, self.grad)))))

        # Variance estimation
        SigmaSqr = (np.vstack((self.y, self.grad))-F@mu).T @ \
        (cho_solve((L, True), np.vstack((self.y, self.grad))-F@mu)) / ((k+1)*n)

        # Compute log-likelihood
        LnDetK = 2 * np.sum(np.log(np.abs(np.diag(L))))
        NegLnLike = ((k + 1) * n / 2) * np.log(SigmaSqr) + 0.5 * LnDetK

        # Update attributes
        self.PsiDot, self.F, self.L, self.mu, self.SigmaSqr = PsiDot, F, L, mu, SigmaSqr

        return NegLnLike.flatten()
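The mean and variance estimates above follow the usual concentrated (profile) likelihood algebra for a Gaussian process with a regression trend: mu = (F^T K^{-1} F)^{-1} F^T K^{-1} y, SigmaSqr = (y - F mu)^T K^{-1} (y - F mu) / N, and NegLnLike = (N/2) log(SigmaSqr) + (1/2) log|K|. A generic sketch with made-up data, independent of the class above:

import numpy as np
from scipy.linalg import cho_solve

rng = np.random.default_rng(0)
N = 20
d = np.arange(N)[:, None] - np.arange(N)
K = np.eye(N) + 0.5 * np.exp(-0.5 * d**2)   # any symmetric positive-definite covariance
F = np.ones((N, 1))                         # constant trend basis
y = rng.normal(size=(N, 1))

L = np.linalg.cholesky(K)
Kinv_y = cho_solve((L, True), y)
Kinv_F = cho_solve((L, True), F)
mu = np.linalg.solve(F.T @ Kinv_F, F.T @ Kinv_y)
resid = y - F @ mu
sigma2 = (resid.T @ cho_solve((L, True), resid)).item() / N
neg_ln_like = 0.5 * N * np.log(sigma2) + np.sum(np.log(np.diag(L)))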
Example #58
    def solve(self, M):
        return LA.cho_solve((self.chol(), True), M)
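Example #59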
def cal_lf0(config):

    base_path = config['base_path']
    label_path = config['label_path']
    name = config['name']
    outfilepath = config['outfilepath']
    var_path = config['var_path']
    syllable_base_path = config['syllable_base_path']
    syllable_var_path = config['syllable_var_path']
    original = config['original']
    koriyama_gen = config['koriyama_gen']
    figure_path = config['figure_path']
    ph_in_syl_object_path = config['phone_in_syllable_object_path']
    stress = config['stress']
    original_vuv = config['original_vuv']

    p_in_s_file = Utility.load_obj(ph_in_syl_object_path)

    # vuv = np.load('{}/class.npy'.format(config['vuv_path']))
    vuv = original_vuv

    #--------Frame-------#

    lf0_mean = np.load('{}/mean.npy'.format(base_path))
    lf0_cov = np.load('{}/cov.npy'.format(base_path))

    var = np.load('{}'.format(var_path))

    if len(lf0_cov) > len(vuv):
        # Pad the voicing decisions with -1 so they match the number of frames
        for i in range(len(lf0_cov) - len(vuv)):
            vuv = np.append(vuv, -1)
    elif len(lf0_cov) < len(vuv):
        vuv = vuv[0:len(lf0_cov)]

    lf0_var = np.sum(var, axis=0)

    lf0_mean = np.array([lf0_mean[:, 0], lf0_mean[:, 1], lf0_mean[:, 2]])
    lf0_w = PoGUtility.generate_W_for_GPR_generate_features(len(lf0_cov), vuv)

    frame_B = alpha * PoGUtility.cal_sum_of_mean_part(lf0_var, lf0_w, lf0_cov,
                                                      lf0_mean)
    frame_A = alpha * PoGUtility.cal_sum_of_weight_part(
        lf0_var, lf0_w, lf0_cov)

    L = linalg.cholesky(frame_A, lower=True)
    lf0 = linalg.cho_solve((L, True), frame_B)

    # lf0 = lf0_gen_with_vuv(lf0, vuv)
    print(lf0.shape)

    frame_lf0_nomask = lf0

    # lf0 = lf0_gen_with_vuv(lf0, vuv)

    lf0[lf0 < 1] = np.nan

    frame_lf0 = np.copy(lf0)

    #----------Syllable level--------#

    dur_list, names = PoGUtility.gen_dur_and_name_list(label_path, name)

    # print np.sum(dur_list)
    if np.sum(dur_list) < len(original):
        dur_list[0] = dur_list[0] + len(original) - np.sum(dur_list)
    # print np.sum(dur_list)

    syl_mean = np.load('{}/mean.npy'.format(syllable_base_path))
    syl_cov = np.load('{}/cov.npy'.format(syllable_base_path))

    s_mean = syl_mean

    var = np.load('{}'.format(syllable_var_path))
    syl_var = np.sum(var, axis=0)

    temp_mean = []
    for i in range(len(syl_mean[0])):
        temp_mean.append(syl_mean[:, i])
    syl_mean = np.array(temp_mean)

    syl_w = PoGUtility.generate_DCT_W_without_consonant_on_stress(
        len(lf0_cov), dur_list, num_coeff, p_in_s_file, stress)

    syl_B = beta * PoGUtility.cal_sum_of_mean_part(syl_var, syl_w, syl_cov,
                                                   syl_mean)
    syl_A = beta * PoGUtility.cal_sum_of_weight_part(syl_var, syl_w, syl_cov)

    #----------Combine Model--------#

    L = linalg.cholesky(frame_A + syl_A, lower=True)
    lf0 = linalg.cho_solve((L, True), frame_B + syl_B)

    # print lf0.shape

    lf0[lf0 < 1] = np.nan

    PlotUtility.plot([lf0, original, frame_lf0_nomask],
                     ['Multi', 'original', 'Single'],
                     '{}/{}_no_mask.eps'.format(figure_path, name))

    lf0 = lf0_gen_with_vuv(lf0, vuv)
    lf0[lf0 < 1] = np.nan

    frame_lf0 = lf0_gen_with_vuv(frame_lf0, vuv)
    frame_lf0[frame_lf0 < 1] = np.nan

    np.save(outfilepath, lf0)

    print(min(lf0))

    PlotUtility.plot([lf0, original, frame_lf0],
                     ['Multi', 'original', 'Single'],
                     '{}/{}_multi.eps'.format(figure_path, name))

    #----------Combine Model--------#

    o = []
    for data_dct, dur in zip(s_mean, dur_list):
        i_dct = PoGUtility.generate_inverse_DCT(data_dct, dur)
        o = o + i_dct

    o = np.concatenate((np.zeros(len(original) - len(o)), np.array(o)), axis=0)

    o = lf0_gen_with_vuv(o, vuv)
    o[o <= 1] = np.nan
    # print o.shape

    PlotUtility.plot([o, original, lf0, frame_lf0],
                     ['dct', 'original', 'Multi', 'frame_lf0'],
                     '{}/{}_dct.eps'.format(figure_path, name))

    pass
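The combination step above is a product-of-Gaussians fusion: the frame-level and syllable-level precision matrices (A) and precision-weighted means (B) are summed, and the fused trajectory comes from a single Cholesky solve. A tiny sketch with hypothetical 2x2 terms:

import numpy as np
from scipy import linalg

A_frame = np.array([[2.0, 0.3], [0.3, 1.5]])   # frame-level precision
A_syl = np.array([[1.0, 0.0], [0.0, 0.5]])     # syllable-level precision
B_frame = np.array([1.0, 0.2])                 # frame-level precision-weighted mean
B_syl = np.array([0.5, 0.1])                   # syllable-level precision-weighted mean

L = linalg.cholesky(A_frame + A_syl, lower=True)
x = linalg.cho_solve((L, True), B_frame + B_syl)   # fused estimate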
Example #60
# so we need to do some tempering, plus a little
# nonlinear refinement.
T = 5000.0
dlogT = -0.025
niter = 2000
lr = 3e-4

# Pre-compute the GP on the spectral components
if s0_rho > 0.0:
    kernel = celerite.terms.Matern32Term(np.log(s0_sig), np.log(s0_rho))
    gp = celerite.GP(kernel)
    s0_C = gp.get_matrix(lnlam_padded)
else:
    s0_C = np.eye(Kp) * s0_sig**2
s0_cho_C = cho_factor(s0_C)
s0_CInv = cho_solve(s0_cho_C, np.eye(Kp))
s0_CInvmu = cho_solve(s0_cho_C, np.ones(Kp) * s0_mu)
if s1_rho > 0.0:
    kernel = celerite.terms.Matern32Term(np.log(s1_sig), np.log(s1_rho))
    gp = celerite.GP(kernel)
    s1_C = gp.get_matrix(lnlam_padded)
else:
    s1_C = np.eye(Kp) * s1_sig**2
s1_cho_C = cho_factor(s1_C)
s1_CInv = cho_solve(s1_cho_C, np.eye(Kp))
s1_CInvmu = cho_solve(s1_cho_C, np.ones(Kp) * s1_mu)
s_CInv = dense_block_diag(s0_CInv, s1_CInv)
s_CInvmu = np.append(s0_CInvmu, s1_CInvmu)


# Define the model