Example #1
def balreal(sys):
    """Computes the balanced realization of sys and returns its eigenvalues.

    References:
        [1] http://www.mathworks.com/help/control/ref/balreal.html

        [2] Laub, A.J., M.T. Heath, C.C. Paige, and R.C. Ward, "Computation of
            System Balancing Transformations and Other Applications of
            Simultaneous Diagonalization Algorithms," *IEEE Trans. Automatic
            Control*, AC-32 (1987), pp. 115-122.
    """
    sys = LinearSystem(sys)  # cast first to memoize sys2ss
    if not sys.analog:
        raise NotImplementedError("balanced digital filters not supported")

    R = control_gram(sys)
    O = observe_gram(sys)

    LR = cholesky(R, lower=True)
    LO = cholesky(O, lower=True)

    U, S, V = svd(np.dot(LO.T, LR))

    T = np.dot(LR, V.T) * S ** (-1. / 2)
    Tinv = (S ** (-1. / 2))[:, None] * np.dot(U.T, LO.T)

    return similarity_transform(sys, T, Tinv), S
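For reference, here is a minimal self-contained sketch of the same Laub balancing recipe, using SciPy's continuous Lyapunov solver in place of control_gram/observe_gram; the 2-state system and all names below are illustrative only:

import numpy as np
from scipy.linalg import cholesky, solve_continuous_lyapunov, svd

# a small stable test system (illustrative values)
A = np.array([[-1.0, 0.5], [0.0, -2.0]])
B = np.array([[1.0], [1.0]])
C = np.array([[1.0, 0.0]])

# continuous-time Gramians: A Wc + Wc A.T = -B B.T and A.T Wo + Wo A = -C.T C
Wc = solve_continuous_lyapunov(A, -B @ B.T)
Wo = solve_continuous_lyapunov(A.T, -C.T @ C)

LR = cholesky(Wc, lower=True)
LO = cholesky(Wo, lower=True)
U, S, Vh = svd(LO.T @ LR)

T = LR @ Vh.T * S ** (-1. / 2)
Tinv = (S ** (-1. / 2))[:, None] * (U.T @ LO.T)

# in balanced coordinates both Gramians equal diag(S), the Hankel singular values
print(np.allclose(Tinv @ Wc @ Tinv.T, np.diag(S)))
print(np.allclose(T.T @ Wo @ T, np.diag(S)))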
Example #2
def log_mvnpdf(X, means, covars, min_covar=1.e-7):
    """Log probability for full covariance matrices."""
    n_samples, n_dim = X.shape
    nmix = len(means)
    log_prob = np.empty((n_samples, nmix))
    for c, (mu, cv) in enumerate(zip(means, covars)):
        try:
            cv_chol = linalg.cholesky(cv, lower=True)
        except linalg.LinAlgError:
            # The model is most probably stuck in a component with too
            # few observations; we need to reinitialize this component.
            try:
                cv_chol = linalg.cholesky(cv + min_covar * np.eye(n_dim),
                                          lower=True)
            except linalg.LinAlgError:
                raise ValueError("'covars' must be symmetric, "
                                 "positive-definite")

        cv_log_det = 2 * np.sum(np.log(np.diagonal(cv_chol)))
        cv_sol = linalg.solve_triangular(cv_chol, (X - mu).T, lower=True).T
        log_prob[:, c] = - .5 * (np.sum(cv_sol ** 2, axis=1) +
                                 n_dim * np.log(2 * np.pi) + cv_log_det)

    return log_prob
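As a sanity check, the Cholesky-based log-density above can be compared against scipy.stats; a small self-contained sketch with made-up data:

import numpy as np
from scipy import linalg
from scipy.stats import multivariate_normal

rng = np.random.default_rng(0)
n_dim = 3
X = rng.normal(size=(5, n_dim))
mu = np.zeros(n_dim)
A = rng.normal(size=(n_dim, n_dim))
cv = A @ A.T + n_dim * np.eye(n_dim)  # a positive-definite covariance

L = linalg.cholesky(cv, lower=True)
log_det = 2 * np.sum(np.log(np.diagonal(L)))
sol = linalg.solve_triangular(L, (X - mu).T, lower=True).T
log_prob = -.5 * (np.sum(sol ** 2, axis=1) + n_dim * np.log(2 * np.pi) + log_det)

print(np.allclose(log_prob, multivariate_normal(mu, cv).logpdf(X)))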
Example #3
File: mvn.py Project: bayerj/utils
def contour_2d(mu, cov=None, prec=None, 
        n=100, radius=[1, np.sqrt(6)]):
    """
    Assuming a bivariate normal
    distribution, draw contours,
    given 'radius' information.

    Note:
    sqrt(6) covers roughly 95% of the probability
    mass (in 2d), since the squared Mahalanobis
    distance is chi-squared distributed with 2 degrees of freedom.
    """
    mu = mu.reshape(2,1) 
    t = np.linspace(0, 2*np.pi, n)
    circle = np.array([np.cos(t), np.sin(t)])
    if prec is None:
        L = la.cholesky(cov)
        ellipse = np.dot(L, circle)
    else:
        L = la.cholesky(prec)
        ellipse = la.solve_triangular(L, circle)
        # FIXME: not correct yet
    plots = {}
    for r in radius:
        plots[r] = (r*ellipse[0,:] + mu[0], r*ellipse[1,:] + mu[1])
    return plots
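Regarding the FIXME above: if prec = L @ L.T (lower Cholesky), then cov = L^-T L^-1, so the unit circle should be mapped through L^-T. A hedged sketch of that branch, assuming la is scipy.linalg (the function name is illustrative):

import numpy as np
import scipy.linalg as la

def contour_2d_lower(mu, cov=None, prec=None, n=100, radius=(1.0, np.sqrt(6))):
    mu = np.asarray(mu).reshape(2, 1)
    t = np.linspace(0, 2 * np.pi, n)
    circle = np.array([np.cos(t), np.sin(t)])
    if prec is None:
        L = la.cholesky(cov, lower=True)       # cov = L L.T
        ellipse = L @ circle
    else:
        L = la.cholesky(prec, lower=True)      # prec = L L.T, so cov = L^-T L^-1
        ellipse = la.solve_triangular(L, circle, lower=True, trans='T')
    return {r: (r * ellipse[0] + mu[0], r * ellipse[1] + mu[1]) for r in radius}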
Example #4
    def LMLdebug(self):
        """
        LML function for debug
        """
        assert self.N*self.P<5000, 'gp2kronSum:: N*P>=5000'

        y = SP.reshape(self.Y,(self.N*self.P), order='F') 
        V = SP.kron(SP.eye(self.P),self.F)

        XX = SP.dot(self.Xr,self.Xr.T)
        K  = SP.kron(self.Cr.K(),XX)
        K += SP.kron(self.Cn.K()+self.offset*SP.eye(self.P),SP.eye(self.N))

        # inverse of K
        cholK = LA.cholesky(K)
        Ki = LA.cho_solve((cholK,False),SP.eye(self.N*self.P))

        # Areml and inverse
        Areml = SP.dot(V.T,SP.dot(Ki,V))
        cholAreml = LA.cholesky(Areml)
        Areml_i = LA.cho_solve((cholAreml,False),SP.eye(self.K*self.P))

        # effect sizes and z
        b = SP.dot(Areml_i,SP.dot(V.T,SP.dot(Ki,y)))
        z = y-SP.dot(V,b)
        Kiz = SP.dot(Ki,z)

        # lml
        lml  = y.shape[0]*SP.log(2*SP.pi)
        lml += 2*SP.log(SP.diag(cholK)).sum()
        lml += 2*SP.log(SP.diag(cholAreml)).sum()
        lml += SP.dot(z,Kiz)
        lml *= 0.5

        return lml
Example #5
def genBrownianMotion(n, tMax=10.0):
    tSeq = np.arange (tMax/float(500),
                    tMax*(1+1/float(500)), tMax/float(500));
    sig = np.zeros ((500,500), dtype='float64');
    for i in range (500):
        sig[i,0:i] = tSeq[0:i];
        sig[i,i:] = tSeq[i];
    sigSqrt = LA.cholesky (sig, lower=True);
    for j in range(n // 500):
        z = np.dot (sigSqrt, nr.randn (500));
        if j == 0:
            zTot = np.insert (z, 0, 0);
        else:
            z = z + zTot[-1]
            zTot = np.append(zTot, z)
    m = n % 500  - 1
    tSeq = np.arange (tMax/float(m),
                    tMax*(1+1/float(m)), tMax/float(m));
    sig = np.zeros ((m,m), dtype='float64');
    for i in range (m):
        sig[i,0:i] = tSeq[0:i];
        sig[i,i:] = tSeq[i];
    print(sig)
    sigSqrt = LA.cholesky (sig, lower=True);
    z = np.dot (sigSqrt, nr.randn (m));
    z = z + zTot[-1]
    zTot = np.append(zTot, z)
    return zTot
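The same construction in a compact, self-contained form: Brownian motion sampled directly from the Cholesky factor of its covariance Cov(B_s, B_t) = min(s, t). Function name and defaults below are illustrative:

import numpy as np
from scipy.linalg import cholesky

def brownian_motion_chol(n, tMax=10.0, seed=None):
    rng = np.random.default_rng(seed)
    t = np.linspace(tMax / n, tMax, n)
    cov = np.minimum.outer(t, t)        # Cov(B_s, B_t) = min(s, t)
    L = cholesky(cov, lower=True)
    return t, L @ rng.standard_normal(n)

t, z = brownian_motion_chol(500)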
Example #6
def choleskyjitter(A):

    try:
        return(la.cholesky(A, lower = True))
    except Exception:
        pass

    n = len(A)
    maxscale = 10*np.sum(A.diagonal())
    minscale = min(1/64, maxscale/1024)
    scale = minscale

    if VERBOSE:
        print('\t', 'Jittering...')

    while scale < maxscale:

        try:
            jitA = scale * np.diag(np.random.rand(n))
            L = la.cholesky(A + jitA, lower = True)
            return(L)
        except Exception:
            scale += minscale

    raise ValueError("Jittering failed")
Example #7
    def test_check_finite(self):
        a = [[8, 2, 3], [2, 9, 3], [3, 3, 6]]
        c = cholesky(a, check_finite=False)
        assert_array_almost_equal(dot(transpose(c), c), a)
        c = transpose(c)
        a = dot(c, transpose(c))
        assert_array_almost_equal(cholesky(a, lower=1, check_finite=False), c)
Example #8
    def _update(self):
        sn2 = self._likelihood.s2
        su2 = sn2 / 1e6

        # kernel wrt the inducing points.
        Kuu = self._kernel.get(self._U)
        p = self._U.shape[0]

        # cholesky for the information gain. note that we only need to compute
        # this once as it is independent from the data.
        self._L = sla.cholesky(Kuu + su2 * np.eye(p))

        # evaluate the kernel and residuals at the new points
        Kux = self._kernel.get(self._U, self._X)
        kxx = self._kernel.dget(self._X)
        r = self._y - self._mean

        # the cholesky of Q.
        V = sla.solve_triangular(self._L, Kux, trans=True)

        # rescale everything by the diagonal matrix ell.
        ell = np.sqrt(kxx + sn2 - np.sum(V**2, axis=0))
        Kux /= ell
        V /= ell
        r /= ell

        # NOTE: to update things incrementally all we need to do is store these
        # components. A just needs to be initialized at the identity and then
        # we just accumulate here.
        self._A = np.eye(p) + np.dot(V, V.T)
        self._a = np.dot(Kux, r)

        # update the posterior.
        self._R = np.dot(sla.cholesky(self._A), self._L)
        self._b = sla.solve_triangular(self._R, self._a, trans=True)
Example #9
def geigen(Amat, Bmat, Cmat):
    """
    generalized eigenvalue problem of the form

    max tr L'AM / sqrt(tr L'BL tr M'CM) w.r.t. L and M

    :param Amat: numpy ndarray of shape (M,N)
    :param Bmat: numpy ndarray of shape (M,M)
    :param Cmat: numpy ndarray of shape (N,N)

    :rtype: numpy ndarray
    :return values: eigenvalues
    :return Lmat: left eigenvectors
    :return Mmat: right eigenvectors

    """
    if Bmat.shape[0] != Bmat.shape[1]:
        print("BMAT is not square.\n")
        sys.exit(1)

    if Cmat.shape[0] != Cmat.shape[1]:
        print("CMAT is not square.\n")
        sys.exit(1)

    p = Bmat.shape[0]
    q = Cmat.shape[0]

    s = min(p, q)
    tmp = fabs(Bmat - Bmat.transpose())
    tmp1 = fabs(Bmat)
    if tmp.max() / tmp1.max() > 1e-10:
        print("BMAT not symmetric..\n")
        sys.exit(1)

    tmp = fabs(Cmat - Cmat.transpose())
    tmp1 = fabs(Cmat)
    if tmp.max() / tmp1.max() > 1e-10:
        print("CMAT not symmetric..\n")
        sys.exit(1)

    Bmat = (Bmat + Bmat.transpose()) / 2.
    Cmat = (Cmat + Cmat.transpose()) / 2.
    Bfac = cholesky(Bmat)
    Cfac = cholesky(Cmat)
    Bfacinv = inv(Bfac)
    Bfacinvt = Bfacinv.transpose()
    Cfacinv = inv(Cfac)
    Dmat = Bfacinvt.dot(Amat).dot(Cfacinv)
    if p >= q:
        u, d, v = svd(Dmat)
        values = d
        Lmat = Bfacinv.dot(u)
        Mmat = Cfacinv.dot(v.transpose())
    else:
        u, d, v = svd(Dmat.transpose())
        values = d
        Lmat = Bfacinv.dot(u)
        Mmat = Cfacinv.dot(v.transpose())

    return values, Lmat, Mmat
Example #10
def choleskyjitter(A, overwrite_a = False, check_finite = True):

    """Add jitter stochastically until a positive definite matrix occurs"""
    # Avoid preparing for jittering if we can already find the cholesky 
    # with no problem
    try:
        return la.cholesky(A, lower = True, overwrite_a = overwrite_a, 
                check_finite = check_finite)
    except Exception:
        pass

    # Prepare for jittering (all the magic numbers here are arbitrary...)
    n = A.shape[0]
    maxscale = 1e10
    minscale = 1e-4
    scale = minscale

    # Keep jittering stochastically, increasing the jitter magnitude along 
    # the way, until it's all good
    while scale < maxscale:

        try:
            jitA = scale * np.diag(np.random.rand(n))
            L = la.cholesky(A + jitA, lower = True, overwrite_a = overwrite_a, 
                check_finite = check_finite)
            return L
        except la.LinAlgError:
            scale *= 1.01
            log.warning('Jitter added stochastically. Scale: %f!' % scale)

    raise la.LinAlgError("Max value of jitter reached")
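A small usage sketch for the function above; it assumes the module-level log logger that the function expects, and the rank-deficient matrix is made up for illustration:

import logging
import numpy as np
import scipy.linalg as la

log = logging.getLogger(__name__)

v = np.array([1.0, 2.0, 3.0])
A = np.outer(v, v)        # positive semi-definite (rank 1), so plain cholesky fails

L = choleskyjitter(A)     # succeeds once a small stochastic jitter has been added
print(np.allclose(L @ L.T, A, atol=1e-2))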
Example #11
    def _initialize(self, X, resp):
        """Initialization of the Gaussian mixture parameters.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)

        resp : array-like, shape (n_samples, n_components)
        """
        n_samples, _ = X.shape

        weights, means, covariances = _estimate_gaussian_parameters(
            X, resp, self.reg_covar, self.covariance_type)
        weights /= n_samples

        self.weights_ = (weights if self.weights_init is None
                         else self.weights_init)
        self.means_ = means if self.means_init is None else self.means_init

        if self.precisions_init is None:
            self.covariances_ = covariances
            self.precisions_cholesky_ = _compute_precision_cholesky(
                covariances, self.covariance_type)
        elif self.covariance_type == 'full':
            self.precisions_cholesky_ = np.array(
                [linalg.cholesky(prec_init, lower=True)
                 for prec_init in self.precisions_init])
        elif self.covariance_type == 'tied':
            self.precisions_cholesky_ = linalg.cholesky(self.precisions_init,
                                                        lower=True)
        else:
            self.precisions_cholesky_ = self.precisions_init
Example #12
def log_multivariate_normal_density(X, means, covars, min_covar=1.e-7):
    """Log probability for full covariance matrices. """
    if hasattr(linalg, 'solve_triangular'):
        # only in scipy since 0.9
        solve_triangular = linalg.solve_triangular
    else:
        # slower, but works
        solve_triangular = linalg.solve
    n_samples, n_dim = X.shape
    nmix = len(means)
    log_prob = np.empty((n_samples, nmix))
    for c, (mu, cv) in enumerate(zip(means, covars)):
        try:
            cv_chol = linalg.cholesky(cv, lower=True)
        except linalg.LinAlgError:
            # The model is most probably stuck in a component with too
            # few observations; we need to reinitialize this component.
            cv_chol = linalg.cholesky(cv + min_covar * np.eye(n_dim),
                                      lower=True)
        cv_log_det = 2 * np.sum(np.log(np.diagonal(cv_chol)))
        cv_sol = solve_triangular(cv_chol, (X - mu).T, lower=True).T
        log_prob[:, c] = - .5 * (np.sum(cv_sol ** 2, axis=1) + \
                                     n_dim * np.log(2 * np.pi) + cv_log_det)

    return log_prob
Example #13
def isposdef(X):
    "Return if matrix is positive definite. Relies on cholesky decomp"
    try:
        la.cholesky(X)  # will raise LinAlgError if not positive def
        return True
    except la.LinAlgError:
        return False
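Usage sketch, assuming la above is numpy.linalg or scipy.linalg; the matrices are illustrative:

import numpy as np

print(isposdef(np.array([[2.0, 0.5], [0.5, 1.0]])))   # True
print(isposdef(np.array([[1.0, 2.0], [2.0, 1.0]])))   # False: eigenvalues are 3 and -1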
Example #14
    def test_simple(self):
        a = [[8, 2, 3], [2, 9, 3], [3, 3, 6]]
        c = cholesky(a)
        assert_array_almost_equal(dot(transpose(c), c), a)
        c = transpose(c)
        a = dot(c, transpose(c))
        assert_array_almost_equal(cholesky(a, lower=1), c)
Example #15
def fitPairwiseModel(Y,XX=None,S_XX=None,U_XX=None,verbose=False):
    N,P = Y.shape
    """ initilizes parameters """
    RV = fitSingleTraitModel(Y,XX=XX,S_XX=S_XX,U_XX=U_XX,verbose=verbose)
    Cg = covariance.freeform(2)
    Cn = covariance.freeform(2)
    gp = gp2kronSum(mean(Y[:,0:2]),Cg,Cn,XX=XX,S_XX=S_XX,U_XX=U_XX)
    conv2 = SP.ones((P,P),dtype=bool)
    rho_g = SP.ones((P,P))
    rho_n = SP.ones((P,P))
    for p1 in range(P):
        for p2 in range(p1):
            if verbose:
                print('.. fitting correlation (%d,%d)' % (p1, p2))
            gp.setY(Y[:,[p1,p2]])
            Cg_params0 = SP.array([SP.sqrt(RV['varST'][p1,0]),1e-6*SP.randn(),SP.sqrt(RV['varST'][p2,0])])
            Cn_params0 = SP.array([SP.sqrt(RV['varST'][p1,1]),1e-6*SP.randn(),SP.sqrt(RV['varST'][p2,1])])
            params0 = {'Cg':Cg_params0,'Cn':Cn_params0}
            conv2[p1,p2],info = OPT.opt_hyper(gp,params0,factr=1e3)
            rho_g[p1,p2] = Cg.K()[0,1]/SP.sqrt(Cg.K().diagonal().prod())
            rho_n[p1,p2] = Cn.K()[0,1]/SP.sqrt(Cn.K().diagonal().prod())
            conv2[p2,p1] = conv2[p1,p2]; rho_g[p2,p1] = rho_g[p1,p2]; rho_n[p2,p1] = rho_n[p1,p2]
    RV['Cg0'] = rho_g*SP.dot(SP.sqrt(RV['varST'][:,0:1]),SP.sqrt(RV['varST'][:,0:1].T))
    RV['Cn0'] = rho_n*SP.dot(SP.sqrt(RV['varST'][:,1:2]),SP.sqrt(RV['varST'][:,1:2].T))
    RV['conv2'] = conv2
    #3. regularizes covariance matrices
    offset_g = abs(SP.minimum(LA.eigh(RV['Cg0'])[0].min(),0))+1e-4
    offset_n = abs(SP.minimum(LA.eigh(RV['Cn0'])[0].min(),0))+1e-4
    RV['Cg0_reg'] = RV['Cg0']+offset_g*SP.eye(P)
    RV['Cn0_reg'] = RV['Cn0']+offset_n*SP.eye(P)
    RV['params0_Cg']=LA.cholesky(RV['Cg0_reg'])[SP.tril_indices(P)]
    RV['params0_Cn']=LA.cholesky(RV['Cn0_reg'])[SP.tril_indices(P)]
    return RV
Example #16
def simulate_lu_decom(sim_locations,sample_locations,vmodel):
    c11 = fill_cova(sample_locations,None,vmodel)
    c21 = fill_cova(sim_locations,sample_locations,vmodel)
    c22 = fill_cova(sim_locations,None,vmodel)

    u11 = cholesky(c11)
    l11 = u11.T
    u11_inv = inv(u11)

    l21 = c21 @ u11_inv
    u12 = l21.T

    l22 = cholesky(c22-l21@u12,lower=True)

    return u11_inv.T,l21,l22
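The three factors returned above are the usual LU/Cholesky conditional-simulation pieces: the first is the inverse of the lower factor of c11, so l21 @ l11_inv @ y is the conditional (kriging) mean and l22 maps white noise onto the conditional residual. A hedged usage sketch, where sim_locations, sample_locations, vmodel and observed_values are placeholders for the caller's data:

import numpy as np

l11_inv, l21, l22 = simulate_lu_decom(sim_locations, sample_locations, vmodel)
w = np.random.randn(l22.shape[0])                     # independent standard normals
z_sim = l21 @ (l11_inv @ observed_values) + l22 @ w   # one conditional realisation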



Example #17
    def update(self, ncov):
        """updates the covariance matrix and recalculates internals

        :Parameters:
            ncov : ndarray
                symmetric matrix, noise covariance
        """

        # checks
        if ncov.ndim != 2 or ncov.shape[0] != ncov.shape[1]:
            raise ValueError('noise covariance is not a symmetric, '
                             'pos. definite matrix')

        # inits
        self.input_dim = ncov.shape[0]
        self._ncov = ncov
        self._chol_ncov = None
        self._inv_chol_ncov = None

        # compute cholesky decomposition
        try:
            self._chol_ncov = sp_la.cholesky(self._ncov)
        except Exception:
            self._ncov = coloured_loading(self._ncov, 50)
            self._chol_ncov = sp_la.cholesky(self._ncov)
            # invert
        self._inv_chol_ncov = sp_la.inv(self._chol_ncov)

        # set ready flag
        self._is_ready = True
Example #18
File: linalg.py Project: Dalar/GPy
def jitchol_old(A, maxtries=5):
    """
    :param A: An almost pd square matrix

    :rval L: the Cholesky decomposition of A

    .. note:

      Adds jitter to K, to enforce positive-definiteness
      if stuff breaks, please check:
      np.allclose(sp.linalg.cholesky(XXT, lower = True), np.triu(sp.linalg.cho_factor(XXT)[0]).T)

    """
    try:
        return linalg.cholesky(A, lower=True)
    except linalg.LinAlgError:
        diagA = np.diag(A)
        if np.any(diagA < 0.):
            raise linalg.LinAlgError, "not pd: negative diagonal elements"
        jitter = diagA.mean() * 1e-6
        for i in range(1, maxtries + 1):
            print '\rWarning: adding jitter of {:.10e}                        '.format(jitter),
            try:
                return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
            except:
                jitter *= 10

        raise linalg.LinAlgError, "not positive definite, even with jitter."
Example #19
    def compute(self, x, yerr):
        """
        Compute and factorize the covariance matrix.

        :param x: ``(nsamples, ndim)``
            The independent coordinates of the data points.

        :param yerr: (optional) ``(nsamples,)`` or scalar
            The Gaussian uncertainties on the data points at coordinates
            ``x``. These values will be added in quadrature to the diagonal of
            the covariance matrix.

        """
        # Compute the kernel matrix.
        K = self.kernel.value(x)
        K[np.diag_indices_from(K)] += yerr ** 2

        # Factor the matrix and compute the log-determinant.
        # TODO CPD 25.01.15: added check on cholesky ala sklearn to account for
        # linalg error...
        # Also removed overwrite_a
        min_covar = 1e-7  # assumed small jitter (`min_covar` is not defined elsewhere in this snippet)
        try:
            self._factor = (cholesky(K), False)
        except LinAlgError:
            self._factor = (cholesky(K + min_covar * np.eye(K.shape[0])), False)
        ##self._factor = (cholesky(K, overwrite_a=True, lower=False), False)
        self.log_determinant = 2 * np.sum(np.log(np.diag(self._factor[0])))
        self.computed = True
Example #20
def get_sigma_points_mean_cov(pts, weights):
    '''Find mean and covariance of a set of weighted sigma points pts
    
    Returns
    -------
        mean, cov
            np arrays (n,1) and (n,n) respectively
    '''

    n = pts.shape[0]
    
    n_pts = pts.shape[1]
    
    mean = np.sum(pts*weights, axis=1)[:,np.newaxis]
    cov = np.dot(weights*(pts-mean), (pts-mean).T)
    
    # Sometimes if kappa < 0, cov may become non positive semi-definite. If so, 
    # approximate 'covariance' matrix according to UKF paper Julier 1997
    try: # check positive semi-definiteness
        la.cholesky(cov)
    except la.LinAlgError:
        print('Covariance matrix is not positive semi-definite, approximating...')
        # take 'covariance' about propagated 0th sigma point instead of new mean
        X0 = pts[:,0,np.newaxis] # first sigma point
        cov = np.dot(weights*(pts-X0), (pts-X0).T) 
        # Check positive semi-definiteness again (should always be)
        la.cholesky(cov)# will throw LinAlgError if not
        
    return mean, cov
Example #21
    def __init__(self, mean, sigma):
        self.mean = mean
        self.sigma = sigma
        self.sigmainv = linalg.inv(sigma)  # assuming sigmainv is the precision matrix inv(sigma)
        self.cholsigma = linalg.cholesky(sigma)
        # the following makes it lower triangular with increasing time
        self.cholsigmainv = linalg.cholesky(self.sigmainv)[::-1, ::-1]
Example #22
def jitChol(A, maxTries=10, warning=True):

    """Do a Cholesky decomposition with jitter.
    
    Description:
    
    
    U, jitter = jitChol(A, maxTries, warning) attempts a Cholesky
     decomposition on the given matrix, if matrix isn't positive
     definite the function adds 'jitter' and tries again. Thereafter
     the amount of jitter is multiplied by 10 each time it is added
     again. This is continued for a maximum of 10 times.  The amount of
     jitter added is returned.
     Returns:
      U - the Cholesky decomposition for the matrix.
      jitter - the amount of jitter that was added to the matrix.
     Arguments:
      A - the matrix for which the Cholesky decomposition is required.
      maxTries - the maximum number of times that jitter is added before
       giving up (default 10).
      warning - whether to give a warning for adding jitter (default is True)

    See also
    CHOL, PDINV, LOGDET


    Copyright (c) 2005, 2006 Neil D. Lawrence
    
    """
    warning = True
    jitter = 0
    i = 0

    while(True):
        try:
            # Try --- need to check A is positive definite
            if jitter == 0:
                jitter = abs(SP.trace(A))/A.shape[0]*1e-6
                LC = linalg.cholesky(A, lower=True)
                return LC.T, 0.0
            else:
                if warning:
                    # pdb.set_trace()
                    # plt.figure()
                    # plt.imshow(A, interpolation="nearest")
                    # plt.colorbar()
                    # plt.show()
                    logging.error("Adding jitter of %f in jitChol()." % jitter)
                LC = linalg.cholesky(A+jitter*SP.eye(A.shape[0]), lower=True)

                return LC.T, jitter
        except linalg.LinAlgError:
            # Seems to have been non-positive definite.
            if i<maxTries:
                jitter = jitter*10
            else:
                raise linalg.LinAlgError, "Matrix non positive definite, jitter of " +  str(jitter) + " added but failed after " + str(i) + " trials."
        i += 1
    return LC
Example #23
    def posterior_y(self, predict_times=None, predict_periods=None):
        
        K_train = self.s_y * self.K_y      
        K_gold = K_train[self.goldidxs, :][:, self.goldidxs]  
        
        if not np.any(predict_times) or not np.any(predict_periods):
            K_predict = K_train
            silveridxs = self.silveridxs
            testidxs = self.silveridxs
        else:
            predict_times = np.concatenate((self.times[self.silveridxs], predict_times), axis=0)
            distances = predict_times - predict_times.T # Ntest x N
            nonmatchingperiods = (predict_periods - predict_periods.T) != 0
            distances[nonmatchingperiods] = np.inf
            K_predict = self.sqexpkernel(distances, self.l_y) + 1e-6 * np.eye(self.N)
            
            silveridxs = np.arange(1, np.sum(self.silveridxs))
            testidxs = np.arange(np.sum(self.silveridxs), len(predict_times))            
        
        # update the prior mean
        v_obs_y = np.var(self.y)
        self.mu0_y = (self.m_mu0_y * v_obs_y + np.mean(self.y) * self.v_mu0_y) / (self.v_mu0_y + v_obs_y)
        print "mu0_y = %.3f" % self.mu0_y
        
        # learn from the training labels
        innovation = self.y[self.goldidxs, :] - self.mu0_y
        L_y = cholesky(K_gold, lower=True, check_finite=False)
        B = solve_triangular(L_y, innovation, lower=True, overwrite_b=True, check_finite=False)
        A = solve_triangular(L_y.T, B, overwrite_b=True, check_finite=False)
        V = solve_triangular(L_y, K_predict[:, self.goldidxs].T, lower=True, check_finite=False)
        
        mu = self.mu0_y + K_predict[testidxs][:, self.goldidxs].dot(A)                
        cov = K_predict - V.T.dot(V)
        # now update the test indexes from the x  observations
        for f in range(self.F):
            mu_fminus1 = mu
            cov_f = cov[silveridxs][:, silveridxs]# + 1e-6 * np.eye(len(mu)) # jitter
            
            innovation = self.x[self.silveridxs, f:f+1] - (mu_fminus1 * self.a[self.silveridxs, f:f+1] 
                                                     + self.c[self.silveridxs, f:f+1] + self.e[self.silveridxs, f:f+1]) # observation minus prior over forecasters' predictions
            print(np.min(innovation))
            a_diag = np.diag(self.a[self.silveridxs, f])

            var_a = np.diag(np.diag(self.cov_a[f, self.silveridxs][:, self.silveridxs]))
            var_a = np.diag(mu_fminus1.reshape(-1)).dot(var_a).dot(np.diag(mu_fminus1.reshape(-1)).T)
            var_e = np.diag(np.diag(self.cov_e[f][self.silveridxs][:, self.silveridxs]))
            var_c = np.diag(np.diag(self.cov_c[f, self.silveridxs][:, self.silveridxs]))
            S_y = cov_f + var_a + var_e + var_c 
            
            L_y = cholesky(S_y, lower=True, check_finite=False)
            
            B = solve_triangular(L_y, innovation, lower=True, overwrite_b=True, check_finite=False)
            A = solve_triangular(L_y.T, B, overwrite_b=True, check_finite=False)
            V = solve_triangular(L_y, a_diag.dot(cov[silveridxs, :]), lower=True, overwrite_b=True, check_finite=False)
        
            mu = mu_fminus1 + cov[silveridxs][:, silveridxs].dot(a_diag).dot(A)
            cov = cov - V.T.dot(V)
         
        return mu, cov[testidxs][:, testidxs]
Example #24
    def test_simple_complex(self):
        m = array([[3 + 1j, 3 + 4j, 5], [0, 2 + 2j, 2 + 7j], [0, 0, 7 + 4j]])
        a = dot(transpose(conjugate(m)), m)
        c = cholesky(a)
        a1 = dot(transpose(conjugate(c)), c)
        assert_array_almost_equal(a, a1)
        c = transpose(c)
        a = dot(c, transpose(conjugate(c)))
        assert_array_almost_equal(cholesky(a, lower=1), c)
Example #25
def factor(X, rho, mu=0.0):
    n, d = X.shape

    if n >= d:
        L = la.cholesky((2. / n) * np.dot(X.T, X) + (2. * mu + rho) * np.eye(d), lower=True)
    else:
        L = la.cholesky(np.eye(n) + (2. / (rho * n)) * np.dot(X, X.T), lower=True)

    return L, L.T  # L, U
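A quick check of how such a cached factor is typically reused (for instance inside an ADMM loop) with two triangular solves per iteration; the data below are illustrative:

import numpy as np
import scipy.linalg as la

rng = np.random.default_rng(0)
n, d, rho = 50, 10, 1.0
X = rng.normal(size=(n, d))
b = rng.normal(size=d)

L, U = factor(X, rho)     # n >= d branch: L L.T = (2/n) X.T X + rho I
y = la.solve_triangular(L, b, lower=True)
x = la.solve_triangular(U, y)
print(np.allclose(((2. / n) * X.T @ X + rho * np.eye(d)) @ x, b))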
Example #26
def pseudo_determinant(S, thres=1e-3, min_covar=1.e-7):
    n_dim = S.shape[0]
    try:
        S_chol = linalg.cholesky(S, lower=True)
    except linalg.LinAlgError:
        # The model is most probably stuck in a component with too
        # few observations; we need to reinitialize this component.
        S_chol = linalg.cholesky(S + min_covar * np.eye(n_dim),
                                  lower=True)
    S_chol_diag = np.diag(S_chol)

    return np.prod(S_chol_diag[S_chol_diag>thres]) ** 2
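Usage sketch for the pseudo-determinant above (matrix values are illustrative): directions whose Cholesky pivot falls below thres are simply skipped.

import numpy as np

S = np.diag([2.0, 1.0, 1e-8])    # nearly singular covariance
print(pseudo_determinant(S))     # ~2.0, the product over the well-conditioned directions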
Example #27
def mvnrewards(nstates, nactions, mu, covmat):
    """Create a random reward structure for an (nstates, nactions) MDP
    where the rewards for each pair of tasks are correlated according
    to the specified covariance matrix."""
    # make sure covmat is positive definite; raise an exception if it
    # isn't. Note that the multivariate_normal call succeeds either way
    # but the results aren't physically meaningful if the matrix isn't
    # semi-positive definite, and we'd rather bail than generate data
    # that doesn't match what the user asked for.
    sla.cholesky(covmat)
    ntasks = covmat.shape[0]
    rewards = npr.multivariate_normal(mu, covmat, (nstates, nactions))
    return rewards
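Usage sketch (a shape check only; the numbers are illustrative):

import numpy as np

mu = np.zeros(2)
covmat = np.array([[1.0, 0.5], [0.5, 1.0]])
R = mvnrewards(nstates=3, nactions=4, mu=mu, covmat=covmat)
print(R.shape)   # (3, 4, 2): one correlated reward vector per (state, action) pair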
Example #28
def _log_multivariate_normal_density_full(X, means, covars, min_covar=1.e-7):
    n_samples, n_dim = X.shape
    nmix = len(means)
    log_prob = np.empty((n_samples, nmix))
    for c, (mu, cv) in enumerate(zip(means, covars)):
        try:
            cv_chol = linalg.cholesky(cv, lower=True)
        except linalg.LinAlgError:
            cv_chol = linalg.cholesky(cv + min_covar * np.eye(n_dim), lower=True)
        cv_log_det = 2 * np.sum(np.log(np.diagonal(cv_chol)))
        cv_sol = linalg.solve_triangular(cv_chol, (X - mu).T, lower=True).T
        log_prob[:, c] = - .5 * (np.sum(cv_sol ** 2, axis=1) + n_dim * np.log(2 * np.pi) + cv_log_det)
    return log_prob
Example #29
    def test_random_complex(self):
        n = 20
        for k in range(2):
            m = random([n, n]) + 1j * random([n, n])
            for i in range(n):
                m[i, i] = 20 * (.1 + abs(m[i, i]))
            a = dot(transpose(conjugate(m)), m)
            c = cholesky(a)
            a1 = dot(transpose(conjugate(c)), c)
            assert_array_almost_equal(a, a1)
            c = transpose(c)
            a = dot(c, transpose(conjugate(c)))
            assert_array_almost_equal(cholesky(a, lower=1), c)
Example #30
    def test_random(self):
        n = 20
        for k in range(2):
            m = random([n, n])
            for i in range(n):
                m[i, i] = 20 * (.1 + m[i, i])
            a = dot(transpose(m), m)
            c = cholesky(a)
            a1 = dot(transpose(c), c)
            assert_array_almost_equal(a, a1)
            c = transpose(c)
            a = dot(c, transpose(c))
            assert_array_almost_equal(cholesky(a, lower=1), c)
Example #31
    def grad_optimize_ei(self, cand, comp, pend, vals, compute_grad=True):
        if pend.shape[0] == 0:
            best = np.min(vals)
            cand = np.reshape(cand, (-1, comp.shape[1]))

            # The primary covariances for prediction.
            comp_cov = self.cov(comp)
            cand_cross = self.cov(comp, cand)

            # Compute the required Cholesky.
            obsv_cov = comp_cov + self.noise * np.eye(comp.shape[0])
            obsv_chol = spla.cholesky(obsv_cov, lower=True)

            cov_grad_func = getattr(gp, 'grad_' + self.cov_func.__name__)
            cand_cross_grad = cov_grad_func(self.ls, comp, cand)

            # Predictive things.
            # Solve the linear systems.
            alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta = spla.solve_triangular(obsv_chol, cand_cross, lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v)
            u = (best - func_m) / func_s
            ncdf = sps.norm.cdf(u)
            npdf = sps.norm.pdf(u)
            ei = func_s * (u * ncdf + npdf)

            if not compute_grad:
                return ei

            # Gradients of ei w.r.t. mean and variance
            g_ei_m = -ncdf
            g_ei_s2 = 0.5 * npdf / func_s

            # Apply covariance function
            grad_cross = np.squeeze(cand_cross_grad)

            grad_xp_m = np.dot(alpha.transpose(), grad_cross)
            grad_xp_v = np.dot(
                -2 * spla.cho_solve((obsv_chol, True), cand_cross).transpose(),
                grad_cross)

            grad_xp = 0.5 * self.amp2 * (grad_xp_m * g_ei_m +
                                         grad_xp_v * g_ei_s2)
            ei = -np.sum(ei)

            return ei, grad_xp.flatten()

        else:
            # If there are pending experiments, fantasize their outcomes.
            cand = np.reshape(cand, (-1, comp.shape[1]))

            # Create a composite vector of complete and pending.
            comp_pend = np.concatenate((comp, pend))

            # Compute the covariance and Cholesky decomposition.
            comp_pend_cov = (self.cov(comp_pend) +
                             self.noise * np.eye(comp_pend.shape[0]))
            comp_pend_chol = spla.cholesky(comp_pend_cov, lower=True)

            # Compute submatrices.
            pend_cross = self.cov(comp, pend)
            pend_kappa = self.cov(pend)

            # Use the sub-Cholesky.
            obsv_chol = comp_pend_chol[:comp.shape[0], :comp.shape[0]]

            # Solve the linear systems.
            alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta = spla.cho_solve((obsv_chol, True), pend_cross)

            # Finding predictive means and variances.
            pend_m = np.dot(pend_cross.T, alpha) + self.mean
            pend_K = pend_kappa - np.dot(pend_cross.T, beta)

            # Take the Cholesky of the predictive covariance.
            pend_chol = spla.cholesky(pend_K, lower=True)

            # Make predictions.
            npr.set_state(self.randomstate)
            pend_fant = np.dot(
                pend_chol, npr.randn(pend.shape[0],
                                     self.pending_samples)) + pend_m[:, None]

            # Include the fantasies.
            fant_vals = np.concatenate(
                (np.tile(vals[:, np.newaxis],
                         (1, self.pending_samples)), pend_fant))

            # Compute bests over the fantasies.
            bests = np.min(fant_vals, axis=0)

            # Now generalize from these fantasies.
            cand_cross = self.cov(comp_pend, cand)
            cov_grad_func = getattr(gp, 'grad_' + self.cov_func.__name__)
            cand_cross_grad = cov_grad_func(self.ls, comp_pend, cand)

            # Solve the linear systems.
            alpha = spla.cho_solve((comp_pend_chol, True),
                                   fant_vals - self.mean)
            beta = spla.solve_triangular(comp_pend_chol,
                                         cand_cross,
                                         lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v[:, np.newaxis])
            u = (bests[np.newaxis, :] - func_m) / func_s
            ncdf = sps.norm.cdf(u)
            npdf = sps.norm.pdf(u)
            ei = func_s * (u * ncdf + npdf)

            # Gradients of ei w.r.t. mean and variance
            g_ei_m = -ncdf
            g_ei_s2 = 0.5 * npdf / func_s

            # Apply covariance function
            grad_cross = np.squeeze(cand_cross_grad)

            grad_xp_m = np.dot(alpha.transpose(), grad_cross)
            grad_xp_v = np.dot(
                -2 * spla.cho_solve(
                    (comp_pend_chol, True), cand_cross).transpose(),
                grad_cross)

            grad_xp = 0.5 * self.amp2 * (
                grad_xp_m * np.tile(g_ei_m, (comp.shape[1], 1)).T +
                (grad_xp_v.T * g_ei_s2).T)
            ei = -np.mean(ei, axis=1)
            grad_xp = np.mean(grad_xp, axis=0)

            return ei, grad_xp.flatten()
Example #32
    def chol_metric(self, state):
        return sla.cholesky(self.metric(state), True)
Example #33
def sim_frac_gaussian_noise(n_seconds, fs, chi=0, hurst=None):
    """Simulate a timeseries as fractional gaussian noise.

    Parameters
    ----------
    n_seconds : float
        Simulation time, in seconds.
    fs : float
        Sampling rate of simulated signal, in Hz.
    chi: float, optional, default: 0
        Desired power law exponent of the spectrum of the signal.
        Must be in the range (-1, 1).
    hurst : float, optional, default: None
        Desired Hurst parameter, which must be in the range (0, 1).
        If provided, this value overwrites the `chi` parameter.

    Returns
    -------
    sig: 1d array
        Simulated fractional gaussian noise time series.

    Notes
    -----
    The time series can be specified with either a desired power law exponent,
    or alternatively with a specified Hurst parameter.

    The Hurst parameter is not the Hurst exponent as defined in rescaled range analysis.
    The Hurst parameter is defined for self-similar processes such that Y(at) = a^H Y(t)
    for all a > 0, where this equality holds in distribution.

    The relationship between the power law exponent chi and the Hurst parameter
    for fractional gaussian noise is chi = 2 * hurst - 1.

    For more information, consult [1]_.

    References
    ----------
    .. [1] Eke, A., Herman, P., Kocsis, L., & Kozak, L. R. (2002). Fractal characterization of
           complexity in temporal physiological signals. Physiological Measurement, 23(1), R1–R38.
           DOI: https://doi.org/10.1088/0967-3334/23/1/201

    Examples
    --------
    Simulate fractional gaussian noise with a power law decay of 0 (white noise):

    >>> sig = sim_frac_gaussian_noise(n_seconds=1, fs=500, chi=0)

    Simulate fractional gaussian noise with a Hurst parameter of 0.5 (also white noise):

    >>> sig = sim_frac_gaussian_noise(n_seconds=1, fs=500, hurst=0.5)
    """

    if hurst is not None:
        check_param_range(hurst, 'hurst', (0, 1))

    else:
        check_param_range(chi, 'chi', (-1, 1))

        # Infer the hurst parameter from chi
        hurst = (-chi + 1.) / 2

    # Compute the number of samples for the simulated time series
    n_samples = compute_nsamples(n_seconds, fs)

    # Define helper function for computing the auto-covariance
    def autocov(hurst):
        return lambda k: 0.5 * (np.abs(k - 1) ** (2 * hurst) - 2 * \
                                k ** (2 * hurst) + (k + 1) ** (2 * hurst))

    # Build the autocovariance matrix
    gamma = np.arange(0, n_samples)
    gamma = np.apply_along_axis(autocov(hurst), 0, gamma)
    autocov_matrix = toeplitz(gamma)

    # Use the Cholesky factor to transform white noise to get the desired time series
    white_noise = np.random.randn(n_samples)
    cholesky_factor = cholesky(autocov_matrix, lower=True)
    sig = cholesky_factor @ white_noise

    return sig
Example #34
    def reduced_likelihood_function(self, theta=None):
        """
        This function determines the BLUP parameters and evaluates the reduced
        likelihood function for the given autocorrelation parameters theta.

        Maximizing this function wrt the autocorrelation parameters theta is
        equivalent to maximizing the likelihood of the assumed joint Gaussian
        distribution of the observations y evaluated onto the design of
        experiments X.

        Parameters
        ----------
        theta : array_like, optional
            An array containing the autocorrelation parameters at which the
            Gaussian Process model parameters should be determined.
            Default uses the built-in autocorrelation parameters
            (ie theta = self.theta).

        Returns
        -------
        reduced_likelihood_function_value : double
            The value of the reduced likelihood function associated to the
            given autocorrelation parameters theta.

        par : dict
            A dictionary containing the requested Gaussian Process model
            parameters:

                sigma2
                        Gaussian Process variance.
                beta
                        Generalized least-squares regression weights for
                        Universal Kriging or given beta0 for Ordinary
                        Kriging.
                gamma
                        Gaussian Process weights.
                C
                        Cholesky decomposition of the correlation matrix [R].
                Ft
                        Solution of the linear equation system : [R] x Ft = F
                G
                        QR decomposition of the matrix Ft.
        """

        if theta is None:
            # Use built-in autocorrelation parameters
            theta = self.theta

        # Initialize output
        reduced_likelihood_function_value = -np.inf
        par = {}

        # Retrieve data
        n_samples = self.X.shape[0]
        D = self.D
        ij = self.ij
        F = self.F

        if D is None:
            # Light storage mode (need to recompute D, ij and F)
            D, ij = l1_cross_distances(self.X)
            if np.min(np.sum(D, axis=1)) == 0. \
                                    and self.corr != correlation.pure_nugget:
                raise Exception("Multiple X are not allowed")
            F = self.regr(self.X)

        # Set up R
        r = self.corr(theta, D)
        R = np.eye(n_samples) * (1. + self.nugget)
        R[ij[:, 0], ij[:, 1]] = r
        R[ij[:, 1], ij[:, 0]] = r

        # Cholesky decomposition of R
        try:
            C = linalg.cholesky(R, lower=True)
        except linalg.LinAlgError:
            return reduced_likelihood_function_value, par

        # Get generalized least squares solution
        Ft = solve_triangular(C, F, lower=True)
        try:
            Q, G = linalg.qr(Ft, econ=True)
        except:
            #/usr/lib/python2.6/dist-packages/scipy/linalg/decomp.py:1177:
            # DeprecationWarning: qr econ argument will be removed after scipy
            # 0.7. The economy transform will then be available through the
            # mode='economic' argument.
            Q, G = linalg.qr(Ft, mode='economic')
            pass

        sv = linalg.svd(G, compute_uv=False)
        rcondG = sv[-1] / sv[0]
        if rcondG < 1e-10:
            # Check F
            sv = linalg.svd(F, compute_uv=False)
            condF = sv[0] / sv[-1]
            if condF > 1e15:
                raise Exception("F is too ill conditioned. Poor combination " +
                                "of regression model and observations.")
            else:
                # Ft is too ill conditioned, get out (try different theta)
                return reduced_likelihood_function_value, par

        Yt = solve_triangular(C, self.y, lower=True)
        if self.beta0 is None:
            # Universal Kriging
            beta = solve_triangular(G, np.dot(Q.T, Yt))
        else:
            # Ordinary Kriging
            beta = np.array(self.beta0)

        rho = Yt - np.dot(Ft, beta)
        sigma2 = (rho**2.).sum(axis=0) / n_samples
        # The determinant of R is equal to the squared product of the diagonal
        # elements of its Cholesky decomposition C
        detR = (np.diag(C)**(2. / n_samples)).prod()

        # Compute/Organize output
        reduced_likelihood_function_value = -sigma2.sum() * detR
        par['sigma2'] = sigma2 * self.y_std**2.
        par['beta'] = beta
        par['gamma'] = solve_triangular(C.T, rho)
        par['C'] = C
        par['Ft'] = Ft
        par['G'] = G

        return reduced_likelihood_function_value, par
Example #35
    def fit(self, X, y, ncores=1):
        """Fit a Gaussian process regression model on training forces

        Args:
            X (list): training configurations
            y (np.ndarray): training forces
            ncores (int): number of CPU workers to use, default is 1

        """
        self.kernel_ = self.kernel
        self.X_train_ = X
        self.y_train_ = np.reshape(y, (y.shape[0] * 3, 1))

        # if self.optimizer is not None:
        #     # Choose hyperparameters based on maximizing the log-marginal
        #     # likelihood (potentially starting from several initial values)
        #     def obj_func(theta, eval_gradient=True):
        #         if eval_gradient:
        #             lml, grad = self.log_marginal_likelihood(
        #                 theta, eval_gradient=True)
        #             return -lml, -grad
        #         else:
        #             return -self.log_marginal_likelihood(theta)

        #     # First optimize starting from theta specified in kernel_
        #     optima = [(self._constrained_optimization(obj_func,
        #                                               self.kernel_.theta,
        #                                               self.kernel_.bounds))]

        #     # Additional runs are performed from log-uniform chosen initial
        #     # theta
        #     if self.n_restarts_optimizer > 0:
        #         if not np.isfinite(self.kernel_.bounds).all():
        #             raise ValueError(
        #                 "Multiple optimizer restarts (n_restarts_optimizer>0) "
        #                 "requires that all bounds are finite.")
        #         bounds = self.kernel_.bounds
        #         for iteration in range(self.n_restarts_optimizer):
        #             theta_initial = \
        #                 self._rng.uniform(bounds[:, 0], bounds[:, 1])
        #             optima.append(
        #                 self._constrained_optimization(obj_func, theta_initial,
        #                                                bounds))
        #     # Select result from run with minimal (negative) log-marginal
        #     # likelihood
        #     lml_values = list(map(itemgetter(1), optima))
        #     self.kernel_.theta = optima[np.argmin(lml_values)][0]
        #     self.log_marginal_likelihood_value_ = -np.min(lml_values)
        # else:
        #     self.log_marginal_likelihood_value_ = \
        #         self.log_marginal_likelihood(self.kernel_.theta)

        # Precompute quantities required for predictions which are independent
        # of actual query points
        K = self.kernel_.calc_gram(self.X_train_, ncores)
        K[np.diag_indices_from(K)] += self.noise

        try:  # Use Cholesky decomposition to build the lower triangular matrix
            self.L_ = cholesky(K, lower=True)
        except np.linalg.LinAlgError as exc:
            exc.args = ("The kernel, %s, is not returning a "
                        "positive definite matrix. Try gradually "
                        "increasing the 'noise' parameter of your "
                        "GaussianProcessRegressor estimator." %
                        self.kernel_, ) + exc.args
            raise

        # Calculate the alpha weights using the Cholesky method
        self.alpha_ = cho_solve((self.L_, True), self.y_train_)
        self.K = K
        self.energy_alpha_ = None
        self.energy_K = None
        self.X_glob_train_ = None
        self.fitted[0] = 'force'
        self.n_train = len(self.y_train_) // 3

        return self
Example #36
def balreal_iter_old(A,
                     B,
                     C,
                     lowrank=True,
                     tolSmith=1e-10,
                     tolSVD=1e-6,
                     kmax=None,
                     tolAbs=False):
    """
    Find balanced realisation of DLTI system.

    Notes: Lyapunov equations are solved using the iterative squared Smith
    algorithm, in its low or full rank version. These implementations are
    as per the low_rank_smith and smith_iter functions respectively but,
    for computational efficiency, the iterations are rewritten here so as to
    solve for the observability and controllability Gramians simultaneously.
    """

    ### Solve Lyapunov equations
    # Notation reminder:
    # scipy: A X A.T - X = -Q
    # contr: A W A.T - W = - B B.T
    # obser: A.T W A - W = - C.T C
    # low-rank smith: A.T X A - X = -Q Q.T

    if lowrank:  # low-rank square-Smith iteration (with SVD)

        # matrices size
        N = A.shape[0]
        rB = B.shape[1]
        rC = C.shape[0]

        # initialise smith iteration
        DeltaNorm = 1e6
        print('Iter\tMaxZhere')
        kk = 0
        Apow = A
        Qck = B
        Qok = C.T

        while DeltaNorm > tolSmith:
            ### compute products Ak^2 * Zk
            ### (use block Arnoldi)
            Qcright = np.dot(Apow, Qck)
            Qoright = np.dot(Apow.T, Qok)
            Apow = np.dot(Apow, Apow)

            ### enlarge Z matrices
            Qck = np.concatenate((Qck, Qcright), axis=1)
            Qok = np.concatenate((Qok, Qoright), axis=1)

            ### check convergence without reconstructing the added term
            MaxZhere = max(np.max(np.abs(Qoright)), np.max(np.abs(Qcright)))
            print('%.4d\t%.3e' % (kk, MaxZhere))
            DeltaNorm = MaxZhere

            # fixed columns chopping
            if kmax is None:
                # cheap SVD truncation
                if Qck.shape[1] > .4 * N or Qok.shape[1] > .4 * N:
                    Uc, svc = scalg.svd(Qck, full_matrices=False)[:2]
                    Uo, svo = scalg.svd(Qok, full_matrices=False)[:2]
                    if tolAbs:
                        rcmax = np.sum(svc > tolSVD)
                        romax = np.sum(svo > tolSVD)
                    else:
                        rcmax = np.sum(svc > tolSVD * svc[0])
                        romax = np.sum(svo > tolSVD * svo[0])
                    pmax = max(rcmax, romax)
                    Qck = Uc[:, :pmax] * svc[:pmax]
                    Qok = Uo[:, :pmax] * svo[:pmax]
                # Qck_old=np.dot(Uc[:,:pmax],np.diag(svc[:pmax]))
                # Qok_old=np.dot(Uo[:,:pmax],np.diag(svo[:pmax]))
                # Qck=np.dot(Uc[:,:rcmax],np.diag(svc[:rcmax]))
                # Qok=np.dot(Uo[:,:romax],np.diag(svo[:romax]))
            else:
                if Qck.shape[1] > kmax:
                    Uc, svc = scalg.svd(Qck, full_matrices=False)[:2]
                    Qck = Uc[:, :kmax] * svc[:kmax]
                if Qok.shape[1] > kmax:
                    Uo, svo = scalg.svd(Qok, full_matrices=False)[:2]
                    Qok = Uo[:, :kmax] * svo[:kmax]

            ### update
            kk = kk + 1

        del Apow
        Qc, Qo = Qck, Qok

    else:  # full-rank squared Smith iteration (with Cholesky)

        # first iteration
        Wc = np.dot(B, B.T)
        Wo = np.dot(C.T, C)
        Apow = A
        AXAobs = np.dot(np.dot(A.T, Wo), A)
        AXActrl = np.dot(np.dot(A, Wc), A.T)
        DeltaNorm = max(np.max(np.abs(AXAobs)), np.max(np.abs(AXActrl)))

        kk = 1
        print('Iter\tRes')
        while DeltaNorm > tolSmith:
            kk = kk + 1

            # update
            Wo = Wo + AXAobs
            Wc = Wc + AXActrl

            # incremental
            Apow = np.dot(Apow, Apow)
            AXAobs = np.dot(np.dot(Apow.T, Wo), Apow)
            AXActrl = np.dot(np.dot(Apow, Wc), Apow.T)
            DeltaNorm = max(np.max(np.abs(AXAobs)), np.max(np.abs(AXActrl)))
            print('%.4d\t%.3e' % (kk, DeltaNorm))
        # final update (useless in very low tolerance)
        Wo = Wo + AXAobs
        Wc = Wc + AXActrl

        # Cholesky factorisation: W = Q Q.T. If unsuccessful, directly solve
        # eigenvalue problem
        Qc = scalg.cholesky(Wc).T
        Qo = scalg.cholesky(Wo).T
    # # eigenvalues are normalised by one, hence Tinv and T matrices
    # # here are not scaled
    # ssq,Tinv,T=scalg.eig(np.dot(Wc,Wo),left=True,right=True)
    # Tinv=Tinv.T
    # #Tinv02=Tinv02.T
    # S=np.diag(np.sqrt(ssq))
    # return S,T,Tinv

    # find min size (only if iter used)
    cc, co = Qc.shape[1], Qo.shape[1]
    cmin = min(cc, co)
    print('cc=%.2d, co=%.2d' % (cc, co))

    # build M matrix and SVD
    M = np.dot(Qo.T, Qc)

    # ### not optimised
    # U,s,Vh=scalg.svd(M,full_matrices=True)
    # U,Vh,s=U[:,:cmin],Vh[:cmin,:],s[:cmin]
    # S=np.diag(s)
    # Sinv=np.diag(1./s)
    # V=Vh.T
    # # Build transformation matrices
    # T=np.dot(Qc,np.dot(V,np.sqrt(Sinv)))
    # Tinv=np.dot(np.sqrt(Sinv),np.dot(U.T,Qo.T))

    ### optimised
    U, s, Vh = scalg.svd(
        M, full_matrices=True)  # as M is square, full_matrices has no effect
    sinv = s**(-0.5)
    T = np.dot(Qc, Vh.T * sinv)
    Tinv = np.dot((U * sinv).T, Qo.T)

    return s, T, Tinv
Example #37
bk[0::DOF] = check
bk[1::DOF] = check
bk[2::DOF] = check
bu = ~bk # defining unknown DOFs

# sub-matrices corresponding to unknown DOFs
Kuu = K[bu, :][:, bu]
Muu = M[bu, :][:, bu]

# solving generalized eigenvalue problem
# NOTE: extracting ALL eigenvectors
eigvals_g, U = eigh(a=Kuu, b=Muu)
wn_g = eigvals_g**0.5

# solving symmetric eigenvalue problem
L = cholesky(Muu, lower=True)
Linv = np.linalg.inv(L)
Kuutilde = (Linv @ Kuu) @ Linv.T

#NOTE checking if Kuutilde is symmetric
assert np.allclose(Kuutilde, Kuutilde.T)

eigvals_s, V = eigh(Kuutilde)
wn_s = eigvals_s**0.5

print('eigenvalues (wn_generalized**2)', wn_g[:3]**2)
print('eigenvalues (wn_symmetric**2)  ', wn_s[:3]**2)
print()
print('checks for U')
for I, J in [[0, 0], [0, 1], [0, 2], [1, 1], [1, 2], [2, 2]]:
    print('I =', I, 'J =', J,
Example #38
    def log_marginal_likelihood(self, theta=None, eval_gradient=False):
        """Returns log-marginal likelihood of theta for training data.

        Parameters
        ----------
        theta : array-like, shape = (n_kernel_params,) or None
            Kernel hyperparameters for which the log-marginal likelihood is
            evaluated. If None, the precomputed log_marginal_likelihood
            of ``self.kernel_.theta`` is returned.

        eval_gradient : bool, default: False
            If True, the gradient of the log-marginal likelihood with respect
            to the kernel hyperparameters at position theta is returned
            additionally. If True, theta must not be None.

        Returns
        -------
        log_likelihood : float
            Log-marginal likelihood of theta for training data.

        log_likelihood_gradient : array, shape = (n_kernel_params,), optional
            Gradient of the log-marginal likelihood with respect to the kernel
            hyperparameters at position theta.
            Only returned when eval_gradient is True.
        """
        if theta is None:
            if eval_gradient:
                raise ValueError(
                    "Gradient can only be evaluated for theta!=None")
            return self.log_marginal_likelihood_value_

        kernel = self.kernel_.clone_with_theta(theta)

        if eval_gradient:
            K, K_gradient = kernel(self.X_train_, eval_gradient=True)
        else:
            K = kernel(self.X_train_)

        K[np.diag_indices_from(K)] += self.alpha
        try:
            L = cholesky(K, lower=True)  # Line 2
        except np.linalg.LinAlgError:
            return (-np.inf, np.zeros_like(theta)) \
                if eval_gradient else -np.inf

        # Support multi-dimensional output of self.y_train_
        y_train = self.y_train_
        if y_train.ndim == 1:
            y_train = y_train[:, np.newaxis]

        alpha = cho_solve((L, True), y_train)  # Line 3

        # Compute log-likelihood (compare line 7)
        log_likelihood_dims = -0.5 * np.einsum("ik,ik->k", y_train, alpha)
        log_likelihood_dims -= np.log(np.diag(L)).sum()
        log_likelihood_dims -= K.shape[0] / 2 * np.log(2 * np.pi)
        log_likelihood = log_likelihood_dims.sum(-1)  # sum over dimensions

        if eval_gradient:  # compare Equation 5.9 from GPML
            tmp = np.einsum("ik,jk->ijk", alpha, alpha)  # k: output-dimension
            tmp -= cho_solve((L, True), np.eye(K.shape[0]))[:, :, np.newaxis]
            # Compute "0.5 * trace(tmp.dot(K_gradient))" without
            # constructing the full matrix tmp.dot(K_gradient) since only
            # its diagonal is required
            log_likelihood_gradient_dims = \
                0.5 * np.einsum("ijl,ijk->kl", tmp, K_gradient)
            log_likelihood_gradient = log_likelihood_gradient_dims.sum(-1)

        if eval_gradient:
            return log_likelihood, log_likelihood_gradient
        else:
            return log_likelihood
Example #39
    def fit(self, X, y):
        """Fit Gaussian process regression model

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Training data

        y : array-like, shape = (n_samples, [n_output_dims])
            Target values

        Returns
        -------
        self : returns an instance of self.
        """
        if self.kernel is None:  # Use an RBF kernel as default
            self.kernel_ = C(1.0, constant_value_bounds="fixed") \
                * RBF(1.0, length_scale_bounds="fixed")
        else:
            self.kernel_ = clone(self.kernel)

        self.rng = check_random_state(self.random_state)

        X, y = check_X_y(X, y, multi_output=True, y_numeric=True)

        # Normalize target value
        if self.normalize_y:
            self.y_train_mean = np.mean(y, axis=0)
            # demean y
            y = y - self.y_train_mean
        else:
            self.y_train_mean = np.zeros(1)

        if np.iterable(self.alpha) \
           and self.alpha.shape[0] != y.shape[0]:
            if self.alpha.shape[0] == 1:
                self.alpha = self.alpha[0]
            else:
                raise ValueError(
                    "alpha must be a scalar or an array"
                    " with same number of entries as y.(%d != %d)" %
                    (self.alpha.shape[0], y.shape[0]))

        self.X_train_ = np.copy(X) if self.copy_X_train else X
        self.y_train_ = np.copy(y) if self.copy_X_train else y

        if self.optimizer is not None and self.kernel_.n_dims > 0:
            # Choose hyperparameters based on maximizing the log-marginal
            # likelihood (potentially starting from several initial values)
            def obj_func(theta, eval_gradient=True):
                if eval_gradient:
                    lml, grad = self.log_marginal_likelihood(
                        theta, eval_gradient=True)
                    return -lml, -grad
                else:
                    return -self.log_marginal_likelihood(theta)

            # First optimize starting from theta specified in kernel
            optima = [(self._constrained_optimization(obj_func,
                                                      self.kernel_.theta,
                                                      self.kernel_.bounds))]

            # Additional runs are performed from log-uniform chosen initial
            # theta
            if self.n_restarts_optimizer > 0:
                if not np.isfinite(self.kernel_.bounds).all():
                    raise ValueError(
                        "Multiple optimizer restarts (n_restarts_optimizer>0) "
                        "requires that all bounds are finite.")
                bounds = self.kernel_.bounds
                for iteration in range(self.n_restarts_optimizer):
                    theta_initial = \
                        self.rng.uniform(bounds[:, 0], bounds[:, 1])
                    optima.append(
                        self._constrained_optimization(obj_func, theta_initial,
                                                       bounds))
            # Select result from run with minimal (negative) log-marginal
            # likelihood
            lml_values = list(map(itemgetter(1), optima))
            self.kernel_.theta = optima[np.argmin(lml_values)][0]
            self.log_marginal_likelihood_value_ = -np.min(lml_values)
        else:
            self.log_marginal_likelihood_value_ = \
                self.log_marginal_likelihood(self.kernel_.theta)

        # Precompute quantities required for predictions which are independent
        # of actual query points
        K = self.kernel_(self.X_train_)
        K[np.diag_indices_from(K)] += self.alpha
        self.L_ = cholesky(K, lower=True)  # Line 2
        self.alpha_ = cho_solve((self.L_, True), self.y_train_)  # Line 3

        return self
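For context, the factor self.L_ and the weights self.alpha_ precomputed at the end of fit are exactly what the standard GP predictive equations consume. Below is a minimal sketch of how they would typically be reused at query points; this is not the estimator's actual predict method, and kernel_, X_train_ and X_query stand in for whatever the fitted object holds.

import numpy as np
from scipy.linalg import solve_triangular

def gp_predict(kernel_, X_train_, L_, alpha_, X_query):
    K_star = kernel_(X_query, X_train_)              # cross-covariance, shape (m, n)
    mean = K_star.dot(alpha_)                        # predictive mean
    V = solve_triangular(L_, K_star.T, lower=True)   # L^{-1} K_*^T
    var = np.diag(kernel_(X_query)) - np.sum(V ** 2, axis=0)  # predictive variance
    return mean, var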
Example #40
0
    def loglkl(self, cosmo, data):

        # class' Omega_m includes Omega_nu etc!
        # but Omega0_m doesn't!
        # ATTENTION: definition of Omega_m in CLASS has changed again: Omega_m = self.ba.Omega0_cdm+self.ba.Omega0_b
        # But I think Omega_m should also contain densities of other species!!!
        #Omega_m = cosmo.Omega_m()
        # this is now a copy of what is returned as Omega_m to MontePython:
        # that didn't work, because ".ba" is not available...
        #Omega_m = cosmo.ba.Omega0_b + cosmo.ba.Omega0_cdm + cosmo.ba.Omega0_ncdm_tot + cosmo.ba.Omega0_dcdm
        # Next try:
        # Omega_m() = self.ba.Omega0_cdm+self.ba.Omega0_b
        # Omega_nu = self.ba.Omega0_ncdm_tot
        # only contributions from decaying DM missing...
        # be careful though, if at some point Omega_m is defined again to contain every species' contribution
        # it does contain all species again in CLASS 2.5.0! #+ cosmo.Omega_nu
        # TODO: Always check definition of cosmo.Omega_m() in classy.pyx!!!
        self.Omega_m = cosmo.Omega_m()
        self.small_h = cosmo.h()

        # m-correction:
        # Errors on m-corrections for different z-bins are correlated, thus one free nuisance parameter "m_corr" is enough.
        # Its amplitude is restricted to the 2\sigma range around the fiducial m-correction value of the lowest redshift bin,
        # and the resulting delta_m is added to all fiducial m-corrections, hence:
        param_name = 'm_corr'
        if param_name in data.mcmc_parameters:
            m_corr = data.mcmc_parameters[param_name][
                'current'] * data.mcmc_parameters[param_name]['scale']
            #ATTENTION: sign matters and this order is the correct one for correlation if delta_m_corr is added!
            delta_m_corr = m_corr - self.m_corr_fiducial_per_zbin[0]
            # this is wrong!
            #m_corr_per_zbin = [m_corr_z1]
            #m_corr_per_zbin = [self.m_corr_fiducial_per_zbin[0] + delta_m_corr]
            m_corr_per_zbin = np.zeros(self.nzbins)
            for zbin in xrange(0, self.nzbins):
                m_corr_per_zbin[
                    zbin] = self.m_corr_fiducial_per_zbin[zbin] + delta_m_corr
        else:
            # if "m_corr" is not specified in input parameter script we just apply the fiducial m-correction values
            # if these could not be loaded, this vector contains only zeros!
            m_corr_per_zbin = self.m_corr_fiducial_per_zbin

        # draw m-correction now instead from a multivariate Gaussian taking the fully correlated errors into account:
        # this does not yield converging chains in reasonable runtimes (e.g. 3 z-bins > 1000 CPUh...)
        '''
        if self.marginalize_over_multiplicative_bias:
            if self.nzbins > 1:
                m_corr_per_zbin = np.random.multivariate_normal(self.m_corr_fiducial_per_zbin, self.cov_m_corr)
                #print 'm-correction'
                #print self.m_corr_fiducial_per_zbin, self.cov_m_corr
                #print m_corr_per_zbin
            else:
                m_corr_per_zbin = np.random.normal(self.m_corr_fiducial_per_zbin, self.err_multiplicative_bias)
        else:
            m_corr_per_zbin = self.m_corr_fiducial_per_zbin
        '''

        # needed for IA modelling:
        if ('A_IA' in data.mcmc_parameters) and ('exp_IA'
                                                 in data.mcmc_parameters):
            amp_IA = data.mcmc_parameters['A_IA'][
                'current'] * data.mcmc_parameters['A_IA']['scale']
            exp_IA = data.mcmc_parameters['exp_IA'][
                'current'] * data.mcmc_parameters['exp_IA']['scale']
            intrinsic_alignment = True
        elif ('A_IA'
              in data.mcmc_parameters) and ('exp_IA'
                                            not in data.mcmc_parameters):
            amp_IA = data.mcmc_parameters['A_IA'][
                'current'] * data.mcmc_parameters['A_IA']['scale']
            # redshift-scaling is turned off:
            exp_IA = 0.

            intrinsic_alignment = True
        else:
            intrinsic_alignment = False

        if intrinsic_alignment:
            self.rho_crit = self.get_critical_density()
            # derive the linear growth factor D(z)
            linear_growth_rate = np.zeros_like(self.redshifts)
            #print self.redshifts
            for index_z, z in enumerate(self.redshifts):
                try:
                    # for CLASS ver >= 2.6:
                    linear_growth_rate[
                        index_z] = cosmo.scale_independent_growth_factor(z)
                except:
                    # my own function from private CLASS modification:
                    linear_growth_rate[index_z] = cosmo.growth_factor_at_z(z)
            # normalize to unity at z=0:
            try:
                # for CLASS ver >= 2.6:
                linear_growth_rate /= cosmo.scale_independent_growth_factor(0.)
            except:
                # my own function from private CLASS modification:
                linear_growth_rate /= cosmo.growth_factor_at_z(0.)

        #residual noise correction amplitude:
        #param_name = 'A_noise'
        # zeros == False!
        A_noise = np.zeros(self.nzbins)
        add_noise_power = np.zeros(self.nzbins, dtype=bool)
        param_name = 'A_noise_corr'
        if param_name in data.mcmc_parameters:
            # assume correlated amplitudes for the noise-power (i.e. same amplitude for all autocorrelations):
            A_noise[:] = data.mcmc_parameters[param_name][
                'current'] * data.mcmc_parameters[param_name]['scale']
            add_noise_power[:] = True
        else:
            # assume uncorrelated amplitudes for the noise-power:
            for zbin in xrange(self.nzbins):

                param_name = 'A_noise_z{:}'.format(zbin + 1)

                if param_name in data.mcmc_parameters:
                    A_noise[zbin] = data.mcmc_parameters[param_name][
                        'current'] * data.mcmc_parameters[param_name]['scale']
                    add_noise_power[zbin] = True

        # this is not correct, if this is considered to be a calibration!
        '''
        # this is all for B-mode power-law model:
        param_name1 = 'A_B_modes'
        param_name2 = 'exp_B_modes'
        use_B_mode_model = False
        if param_name1 in data.mcmc_parameters and param_name2 in data.mcmc_parameters:
            amp_BB = data.mcmc_parameters[param_name1]['current'] * data.mcmc_parameters[param_name1]['scale']
            exp_BB = data.mcmc_parameters[param_name2]['current'] * data.mcmc_parameters[param_name2]['scale']
            use_B_mode_model = True
        '''
        # this was the fiducial approach for the first submission
        # the one above might be faster (and more consistent)
        if self.correct_resetting_bias:
            #A_B_modes = np.random.normal(self.best_fit_A_B_modes, self.best_fit_err_A_B_modes)
            #exp_B_modes = np.random.normal(self.best_fit_exp_B_modes, self.best_fit_err_exp_B_modes)
            amp_BB, exp_BB = np.random.multivariate_normal(
                self.params_resetting_bias, self.cov_resetting_bias)
            #print 'resetting bias'
            #print self.params_resetting_bias, self.cov_resetting_bias
            #print amp_BB, exp_BB

        # get distances from cosmo-module:
        r, dzdr = cosmo.z_of_r(self.redshifts)

        # 1) determine l-range for taking the sum, #l = l_high-l_min at least!!!:
        # this is the correct calculation!
        # for real data, I should start sum from physical scales, i.e., currently l>= 80!
        # TODO: Set this automatically!!! --> not automatically yet, but controllable via "myCFHTLenS_tomography.data"!!!
        # these are integer l-values over which we will take the sum used in the convolution with the band window matrix
        ells_min = self.ells_intp[0]
        '''
        if self.key == 'data_XinPi':
            ells_sum = self.ell_bin_centers
            # TODO: This might cause trouble!!!
            ells_max = 5150.
        else:
            ells_max = self.ells_intp[-1]
            nells = int(ells_max - ells_min + 1)
            ells_sum = np.linspace(ells_min, ells_max, nells)
        '''
        ells_max = self.ells_intp[-1]
        nells = int(ells_max - ells_min + 1)
        ells_sum = np.linspace(ells_min, ells_max, nells)

        # these are the l-nodes for the derivation of the theoretical Cl:
        ells = np.logspace(np.log10(ells_min), np.log10(ells_max),
                           self.nellsmax)

        # After long and extensive testing:
        # Don't put calls to Class (i.e. cosmo...) into a loop...
        # before "pk" and the constants were just called at demand below in the code (due to convenience an copy & paste)
        # which seemed to have been the source for the memory leak...

        # Get power spectrum P(k=l/r,z(r)) from cosmological module
        # this doesn't really have to go into the loop over fields!
        pk = np.zeros((self.nellsmax, self.nzmax), 'float64')
        k_max_in_inv_Mpc = self.k_max_h_by_Mpc * self.small_h
        for index_ells in xrange(self.nellsmax):
            for index_z in xrange(1, self.nzmax):
                # standard Limber approximation:
                #k = ells[index_ells] / r[index_z]
                # extended Limber approximation (cf. LoVerde & Afshordi 2008):
                k_in_inv_Mpc = (ells[index_ells] + 0.5) / r[index_z]
                if k_in_inv_Mpc > k_max_in_inv_Mpc:
                    pk_dm = 0.
                else:
                    pk_dm = cosmo.pk(k_in_inv_Mpc, self.redshifts[index_z])
                #pk[index_ells,index_z] = cosmo.pk(ells[index_ells]/r[index_z], self.redshifts[index_z])
                if self.baryon_feedback:
                    if 'A_bary' in data.mcmc_parameters:
                        A_bary = data.mcmc_parameters['A_bary'][
                            'current'] * data.mcmc_parameters['A_bary']['scale']
                        #print 'A_bary={:.4f}'.format(A_bary)
                        pk[index_ells,
                           index_z] = pk_dm * self.baryon_feedback_bias_sqr(
                               k_in_inv_Mpc / self.small_h,
                               self.redshifts[index_z],
                               A_bary=A_bary)
                    else:
                        pk[index_ells,
                           index_z] = pk_dm * self.baryon_feedback_bias_sqr(
                               k_in_inv_Mpc / self.small_h,
                               self.redshifts[index_z])
                else:
                    pk[index_ells, index_z] = pk_dm

        # for KiDS-450 constant biases in photo-z are not sufficient:
        if self.bootstrap_photoz_errors:
            # draw a random bootstrap n(z); borders are inclusive!
            random_index_bootstrap = np.random.randint(
                int(self.index_bootstrap_low),
                int(self.index_bootstrap_high) + 1)
            #print 'Bootstrap index:', random_index_bootstrap
            pz = np.zeros((self.nzmax, self.nzbins), 'float64')
            pz_norm = np.zeros(self.nzbins, 'float64')
            for zbin in xrange(self.nzbins):

                redshift_bin = self.redshift_bins[zbin]
                #ATTENTION: hard-coded subfolder!
                #index can be recycled since bootstraps for tomographic bins are independent!
                fname = os.path.join(
                    self.data_directory,
                    '{:}/bootstraps/{:}/n_z_avg_bootstrap{:}.hist'.format(
                        self.photoz_method, redshift_bin,
                        random_index_bootstrap))
                z_hist, n_z_hist = np.loadtxt(fname, unpack=True)

                param_name = 'D_z{:}'.format(zbin + 1)
                if param_name in data.mcmc_parameters:
                    z_mod = self.redshifts + data.mcmc_parameters[param_name][
                        'current'] * data.mcmc_parameters[param_name]['scale']
                else:
                    z_mod = self.redshifts

                shift_to_midpoint = np.diff(z_hist)[0] / 2.
                spline_pz = itp.splrep(z_hist + shift_to_midpoint, n_z_hist)
                mask_min = z_mod >= z_hist.min() + shift_to_midpoint
                mask_max = z_mod <= z_hist.max() + shift_to_midpoint
                mask = mask_min & mask_max
                # points outside the z-range of the histograms are set to 0!
                pz[mask, zbin] = itp.splev(z_mod[mask], spline_pz)

                dz = self.redshifts[1:] - self.redshifts[:-1]
                pz_norm[zbin] = np.sum(0.5 * (pz[1:, zbin] + pz[:-1, zbin]) *
                                       dz)

            pr = pz * (dzdr[:, np.newaxis] / pz_norm)

        elif (not self.bootstrap_photoz_errors) and (
                self.shift_n_z_by_D_z.any()):

            pz = np.zeros((self.nzmax, self.nzbins), 'float64')
            pz_norm = np.zeros(self.nzbins, 'float64')
            for zbin in xrange(self.nzbins):

                param_name = 'D_z{:}'.format(zbin + 1)
                if param_name in data.mcmc_parameters:
                    z_mod = self.redshifts + data.mcmc_parameters[param_name][
                        'current'] * data.mcmc_parameters[param_name]['scale']
                else:
                    z_mod = self.redshifts
                # Load n(z) again:
                redshift_bin = self.redshift_bins[zbin]
                fname = os.path.join(
                    self.data_directory,
                    '{:}/n_z_avg_{:}.hist'.format(self.photoz_method,
                                                  redshift_bin))
                z_hist, n_z_hist = np.loadtxt(fname,
                                              usecols=(0, 1),
                                              unpack=True)
                shift_to_midpoint = np.diff(z_hist)[0] / 2.
                spline_pz = itp.splrep(z_hist + shift_to_midpoint, n_z_hist)
                mask_min = z_mod >= z_hist.min() + shift_to_midpoint
                mask_max = z_mod <= z_hist.max() + shift_to_midpoint
                mask = mask_min & mask_max
                # points outside the z-range of the histograms are set to 0!
                pz[mask, zbin] = itp.splev(z_mod[mask], spline_pz)
                # Normalize selection functions
                dz = self.redshifts[1:] - self.redshifts[:-1]
                pz_norm[zbin] = np.sum(0.5 * (pz[1:, zbin] + pz[:-1, zbin]) *
                                       dz)

            pr = pz * (dzdr[:, np.newaxis] / pz_norm)

        else:
            pr = self.pz * (dzdr[:, np.newaxis] / self.pz_norm)

        # Compute the function g_i(r), which depends on r and the bin:
        # g_i(r) = 2 r (1 + z(r)) \int_r^{+\infty} dr_s eta_r(r_s) (r_s - r) / r_s
        g = np.zeros((self.nzmax, self.nzbins), 'float64')
        for zbin in xrange(self.nzbins):
            # assumes that z[0] = 0
            for nr in xrange(1, self.nzmax - 1):
                #for nr in xrange(self.nzmax - 1):
                fun = pr[nr:, zbin] * (r[nr:] - r[nr]) / r[nr:]
                g[nr, zbin] = np.sum(0.5 * (fun[1:] + fun[:-1]) *
                                     (r[nr + 1:] - r[nr:-1]))
                g[nr, zbin] *= 2. * r[nr] * (1. + self.redshifts[nr])

        # Start loop over l for computation of C_l^shear
        Cl_GG_integrand = np.zeros((self.nzmax, self.nzbins, self.nzbins),
                                   'float64')
        Cl_GG = np.zeros((self.nellsmax, self.nzbins, self.nzbins), 'float64')

        if intrinsic_alignment:
            Cl_II_integrand = np.zeros_like(Cl_GG_integrand)
            Cl_II = np.zeros_like(Cl_GG)

            Cl_GI_integrand = np.zeros_like(Cl_GG_integrand)
            Cl_GI = np.zeros_like(Cl_GG)

        dr = r[1:] - r[:-1]
        # removing shifts like array[1:, ...] which assume that z[0] = 0:
        for index_ell in xrange(self.nellsmax):

            # find Cl_integrand = (g(r) / r)**2 * P(l/r,z(r))
            for zbin1 in xrange(self.nzbins):
                for zbin2 in xrange(zbin1 + 1):  #self.nzbins):
                    Cl_GG_integrand[1:, zbin1, zbin2] = g[1:, zbin1] * g[
                        1:, zbin2] / r[1:]**2 * pk[index_ell, 1:]

                    if intrinsic_alignment:
                        factor_IA = self.get_factor_IA(
                            self.redshifts[1:], linear_growth_rate[1:], amp_IA,
                            exp_IA)  #/ self.dzdr[1:]
                        #print F_of_x
                        #print self.eta_r[1:, zbin1].shape
                        Cl_II_integrand[1:, zbin1, zbin2] = pr[1:, zbin1] * pr[
                            1:,
                            zbin2] * factor_IA**2 / r[1:]**2 * pk[index_ell,
                                                                  1:]
                        Cl_GI_integrand[1:, zbin1, zbin2] = (
                            g[1:, zbin1] * pr[1:, zbin2] +
                            g[1:, zbin2] * pr[1:, zbin1]
                        ) * factor_IA / r[1:]**2 * pk[index_ell, 1:]

            # Integrate over r to get C_l^shear_ij = P_ij(l)
            # C_l^shear_ij = 9/4 Omega0_m^2 H_0^4 \int_0^{r_max} dr (g_i(r) g_j(r) / r^2) P(k=l/r, z(r))
            for zbin1 in xrange(self.nzbins):
                for zbin2 in xrange(zbin1 + 1):  #self.nzbins):
                    Cl_GG[index_ell, zbin1, zbin2] = np.sum(
                        0.5 * (Cl_GG_integrand[1:, zbin1, zbin2] +
                               Cl_GG_integrand[:-1, zbin1, zbin2]) * dr)
                    # here we divide by 16, because we get a 2^2 from g(z)!
                    Cl_GG[
                        index_ell, zbin1,
                        zbin2] *= 9. / 16. * self.Omega_m**2  # in units of Mpc**4
                    Cl_GG[index_ell, zbin1,
                          zbin2] *= (self.small_h / 2997.9)**4  # dimensionless

                    if intrinsic_alignment:
                        Cl_II[index_ell, zbin1, zbin2] = np.sum(
                            0.5 * (Cl_II_integrand[1:, zbin1, zbin2] +
                                   Cl_II_integrand[:-1, zbin1, zbin2]) * dr)

                        Cl_GI[index_ell, zbin1, zbin2] = np.sum(
                            0.5 * (Cl_GI_integrand[1:, zbin1, zbin2] +
                                   Cl_GI_integrand[:-1, zbin1, zbin2]) * dr)
                        # here we divide by 4, because we get a 2 from g(r)!
                        Cl_GI[index_ell, zbin1,
                              zbin2] *= 3. / 4. * self.Omega_m
                        Cl_GI[index_ell, zbin1,
                              zbin2] *= (self.small_h / 2997.9)**2

        if intrinsic_alignment:
            Cl = Cl_GG + Cl_GI + Cl_II
        else:
            Cl = Cl_GG

        # ordering of redshift bins is correct in definition of theory below!
        theory_EE = np.zeros((self.nzcorrs, self.band_offset_EE), 'float64')
        theory_BB = np.zeros((self.nzcorrs, self.band_offset_BB), 'float64')
        theory_noise_EE = np.zeros((self.nzcorrs, self.band_offset_EE),
                                   'float64')
        theory_noise_BB = np.zeros((self.nzcorrs, self.band_offset_BB),
                                   'float64')
        #print theory.shape
        index_corr = 0
        #A_noise_corr = np.zeros(self.nzcorrs)
        for zbin1 in xrange(self.nzbins):
            for zbin2 in xrange(zbin1 + 1):  #self.nzbins):
                #correlation = 'z{:}z{:}'.format(zbin1 + 1, zbin2 + 1)
                ell_norm = ells_sum * (ells_sum + 1) / (2. * np.pi)
                # calculate m-correction vector here:
                # this loop goes over bands per z-corr; m-correction is the same for all bands in one tomographic bin!!!
                val_m_corr_EE = (1. + m_corr_per_zbin[zbin1]) * (
                    1. + m_corr_per_zbin[zbin2]) * np.ones(
                        len(self.bands_EE_to_use))
                val_m_corr_BB = (1. + m_corr_per_zbin[zbin1]) * (
                    1. + m_corr_per_zbin[zbin2]) * np.ones(
                        len(self.bands_BB_to_use))
                '''
                arg_a = (1. + A_noise[zbin1])
                arg_b = (1. + A_noise[zbin2])
                if np.sign(arg_a) < 0 and np.sign(arg_b) < 0:
                    sign = -1.
                elif np.sign(arg_a) < 0 or np.sign(arg_b) < 0:
                    sign = -1.
                else:
                    sign = 1.
                A_noise_corr[index_corr] = sign * self.sigma_e[zbin1] * self.sigma_e[zbin2] * np.sqrt(np.abs(arg_a)) * np.sqrt(np.abs(arg_b)) / (np.sqrt(self.n_eff[zbin1]) * np.sqrt(self.n_eff[zbin2]))
                '''
                # alternative definition, makes more sense than the one above:
                # I should add noise only to auto-correlations!
                if zbin1 == zbin2:
                    #A_noise_corr = self.sigma_e[zbin1] * self.sigma_e[zbin2] * (1. + A_noise[zbin1] + A_noise[zbin2]) / (np.sqrt(self.n_eff[zbin1]) * np.sqrt(self.n_eff[zbin2]))
                    # now the very simple definition should be sufficient!
                    A_noise_corr = A_noise[zbin1] * self.sigma_e[
                        zbin1]**2 / self.n_eff[zbin1]
                else:
                    A_noise_corr = 0.
                Cl_sample = Cl[:, zbin1, zbin2]
                spline_Cl = itp.splrep(ells, Cl_sample)
                D_l_EE = ell_norm * itp.splev(ells_sum, spline_Cl)
                # TODO: 1e-9 can either become an adjustable constant or a parameter!
                # taking out ell_norm now (a constant times ell_norm is just another noise-power component)
                if self.correct_resetting_bias:
                    # TODO: get ell_centers...
                    #x_BB = ell_center * (ell_center + 1.) / (2. * np.pi) * self.sigma_e[zbin1] * self.sigma_e[zbin2] / np.sqrt(self.n_eff[zbin1] * self.n_eff[zbin2])
                    # try to pull the model through the BWM first, that's more consistent with the code and doesn't require
                    x_BB = ell_norm * self.sigma_e[zbin1] * self.sigma_e[
                        zbin2] / np.sqrt(self.n_eff[zbin1] * self.n_eff[zbin2])
                    D_l_BB = self.get_B_mode_model(x_BB, amp_BB, exp_BB)
                #else:
                #    D_l_BB = self.scale_B_modes # * ell_norm
                D_l_noise = ell_norm * A_noise_corr

                #theory[zbin1, zbin2, :] = get_theory(ells_sum, D_l, self.ells_intp, band_window_matrix, self.band_offset, correlation, bwm_style=self.bwm_style)
                '''
                if self.key == 'data_XinPi':
                    theory_EE[index_corr, :] = D_l_EE
                    theory_BB[index_corr, :] = 0.

                    if add_noise_power.all():
                        theory_noise_EE[index_corr, :] = D_l_noise
                        theory_noise_BB[index_corr, :] = 0.
                else:
                    theory_EE[index_corr, :] = self.get_theory(ells_sum, D_l_EE, self.band_window_matrix, index_corr, band_type_is_EE=True)
                    if self.correct_resetting_bias:
                        theory_BB[index_corr, :] = self.get_theory(ells_sum, D_l_BB, self.band_window_matrix, index_corr, band_type_is_EE=False)
                    else:
                        theory_BB[index_corr, :] = 0.

                    if add_noise_power.all():
                        theory_noise_EE[index_corr, :] = self.get_theory(ells_sum, D_l_noise, self.band_window_matrix, index_corr, band_type_is_EE=True)
                        theory_noise_BB[index_corr, :] = self.get_theory(ells_sum, D_l_noise, self.band_window_matrix, index_corr, band_type_is_EE=False)
                '''
                theory_EE[index_corr, :] = self.get_theory(
                    ells_sum,
                    D_l_EE,
                    self.band_window_matrix,
                    index_corr,
                    band_type_is_EE=True)
                if self.correct_resetting_bias:
                    theory_BB[index_corr, :] = self.get_theory(
                        ells_sum,
                        D_l_BB,
                        self.band_window_matrix,
                        index_corr,
                        band_type_is_EE=False)
                else:
                    theory_BB[index_corr, :] = 0.

                if add_noise_power.all():
                    theory_noise_EE[index_corr, :] = self.get_theory(
                        ells_sum,
                        D_l_noise,
                        self.band_window_matrix,
                        index_corr,
                        band_type_is_EE=True)
                    theory_noise_BB[index_corr, :] = self.get_theory(
                        ells_sum,
                        D_l_noise,
                        self.band_window_matrix,
                        index_corr,
                        band_type_is_EE=False)

                if index_corr == 0:
                    m_corr_EE = val_m_corr_EE
                    m_corr_BB = val_m_corr_BB
                else:
                    m_corr_EE = np.concatenate((m_corr_EE, val_m_corr_EE))
                    m_corr_BB = np.concatenate((m_corr_BB, val_m_corr_BB))

                index_corr += 1

        # take care of m-correction:
        m_corr = np.concatenate((m_corr_EE, m_corr_BB))
        # this is required for scaling of covariance matrix:
        m_corr_matrix = np.matrix(m_corr).T * np.matrix(m_corr)

        theory_BB = theory_BB.flatten() + theory_noise_BB.flatten()
        theory_EE = theory_EE.flatten() + theory_noise_EE.flatten()
        band_powers_theory = np.concatenate((theory_EE, theory_BB))

        #apply m-corrections also to covariance:
        # we want elementwise division!!!
        covariance = self.covariance / np.asarray(m_corr_matrix)

        # some numpy-magic for slicing:
        cov_sliced = covariance[np.ix_(self.indices_for_bands_to_use,
                                       self.indices_for_bands_to_use)]

        # invert covariance matrix:
        #inv_cov_sliced = np.linalg.inv(cov_sliced)

        # Eq. 16 from Heymans et al. 2013 (arxiv:1303.1808v1)
        # not necessary for analytical covariance!
        '''
        if self.use_debias_factor:
            params = len(self.indices_for_bands_to_use)
            debias_factor = (self.nmocks - params - 2.) / (self.nmocks - 1.)
        else:
            debias_factor = 1.

        inverse_covariance_debiased = debias_factor * inv_cov_sliced
        '''

        # m-correction is applied to DATA! Can also be marginalized over!
        difference_vector = (self.band_powers / m_corr) - band_powers_theory
        difference_vector = difference_vector[self.indices_for_bands_to_use]

        # Don't invert that matrix!
        #chi2 = difference_vector.T.dot(inv_cov_sliced.dot(difference_vector))
        # this is for running smoothly with MultiNest
        # (in initial checking of prior space, there might occur weird solutions)
        if np.isinf(band_powers_theory).any() or np.isnan(
                band_powers_theory).any():
            chi2 = 2e12
        else:
            # use a Cholesky decomposition instead:
            cholesky_transform = cholesky(cov_sliced, lower=True)
            yt = solve_triangular(cholesky_transform,
                                  difference_vector,
                                  lower=True)
            chi2 = yt.dot(yt)

        return -0.5 * chi2
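The chi^2 above is computed without ever inverting the covariance: with cov = L L^T, chi^2 = d^T cov^{-1} d = ||L^{-1} d||^2. A standalone sketch of that whitening step, with made-up numbers:

import numpy as np
from scipy.linalg import cholesky, solve_triangular

rng = np.random.RandomState(42)
M = rng.randn(6, 6)
cov = M @ M.T + 6.0 * np.eye(6)    # toy positive-definite covariance
d = rng.randn(6)                   # toy (data - theory) vector

L = cholesky(cov, lower=True)
yt = solve_triangular(L, d, lower=True)
chi2 = yt.dot(yt)
assert np.allclose(chi2, d @ np.linalg.solve(cov, d))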
Example #41
0
 def update_variance(self, variance):
     self.L = cholesky(variance, lower=False)
     self.variance = variance
Example #42
0
            z_test = np.tanh(np.dot(x_test, w1.data.numpy()) + b1.data.numpy())
            z_test = np.tanh(np.dot(z_test, w2.data.numpy()) + b2.data.numpy())
            z_test = np.dot(z_test, w3.data.numpy()) + b3.data.numpy()
            #z_test=10.0*z_test

            znp = np.tanh(
                np.dot(xnp[j * batch_size:(j + 1) *
                           batch_size], w1.data.numpy()) + b1.data.numpy())
            znp = np.tanh(np.dot(znp, w2.data.numpy()) + b2.data.numpy())
            znp = np.dot(znp, w3.data.numpy()) + b3.data.numpy()
            #znp=10.0*znp

            for k in range(0,
                           xnp[j * batch_size:(j + 1) * batch_size].shape[0]):
                K1[:, k] = np.exp(-0.5 * np.sum((z_test - znp[[k], :])**2, 1))
                K2[:, k] = np.exp(-0.5 * np.sum((znp - znp[[k], :])**2, 1))
                K2[k, k] += (1.0 / (np.exp(-nug) + 1.0) + 1e-8)
            L = cholesky(K2, lower=True)
            L_inv = solve_triangular(L.T, np.eye(L.shape[0]))
            K_inv = L_inv.dot(L_inv.T)

            yp = np.dot(
                K1, np.dot(K_inv, ynp[j * batch_size:(j + 1) * batch_size]))
            yp2 = np.rint(yp)
            print(np.average(yp2 == y_test), np.sqrt(np.mean(
                (yp - y_test)**2)))

            #print(np.average(np.argmax(yp,1)==np.argmax(y_test,1)))
            #print(yp)
            #print(y_test)
Example #43
0
    def log_marginal_likelihood(self, theta=None, eval_gradient=False):
        """Return log-marginal likelihood of theta for training data.

        Parameters
        ----------
        theta : array-like, shape = (n_kernel_params,) or None
            Kernel hyperparameters for which the log-marginal likelihood is
            evaluated. If None, the precomputed log_marginal_likelihood
            of ``self.kernel_.theta`` is returned.

        eval_gradient : bool, default: False
            If True, the gradient of the log-marginal likelihood with respect
            to the kernel hyperparameters at position theta is returned
            additionally. If True, theta must not be None.

        Returns
        -------
        log_likelihood : float
            Log-marginal likelihood of theta for training data.

        log_likelihood_gradient : array, shape = (n_kernel_params,), optional
            Gradient of the log-marginal likelihood with respect to the kernel
            hyperparameters at position theta.
            Only returned when eval_gradient is True.
        """
        if theta is None:
            if eval_gradient:
                raise ValueError(
                    "Gradient can only be evaluated for theta!=None")
            return self.log_marginal_likelihood_value_

        kernel_l = self.kernel_l_.clone_with_theta(
            theta[1:1 + len(self.kernel_l_.theta)])
        kernel_d = self.kernel_d_.clone_with_theta(
            theta[-len(self.kernel_d_.theta):])
        rho = theta[0]

        if eval_gradient:
            raise Warning("eval_gradient = True mode is not implemented yet!")
        else:
            K = np.vstack((np.hstack(
                (kernel_l(self.X_train_[:self.n_l_]), rho * kernel_l(
                    self.X_train_[:self.n_l_], self.X_train_[self.n_l_:]))),
                           np.hstack(
                               (rho * kernel_l(self.X_train_[self.n_l_:],
                                               self.X_train_[:self.n_l_]),
                                rho**2 * kernel_l(self.X_train_[self.n_l_:]) +
                                kernel_d(self.X_train_[self.n_l_:])))))

        K[np.diag_indices_from(K)] += self.alpha
        try:
            L = cholesky(K, lower=True)  # Line 2
        except np.linalg.LinAlgError:
            return (-np.inf, np.zeros_like(theta)) \
                if eval_gradient else -np.inf

        # Support multi-dimensional output of self.y_train_
        y_train = self.y_train_
        if y_train.ndim == 1:
            y_train = y_train[:, np.newaxis]

        alpha = cho_solve((L, True), y_train)  # Line 3

        # Compute log-likelihood (compare line 7)
        log_likelihood_dims = -0.5 * np.einsum("ik,ik->k", y_train, alpha)
        log_likelihood_dims -= np.log(
            np.diag(L)).sum()  # 0.5 * log det(K) = sum(log(diag(L))), since K = L L^T
        log_likelihood_dims -= K.shape[0] / 2 * np.log(2 * np.pi)
        log_likelihood = log_likelihood_dims.sum(-1)  # sum over dimensions

        if eval_gradient:  # compare Equation 5.9 from GPML
            raise Warning("eval_gradient = True mode is not implemented yet!")
        else:
            return log_likelihood
Example #44
0
import numpy as np
import scipy as sp
import scipy.linalg as la

A = np.array([[4, 2, 1], [2, 5, 3], [1, 3, 6]])  # symmetric positive definite (required for Cholesky)

L = la.cholesky(A)
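Note that scipy.linalg.cholesky returns the upper-triangular factor by default, so the factorization is checked as L.T @ L == A; pass lower=True to get the lower factor instead:

assert np.allclose(L.T @ L, A)              # default: upper factor, A = L^T L
L_low = la.cholesky(A, lower=True)
assert np.allclose(L_low @ L_low.T, A)      # lower factor, A = L L^T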
Example #45
0
    def _reduced_likelihood_function(self, theta):
        """
        This function determines the BLUP parameters and evaluates the reduced
        likelihood function for the given autocorrelation parameters theta.

        Maximizing this function wrt the autocorrelation parameters theta is
        equivalent to maximizing the likelihood of the assumed joint Gaussian
        distribution of the observations y evaluated onto the design of
        experiments X.

        Parameters
        ----------
        theta: list(n_comp), optional
            - An array containing the autocorrelation parameters at which the
              Gaussian Process model parameters should be determined.

        Returns
        -------
        reduced_likelihood_function_value: real
            - The value of the reduced likelihood function associated to the
              given autocorrelation parameters theta.

        par: dict()
            - A dictionary containing the requested Gaussian Process model
              parameters:

            sigma2
            Gaussian Process variance.
            beta
            Generalized least-squares regression weights for
            Universal Kriging or for Ordinary Kriging.
            gamma
            Gaussian Process weights.
            C
            Cholesky decomposition of the correlation matrix [R].
            Ft
            Solution of the linear equation system : [R] x Ft = F
            Q, G
            QR decomposition of the matrix Ft.
        """
        # Initialize output
        reduced_likelihood_function_value = -np.inf
        par = {}
        # Set up R
        MACHINE_EPSILON = np.finfo(np.double).eps
        nugget = 10.0 * MACHINE_EPSILON
        if self.name == "MFK":
            if self._lvl != self.nlvl:
                # in the case of multi-fidelity optimization
                # it is very probable that lower-fidelity correlation matrix
                # becomes ill-conditioned
                nugget = 10.0 * nugget
        elif self.name in ["MGP"]:
            nugget = 100.0 * nugget
        noise = self.options["noise"]
        tmp_var = theta
        if self.name in ["MFK", "MFKPLS", "MFKPLSK"]:
            if self.options["eval_noise"]:
                theta = tmp_var[:-1]
                noise = tmp_var[-1]

        r = self._correlation_types[self.options["corr"]](theta,
                                                          self.D).reshape(
                                                              -1, 1)

        R = np.eye(self.nt) * (1.0 + nugget + noise)
        R[self.ij[:, 0], self.ij[:, 1]] = r[:, 0]
        R[self.ij[:, 1], self.ij[:, 0]] = r[:, 0]

        # Cholesky decomposition of R
        try:
            C = linalg.cholesky(R, lower=True)
        except (linalg.LinAlgError, ValueError) as e:
            print("exception : ", e)
            # raise e
            return reduced_likelihood_function_value, par

        # Get generalized least squares solution
        Ft = linalg.solve_triangular(C, self.F, lower=True)
        Q, G = linalg.qr(Ft, mode="economic")
        sv = linalg.svd(G, compute_uv=False)
        rcondG = sv[-1] / sv[0]
        if rcondG < 1e-10:
            # Check F
            sv = linalg.svd(self.F, compute_uv=False)
            condF = sv[0] / sv[-1]
            if condF > 1e15:
                raise Exception("F is too ill conditioned. Poor combination "
                                "of regression model and observations.")

            else:
                # Ft is too ill conditioned, get out (try different theta)
                return reduced_likelihood_function_value, par

        Yt = linalg.solve_triangular(C, self.y_norma, lower=True)
        beta = linalg.solve_triangular(G, np.dot(Q.T, Yt))
        rho = Yt - np.dot(Ft, beta)

        # The determinant of R is equal to the squared product of the diagonal
        # elements of its Cholesky decomposition C; the quantity computed below
        # is det(R)**(1/nt), i.e. the nt-th root, which keeps the value within
        # floating-point range.
        detR = (np.diag(C)**(2.0 / self.nt)).prod()

        # Compute/Organize output
        if self.name in ["MFK", "MFKPLS", "MFKPLSK"]:
            n_samples = self.nt
            p = self.p
            q = self.q
            sigma2 = (rho**2.0).sum(axis=0) / (n_samples - p - q)
            reduced_likelihood_function_value = -(
                n_samples - p -
                q) * np.log10(sigma2) - n_samples * np.log10(detR)
        else:
            sigma2 = (rho**2.0).sum(axis=0) / (self.nt)
            reduced_likelihood_function_value = -np.log(
                sigma2.sum()) - np.log(detR)
        par["sigma2"] = sigma2 * self.y_std**2.0
        par["beta"] = beta
        par["gamma"] = linalg.solve_triangular(C.T, rho)
        par["C"] = C
        par["Ft"] = Ft
        par["G"] = G
        par["Q"] = Q

        if self.name in ["MGP"]:
            reduced_likelihood_function_value += self._reduced_log_prior(theta)

        # A particular case when f_min_cobyla fail
        if (self.best_iteration_fail is not None) and (
                not np.isinf(reduced_likelihood_function_value)):

            if reduced_likelihood_function_value > self.best_iteration_fail:
                self.best_iteration_fail = reduced_likelihood_function_value
                self._thetaMemory = np.array(tmp_var)

        elif (self.best_iteration_fail is
              None) and (not np.isinf(reduced_likelihood_function_value)):
            self.best_iteration_fail = reduced_likelihood_function_value
            self._thetaMemory = np.array(tmp_var)

        return reduced_likelihood_function_value, par
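For reference, the detR used above is not det(R) itself but its nt-th root, det(R)**(1/nt), which avoids under- or overflow for large nt; a tiny check of that identity (the 2x2 matrix is arbitrary):

import numpy as np
from scipy import linalg

R = np.array([[1.0, 0.3], [0.3, 1.0]])
C = linalg.cholesky(R, lower=True)
nt = R.shape[0]
assert np.isclose((np.diag(C) ** (2.0 / nt)).prod(),
                  np.linalg.det(R) ** (1.0 / nt))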
Example #46
0
def logdet(A):
    return 2 * np.sum(np.log(np.diag(spl.cholesky(A))))
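The one-liner relies on det(A) = prod(diag(chol(A)))**2; the diagonal is the same for the upper and the lower factor, so the default factor is fine. A quick check against numpy's slogdet, assuming spl refers to scipy.linalg and using a made-up SPD matrix:

import numpy as np
import scipy.linalg as spl

rng = np.random.RandomState(1)
B = rng.randn(5, 5)
A_spd = B @ B.T + 5.0 * np.eye(5)   # toy symmetric positive-definite matrix
assert np.allclose(logdet(A_spd), np.linalg.slogdet(A_spd)[1])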
import numpy as np
import matplotlib.pyplot as plt
import GPy
from scipy import linalg
import scipy.spatial.distance as spdist

# Dataset according to the matlab code:
n = 20
np.random.seed(0)
x = 15 * (np.random.uniform(low=0, high=1, size=20) - 0.5).reshape((-1, 1))
sigma_y = 0.1
sigma_f = 1.0
l = 1.0
q = spdist.cdist(x / l, x / l, 'sqeuclidean')
A = (sigma_y**2) * np.eye(n) + (sigma_f**2) * np.exp(-0.5 * q)
B = linalg.cholesky(A)                           # upper-triangular factor: A = B^T B
y = B.conjugate().T.dot(np.random.randn(n, 1))   # draw y ~ N(0, A)
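# Sanity check of the sampling identity: for z ~ N(0, I), y = B^T z has
# covariance B^T B = A (B being the upper Cholesky factor). The sample
# count below is arbitrary, chosen only to make the empirical estimate tight.
Z = np.random.randn(n, 100000)
Y = B.conjugate().T.dot(Z)
emp_cov = Y.dot(Y.T) / Z.shape[1]
assert np.allclose(emp_cov, A, atol=0.05)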

xstar = np.linspace(-7.5, 7.5, 201)
xstar = xstar.reshape(-1, 1)

l = np.array([1.0, 0.3, 3.0])
sigma_f = np.array([1, 1.08, 1.16])
sigma_y = np.array([0.1, 0.00005, 0.89])


def generate_plots(sigma_f, l, sigma_y):
    kernel = GPy.kern.RBF(1, sigma_f, l)
    model = GPy.models.GPRegression(x, y, kernel)
    model.Gaussian_noise.variance = sigma_y**2
Example #48
0
def modes_system_undamped(M, K):
    r"""Return eigensolution of multiple DOF system.

    Returns the natural frequencies (w),
    eigenvectors (P), mode shapes (S) and the modal transformation
    matrix S for an undamped system.

    See Notes for explanation of the underlying math.

    Parameters
    ----------
    M: float array
        Mass matrix
    K: float array
        Stiffness matrix

    Returns
    -------
    w: float array
        The natural frequencies of the system
    P: float array
        The eigenvectors of the system.
    S: float array
        The mass-normalized mode shapes of the system.
    Sinv: float array
        The modal transformation matrix S^-1 (takes x -> r, the modal coordinates)

    Notes
    -----
    Given :math:`M\ddot{x}(t)+Kx(t)=0`, with mode shapes :math:`u`, the matrix
    of mode shapes :math:`S=[u_1 u_2 \ldots]` can be created. With the modal
    coordinates collected in the vector :math:`r(t)`, the modal transformation
    separates space and time from :math:`x(t)` such that :math:`x(t)=S r(t)`.
    Substituting into the governing equation:

    :math:`MS\ddot{r}(t)+KSr(t)=0`

    Premultiplying by :math:`S^T`

    :math:`S^TMS\ddot{r}(t)+S^TKSr(t)=0`

    The matrices :math:`S^TMS` and :math:`S^TKS` will be diagonalized by this
    process (:math:`u_i` are the eigenvectors of :math:`M^{-1}K`).

    If scaled properly (mass normalized so :math:`u_i^TMu_i=1`) then
    :math:`S^TMS=I` and :math:`S^TKS=\Omega^2` where :math:`\Omega^2` is a
    diagonal matrix of the natural frequencies squared in radians per second.

    Further, explicit matrix inverses are numerically unstable, so linear
    systems are solved directly with Gauss elimination instead.

    :math:`AB=C` given known :math:`A` and :math:`C`
    is solved using `la.solve(A, C, assume_a='pos')`.

    :math:`BA=C` given known :math:`A` and :math:`C` is solved by first
    transposing the equation to :math:`A^TB^T=C^T`, then solving for
    :math:`B^T`. The resulting command is
    `la.solve(A.T, C.T, assume_a='pos').T`

    Examples
    --------
    >>> M = np.array([[4, 0, 0],
    ...               [0, 4, 0],
    ...               [0, 0, 4]])
    >>> K = np.array([[8, -4, 0],
    ...               [-4, 8, -4],
    ...               [0, -4, 4]])
    >>> w, P, S, Sinv = modes_system_undamped(M, K)
    >>> w # doctest: +SKIP
    array([0.45, 1.25, 1.8 ])
    >>> S
    array([[ 0.16, -0.37, -0.3 ],
           [ 0.3 , -0.16,  0.37],
           [ 0.37,  0.3 , -0.16]])

    """
    L = la.cholesky(M)
    lam, P = _eigen(
        la.solve(L, la.solve(L, K, assume_a='pos').T, assume_a='pos').T)
    w = np.real(np.sqrt(lam))
    S = la.solve(L, P, assume_a="pos")
    Sinv = la.solve(L.T, P, assume_a="pos").T

    return w, P, S, Sinv
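The doctest values can be reproduced directly from the mass-normalized stiffness matrix, following the same no-inverse approach described in the Notes; a compact check (reusing scipy.linalg as la, and exploiting that M is diagonal in this example):

import numpy as np
import scipy.linalg as la

M = np.diag([4., 4., 4.])
K = np.array([[8., -4., 0.], [-4., 8., -4.], [0., -4., 4.]])

L = la.cholesky(M)                       # upper factor; M is diagonal here, so L = 2*I
Ktilde = la.solve(L, la.solve(L, K, assume_a='pos').T, assume_a='pos').T
lam = la.eigh(Ktilde, eigvals_only=True)
w = np.sqrt(np.sort(lam))                # ~ array([0.45, 1.25, 1.80])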
Example #49
0
    def fit_energy(self, X_glob, y, ncores=1):
        """Fit a Gaussian process regression model using local energies.

        Args:
            X_glob (list of lists of arrays): list of grouped training configurations
            y (np.ndarray): training total energies
            ncores (int): number of CPU workers to use, default is 1

        """
        self.kernel_ = self.kernel
        self.X_glob_train_ = X_glob
        self.y_train_energy_ = np.reshape(y, (y.shape[0], 1))

        if self.optimizer is not None:  # TODO Debug
            logger.warning("Optimizer not yet implemented for energy training")
            '''
            # Choose hyperparameters based on maximizing the log-marginal
            # likelihood (potentially starting from several initial values)
            def obj_func(theta, eval_gradient=True):
                if eval_gradient:
                    lml, grad = self.log_marginal_likelihood(
                        theta, eval_gradient=True)
                    return -lml, -grad
                else:
                    return -self.log_marginal_likelihood(theta)

            # First optimize starting from theta specified in kernel
            optima = [(self._constrained_optimization(obj_func,
                                                      self.kernel_.theta,
                                                      self.kernel_.bounds))]

            # Additional runs are performed from log-uniform chosen initial
            # theta
            if self.n_restarts_optimizer > 0:
                if not np.isfinite(self.kernel_.bounds).all():
                    raise ValueError(
                        "Multiple optimizer restarts (n_restarts_optimizer>0) "
                        "requires that all bounds are finite.")
                bounds = self.kernel_.bounds
                for iteration in range(self.n_restarts_optimizer):
                    theta_initial = \
                        self._rng.uniform(bounds[:, 0], bounds[:, 1])
                    optima.append(
                        self._constrained_optimization(obj_func, theta_initial,
                                                       bounds))
            # Select result from run with minimal (negative) log-marginal
            # likelihood
            lml_values = list(map(itemgetter(1), optima))
            self.kernel_.theta = optima[np.argmin(lml_values)][0]
            self.log_marginal_likelihood_value_ = -np.min(lml_values)
            '''
        else:
            pass
        '''
        self.log_marginal_likelihood_value_ = \
        self.log_marginal_likelihood(self.kernel_.theta)
        '''

        # Precompute quantities required for predictions which are independent
        # of actual query points
        self.energy_K = self.kernel_.calc_gram_e(self.X_glob_train_, ncores)
        self.energy_K[np.diag_indices_from(self.energy_K)] += self.noise

        try:  # Use Cholesky decomposition to build the lower triangular matrix
            self.L_ = cholesky(self.energy_K, lower=True)
        except np.linalg.LinAlgError as exc:
            exc.args = ("The kernel, %s, is not returning a "
                        "positive definite matrix. Try gradually "
                        "increasing the 'noise' parameter of your "
                        "GaussianProcessRegressor estimator." %
                        self.kernel_, ) + exc.args
            raise

        # Calculate the alpha weights using the Cholesky method
        self.energy_alpha_ = cho_solve((self.L_, True), self.y_train_energy_)
        self.K = None
        self.alpha_ = None
        self.fitted[1] = 'energy'
        self.n_train = len(self.y_train_energy_)
        self.X_train_ = None

        return self
Example #50
0
def AddJitterOp(inputs: np.ndarray, initial_jitter_factor=INITIAL_JITTER_FACTOR,
                jitter_growth=JITTER_GROWTH, debug_log='false'):      
    """
    Finds a small jitter to add to the diagonal of a square matrix to render
    the matrix positive definite (so that linalg.potrf works).

    Given input x (positive semi-definite matrix) and sigsq_init (nonneg
    scalar), find sigsq_final (nonneg scalar), so that:
        sigsq_final = sigsq_init + jitter, jitter >= 0,
        x + sigsq_final * Id positive definite (so that potrf call works)
    We return the matrix x + sigsq_final * Id, for which potrf has not failed.

    For the gradient, the dependence of jitter on the inputs is ignored.

    The values tried for sigsq_final are:
        sigsq_init, sigsq_init + initial_jitter * (jitter_growth ** k),
        k = 0, 1, 2, ...,
        initial_jitter = initial_jitter_factor * max(mean(diag(x)), 1)

    Note: The scaling of initial_jitter with mean(diag(x)) is taken from GPy.
    The rationale is that the largest eigenvalue of x is >= mean(diag(x)), and
    likely of this magnitude.

    There is no guarantee that the Cholesky factor returned is well-conditioned
    enough for subsequent computations to be reliable. A better solution
    would be to estimate the condition number of the Cholesky factor, and to add
    jitter until this is bounded below a threshold we tolerate. See

        Higham, N.
        A Survey of Condition Number Estimation for Triangular Matrices
        MIMS EPrint: 2007.10

    Algorithm 4.1 could work for us.
    """
    assert initial_jitter_factor > 0. and jitter_growth > 1.
    n_square = inputs.shape[0] - 1
    n = int(math.sqrt(n_square))
    assert n_square % n == 0 and n_square // n == n, "x must be square matrix, shape (n, n)"
    x, sigsq_init = np.reshape(inputs[:-1], (n, -1)), inputs[-1]
    
    def _get_constant_identity(x, constant):
        n, _ = x.shape
        return np.diag(np.ones((n,)) * constant)

    def _get_jitter_upperbound(x):
        # To define a safeguard in the while-loop of the forward pass,
        # we define an upper bound on the jitter we can reasonably add.
        # The bound is quite generous and depends on the scale of the input x
        # (the scale is captured via the trace of x);
        # the primary goal is to avoid an infinite while-loop.
        return JITTER_UPPERBOUND_FACTOR * max(1., np.mean(np.diag(x)))

    jitter = 0.
    jitter_upperbound = _get_jitter_upperbound(x)
    must_increase_jitter = True
    x_plus_constant = None
    
    while must_increase_jitter and jitter <= jitter_upperbound:
        try:
            x_plus_constant = x + _get_constant_identity(
                x, sigsq_init + jitter)
            # Note: Do not use np.linalg.cholesky here, this can cause
            # locking issues
            L = spl.cholesky(x_plus_constant, lower=True)
            must_increase_jitter = False
        except spl.LinAlgError:
            if debug_log == 'true':
                logger.info("sigsq = {} does not work".format(
                    sigsq_init + jitter))
            if jitter == 0.0:
                jitter = initial_jitter_factor * max(1., np.mean(np.diag(x)))
            else:
                jitter = jitter * jitter_growth

    assert not must_increase_jitter, "The jitter ({}) has reached its upperbound ({}) while the Cholesky of the input matrix still cannot be computed.".format(jitter, jitter_upperbound)
    
    if debug_log == 'true':
        logger.info("sigsq_final = {}".format(sigsq_init + jitter))

    return x_plus_constant
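Stripped of the flattened-input handling and logging, the retry loop above reduces to the following pattern; the function name and default constants here are illustrative stand-ins, not the library's API:

import numpy as np
import scipy.linalg as spl

def cholesky_with_jitter(x, initial_jitter_factor=1e-9, jitter_growth=10.0,
                         max_tries=20):
    """Grow a diagonal jitter until the Cholesky factorization succeeds."""
    jitter = 0.0
    for _ in range(max_tries):
        try:
            return spl.cholesky(x + jitter * np.eye(x.shape[0]), lower=True), jitter
        except spl.LinAlgError:
            if jitter == 0.0:
                jitter = initial_jitter_factor * max(1.0, np.mean(np.diag(x)))
            else:
                jitter *= jitter_growth
    raise spl.LinAlgError("could not render the matrix positive definite")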
Example #51
0
    def evalObjCon(self, x):
        '''
        Evaluate the objective (compliance) and constraint (mass)
        '''

        # Add the number of function evaluations
        self.fevals += 1

        # Convert the design variables with the scaling
        A = self.Area_scale * x[:]

        # Evaluate compliance objective
        self.assembleMat(A, self.K)
        self.assembleLoadVec(self.f)
        self.applyBCs(self.K, self.f)

        # Copy the values
        self.u[:] = self.f[:]

        # Perform the Cholesky factorization
        try:
            self.L = linalg.cholesky(self.K, lower=True)
        except Exception as excpt:
            print('Exception in cholesky factorization ', excpt)

        # Solve the resulting linear system of equations
        linalg.solve_triangular(self.L,
                                self.u,
                                lower=True,
                                trans='N',
                                overwrite_b=True)
        linalg.solve_triangular(self.L,
                                self.u,
                                lower=True,
                                trans='T',
                                overwrite_b=True)

        # Compute the compliance objective
        obj = np.dot(self.u, self.f)
        if self.obj_scale is None:
            self.obj_scale = obj / 10.0

        # Scale the compliance objective
        obj = obj / self.obj_scale

        # Compute the mass of the entire truss
        mass = 0.0
        index = 0

        for bar in self.conn:
            # Get the first and second node numbers from the bar
            n1 = bar[0]
            n2 = bar[1]

            # Compute the nodal locations
            xd = self.xpos[2 * n2] - self.xpos[2 * n1]
            yd = self.xpos[2 * n2 + 1] - self.xpos[2 * n1 + 1]
            Le = np.sqrt(xd**2 + yd**2)
            mass += self.rho * Le * A[index]

            index += 1

        # Create the array of constraints >= 0.0
        con = np.array([self.m_fixed - mass]) / self.mass_scale

        fail = 0
        return fail, obj, con
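The two solve_triangular calls above carry out the forward and back substitution of K u = f with K = L L^T. An equivalent and slightly more compact route goes through cho_factor/cho_solve; a sketch with a toy stiffness matrix and load vector:

import numpy as np
from scipy.linalg import cho_factor, cho_solve

rng = np.random.RandomState(3)
B = rng.randn(4, 4)
K = B @ B.T + 4.0 * np.eye(4)   # toy symmetric positive-definite stiffness matrix
f = rng.randn(4)                # toy load vector

c, low = cho_factor(K, lower=True)
u = cho_solve((c, low), f)      # same result as the two triangular solves
assert np.allclose(K @ u, f)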
Example #52
0
    def fit(self, X, y):
        """Fit Gaussian process regression model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features) or list of object
            Feature vectors or other representations of training data.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values

        Returns
        -------
        self : returns an instance of self.
        """
        if self.kernel is None:  # Use an RBF kernel as default
            self.kernel_ = C(1.0, constant_value_bounds="fixed") \
                * RBF(1.0, length_scale_bounds="fixed")
        else:
            self.kernel_ = clone(self.kernel)

        self._rng = check_random_state(self.random_state)

        if self.kernel_.requires_vector_input:
            X, y = self._validate_data(X, y, multi_output=True, y_numeric=True,
                                       ensure_2d=True, dtype="numeric")
        else:
            X, y = self._validate_data(X, y, multi_output=True, y_numeric=True,
                                       ensure_2d=False, dtype=None)

        # Normalize target value
        if self.normalize_y:
            self._y_train_mean = np.mean(y, axis=0)
            self._y_train_std = np.std(y, axis=0)

            # Remove mean and make unit variance
            y = (y - self._y_train_mean) / self._y_train_std

        else:
            self._y_train_mean = np.zeros(1)
            self._y_train_std = 1

        if np.iterable(self.alpha) \
           and self.alpha.shape[0] != y.shape[0]:
            if self.alpha.shape[0] == 1:
                self.alpha = self.alpha[0]
            else:
                raise ValueError("alpha must be a scalar or an array"
                                 " with same number of entries as y.(%d != %d)"
                                 % (self.alpha.shape[0], y.shape[0]))

        self.X_train_ = np.copy(X) if self.copy_X_train else X
        self.y_train_ = np.copy(y) if self.copy_X_train else y

        if self.optimizer is not None and self.kernel_.n_dims > 0:
            # Choose hyperparameters based on maximizing the log-marginal
            # likelihood (potentially starting from several initial values)
            def obj_func(theta, eval_gradient=True):
                if eval_gradient:
                    lml, grad = self.log_marginal_likelihood(
                        theta, eval_gradient=True, clone_kernel=False)
                    return -lml, -grad
                else:
                    return -self.log_marginal_likelihood(theta,
                                                         clone_kernel=False)

            # First optimize starting from theta specified in kernel
            optima = [(self._constrained_optimization(obj_func,
                                                      self.kernel_.theta,
                                                      self.kernel_.bounds))]

            # Additional runs are performed from log-uniform chosen initial
            # theta
            if self.n_restarts_optimizer > 0:
                if not np.isfinite(self.kernel_.bounds).all():
                    raise ValueError(
                        "Multiple optimizer restarts (n_restarts_optimizer>0) "
                        "requires that all bounds are finite.")
                bounds = self.kernel_.bounds
                for iteration in range(self.n_restarts_optimizer):
                    theta_initial = \
                        self._rng.uniform(bounds[:, 0], bounds[:, 1])
                    optima.append(
                        self._constrained_optimization(obj_func, theta_initial,
                                                       bounds))
            # Select result from run with minimal (negative) log-marginal
            # likelihood
            lml_values = list(map(itemgetter(1), optima))
            self.kernel_.theta = optima[np.argmin(lml_values)][0]
            self.kernel_._check_bounds_params()

            self.log_marginal_likelihood_value_ = -np.min(lml_values)
        else:
            self.log_marginal_likelihood_value_ = \
                self.log_marginal_likelihood(self.kernel_.theta,
                                             clone_kernel=False)

        # Precompute quantities required for predictions which are independent
        # of actual query points
        K = self.kernel_(self.X_train_)
        K[np.diag_indices_from(K)] += self.alpha
        try:
            self.L_ = cholesky(K, lower=True)  # Line 2
            # self.L_ changed, self._K_inv needs to be recomputed
            self._K_inv = None
        except np.linalg.LinAlgError as exc:
            exc.args = ("The kernel, %s, is not returning a "
                        "positive definite matrix. Try gradually "
                        "increasing the 'alpha' parameter of your "
                        "GaussianProcessRegressor estimator."
                        % self.kernel_,) + exc.args
            raise
        self.alpha_ = cho_solve((self.L_, True), self.y_train_)  # Line 3
        return self
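The Cholesky factor L_ and the weights alpha_ cached at the end of fit are exactly the quantities the GP predictive equations reuse (the "Line 2"/"Line 3" comments presumably refer to Algorithm 2.1 in Rasmussen & Williams). Below is a minimal sketch of how a query would typically consume them; `sketch_predict` and `X_query` are illustrative names, not part of the snippet above.

import numpy as np
from scipy.linalg import solve_triangular

def sketch_predict(gpr, X_query):
    # k(X*, X): cross-covariance between query and training points
    K_trans = gpr.kernel_(X_query, gpr.X_train_)
    # predictive mean in normalized space, then undo the normalization
    y_mean = K_trans @ gpr.alpha_
    y_mean = gpr._y_train_std * y_mean + gpr._y_train_mean
    # predictive variance via a triangular solve against L_
    V = solve_triangular(gpr.L_, K_trans.T, lower=True)
    y_var = gpr.kernel_.diag(X_query) - np.sum(V ** 2, axis=0)
    y_var = np.clip(y_var, 0.0, None)  # guard against tiny negative values
    return y_mean, np.sqrt(y_var) * gpr._y_train_std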
Example #53
0
    def chol_gram(self, state):
        jacob_constr = self.jacob_constr(state)
        gram = jacob_constr @ self.mult_inv_metric(jacob_constr.T)
        return sla.cholesky(gram, lower=True)
Example #54
0
    def getcov(self, around):
      if rank==0:
         N=self.N

         if (self.fixedcov):
            cov=zeros((N,N))
            for i in range(N):
                cov[i,i]=self.sigreg[i]**2
            if(self.verbose>0):
                print(cov)
            G=Gaussian(around,cov)    
            return G

         icov=zeros((N,N))
         delta=self.sigreg/1000.0

      toget=[]
      toget.append(around)

      ### This is a somewhat ugly hack:
      ### we run exactly the same loop twice, first to populate the list of
      ### points to evaluate, and then to pop the results, hoping the two
      ### stay perfectly in sync.

      if rank==0:
         for i in range(N):
            parspi=around*1.0
            parsmi=around*1.0
            parspi[i]+=delta[i]
            parsmi[i]-=delta[i]
            for j in range(N):
                if (i==j):
                    toget.append(parspi)
                    toget.append(parsmi)
                else:
                    parspp=parspi*1.0
                    parspm=parspi*1.0
                    parsmp=parsmi*1.0
                    parsmm=parsmi*1.0
                    parspp[j]+=delta[j]
                    parspm[j]-=delta[j]
                    parsmp[j]+=delta[j]
                    parsmm[j]-=delta[j]
                    toget.append(parspp)
                    toget.append(parsmm)
                    toget.append(parspm)
                    toget.append(parsmp)

      likes=self.like(toget)
    
      if rank==0:
         like0=likes.pop(0)
         for i in range(N):
            for j in range(N):
                if (i==j):
                    der=(likes.pop(0)+likes.pop(0)-2*like0)/(delta[i]**2)
                else:
                    der=(likes.pop(0)+likes.pop(0)-likes.pop(0)-likes.pop(0))/(4*delta[i]*delta[j])
                icov[i,j]=-der
                icov[j,i]=-der

         while True:
            if(self.verbose>0):
	       print "Regularizing cholesky"
            for i in range(N):
                icov[i,i]+=1/self.sigreg[i]**2
            try:
                ch=la.cholesky(icov)
                break
            except:
                pass

         cov=la.inv(icov)
         if(self.verbose>0):
            print(cov)
         G=Gaussian(around,self.blow*cov)    
         return G
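The while-loop above keeps strengthening the diagonal with 1/sigreg**2 until the Cholesky factorization goes through. A standalone sketch of that "add jitter until positive definite" pattern, with hypothetical names and a fixed retry budget, might look like this:

import numpy as np

def cholesky_with_jitter(A, jitter=1e-10, max_tries=12):
    """Retry the factorization with geometrically increasing diagonal jitter."""
    n = A.shape[0]
    for _ in range(max_tries):
        try:
            return np.linalg.cholesky(A + jitter * np.eye(n))
        except np.linalg.LinAlgError:
            jitter *= 10.0
    raise np.linalg.LinAlgError("matrix could not be regularized to positive definite")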
Example #55
0
    def __init__(self, pot_energy, metric, grad_pot_energy=None):
        super().__init__(pot_energy, metric, grad_pot_energy)
        self.chol_metric = sla.cholesky(metric, lower=True)
Example #56
0
def chol(x):
    """Compute Cholesky factorization."""
    # Transposing flips between the upper- and lower-triangular factors
    # returned by the underlying `linalg.cholesky` call.
    return linalg.cholesky(x).T
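A hedged usage check, assuming `linalg` here refers to scipy.linalg (whose default is the upper-triangular factor, so the transpose above is the lower one):

import numpy as np
from scipy import linalg

A = np.array([[4.0, 1.0], [1.0, 3.0]])
L = linalg.cholesky(A).T           # same as chol(A) above under that assumption
assert np.allclose(L, np.tril(L))  # lower triangular
assert np.allclose(L @ L.T, A)     # and reconstructs A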
Example #57
0
    def compute_ei(self, comp, pend, cand, vals):
        if pend.shape[0] == 0:
            # If there are no pending, don't do anything fancy.

            # Current best.
            best = np.min(vals)

            # The primary covariances for prediction.
            comp_cov = self.cov(comp)
            cand_cross = self.cov(comp, cand)

            # Compute the required Cholesky.
            obsv_cov = comp_cov + self.noise * np.eye(comp.shape[0])
            obsv_chol = spla.cholesky(obsv_cov, lower=True)

            # Solve the linear systems.
            alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta = spla.solve_triangular(obsv_chol, cand_cross, lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v)
            u = (best - func_m) / func_s
            ncdf = sps.norm.cdf(u)
            npdf = sps.norm.pdf(u)
            ei = func_s * (u * ncdf + npdf)

            return ei
        else:
            # If there are pending experiments, fantasize their outcomes.

            # Create a composite vector of complete and pending.
            comp_pend = np.concatenate((comp, pend))

            # Compute the covariance and Cholesky decomposition.
            comp_pend_cov = self.cov(comp_pend) + self.noise * np.eye(
                comp_pend.shape[0])
            comp_pend_chol = spla.cholesky(comp_pend_cov, lower=True)

            # Compute submatrices.
            pend_cross = self.cov(comp, pend)
            pend_kappa = self.cov(pend)

            # Use the sub-Cholesky.
            obsv_chol = comp_pend_chol[:comp.shape[0], :comp.shape[0]]

            # Solve the linear systems.
            alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta = spla.cho_solve((obsv_chol, True), pend_cross)

            # Finding predictive means and variances.
            pend_m = np.dot(pend_cross.T, alpha) + self.mean
            pend_K = pend_kappa - np.dot(pend_cross.T, beta)

            # Take the Cholesky of the predictive covariance.
            pend_chol = spla.cholesky(pend_K, lower=True)

            # Make predictions.
            pend_fant = (np.dot(
                pend_chol, npr.randn(pend.shape[0], self.pending_samples)) +
                         pend_m[:, None])

            # Include the fantasies.
            fant_vals = np.concatenate(
                (np.tile(vals[:, np.newaxis],
                         (1, self.pending_samples)), pend_fant))

            # Compute bests over the fantasies.
            bests = np.min(fant_vals, axis=0)

            # Now generalize from these fantasies.
            cand_cross = self.cov(comp_pend, cand)

            # Solve the linear systems.
            alpha = spla.cho_solve((comp_pend_chol, True),
                                   fant_vals - self.mean)
            beta = spla.solve_triangular(comp_pend_chol,
                                         cand_cross,
                                         lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v[:, np.newaxis])
            u = (bests[np.newaxis, :] - func_m) / func_s
            ncdf = sps.norm.cdf(u)
            npdf = sps.norm.pdf(u)
            ei = func_s * (u * ncdf + npdf)

            return np.mean(ei, axis=1)
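Both branches above finish with the same closed-form expected-improvement expression (minimization convention). Pulled out on its own, with hypothetical predictive means `mu`, standard deviations `sigma`, and incumbent `best`, it is simply:

import scipy.stats as sps

def expected_improvement(mu, sigma, best):
    u = (best - mu) / sigma
    return sigma * (u * sps.norm.cdf(u) + sps.norm.pdf(u))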
Example #58
0
        synapse_0_masuk = np.reshape(synapse_0_update, (1, -1))
        synapse_h_masuk = np.reshape(synapse_h_update, (1, -1))
        synapse_1_masuk = np.reshape(synapse_1_update,
                                     (1, -1))  # one row, laid out sideways
        masuk = np.concatenate(
            (synapse_0_masuk, synapse_h_masuk, synapse_1_masuk), axis=1)

        #%% Initialize the UKF without filterpy

        X_ = masuk  # mu (a.k.a. W-hat) from de Lima
        n = X_.size  # Julier's 'dimension of the problem'
        lambda_ = alpha**2 * (n + kappa) - n

        #%% SIGMA POINTS around mean
        mean = np.sum(w_concat) / n  # overall mean
        U = cholesky((n + lambda_) * P)  # matrix square root via Cholesky
        # U = np.sqrt(n+lambda_)*P

        sigmas = np.zeros((2 * n + 1, n))
        sigmas[0] = X_  # as in filterpy, shape (121, 60) because it is scaled by P!
        # hence...
        for k in range(n):  # stacked downward, giving shape (121, 60)
            sigmas[k + 1] = np.subtract(X_, -U[k])
            sigmas[n + k + 1] = np.subtract(X_, U[k])

        #%% BOBOT SIGMA dari Merwe
        c_ = .5 / (n + lambda_)
        Wm = np.full(2 * n + 1, c_)
        Wc = Wm.copy()  # shape (2*n + 1,); copy so setting Wc[0] below does not also change Wm
        Wc[0] = lambda_ / (n + lambda_) + (1 - alpha**2 + beta)
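The construction above mirrors the Merwe scaled unscented transform (as in filterpy). A compact, self-contained sketch, including the distinct mean and covariance weights that motivate the `.copy()` above, might be:

import numpy as np
from scipy.linalg import cholesky

def merwe_sigma_points(x, P, alpha=1e-3, beta=2.0, kappa=0.0):
    # x: mean vector of shape (n,), P: covariance of shape (n, n) (hypothetical inputs)
    n = x.size
    lambda_ = alpha**2 * (n + kappa) - n
    U = cholesky((n + lambda_) * P)  # upper-triangular matrix square root
    sigmas = np.empty((2 * n + 1, n))
    sigmas[0] = x
    for k in range(n):
        sigmas[k + 1] = x + U[k]
        sigmas[n + k + 1] = x - U[k]
    c = 0.5 / (n + lambda_)
    Wm = np.full(2 * n + 1, c)
    Wc = Wm.copy()
    Wm[0] = lambda_ / (n + lambda_)
    Wc[0] = lambda_ / (n + lambda_) + (1 - alpha**2 + beta)
    return sigmas, Wm, Wc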
Example #59
0
def curve_fit(f,
              xdata,
              ydata,
              p0=None,
              sigma=None,
              absolute_sigma=False,
              check_finite=True,
              bounds=(-np.inf, np.inf),
              method=None,
              jac=None,
              **kwargs):
    """
    Use non-linear least squares to fit a function, f, to data.

    Assumes ``ydata = f(xdata, *params) + eps``

    Parameters
    ----------
    f : callable
        The model function, f(x, ...).  It must take the independent
        variable as the first argument and the parameters to fit as
        separate remaining arguments.
    xdata : array_like or object
        The independent variable where the data is measured.
        Should usually be an M-length sequence or a (k, M)-shaped array for
        functions with k predictors, but can actually be any object.
    ydata : array_like
        The dependent data, a length M array - nominally ``f(xdata, ...)``.
    p0 : array_like, optional
        Initial guess for the parameters (length N).  If None, then the
        initial values will all be 1 (if the number of parameters for the
        function can be determined using introspection, otherwise a
        ValueError is raised).
    sigma : None or M-length sequence or MxM array, optional
        Determines the uncertainty in `ydata`. If we define residuals as
        ``r = ydata - f(xdata, *popt)``, then the interpretation of `sigma`
        depends on its number of dimensions:

            - A 1-d `sigma` should contain values of standard deviations of
              errors in `ydata`. In this case, the optimized function is
              ``chisq = sum((r / sigma) ** 2)``.

            - A 2-d `sigma` should contain the covariance matrix of
              errors in `ydata`. In this case, the optimized function is
              ``chisq = r.T @ inv(sigma) @ r``.

              .. versionadded:: 0.19

        None (default) is equivalent of 1-d `sigma` filled with ones.
    absolute_sigma : bool, optional
        If True, `sigma` is used in an absolute sense and the estimated parameter
        covariance `pcov` reflects these absolute values.

        If False, only the relative magnitudes of the `sigma` values matter.
        The returned parameter covariance matrix `pcov` is based on scaling
        `sigma` by a constant factor. This constant is set by demanding that the
        reduced `chisq` for the optimal parameters `popt` when using the
        *scaled* `sigma` equals unity. In other words, `sigma` is scaled to
        match the sample variance of the residuals after the fit.
        Mathematically,
        ``pcov(absolute_sigma=False) = pcov(absolute_sigma=True) * chisq(popt)/(M-N)``
    check_finite : bool, optional
        If True, check that the input arrays do not contain nans or infs,
        and raise a ValueError if they do. Setting this parameter to
        False may silently produce nonsensical results if the input arrays
        do contain nans. Default is True.
    bounds : 2-tuple of array_like, optional
        Lower and upper bounds on parameters. Defaults to no bounds.
        Each element of the tuple must be either an array with the length equal
        to the number of parameters, or a scalar (in which case the bound is
        taken to be the same for all parameters.) Use ``np.inf`` with an
        appropriate sign to disable bounds on all or some parameters.

        .. versionadded:: 0.17
    method : {'lm', 'trf', 'dogbox'}, optional
        Method to use for optimization.  See `least_squares` for more details.
        Default is 'lm' for unconstrained problems and 'trf' if `bounds` are
        provided. The method 'lm' won't work when the number of observations
        is less than the number of variables, use 'trf' or 'dogbox' in this
        case.

        .. versionadded:: 0.17
    jac : callable, string or None, optional
        Function with signature ``jac(x, ...)`` which computes the Jacobian
        matrix of the model function with respect to parameters as a dense
        array_like structure. It will be scaled according to provided `sigma`.
        If None (default), the Jacobian will be estimated numerically.
        String keywords for 'trf' and 'dogbox' methods can be used to select
        a finite difference scheme, see `least_squares`.

        .. versionadded:: 0.18
    kwargs
        Keyword arguments passed to `leastsq` for ``method='lm'`` or
        `least_squares` otherwise.

    Returns
    -------
    popt : array
        Optimal values for the parameters so that the sum of the squared
        residuals of ``f(xdata, *popt) - ydata`` is minimized
    pcov : 2d array
        The estimated covariance of popt. The diagonals provide the variance
        of the parameter estimate. To compute one standard deviation errors
        on the parameters use ``perr = np.sqrt(np.diag(pcov))``.

        How the `sigma` parameter affects the estimated covariance
        depends on `absolute_sigma` argument, as described above.

        If the Jacobian matrix at the solution doesn't have full rank, the
        'lm' method returns a matrix filled with ``np.inf``; the 'trf' and
        'dogbox' methods, on the other hand, use the Moore-Penrose
        pseudoinverse to compute the covariance matrix.

    Raises
    ------
    ValueError
        if either `ydata` or `xdata` contain NaNs, or if incompatible options
        are used.

    RuntimeError
        if the least-squares minimization fails.

    OptimizeWarning
        if covariance of the parameters can not be estimated.

    See Also
    --------
    least_squares : Minimize the sum of squares of nonlinear functions.
    scipy.stats.linregress : Calculate a linear least squares regression for
                             two sets of measurements.

    Notes
    -----
    With ``method='lm'``, the algorithm uses the Levenberg-Marquardt algorithm
    through `leastsq`. Note that this algorithm can only deal with
    unconstrained problems.

    Box constraints can be handled by methods 'trf' and 'dogbox'. Refer to
    the docstring of `least_squares` for more information.

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> from scipy.optimize import curve_fit

    >>> def func(x, a, b, c):
    ...     return a * np.exp(-b * x) + c

    Define the data to be fit with some noise:

    >>> xdata = np.linspace(0, 4, 50)
    >>> y = func(xdata, 2.5, 1.3, 0.5)
    >>> np.random.seed(1729)
    >>> y_noise = 0.2 * np.random.normal(size=xdata.size)
    >>> ydata = y + y_noise
    >>> plt.plot(xdata, ydata, 'b-', label='data')

    Fit for the parameters a, b, c of the function `func`:

    >>> popt, pcov = curve_fit(func, xdata, ydata)
    >>> popt
    array([ 2.55423706,  1.35190947,  0.47450618])
    >>> plt.plot(xdata, func(xdata, *popt), 'r-',
    ...          label='fit: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt))

    Constrain the optimization to the region of ``0 <= a <= 3``,
    ``0 <= b <= 1`` and ``0 <= c <= 0.5``:

    >>> popt, pcov = curve_fit(func, xdata, ydata, bounds=(0, [3., 1., 0.5]))
    >>> popt
    array([ 2.43708906,  1.        ,  0.35015434])
    >>> plt.plot(xdata, func(xdata, *popt), 'g--',
    ...          label='fit: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt))

    >>> plt.xlabel('x')
    >>> plt.ylabel('y')
    >>> plt.legend()
    >>> plt.show()

    """
    if p0 is None:
        # determine number of parameters by inspecting the function
        from scipy._lib._util import getargspec_no_self as _getargspec
        args, varargs, varkw, defaults = _getargspec(f)
        if len(args) < 2:
            raise ValueError("Unable to determine number of fit parameters.")
        n = len(args) - 1
    else:
        p0 = np.atleast_1d(p0)
        n = p0.size

    lb, ub = prepare_bounds(bounds, n)
    if p0 is None:
        p0 = _initialize_feasible(lb, ub)

    bounded_problem = np.any((lb > -np.inf) | (ub < np.inf))
    if method is None:
        if bounded_problem:
            method = 'trf'
        else:
            method = 'lm'

    if method == 'lm' and bounded_problem:
        raise ValueError("Method 'lm' only works for unconstrained problems. "
                         "Use 'trf' or 'dogbox' instead.")

    # optimization may produce garbage for float32 inputs, cast them to float64

    # NaNs can not be handled
    if check_finite:
        ydata = np.asarray_chkfinite(ydata, float)
    else:
        ydata = np.asarray(ydata, float)

    if isinstance(xdata, (list, tuple, np.ndarray)):
        # `xdata` is passed straight to the user-defined `f`, so allow
        # non-array_like `xdata`.
        if check_finite:
            xdata = np.asarray_chkfinite(xdata, float)
        else:
            xdata = np.asarray(xdata, float)

    if ydata.size == 0:
        raise ValueError("`ydata` must not be empty!")

    # Determine type of sigma
    if sigma is not None:
        sigma = np.asarray(sigma)

        # if 1-d, sigma are errors, define transform = 1/sigma
        if sigma.shape == (ydata.size, ):
            transform = 1.0 / sigma
        # if 2-d, sigma is the covariance matrix,
        # define transform = L such that L L^T = C
        elif sigma.shape == (ydata.size, ydata.size):
            try:
                # scipy.linalg.cholesky requires lower=True to return L L^T = A
                transform = cholesky(sigma, lower=True)
            except LinAlgError:
                raise ValueError("`sigma` must be positive definite.")
        else:
            raise ValueError("`sigma` has incorrect shape.")
    else:
        transform = None

    func = _wrap_func(f, xdata, ydata, transform)
    if callable(jac):
        jac = _wrap_jac(jac, xdata, transform)
    elif jac is None and method != 'lm':
        jac = '2-point'

    if method == 'lm':
        # Remove full_output from kwargs, otherwise we're passing it in twice.
        return_full = kwargs.pop('full_output', False)
        res = leastsq(func, p0, Dfun=jac, full_output=1, **kwargs)
        popt, pcov, infodict, errmsg, ier = res
        cost = np.sum(infodict['fvec']**2)
        if ier not in [1, 2, 3, 4]:
            raise RuntimeError("Optimal parameters not found: " + errmsg)
    else:
        # Rename maxfev (leastsq) to max_nfev (least_squares), if specified.
        if 'max_nfev' not in kwargs:
            kwargs['max_nfev'] = kwargs.pop('maxfev', None)

        res = least_squares(func,
                            p0,
                            jac=jac,
                            bounds=bounds,
                            method=method,
                            **kwargs)

        if not res.success:
            raise RuntimeError("Optimal parameters not found: " + res.message)

        cost = 2 * res.cost  # res.cost is half sum of squares!
        popt = res.x

        # Do Moore-Penrose inverse discarding zero singular values.
        _, s, VT = svd(res.jac, full_matrices=False)
        threshold = np.finfo(float).eps * max(res.jac.shape) * s[0]
        s = s[s > threshold]
        VT = VT[:s.size]
        pcov = np.dot(VT.T / s**2, VT)
        return_full = False

    warn_cov = False
    if pcov is None:
        # indeterminate covariance
        pcov = zeros((len(popt), len(popt)), dtype=float)
        pcov.fill(inf)
        warn_cov = True
    elif not absolute_sigma:
        if ydata.size > p0.size:
            s_sq = cost / (ydata.size - p0.size)
            pcov = pcov * s_sq
        else:
            pcov.fill(inf)
            warn_cov = True

    if warn_cov:
        warnings.warn('Covariance of the parameters could not be estimated',
                      category=OptimizeWarning)

    if return_full:
        return popt, pcov, infodict, errmsg, ier
    else:
        return popt, pcov
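For a 2-d `sigma`, the lower Cholesky factor computed above is used to whiten the residuals: with L L^T = C, solving L z = r gives z whose plain sum of squares equals r^T C^{-1} r. A small illustrative check follows (the names here are ours, not part of curve_fit):

import numpy as np
from scipy.linalg import cholesky, solve_triangular

def whitened_residuals(r, C):
    L = cholesky(C, lower=True)
    return solve_triangular(L, r, lower=True)

C = np.array([[2.0, 0.3], [0.3, 1.0]])
r = np.array([0.5, -1.2])
z = whitened_residuals(r, C)
assert np.isclose(z @ z, r @ np.linalg.solve(C, r))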
Example #60
0
    def fit(self, X_l, y_l, X_h, y_h):
        """Fit Gaussian process regression model.

        Parameters
        ----------
        X_l : array-like, shape = (n_l_samples, n_features)
            Training data

        y_l : array-like, shape = (n_l_samples, [n_output_dims])
            Target values

        X_h : array-like, shape = (n_h_samples, n_features)
            Training data

        y_h : array-like, shape = (n_h_samples, [n_output_dims])
            Target values

        Returns
        -------
        self : returns an instance of self.
        """
        if self.kernel is None:  # Use an RBF kernel as default
            self.kernel_l_ = C(1.0, constant_value_bounds="fixed") \
                * RBF(1.0, length_scale_bounds="fixed")
        else:
            self.kernel_l_ = clone(self.kernel)
        self.kernel_d_ = clone(self.kernel_l_)

        self.rng = check_random_state(self.random_state)

        X_l, y_l = check_X_y(X_l, y_l, multi_output=True, y_numeric=True)
        X_h, y_h = check_X_y(X_h, y_h, multi_output=True, y_numeric=True)
        self.n_l_ = len(X_l)

        # Normalize target value
        if self.normalize_y:
            self._y_l_train_mean = np.mean(y_l, axis=0)
            self._y_h_train_mean = np.mean(y_h, axis=0)
            # demean y
            y_l = y_l - self._y_l_train_mean
            y_h = y_h - self._y_h_train_mean
        else:
            self._y_l_train_mean = np.zeros(1)
            self._y_h_train_mean = np.zeros(1)

        self.X_train_ = np.vstack((X_l, X_h))
        self.y_train_ = np.hstack((y_l, y_h))

        theta_initial = np.hstack(
            (np.array([self.rho]), self.kernel_l_.theta, self.kernel_d_.theta))
        if self.optimizer is not None and self.kernel_l_.n_dims > 0:
            # Choose hyperparameters based on maximizing the log-marginal
            # likelihood (potentially starting from several initial values)
            def obj_func(theta, eval_gradient=self.eval_gradient):
                if eval_gradient:
                    # Gradient mode is not implemented yet; the lines after the
                    # raise are unreachable and kept only as a placeholder.
                    raise NotImplementedError(
                        "eval_gradient = True mode is not implemented yet!")
                    lml, grad = self.log_marginal_likelihood(
                        theta, eval_gradient=True)
                    return -lml, -grad
                else:
                    return -self.log_marginal_likelihood(theta)

            theta_bounds = np.r_[np.array(self.rho_bounds)[np.newaxis],
                                 self.kernel_l_.bounds, self.kernel_d_.bounds]
            # First optimize starting from theta specified in kernel
            optima = [(self._constrained_optimization(obj_func, theta_initial,
                                                      theta_bounds,
                                                      self.eval_gradient))]

            # Additional runs are performed from log-uniform chosen initial
            # theta
            if self.n_restarts_optimizer > 0:
                flag = np.isfinite(self.kernel_l_.bounds).all() and \
                    np.isfinite(self.kernel_d_.bounds).all() and \
                    np.isfinite(self.rho_bounds).all()
                if not flag:
                    raise ValueError(
                        "Multiple optimizer restarts (n_restarts_optimizer>0) "
                        "requires that all bounds are finite.")
                bounds = np.vstack(
                    (np.array(self.rho_bounds).reshape(1, -1),
                     self.kernel_l_.bounds, self.kernel_d_.bounds))
                for iteration in range(self.n_restarts_optimizer):
                    theta_initial = np.hstack(
                        (self.rng.uniform(bounds[0, 0], bounds[0, 1]),
                         np.exp(self.rng.uniform(bounds[1:, 0], bounds[1:,
                                                                       1]))))
                    optima.append(
                        self._constrained_optimization(obj_func, theta_initial,
                                                       bounds,
                                                       self.eval_gradient))
            # Select result from run with minimal (negative) log-marginal
            # likelihood
            lml_values = list(map(itemgetter(1), optima))
            best_hyperparams = optima[np.argmin(lml_values)][0]
            self.rho = best_hyperparams[0]
            self.kernel_l_.theta = best_hyperparams[1:1 +
                                                    len(self.kernel_l_.theta)]
            self.kernel_d_.theta = best_hyperparams[1 +
                                                    len(self.kernel_l_.theta):]
            self.log_marginal_likelihood_value_ = -np.min(lml_values)
        else:
            self.log_marginal_likelihood_value_ = \
                self.log_marginal_likelihood(theta_initial)

        # Precompute quantities required for predictions which are independent
        # of actual query points
        K_lf = self.kernel_l_(self.X_train_[:self.n_l_])
        K = np.vstack((
            np.hstack((self.kernel_l_(self.X_train_[:self.n_l_]),
                       self.rho * self.kernel_l_(self.X_train_[:self.n_l_],
                                                 self.X_train_[self.n_l_:]))),
            np.hstack((
                self.rho * self.kernel_l_(self.X_train_[self.n_l_:],
                                          self.X_train_[:self.n_l_]),
                self.rho**2 * self.kernel_l_(self.X_train_[self.n_l_:])
                +  # noqa W504
                self.kernel_d_(self.X_train_[self.n_l_:])))))
        K_lf[np.diag_indices_from(K_lf)] += self.alpha
        K[np.diag_indices_from(K)] += self.alpha
        try:
            self.L_lf_ = cholesky(K_lf, lower=True)  # Line 2 (lf)
            self.L_ = cholesky(K, lower=True)  # Line 2
            # self.L_ changed, self._K_inv needs to be recomputed
            self._K_inv = None
            self._K_lf_inv = None
        except np.linalg.LinAlgError as exc:
            exc.args = ("The kernel is not returning a "
                        "positive definite matrix. Try gradually "
                        "increasing the 'alpha' parameter of your "
                        "GaussianProcessRegressor estimator.", ) + exc.args
            raise
        self.alpha_lf_ = cho_solve((self.L_lf_, True),
                                   self.y_train_[:self.n_l_])  # Line 3 (Lf)
        self.alpha_ = cho_solve((self.L_, True), self.y_train_)  # Line 3
        return self
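A heavily hedged sketch of how the cached `alpha_` might be reused for the high-fidelity predictive mean under this AR(1)-style co-kriging model; `predict_high_mean` and `X_query` are hypothetical and only meant to mirror the block structure of K assembled above.

import numpy as np

def predict_high_mean(model, X_query):
    X_l = model.X_train_[:model.n_l_]
    X_h = model.X_train_[model.n_l_:]
    # Cross-covariance between query points and the stacked (low, high) training set.
    k = np.hstack((model.rho * model.kernel_l_(X_query, X_l),
                   model.rho**2 * model.kernel_l_(X_query, X_h)
                   + model.kernel_d_(X_query, X_h)))
    return k @ model.alpha_ + model._y_h_train_mean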