Example #1
def HolsteinPrimakoff( generation, lattice = "cactus", periodic = True ):
    """
    Given a generation, build the cactus out to that generation, construct the
    Hamiltonian matrix, and return the eigenvalues.  We do this in the
    semi-roundabout way proposed by Mucciolo, Castro Neto, and Chamon in
    PRB 69, 214424, so what is returned is the true eigenspectrum and an
    eigenvector matrix representing the rotations of a Bogoliubov-type
    transformation.
    """
    if lattice == "cactus":
        H = HusimiHamiltonian( generation, periodic )
    elif lattice == "triangle":
        H = TriangleHamiltonian( generation )
    else:
        raise ValueError("Options are 'cactus' and 'triangle'")
        
    l = H.shape[0] // 2  # integer division: H is built from four l-by-l blocks
    K = H[:l, :l]
    L = H[:l, l:]
    
    squaredDiff = scipy.dot(K, K) - scipy.dot(L, L)
    commutator = scipy.dot(L,K) - scipy.dot(K,L)
    
    if not scipy.any(commutator):  # a plain sum could miss cancelling nonzero entries
        eigVals, eigVects = scipy.linalg.eigh( squaredDiff )
    else:
        eigVals, eigVects = scipy.linalg.eig( squaredDiff - commutator )
    
    #   The 'real' is not a cheat -- zero values could be negative as a result
    #   of roundoff, this takes that into account.
    eigVals = scipy.real( scipy.sqrt( eigVals ) )
    
    return eigVals, eigVects
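A minimal, self-contained sketch of the trick above (not the author's lattice code, which needs HusimiHamiltonian): for commuting blocks K and L, the Bogoliubov energies are the square roots of the eigenvalues of K^2 - L^2.

import scipy
import scipy.linalg

K = scipy.array([[2.0, 0.0], [0.0, 2.0]])
L = scipy.array([[1.0, 0.0], [0.0, 1.0]])
squaredDiff = scipy.dot(K, K) - scipy.dot(L, L)
eigVals, eigVects = scipy.linalg.eigh(squaredDiff)
print(scipy.sqrt(eigVals))  # both toy modes sit at sqrt(3)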
Example #2
def cov_dvrpmllbb_to_vxyz_single(d,e_d,e_vr,pmll,pmbb,cov_pmllbb,l,b):
    """
    NAME:
       cov_dvrpmllbb_to_vxyz
    PURPOSE:
       propagate distance, radial velocity, and proper motion uncertainties to
       Galactic coordinates for scalar inputs
    INPUT:
       d - distance [kpc, as/mas for plx]
       e_d - distance uncertainty [kpc, [as/mas] for plx]
       e_vr  - radial velocity uncertainty [km/s]
       pmll - proper motion in l (*cos(b)) [ [as/mas]/yr ]
       pmbb - proper motion in b [ [as/mas]/yr ]
       cov_pmllbb - uncertainty covariance for proper motion
       l - Galactic longitude [rad]
       b - Galactic latitude [rad]
    OUTPUT:
       cov(vx,vy,vz) [3,3]
    HISTORY:
       2010-04-12 - Written - Bovy (NYU)
    """
    M= _K*sc.array([[pmll,d,0.],[pmbb,0.,d]])
    cov_dpmllbb= sc.zeros((3,3))
    cov_dpmllbb[0,0]= e_d**2.
    cov_dpmllbb[1:3,1:3]= cov_pmllbb
    cov_vlvb= sc.dot(M,sc.dot(cov_dpmllbb,M.T))
    cov_vrvlvb= sc.zeros((3,3))
    cov_vrvlvb[0,0]= e_vr**2.
    cov_vrvlvb[1:3,1:3]= cov_vlvb
    R= sc.array([[m.cos(l)*m.cos(b), m.sin(l)*m.cos(b), m.sin(b)],
                 [-m.sin(l),m.cos(l),0.],
                 [-m.cos(l)*m.sin(b),-m.sin(l)*m.sin(b), m.cos(b)]])
    return sc.dot(R.T,sc.dot(cov_vrvlvb,R))
Example #3
def get_stderr_fit(f, Xdata, popt, pcov):
    Y = f(Xdata, popt)
    listdY = []
    for i in xrange(len(popt)):
        p = popt[i]
        dp = abs(p)/1e6 + 1e-20
        popt[i] += dp
        Yi = f(Xdata, popt)
        dY = (Yi - Y)/dp
        listdY.append(dY)
        popt[i] -= dp
    listdY = scipy.array(listdY)
    # listdY is the forward-difference Jacobian dY/dp; it is N x M
    # pcov is N x N

    left = scipy.dot(listdY.T, pcov)
    right = scipy.dot(left, listdY)

    sigma2y = right.diagonal()
    # sigma2y is the variance of the fit as a function of X
    mean_sigma2y = scipy.mean(right.diagonal())

    M = Xdata.shape[0]
    N = len(popt)
    avg_stddev_data = scipy.sqrt(M*mean_sigma2y/N)
    sigmay = scipy.sqrt(sigma2y)
    return sigmay, avg_stddev_data
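A usage sketch under stated assumptions: get_stderr_fit as defined above is in scope (Python 2, because of xrange), and the toy model and covariance below are illustrative only.

import scipy

def line(X, p):
    return p[0] + p[1]*X

Xdata = scipy.linspace(0.0, 1.0, 20)
popt = scipy.array([0.5, 2.0])
pcov = scipy.diag([0.01, 0.04])  # pretend covariance from a fit
sigmay, avg_stddev_data = get_stderr_fit(line, Xdata, popt, pcov)
print(sigmay.shape)  # one standard error of the fit per X point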
Example #4
def dot_fromfeatures(features1,
                     features2 = None):

    if features2 is None:
        features2 = features1

    npoints1 = features1.shape[0]
    npoints2 = features2.shape[0]

    features1.shape = npoints1, -1
    features2.shape = npoints2, -1

    ndims = features1.shape[1]
    assert(features2.shape[1] == ndims)

    if ndims < DOT_MAX_NDIMS:
        out = sp.dot(features1, features2.T)
    else:
        out = sp.dot(features1[:,:DOT_MAX_NDIMS], 
                     features2[:,:DOT_MAX_NDIMS].T)
        ndims_done = DOT_MAX_NDIMS            
        while ndims_done < ndims:
            out += sp.dot(features1[:,ndims_done:ndims_done+DOT_MAX_NDIMS], 
                          features2[:,ndims_done:ndims_done+DOT_MAX_NDIMS].T)
            ndims_done += DOT_MAX_NDIMS
            
    return out
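A quick self-contained check of the chunking (DOT_MAX_NDIMS is a module-level constant in the original; a small value is picked here so the chunked branch actually runs):

import scipy as sp

DOT_MAX_NDIMS = 8
f1 = sp.random.rand(5, 20)
f2 = sp.random.rand(7, 20)
print(sp.allclose(sp.dot(f1, f2.T), dot_fromfeatures(f1, f2)))  # True: chunking does not change the result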
Example #5
    def fgmres(self, rhs, tol=1e-6, restrt=None, maxiter=None, callback=None):
        if maxiter is None:
            maxiter = len(rhs)
        if restrt is None:
            restrt = 2*maxiter
        # implemented as in [Saad, 1993]
        # start
        x = zeros(len(rhs))
        H = zeros((restrt+1, restrt))
        V = zeros((len(rhs), restrt))
        Z = zeros((len(rhs), restrt))
        # Arnoldi process (with modified Gram-Schmidt)
        res = 1.
        j = 0
        r = rhs - self.point.matvec(x)
        beta = norm(r)
        V[:, 0] = r/beta
        Zy = zeros(len(rhs))  # guard against the zero-iteration case
        while j < maxiter and res > tol:
            Z[:, j] = self.point.psolve(V[:, j])
            w = self.point.matvec(Z[:, j])
            for i in range(j+1):
                H[i, j] = dot(w, V[:, i])
                w = w - H[i, j]*V[:, i]
            H[j+1, j] = norm(w)
            V[:, j+1] = w/H[j+1, j]
            e = zeros(j+2)
            e[0] = 1.
            y, res, rank, sing_val = lstsq(H[:j+2, :j+1], beta*e)
            j += 1
            print "# GMRES| iteration :", j, "res: ", res/beta
            self.resid = r_[self.resid, res/beta]
            Zy = dot(Z[:, :j], y)
        x = x + Zy
        info = 1
        return (x, info)
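For comparison, SciPy ships a restarted, preconditioned GMRES; a minimal sketch on a small symmetric positive definite system:

import scipy as sp
import scipy.sparse.linalg

A = sp.array([[4.0, 1.0], [1.0, 3.0]])
b = sp.array([1.0, 2.0])
x, info = scipy.sparse.linalg.gmres(A, b, tol=1e-8, restart=2)
print(x, info)  # info == 0 signals convergence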
Example #6
def snr_maha(waveforms, invC, mu=None):
    """SNR from Mahalanobis distance (generalised euclidean distance)

    Definition of signal to noise ratio (SNR) as derived from the Mahalanobis
    distance. For C=eye this is equivalent to snr_power.

    :type waveforms: ndarray
    :param waveforms: waveform data (signal), one per row
    :type invC: ndarray
    :param invC: noise covariance matrix (a block toeplitz matrix)
    :type mu: ndarray
    :param mu: mean correction. Usually we assume zero-mean waveforms,
        so if this is None it will be ignored.
        Default=None
    :returns: ndarray - SNR per waveform
    """

    # inits and checks
    n, dim = waveforms.shape
    if invC.shape[0] != dim or invC.shape[1] != dim:
        raise ValueError("dimension mismatch for waveforms and covariance")
    rval = sp.zeros(n)

    # correct for mu
    if mu is not None:
        if mu.shape != (dim,):
            raise ValueError("dimension mismatch for waveforms and mu")
        waveforms -= mu

    # compute
    for i in xrange(n):
        rval[i] = sp.dot(sp.dot(waveforms[i], invC), waveforms[i].T)
        rval[i] /= float(dim)
    return sp.sqrt(rval)
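A self-contained sanity check: with invC equal to the identity, the Mahalanobis SNR reduces to the root of the mean power per waveform (Python 2, because snr_maha uses xrange).

import scipy as sp

waves = sp.random.randn(4, 16)
snr = snr_maha(waves.copy(), sp.eye(16))
print(sp.allclose(snr, sp.sqrt((waves**2).sum(axis=1)/16.0)))  # True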
Example #7
def calcInvFisher(sigma, invSigma=None, factorSigma=None):
    """ Efficiently compute the exact inverse of the FIM of a Gaussian.
    Returns a list of the diagonal blocks. """
    if invSigma is None:
        invSigma = inv(sigma)
    if factorSigma is None:
        factorSigma = cholesky(sigma)
    dim = sigma.shape[0]

    invF = [mat(1 / (invSigma[-1, -1] + factorSigma[-1, -1] ** -2))]
    invD = 1 / invSigma[-1, -1]
    for k in reversed(list(range(dim - 1))):
        v = invSigma[k + 1:, k]
        w = invSigma[k, k]
        wr = w + factorSigma[k, k] ** -2
        u = dot(invD, v)
        s = dot(v, u)
        q = 1 / (w - s)
        qr = 1 / (wr - s)
        t = -(1 + q * s) / w
        tr = -(1 + qr * s) / wr
        invF.append(blockCombine([[qr, tr * u], [mat(tr * u).T, invD + qr * outer(u, u)]]))
        invD = blockCombine([[q , t * u], [mat(t * u).T, invD + q * outer(u, u)]])

    invF.append(sigma)
    invF.reverse()
    return invF
Example #8
    def K_grad_i_dot(self, M, i):
        if i < self.Cr.getNumberParams():
            R = sp.dot(self.W(), sp.dot(self.W_grad_i(i).T, M))
            R += sp.dot(self.W_grad_i(i), sp.dot(self.W().T, M))
        else:
            R = self.d_grad_i(i-self.Cr.getNumberParams())[:, sp.newaxis] * M
        return R
Example #9
def rlsloo_ll1(V, D, Y, lambd):
    """
    Computes cs and the actual LOO errors for a single value of lambda (lambd).
    """
    n = V.shape[0]
    cl = Y.shape[1]

    inner = 1/(D + lambd)
    inner = inner.conj()
    VtY = sp.dot(V.T, Y)
    VtY = VtY.conj()

    # Because the signs of D are flipped (scipy.linalg.eig returns
    # flipped signs for the complex part of the eigenvalues)
    in_dot = sp.ones((n, 1)) * inner
    ViD = V * in_dot
    cs = sp.dot(ViD, VtY)
    dGi = sp.sum(ViD*V, axis=1)
    # -- till here works fine
    # check matrix dimensions
    looerrs = cs.ravel()/sp.real(dGi.ravel())
    looerrs = sp.real(looerrs)
    cs = sp.real(cs.transpose())

    return cs.ravel(), looerrs
Example #10
def Xgen(X0, Z, PP, QQ, Xbar):
    """
    This function generates a history of X given a history of
    technology shocks (Z), a P matrix, a Q matrix, and an
    initial X (X0).
    Note Xtilde_t = P*Xtilde_{t-1} + Q*Ztilde_t and
    X_t = Xbar*e^(Xtilde_t).
    """
    num_endog=sp.shape(PP)[1]
    T=len(Z)#sp.shape(Z)[0]
    #display(T)
    X=sp.zeros((num_endog,T))
    X[:,0]=X0
    for i in range(1,T):
        Zt=Z[i]
        Xt_1=sp.zeros((num_endog,1))
        for j in range(num_endog):
            Xt_1[j,0]=X[j,i-1]
        Xt=sp.dot(PP,Xt_1)+sp.dot(QQ,Zt)
        for k in range(num_endog):
            X[k,i]=Xt[k,0]
    exponents=sp.exp(X)
    for p in range(T):
        for q in range(num_endog):
            X[q,p]=Xbar[0,q]*exponents[q,p]
    return X
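A minimal usage sketch (assumes Xgen as above and import scipy as sp): one endogenous variable following Xtilde_t = 0.9*Xtilde_{t-1} + 0.5*Ztilde_t.

import scipy as sp

PP = sp.array([[0.9]])
QQ = sp.array([[0.5]])
Xbar = sp.array([[1.0]])
Z = 0.01*sp.random.randn(50)  # toy shock history
X0 = sp.zeros(1)              # start at zero log-deviation from steady state
X = Xgen(X0, Z, PP, QQ, Xbar)
print(X.shape)  # (1, 50): levels X_t = Xbar*e^(Xtilde_t)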
Example #11
    def Areml_K_grad_i(self, i):
        i = self.covar._actindex2index(i)
        R = sp.dot(self.WcCtildeLcA_o_WrRF(i).T, self.dWLW())
        R += R.T
        R += -self.ALcCtildeLcA_o_FRF(i)
        R += -sp.dot(self.dWLW().T, self.Cbar_o_Sr_dWLW(i))
        return R
Example #12
    def _LMLgrad_lik(self,hyperparams):
        """derivative of the likelihood parameters"""

        logtheta = hyperparams['covar']
        try:   
            KV = self.get_covariances(hyperparams)
        except linalg.LinAlgError:
            LG.error("exception caught (%s)" % (str(hyperparams)))
            return 1E6
	
        #loop through all dimensions
        #logdet term:
        Kd = 2*KV['Knoise']
        dldet = 0.5*(Kd*KV['Si']).sum(axis=0)
        #quadratic term
        y_roti = KV['y_roti']
        dlquad = -0.5 * (y_roti * Kd * y_roti).sum(axis=0)
        if VERBOSE:
            dldet_  = SP.zeros([self.d])
            dlquad_ = SP.zeros([self.d])
            for d in xrange(self.d):
                _K = KV['K'] + SP.diag(KV['Knoise'][:,d])
                _Ki = SP.linalg.inv(_K)
                dldet_[d] = 0.5* SP.dot(_Ki,SP.diag(Kd[:,d])).trace()
                dlquad_[d] = -0.5*SP.dot(self.y[:,d],SP.dot(_Ki,SP.dot(SP.diag(Kd[:,d]),SP.dot(_Ki,self.y[:,d]))))

            assert (SP.absolute(dldet-dldet_)<1E-3).all(), 'outch'
            assert (SP.absolute(dlquad-dlquad_)<1E-3).all(), 'outch'


        LMLgrad = dldet + dlquad
        RV = {'lik': LMLgrad}
    
        return RV
Example #13
    def _LML_covar(self, hyperparams):
        """

	log marginal likelihood contributions from covariance hyperparameters

	"""
        try:   
            KV = self.get_covariances(hyperparams)
        except linalg.LinAlgError:
            LG.error("exception caught (%s)" % (str(hyperparams)))
            return 1E6

        #all in one go
        #negative log marginal likelihood, see derivations
        lquad = 0.5* (KV['y_rot']*KV['Si']*KV['y_rot']).sum()
        ldet  = 0.5*-SP.log(KV['Si'][:,:]).sum()
        LML   = 0.5*self.n*self.d * SP.log(2*SP.pi) + lquad + ldet
        if VERBOSE:
            #1. slow and explicit way
            lmls_ = SP.zeros([self.d])
            for i in xrange(self.d):
                _y = self.y[:,i]
                sigma2 = SP.exp(2*hyperparams['lik'])
                _K = KV['K'] + SP.diag(KV['Knoise'][:,i])
                _Ki = SP.linalg.inv(_K)
                lquad_ = 0.5 * SP.dot(_y,SP.dot(_Ki,_y))
                ldet_ = 0.5 * SP.log(SP.linalg.det(_K))
                lmls_[i] = 0.5 * self.n* SP.log(2*SP.pi) + lquad_ + ldet_
            assert SP.absolute(lmls_.sum()-LML)<1E-3, 'outch'
        return LML
Example #14
def GP_sample_posterior(covar,X,logtheta,x,y,ns=1):
    """
    Sample from the posterior distribution of a GP
    
    x : [double]
        training inputs

    y : [double]
        training targets

    other :
        See :py:func:`gp_sample.GP_sample_prior`
    """

    KXx = covar.K(logtheta,x,X)
    KXX = covar.K(logtheta,X)
    Kxx = covar.K(logtheta,x)

    iKxx = SP.linalg.inv(Kxx+eye(Kxx.shape[0])*0.01)

    mu = SP.dot(KXx.T,SP.dot(iKxx,y)).reshape([-1,1])
    cov = KXX - SP.dot(KXx.T,SP.dot(iKxx,KXx))   
    L  = SP.linalg.cholesky(cov).T
    Y  = mu + SP.dot(L,random.randn(X.shape[0],ns))
    return Y
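The same posterior algebra, sketched self-contained with an explicit RBF kernel (the covar/logtheta machinery above is library-specific and assumed external):

import scipy as SP
import scipy.linalg

def rbf(a, b, ell=1.0):
    d = a.reshape(-1, 1) - b.reshape(1, -1)
    return SP.exp(-0.5*(d/ell)**2)

x = SP.array([0.0, 1.0, 2.0])    # training inputs
y = SP.sin(x)                    # training targets
X = SP.linspace(0.0, 2.0, 5)     # test inputs

KXx, KXX, Kxx = rbf(x, X), rbf(X, X), rbf(x, x)
iKxx = SP.linalg.inv(Kxx + SP.eye(3)*0.01)
mu = SP.dot(KXx.T, SP.dot(iKxx, y))
cov = KXX - SP.dot(KXx.T, SP.dot(iKxx, KXx))
L = SP.linalg.cholesky(cov + SP.eye(5)*1e-8).T   # lower-triangular factor
sample = mu + SP.dot(L, SP.random.randn(5))      # one posterior draw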
Example #15
    def _backwardImplementation(self, outerr, inerr, outbuf, inbuf):
        if self.onesigma:
            # algorithm for one global sigma for all mu's
            expln_params = expln(self.params)
            sumxsquared = dot(self.state, self.state)
            self._derivs += (
                sum((outbuf - inbuf) ** 2 - expln_params ** 2 * sumxsquared) / expln_params * explnPrime(self.params)
            )
            inerr[:] = outbuf - inbuf

            if not self.autoalpha and sumxsquared != 0:
                inerr /= expln_params ** 2 * sumxsquared
                self._derivs /= expln_params ** 2 * sumxsquared
        else:
            # Algorithm for separate sigma for each mu
            expln_params = expln(self.params).reshape(len(outbuf), len(self.state))
            explnPrime_params = explnPrime(self.params).reshape(len(outbuf), len(self.state))

            idx = 0
            for j in xrange(len(outbuf)):
                sigma_subst2 = dot(self.state ** 2, expln_params[j, :] ** 2)
                for i in xrange(len(self.state)):
                    self._derivs[idx] = (
                        ((outbuf[j] - inbuf[j]) ** 2 - sigma_subst2)
                        / sigma_subst2
                        * self.state[i] ** 2
                        * expln_params[j, i]
                        * explnPrime_params[j, i]
                    )
                    if self.autoalpha and sigma_subst2 != 0:
                        self._derivs[idx] /= sigma_subst2
                    idx += 1
                inerr[j] = outbuf[j] - inbuf[j]
                if not self.autoalpha and sigma_subst2 != 0:
                    inerr[j] /= sigma_subst2
Example #16
    def update_step(self, input_signal=None, teaching_signal=None):
        """Update the network with the given input and teaching output;
        input_signal and teaching_signal must be column vectors.
        Note that input_signal is u(n+1) and output is output(n+1);
        this step takes state(n) -> state(n+1).
        state_history is a list of states; every item is a row vector like (100L,)."""

        if input_signal is not None:
            assert input_signal.shape == (self.input_unit_amount, 1)
        if teaching_signal is not None:
            assert teaching_signal.shape == (self.output_unit_amount, 1)

        # 'is (not) None' checks avoid elementwise comparison on array attributes
        if self.feedback_matrix is not None and self.input_matrix is not None:
            self.state = self.unit_type_ufunc(sp.dot(self.input_matrix, input_signal) + sp.dot(self.internal_matrix, self.state) + sp.dot(self.feedback_matrix, self.output))
            if teaching_signal is None:
                self.output = sp.dot(self.output_matrix, sp.append(input_signal.T, self.state.T).T)
            else:
                self.output = teaching_signal
        elif self.feedback_matrix is not None:
            self.state = self.unit_type_ufunc(sp.dot(self.internal_matrix, self.state) + sp.dot(self.feedback_matrix, self.output))
            if teaching_signal is None:
                self.output = sp.dot(self.output_matrix, self.state)
            else:
                self.output = teaching_signal
        else:
            self.state = self.unit_type_ufunc(sp.dot(self.input_matrix, input_signal) + sp.dot(self.internal_matrix, self.state))
        if input_signal is not None:
            self.state_history.append(sp.append(input_signal.T, self.state.T))
        else:
            self.state_history.append(self.state.reshape(-1))
        self.output_history.append(self.output)
Example #17
def ar_fit(p_data, p_or_plist=range(100), selector='sbc'):
    """fits a (multivariate) AR (_A_uto_R_egrssive) model to data

    :Parameters:
        p_data : ndarray
            Data with observations on the rows and variables on the columns
        p_or_plist : list
            List of model orders to select from. This list has to be continuous
            with a step size of 1, e.g. [10,11,12,13,14]
        selector : str
            One of 'sbc' for the Schwarz Bayesian Criterion or 'fpe' for the
            log of Akaike's Final Prediction Error. This determines what metric
            is used to evaluate the best model order.
    """

    # checks and inits
    if not isinstance(p_data, N.ndarray):
        raise ValueError('p_data is not an ndarray')
    data = p_data.copy()
    n, m = data.shape
    if selector not in ['sbc', 'fpe']:
        raise ValueError('selector has to be one of: "sbc" or "fpe"!')
    if not isinstance(p_or_plist, list):
        p_or_plist = [p_or_plist]
    p_max = max(p_or_plist)
    ne = n - p_max
    npmax = m * p_max
    if ne <= npmax:
        raise ValueError('time series too short!')
    R = _ar_model_qr(data, p_max)

    # model order selection
    if len(p_or_plist) > 1:
        sbc, fpe, ldp, np = _ar_model_select(R, m, ne, p_or_plist)
        if selector == 'sbc':
            crit = sbc
        elif selector == 'fpe':
            crit = fpe
    else:
        crit = N.zeros(1)
    p_opt = crit.argmin()
    np = m * p_opt

    # get lower right triangle of R
    #
    #     | R11  R12 |
    # R = |          |
    #     |  0   R22 |
    #
    R11 = R[:np, :np]
    R12 = R[:np, npmax:]
    R22 = R[np:, npmax:]

    # build the model
    A = N.dot(NL.inv(R11), R12).T
    C = N.dot(R22.T, R22) / (ne - np)

    # return
    del R, R11, R12, R22
    return A, C, crit
Example #18
    def learn(self, X, t, tol=0.01, amax=1e10):
        u"""学習"""

        N = X.shape[0]
        a = sp.ones(N+1) # hyperparameter
        b = 1.0
        phi = sp.ones((N, N+1)) # design matrix
        phi[:,1:] = [[self._kernel(xi, xj) for xj in X] for xi in X]

        diff = 1
        while diff >= tol:
            sigma = spla.inv(sp.diag(a) + b * sp.dot(phi.T, phi))
            m = b * sp.dot(sigma, sp.dot(phi.T, t))
            gamma = sp.ones(N+1) - a * sigma.diagonal()
            anew = gamma / (m * m)
            bnew = (N -  gamma.sum()) / sp.square(spla.norm(t - sp.dot(phi, m)))
            anew[anew >= amax] = amax
            adiff, bdiff = anew - a, bnew - b
            diff = (adiff * adiff).sum() + bdiff * bdiff
            a, b = anew, bnew
            print ".",

        self._a = a
        self._b = b
        self._X = X
        self._m = m
        self._sigma = sigma
        self._amax = amax
Example #19
    def solve_pressure_eigenproblem(self, mtx, eig_problem=None,
                                    n_eigs=0, check=False):
        """G = B*AI*BT or B*AI*BT+D"""

        def get_slice(n_eigs, nn):
            if n_eigs > 0:
                ii = slice(0, n_eigs)
            elif n_eigs < 0:
                ii = slice(nn + n_eigs, nn)
            else:
                ii = slice(0, 0)
            return ii

        eig_problem = get_default(eig_problem, self.eig_problem)
        n_eigs = get_default(n_eigs, self.n_eigs)
        check = get_default(check, self.check)

        mtx_c, mtx_b, action_aibt = mtx['C'], mtx['B'], mtx['action_aibt']
        mtx_g = mtx_b * action_aibt.to_array() # mtx_b must be sparse!
        if eig_problem == 'B*AI*BT+D':
            mtx_g += mtx['D'].toarray()

        mtx['G'] = mtx_g
        output(mtx_c.shape, mtx_g.shape)

        eigs, mtx_q = eig(mtx_c.toarray(), mtx_g, method='eig.sgscipy')

        if check:
            ee = nm.diag(sc.dot(mtx_q.T * mtx_c, mtx_q)).squeeze()
            oo = nm.diag(sc.dot(sc.dot(mtx_q.T,  mtx_g), mtx_q)).squeeze()
            try:
                assert_(nm.allclose(ee, eigs))
                assert_(nm.allclose(oo, nm.ones_like(eigs)))
            except ValueError:
                debug()

        nn = mtx_c.shape[0]
        if isinstance(n_eigs, tuple):
            output('required number of eigenvalues: (%d, %d)' % n_eigs)
            if sum(n_eigs) < nn:
                ii0 = get_slice(n_eigs[0], nn)
                ii1 = get_slice(-n_eigs[1], nn)
                eigs = nm.concatenate((eigs[ii0], eigs[ii1]))
                mtx_q = nm.concatenate((mtx_q[:,ii0], mtx_q[:,ii1]), 1) 
        else:
            output('required number of eigenvalues: %d' % n_eigs)
            if (n_eigs != 0) and (abs(n_eigs) < nn):
                ii = get_slice(n_eigs, nn)
                eigs = eigs[ii]
                mtx_q = mtx_q[:,ii]

##         from sfepy.base.plotutils import pylab, iplot
##         pylab.semilogy(eigs)
##         pylab.figure(2)
##         iplot(eigs)
##         pylab.show()
##         debug()

        out = Struct(eigs=eigs, mtx_q=mtx_q)
        return out
Example #20
def multivariateNormalPdf(z, x, sigma):
    """ The pdf of a multivariate normal distribution (not in scipy).
    The sample z and the mean x should be 1-dim-arrays, and sigma a square 2-dim-array. """
    assert len(z.shape) == 1 and len(x.shape) == 1 and len(x) == len(z) and sigma.shape == (len(x), len(z))
    tmp = -0.5 * dot(dot((z - x), inv(sigma)), (z - x))
    res = (1. / power(2.0 * pi, len(z) / 2.)) * (1. / sqrt(det(sigma))) * exp(tmp)
    return res
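A cross-check against scipy.stats, assuming the bare names used above (dot, inv, det, power, pi, sqrt, exp) come from numpy and numpy.linalg:

from numpy import array, dot, power, pi, sqrt, exp
from numpy.linalg import inv, det
from scipy.stats import multivariate_normal

z = array([0.5, -0.2])
x = array([0.0, 0.0])
sigma = array([[1.0, 0.3], [0.3, 2.0]])
print(multivariateNormalPdf(z, x, sigma))
print(multivariate_normal.pdf(z, mean=x, cov=sigma))  # should agree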
Example #21
    def get(self):
        B = dok_matrix((self.rows, self.d), dtype=float32)
        for ((row, col, val), p) in self.sampler.get(with_probabilities=True):
            B[row, col] += val/(p*self.nnz)
        covariance = dot(B.transpose(), B)
        (_, s, Vt) = svds(covariance, k=self.ell, maxiter=50, return_singular_vectors=True)
        return dot(diag(sqrt(s[:self.ell])), Vt[:self.ell, :])
Example #22
    def dlsim( self, u, x0 = None, Tl = 0, Ts = 0.001 ):
        """
        @summary: Simulate the motor for one input
        
        @param u: The control signal
        @param Ts: Sampling time (0.001 by default)
        @param x0: The initial conditions on the state vector (zero by default).
        
        @return: The system response  
        """
        if x0 is not None:
            self.x0 = x0      
            
        
        if self.x0 is None:
            self.x0 = zeros( ( 5, 1 ) )
            
        self.x0[4, 0] = Tl
        ( self.Ad, self.Bd, self.Cd, self.Dd ) = self.dss( self.x0, Ts )    

        self.x0 = dot( self.Ad, self.x0 ) + dot( self.Bd, u )         
        y_out = dot( self.Cd, self.x0 )  # + dot( self.Dd, u )    

        
        return ( y_out, self.x0 )
Example #23
def fastsvd(M):
    """ Fast Singular Value Decomposition
    
    Inputs:
      M -- 2d numpy array

    Outputs:
      U,S,V -- see scipy.linalg.svd    

    """
    
    h, w = M.shape
    
    # -- thin matrix
    if h >= w:
        # subspace of M'M
        U, S, V = N.linalg.svd(N.dot(M.T, M))
        U = N.dot(M, V.T)
        # normalize
        for i in xrange(w):
            S[i] = fastnorm(U[:,i])
            U[:,i] = U[:,i] / S[i]
            
    # -- fat matrix
    else:
        # subspace of MM'
        U, S, V = N.linalg.svd(N.dot(M, M.T))
        V = N.dot(U.T, M)
        # normalize
        for i in xrange(h):
            S[i] = fastnorm(V[i])
            V[i,:] = V[i] / S[i]
            
    return U, S, V
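A quick reconstruction check; fastnorm is external in the original, and the Euclidean norm is a reasonable stand-in for it here:

import numpy as N
fastnorm = N.linalg.norm  # assumption: fastnorm is the 2-norm

M = N.random.rand(6, 3)
U, S, V = fastsvd(M)
print(N.allclose(N.dot(U*S, V), M))  # True up to round-off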
Example #24
def get_stderr_fit(f,Xdata,popt,pcov):
	Y=f(Xdata,popt)
	listdY=[]
	for i in xrange(len(popt)):
		p=popt[i]
		dp=abs(p)/1e6+1e-20
		popt[i]+=dp
		Yi=f(Xdata,popt)
		dY=(Yi-Y)/dp
		listdY.append(dY)
		popt[i]-=dp
	listdY=scipy.array(listdY)
	#listdY is an array with N rows and M columns, N=len(popt), M=len(xdata[0])
	#pcov is an array with N rows and N columns
	left=scipy.dot(listdY.T,pcov) 
	#left is an array of M rows and N columns
	right=scipy.dot(left,listdY)
	#right is an array of M rows and M columns
	sigma2y=right.diagonal()
	#sigma2y is the variance of the fit, a function of X
	mean_sigma2y=scipy.mean(right.diagonal())
	M=Xdata.shape[1];print M
	N=len(popt);print N
	avg_stddev_data=scipy.sqrt(M*mean_sigma2y/N)
	#this is because if exp error is constant at sig_dat,then mean_sigma2y=N/M*sig_dat**2
	sigmay=scipy.sqrt(sigma2y)
	return sigmay,avg_stddev_data
Example #25
    def _learnStep(self):
        """ Main part of the algorithm. """
        I = eye(self.numParameters)
        self._produceSamples()
        utilities = self.shapingFunction(self._currentEvaluations)
        utilities /= sum(utilities)  # make the utilities sum to 1
        if self.uniformBaseline:
            utilities -= 1./self.batchSize
        samples = array(map(self._base2sample, self._population))

        dCenter = dot(samples.T, utilities)
        covGradient = dot(array([outer(s,s) - I for s in samples]).T, utilities)
        covTrace = trace(covGradient)
        covGradient -= covTrace/self.numParameters * I
        dA = 0.5 * (self.scaleLearningRate * covTrace/self.numParameters * I
                    +self.covLearningRate * covGradient)

        self._lastLogDetA = self._logDetA
        self._lastInvA = self._invA

        self._center += self.centerLearningRate * dot(self._A, dCenter)
        self._A = dot(self._A, expm2(dA))
        self._invA = dot(expm2(-dA), self._invA)
        self._logDetA += 0.5 * self.scaleLearningRate * covTrace
        if self.storeAllDistributions:
            self._allDistributions.append((self._center.copy(), self._A.copy()))
Example #26
    def dw(self):
        """Calculates the Durbin-Waston statistic
        """
        de = diff(self.e,1)
        dw = dot(de,de) / dot(self.e,self.e)

        return dw
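The statistic itself, sketched standalone: for uncorrelated residuals it sits near 2, and positive autocorrelation pushes it below 2.

import numpy as np

e = np.random.randn(1000)           # stand-in white-noise residuals
de = np.diff(e, 1)
print(np.dot(de, de)/np.dot(e, e))  # approximately 2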
Example #27
def basex_core_transform(rawdata, M_vert, M_horz, Mc_vert,
                         Mc_horz, vert_left, horz_right, dr=1.0):
    """
    This is the internal function
    that does the actual BASEX transform. It requires 
    that the matrices of basis set coefficients be passed. 
    

    Parameters
    ----------
    rawdata : NxM numpy array
        the raw image.
    M_vert_etc. : Numpy arrays
        2D arrays given by the basis set calculation function
    dr : float
        pixel size. This only affects the absolute scaling of the output.


    Returns
    -------
    IM : NxM numpy array
        The Abel-transformed image, a slice of the 3D distribution
    """

    # Reconstructing image  - This is where the magic happens
    Ci = scipy.dot(scipy.dot(vert_left, rawdata), horz_right) # previously: vert_left.dot(rawdata).dot(horz_right)

    # use a heuristic scaling factor to match the analytical Abel transform
    # For more info see https://github.com/PyAbel/PyAbel/issues/4
    MAGIC_NUMBER = 1.1122244156826457
    Ci *= MAGIC_NUMBER/dr
    IM = scipy.dot(scipy.dot(Mc_vert, Ci), Mc_horz.T)    # Previously: Mc_vert.dot(Ci).dot(Mc_horz.T)
    # P = dot(dot(Mc,Ci),M.T) # This calculates the projection,
    # which should recreate the original image
    return IM
Example #28
    def lsaTransform(self,dimensions=1):
        """ Calculate SVD of objects matrix: U . SIGMA . VT = MATRIX 
            Reduce the dimension of sigma by specified factor producing sigma'. 
            Then dot product the matrices:  U . SIGMA' . VT = MATRIX'
        """
        rows,cols= self.matrix.shape

        if dimensions <= rows: # it's a valid reduction

            #Sigma comes out as a list rather than a matrix
            u,sigma,vt = linalg.svd(self.matrix)

            #Dimension reduction, build SIGMA'
            for index in xrange(rows-dimensions, rows):
                sigma[index]=0

            #print linalg.diagsvd(sigma,len(self.matrix), len(vt))        

            #Reconstruct MATRIX'
            reconstructedMatrix= dot(dot(u,linalg.diagsvd(sigma,len(self.matrix),len(vt))),vt)

            #Save transform
            self.matrix=reconstructedMatrix

        else:
            print "dimension reduction cannot be greater than %s" % rows
Example #29
    def fit(self, X, y, useQR = True, addConstant = True):    
        '''Solve y = Xb.
        
        Parameters
        ----------
        x : array, shape (M, N)
        y : array, shape (M,)
        useQR : boolean
            Whether or not to use QR decomposition to fit regression line.
        addConstant: boolean
            Whether or not to add a constant column to X
        '''        
        if y.shape[0] != X.shape[0]:
            raise ValueError('incompatible dimensions')
        if addConstant:
            self.X       = c_[ones(X.shape[0]), X]
        else:
            self.X       = X

        self.y       = y
        self.X_columns = getattr(X,'columns', None)
        self.y_columns = getattr(y,'columns', None)
        
        if useQR:
            # economic-mode QR keeps R square so solve() is well-posed
            Q,R = scipy.linalg.qr(self.X, mode='economic')
            Qty = dot(Q.T, y)
            self.b = scipy.linalg.solve(R,Qty)
        else:
            self.inv_xx = inv(dot(self.X.T,self.X))
            xy = dot(self.X.T,self.y)
            self.b = dot(self.inv_xx,xy)

        self.computeStatistics()
Example #30
    def estimate(self):

        # estimating coefficients, and basic stats

        self.inv_xx = inv(dot(self.x.T,self.x))
            
        xy = dot(self.x.T,self.y)
        self.betas = dot(self.inv_xx,xy)                    # estimate coefficients

        self.nobs = self.y.shape[0]                     # number of observations
        self.ncoef = self.x.shape[1]                    # number of coef.
        self.df_e = self.nobs - self.ncoef              # degrees of freedom, error 
        self.df_r = self.ncoef - 1                      # degrees of freedom, regression 

        self.e = self.y - dot(self.x,self.betas)            # residuals
        self.sse = dot(self.e,self.e)/self.df_e         # estimated error variance (SSE/df_e)
        self.se = sqrt(diagonal(self.sse*self.inv_xx))  # coef. standard errors
        self.t = self.betas / self.se                       # coef. t-statistics

        self.p = (1-stats.t.cdf(abs(self.t), self.df_e)) * 2    # coef. p-values


        self.R2 = 1 - self.e.var()/self.y.var()         # model R-squared
        self.R2adj = 1-(1-self.R2)*((self.nobs-1)/(self.nobs-self.ncoef))   # adjusted R-square

        self.F = (self.R2/self.df_r) / ((1-self.R2)/self.df_e)  # model F-statistic
        self.Fpv = 1-stats.f.cdf(self.F, self.df_r, self.df_e)  # F-statistic p-value
Example #31
def simple_interaction_kronecker(snps,
                                 phenos,
                                 covs=None,
                                 Acovs=None,
                                 Asnps1=None,
                                 Asnps0=None,
                                 K1r=None,
                                 K1c=None,
                                 K2r=None,
                                 K2c=None,
                                 covar_type='lowrank_diag',
                                 rank=1,
                                 NumIntervalsDelta0=100,
                                 NumIntervalsDeltaAlt=0,
                                 searchDelta=False):
    """
    I-variate fixed effects interaction test for phenotype specific SNP effects

    Args:
        snps:   [N x S] SP.array of S SNPs for N individuals (test SNPs)
        phenos: [N x P] SP.array of P phenotypes for N individuals
        covs:           list of SP.arrays holding covariates. Each covs[i] has one corresponding Acovs[i]
        Acovs:          list of SP.arrays holding the phenotype design matrices for covariates.
                        Each covs[i] has one corresponding Acovs[i].
        Asnps1:         list of SP.arrays of I interaction variables to be tested for N
                        individuals. Note that it is assumed that Asnps0 is already included.
                        If not provided, the alternative model will be the independent model
        Asnps0:         single SP.array of I0 interaction variables to be included in the
                        background model when testing for interaction with Inters
        K1r:    [N x N] SP.array of LMM-covariance/kinship coefficients (optional)
                        If not provided, then linear regression analysis is performed
        K1c:    [P x P] SP.array of LMM-covariance/kinship coefficients (optional)
                        If not provided, then linear regression analysis is performed
        K2r:    [N x N] SP.array of LMM-covariance/kinship coefficients (optional)
                        If not provided, then linear regression analysis is performed
        K2c:    [P x P] SP.array of LMM-covariance/kinship coefficients (optional)
                        If not provided, then linear regression analysis is performed
        covar_type:     type of covariance to use. Default 'freeform'. possible values are
                        'freeform': free form optimization,
                        'fixed': use a fixed matrix specified in covar_K0,
                        'diag': optimize a diagonal matrix,
                        'lowrank': optimize a low rank matrix. The rank of the lowrank part is specified in the variable rank,
                        'lowrank_id': optimize a low rank matrix plus the weight of a constant diagonal matrix. The rank of the lowrank part is specified in the variable rank,
                        'lowrank_diag': optimize a low rank matrix plus a free diagonal matrix. The rank of the lowrank part is specified in the variable rank,
                        'block': optimize the weight of a constant P x P block matrix of ones,
                        'block_id': optimize the weight of a constant P x P block matrix of ones plus the weight of a constant diagonal matrix,
                        'block_diag': optimize the weight of a constant P x P block matrix of ones plus a free diagonal matrix,
        rank:           rank of a possible lowrank component (default 1)
        NumIntervalsDelta0:  number of steps for delta optimization on the null model (100)
        NumIntervalsDeltaAlt:number of steps for delta optimization on the alt. model (0 - no optimization)
        searchDelta:     Carry out delta optimization on the alternative model? if yes We use NumIntervalsDeltaAlt steps
    Returns:
        pv:     P-values of the interaction test
        pv0:    P-values of the null model
        pvAlt:  P-values of the alternative model
    """
    S = snps.shape[1]
    #0. checks
    N = phenos.shape[0]
    P = phenos.shape[1]

    if K1r is None:
        K1r = SP.dot(snps, snps.T)
    else:
        assert K1r.shape[0] == N, 'K1r: dimension mismatch'
        assert K1r.shape[1] == N, 'K1r: dimension mismatch'

    if K2r is None:
        K2r = SP.eye(N)
    else:
        assert K2r.shape[0] == N, 'K2r: dimension mismatch'
        assert K2r.shape[1] == N, 'K2r: dimension mismatch'

    covs, Acovs = updateKronCovs(covs, Acovs, N, P)

    #Asnps can be several designs
    if (Asnps0 is None):
        Asnps0 = [SP.ones([1, P])]
    if Asnps1 is None:
        Asnps1 = [SP.eye(P)]
    if (type(Asnps0) != list):
        Asnps0 = [Asnps0]
    if (type(Asnps1) != list):
        Asnps1 = [Asnps1]
    assert (len(Asnps0) == 1) and (
        len(Asnps1) >
        0), "need at least one Snp design matrix for null and alt model"

    #one row per column design matrix
    pv = SP.zeros((len(Asnps1), snps.shape[1]))
    lrt = SP.zeros((len(Asnps1), snps.shape[1]))
    pvAlt = SP.zeros((len(Asnps1), snps.shape[1]))
    lrtAlt = SP.zeros((len(Asnps1), snps.shape[1]))

    #1. run GP model to infer suitable covariance structure
    if K1c is None or K2c is None:
        vc = estimateKronCovariances(phenos=phenos,
                                     K1r=K1r,
                                     K2r=K2r,
                                     K1c=K1c,
                                     K2c=K2c,
                                     covs=covs,
                                     Acovs=Acovs,
                                     covar_type=covar_type,
                                     rank=rank)
        K1c = vc.getEstTraitCovar(0)
        K2c = vc.getEstTraitCovar(1)
    else:
        assert K1c.shape[0] == P, 'K1c: dimension mismatch'
        assert K1c.shape[1] == P, 'K1c: dimension mismatch'
        assert K2c.shape[0] == P, 'K2c: dimension mismatch'
        assert K2c.shape[1] == P, 'K2c: dimension mismatch'

    #2. run kroneckerLMM for null model
    lmm = limix.CKroneckerLMM()
    lmm.setK1r(K1r)
    lmm.setK1c(K1c)
    lmm.setK2r(K2r)
    lmm.setK2c(K2c)
    lmm.setSNPs(snps)
    #add covariates
    for ic in range(len(Acovs)):
        lmm.addCovariates(covs[ic], Acovs[ic])
    lmm.setPheno(phenos)

    #delta search on alt. model?
    if searchDelta:
        lmm.setNumIntervalsAlt(NumIntervalsDeltaAlt)
        lmm.setNumIntervals0_inter(NumIntervalsDeltaAlt)
    else:
        lmm.setNumIntervalsAlt(0)
        lmm.setNumIntervals0_inter(0)

    lmm.setNumIntervals0(NumIntervalsDelta0)
    #add SNP design
    lmm.setSNPcoldesign0_inter(Asnps0[0])
    for iA in range(len(Asnps1)):
        lmm.setSNPcoldesign(Asnps1[iA])
        lmm.process()

        pvAlt[iA, :] = lmm.getPv()[0]
        pv[iA, :] = lmm.getPv()[1]
        pv0 = lmm.getPv()[2]
    return pv, pv0, pvAlt
Example #32
                             delimiter='\t'))).astype(float)

# remove snp label
X = X[:, :n_s]
n_f = X.shape[0]
for i in xrange(n_f):
    sd = (X[i]).std()
    if sd == 0:
        X[i] = X[i] - (X[i]).mean()
    else:
        X[i] = (X[i] - (X[i]).mean()) / sd
X = X.T
print X
print X.shape

K1 = 1.0 / n_f * SP.dot(X, X.T)
print K1.shape
K = K1
print K

parents = SP.array(list(csv.reader(open('parents.txt', 'rb'),
                                   delimiter='\t'))).astype(int)
parents = parents[:4754, :]
idxm = range(1, 191)
SP.random.shuffle(idxm)
idxm = idxm[:5]
idxf = range(1, 26)
SP.random.shuffle(idxf)
idxf = idxf[:5]

train = []
Example #33
def content_based():

    dir_name = os.path.dirname(__file__)

    f_path = os.path.join(dir_name, "anonymous-msweb.data")
    raw_data = pd.read_csv(f_path, header=None, skiprows=7)

    #creating user profile
    user_activity = raw_data.loc[raw_data[0] != "A"]

    user_activity.columns = ['category', 'value', 'vote', 'desc', 'url']

    #extract only first two columns
    user_activity = user_activity[['category', 'value']]

    site_count = len(
        user_activity.loc[user_activity['category'] == "V"].value.unique())
    case_count = len(
        user_activity.loc[user_activity['category'] == "C"].value.unique())

    print ' case/rating count: {},  site_count: {}'.format(
        case_count, site_count)

    tmp = 0
    nextrow = False

    lastindex = user_activity.index[len(user_activity) - 1]

    for index, row in user_activity.iterrows():
        if (index <= lastindex):
            if (user_activity.loc[index, 'category'] == "C"):
                # append two columns userid and webid to the user_activity dataframe
                tmp = 0

                userid = user_activity.loc[index, 'value']
                tmp = userid
                nextrow = True
                # C records always followed by V records,
            elif (user_activity.loc[index, 'category'] == "V"
                  and nextrow == True):

                webid = user_activity.loc[index, 'value']
                user_activity.loc[index, 'webid'] = webid
                # retrieve userid from previous C record, temporarily stored in tmp
                user_activity.loc[index, 'userid'] = tmp
                if (index != lastindex
                        and user_activity.loc[index + 1, 'category'] == "C"):
                    # the last 'V' record for previous C record
                    nextrow = False

    # only keep all V records, which contains both webids and userids
    user_activity = user_activity[user_activity['category'] == "V"]

    # only keep columns userid and webid
    user_activity = user_activity[['userid', 'webid']]
    user_activity_sort = user_activity.sort('webid', ascending=True)

    user_activity['userid'].unique().shape[0]
    user_activity['webid'].unique().shape[0]

    plt.hist(user_activity['webid'])
    plt.show()

    sLength = len(user_activity_sort['webid'])

    #add a rating column, default value: 1
    user_activity_sort['rating'] = pd.Series(np.ones((sLength, )),
                                             index=user_activity.index)

    #create a pivot, index is userid, columns are different webid, value = count the occurence of [userid, webid], set to 0 if none.
    rating_matrix = user_activity_sort.pivot(index='userid',
                                             columns='webid',
                                             values='rating').fillna(0)

    rating_matrix = rating_matrix.to_dense().as_matrix()

    #creating item profile
    items = raw_data.loc[raw_data[0] == "A"]
    items.columns = ['record', 'webid', 'vote', 'desc', 'url']
    items = items[['webid', 'desc']]
    items['webid'].unique().shape[0]

    items_rated = items[items['webid'].isin(user_activity['webid'].tolist())]
    items_rated_sorted = items_rated.sort('webid', ascending=True)

    #project items to vector space using tfidf based on 'desc'

    v = TfidfVectorizer(stop_words="english",
                        max_features=100,
                        ngram_range=(0, 3),
                        sublinear_tf=True)

    #transform items desc to doc-term matrix
    x = v.fit_transform(items_rated_sorted['desc'])
    v.get_feature_names()  # must come after fitting

    item_profile = x.todense()  # scipy sparse uses todense(), not to_dense()
    np.savetxt("tf_idf", x.todense())

    # dot product of rating matrix and item profile to get user_profile

    user_profile = dot(
        rating_matrix,
        item_profile) / linalg.norm(rating_matrix) / linalg.norm(item_profile)

    # recommendations based on the smilarity between  user profile an item profile

    similarityCalc = sklearn.metrics.pairwise.cosine_similarity(
        user_profile, item_profile, dense_output=True)

    final_pred = np.where(similarityCalc > 0.6, 1, 0)
    np.savetxt('pred', final_pred)
    final_pred.shape
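The core recommendation step, sketched standalone: build user profiles from a ratings matrix and an item profile, then threshold cosine similarity (shapes here are illustrative).

import numpy as np
import sklearn.metrics.pairwise

rating_matrix = np.array([[1.0, 0.0, 1.0],
                          [0.0, 1.0, 0.0]])  # 2 users x 3 items
item_profile = np.random.rand(3, 5)          # 3 items x 5 tf-idf terms

user_profile = (np.dot(rating_matrix, item_profile)
                / np.linalg.norm(rating_matrix)
                / np.linalg.norm(item_profile))
similarity = sklearn.metrics.pairwise.cosine_similarity(
    user_profile, item_profile, dense_output=True)
print(np.where(similarity > 0.6, 1, 0))  # 2 x 3 recommendation mask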
Example #34
def reduced_track_indices(coordinate_list, timesteps=None):
    # returns a list of indices of trackpoints that constitute the reduced track
    # takes a list of Cartesian coordinate tuples
    m = len(coordinate_list)
    if m == 0:
        return []
    if timesteps is not None and len(timesteps) != len(coordinate_list):
        timesteps = None

    # number of dimensions
    d = len(coordinate_list[0])

    # remove identical entries (can speed up algorithm considerably)
    original_indices = [0]
    points = [{'p': coordinate_list[0], 'weight': 1}]
    if timesteps is not None:
        points[0]['t'] = timesteps[0]
    for i in range(1, m):
        if False in [
                coordinate_list[i - 1][j] == coordinate_list[i][j]
                for j in range(d)
        ]:
            original_indices.append(i)
            points.append({'p': coordinate_list[i], 'weight': 1})
            if timesteps is not None:
                points[-1]['t'] = timesteps[i]
        else:
            points[-1]['weight'] += 1
    n = len(points)

    # progress printing initialisations
    progress_printed = False
    progress = None
    tprint = time.time()

    # execute Dijkstra-like algorithm on points
    points[0]['cost'] = 1.0
    points[0]['prev'] = -1

    for i2 in range(1, n):
        penalties = {}
        costmin = float('inf')
        for i1 in reversed(list(range(i2))):
            p1 = array(points[i1]['p'])
            p2 = array(points[i2]['p'])
            seglength = norm(p2 - p1)

            # estimate speed between p1 and p2
            if timesteps is not None:
                dt = (points[i2]['t'] - points[i1]['t']).total_seconds()
                v = seglength / max(0.1, dt)
            else:
                v = seglength / float(i2 - i1)  # assume 1s time spacing

            max_sep = options.max_sep0 + v * options.max_sep_time
            if options.max_dist >= 0:
                max_sep = min(max_sep, options.max_sep)

            if seglength >= max_sep and i1 != i2 - 1:
                # point separation is too far
                # but always accept direct predecessor i1 = i2 - 1
                if seglength >= max_sep + options.max_dist:
                    # no chance to find a valid earlier predecessor point
                    break
                else:
                    continue

            if points[i1]['cost'] + 1.0 > costmin:
                # the possible predecessor i1 is already too bad.
                continue

            i1_i2_segment_valid = True
            lower_i1_possible = True
            distance_squaremax = 0.0
            distance_squaresum = 0.0
            distances_squared = []
            # iterate all medium points between i1 and i2
            for im in range(i1 + 1, i2):
                pm = array(points[im]['p'])
                d = distance(p1, pm, p2, options.ele_weight)
                if d <= options.max_dist:
                    d_sq = (d / options.max_dist)**2
                    distance_squaremax = max(distance_squaremax, d_sq)
                    distance_squaresum += points[im]['weight'] * d_sq
                    distances_squared.append(d_sq)
                else:
                    i1_i2_segment_valid = False

                    # check if connection to any further point i1 is impossible
                    d1 = pl.dot(p1 - p2, p1 - p2)
                    d2 = pl.dot(pm - p2, pm - p2)
                    dd = options.max_dist**2
                    d1d2 = pl.dot(p1 - p2, pm - p2)
                    # formula from cosines of point separation angle and cone-opening angles around points
                    if d1 > dd and d2 > dd and (d1d2 +
                                                dd)**2 < (d2 - dd) * (d1 - dd):
                        lower_i1_possible = False
                        break

            if not lower_i1_possible:
                break

            if i1_i2_segment_valid:
                if options.weighting == 'sqrdistmax':
                    penalties[i1] = distance_squaremax
                elif options.weighting == 'sqrdistsum':
                    penalties[i1] = distance_squaresum
                elif options.weighting == 'sqrlength':
                    penalties[i1] = (seglength / max_sep)**2
                elif options.weighting == 'mix':
                    penalties[i1] = (distance_squaremax *
                                     (1.0 + seglength / max_sep))
                elif options.weighting == 'exp':
                    penalties[i1] = 0.5 * sum([
                        0.5**i * d for i, d in enumerate(
                            sorted(distances_squared, reverse=True))
                    ])
                else:
                    penalties[i1] = 0.0

                # add a penalty for kinks
                if options.bend > 0.:
                    if points[i1]['prev'] != -1:
                        p0 = array(points[points[i1]['prev']]['p'])
                        v0 = p1 - p0
                        v1 = p2 - p1
                        if norm(v0) > 0. and norm(v1) > 0.:
                            v0 /= norm(v0)
                            v1 /= norm(v1)
                            kink = (1.0 - dot(v0, v1)) / 2.0
                            penalties[i1] += options.bend * kink

        # find best predecessor
        imin = None
        costmin = float('inf')
        for prev, penalty in penalties.items():
            # cost function is sum of points used (1.0) plus penalties
            cost = points[prev]['cost'] + 1.0 + penalty
            if cost < costmin:
                imin = prev
                costmin = cost
        points[i2]['cost'] = costmin
        points[i2]['prev'] = imin

        # print progress
        if options.verbose == 1 and (
                100 * i2) / n > progress and time.time() >= tprint + 1:
            tprint = time.time()
            progress = (100 * i2) / n
            print('\r', progress, '% of', n, 'points', end='')
            stdout.flush()
            progress_printed = True

    if progress_printed:
        print('\r', end='')

    # trace route backwards to collect final points
    final_pnums = []
    i = n - 1
    while i >= 0:
        final_pnums = [i] + final_pnums
        i = points[i]['prev']

    return [original_indices[i] for i in final_pnums]
Example #35
def simple_interaction_kronecker_deprecated(snps,
                                            phenos,
                                            covs=None,
                                            Acovs=None,
                                            Asnps1=None,
                                            Asnps0=None,
                                            K1r=None,
                                            K1c=None,
                                            K2r=None,
                                            K2c=None,
                                            covar_type='lowrank_diag',
                                            rank=1,
                                            searchDelta=False):
    """
    I-variate fixed effects interaction test for phenotype specific SNP effects.
    (Runs multiple likelihood ratio tests and computes the P-values in python from the likelihood ratios)

    Args:
        snps:   [N x S] SP.array of S SNPs for N individuals (test SNPs)
        phenos: [N x P] SP.array of P phenotypes for N individuals
        covs:           list of SP.arrays holding covariates. Each covs[i] has one corresponding Acovs[i]
        Acovs:          list of SP.arrays holding the phenotype design matrices for covariates.
                        Each covs[i] has one corresponding Acovs[i].
        Asnps1:         list of SP.arrays of I interaction variables to be tested for N
                        individuals. Note that it is assumed that Asnps0 is already included.
                        If not provided, the alternative model will be the independent model
        Asnps0:         single SP.array of I0 interaction variables to be included in the
                        background model when testing for interaction with Inters
        K1r:    [N x N] SP.array of LMM-covariance/kinship coefficients (optional)
                        If not provided, then linear regression analysis is performed
        K1c:    [P x P] SP.array of LMM-covariance/kinship coefficients (optional)
                        If not provided, then linear regression analysis is performed
        K2r:    [N x N] SP.array of LMM-covariance/kinship coefficients (optional)
                        If not provided, then linear regression analysis is performed
        K2c:    [P x P] SP.array of LMM-covariance/kinship coefficients (optional)
                        If not provided, then linear regression analysis is performed
        covar_type:     type of covariance to use. Default 'freeform'. possible values are
                        'freeform': free form optimization,
                        'fixed': use a fixed matrix specified in covar_K0,
                        'diag': optimize a diagonal matrix,
                        'lowrank': optimize a low rank matrix. The rank of the lowrank part is specified in the variable rank,
                        'lowrank_id': optimize a low rank matrix plus the weight of a constant diagonal matrix. The rank of the lowrank part is specified in the variable rank,
                        'lowrank_diag': optimize a low rank matrix plus a free diagonal matrix. The rank of the lowrank part is specified in the variable rank,
                        'block': optimize the weight of a constant P x P block matrix of ones,
                        'block_id': optimize the weight of a constant P x P block matrix of ones plus the weight of a constant diagonal matrix,
                        'block_diag': optimize the weight of a constant P x P block matrix of ones plus a free diagonal matrix,
        rank:           rank of a possible lowrank component (default 1)
        searchDelta:    Boolean indicator if delta is optimized during SNP testing (default False)

    Returns:
        pv:     P-values of the interaction test
        lrt0:   log likelihood ratio statistics of the null model
        pv0:    P-values of the null model
        lrt:    log likelihood ratio statistics of the interaction test
        lrtAlt: log likelihood ratio statistics of the alternative model
        pvAlt:  P-values of the alternative model
    """
    S = snps.shape[1]
    #0. checks
    N = phenos.shape[0]
    P = phenos.shape[1]

    if K1r is None:
        K1r = SP.dot(snps, snps.T)
    else:
        assert K1r.shape[0] == N, 'K1r: dimension mismatch'
        assert K1r.shape[1] == N, 'K1r: dimension mismatch'

    if K2r is None:
        K2r = SP.eye(N)
    else:
        assert K2r.shape[0] == N, 'K2r: dimension mismatch'
        assert K2r.shape[1] == N, 'K2r: dimension mismatch'

    covs, Acovs = updateKronCovs(covs, Acovs, N, P)

    #Asnps can be several designs
    if (Asnps0 is None):
        Asnps0 = [SP.ones([1, P])]
    if Asnps1 is None:
        Asnps1 = [SP.eye(P)]
    if (type(Asnps0) != list):
        Asnps0 = [Asnps0]
    if (type(Asnps1) != list):
        Asnps1 = [Asnps1]
    assert (len(Asnps0) == 1) and (
        len(Asnps1) >
        0), "need at least one Snp design matrix for null and alt model"

    #one row per column design matrix
    pv = SP.zeros((len(Asnps1), snps.shape[1]))
    lrt = SP.zeros((len(Asnps1), snps.shape[1]))
    pvAlt = SP.zeros((len(Asnps1), snps.shape[1]))
    lrtAlt = SP.zeros((len(Asnps1), snps.shape[1]))

    #1. run GP model to infer suitable covariance structure
    if K1c is None or K2c is None:
        vc = estimateKronCovariances(phenos=phenos,
                                     K1r=K1r,
                                     K2r=K2r,
                                     K1c=K1c,
                                     K2c=K2c,
                                     covs=covs,
                                     Acovs=Acovs,
                                     covar_type=covar_type,
                                     rank=rank)
        K1c = vc.getEstTraitCovar(0)
        K2c = vc.getEstTraitCovar(1)
    else:
        assert K1c.shape[0] == P, 'K1c: dimension mismatch'
        assert K1c.shape[1] == P, 'K1c: dimension mismatch'
        assert K2c.shape[0] == P, 'K2c: dimension mismatch'
        assert K2c.shape[1] == P, 'K2c: dimension mismatch'

    #2. run kroneckerLMM for null model
    lmm = limix.CKroneckerLMM()
    lmm.setK1r(K1r)
    lmm.setK1c(K1c)
    lmm.setK2r(K2r)
    lmm.setK2c(K2c)
    lmm.setSNPs(snps)
    #add covariates
    for ic in range(len(Acovs)):
        lmm.addCovariates(covs[ic], Acovs[ic])
    lmm.setPheno(phenos)
    if searchDelta: lmm.setNumIntervalsAlt(100)
    else: lmm.setNumIntervalsAlt(0)
    lmm.setNumIntervals0(100)
    #add SNP design
    lmm.setSNPcoldesign(Asnps0[0])
    lmm.process()
    dof0 = Asnps0[0].shape[0]
    pv0 = lmm.getPv()
    lrt0 = ST.chi2.isf(pv0, dof0)
    for iA in range(len(Asnps1)):
        dof1 = Asnps1[iA].shape[0]
        dof = dof1 - dof0
        lmm.setSNPcoldesign(Asnps1[iA])
        lmm.process()
        pvAlt[iA, :] = lmm.getPv()[0]
        lrtAlt[iA, :] = ST.chi2.isf(pvAlt[iA, :], dof1)
        lrt[iA, :] = lrtAlt[iA, :] - lrt0[
            0]  # Don't need the likelihood ratios, as null model is the same between the two models
        pv[iA, :] = ST.chi2.sf(lrt[iA, :], dof)
    return pv, lrt0, pv0, lrt, lrtAlt, pvAlt
Example #36
def forward_lmm_kronecker(snps,
                          phenos,
                          Asnps=None,
                          Acond=None,
                          K1r=None,
                          K1c=None,
                          K2r=None,
                          K2c=None,
                          covs=None,
                          Acovs=None,
                          threshold=5e-8,
                          maxiter=2,
                          qvalues=False,
                          update_covariances=False,
                          **kw_args):
    """
    Kronecker fixed effects test with forward selection

    Args:
        snps:   [N x S] SP.array of S SNPs for N individuals (test SNPs)
        pheno:  [N x P] SP.array of 1 phenotype for N individuals
        K:      [N x N] SP.array of LMM-covariance/kinship coefficients (optional)
                        If not provided, then linear regression analysis is performed
        covs:   [N x D] SP.array of D covariates for N individuals
        threshold:      (float) P-value threshold for inclusion in forward selection (default 5e-8)
        maxiter:        (int) maximum number of interaction scans. First scan is
                        without inclusion, so maxiter-1 inclusions can be performed. (default 2)
        qvalues:        Use q-value threshold and return q-values in addition (default False)
        update_covar:   Boolean indicator if covariances should be re-estimated after each forward step (default False)

    Returns:
        lm:             limix LMM object
        resultStruct with elements:
            iadded:         array of indices of SNPs included, in order of inclusion
            pvadded:        array of P-values of the included SNPs in the iteration
                            before their inclusion
            pvall:   [n_designs*maxiter x S] SP.array of P-values for all iterations
        Optional:      corresponding q-values
            qvadded
            qvall
    """

    #0. checks
    N = phenos.shape[0]
    P = phenos.shape[1]

    if K1r is None:
        K1r = SP.dot(snps, snps.T)
    else:
        assert K1r.shape[0] == N, 'K1r: dimensions mismatch'
        assert K1r.shape[1] == N, 'K1r: dimensions mismatch'

    if K2r is None:
        K2r = SP.eye(N)
    else:
        assert K2r.shape[0] == N, 'K2r: dimensions mismatch'
        assert K2r.shape[1] == N, 'K2r: dimensions mismatch'

    covs, Acovs = updateKronCovs(covs, Acovs, N, P)

    if Asnps is None:
        Asnps = [SP.ones([1, P])]
    if (type(Asnps) != list):
        Asnps = [Asnps]
    assert len(Asnps) > 0, "need at least one SNP design matrix"

    if Acond is None:
        Acond = Asnps
    if (type(Acond) != list):
        Acond = [Acond]
    assert len(Acond) > 0, "need at least one SNP design matrix"

    #1. run GP model to infer suitable covariance structure
    if K1c is None or K2c is None:
        vc = estimateKronCovariances(phenos=phenos,
                                     K1r=K1r,
                                     K2r=K2r,
                                     K1c=K1c,
                                     K2c=K2c,
                                     covs=covs,
                                     Acovs=Acovs,
                                     **kw_args)
        K1c = vc.getEstTraitCovar(0)
        K2c = vc.getEstTraitCovar(1)
    else:
        vc = None
        assert K1c.shape[0] == P, 'K1c: dimensions mismatch'
        assert K1c.shape[1] == P, 'K1c: dimensions mismatch'
        assert K2c.shape[0] == P, 'K2c: dimensions mismatch'
        assert K2c.shape[1] == P, 'K2c: dimensions mismatch'
    t0 = time.time()
    lm, pv = kronecker_lmm(snps=snps,
                           phenos=phenos,
                           Asnps=Asnps,
                           K1r=K1r,
                           K2r=K2r,
                           K1c=K1c,
                           K2c=K2c,
                           covs=covs,
                           Acovs=Acovs)

    #bookkeeping for the forward-selection loop
    iadded = []
    pvadded = []
    qvadded = []
    time_el = []
    pvall = SP.zeros((pv.shape[0] * maxiter, pv.shape[1]))
    qvall = None
    t1 = time.time()
    print("finished GWAS testing in %.2f seconds" % (t1 - t0))
    time_el.append(t1 - t0)
    pvall[0:pv.shape[0], :] = pv
    imin = SP.unravel_index(pv.argmin(), pv.shape)
    score = pv[imin].min()
    niter = 1
    if qvalues:
        assert pv.shape[0] == 1, \
            "q-values require a single SNP design (pv.shape[0] == 1); untested with the FDR package otherwise"
        qvall = SP.zeros((maxiter, snps.shape[1]))
        qv = FDR.qvalues(pv)
        qvall[0:1, :] = qv
        score = qv[imin]
    #loop:
    while (score < threshold) and niter < maxiter:
        t0 = time.time()
        pvadded.append(pv[imin])
        iadded.append(imin)
        if qvalues:
            qvadded.append(qv[imin])
        if update_covariances and vc is not None:
            vc.addFixedTerm(snps[:, imin[1]:(imin[1] + 1)], Acond[imin[0]])
            vc.setScales()  # CL: unclear why this is needed, but findLocalOptima crashes without it because vc.noisPos is None
            vc.findLocalOptima(fast=True)
            K1c = vc.getEstTraitCovar(0)
            K2c = vc.getEstTraitCovar(1)
            lm.setK1c(K1c)
            lm.setK2c(K2c)
        lm.addCovariates(snps[:, imin[1]:(imin[1] + 1)], Acond[imin[0]])
        for i in range(len(Asnps)):
            #add SNP design
            lm.setSNPcoldesign(Asnps[i])
            lm.process()
            pv[i, :] = lm.getPv()[0]
        pvall[niter * pv.shape[0]:(niter + 1) * pv.shape[0]] = pv
        imin = SP.unravel_index(pv.argmin(), pv.shape)
        if qvalues:
            qv = FDR.qvalues(pv)
            qvall[niter:niter + 1, :] = qv
            score = qv[imin].min()
        else:
            score = pv[imin].min()
        t1 = time.time()
        print("finished GWAS testing in %.2f seconds" % (t1 - t0))
        time_el.append(t1 - t0)
        niter = niter + 1
    RV = {}
    RV['iadded'] = iadded
    RV['pvadded'] = pvadded
    RV['pvall'] = pvall
    RV['time_el'] = time_el
    if qvalues:
        RV['qvall'] = qvall
        RV['qvadded'] = qvadded
    return lm, RV
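
A hypothetical call sketch for forward_lmm_kronecker with toy shapes; it assumes limix and its dependencies are importable, and the dimensions below are arbitrary (numpy in place of the old scipy aliases):

import numpy as np

# Toy dimensions: N individuals, S SNPs, P traits (all illustrative).
N, S, P = 100, 500, 2
snps = (np.random.rand(N, S) < 0.5).astype(float)
phenos = np.random.randn(N, P)

lm, RV = forward_lmm_kronecker(snps, phenos, threshold=5e-8, maxiter=3)
print(RV['iadded'])       # (design index, SNP index) tuples, in inclusion order
print(RV['pvall'].shape)  # (n_designs * maxiter, S) p-values per iteration
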
Example #37
0
def kronecker_lmm(snps,
                  phenos,
                  covs=None,
                  Acovs=None,
                  Asnps=None,
                  K1r=None,
                  K1c=None,
                  K2r=None,
                  K2c=None,
                  covar_type='lowrank_diag',
                  rank=1,
                  NumIntervalsDelta0=100,
                  NumIntervalsDeltaAlt=0,
                  searchDelta=False):
    """
    simple wrapper for kroneckerLMM code

    Args:
        snps:   [N x S] SP.array of S SNPs for N individuals (test SNPs)
        phenos: [N x P] SP.array of P phenotypes for N individuals
        covs:           list of SP.arrays holding covariates. Each covs[i] has one corresponding Acovs[i]
        Acovs:          list of SP.arrays holding the phenotype design matrices for covariates.
                        Each covs[i] has one corresponding Acovs[i].
        Asnps:          list of SP.arrays of SNP column designs to be tested.
                        If not provided, a single [1 x P] design of ones is used,
                        i.e. a common effect across all traits.
        K1r:    [N x N] SP.array of LMM-covariance/kinship coefficients (optional)
                        If not provided, then linear regression analysis is performed
        K1c:    [P x P] SP.array of LMM-covariance/kinship coefficients (optional)
                        If not provided, then linear regression analysis is performed
        K2r:    [N x N] SP.array of LMM-covariance/kinship coefficients (optional)
                        If not provided, then linear regression analysis is performed
        K2c:    [P x P] SP.array of LMM-covariance/kinship coefficients (optional)
                        If not provided, then linear regression analysis is performed
        covar_type:     type of covariance to use if the trait covariances are estimated. Default 'lowrank_diag'. Possible values are
                        'freeform': free form optimization,
                        'fixed': use a fixed matrix specified in covar_K0,
                        'diag': optimize a diagonal matrix,
                        'lowrank': optimize a low rank matrix. The rank of the lowrank part is specified in the variable rank,
                        'lowrank_id': optimize a low rank matrix plus the weight of a constant diagonal matrix. The rank of the lowrank part is specified in the variable rank,
                        'lowrank_diag': optimize a low rank matrix plus a free diagonal matrix. The rank of the lowrank part is specified in the variable rank,
                        'block': optimize the weight of a constant P x P block matrix of ones,
                        'block_id': optimize the weight of a constant P x P block matrix of ones plus the weight of a constant diagonal matrix,
                        'block_diag': optimize the weight of a constant P x P block matrix of ones plus a free diagonal matrix,
        rank:           rank of a possible lowrank component (default 1)
        NumIntervalsDelta0:  number of steps for delta optimization on the null model (100)
        NumIntervalsDeltaAlt:number of steps for delta optimization on the alt. model (0 - no optimization)
        searchDelta:    Boolean indicator if delta is optimized during SNP testing (default False)

    Returns:
        CKroneckerLMM object
        P-values for all SNPs from likelihood ratio test
    """
    #0. checks
    N = phenos.shape[0]
    P = phenos.shape[1]

    if K1r is None:
        K1r = SP.dot(snps, snps.T)
    else:
        assert K1r.shape[0] == N, 'K1r: dimensions mismatch'
        assert K1r.shape[1] == N, 'K1r: dimensions mismatch'

    if K2r is None:
        K2r = SP.eye(N)
    else:
        assert K2r.shape[0] == N, 'K2r: dimensions mismatch'
        assert K2r.shape[1] == N, 'K2r: dimensions mismatch'

    covs, Acovs = updateKronCovs(covs, Acovs, N, P)

    #Asnps can be several designs
    if Asnps is None:
        Asnps = [SP.ones([1, P])]
    if (type(Asnps) != list):
        Asnps = [Asnps]
    assert len(Asnps) > 0, "need at least one SNP design matrix"

    #one row of results per SNP column-design matrix
    pv = SP.zeros((len(Asnps), snps.shape[1]))

    #1. run GP model to infer suitable covariance structure
    if K1c is None or K2c is None:
        vc = estimateKronCovariances(phenos=phenos,
                                     K1r=K1r,
                                     K2r=K2r,
                                     K1c=K1c,
                                     K2c=K2c,
                                     covs=covs,
                                     Acovs=Acovs,
                                     covar_type=covar_type,
                                     rank=rank)
        K1c = vc.getEstTraitCovar(0)
        K2c = vc.getEstTraitCovar(1)
    else:
        assert K1c.shape[0] == P, 'K1c: dimensions mismatch'
        assert K1c.shape[1] == P, 'K1c: dimensions mismatch'
        assert K2c.shape[0] == P, 'K2c: dimensions mismatch'
        assert K2c.shape[1] == P, 'K2c: dimensions mismatch'

    #2. run kroneckerLMM

    lmm = limix.CKroneckerLMM()
    lmm.setK1r(K1r)
    lmm.setK1c(K1c)
    lmm.setK2r(K2r)
    lmm.setK2c(K2c)
    lmm.setSNPs(snps)
    #add covariates
    for ic in range(len(Acovs)):
        lmm.addCovariates(covs[ic], Acovs[ic])
    lmm.setPheno(phenos)

    #delta search on the alt. model?
    if searchDelta:
        lmm.setNumIntervalsAlt(NumIntervalsDeltaAlt)
    else:
        lmm.setNumIntervalsAlt(0)
    lmm.setNumIntervals0(NumIntervalsDelta0)

    for iA in range(len(Asnps)):
        #add SNP design
        lmm.setSNPcoldesign(Asnps[iA])
        lmm.process()
        pv[iA, :] = lmm.getPv()[0]
    return lmm, pv
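
The four covariance matrices passed to CKroneckerLMM encode an implied N*P x N*P covariance V = K1c kron K1r + K2c kron K2r over the vectorised phenotypes. A small standalone check of that structure with toy matrices (numpy only, independent of limix):

import numpy as np

N, P = 5, 2
X = np.random.randn(N, 10)
K1r = np.dot(X, X.T)   # genetic row covariance (the default above is snps.dot(snps.T))
K2r = np.eye(N)        # noise row covariance (the default above)
K1c = np.eye(P)        # toy trait covariances; estimated by the GP step in practice
K2c = np.eye(P)

V = np.kron(K1c, K1r) + np.kron(K2c, K2r)
print(V.shape)  # (N*P, N*P): the covariance the Kronecker LMM never forms explicitly
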
Example #38
0
 def LcGrad_n(self, i):
     RV = sp.dot(self.U_CstarGrad_n(i).T, self.Cn.USi2().T)
     RV += sp.dot(self.U_Cstar().T, self.Cn.USi2grad(i).T)
     return RV
Example #39
0
 def CstarGrad_n(self, i):
     RV = sp.dot(self.Cn.USi2grad(i).T, sp.dot(self.Cg.K(), self.Cn.USi2()))
     RV += sp.dot(self.Cn.USi2().T, sp.dot(self.Cg.K(),
                                           self.Cn.USi2grad(i)))
     #RV+= RV.T
     return RV
Example #40
0
 def predict(self):
     """ predict the value of the fixed effect (F*B) """
     return sp.dot(self.F,self.B)
Example #41
0
 def LcGradCnLc(self, i):
     return sp.dot(self.Lc(), sp.dot(self.Cn.K_grad_i(i), self.Lc().T))
Example #42
0
 def LcGrad_g(self, i):
     return sp.dot(self.U_CstarGrad_g(i).T, self.Cn.USi2().T)
Example #43
0
 def Cstar(self):
     return sp.dot(self.Cn.USi2().T, sp.dot(self.Cg.K(), self.Cn.USi2()))
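
These helpers treat Cn.USi2() as U S^(-1/2) from the eigendecomposition of the noise covariance Cn, so Cstar is the genetic covariance Cg whitened by Cn. A small numpy check of that identity; the matrices and the USi2 construction below are stand-ins for the limix internals, not its API:

import numpy as np
from scipy.linalg import eigh

P = 3
A = np.random.randn(P, P)
B = np.random.randn(P, P)
Cg = np.dot(A, A.T)                  # toy "genetic" trait covariance
Cn = np.dot(B, B.T) + P * np.eye(P)  # toy "noise" trait covariance, well conditioned

S, U = eigh(Cn)                      # Cn = U diag(S) U^T
USi2 = U * S ** -0.5                 # columns scaled by S^(-1/2): U S^(-1/2)
Cstar = np.dot(USi2.T, np.dot(Cg, USi2))

# Whitening check: the same transform maps Cn to the identity.
print(np.allclose(np.dot(USi2.T, np.dot(Cn, USi2)), np.eye(P)))
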
Example #44
0
 def CstarGrad_g(self, i):
     return sp.dot(self.Cn.USi2().T,
                   sp.dot(self.Cg.Kgrad_param(i), self.Cn.USi2()))
Example #45
0
 def sortKey(point_):
     distance = point - point_
     return - dot(distance.T, distance)
Example #46
0
 def Lc(self):
     return sp.dot(self.U_Cstar().T, self.Cn.USi2().T)
Example #47
0
 def _findLocalBall_noinline(self, point):
     """Return the index of the ball that the point lies in."""
     for i, ball in enumerate(self.gridBalls):
         distance = point - ball
         if dot(distance.T, distance) <= self.radiusSquared:
             return i
Example #48
0
    def LcGrad_n(self, i):
        RV = sp.dot(self.U_CstarGrad_n(i).T, self.Cn.USi2().T)
        RV += sp.dot(self.U_Cstar().T, self.Cn.USi2grad(i).T)
        return RV

    def Sgrad_g(self, i):
        return sp.kron(self.S_CstarGrad_g(i), self.Sr())

    def Sgrad_n(self, i):
        return sp.kron(self.S_CstarGrad_n(i), self.Sr())


if __name__ == '__main__':
    from limix.core.covar import FreeFormCov
    from limix.utils.preprocess import covar_rescale

    # define row covariance
    dim_r = 10
    X = sp.rand(dim_r, dim_r)
    R = covar_rescale(sp.dot(X, X.T))

    # define col covariances
    dim_c = 3
    Cg = FreeFormCov(dim_c)
    Cn = FreeFormCov(dim_c)

    cov = Cov2KronSum(Cg=Cg, Cn=Cn, R=R)
    cov.setRandomParams()

    print(cov.K())
    print(cov.K_grad_i(0))
Example #49
0
 def _calcBaseline(self, shapedfitnesses):
     paramWeightings = dot(ones(self.batchSize), self.phiSquareWindow)
     baseline = dot(shapedfitnesses, self.phiSquareWindow) / paramWeightings
     return baseline
Example #50
0
 def insert(self, point, satellite):
     """Put a point and its satellite information into the hash structure.
     """
     point = dot(self.projection, point)
     index = self.findBall(point)
     self.balls[index].append((point, satellite))
Example #51
0
 def _logDerivX(self, sample, x, invSigma):
     return dot(invSigma, (sample - x))
Example #52
0
 def _revertToSafety(self):
     """ When encountering a bad matrix, this is how we revert to a safe one. """
     self.factorSigma = eye(self.numParameters)
     self.x = self.bestEvaluable
     self.allFactorSigmas[-1][:] = self.factorSigma
     self.sigma = dot(self.factorSigma.T, self.factorSigma)
Example #53
0
def train_interactX(X, Y, K, interactants=None, covariates=None, addBiasTerm=True,
                    numintervalsAlt=0, ldeltaminAlt=-1.0, ldeltamaxAlt=1.0,
                    numintervals0=10, ldeltamin0=-5.0, ldeltamax0=5.0):
    """ compute all pvalues
    If numintervalsAlt==0 use the EMMA-X trick (keep delta fixed over alternative models)
    difference to the previous model: UX and Ucovariate are recomputed for every SNP
    """
    n, s = X.shape
    n_pheno = Y.shape[1]
    S, U = LA.eigh(K)
    UY = SP.dot(U.T, Y)
    UX = SP.dot(U.T, X)
    if covariates is None:
        covariates = SP.ones([n, 0])
    if addBiasTerm:
        covariates = SP.concatenate((covariates, SP.ones([n, 1])), axis=1)
    #rotated covariates
    Ucovariate = SP.dot(U.T, covariates)

    #rotated interactants
    Uinteractants = SP.dot(U.T, interactants)
    n_covar = covariates.shape[1]
    n_inter = interactants.shape[1]
    #weights
    #foreground: covariates + SNP (main) + interactants (main) + interactions
    beta = SP.empty((n_pheno, s, 1 + n_covar + 2 * n_inter))
    #background: covariates + SNP (main) + interactants (main)
    beta0 = SP.empty((n_pheno, s, 1 + n_covar + n_inter))
    LL = SP.ones((n_pheno, s)) * (-SP.inf)
    LL0 = SP.ones((n_pheno, s)) * (-SP.inf)
    ldelta = SP.empty([n_pheno, s])
    ldelta0 = SP.empty([n_pheno, s])
    sigg2 = SP.empty((n_pheno, s))
    sigg20 = SP.empty((n_pheno, s))
    pval = SP.ones((n_pheno, s)) * (-SP.inf)
    #0. fit 0 model on phenotypes and covariates alone
    for phen in SP.arange(n_pheno):
        #loop through phenotypes; each is fit once
        #get transformed Y
        UY_ = UY[:, phen]
        #1. fit background model to set delta
        ldelta0[phen, :] = optdelta(UY_, Ucovariate, S, ldeltanull=None, numintervals=numintervals0, ldeltamin=ldeltamin0, ldeltamax=ldeltamax0)
            
    #1. loop through all snps
    for snp in SP.arange(s):
        #loop through all SNPs
        #1. snp-specific background model: SNP effect + covariates + interactants
        Ucovariates_=SP.hstack((UX[:,snp:snp+1],Uinteractants,Ucovariate))
        #2. snp-specific foreground model
        #interactions
        Xi_ = X[:,snp:snp+1]*interactants
        #transform
        UXi_ = SP.dot(U.T,Xi_)
        #stack: interactions, then SNP (main), interactants (main), covariates (if any)
        UX_  = SP.hstack((UXi_,Ucovariates_))

        for phen in SP.arange(n_pheno):
            #loop through all phenotypes
            UY_ = UY[:, phen]
            #emmaX trick: reuse the background-model delta
            ldelta[phen, snp] = ldelta0[phen, snp]
            #evaluate background and foreground
            #null model
            nLL0_, beta0_, sigg20_=nLLeval(ldelta0[phen,snp],UY_,Ucovariates_,S,MLparams=True)
            beta0[phen,snp,:]=beta0_
            sigg20[phen,snp]=sigg20_
            LL0[phen,snp]=-nLL0_                   
            #foreground model
            nLL_, beta_, sigg2_=nLLeval(ldelta[phen,snp],UY_,UX_,S,MLparams=True)
            beta[phen,snp,:]=beta_
            sigg2[phen,snp]=sigg2_
            LL[phen,snp]=-nLL_
    pval = st.chi2.sf(2 * (LL - LL0), 1)  # 1-dof LRT (exact when a single interaction parameter is added)
    return LL0, LL, pval, ldelta0, sigg20, beta0, ldelta, sigg2, beta
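
The rotation at the top of train_interactX (S, U = LA.eigh(K); UY = SP.dot(U.T, Y)) is the standard LMM trick: it diagonalises sigma_g^2*K + sigma_e^2*I, so every subsequent fit reduces to weighted least squares. A compact standalone illustration on toy data (numpy/scipy only, not the limix code path):

import numpy as np
from scipy.linalg import eigh

n = 50
G = np.random.randn(n, 200)
K = np.dot(G, G.T) / 200.0
S, U = eigh(K)                 # K = U diag(S) U^T

delta = 1.0                    # ratio sigma_e^2 / sigma_g^2, held fixed EMMA-X style
y = np.random.randn(n)
Uy = np.dot(U.T, y)
w = 1.0 / (S + delta)          # eigenvalues of (K + delta*I)^{-1}

quad = np.sum(w * Uy ** 2)     # y^T (K + delta*I)^{-1} y as a weighted sum
logdet = np.sum(np.log(S + delta))
print(quad, logdet)            # the two ingredients of the LMM log-likelihood
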
Example #54
0
 def _logDerivsX(self, samples, x, invSigma):
     samplesArray = array(samples)
     tmpX = multiply(x, ones((len(samplesArray), self.numParameters)))
     return dot(invSigma, (samplesArray - tmpX).T).T
Example #55
0
def calc_ld_table(snps,
                  max_ld_dist=2000,
                  min_r2=0.2,
                  verbose=True,
                  normalize=False):
    """
    Calculate LD between all SNPs using a sliding LD square
    
    This function only retains r^2 values above the given threshold
    """
    # Normalize SNPs (perhaps not necessary, but cheap)
    if normalize:
        snps = snps.T
        snps = (snps - sp.mean(snps, 0)) / sp.std(snps, 0)
        snps = snps.T

    if verbose:
        print('Calculating LD table')
    t0 = time.time()
    num_snps, num_indivs = snps.shape
    ld_table = {}
    for i in range(num_snps):
        ld_table[i] = {}

    a = min(max_ld_dist, num_snps)
    num_pairs = (a * (num_snps - 1)) - a * (a + 1) * 0.5
    if verbose:
        print('Correlation between %d pairs will be tested' % num_pairs)
    num_stored = 0
    for i in range(0, num_snps - 1):
        start_i = i + 1
        end_i = min(start_i + max_ld_dist, num_snps)
        ld_vec = sp.dot(snps[i], sp.transpose(
            snps[start_i:end_i])) / float(num_indivs)
        ld_vec = sp.array(ld_vec).flatten()
        for k in range(start_i, end_i):
            ld_vec_i = k - start_i
            if ld_vec[ld_vec_i]**2 > min_r2:
                ld_table[i][k] = ld_vec[ld_vec_i]
                ld_table[k][i] = ld_vec[ld_vec_i]
                num_stored += 1
        if verbose:
            if i % 1000 == 0:
                sys.stdout.write('.')
                #                 sys.stdout.write('\b\b\b\b\b\b\b%0.2f%%' % (100.0 * (min(1, float(i + 1) / (num_snps - 1)))))
                sys.stdout.flush()
    if verbose:
        sys.stdout.write('Done.\n')
        if num_pairs > 0:
            print(
                'Stored %d (%0.4f%%) correlations that made the cut (r^2>%0.3f).'
                % (num_stored, 100 * (num_stored / float(num_pairs)), min_r2))
        else:
            print('-')
    t1 = time.time()
    t = (t1 - t0)
    if verbose:
        print(
            '\nIt took %d minutes and %0.2f seconds to calculate the LD table'
            % (t / 60, t % 60))
    del snps
    return ld_table
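
A toy call for calc_ld_table, assuming the function above is in scope; snps is [num_snps x num_indivs], and normalize=True standardises each SNP first (shapes and thresholds below are illustrative):

import numpy as np

# Stand-in genotypes: rows are SNPs, columns are individuals.
num_snps, num_indivs = 20, 100
snps = np.random.randn(num_snps, num_indivs)

ld_table = calc_ld_table(snps, max_ld_dist=5, min_r2=0.1,
                         verbose=False, normalize=True)
# ld_table[i] maps a neighbouring SNP index j to r, kept only when r^2 > min_r2.
print(ld_table[0])
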
Example #56
0
def ld_pruning(data_file=None,
               ld_radius=None,
               out_file_prefix=None,
               p_thres=None,
               verbose=False,
               max_r2=0.2):
    """
    LD pruning + P-value thresholding 
    """

    df = h5py.File(data_file, 'r')
    has_phenotypes = False
    if 'y' in list(df.keys()):
        print('Validation phenotypes found.')
        y = df['y'][...]  # Phenotype
        num_individs = len(y)
        risk_scores = sp.zeros(num_individs)
        has_phenotypes = True

    print('')
    if max_r2 < 1:
        print(
            'Applying LD-pruning + P-value thresholding with a p-value threshold of %0.2e, an LD radius of %d SNPs, and a max r2 of %0.2f'
            % (p_thres, ld_radius, max_r2))
    else:
        if p_thres < 1:
            print(
                'Applying P-value thresholding with p-value threshold of %0.2e'
                % (p_thres))
        else:
            print('Calculating polygenic risk score using all SNPs')
    results_dict = {}
    num_snps = 0
    cord_data_g = df['cord_data']

    chromsomes = []
    for chrom_str in list(cord_data_g.keys()):
        g = cord_data_g[chrom_str]
        betas = g['betas'][...]
        n_snps = len(betas)
        num_snps += n_snps
        chromsomes.append(int((chrom_str.split('_'))[1]))

    chromsomes.sort()
    p_str = '%0.4f' % p_thres
    results_dict[p_str] = {}

    if out_file_prefix:
        #Preparing output files
        raw_effect_sizes = []
        raw_pval_effect_sizes = []
        updated_effect_sizes = []
        updated_pval_effect_sizes = []
        sids = []
        chromosomes = []
        positions = []
        nts = []

    tot_num_snps = 0
    num_snps_used = 0
    for chrom in chromsomes:
        chrom_str = 'chrom_%d' % chrom
        #print 'Chromosome %s:' % chrom_str
        g = cord_data_g[chrom_str]
        pvalues = g['ps'][...]
        snp_filter = pvalues < p_thres
        num_snps = sp.sum(snp_filter)
        if num_snps == 0:
            #print 'No SNPs, skipping chromosome'
            continue
        tot_num_snps += num_snps

        pvalues = pvalues[snp_filter]
        if 'raw_snps_val' in list(g.keys()):
            raw_snps = g['raw_snps_val'][...][snp_filter]

        else:
            raw_snps = g['raw_snps_ref'][...][snp_filter]

        snp_means = g['snp_means_ref'][...][snp_filter]
        snp_stds = g['snp_stds_ref'][...][snp_filter]
        raw_betas = g['log_odds'][...][snp_filter]
        pval_derived_betas = g['betas'][...][snp_filter]
        if out_file_prefix:
            chromosomes.extend([chrom_str] * len(pval_derived_betas))
            positions.extend(g['positions'][...][snp_filter])
            sids.extend(g['sids'][...][snp_filter])
            raw_effect_sizes.extend(raw_betas)
            raw_pval_effect_sizes.extend(pval_derived_betas)
            nts.extend(g['nts'][...][snp_filter])

        if max_r2 < 1:
            #print 'Generating LD table from genotypes.'
            snp_means.shape = (len(snp_means), 1)
            snp_stds.shape = (len(snp_means), 1)
            #Normalize SNPs..
            norm_ref_snps = sp.array((raw_snps - snp_means) / snp_stds,
                                     dtype='float32')
            ld_table = ld.calc_ld_table(norm_ref_snps,
                                        max_ld_dist=ld_radius,
                                        min_r2=max_r2,
                                        verbose=verbose)

            updated_raw_betas, pruning_vector = smart_ld_pruning(
                raw_betas,
                ld_table,
                pvalues=pvalues,
                max_ld=max_r2,
                verbose=verbose)
            updated_pval_derived_betas = pval_derived_betas * pruning_vector
            num_snps_used += sp.sum(pruning_vector)
        else:
            updated_raw_betas = sp.copy(raw_betas)  # this chromosome's betas, not the accumulated list
            updated_pval_derived_betas = sp.copy(pval_derived_betas)
            updated_pval_derived_betas = updated_pval_derived_betas / (
                snp_stds.flatten())
            pruning_vector = sp.ones(len(pval_derived_betas))
            num_snps_used += sp.sum(pruning_vector)

        if out_file_prefix:
            updated_effect_sizes.extend(updated_raw_betas)
            updated_pval_effect_sizes.extend(updated_pval_derived_betas)

        if has_phenotypes:
            print('Calculating scores for Chromosome %s' % chrom_str)
            prs = sp.dot(updated_raw_betas, raw_snps)
            risk_scores += prs
            corr = sp.corrcoef(y, prs)[0, 1]
            r2 = corr**2
            print('The R2 prediction accuracy of PRS using %s was: %0.4f' %
                  (chrom_str, r2))

    print('There were %d (SNP) effects after p-value thresholding' %
          tot_num_snps)
    print('After LD-pruning %d SNPs had non-zero effects' % num_snps_used)
    if has_phenotypes:
        num_indivs = len(y)
        results_dict[p_str]['y'] = y
        results_dict[p_str]['risk_scores'] = risk_scores
        print('Prediction accuracy was assessed using %d individuals.' %
              (num_indivs))

        corr = sp.corrcoef(y, risk_scores)[0, 1]
        r2 = corr**2
        results_dict[p_str]['r2_pd'] = r2
        print(
            'The R2 prediction accuracy (observed scale) for the whole genome was: %0.4f (%0.6f)'
            % (r2, ((1 - r2)**2) / num_indivs))

        if corr < 0:
            risk_scores = -1 * risk_scores


        #auc = calc_auc(y, risk_scores_pval_derived)
        #print('AUC for the whole genome was: %0.4f' % auc)

        #Now calibration
        denominator = sp.dot(risk_scores.T, risk_scores)
        y_norm = (y - sp.mean(y)) / sp.std(y)
        numerator = sp.dot(risk_scores.T, y_norm)
        regression_slope = (numerator / denominator)
        print('The slope for predictions with P-value derived effects is:',
              regression_slope)
        results_dict[p_str]['slope_pd'] = regression_slope

    if max_r2 == 1:
        weights_out_file = '%s_all_snps.txt' % (out_file_prefix)
    else:
        weights_out_file = '%s_P+T_p%0.4e.txt' % (out_file_prefix, p_thres)
    with open(weights_out_file, 'w') as f:
        f.write(
            'chrom    pos    sid    nt1    nt2    raw_beta    raw_pval_beta    updated_beta    updated_pval_beta \n'
        )
        for chrom, pos, sid, nt, raw_beta, raw_pval_beta, upd_beta, upd_pval_beta in zip(
                chromosomes, positions, sids, nts, raw_effect_sizes,
                raw_pval_effect_sizes, updated_effect_sizes,
                updated_pval_effect_sizes):
            nt1, nt2 = nt[0], nt[1]
            f.write(
                '%s    %d    %s    %s    %s    %0.4e    %0.4e    %0.4e    %0.4e\n'
                % (chrom, pos, sid, nt1, nt2, raw_beta, raw_pval_beta,
                   upd_beta, upd_pval_beta))
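
The calibration step near the end is a regression through the origin of the normalised phenotype on the risk score, slope = dot(s, y_norm) / dot(s, s). The same computation on synthetic data (numpy only):

import numpy as np

rng = np.random.RandomState(0)
risk_scores = rng.randn(200)
y = 0.5 * risk_scores + rng.randn(200)   # synthetic phenotype
y_norm = (y - np.mean(y)) / np.std(y)

slope = np.dot(risk_scores, y_norm) / np.dot(risk_scores, risk_scores)
print(slope)  # through-origin regression coefficient used for calibration
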
Example #57
0
File: xnes.py Project: hao155/xnes
    def step(self, niter):
        """ xNES """
        f = self.f
        mu, sigma, bmat = self.mu, self.sigma, self.bmat
        eta_mu, eta_sigma, eta_bmat = self.eta_mu, self.eta_sigma, self.eta_bmat
        npop = self.npop
        dim = self.dim
        sigma_old = self.sigma_old

        eyemat = eye(dim)

        with joblib.Parallel(n_jobs=self.n_jobs) as parallel:

            for i in range(niter):
                s_try = randn(npop, dim)
                z_try = mu + sigma * dot(s_try, bmat)  # broadcast

                f_try = parallel(joblib.delayed(f)(z) for z in z_try)
                f_try = asarray(f_try)

                # save if best
                fitness = mean(f_try)
                if fitness - 1e-8 > self.fitness_best:
                    self.fitness_best = fitness
                    self.mu_best = mu.copy()
                    self.counter = 0
                else:
                    self.counter += 1
                if self.counter > self.patience:
                    self.done = True
                    return

                isort = argsort(f_try)
                f_try = f_try[isort]
                s_try = s_try[isort]
                z_try = z_try[isort]

                u_try = self.utilities if self.use_fshape else f_try

                if self.use_adasam and sigma_old is not None:  # sigma_old must be available
                    eta_sigma = self.adasam(eta_sigma, mu, sigma, bmat,
                                            sigma_old, z_try)

                dj_delta = dot(u_try, s_try)
                dj_mmat = dot(s_try.T, s_try *
                              u_try.reshape(npop, 1)) - sum(u_try) * eyemat
                dj_sigma = trace(dj_mmat) * (1.0 / dim)
                dj_bmat = dj_mmat - dj_sigma * eyemat

                sigma_old = sigma

                # update
                mu += eta_mu * sigma * dot(bmat, dj_delta)
                sigma *= exp(0.5 * eta_sigma * dj_sigma)
                bmat = dot(bmat, expm(0.5 * eta_bmat * dj_bmat))

                # logging
                self.history['fitness'].append(fitness)
                self.history['sigma'].append(sigma)
                self.history['eta_sigma'].append(eta_sigma)

        # keep last results
        self.mu, self.sigma, self.bmat = mu, sigma, bmat
        self.eta_sigma = eta_sigma
        self.sigma_old = sigma_old
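
Stripped of the parallelism, patience counter, and adaptive sampling, one xNES iteration samples s ~ N(0, I), maps to z = mu + sigma*s.dot(bmat), and updates mu, sigma, and bmat from utility-weighted natural-gradient terms. A self-contained toy run on a quadratic, mirroring the updates above but with standard rank-based fitness shaping (numpy plus scipy.linalg.expm):

import numpy as np
from scipy.linalg import expm

# Toy xNES on f(z) = -||z||^2 (maximisation); all settings illustrative.
dim, npop, niter = 3, 12, 300
rng = np.random.RandomState(0)
mu, sigma, bmat = rng.randn(dim), 1.0, np.eye(dim)
eta_mu = 1.0
eta_sigma = eta_bmat = 3.0 * (3 + np.log(dim)) / (5.0 * dim * np.sqrt(dim))
f = lambda z: -np.dot(z, z)

# Utilities for ascending fitness order (worst first), matching the argsort above.
a = np.log(1 + npop / 2.0)
util = np.maximum(0.0, a - np.log(np.arange(1, npop + 1)))
util = (util / util.sum() - 1.0 / npop)[::-1]

for _ in range(niter):
    s_try = rng.randn(npop, dim)
    z_try = mu + sigma * np.dot(s_try, bmat)
    f_try = np.array([f(z) for z in z_try])
    s_try = s_try[np.argsort(f_try)]          # ascending: worst ... best

    dj_delta = np.dot(util, s_try)
    dj_mmat = np.dot(s_try.T, s_try * util[:, None]) - util.sum() * np.eye(dim)
    dj_sigma = np.trace(dj_mmat) / dim
    dj_bmat = dj_mmat - dj_sigma * np.eye(dim)

    mu += eta_mu * sigma * np.dot(bmat, dj_delta)
    sigma *= np.exp(0.5 * eta_sigma * dj_sigma)
    bmat = np.dot(bmat, expm(0.5 * eta_bmat * dj_bmat))

print(mu)  # should be close to the optimum at the origin
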
Example #58
0
def get_LDpred_ld_tables(snps,
                         ld_radius=100,
                         ld_window_size=0,
                         h2=None,
                         n_training=None,
                         gm=None,
                         gm_ld_radius=None):
    """
    Calculates LD tables, and the LD score in one go...
    """

    ld_dict = {}
    m, n = snps.shape
    ld_scores = sp.ones(m)
    ret_dict = {}
    if gm_ld_radius is None:
        for snp_i, snp in enumerate(snps):
            # Calculate D
            start_i = max(0, snp_i - ld_radius)
            stop_i = min(m, snp_i + ld_radius + 1)
            X = snps[start_i:stop_i]
            D_i = sp.dot(snp, X.T) / n
            r2s = D_i**2
            ld_dict[snp_i] = D_i
            lds_i = sp.sum(r2s - (1 - r2s) / (n - 2), dtype='float32')
            ld_scores[snp_i] = lds_i
    else:
        assert gm is not None, 'Genetic map is missing.'
        window_sizes = []
        ld_boundaries = []
        for snp_i, snp in enumerate(snps):
            curr_cm = gm[snp_i]

            # Now find lower boundary
            start_i = snp_i
            min_cm = gm[snp_i]
            while start_i > 0 and min_cm > curr_cm - gm_ld_radius:
                start_i = start_i - 1
                min_cm = gm[start_i]

            # Now find the upper boundary
            stop_i = snp_i
            max_cm = gm[snp_i]
            while stop_i < m - 1 and max_cm < curr_cm + gm_ld_radius:  # bound by m-1 to avoid indexing past the map
                stop_i = stop_i + 1
                max_cm = gm[stop_i]

            ld_boundaries.append([start_i, stop_i])
            curr_ws = stop_i - start_i
            window_sizes.append(curr_ws)
            assert curr_ws > 0, 'Some issues with the genetic map'

            X = snps[start_i:stop_i]
            D_i = sp.dot(snp, X.T) / n
            r2s = D_i**2
            ld_dict[snp_i] = D_i
            lds_i = sp.sum(r2s - (1 - r2s) / (n - 2), dtype='float32')
            ld_scores[snp_i] = lds_i

        avg_window_size = sp.mean(window_sizes)
        print('Average # of SNPs in LD window was %0.2f' % avg_window_size)
        if ld_window_size == 0:
            ld_window_size = int(avg_window_size * 2)  # must be an integer for the window stride below
        ret_dict['ld_boundaries'] = ld_boundaries
    ret_dict['ld_dict'] = ld_dict
    ret_dict['ld_scores'] = ld_scores

    if ld_window_size > 0:
        ref_ld_matrices = []
        inf_shrink_matrices = []
        for wi in range(0, m, ld_window_size):
            start_i = wi
            stop_i = min(m, wi + ld_window_size)
            curr_window_size = stop_i - start_i
            X = snps[start_i:stop_i]
            D = sp.dot(X, X.T) / n
            ref_ld_matrices.append(D)
            if h2 is not None and n_training is not None:
                A = (m / h2) * sp.eye(curr_window_size) + n_training * D
                A_inv = linalg.pinv(A)
                inf_shrink_matrices.append(A_inv)
        ret_dict['ref_ld_matrices'] = ref_ld_matrices
        if h2 is not None and n_training is not None:
            ret_dict['inf_shrink_matrices'] = inf_shrink_matrices
    return ret_dict
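
A toy call for the fixed-radius branch of get_LDpred_ld_tables, assuming the function above is in scope; snps is [m x n] and standardised per SNP, and passing h2 and n_training also produces the shrinkage matrices (all values illustrative):

import numpy as np

m, n = 50, 200
snps = np.random.randn(m, n)
snps = (snps - snps.mean(1)[:, None]) / snps.std(1)[:, None]  # standardise rows

ret = get_LDpred_ld_tables(snps, ld_radius=10, ld_window_size=20,
                           h2=0.5, n_training=n)
print(ret['ld_scores'][:5])             # per-SNP LD scores
print(len(ret['ref_ld_matrices']))      # one local D matrix per window
print(len(ret['inf_shrink_matrices']))  # present because h2 and n_training were given
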
Example #59
0
def compute_JM(direction, variables, model, idx):
    """
        Function that computes the Jeffries–Matusita distance of the model using the variables : idx +/- one of variables
        Inputs:
            variables: the variable to add to idx
            model:     the model built with all the variables
            idx:       the pool of retained variables
        Output:
            JM: the estimated Jeffries–Matusita distance

        Used in GMM.forward_selection() and GMM.backward_selection()
    """
    # Get machine precision
    eps = sp.finfo(sp.float64).eps

    # Initialization
    JM = sp.zeros(variables.size)
    halfedLogdet  = sp.zeros((model.C,variables.size))

    # Compute all possible update of 0.5* log det cov(idx)
    if len(idx)==0:
        for c in xrange(model.C):
            for k,var in enumerate(variables):
                halfedLogdet[c,k] = 0.5*sp.log(model.cov[c,var,var])
    else:
        for c in xrange(model.C):
            vp,Q,_ = model.decomposition(model.cov[c,idx,:][:,idx])
            logdet = sp.sum(sp.log(vp))
            invCov = sp.dot(Q,((1/vp)*Q).T)
            for k,var in enumerate(variables):
                if direction=='forward':
                    alpha = model.cov[c,var,var] - sp.dot(model.cov[c,var,:][idx], sp.dot(invCov,model.cov[c,var,:][idx].T) )
                elif direction=='backward':
                    alpha = invCov[k,k] # it actually corresponds to 1/alpha from report

                if alpha < eps:
                    alpha = eps
                halfedLogdet[c,k]  = 0.5*( sp.log(alpha) + logdet)
        del vp,Q,alpha,invCov

    if len(idx)==0:
        for i in xrange(model.C):
            for j in xrange(i+1,model.C):
                for k,var in enumerate(variables):
                    md     = (model.mean[i,var]-model.mean[j,var])
                    cs     = (model.cov[i,var,var]+model.cov[j,var,var])/2

                    logdet_ij    = sp.log(2*cs) # 2* because we want det of 2*cs
                    invCov = 1/cs

                    bij    = md*invCov*md/8 + 0.5*( logdet_ij - halfedLogdet[i,k] - halfedLogdet[j,k] )
                    JM[k]  += sp.sqrt(2*(1-sp.exp(-bij)))*model.prop[i]*model.prop[j]

    else:
        for i in xrange(model.C):
            for j in xrange(i+1,model.C):
                cs         = (model.cov[i,idx,:][:,idx]+model.cov[j,idx,:][:,idx])/2
                vp,Q,rcond = model.decomposition(cs)
                invCov     = sp.dot(Q,((1/vp)*Q).T)
                logdet        = sp.sum(sp.log(vp))

                for k,var in enumerate(variables):
                    md      = (model.mean[i,idx]-model.mean[j,idx])

                    if direction=='forward':
                        id_t = list(idx)
                        id_t.append(var)

                        c1      = (model.cov[i,var,var]+model.cov[j,var,var])/2
                        c2      = (model.cov[i,var,:][idx]+model.cov[j,var,:][idx])/2
                        alpha = c1 - sp.dot(c2, sp.dot(invCov,c2.T) )
                        if alpha < eps:
                            alpha = eps
                        logdet_ij     = logdet + sp.log(alpha * 2**(len(id_t)) )  # *2^d because we want det of 2*cs

                        md_new   = (model.mean[i,id_t]-model.mean[j,id_t])
                        row_feat = sp.hstack((-1/alpha * sp.dot(c2,invCov), 1/alpha))
                        cst_feat = alpha * (sp.dot(row_feat,md_new.T)**2)

                    elif direction=='backward':
                        alpha     = 1/invCov[k,k]
                        if alpha < eps:
                            alpha = eps
                        logdet_ij = logdet + sp.log(2**(len(idx)-1) / alpha)  # *2^d because we want det of 2*cs

                        row_feat   = invCov[k,:]
                        cst_feat   = - alpha * (sp.dot(row_feat,md.T)**2)

                    temp = sp.dot(md, sp.dot(invCov,md.T) ) + cst_feat

                    bij = temp/8 + 0.5*(logdet_ij - halfedLogdet[i,k] - halfedLogdet[j,k] )
                    if bij < eps:
                        bij = eps
                    JM[k] += sp.sqrt(2*(1-sp.exp(-bij)))*model.prop[i]*model.prop[j]

    return JM
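
For reference, between two single Gaussian classes the distance is JM = sqrt(2*(1 - exp(-B))) with the Bhattacharyya term B = (1/8)*md.T*inv(Cs)*md + 0.5*log(det(Cs)/sqrt(det(Ci)*det(Cj))), where Cs = (Ci+Cj)/2. A direct sketch with toy covariances, independent of the incremental determinant updates above:

import numpy as np
from scipy.linalg import det, solve

d = 4
rng = np.random.RandomState(0)
mi, mj = np.zeros(d), np.ones(d)
A, B = rng.randn(d, d), rng.randn(d, d)
Ci = np.dot(A, A.T) + np.eye(d)   # toy class covariances, positive definite
Cj = np.dot(B, B.T) + np.eye(d)

Cs = (Ci + Cj) / 2.0
md = mi - mj
bij = np.dot(md, solve(Cs, md)) / 8.0 \
    + 0.5 * np.log(det(Cs) / np.sqrt(det(Ci) * det(Cj)))
JM = np.sqrt(2.0 * (1.0 - np.exp(-bij)))
print(JM)  # lies in [0, sqrt(2)]
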
Example #60
0
def findRotationMatrix(P, Q, R):
    # Make scipy vectors out of P,Q,R
    Pv = r_[P.x, P.y, P.z]
    Qv = r_[Q.x, Q.y, Q.z]
    Rv = r_[R.x, R.y, R.z]
    """ 1.) Find a vector that is normal to the plane (P,Q,R) by
          finding two vectors (a,b) and finding a point orthogonal
          to both. Vectors a and b lie in the plane translated to
          origin. """
    a = Qv - Pv
    b = Rv - Pv
    n = cross_product(a, b)
    n = n / mag(n)  # Normalize
    """ 2.) Create the new axes. These will be used to transform the
          original axis into the plane """
    y_new = a / mag(a)
    z_new = n  # Now all vectors in the new axis with a 0 z component will lie
    # in the plane
    x_new = cross_product(y_new, z_new) / mag(cross_product(y_new, z_new))
    """ 3.) Finding euler's angle requires a little bit of geometry. Please refer to
          M.E. Rose, Elementary Theory of Angular Momemtum, Wiley: New York 1957 for
          an explanations of the angles. Unfortunately there are a lot of different
          conventions, and I randomly picked the one mentioned above. """
    # Beta is measured between the new z axis and the old z axis. All angles are measured
    # counter-clockwise, so if the the y value of the new z axis is < 0 then we have to substract
    # the angle from 360 degrees.
    #line=cross_product([0,0,1],z_new)
    if (z_new[1] < 0):
        # line is a vector that specifies the intersection of the two x,y planes;
        # alpha and gamma are found using this line
        line = cross_product(z_new, [0, 0, 1])
        beta = 2 * pi - acos(
            dot(z_new, [0, 0, 1]) / (mag(z_new) * mag([0, 0, 1])))
    else:
        # line is a vector that specifies the intersection of the two x,y planes;
        # alpha and gamma are found using this line
        line = cross_product([0, 0, 1], z_new)
        beta = acos(dot(z_new, [0, 0, 1]) / (mag(z_new) * mag([0, 0, 1])))
    # Alpha is the angle between 'line' and the original y axis.
    if (line[0] > 0):
        alpha = 2 * pi - acos(
            dot(line, [0, 1, 0]) / (mag(line) * mag([0, 1, 0])))
    else:
        alpha = acos(dot(line, [0, 1, 0]) / (mag(line) * mag([0, 1, 0])))
    # Gamma is the angle between 'line' and the new y axis
    if ((beta < pi and y_new[2] < 0) or (beta > pi and y_new[2] > 0)):
        gamma = 2 * pi - acos(dot(line, y_new) / (mag(line) * mag(y_new)))
    else:
        gamma = acos(dot(line, y_new) / (mag(line) * mag(y_new)))
    """ 4.) Now all we need to do is find the rotation matrix. This can be
          found by multiplying each of the three rotation matrixes (one for each angle). """
    rot11 = cos(alpha) * cos(beta) * cos(gamma) - sin(alpha) * sin(gamma)
    rot12 = -cos(alpha) * cos(beta) * sin(gamma) - sin(alpha) * cos(gamma)
    rot13 = cos(alpha) * sin(beta)
    rot21 = sin(alpha) * cos(beta) * cos(gamma) + cos(alpha) * sin(gamma)
    rot22 = -sin(alpha) * cos(beta) * sin(gamma) + cos(alpha) * cos(gamma)
    rot23 = sin(alpha) * sin(beta)
    rot31 = -sin(beta) * cos(gamma)
    rot32 = sin(beta) * sin(gamma)
    rot33 = cos(beta)
    rot = r_[[[rot11, rot12, rot13]], [[rot21, rot22, rot23]],
             [[rot31, rot32, rot33]]]
    return rot
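
A quick property check for the construction above: a Z-Y-Z Euler rotation assembled from those nine entries should be orthogonal with determinant +1 (plain numpy, arbitrary angles):

import numpy as np

alpha, beta, gamma = 0.3, 1.1, -0.7  # arbitrary test angles
ca, sa = np.cos(alpha), np.sin(alpha)
cb, sb = np.cos(beta), np.sin(beta)
cg, sg = np.cos(gamma), np.sin(gamma)

rot = np.array([[ca * cb * cg - sa * sg, -ca * cb * sg - sa * cg, ca * sb],
                [sa * cb * cg + ca * sg, -sa * cb * sg + ca * cg, sa * sb],
                [-sb * cg,                sb * sg,                cb]])

print(np.allclose(np.dot(rot, rot.T), np.eye(3)))  # True: orthogonal
print(np.linalg.det(rot))                          # ~= 1.0
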