Example 1
 def divide(self,b,mode):
     if mode == 1:
         y = sci.lstsq(self.matrix,b)
     else:
         y = sci.lstsq(self.matrix.conj().T,b)
     
     return y[0]
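A minimal standalone sketch of how a divide method like this behaves (hypothetical square matrix; mode 1 solves with the matrix itself, any other mode with its conjugate transpose, as in a LinearOperator-style interface):

import numpy as np
from scipy import linalg as sci  # matches the alias used above

A = np.array([[2.0, 1.0], [1.0, 3.0]])
b = np.array([1.0, 0.0])
x_fwd = sci.lstsq(A, b)[0]           # mode == 1: solve with A
x_adj = sci.lstsq(A.conj().T, b)[0]  # otherwise: solve with A^H
print(np.allclose(A @ x_fwd, b), np.allclose(A.conj().T @ x_adj, b))  # True True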
Example 2
def solveQ3(trainData, testData, columnLabel):
    # trainingData, testingData and dataMEDVCol are assumed to be module-level
    # globals: the full data matrices and the index of the MEDV target column
    target = trainingData[:,dataMEDVCol:dataMEDVCol+1]
    targetTest = testingData[:,dataMEDVCol:dataMEDVCol+1]
    Ones = np.ones((len(target),1))

    # Fitting the parameters: theta = (X'*X)^-1*X'*y
    Xtrain = np.hstack((Ones, trainData.reshape(len(Ones),1)))
    mTheta = lstsq(Xtrain, target)[0]
    target_pred = dot(Xtrain, mTheta)
    t = target-target_pred
    msePred = sum((target-target_pred)**2)/len(target)
    meanTarget = sum(target)/len(target)
    varianceTarget = sum((target-meanTarget)**2)/len(target)
    FVU = msePred/varianceTarget

    # note: reuses Ones from the training set, so the test set must have the same length
    Xtest = np.hstack((Ones, testData.reshape(len(Ones),1)))
    mThetaTest = lstsq(Xtest, targetTest)[0]
    # use theta from training set, not from testing set
    target_pred_test = dot(Xtest, mTheta)

    msePredTest = sum((targetTest-target_pred_test)**2)/len(targetTest)
    meanTargetTest = sum(targetTest)/len(targetTest)
    varianceTargetTest = sum((targetTest-meanTargetTest)**2)/len(targetTest)
    FVUTest = msePredTest/varianceTargetTest

    print '###',columnLabel,'###'
    print 'MSE training set:', msePred
    print 'MSE testing set:', msePredTest
    print 'R2 of testing set against theta from training set:', 1 - FVUTest,'\n'
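The comment above cites the normal equations; a small sketch with synthetic data (not the housing columns this function expects) showing that lstsq and the explicit formula theta = (X'X)^-1 X'y agree for a full-rank design:

import numpy as np
from numpy.linalg import lstsq, inv

rng = np.random.RandomState(0)
X = np.hstack([np.ones((50, 1)), rng.randn(50, 1)])  # intercept column + one feature
y = X.dot(np.array([2.0, -1.0])) + 0.1 * rng.randn(50)

theta_lstsq = lstsq(X, y, rcond=None)[0]
theta_normal = inv(X.T.dot(X)).dot(X.T).dot(y)  # (X'X)^-1 X'y
print(np.allclose(theta_lstsq, theta_normal))  # True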
Example 3
def backgroundCorrectPSFWF(d):
    import numpy as np
    from scipy import linalg
    
    zf = d.shape[2]//2  # integer index of the central z-slice
        
    #subtract a linear background in x
    Ax = np.vstack([np.ones(d.shape[0]), np.arange(d.shape[0])]).T        
    bgxf = (d[0,:,zf] + d[-1,:,zf])/2
    gx = linalg.lstsq(Ax, bgxf)[0]
    
    d = d - np.dot(Ax, gx)[:,None,None]
    
    #do the same in y
    Ay = np.vstack([np.ones(d.shape[1]), np.arange(d.shape[1])]).T        
    bgyf = (d[:,0,zf] + d[:,-1,zf])/2
    gy = linalg.lstsq(Ay, bgyf)[0]
    
    d = d - np.dot(Ay, gy)[None, :,None]
    
    
    #estimate background on central slice as mean of rim pixels
    #bgr = (d[0,:,zf].mean() + d[-1,:,zf].mean() + d[:,0,zf].mean() + d[:,-1,zf].mean())/4
    
    #sum over all pixels (and hence mean) should be preserved over z (for widefield psf)
    dm = d.mean(1).mean(0)
    
    bg = dm - dm[zf]
    
    return np.maximum(d - bg[None, None, :], 0) +  1e-5
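The rim-based background removal above is, per axis, an ordinary least-squares line fit to the averaged edge profile; a small sketch of that single step on a synthetic profile:

import numpy as np
from scipy import linalg

n = 64
profile = 0.5 + 0.01 * np.arange(n) + 0.001 * np.random.randn(n)  # noisy linear background
A = np.vstack([np.ones(n), np.arange(n)]).T  # columns: offset, slope
offset, slope = linalg.lstsq(A, profile)[0]
print(offset, slope)  # close to 0.5 and 0.01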
Example 4
def partial_corr(C):
    """
    Returns the sample linear partial correlation coefficients between pairs of variables in C, controlling
    for the remaining variables in C.
    Parameters
    ----------
    C : array-like, shape (n, p)
        Array with the different variables. Each column of C is taken as a variable
    Returns
    -------
    P : array-like, shape (p, p)
        P[i, j] contains the partial correlation of C[:, i] and C[:, j] controlling
        for the remaining variables in C.
    """

    C = np.asarray(C)
    p = C.shape[1]
    P_corr = np.zeros((p, p), dtype=float)  # np.float was removed from NumPy; use plain float
    for i in range(p):
        P_corr[i, i] = 1
        for j in range(i+1, p):
            idx = np.ones(p, dtype=bool)
            idx[i] = False
            idx[j] = False
            beta_i = linalg.lstsq(C[:, idx], C[:, j])[0]
            beta_j = linalg.lstsq(C[:, idx], C[:, i])[0]

            res_j = C[:, j] - C[:, idx].dot( beta_i)
            res_i = C[:, i] - C[:, idx].dot(beta_j)

            corr = stats.pearsonr(res_i, res_j)[0]
            P_corr[i, j] = corr
            P_corr[j, i] = corr

    return P_corr
Example 5
def pcorr(C,k):
    val=list(C.columns.values)
    C[u'ones']=np.ones(C.shape[0])
    C = np.asarray(C)
    p = C.shape[1]
    P_corr = np.zeros((p-1, p-1), dtype=float)
    idx = np.zeros(p, dtype=bool)
    for kk in k:
      idx[kk] = True
    idx[p-1] = True
    for i in range(p-1):
        P_corr[i, i] = 1
        for j in range(i+1, p-1):
            beta_i = linalg.lstsq(C[:, idx], C[:, i])[0]
            beta_j = linalg.lstsq(C[:, idx], C[:, j])[0]

            res_j = C[:, j] - C[:, idx].dot(beta_j)
            res_i = C[:, i] - C[:, idx].dot(beta_i)
            
            corr = stats.pearsonr(res_i, res_j)[0]
            P_corr[i, j] = corr
            P_corr[j, i] = corr
    
    p=pd.DataFrame(P_corr, index=val, columns=val)
    return p
Example 6
def pcorParallel(X,Z,Y=None):
    """
    computes the correlation matrix between X and Y conditioning on Z
    """
    if Y is None: return pcorParallelSym(X,Z)
    if Z is None: return corrParallel(X,Y)

 
    if Z.ndim==1: Z = Z[SP.newaxis,:]

    X = X.T
    Y = Y.T
    Z = Z.T
    
    beta,_,_,_ = LA.lstsq(Z,Y)
    Yres = Y - SP.dot(Z,beta)

    beta,_,_,_ = LA.lstsq(Z,X)
    Xres = X - SP.dot(Z,beta)

    
    nSamples = Z.shape[0]
    nCovs = Z.shape[1]
    df = max(nSamples  - 2 - nCovs,0)
    
    return corrParallel(Xres.T,Yres.T,df=df)
Example 7
    def process(self, data_in, obs_vec):
        """
        Generate function network model.

        :param data_in: Training data matrix :math:`\mathcal{X}\in\mathbb{R}^{d\\times n}`
        :type data_in: numpy array
        :param obs_vec: Observation vector :math:`y\in\mathbb{R}^{1 \\times n}`
        :type obs_vec: numpy array
        :return: none
        :rtype:  none
        """
        # check consistency of data
        obs_num = obs_vec.shape[1]
        data_num = data_in.shape[1]

        if obs_num != data_num:
            raise Exception("Number of samples for data and observations must be the same")
        else:
            # initialize variables
            self.data = data_in
            self.data_dim = data_in.shape[0]
            nsamp = data_num

            # peel off parameters
            ki = self.k_type
            bandwidth = ki.params[0]

            # compute regularized kernel matrix
            kmat = kernel(self.data, self.data, self.k_type) + (pow(self.noise,2))*eye(nsamp)

            # perform Cholesky factorization, and compute mean vector (for stable inverse computations)
            self.lmat = cholesky(kmat).transpose()
            # lstsq returns a tuple; the solution vector is its first element
            self.mean_vec = lstsq(self.lmat, obs_vec)[0]
            self.mean_vec = lstsq(self.lmat.transpose(), self.mean_vec)[0]
Example 8
def pcor(X,Y,Z):
    """
    computes the correlation matrix of X and Y conditioning on Z
    """
    if X.ndim==1: X = X[:,SP.newaxis]
    if Y.ndim==1: Y = Y[:,SP.newaxis]
    
    if Z is None: return STATS.pearsonr(X,Y)

    if Z.ndim==1: Z = Z[:,SP.newaxis]
    nSamples = X.shape[0]
    betaX, _, _, _ = LA.lstsq(Z,X)
    betaY, _, _, _ = LA.lstsq(Z,Y)
    Xres = X - SP.dot(Z,betaX)
    Yres = Y - SP.dot(Z,betaY)
    corr_cond = SP.corrcoef(Xres[:,0],Yres[:,0])[0,1]
    dz = Z.shape[1]  # dimension of conditioning variable
    df = max(nSamples - dz - 2,0)  # degrees of freedom

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        tstat = corr_cond / SP.sqrt(1.0 - corr_cond ** 2)  # calculate t statistic
        
    tstat = math.sqrt(df) * tstat
    pv_cond = 2 * t.cdf(-abs(tstat), df, loc=0, scale=1)  # calculate p value
    return corr_cond,pv_cond
Example 9
def bilinear_least_squares(X, y, b0=None, n_iter=10, fit_intercept=True):
    """assumes X.shape = n_samples, n_matrices, h, wi
       and does linear regression as a sum of rank 1 matrices"""

    if X.ndim == 3:
        X = X[:, np.newaxis]
    n_samples, n_matrices, n_feat_a, n_feat_b = X.shape
    if b0 is None:
        b0 = np.ones((n_matrices, n_feat_b)) / n_feat_b
    b = b0.copy()

    if fit_intercept:
        X_mean, y_mean = X.mean(0), y.mean()
        X = X - X_mean
        y = y - y_mean

    for i in range(n_iter):
        a_estimation_matrix = np.einsum(
                                 "ijkl, jl -> ijk", X, b).reshape(n_samples, -1)
        a = lstsq(a_estimation_matrix, y)[0].reshape(n_matrices, n_feat_a)
        b_estimation_matrix = np.einsum(
                                 "ijkl, jk -> ijl", X, a).reshape(n_samples, -1)
        b = lstsq(b_estimation_matrix, y)[0].reshape(n_matrices, n_feat_b)


    if fit_intercept:
        intercept = y_mean - np.einsum("jkl, jk, jl", X_mean, a, b)
        return a, b, intercept

    return a, b
Example 10
 def divide(self,x,mode):
     if mode == 1:
         y = linalg.lstsq(self.diag,x)
     else:
         y = linalg.lstsq(self.diag.conj().T,x)
     
     return y[0]
Example 11
def partial_corr(C):
    """
    Partial Correlation in Python (clone of Matlab's partialcorr)
    from https://gist.github.com/fabianp/9396204419c7b638d38f

    This uses the linear regression approach to compute the partial
    correlation (might be slow for a huge number of variables). The
    algorithm is detailed here:

        http://en.wikipedia.org/wiki/Partial_correlation#Using_linear_regression

    Taking X and Y two variables of interest and Z the matrix with all the variable minus {X, Y},
    the algorithm can be summarized as

        1) perform a normal linear least-squares regression with X as the target and Z as the predictor
        2) calculate the residuals in Step #1
        3) perform a normal linear least-squares regression with Y as the target and Z as the predictor
        4) calculate the residuals in Step #3
        5) calculate the correlation coefficient between the residuals from Steps #2 and #4;

    The result is the partial correlation between X and Y while controlling for the effect of Z

    Returns the sample linear partial correlation coefficients between pairs of variables in C, controlling
    for the remaining variables in C.


    Parameters
    ----------
    C : array-like, shape (n, p)
        Array with the different variables. Each column of C is taken as a variable


    Returns
    -------
    P : array-like, shape (p, p)
        P[i, j] contains the partial correlation of C[:, i] and C[:, j] controlling
        for the remaining variables in C.
    """

    C = np.asarray(C)
    p = C.shape[1]
    P_corr = np.zeros((p, p), dtype=float)
    for i in range(p):
        P_corr[i, i] = 1
        for j in range(i + 1, p):
            idx = np.ones(p, dtype=bool)
            idx[i] = False
            idx[j] = False
            beta_i = linalg.lstsq(C[:, idx], C[:, j])[0]
            beta_j = linalg.lstsq(C[:, idx], C[:, i])[0]

            res_j = C[:, j] - C[:, idx].dot(beta_i)
            res_i = C[:, i] - C[:, idx].dot(beta_j)

            corr = stats.pearsonr(res_i, res_j)[0]
            P_corr[i, j] = corr
            P_corr[j, i] = corr

    return P_corr
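A quick usage sketch for the function above (synthetic data; assumes numpy, scipy.linalg as linalg, and scipy.stats as stats are imported as the function expects). Two variables driven by a common confounder show a large marginal correlation but a much smaller partial correlation:

import numpy as np
from scipy import linalg, stats

rng = np.random.RandomState(0)
z = rng.randn(500)
x = z + 0.1 * rng.randn(500)
y = z + 0.1 * rng.randn(500)
C = np.column_stack([x, y, z])

print(stats.pearsonr(x, y)[0])  # near 1: confounded marginal correlation
print(partial_corr(C)[0, 1])    # much smaller once z is controlled for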
Example 12
 def trialFunFit_constrained(self, s, arr, alphas, pairs, zerostart=False):
     deg = len(alphas)
     carr = np.concatenate((arr.real, arr.imag))
     # construct matrix for extended fitting problem
     A = np.concatenate((1. / (s[:,None] + alphas[None,:]), \
                             arr[:,None] / (s[:,None] + alphas[None,:])), axis=1)
     # implement the constraint
     pairsnew = np.concatenate((pairs, pairs))
     for i, p in enumerate(pairsnew):
         if p:
             x1 = A[:,i] + A[:,i+1]
             x2 = 1j * (A[:,i] - A[:,i+1])
             A[:,i] = x1
             A[:,i+1] = x2
     A = np.concatenate((A.real, A.imag), axis=0)
     # find auxiliary residues
     c = la.lstsq(A, carr)[0][-len(alphas):]
     # find zeros of fitted auxiliary function
     a = np.diag(alphas)
     b = np.ones(deg)
     # implement similarity transform
     for i, p in enumerate(pairs):
         if p:
             a[i:i+2, i:i+2] = np.array([[alphas[i].real, alphas[i].imag], \
                                         [-alphas[i].imag, alphas[i].real]])
             b[i:i+2] = np.array([2,0])
     H = a.real - np.dot(b[:,None], c[None,:])
     alphanew = np.linalg.eig(H)[0]
     inds = np.argsort(alphanew)
     alphanew = alphanew[inds]
     # indicates where pairs of complex conjugate poles occur
     auxarr = np.abs((np.abs(alphanew[:-1]) - np.abs(alphanew[1:])) / np.abs(alphanew[:-1]))
     auxarr2 = np.abs(alphas.imag) > 1e-15
     pairs = np.logical_and(np.concatenate((auxarr < 1e-15, np.zeros(1, dtype=bool))), auxarr2)
     # find residues
     Anew = 1. / (s[:,None] + alphanew[None,:])
     for i, p in enumerate(pairs):
         if p:
             x1 = Anew[:,i] + Anew[:,i+1]
             x2 = 1j * (Anew[:,i] - Anew[:,i+1])
             Anew[:,i] = x1
             Anew[:,i+1] = x2
     Anew = np.concatenate((Anew.real, Anew.imag), axis=0)
     if zerostart:
         # enforce K(t=0)=0 constraint
         row1 = np.ones(2*deg)
         for i, p in enumerate(pairs):
             if p:
                 row1[i+1] = 0
         Anew = np.concatenate((np.ones((1, deg), dtype=complex), Anew), axis=0)
         carr = np.concatenate((np.zeros(1, dtype=complex), carr))
     cnew = la.lstsq(Anew, carr)[0]
     cnew = np.array(cnew, dtype=complex)
     # recast cnew to complex values
     for i, p in enumerate(pairs):
         if p:
             cnew[i:i+2] = np.array([cnew[i] + 1j * cnew[i+1], cnew[i] - 1j * cnew[i+1]])
     
     return alphanew, cnew, pairs
Example 13
 def time_lstsq(self, dtype, size, lapack_driver):
     if lapack_driver == 'numpy':
         np.linalg.lstsq(self.A, self.b,
                         rcond=np.finfo(self.A.dtype).eps * 100)
     else:
         sl.lstsq(self.A, self.b, cond=None, overwrite_a=False,
                  overwrite_b=False, check_finite=False,
                  lapack_driver=lapack_driver)
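This reads like an asv-style benchmark method; a hypothetical setup for it might build an overdetermined system once, so each timed call only runs the solver (names and sizes here are illustrative, not from the original suite; assumes scipy.linalg imported as sl):

import numpy as np

def setup(self, dtype, size, lapack_driver):
    rng = np.random.RandomState(1234)
    n, m = size  # e.g. (1000, 100): overdetermined system
    self.A = rng.rand(n, m).astype(dtype)
    self.b = rng.rand(n).astype(dtype)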
Example 14
 def fit_values(self, s, x, damp=0.0):
     Phi = complete_polynomial(s.T, self.d).T
     self.Phi = Phi
     if damp == 0.0:
         self.coefs = np.ascontiguousarray(lstsq(Phi, x)[0])
     else:
         new_coefs = np.ascontiguousarray(lstsq(Phi, x)[0])
         self.coefs = (1 - damp) * new_coefs + damp * self.coefs
Example 15
def _unscented_correct(cross_sigma, mu_pred, sigma2_pred, obs_mu_pred, obs_sigma2_pred, z):
    """Correct predicted state estimates with an observation

    Parameters
    ----------
    cross_sigma : [n_dim_state, n_dim_obs] array
        cross-covariance between the state at time t given all observations
        from timesteps [0, t-1] and the observation at time t
    mu_pred : [n_dim_state] array
        mean of state at time t given observations from timesteps [0, t-1]
    sigma2_pred : [n_dim_state, n_dim_state] array
        square root of covariance of state at time t given observations from
        timesteps [0, t-1]
    obs_mu_pred : [n_dim_obs] array
        mean of observation at time t given observations from times [0, t-1]
    obs_sigma2_pred : [n_dim_obs, n_dim_obs] array
        square root of covariance of observation at time t given observations
        from times [0, t-1]
    z : [n_dim_obs] array
        observation at time t

    Returns
    -------
    mu_filt : [n_dim_state] array
        mean of state at time t given observations from time steps [0, t]
    sigma2_filt : [n_dim_state, n_dim_state] array
        square root of covariance of state at time t given observations from
        time steps [0, t]
    """
    n_dim_state = len(mu_pred)
    n_dim_obs = len(obs_mu_pred)

    if not np.any(ma.getmask(z)):
        ##############################################
        # Same as this, but more stable (supposedly) #
        ##############################################
        # K = cross_sigma.dot(
        #     linalg.pinv(
        #         obs_sigma2_pred.T.dot(obs_sigma2_pred)
        #     )
        # )
        ##############################################

        # equivalent to this MATLAB code
        # K = (cross_sigma / obs_sigma2_pred.T) / obs_sigma2_pred
        K = linalg.lstsq(obs_sigma2_pred, cross_sigma.T)[0]
        K = linalg.lstsq(obs_sigma2_pred.T, K)[0]
        K = K.T

        # correct mu, sigma
        mu_filt = mu_pred + K.dot(z - obs_mu_pred)
        U = K.dot(obs_sigma2_pred)
        sigma2_filt = cholupdate(sigma2_pred, U.T, -1.0)
    else:
        # no corrections to be made
        mu_filt = mu_pred
        sigma2_filt = sigma2_pred
    return (mu_filt, sigma2_filt)
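A small numeric check of the double lstsq above: assuming obs_sigma2_pred is a square, nonsingular square-root factor S, the two solves amount to K = cross_sigma.dot(inv(S.dot(S.T))), matching the MATLAB right-division comment:

import numpy as np
from scipy import linalg

rng = np.random.RandomState(0)
S = np.triu(rng.rand(3, 3)) + 3 * np.eye(3)  # square-root factor (upper triangular)
cross = rng.rand(4, 3)                       # cross-covariance, state dimension 4

K = linalg.lstsq(S, cross.T)[0]
K = linalg.lstsq(S.T, K)[0].T
print(np.allclose(K, cross.dot(linalg.inv(S.dot(S.T)))))  # True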
Example 16
def solveQ5(trainData, testData, columnLabel):
    # trainingData, testingData and dataMEDVCol are assumed to be module-level
    # globals, as in solveQ3 above
    target = trainingData[:,dataMEDVCol:dataMEDVCol+1]
    targetTest = testingData[:,dataMEDVCol:dataMEDVCol+1]
    Ones = np.ones((len(target),1))

    # Fitting the parameters: theta = (X'*X)^-1*X'*y
    Xtrain = np.hstack((Ones, trainData.reshape(len(Ones),1)))

    #firstCol = columnRM**2
    #secondCol = columnLSTAT**2
    #thirdCol = columnB**2
    #fourthCol = columnZN**2
    # note: firstCol repeats the trainData column already stacked into Xtrain,
    # so the design matrix is rank-deficient (lstsq still returns a solution)
    firstCol = trainData
    secondCol = trainData**2
    thirdCol = trainData**3
    fourthCol = trainData**4
    Xtrain = np.hstack((Xtrain, firstCol.reshape(len(Xtrain),1)))
    Xtrain = np.hstack((Xtrain, secondCol.reshape(len(Xtrain),1)))
    Xtrain = np.hstack((Xtrain, thirdCol.reshape(len(Xtrain),1)))
    Xtrain = np.hstack((Xtrain, fourthCol.reshape(len(Xtrain),1)))

    mTheta = lstsq(Xtrain, target)[0]
    target_pred = dot(Xtrain, mTheta)

    msePred = sum((target-target_pred)**2)/len(target)
    meanTarget = sum(target)/len(target)
    varianceTarget = sum((target-meanTarget)**2)/len(target)
    FVU = msePred/varianceTarget

    # note: reuses Ones from the training set, so the test set must have the same length
    Xtest = np.hstack((Ones, testData.reshape(len(Ones),1)))

    #firstCol = columnTestRM**2
    #secondCol = columnTestLSTAT**2
    #thirdCol = columnTestB**2
    #fourthCol = columnTestZN**2
    firstCol = testData
    secondCol = testData**2
    thirdCol = testData**3
    fourthCol = testData**4
    Xtest = np.hstack((Xtest, firstCol.reshape(len(Xtest),1)))
    Xtest = np.hstack((Xtest, secondCol.reshape(len(Xtest),1)))
    Xtest = np.hstack((Xtest, thirdCol.reshape(len(Xtest),1)))
    Xtest = np.hstack((Xtest, fourthCol.reshape(len(Xtest),1)))

    mThetaTest = lstsq(Xtest, targetTest)[0]
    target_pred_test = dot(Xtest, mTheta)

    msePredTest = sum((targetTest-target_pred_test)**2)/len(targetTest)
    meanTargetTest = sum(targetTest)/len(targetTest)
    varianceTargetTest = sum((targetTest-meanTargetTest)**2)/len(targetTest)
    FVUTest = msePredTest/varianceTargetTest

    print '###',columnLabel,'###'
    print 'MSE training set:', msePred
    print 'MSE testing set:', msePredTest
    print 'R2 of testing set against theta from training set:', 1 - FVUTest,'\n'
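The repeated hstack/reshape calls above build a polynomial design matrix one power at a time; np.vander does the same in one call (note it would not reproduce the duplicated linear column mentioned above):

import numpy as np

x = np.linspace(0.0, 1.0, 10)
X = np.vander(x, N=5, increasing=True)  # columns: 1, x, x**2, x**3, x**4
print(X.shape)  # (10, 5)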
Example 17
def partial_corr(X,Y,Z):
    """
    Partial Correlation in Python (clone of Matlab's partialcorr)
    But Returns only one partial correlation value.

    This uses the linear regression approach to compute the partial
    correlation (might be slow for a huge number of variables). The
    algorithm is detailed here:

        http://en.wikipedia.org/wiki/Partial_correlation#Using_linear_regression

    Taking X and Y two variables of interest and Z the matrix with all the variable minus {X, Y},
    the algorithm can be summarized as

        1) perform a normal linear least-squares regression with X as the target and Z as the predictor
        2) calculate the residuals in Step #1
        3) perform a normal linear least-squares regression with Y as the target and Z as the predictor
        4) calculate the residuals in Step #3
        5) calculate the correlation coefficient between the residuals from Steps #2 and #4;

    The result is the partial correlation between X and Y while controlling for the effect of Z

    Returns the sample linear partial correlation coefficient between X and Y controlling
    for Z.


    Parameters
    ----------
    X : vector (length n)
    Y : vector (length n)
    Z : array-like, shape (n, p) where p are the variables to control for


    Returns
    -------
    pcorr : float - partial correlation between X and Y controlling for Z

    Adapted from https://gist.github.com/fabianp/9396204419c7b638d38f
    to return one value instead of partial correlation matrix
    """

    ## regress covariates on both X and Y
    beta_x = linalg.lstsq(Z, X)[0]
    beta_y = linalg.lstsq(Z, Y)[0]

    ## take residuals of above regression
    res_x = X - Z.dot(beta_x)
    res_y = Y - Z.dot(beta_y)

    ## correlate the residuals to get partial corr
    pcorr = stats.pearsonr(res_x, res_y)[0]

    ## return the partial correlation
    return pcorr
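A usage sketch for the scalar version above (synthetic vectors; assumes scipy.linalg as linalg and scipy.stats as stats, as the function expects). Since lstsq fits no intercept on its own, Z should include a column of ones when an intercept is wanted:

import numpy as np
from scipy import linalg, stats

rng = np.random.RandomState(0)
z = rng.randn(200)
X = z + 0.2 * rng.randn(200)
Y = z + 0.2 * rng.randn(200)
Z = np.column_stack([z, np.ones(200)])  # covariate plus intercept column

print(partial_corr(X, Y, Z))  # near 0 once z is controlled for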
Example 18
    def train_(self, d):
        if type(self.heog) == str:
            self.heog = d.feat_lab[0].index(self.heog)
        if type(self.veog) == str:
            self.veog = d.feat_lab[0].index(self.veog)
        if type(self.reog) == str:
            self.reog = d.feat_lab[0].index(self.reog)

        self.eog = set([self.heog, self.veog, self.reog])
        if self.eeg is None:
            self.eeg = set(range(d.nfeatures)) - self.eog
        else:
            self.eeg = set([d.feat_lab[0].index(ch) if type(ch) == str else ch
                            for ch in self.eeg])

        s = get_samplerate(d)

        # Extract EOG trials
        d_sliced = slice(d, self.mdict, (int(-1*s), int(1.5*s)))

        # Average the trials and baseline them
        d_erp = erp(d_sliced, enforce_equal_n=False)
        #d_erp = baseline(d_erp, (0, int(0.5*s)))
        d_erp = baseline(d_erp, (0, int(2.5*s)))

        # Concatenate blink trials and eye movement trials
        d_blink = concatenate_trials(d_erp[0])
        d_movement = concatenate_trials(d_erp[1:])

        # Calculate Bh and Bv
        v1 = np.vstack((
            np.ones(d_movement.ninstances),
            d_movement.data[self.heog,:],
            d_movement.data[self.veog,:]
        )).T

        coeff1,_,_,_ = linalg.lstsq(v1,d_movement.data.T)
        self.Bh = coeff1[1,:]
        self.Bv = coeff1[2,:]

        # Remove HEOG and VEOG from the blink data
        corr1 = np.zeros(d_blink.data.T.shape)
        for channel in range(d_blink.nfeatures):
            corr1[:, channel] = d_blink.data[channel,:] - d_blink.data[self.heog,:]*self.Bh[channel] - d_blink.data[self.veog,:]*self.Bv[channel]
            
        # Calculate Br    
        v2 = np.vstack((
            np.ones(d_blink.ninstances),
            corr1[:,self.reog]
        )).T
        coeff2,_,_,_ = linalg.lstsq(v2, corr1)
        self.Br = coeff2[1,:]
Example 19
def matrix_factor_ALS(matrix, dim, num_iters):
  #initialization of two factors: uniform random in [0, 1)
  #NOTE: scipy lstsq works on dense (numpy) arrays anyway,
  #so we keep the factors dense and convert back to sparse at the end
  factor1 = np.random.random((matrix.shape[0], dim))
  factor2 = np.random.random((dim, matrix.shape[1]))
  for iteration in range(num_iters):
    #solve 2 least squares problems:
    #fix the second factor and solve for the first (via the transposed system),
    #then fix the first factor and solve for the second
    factor1 = lstsq(factor2.transpose(), matrix.A.transpose())[0].transpose()
    factor2 = lstsq(factor1, matrix.A)[0]
  return sp.csr_matrix(factor1), sp.csr_matrix(factor2)
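A small usage sketch for the ALS routine above (assumes numpy as np, scipy.sparse as sp, and scipy's lstsq in scope, as the function expects):

import numpy as np
import scipy.sparse as sp
from scipy.linalg import lstsq

rng = np.random.RandomState(0)
M = sp.csr_matrix(rng.rand(20, 15))
F1, F2 = matrix_factor_ALS(M, dim=5, num_iters=10)
print(np.linalg.norm((F1 @ F2).toarray() - M.toarray()))  # reconstruction error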
Example 20
	def execute(self, bem):
		""" Compute potential unknow data (gradients for free surface, and
		potentials for the other ones).
		@param bem Boundary Element Method instance.
		"""
		[bem['Ap'], residues, rank, s]  = la.lstsq(bem['A'], bem['B'])
		if(rank < bem['N']):
			FreeCAD.Console.PrintError("\t\t[Sim]: Solving velocity potentials.\n")
			FreeCAD.Console.PrintError("\t\t\tEffective rank of linear system matrix is %i (N = %i)\n" % (rank, bem['N']))
		[bem['Adp'], residues, rank, s] = la.lstsq(bem['A'], bem['dB'])
		if(rank < bem['N']):
			FreeCAD.Console.PrintError("\t\t[Sim]: Solving acceleration potentials.\n")
			FreeCAD.Console.PrintError("\t\t\tEffective rank of linear system matrix is %i (N = %i)\n" % (rank, bem['N']))
Example 21
 def trialFunFit(self, s, arr, alphas, pairs=None):
     # construct matrix for extended fitting problem
     A = np.concatenate((1. / (s[:,None] + alphas[None,:]), \
                             arr[:,None] / (s[:,None] + alphas[None,:])), axis=1)
     # find auxiliary residues
     c = la.lstsq(A, arr)[0][-len(alphas):]
     # find zeros of fitted auxiliary function
     H = np.diag(alphas) - np.dot(np.ones((len(alphas),1), dtype=complex), c[None,:])
     alphanew = np.linalg.eig(H)[0]
     # find real residues
     Anew = 1. / (s[:,None] + alphanew[None,:])
     cnew = la.lstsq(Anew, arr)[0]
     
     return alphanew, cnew, None
Example 22
def partial_corr(C, verbose=0):
    """
    Returns the sample linear partial correlation coefficients between pairs of variables in C, controlling 
    for the remaining variables in C.


    Parameters
    ----------
    C : array-like, shape (n, p)
        Array with the different variables. Each column of C is taken as a variable


    Returns
    -------
    P : array-like, shape (p, p)
        P[i, j] contains the partial correlation of C[:, i] and C[:, j] controlling
        for the remaining variables in C.
    """
    
    C = np.asarray(C)
    C = C[~np.isnan(C.sum(1))]

    p = C.shape[1]
    P_corr = np.zeros((p, p), dtype=float)
    if verbose >= 1:
        print("Looping over %d variables..." % p)
    for i in range(p):
        if i % 25 == 24:
            print("\tLoop %d of %d" % (i + 1, p))
        P_corr[i, i] = 1
        for j in range(i+1, p):
            idx = np.ones(p, dtype=bool)
            idx[i] = False
            idx[j] = False
            #beta_i = OLS(C[:, idx], C[:, j]).fit().params.squeeze()
            #beta_j = OLS(C[:, idx], C[:, i]).fit().params.squeeze()

            beta_i = linalg.lstsq(C[:, idx], C[:, j])[0]
            beta_j = linalg.lstsq(C[:, idx], C[:, i])[0]

            res_j = C[:, j] - C[:, idx].dot(beta_i)
            res_i = C[:, i] - C[:, idx].dot(beta_j)
            
            corr = stats.pearsonr(res_i, res_j)[0]
            P_corr[i, j] = corr
            P_corr[j, i] = corr
    if verbose >= 1:
        print("Done.")
    return P_corr
Example 23
def test_for_CI(G, n1, n2, normalized_data, order, alpha):
    """Test if n1 and n2 are conditionally independent.

    If they are not, return None; otherwise return the conditioning set.
    """
    N = normalized_data.shape[1]
    ones = numpy.ones((N,1), dtype=float)
    
    n1_resp = normalized_data[n1,:]    
    n1_neighbors = set(G.neighbors(n1))    
    
    n2_resp = normalized_data[n2,:]
    n2_neighbors = set(G.neighbors(n2))
    
    common_neighbors = n1_neighbors.intersection(n2_neighbors) - set((n1, n2))
    # if there aren't enough neighbors common to n1 and n2, return none
    if len(common_neighbors) < order: 
        return None
    
    min_score = 1e100
    best_p_val = None
    best_neighbors = None
    n_common_neighbors = 0
    for covariates in combinations(common_neighbors, order):
        n_common_neighbors += 1
        predictors = numpy.hstack(
            (ones, normalized_data[numpy.array(covariates),:].T))
        # test if node is independent of neighbors given for some subset
        rv1, _, _, _ = lstsq(predictors, n1_resp)
        rv2, _, _, _ = lstsq(predictors, n2_resp)
        cor, pval =  pearsonr(n1_resp - rv1.dot(predictors.T), 
                              n2_resp - rv2.dot(predictors.T))
        if abs(cor) < min_score:
            min_score = abs(cor)
            best_neighbors = covariates
            best_p_val = pval
        # make the multiple testing correction /n_common_neighbors
        if best_p_val < alpha/n_common_neighbors:
            return None
        #score = math.sqrt(N-order-3)*0.5*math.log((1+cor)/(1-cor))
        #print abs(score),  norm.isf(alpha/(len(neighbors)*2)), cor, pval
        #if abs(score) < norm.isf(alpha/(len(neighbors)*2)): 
    
    # make the multiple testing correction /n_common_neighbors
    if best_p_val < alpha/n_common_neighbors:
        return None
    else:
        return best_neighbors
Example 24
def preProcess(u,y,NumDict):
    
    NumInputs = u.shape[0]
    NumOutputs = y.shape[0]
    NumRows = NumDict['Rows']
    NumCols = NumDict['Columns']
    NSig = NumDict['Dimension']
    UPast,UFuture = getHankelMatrices(u,NumRows,NumCols)
    YPast,YFuture = getHankelMatrices(y,NumRows,NumCols)
    Data = np.vstack((UPast,UFuture,YPast))
    L = la.lstsq(Data.T,YFuture.T)[0].T
    Z = np.dot(L,Data)
    DataShift = np.vstack((UPast,UFuture[NumInputs:],YPast))
    LShift = la.lstsq(DataShift.T,YFuture[NumOutputs:].T)[0].T
    ZShift = np.dot(LShift,DataShift)

    L1 = L[:,:NumInputs*NumRows]
    L3 = L[:,2*NumInputs*NumRows:]

    LPast = np.hstack((L1,L3))
    DataPast = np.vstack((UPast,YPast))

    U, S, Vt = la.svd(np.dot(LPast,DataPast))
    
    Sig = np.diag(S[:NSig])
    SigRt = np.diag(np.sqrt(S[:NSig]))
    Gamma = np.dot(U[:,:NSig],SigRt)
    GammaLess = Gamma[:-NumOutputs]

    GammaPinv = la.pinv(Gamma)
    GammaLessPinv = la.pinv(GammaLess)

    GamShiftSolve = la.lstsq(GammaLess,ZShift)[0]


    GamSolve = la.lstsq(Gamma,Z)[0]
    GamData = np.vstack((GamSolve,UFuture))

    GamYData = np.vstack((GamShiftSolve,YFuture[:NumOutputs]))
    # Should probably move to a better output structure
    # One that doesn't depend so heavily on ordering

    GammaDict = {'Data':GamData,
                 'DataLess':GammaLess,
                 'DataY':GamYData,
                 'Pinv': GammaPinv,
                 'LessPinv': GammaLessPinv}
    return GammaDict,S
Example 25
 def fgmres(self,rhs,tol=1e-6,restrt=None,maxiter=None,callback=None):
     if maxiter == None:
         maxiter = len(rhs)
     if restrt == None:
         restrt = 2*maxiter
     # implemented as in [Saad, 1993]
     # start
     x = zeros(len(rhs))
     H = zeros((restrt+1, restrt))
     V = zeros((len(rhs),restrt))
     Z = zeros((len(rhs),restrt))
     # Arnoldi process (with modified Gramm-Schmidt)
     res = 1.
     j = 0
     r = rhs - self.point.matvec(x)
     beta = norm(r)
     V[:,0]=r/beta
     while j < maxiter and res > tol:
         Z[:,j] = self.point.psolve(V[:,j])
         w = self.point.matvec(Z[:,j])
         for i in range(j+1):
             H[i,j]=dot(w,V[:,i])
             w = w - H[i,j]*V[:,i]
         H[j+1,j] = norm(w)
         V[:,j+1]=w/H[j+1,j]
         e = zeros(j+2)
         e[0]=1.
         y, res, rank, sing_val = lstsq(H[:j+2,:j+1],beta*e)
         j += 1
         print "# GMRES| iteration :", j, "res: ", res/beta
         self.resid = r_[self.resid,res/beta]
         Zy = dot(Z[:,:j],y)
     x = x + Zy
     info = 1
     return (x,info)
Example 26
    def _train(self, data):
        """Train the classifier using `data` (`Dataset`).
        """

        if self.__implementation == "direct":
            # create matrices to solve with additional penalty term
            # determine the lambda matrix
            if self.__lm is None:
                # Not specified, so calculate based on .05*nfeatures
                Lambda = .05*data.nfeatures*np.eye(data.nfeatures)
            else:
                # use the provided penalty
                Lambda = self.__lm*np.eye(data.nfeatures)

            # add the penalty term
            a = np.concatenate( \
                (np.concatenate((data.samples, np.ones((data.nsamples, 1))), 1),
                    np.concatenate((Lambda, np.zeros((data.nfeatures, 1))), 1)))
            b = np.concatenate((data.sa[self.get_space()].value,
                               np.zeros(data.nfeatures)))

            # perform the least sq regression and save the weights
            self.w = lstsq(a, b)[0]
        else:
            raise ValueError, "Unknown implementation '%s'" \
                              % self.__implementation
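The "direct" branch above solves ridge regression by stacking a penalty block under the data. A sketch of the identity it relies on (without the bias column the code appends): stacking sqrt(lam)*I under X makes lstsq minimize ||Xw - y||^2 + lam*||w||^2. Since the code stacks Lambda itself rather than its square root, its effective penalty is Lambda squared:

import numpy as np
from scipy.linalg import lstsq

rng = np.random.RandomState(0)
X = rng.randn(30, 4)
y = rng.randn(30)
lam = 0.5

a = np.vstack([X, np.sqrt(lam) * np.eye(4)])  # augmented design
b = np.concatenate([y, np.zeros(4)])
w_aug = lstsq(a, b)[0]

w_ridge = np.linalg.solve(X.T.dot(X) + lam * np.eye(4), X.T.dot(y))
print(np.allclose(w_aug, w_ridge))  # True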
Example 27
    def process(self, data, obs_vec):
        """
        Solve optimization problem to get :math:`\\alpha`.

        :param phi_mat: Training data matrix :math:`\Phi\in\mathbb{R}^{d\\times n}`
        :type phi_mat: numpy array
        :param obs_vec: Observation vector :math:`y\in\mathbb{R}^{n}`
        :type obs_vec: numpy array
        :return: `alpha`: estimated state vector :math:`\\alpha\in\mathbb{R}^{d}`
        :rtype:  numpy array
        """
        # check consistency of data
        obs_num = obs_vec.shape[0]
        data_num = data.shape[0]

        if obs_num == data_num:
            self.data = data

            # take into account both options
            if self.lam == 0:  # compare by value, not identity
                k_mat = kernel(data, data, self.k_type)
            else:
                dim = data.shape[1]
                k_mat = kernel(data, data, self.k_type) + self.lam*eye(dim)

            self.alpha = lstsq(k_mat, obs_vec.transpose())[0]
            return self.alpha
        else:
            print "ERROR: number of samples for data and observations must be the same"
Example 28
def polynomialFit(x, y, order):
    X = np.array([[xi ** i for i in range(order + 1)] for xi in x])
    Y = np.array(y).reshape((-1, 1))
    # W = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(Y)
    W, _, _, _ = linalg.lstsq(X, Y)
    # print(W)
    return W
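A quick usage sketch (assumes numpy as np and scipy.linalg as linalg at module level, as the function expects): recovering the coefficients of a known quadratic.

x = [0.0, 1.0, 2.0, 3.0, 4.0]
y = [1.0 + 2.0 * xi + 3.0 * xi ** 2 for xi in x]
W = polynomialFit(x, y, order=2)
print(W.ravel())  # approximately [1, 2, 3]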
Example 29
def test_multinomial_grad_hess():
    rng = np.random.RandomState(0)
    n_samples, n_features, n_classes = 100, 5, 3
    X = rng.randn(n_samples, n_features)
    w = rng.rand(n_classes, n_features)
    Y = np.zeros((n_samples, n_classes))
    ind = np.argmax(np.dot(X, w.T), axis=1)
    Y[range(0, n_samples), ind] = 1
    w = w.ravel()
    sample_weights = np.ones(X.shape[0])
    grad, hessp = _multinomial_grad_hess(w, X, Y, alpha=1.,
                                         sample_weight=sample_weights)
    # extract first column of hessian matrix
    vec = np.zeros(n_features * n_classes)
    vec[0] = 1
    hess_col = hessp(vec)

    # Estimate hessian using least squares as done in
    # test_logistic_grad_hess
    e = 1e-3
    d_x = np.linspace(-e, e, 30)
    d_grad = np.array([
        _multinomial_grad_hess(w + t * vec, X, Y, alpha=1.,
                               sample_weight=sample_weights)[0]
        for t in d_x
    ])
    d_grad -= d_grad.mean(axis=0)
    approx_hess_col = linalg.lstsq(d_x[:, np.newaxis], d_grad)[0].ravel()
    assert_array_almost_equal(hess_col, approx_hess_col)
Example 30
def glm(event_matrix, Q, voxels, hrf_function=None, downsample=1,
        convolve=True):
    """
    Perform a GLM from an event matrix
    and return estimated HRFs and associated coefficients
    Q: basis
    """
    Q = np.asarray(Q)
    if Q.ndim == 1:
        Q = Q[:, None]
    if hrf_function is None:
        hrf_function = Q[:, 0]
    if convolve:
        glm_design = convolve_events(event_matrix, Q)[::downsample]
    else:
        glm_design = event_matrix
    n_basis = Q.shape[1]
    n_trials = glm_design.shape[1] // n_basis  # integer, for the reshape below
    n_voxels = voxels.shape[-1]
    full_betas = linalg.lstsq(glm_design, voxels)[0]
    full_betas = full_betas.reshape(n_basis, n_trials, n_voxels, order='F')
    hrfs = full_betas.T.dot(Q.T)
    sign = np.sign((hrfs * hrf_function).sum(-1))
    hrfs = hrfs * sign[..., None]
    norm = hrfs.max(-1)
    hrfs /= norm[..., None]
    betas = norm * sign
    return hrfs.T, betas.T
Example 31
def QITE_step(H_, psi_, db, xv, check):

    import time

    nalpha = len(H_)
    dn_ = 1.0

    if (xv is None):
        xv = []
        for alpha in range(nalpha):
            (A, h, imp, gmp) = H_[alpha]
            nact = imp.shape[0]
            xv.append(np.zeros(nact))

    for alpha in range(nalpha):
        # ----- target state

        t0 = time.time()
        delta_alpha, dnalpha_ = ExpmbH_alpha(H_, psi_, alpha, db)
        delta_alpha -= psi_.copy()
        dn_ *= dnalpha_
        Xop = []
        # ----- pauli action
        (A, h, imp, gmp) = H_[alpha]
        nact = imp.shape[0]
        # print('active:',imp)
        Pmu_psi = np.zeros(imp.shape, dtype=complex)
        for m in range(nact):
            Pmu_psi[m, :] = gmp[m, imp[m, :]] * psi_[imp[m, :]]

        t1 = time.time()

        # ----- set linear system
        Amat = np.dot(np.conj(Pmu_psi), Pmu_psi.T)
        # print('Amat:\n',Amat)
        Amat = 2.0 * np.real(Amat)

        t2 = time.time()
        bvec = np.dot(Pmu_psi, np.conj(delta_alpha))
        bvec = -2.0 * np.imag(bvec)
        t3 = time.time()

        if (check):
            x = SciLA.lstsq(Amat, bvec)[0]
        else:
            zct = np.dot(bvec, Amat)

            def cost_fun(vct):
                return LA.norm(np.dot(Amat, vct) - bvec)**2

            def J_cost_fun(vct):
                wct = np.dot(Amat, vct)
                wct = np.dot(Amat.T, wct)
                return 2.0 * (wct - zct)

            import scipy
            x = scipy.optimize.minimize(cost_fun,
                                        x0=xv[alpha],
                                        method='Newton-CG',
                                        jac=J_cost_fun,
                                        tol=1e-8).x
            xv[alpha] = x.copy()

        print('Pauli Operator')
        Xop.append((A, x, imp, gmp))
        print_Hamiltonian(Xop)
        # print_Hamiltonian(xv)
        #print('\n wavefunction before\n',Pmu_psi)
        t4 = time.time()
        psi_ = Exp_ixP(x, psi_, imp, gmp)
        #print('\n wavefunction after\n', psi_,'\n')
        t5 = time.time()

        # print alpha,t5-t4,t4-t3,t3-t2,t2-t1,t1-t0
        import sys
        sys.stdout.flush()
    # print('op:\n',xv)
    return psi_, dn_, xv, Xop
Example 32
    if trend_method == "corners":
        hull = ConvexHull(survey.srcField.rxList[0].locs[:,:2])
        # Extract only those points that make the ConvexHull
        pts = np.c_[survey.srcField.rxList[0].locs[hull.vertices,:2], survey.dobs[hull.vertices]]
    else:
        # Extract all points
        pts = np.c_[survey.srcField.rxList[0].locs[:,:2], survey.dobs]

    if trend_order == 0:
        data_trend = np.mean(pts[:,2]) * np.ones(rxLoc[:,0].shape)
        print('Removed data mean: {0:.6g}'.format(data_trend[0]))
        
    elif trend_order == 1:
        # best-fit linear plane
        A = np.c_[pts[:,0], pts[:,1], np.ones(pts.shape[0])]
        C,_,_,_ = lstsq(A, pts[:,2])    # coefficients
        
        # evaluate at all data locations
        data_trend = C[0]*rxLoc[:,0] + C[1]*rxLoc[:,1] + C[2]  # y-slope applies to the y coordinate
    
    elif trend_order == 2:
        # best-fit quadratic curve
        A = np.c_[np.ones(pts.shape[0]), pts[:,:2], np.prod(pts[:,:2], axis=1), pts[:,:2]**2]
        C,_,_,_ = lstsq(A, pts[:,2])
        
        # evaluate at all data locations
        data_trend = np.dot(np.c_[
                np.ones(rxLoc[:,0].shape), rxLoc[:,0], rxLoc[:,1], rxLoc[:,0]*rxLoc[:,1], rxLoc[:,0]**2, rxLoc[:,1]**2
                ], C).reshape(rxLoc[:,0].shape)
        
    survey.dobs -= data_trend
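The trend_order == 1 branch is a standard best-fit plane; a self-contained sketch of just that step, with synthetic points standing in for the survey locations:

import numpy as np
from scipy.linalg import lstsq

rng = np.random.RandomState(0)
xy = rng.rand(100, 2)
z = 2.0 * xy[:, 0] - 1.0 * xy[:, 1] + 0.5 + 0.01 * rng.randn(100)

A = np.c_[xy[:, 0], xy[:, 1], np.ones(100)]
C = lstsq(A, z)[0]  # [x-slope, y-slope, offset]
print(C)            # approximately [2, -1, 0.5]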
Example 33
File: mfa.py Project: spillai/utils
def mfa(X, hdim, C, maxiters, W=None, M=None, psi=None, pi=None, eps=1e-2):
    """Fit a Mixture of FA.

    _X_ is dataset in _rows_. _hdim_ is the
    latent dimension, the same for all _C_
    classes.
    """
    # pre calculation of some 'constants'.
    N, d = X.shape
    Ih = np.eye(hdim)
    ll_const = -d / 2. * np.log(2 * np.pi)
    X_sq = X**2

    if W is None:
        W = np.random.randn(C, hdim, d)
    if M is None:
        tmp = np.random.permutation(N)
        M = X[tmp[:C]].copy()
    if psi is None:
        psi = 100 * np.var(X) * np.ones((C, d))
    if pi is None:
        pi = np.ones(C) / C

    # pre allocating some helper memory
    E_z = np.zeros((C, N, hdim))
    Cov_z = np.zeros((C, hdim, hdim))
    # store loglikelihood
    ll = np.zeros((C, N))

    last_ll = -np.inf
    loglike = []
    for i in xrange(maxiters):
        for c in xrange(C):
            # W_c is hdim x d
            W_c = W[c]
            mu_c = M[c]
            # psi_c is D
            psi_c = psi[c]
            fac = W_c / psi_c
            # see Bishop, p. 93, eq. 2.117
            cov_z = la.inv(Ih + np.dot(fac, W_c.T))
            tmp = np.dot(X - mu_c, fac.T)
            # latent expectations
            E_z[c, :, :] = np.dot(tmp, cov_z)
            # latent _covariance_
            Cov_z[c, :, :] = cov_z
            # loglikelihood
            # woodbury identity
            inv_cov_x = np.diag(1. / psi_c) - np.dot(fac.T, np.dot(cov_z, fac))
            _, _det = np.linalg.slogdet(inv_cov_x)
            tmp = np.dot(X - mu_c, inv_cov_x)
            # integrating out latent z's -> again, Bishop, p. 93, eq. 2.115
            ll[c, :] = np.log(pi[c]) + ll_const + 0.5 * _det - 0.5 * np.sum(
                tmp * (X - mu_c), axis=1)
        # posterior class distribution given data
        posteriors = norm_logprob(ll, axis=0)
        # loglikelihood over all datapoints
        ll_sum = np.sum(logsumexp(ll, axis=0))
        loglike.append(ll_sum)

        if ll_sum - last_ll < eps:
            break
        last_ll = ll_sum

        for c in xrange(C):
            z = np.append(E_z[c, :, :], np.ones((N, 1)), axis=1)
            wz = posteriors[c][:, np.newaxis] * z
            wzX = np.dot(wz.T, X)
            wzz = np.dot(wz.T, z)
            N_c = posteriors[c].sum()
            wzz[:hdim, :hdim] += N_c * Cov_z[c, :, :]

            sol = la.lstsq(wzz, wzX)[0]

            M[c, :] = sol[hdim, :]
            W[c, :, :] = sol[:hdim, :]
            psi[c, :] = (np.dot(posteriors[c], X_sq) -
                         np.sum(sol * wzX, axis=0)) / N_c
            psi[c, :] = np.maximum(psi[c, :], SMALL)
            pi[c] = N_c / N
    return W, M, psi, pi, loglike
Example 34
 def test_simple_exact(self):
     a = [[1, 20], [-30, 4]]
     for b in ([[1, 0], [0, 1]], [1, 0], [[2, 1], [-30, 4]]):
         x = lstsq(a, b)[0]
         assert_array_almost_equal(dot(a, x), b)
Example 35
 def test_simple_overdet_complex(self):
     a = [[1 + 2j, 2], [4, 5], [3, 4]]
     b = [1, 2 + 4j, 3]
     x, res, r, s = lstsq(a, b)
     assert_array_almost_equal(x, direct_lstsq(a, b, cmplx=1))
     assert_almost_equal(res, (abs(dot(a, x) - b)**2).sum(axis=0))
Example 36
def calc_risk_scores(bed_file,
                     rs_id_map,
                     phen_map,
                     out_file=None,
                     split_by_chrom=False,
                     adjust_for_sex=False,
                     adjust_for_covariates=False,
                     adjust_for_pcs=False,
                     non_zero_chromosomes=None):
    print 'Parsing PLINK bed file: %s' % bed_file
    num_individs = len(phen_map)
    assert num_individs > 0, 'No individuals found.  Problems parsing the phenotype file?'

    if split_by_chrom:
        raw_effects_prs = sp.zeros(num_individs)
        pval_derived_effects_prs = sp.zeros(num_individs)

        for i in range(1, 23):
            if non_zero_chromosomes is None or i in non_zero_chromosomes:
                genotype_file = bed_file + '_%i_keep' % i
                if os.path.isfile(genotype_file + '.bed'):
                    print 'Working on chromosome %d' % i
                    prs_dict = get_prs(genotype_file, rs_id_map, phen_map)

                    raw_effects_prs += prs_dict['raw_effects_prs']
                    pval_derived_effects_prs += prs_dict[
                        'pval_derived_effects_prs']
            else:
                print 'Skipping chromosome'

    else:
        prs_dict = get_prs(bed_file, rs_id_map, phen_map)
        raw_effects_prs = prs_dict['raw_effects_prs']
        pval_derived_effects_prs = prs_dict['pval_derived_effects_prs']
        true_phens = prs_dict['true_phens']

    # Report prediction accuracy
    raw_eff_corr = sp.corrcoef(raw_effects_prs, prs_dict['true_phens'])[0, 1]
    raw_eff_r2 = raw_eff_corr**2
    pval_eff_corr = sp.corrcoef(pval_derived_effects_prs,
                                prs_dict['true_phens'])[0, 1]
    pval_eff_r2 = pval_eff_corr**2

    print 'Final raw effects PRS correlation: %0.4f' % raw_eff_corr
    print 'Final raw effects PRS r2: %0.4f' % raw_eff_r2
    print 'Final weighted effects PRS correlation: %0.4f' % pval_eff_corr
    print 'Final weighted effects PRS r2: %0.4f' % pval_eff_r2

    res_dict = {'pred_r2': pval_eff_r2}

    raw_effects_prs.shape = (len(raw_effects_prs), 1)
    pval_derived_effects_prs.shape = (len(pval_derived_effects_prs), 1)
    true_phens = sp.array(true_phens)
    true_phens.shape = (len(true_phens), 1)

    # Store covariate weights, slope, etc.
    weights_dict = {}

    # Store Adjusted predictions
    adj_pred_dict = {}

    # Direct effect
    Xs = sp.hstack([pval_derived_effects_prs, sp.ones((len(true_phens), 1))])
    (betas, rss00, r, s) = linalg.lstsq(sp.ones((len(true_phens), 1)),
                                        true_phens)
    (betas, rss, r, s) = linalg.lstsq(Xs, true_phens)
    pred_r2 = 1 - rss / rss00
    weights_dict['unadjusted'] = {
        'Intercept': betas[1][0],
        'ldpred_prs_effect': betas[0][0]
    }

    # Adjust for sex
    if adjust_for_sex and 'sex' in prs_dict and len(prs_dict['sex']) > 0:
        sex = sp.array(prs_dict['sex'])
        sex.shape = (len(sex), 1)
        (betas, rss0, r,
         s) = linalg.lstsq(sp.hstack([sex, sp.ones((len(true_phens), 1))]),
                           true_phens)
        (betas, rss, r, s) = linalg.lstsq(
            sp.hstack([raw_effects_prs, sex,
                       sp.ones((len(true_phens), 1))]), true_phens)
        Xs = sp.hstack(
            [pval_derived_effects_prs, sex,
             sp.ones((len(true_phens), 1))])
        (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
        weights_dict['sex_adj'] = {
            'Intercept': betas[2][0],
            'ldpred_prs_effect': betas[0][0],
            'sex': betas[1][0]
        }
        print 'Fitted effects (betas) for PRS, sex, and intercept on true phenotype:', betas
        adj_pred_dict['sex_adj'] = sp.dot(Xs, betas)
        pred_r2 = 1 - rss / rss0
        print 'Sex adjusted prediction accuracy (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        pred_r2 = 1 - rss / rss00
        print 'Sex adjusted prediction + Sex (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        pred_r2 = 1 - rss_pd / rss0
        print 'Sex adjusted prediction accuracy (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        res_dict['PC_adj_pred_r2'] = pred_r2
        pred_r2 = 1 - rss_pd / rss00
        print 'Sex adjusted prediction + Sex (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        res_dict['PC_adj_pred_r2+PC'] = pred_r2

    # Adjust for PCs
    if adjust_for_pcs and 'pcs' in prs_dict and len(prs_dict['pcs']) > 0:
        pcs = prs_dict['pcs']
        (betas, rss0, r,
         s) = linalg.lstsq(sp.hstack([pcs, sp.ones((len(true_phens), 1))]),
                           true_phens)
        (betas, rss, r, s) = linalg.lstsq(
            sp.hstack([raw_effects_prs, pcs,
                       sp.ones((len(true_phens), 1))]), true_phens)
        Xs = sp.hstack(
            [pval_derived_effects_prs,
             sp.ones((len(true_phens), 1)), pcs])
        (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
        weights_dict['pc_adj'] = {
            'Intercept': betas[1][0],
            'ldpred_prs_effect': betas[0][0],
            'pcs': betas[2][0]
        }
        adj_pred_dict['pc_adj'] = sp.dot(Xs, betas)
        pred_r2 = 1 - rss / rss0
        print 'PC adjusted prediction accuracy (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        pred_r2 = 1 - rss / rss00
        print 'PC adjusted prediction + PCs (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        pred_r2 = 1 - rss_pd / rss0
        print 'PC adjusted prediction accuracy (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        res_dict['PC_adj_pred_r2'] = pred_r2
        pred_r2 = 1 - rss_pd / rss00
        print 'PC adjusted prediction + PCs (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        res_dict['PC_adj_pred_r2+PC'] = pred_r2

        # Adjust for both PCs and Sex
        if adjust_for_sex and 'sex' in prs_dict and len(prs_dict['sex']) > 0:
            sex = sp.array(prs_dict['sex'])
            sex.shape = (len(sex), 1)
            (betas, rss0, r, s) = linalg.lstsq(
                sp.hstack([sex, pcs, sp.ones((len(true_phens), 1))]),
                true_phens)
            (betas, rss, r, s) = linalg.lstsq(
                sp.hstack(
                    [raw_effects_prs, sex, pcs,
                     sp.ones((len(true_phens), 1))]), true_phens)
            Xs = sp.hstack([
                pval_derived_effects_prs, sex,
                sp.ones((len(true_phens), 1)), pcs
            ])
            (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
            weights_dict['sex_pc_adj'] = {
                'Intercept': betas[2][0],
                'ldpred_prs_effect': betas[0][0],
                'sex': betas[1][0],
                'pcs': betas[3][0]
            }
            adj_pred_dict['sex_pc_adj'] = sp.dot(Xs, betas)
            pred_r2 = 1 - rss / rss0
            print 'PCs+Sex adjusted prediction accuracy (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)' % (
                pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
            pred_r2 = 1 - rss / rss00
            print 'PCs+Sex adjusted prediction and PCs+Sex (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)' % (
                pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
            pred_r2 = 1 - rss_pd / rss0
            print 'PCs+Sex adjusted prediction accuracy (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)' % (
                pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
            res_dict['PC_Sex_adj_pred_r2'] = pred_r2
            pred_r2 = 1 - rss_pd / rss00
            print 'PCs+Sex adjusted prediction and PCs+Sex (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)' % (
                pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
            res_dict['PC_Sex_adj_pred_r2+PC_Sex'] = pred_r2

    # Adjust for covariates
    if adjust_for_covariates and 'covariates' in prs_dict and len(
            prs_dict['covariates']) > 0:
        covariates = prs_dict['covariates']
        (betas, rss0, r, s) = linalg.lstsq(
            sp.hstack([covariates, sp.ones((len(true_phens), 1))]), true_phens)
        (betas, rss, r, s) = linalg.lstsq(
            sp.hstack(
                [raw_effects_prs, covariates,
                 sp.ones((len(true_phens), 1))]), true_phens)
        Xs = sp.hstack([
            pval_derived_effects_prs, covariates,
            sp.ones((len(true_phens), 1))
        ])
        (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
        adj_pred_dict['cov_adj'] = sp.dot(Xs, betas)
        pred_r2 = 1 - rss / rss0
        print 'Cov adjusted prediction accuracy (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        pred_r2 = 1 - rss / rss00
        print 'Cov adjusted prediction + Cov (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        pred_r2 = 1 - rss_pd / rss0
        print 'Cov adjusted prediction accuracy (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        res_dict['Cov_adj_pred_r2'] = pred_r2
        pred_r2 = 1 - rss_pd / rss00
        print 'Cov adjusted prediction + Cov (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)' % (
            pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
        res_dict['Cov_adj_pred_r2+Cov'] = pred_r2

        if adjust_for_pcs and 'pcs' in prs_dict and len(
                prs_dict['pcs']) and 'sex' in prs_dict and len(
                    prs_dict['sex']) > 0:
            pcs = prs_dict['pcs']
            sex = sp.array(prs_dict['sex'])
            sex.shape = (len(sex), 1)
            (betas, rss0, r, s) = linalg.lstsq(
                sp.hstack(
                    [covariates, sex, pcs,
                     sp.ones((len(true_phens), 1))]), true_phens)
            (betas, rss, r, s) = linalg.lstsq(
                sp.hstack([
                    raw_effects_prs, covariates, sex, pcs,
                    sp.ones((len(true_phens), 1))
                ]), true_phens)
            Xs = sp.hstack([
                pval_derived_effects_prs, covariates, sex, pcs,
                sp.ones((len(true_phens), 1))
            ])
            (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
            adj_pred_dict['cov_sex_pc_adj'] = sp.dot(Xs, betas)
            pred_r2 = 1 - rss / rss0
            print 'Cov+PCs+Sex adjusted prediction accuracy (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)' % (
                pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
            pred_r2 = 1 - rss / rss00
            print 'Cov+PCs+Sex adjusted prediction and PCs+Sex (R^2) for the whole genome PRS with raw effects was: %0.4f (%0.6f)' % (
                pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
            pred_r2 = 1 - rss_pd / rss0
            print 'Cov+PCs+Sex adjusted prediction accuracy (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)' % (
                pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
            res_dict['Cov_PC_Sex_adj_pred_r2'] = pred_r2
            pred_r2 = 1 - rss_pd / rss00
            print 'Cov+PCs+Sex adjusted prediction and PCs+Sex (R^2) for the whole genome PRS with weighted effects was: %0.4f (%0.6f)' % (
                pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))
            res_dict['Cov_PC_Sex_adj_pred_r2+Cov_PC_Sex'] = pred_r2

    # Now calibration
    y_norm = (true_phens - sp.mean(true_phens)) / sp.std(true_phens)
    denominator = sp.dot(raw_effects_prs.T, raw_effects_prs)
    numerator = sp.dot(raw_effects_prs.T, y_norm)
    regression_slope = (numerator / denominator)[0][0]
    print 'The slope for predictions with raw effects is:', regression_slope

    denominator = sp.dot(pval_derived_effects_prs.T, pval_derived_effects_prs)
    numerator = sp.dot(pval_derived_effects_prs.T, y_norm)
    regression_slope = (numerator / denominator)[0][0]
    print 'The slope for predictions with weighted effects is:', regression_slope

    num_individs = len(prs_dict['pval_derived_effects_prs'])

    # Write PRS out to file.
    if out_file != None:
        with open(out_file, 'w') as f:
            out_str = 'IID, true_phens, raw_effects_prs, pval_derived_effects_prs'
            if 'sex' in prs_dict:
                out_str = out_str + ', sex'
            if 'pcs' in prs_dict:
                pcs_str = ', '.join([
                    'PC%d' % (1 + pc_i)
                    for pc_i in range(len(prs_dict['pcs'][0]))
                ])
                out_str = out_str + ', ' + pcs_str
            out_str += '\n'
            f.write(out_str)
            for i in range(num_individs):
                out_str = '%s, %0.6e, %0.6e, %0.6e, ' % (
                    prs_dict['iids'][i], prs_dict['true_phens'][i],
                    raw_effects_prs[i], pval_derived_effects_prs[i])
                if 'sex' in prs_dict:
                    out_str = out_str + '%d, ' % prs_dict['sex'][i]
                if 'pcs' in prs_dict:
                    pcs_str = ', '.join(map(str, prs_dict['pcs'][i]))
                    out_str = out_str + pcs_str
                out_str += '\n'
                f.write(out_str)

        if len(adj_pred_dict.keys()) > 0:
            with open(out_file + '.adj', 'w') as f:
                adj_prs_labels = adj_pred_dict.keys()
                out_str = 'IID, true_phens, raw_effects_prs, pval_derived_effects_prs, ' + \
                    ', '.join(adj_prs_labels)
                out_str += '\n'
                f.write(out_str)
                for i in range(num_individs):
                    out_str = '%s, %0.6e, %0.6e, %0.6e' % (
                        prs_dict['iids'][i], prs_dict['true_phens'][i],
                        raw_effects_prs[i], pval_derived_effects_prs[i])
                    for adj_prs in adj_prs_labels:
                        out_str += ', %0.4f' % adj_pred_dict[adj_prs][i]
                    out_str += '\n'
                    f.write(out_str)
        if weights_dict != None:
            oh5f = h5py.File(out_file + '.weights.hdf5', 'w')
            for k1 in weights_dict.keys():
                kg = oh5f.create_group(k1)
                for k2 in weights_dict[k1]:
                    kg.create_dataset(k2, data=sp.array(weights_dict[k1][k2]))
            oh5f.close()
    return res_dict
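
# A minimal illustrative sketch (not from the original source) of the
# calibration step above: the slope is a least-squares fit through the
# origin, b = (p'y) / (p'p) for a PRS column p and standardized phenotype y.
import numpy as np

rng = np.random.RandomState(0)
prs = rng.randn(1000, 1)                     # hypothetical PRS column
y = 0.8 * prs + 0.6 * rng.randn(1000, 1)     # hypothetical phenotype
y_norm = (y - np.mean(y)) / np.std(y)

slope = (np.dot(prs.T, y_norm) / np.dot(prs.T, prs))[0][0]
# For a well-calibrated score this slope is close to 1.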
Esempio n. 37
0
def find_roots_2d(coef1, coef2, tol=1e-3):
    """
    Find the common roots of two bivariate polynomials with coefficients specified by
    two 2D arrays.
    the variation along the first dimension (i.e., columns) is in the increasing order of y.
    the variation along the second dimension (i.e., rows) is in the increasing order of x.
    :param coef1: polynomial coefficients the first polynomial for the annihilation along rows
    :param coef2: polynomial coefficients the second polynomial for the annihilation along cols
    :return:
    """
    coef1 /= np.max(np.abs(coef1))
    coef2 /= np.max(np.abs(coef2))
    log_tol = np.log10(tol)
    # assert coef_col.shape[0] >= coef_row.shape[0] and coef_row.shape[1] >= coef_col.shape[1]
    if coef1.shape[1] < coef2.shape[1]:
        # swap input coefficients
        coef1, coef2 = coef2, coef1
    x, y = sympy.symbols('x, y')  # build symbols
    # collect both polynomials as a function of x; y will be included in the coefficients
    poly1 = 0
    poly2 = 0

    max_row_degree_y, max_row_degree_x = np.array(coef1.shape) - 1
    for x_count in range(max_row_degree_x + 1):
        for y_count in range(max_row_degree_y + 1):
            if np.abs(coef1[y_count, x_count]) > 1e-10:
                poly1 += coef1[y_count, x_count] * x ** (max_row_degree_x - x_count) * \
                         y ** (max_row_degree_y - y_count)
            else:
                coef1[y_count, x_count] = 0

    max_col_degree_y, max_col_degree_x = np.array(coef2.shape) - 1
    for x_count in range(max_col_degree_x + 1):
        for y_count in range(max_col_degree_y + 1):
            if np.abs(coef2[y_count, x_count]) > 1e-10:
                poly2 += coef2[y_count, x_count] * x ** (max_col_degree_x - x_count) * \
                         y ** (max_col_degree_y - y_count)
            else:
                coef2[y_count, x_count] = 0

    poly1_x = sympy.Poly(poly1, x)
    poly2_x = sympy.Poly(poly2, x)

    K_x = max_row_degree_x  # highest power of the first polynomial (in x)
    L_x = max_col_degree_x  # highest power of the second polynomial (in x)

    if coef1.shape[0] == 1:  # i.e., independent of variable y
        x_roots_all = np.roots(coef1.squeeze())
        eval_poly2 = sympy.lambdify(x, poly2)
        x_roots = []
        y_roots = []
        for x_loop in x_roots_all:
            y_roots_loop = np.roots(np.array(sympy.Poly(eval_poly2(x_loop), y).all_coeffs(), dtype=complex))
            y_roots.append(y_roots_loop)
            x_roots.append(np.tile(x_loop, y_roots_loop.size))
        coef_validate = coef2
    elif coef2.shape[1] == 1:  # i.e., independent of variable x
        y_roots_all = np.roots(coef2.squeeze())
        eval_poly1 = sympy.lambdify(y, poly1)
        x_roots = []
        y_roots = []
        for y_loop in y_roots_all:
            x_roots_loop = np.roots(np.array(sympy.Poly(eval_poly1(y_loop), x).all_coeffs(), dtype=complex))
            x_roots.append(x_roots_loop)
            y_roots.append(np.tile(y_loop, x_roots_loop.size))
        coef_validate = coef1
    else:
        if L_x >= 1:
            toep1_r = np.hstack((poly1_x.all_coeffs()[::-1], np.zeros(L_x - 1)))
            toep1_r = np.concatenate((toep1_r, np.zeros(L_x + K_x - toep1_r.size)))
            toep1_c = np.concatenate(([poly1_x.all_coeffs()[-1]], np.zeros(L_x - 1)))
        else:  # for the case with L_x == 0
            toep1_r = np.zeros((0, L_x + K_x))
            toep1_c = np.zeros((0, 0))

        if K_x >= 1:
            toep2_r = np.hstack((poly2_x.all_coeffs()[::-1], np.zeros(K_x - 1)))
            toep2_r = np.concatenate((toep2_r, np.zeros(L_x + K_x - toep2_r.size)))
            toep2_c = np.concatenate(([poly2_x.all_coeffs()[-1]], np.zeros(K_x - 1)))
        else:  # for the case with K_x == 0
            toep2_r = np.zeros((0, L_x + K_x))
            toep2_c = np.zeros((0, 0))

        blk_mtx1 = linalg.toeplitz(toep1_c, toep1_r)
        blk_mtx2 = linalg.toeplitz(toep2_c, toep2_r)
        if blk_mtx1.size != 0 and blk_mtx2.size != 0:
            mtx = np.vstack((blk_mtx1, blk_mtx2))
        elif blk_mtx1.size == 0 and blk_mtx2.size != 0:
            mtx = blk_mtx2
        elif blk_mtx1.size != 0 and blk_mtx2.size == 0:
            mtx = blk_mtx1
        else:
            mtx = np.zeros((0, 0))

        max_y_degree1 = coef1.shape[0] - 1

        max_y_degree2 = coef2.shape[0] - 1

        max_poly_degree = int(max_y_degree1 * L_x + max_y_degree2 * K_x)
        num_samples = (max_poly_degree + 1) * 8  # 8 is the over-sampling factor used to determine the poly coef.

        # randomly generate y-values
        # y_vals = np.random.randn(num_samples, 1) + \
        #          1j * np.random.randn(num_samples, 1)
        y_vals = np.exp(1j * 2 * np.pi / num_samples * np.arange(num_samples))[:, np.newaxis]
        y_powers = np.reshape(np.arange(max_poly_degree + 1)[::-1], (1, -1), order='F')
        Y = ne.evaluate('y_vals ** y_powers')

        # compute resultant, which is the determinant of mtx.
        # it is a polynomial in terms of variable y
        func_resultant = sympy.lambdify(y, sympy.Matrix(mtx))
        det_As = np.array([linalg.det(np.array(func_resultant(y_roots_loop), dtype=complex))
                           for y_roots_loop in y_vals.squeeze()], dtype=complex)
        coef_resultant = linalg.lstsq(Y, det_As)[0]

        # trim out very small coefficients
        # eps = np.max(np.abs(coef_resultant)) * tol
        # coef_resultant[np.abs(coef_resultant) < eps] = 0

        y_roots_all = np.roots(coef_resultant)
        # check if there're duplicated roots
        y_roots_all = eliminate_duplicate_roots(y_roots_all)

        # use the root values for y to find the root values for x
        # check if poly1_x or poly2_x are constant w.r.t. x
        if len(poly1_x.all_coeffs()) > 1:
            func_loop = sympy.lambdify(y, poly1_x.all_coeffs())
            coef_validate = coef2
        elif len(poly2_x.all_coeffs()) > 1:
            func_loop = sympy.lambdify(y, poly2_x.all_coeffs())
            coef_validate = coef1
        else:
            raise RuntimeError('Neither polynomial contains x')

        x_roots = []
        y_roots = []
        for loop in range(y_roots_all.size):
            y_roots_loop = y_roots_all[loop]
            x_roots_loop = np.roots(func_loop(y_roots_loop))
            # check if there're duplicated roots
            x_roots_loop = eliminate_duplicate_roots(x_roots_loop)
            for roots_loop in x_roots_loop:
                x_roots.append(roots_loop)
            for roots_loop in np.tile(y_roots_loop, x_roots_loop.size):
                y_roots.append(roots_loop)

    x_roots, y_roots = np.array(x_roots).flatten('F'), np.array(y_roots).flatten('F')
    x_roots, y_roots = eliminate_duplicate_roots_2d(x_roots, y_roots)
    # validate based on the values of the other polynomial, i.e., the one
    # that was not used in the last step to extract the roots
    poly_val = np.log10(np.abs(
        check_error_2d(coef_validate / linalg.norm(coef_validate.flatten()),
                       x_roots, y_roots)))

    # if the error is 2 orders larger than the smallest error, then we discard the root
    # print(poly_val)
    valid_idx = np.bitwise_or(poly_val < np.min(poly_val) + 2, poly_val < log_tol)
    x_roots = x_roots[valid_idx]
    y_roots = y_roots[valid_idx]

    '''
    Further verification with the resultant w.r.t. y, 
    which should also vanish at the common roots
    '''
    poly1_y = sympy.Poly(poly1, y)
    poly2_y = sympy.Poly(poly2, y)

    K_y = max_row_degree_y  # highest power of the first polynomial (in y)
    L_y = max_col_degree_y  # highest power of the second polynomial (in y)

    if L_y >= 1:
        toep1_r = np.hstack((poly1_y.all_coeffs()[::-1], np.zeros(L_y - 1)))
        toep1_r = np.concatenate((toep1_r, np.zeros(L_y + K_y - toep1_r.size)))
        toep1_c = np.concatenate(([poly1_y.all_coeffs()[-1]], np.zeros(L_y - 1)))
    else:  # for the case with L_y == 0
        toep1_r = np.zeros((0, L_y + K_y))
        toep1_c = np.zeros((0, 0))

    if K_y >= 1:
        toep2_r = np.hstack((poly2_y.all_coeffs()[::-1], np.zeros(K_y - 1)))
        toep2_r = np.concatenate((toep2_r, np.zeros(L_y + K_y - toep2_r.size)))
        toep2_c = np.concatenate(([poly2_y.all_coeffs()[-1]], np.zeros(K_y - 1)))
    else:  # for the case with K_y == 0
        toep2_r = np.zeros((0, L_y + K_y))
        toep2_c = np.zeros((0, 0))

    blk_mtx1 = linalg.toeplitz(toep1_c, toep1_r)
    blk_mtx2 = linalg.toeplitz(toep2_c, toep2_r)
    if blk_mtx1.size != 0 and blk_mtx2.size != 0:
        mtx = np.vstack((blk_mtx1, blk_mtx2))
    elif blk_mtx1.size == 0 and blk_mtx2.size != 0:
        mtx = blk_mtx2
    elif blk_mtx1.size != 0 and blk_mtx2.size == 0:
        mtx = blk_mtx1
    else:
        mtx = np.zeros((0, 0))

    func_resultant_verify = sympy.lambdify((x, y), sympy.Matrix(mtx))

    # evaluate the resultant w.r.t. y at the found roots. it should also vanish if
    # the pair is the common root
    res_y_val = np.zeros(x_roots.size, dtype=float)
    for loop in range(x_roots.size):
        res_y_val[loop] = \
            np.abs(linalg.det(
                np.array(
                    func_resultant_verify(x_roots[loop], y_roots[loop]),
                    dtype=complex
                )))

    log_res_y_val = np.log10(res_y_val)
    valid_idx = np.bitwise_or(log_res_y_val < np.min(log_res_y_val) + 2,
                              log_res_y_val < log_tol)
    x_roots = x_roots[valid_idx]
    y_roots = y_roots[valid_idx]
    return x_roots, y_roots
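
# A minimal illustrative sketch (not from the original source) of the core
# idea above: stacking shifted copies of the two coefficient sequences (here
# directly, above via linalg.toeplitz) builds a Sylvester-type matrix whose
# determinant, the resultant, vanishes exactly when the polynomials share a
# root. Univariate example:
import numpy as np
from scipy import linalg

def sylvester(p, q):
    # p, q: coefficients in decreasing powers (np.roots convention)
    m, n = len(p) - 1, len(q) - 1
    S = np.zeros((m + n, m + n))
    for k in range(n):                 # n shifted copies of p
        S[k, k:k + m + 1] = p
    for k in range(m):                 # m shifted copies of q
        S[n + k, k:k + n + 1] = q
    return S

p = np.array([1.0, -3.0, 2.0])         # (x - 1)(x - 2)
q = np.array([1.0, -1.0])              # x - 1, shares the root x = 1
print(linalg.det(sylvester(p, q)))     # ~0: a common root exists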
Esempio n. 38
0
    def buildRectangleModel(self, recBounds, steepness=1):
        '''
		Builds a softmax model in 2 dimensions with a rectangular interior class
		Inputs
		recBounds: A 2x2 list, with the coordinates of the lower left and upper right corners of the rectangle
		steepness: A scalar determining how steep the bounds between softmax classes are
		'''

        B = np.matrix([
            -1, 0, recBounds[0][0], 1, 0, -recBounds[1][0], 0, 1,
            -recBounds[1][1], 0, -1, recBounds[0][1]
        ]).T

        M = np.zeros(shape=(12, 15))

        #Boundary: Left|Near
        rowSB = 0
        classNum1 = 1
        classNum2 = 0
        for i in range(0, 3):
            M[3 * rowSB + i, 3 * classNum2 + i] = -1
            M[3 * rowSB + i, 3 * classNum1 + i] = 1

        #Boundary: Right|Near
        rowSB = 1
        classNum1 = 2
        classNum2 = 0
        for i in range(0, 3):
            M[3 * rowSB + i, 3 * classNum2 + i] = -1
            M[3 * rowSB + i, 3 * classNum1 + i] = 1

        #Boundary: Up|Near
        rowSB = 2
        classNum1 = 3
        classNum2 = 0
        for i in range(0, 3):
            M[3 * rowSB + i, 3 * classNum2 + i] = -1
            M[3 * rowSB + i, 3 * classNum1 + i] = 1

        #Boundary: Down|Near
        rowSB = 3
        classNum1 = 4
        classNum2 = 0
        for i in range(0, 3):
            M[3 * rowSB + i, 3 * classNum2 + i] = -1
            M[3 * rowSB + i, 3 * classNum1 + i] = 1

        A = np.hstack((M, B))
        # print(np.linalg.matrix_rank(A))
        # print(np.linalg.matrix_rank(M))

        Theta = linalg.lstsq(M, B)[0].tolist()

        weight = []
        bias = []
        for i in range(0, len(Theta) // 3):
            weight.append([Theta[3 * i][0], Theta[3 * i + 1][0]])
            bias.append(Theta[3 * i + 2][0])

        steep = steepness
        self.weights = (np.array(weight) * steep).tolist()
        self.bias = (np.array(bias) * steep).tolist()
        self.size = len(self.weights)

        self.alpha = 3
        self.zeta_c = [0] * len(self.weights)
        for i in range(0, len(self.weights)):
            self.zeta_c[i] = random() * 10
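
# A minimal illustrative sketch (not from the original source): M above is
# 12x15, so M*Theta = B is underdetermined and linalg.lstsq returns the
# minimum-norm Theta. Consistency check with a hypothetical rectangle
# [[1, 1], [3, 2]]:
import numpy as np
from scipy import linalg

recBounds = [[1, 1], [3, 2]]
B = np.array([-1, 0, recBounds[0][0], 1, 0, -recBounds[1][0],
              0, 1, -recBounds[1][1], 0, -1, recBounds[0][1]],
             dtype=float).reshape(-1, 1)
M = np.zeros((12, 15))
for rowSB, classNum1 in enumerate([1, 2, 3, 4]):
    for k in range(3):
        M[3 * rowSB + k, k] = -1                    # interior class 0
        M[3 * rowSB + k, 3 * classNum1 + k] = 1     # boundary class
Theta = linalg.lstsq(M, B)[0]
assert np.allclose(np.dot(M, Theta), B)             # boundaries reproduced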
Esempio n. 39
0
    def buildPointsModel(self, points, steepness=1):
        '''
		Builds a 2D softmax model by constructing an interior class from the given points
		Inputs
		points: list of 2D points that construct a convex polygon
		steepness: A scalar determining how steep the bounds between softmax classes are
		'''

        dims = 2

        pointsx = [p[0] for p in points]
        pointsy = [p[1] for p in points]
        centroid = [sum(pointsx) / len(points),
                    sum(pointsy) / len(points)]

        #for each point to the next, find the normal between them.
        B = []
        for i in range(0, len(points)):
            p1 = points[i]

            if (i == len(points) - 1):
                p2 = points[0]
            else:
                p2 = points[i + 1]
            mid = []
            # use a separate index so the outer loop variable is not shadowed
            for d in range(0, len(p1)):
                mid.append((p1[d] + p2[d]) / 2)

            H = np.matrix([[p1[0], p1[1], 1], [p2[0], p2[1], 1],
                           [mid[0], mid[1], 1]])

            Hnull = (self.nullspace(H)).tolist()
            distMed1 = self.distance(mid[0] + Hnull[0][0],
                                     mid[1] + Hnull[1][0], centroid[0],
                                     centroid[1])
            distMed2 = self.distance(mid[0] - Hnull[0][0],
                                     mid[1] - Hnull[1][0], centroid[0],
                                     centroid[1])
            if (distMed1 < distMed2):
                Hnull[0][0] = -Hnull[0][0]
                Hnull[1][0] = -Hnull[1][0]
                Hnull[2][0] = -Hnull[2][0]

            for j in Hnull:
                B.append(j[0])

        B = np.matrix(B).T

        numClasses = len(points) + 1
        boundries = []
        for i in range(1, numClasses):
            boundries.append([i, 0])

        M = np.zeros(shape=(len(boundries) * (dims + 1),
                            numClasses * (dims + 1)))

        for j in range(0, len(boundries)):
            for i in range(0, dims + 1):
                M[(dims + 1) * j + i, (dims + 1) * boundries[j][1] + i] = -1
                M[(dims + 1) * j + i, (dims + 1) * boundries[j][0] + i] = 1

        A = np.hstack((M, B))
        #print(np.linalg.matrix_rank(A))
        #print(np.linalg.matrix_rank(M))

        Theta = linalg.lstsq(M, B)[0].tolist()

        weight = []
        bias = []
        for i in range(0, len(Theta) // (dims + 1)):
            weight.append(
                [Theta[(dims + 1) * i][0], Theta[(dims + 1) * i + 1][0]])
            bias.append(Theta[(dims + 1) * i + dims][0])

        steep = steepness
        self.weights = (np.array(weight) * steep).tolist()
        self.bias = (np.array(bias) * steep).tolist()
        self.size = len(self.weights)

        self.alpha = 3
        self.zeta_c = [0] * len(self.weights)
        for i in range(0, len(self.weights)):
            self.zeta_c[i] = random() * 10
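
# buildPointsModel assumes a self.nullspace helper. A common SVD-based
# implementation (a hypothetical sketch, not from the original source):
import numpy as np
from scipy import linalg

def nullspace(A, atol=1e-13):
    # Right-singular vectors whose singular values are numerically zero
    # span the kernel of A; returned as columns.
    A = np.atleast_2d(A)
    _, s, vh = linalg.svd(A)
    rank = int((s >= atol).sum())
    return vh[rank:].conj().T

# For the 3x3 matrix H built from p1, p2 and their midpoint (three collinear
# homogeneous points), the single nullspace column is the line through them.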
Esempio n. 40
0
import socket

import numpy as np
import scipy.linalg as la
import matplotlib.pyplot as plt

from bcgdata import read_bcg_data

x,ex,y,ey = read_bcg_data()

#
# pivot
#
ax = 14.5; x = x - ax
ay = 12.5; y = y - ay

A = np.array([x,np.ones(len(x))])
w = np.linalg.lstsq(A.T,y)[0]
A2 = np.array([np.exp(x),x**2,x,np.ones(len(x))])
c = la.lstsq(A2.T,y)[0]
#c,resid,rank,sigma = linalg.lstsq(A,y)

xf = np.linspace(-1,1,100)
yf  = w[0]*xf + w[1]
yf2 = c[2]*xf + c[3]
yf3 = c[0]*np.exp(xf) + c[1]*xf**2 + c[2]*xf + c[3]
print "numpy slope=",w[0]," intercept=",w[1]
print "scipy slope=",c[2]," intercept=",c[3]
print "cube, square coeff=", c[0], c[1]
#            
# plot results:
#

plt.plot(x,y,'ro',xf,yf)
plt.plot(xf,yf3)
Esempio n. 41
0
def sgt_dist(freqdist, **kwargs):
    """
    Returns a Simple Good-Turing log-probability distribution.

    The returned log-probability distribution is based on the Good-Turing
    frequency estimation, as first developed by Alan Turing and I. J. Good and
    implemented in a more easily computable way by Gale and Sampson
    (1995/2001 reprint) in the so-called "Simple Good-Turing".

    This implementation is based mostly on the one by "maxbane" (2011)
    (https://github.com/maxbane/simplegoodturing/blob/master/sgt.py), as well
    as on the original one in C by Geoffrey Sampson (1995; 2000; 2005; 2008)
    (https://www.grsampson.net/Resources.html), and on the one by
    Loper, Bird et al. (2001-2018, NLTK Project)
    (http://www.nltk.org/_modules/nltk/probability.html). Please note that,
    due to minor differences in implementation intended to guarantee non-zero
    probabilities even in cases of expected underflow, as well as our
    reliance on scipy's libraries for speed and our way of handling
    probabilities that are not computable when the assumptions of SGT are
    not met, most results will not exactly match those of the 'gold standard'
    of Gale and Sampson, even though the differences are never expected to
    be significant and are equally distributed across the samples.

    Parameters
    ----------

    freqdist : dict
        Frequency distribution of samples (keys) and counts (values) from
        which the probability distribution will be calculated.

    p_value : float
        The p-value for calculating the confidence interval of the empirical
        Turing estimate, which guides the decision of using either the Turing
        estimate "x" or the loglinear smoothed "y". Defaults to 0.05, as per
        the reference implementation by Sampson, but consider that the authors,
        both in their paper and in the code following suggestions credited to
        private communication with Fan Yang, consider using a value of 0.1.

    allow_fail : bool
        A logic value informing if the function is allowed to fail, throwing
        RuntimeWarning exceptions, if the essential assumptions on the
        frequency distribution are not met, i.e., if the slope of the loglinear
        regression is > -1.0 or if an unobserved count is reached before we are
        able to cross the smoothing threshold. If set to False, the estimation
        might result in an unreliable probability distribution; defaults to
        True.

    default_p0 : float
        An optional value indicating the probability for unobserved samples
        ("p0") in cases where no samples with a single count are observed; if
        this value is not specified, "p0" will default to a Laplace estimation
        for the current frequency distribution. Please note that this is an
        intended change from the reference implementation by Gale and Sampson.

    Returns
    -------

    state_prob: dict
        A dictionary of sample to log-probabilities for all the samples in the
        frequency distribution.

    unobserved_prob: float
        The log-probability for samples not found in the frequency
        distribution.
    """

    # Make sure the scientific libraries have been loaded, raising an
    # ImportError if not
    if not np:
        raise ImportError('The package `numpy` is needed by SGT.')
    if not linalg or not stats:
        raise ImportError('The package `scipy` is needed by SGT.')

    # Deal with additional arguments.
    default_p0 = kwargs.get('default_p0', None)
    p_value = kwargs.get('p_value', 0.05)
    allow_fail = kwargs.get('allow_fail', True)

    # Perform basic argument checking.
    _check_probdist_args(freqdist, default_p0=default_p0, p_value=p_value)

    # Calculate the confidence level from the p_value.
    confidence_level = stats.norm.ppf(1. - (p_value / 2.0))

    # Remove all samples with `count` equal to zero.
    freqdist = {
        sample: count
        for sample, count in freqdist.items() if count > 0
    }

    # Prepare vectors for frequencies (`r` in G&S) and frequencies of
    # frequencies (`Nr` in G&S). freqdist.values() is cast to a tuple because
    # the iterable needs to be consumed more than once. `freqs_keys` is sorted to
    # make vector computations faster later on (so we query lists and not
    # dictionaries).
    freqs = tuple(freqdist.values())
    freqs_keys = sorted(set(freqs))  # r -> n (G&S)
    freqs_of_freqs = {c: freqs.count(c) for c in freqs_keys}

    # The papers and the implementations are not clear on how to calculate the
    # probability of unobserved states in case of missing single-count samples
    # (unless we just fail, of course); Gale and Sampson's C implementation
    # defaults to 0.0, which is not acceptable for our purposes. The solution
    # here offered is to either use an user-provided probability (but in this
    # case we are not necessarily defaulting to _UNOBS, and, in fact, the
    # function argument name is `default_p0` and not `unobs_prob`) or default
    # to a Lidstone smoothing with a gamma of 1.0 (i.e., using Laplace
    # smoothing constant).
    # TODO: Investigate and discuss other possible solutions, including
    #       user-defined `gamma`, `bins`, and/or `N`.
    if 1 in freqs_keys:
        p0 = freqs_of_freqs[1] / sum(freqs)
    else:
        p0 = default_p0 or (1. / (sum(freqs) + 1))

    # Compute Sampson's Z: for each count `j`, we set Z[j] to the linear
    # interpolation of {i, j, k}, where `i` is the greatest observed count less
    # than `j`, and `k` the smallest observed count greater than `j`.
    I = [0] + freqs_keys[:-1]
    K = freqs_keys[1:] + [2 * freqs_keys[-1] - I[-1]]
    Z = {
        j: 2 * freqs_of_freqs[j] / (k - i)
        for i, j, k in zip(I, freqs_keys, K)
    }

    # Compute a loglinear regression of Z[r] over r. We cast keys and values to
    # a list for the computation with `linalg.lstsq`.
    z_keys = list(Z.keys())
    z_values = list(Z.values())
    slope, intercept = \
        linalg.lstsq(np.c_[np.log(z_keys), (1,)*len(z_keys)],
                     np.log(z_values))[0]
    #print ('Regression: log(z) = %f*log(r) + %f' % (slope, intercept))
    if slope > -1.0 and allow_fail:
        raise RuntimeWarning("In SGT, linear regression slope is > -1.0.")

    # Apply Gale and Sampson's "simple" loglinear smoothing method.
    r_smoothed = {}
    use_y = False
    for r in freqs_keys:
        # `y` is the loglinear smoothing.
        y = float(r+1) * \
            np.exp(slope*np.log(r+1) + intercept) / \
            np.exp(slope*np.log(r) + intercept)

        # If we've already started using `y` as the estimate for `r`, then
        # continue doing so; also start doing so if no samples were observed
        # with count equal to `r+1` (following comments and variable names in
        # both Sampson's C implementation and in NLTK, we check at which
        # point we should `switch`)
        if r + 1 not in freqs_of_freqs:
            if not use_y:
                # An unobserved count was reached before we were able to cross
                # the smoothing threshold; this means that assumptions were
                # not met and the results will likely be off.
                if allow_fail:
                    raise RuntimeWarning(
                        "In SGT, unobserved count before smoothing threshold.")

            use_y = True

        # If we are using `y`, just copy its value to `r_smoothed`, otherwise
        # perform the actual calculation.
        if use_y:
            r_smoothed[r] = y
        else:
            # `estim` is the empirical Turing estimate for `r` (equivalent to
            # `x` in G&S)
            estim = (float(r + 1) * freqs_of_freqs[r + 1]) / freqs_of_freqs[r]

            Nr = float(freqs_of_freqs[r])
            Nr1 = float(freqs_of_freqs[r + 1])

            # `width` is the width of the confidence interval of the empirical
            # Turing estimate (for which Sampson uses 95% but suggests 90%),
            # when assuming independence.
            width = confidence_level * \
                    np.sqrt(float(r+1)**2 * (Nr1 / Nr**2) * (1. + (Nr1 / Nr)))

            # If the difference between `x` and `y` is more than `width`, then
            # the empirical Turing estimate `x` tends to be more accurate.
            # Otherwise, use the loglinear smoothed value `y`.
            if abs(estim - y) > width:
                r_smoothed[r] = estim
            else:
                use_y = True
                r_smoothed[r] = y

    # (Re)normalize and return the resulting smoothed probabilities, less the
    # estimated probability mass of unseen species; please note that we might
    # be unable to calculate some probabilities if the function was not allowed
    # to fail, mostly due to math domain errors. We default to `p0` in all such
    # cases.
    smooth_sum = sum(
        [freqs_of_freqs[r] * r_smooth for r, r_smooth in r_smoothed.items()])

    # Build the probability distribution for the observed samples and for
    # unobserved ones.
    prob_unk = math.log(p0)
    probdist = {}
    for sample, count in freqdist.items():
        prob = (1.0 - p0) * (r_smoothed[count] / smooth_sum)
        if prob == 0.0:
            probdist[sample] = math.log(p0)
        else:
            probdist[sample] = math.log(prob)

    return probdist, prob_unk
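
# A minimal illustrative sketch (not from the original source) of the
# smoothing step above: fit log(Z_r) = slope*log(r) + intercept by least
# squares and read the smoothed Good-Turing counts off the line. The
# numbers here are made up for illustration.
import numpy as np
from scipy import linalg

r = np.array([1.0, 2.0, 3.0, 5.0, 10.0])        # observed counts
Z = np.array([120.0, 40.0, 24.0, 8.0, 2.0])     # interpolated freqs-of-freqs
design = np.c_[np.log(r), np.ones(len(r))]
slope, intercept = linalg.lstsq(design, np.log(Z))[0]

# Smoothed count r* = (r+1) * S(r+1) / S(r), with S(r) = exp(intercept) * r**slope
r_star = (r + 1) * np.exp(slope * np.log(r + 1) + intercept) \
             / np.exp(slope * np.log(r) + intercept)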
Esempio n. 42
0
def __arr_update_core(i, micro_matrix, rhs, solution, rcond, direction):
    """Update TT core for ARR

    Parameters
    ----------
    i: int
        core index
    micro_op: ndarray
        micro matrix for ith TT core
    rhs: ndarray
        right-hand side for ith TT core
    solution: instance of TT class
        approximated solution of the system of linear equations
    rcond: float
        cut-off ratio for singular values of the subproblems, parameter for NumPy's lstsq
    direction: string
        'forward' if first half sweep, 'backward' if second half sweep
    """

    # solve the micro system for the ith TT core
    solution.cores[i], _, _, _ = lin.lstsq(micro_matrix.T,
                                           rhs,
                                           cond=rcond,
                                           lapack_driver='gelss')

    # reshape solution and orthonormalization
    # ---------------------------------------

    # first half sweep
    if direction == 'forward':
        # decompose solution
        [q, _] = lin.qr(solution.cores[i].reshape(
            solution.ranks[i] * solution.row_dims[i], solution.ranks[i + 1]),
                        overwrite_a=True,
                        mode='economic',
                        check_finite=False)

        # set new rank
        solution.ranks[i + 1] = q.shape[1]

        # save orthonormal part
        solution.cores[i] = q.reshape(solution.ranks[i], solution.row_dims[i],
                                      1, solution.ranks[i + 1])

    # second half sweep
    if direction == 'backward':

        if i > 0:

            # decompose solution
            [_, q] = lin.rq(solution.cores[i].reshape(
                solution.ranks[i],
                solution.row_dims[i] * solution.ranks[i + 1]),
                            overwrite_a=True,
                            mode='economic',
                            check_finite=False)

            # set new rank
            solution.ranks[i] = q.shape[0]

            # save orthonormal part
            solution.cores[i] = q.reshape(solution.ranks[i],
                                          solution.row_dims[i], 1,
                                          solution.ranks[i + 1])

        else:

            # last iteration step
            solution.cores[i] = solution.cores[i].reshape(
                solution.ranks[i], solution.row_dims[i], 1,
                solution.ranks[i + 1])
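
# A minimal illustrative sketch (not from the original source) of the solver
# options used above: `cond` truncates singular values below cond * sigma_max,
# so an ill-conditioned micro matrix yields a regularized, rank-reduced solve.
import numpy as np
from scipy import linalg as lin

A = np.array([[1.0, 1.0],
              [1.0, 1.0 + 1e-12]])       # nearly rank-deficient
b = np.array([2.0, 2.0])
x, _, rank, sv = lin.lstsq(A, b, cond=1e-8, lapack_driver='gelss')
# rank == 1: the tiny second singular value was discarded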
Esempio n. 43
0
 def test_simple_underdet(self):
     a = [[1, 2, 3], [4, 5, 6]]
     b = [1, 2]
     x, res, r, s = lstsq(a, b)
     # XXX: need independent check
     assert_array_almost_equal(x, [-0.05555556, 0.11111111, 0.27777778])
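     # An independent check of the expected values (not in the original
     # test): the minimum-norm least-squares solution equals pinv(a) @ b:
     #     np.linalg.pinv([[1, 2, 3], [4, 5, 6]]).dot([1, 2])
     #     -> array([-0.05555556, 0.11111111, 0.27777778])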
Esempio n. 44
0
def estimate_dem_error(ts0,
                       A0,
                       tbase,
                       drop_date=None,
                       phaseVelocity=False,
                       num_step=0):
    """Estimate DEM error with least square optimization.
    Parameters: ts0 : 2D np.array in size of (numDate, numPixel), original displacement time-series
                A0  : 2D np.array in size of (numDate, model_num), design matrix in [A_geom, A_def]
                tbase : 2D np.array in size of (numDate, 1), temporal baseline
                drop_date : 1D np.array in bool data type, marks the dates used in the estimation
                phaseVelocity : bool, use phase history or phase velocity for minimization
                num_step : int, number of step functions included in the design matrix
    Returns:    delta_z: 2D np.array in size of (1,       numPixel), estimated DEM residual
                ts_cor : 2D np.array in size of (numDate, numPixel),
                            corrected timeseries = tsOrig - delta_z_phase
                ts_res : 2D np.array in size of (numDate, numPixel),
                            residual timeseries = tsOrig - delta_z_phase - defModel
                step_def : 2D np.array in size of (num_step, numPixel), estimated step deformation
    Example:    delta_z, ts_cor, ts_res, step_def = estimate_dem_error(ts, A, tbase, drop_date)
    """
    if len(ts0.shape) == 1:
        ts0 = ts0.reshape(-1, 1)
    if drop_date is None:
        drop_date = np.ones(ts0.shape[0], np.bool_)

    # Prepare Design matrix A and observations ts for inversion
    A = A0[drop_date, :]
    ts = ts0[drop_date, :]
    if phaseVelocity:
        tbase = tbase[drop_date, :]
        A = np.diff(A, axis=0) / np.diff(tbase, axis=0)
        ts = np.diff(ts, axis=0) / np.diff(tbase, axis=0)

    # Inverse using L-2 norm to get unknown parameters X
    # X = [delta_z, constC, vel, acc, deltaAcc, ..., step1, step2, ...]
    # equivalent to X = np.dot(np.dot(np.linalg.inv(np.dot(A.T, A)), A.T), ts)
    #               X = np.dot(np.linalg.pinv(A), ts)
    X = linalg.lstsq(A, ts, cond=1e-15)[0]

    # Prepare Outputs
    delta_z = X[0, :]
    ts_cor = ts0 - np.dot(A0[:, 0].reshape(-1, 1), delta_z.reshape(1, -1))
    ts_res = ts0 - np.dot(A0, X)

    step_def = None
    if num_step > 0:
        step_def = X[-1 * num_step:, :].reshape(num_step, -1)

    # for debug
    debug_mode = False
    if debug_mode:
        import matplotlib.pyplot as plt
        fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=4,
                                                 ncols=1,
                                                 figsize=(8, 8))
        ts_all = np.hstack((ts0, ts_res, ts_cor))
        ymin = np.min(ts_all)
        ymax = np.max(ts_all)
        ax1.plot(ts0, '.')
        ax1.set_ylim((ymin, ymax))
        ax1.set_title('Original  Timeseries')
        ax2.plot(ts_cor, '.')
        ax2.set_ylim((ymin, ymax))
        ax2.set_title('Corrected Timeseries')
        ax3.plot(ts_res, '.')
        ax3.set_ylim((ymin, ymax))
        ax3.set_title('Fitting Residual')
        ax4.plot(ts_cor - ts_res, '.')
        ax4.set_ylim((ymin, ymax))
        ax4.set_title('Fitted Deformation Model')
        plt.show()

    return delta_z, ts_cor, ts_res, step_def
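
# A quick standalone check (not from the original source) of the equivalence
# noted in the comments above: for a well-conditioned design matrix, lstsq,
# the normal equations, and the pseudoinverse all return the same X.
import numpy as np
from scipy import linalg

rng = np.random.RandomState(0)
A = rng.randn(20, 3)
ts = rng.randn(20, 2)
X1 = linalg.lstsq(A, ts, cond=1e-15)[0]
X2 = np.dot(np.dot(np.linalg.inv(np.dot(A.T, A)), A.T), ts)
X3 = np.dot(np.linalg.pinv(A), ts)
assert np.allclose(X1, X2) and np.allclose(X1, X3)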
Esempio n. 45
0
 def test_simple_overdet(self):
     a = [[1, 2], [4, 5], [3, 4]]
     b = [1, 2, 3]
     x, res, r, s = lstsq(a, b)
     assert_array_almost_equal(x, direct_lstsq(a, b))
     assert_almost_equal((abs(dot(a, x) - b)**2).sum(axis=0), res)
Esempio n. 46
0
    def fit(self, X, y, sample_weight=None):
        """
        Fit linear model.

        Parameters
        ----------
        X : numpy array or sparse matrix of shape [n_samples,n_features]
            Training data

        y : numpy array of shape [n_samples, n_targets]
            Target values. Will be cast to X's dtype if necessary

        sample_weight : numpy array of shape [n_samples]
            Individual weights for each sample

            .. versionadded:: 0.17
               parameter *sample_weight* support to LinearRegression.

        Returns
        -------
        self : returns an instance of self.
        """

        n_jobs_ = self.n_jobs
        X, y = check_X_y(X,
                         y,
                         accept_sparse=['csr', 'csc', 'coo'],
                         y_numeric=True,
                         multi_output=True)

        if sample_weight is not None and np.atleast_1d(sample_weight).ndim > 1:
            raise ValueError("Sample weights must be 1D array or scalar")

        X, y, X_offset, y_offset, X_scale = self._preprocess_data(
            X,
            y,
            fit_intercept=self.fit_intercept,
            normalize=self.normalize,
            copy=self.copy_X,
            sample_weight=sample_weight)

        if sample_weight is not None:
            # Sample weight can be implemented via a simple rescaling.
            X, y = _rescale_data(X, y, sample_weight)

        if sp.issparse(X):
            if y.ndim < 2:
                out = sparse_lsqr(X, y)
                self.coef_ = out[0]
                self._residues = out[3]
            else:
                # sparse_lstsq cannot handle y with shape (M, K)
                outs = Parallel(n_jobs=n_jobs_)(
                    delayed(sparse_lsqr)(X, y[:, j].ravel())
                    for j in range(y.shape[1]))
                self.coef_ = np.vstack([out[0] for out in outs])
                self._residues = np.vstack([out[3] for out in outs])
        else:
            self.coef_, self._residues, self.rank_, self.singular_ = \
                linalg.lstsq(X, y)
            self.coef_ = self.coef_.T

        if y.ndim == 1:
            self.coef_ = np.ravel(self.coef_)
        self._set_intercept(X_offset, y_offset, X_scale)
        return self
Esempio n. 47
0
def calc_risk_scores(bimfile_name,
                     rs_id_map,
                     phen_map,
                     K_bins=1,
                     out_file=None,
                     verbose=False,
                     cv_10fold=True,
                     weights_file=None,
                     print_effects=False):
    num_individs = len(phen_map)
    assert num_individs > 0, 'No individuals found.  Problems parsing the phenotype file?'
    #print K_bins
    if K_bins > 1:
        prs_dict_bins = {}
        bk = 1
        while bk <= K_bins:
            prs_dict_bins["pval_derived_effects_prs_bin_%d" %
                          bk] = sp.zeros(num_individs)
            bk += 1

    #print prs_dict_bins.keys()
    if bimfile_name is not None:
        raw_effects_prs = sp.zeros(num_individs)
        pval_derived_effects_prs = sp.zeros(num_individs)

        bimf1 = re.sub(r"\[1:22\]", "[0-9]", bimfile_name)
        bimf2 = re.sub(r"\[1:22\]", "[0-2][0-9]", bimfile_name)
        bimfile_list = glob.glob(bimf1 + ".bim") + glob.glob(bimf2 + ".bim")
        bimfile_list.sort(key=natural_keys)

        for bimfile in bimfile_list:
            genotype_file = re.sub(r".bim", "", bimfile)
            print 'Get PRS on file %s' % bimfile
            prs_dict = get_prs_bins(genotype_file,
                                    rs_id_map,
                                    phen_map=phen_map,
                                    K_bins=K_bins,
                                    verbose=verbose)

            raw_effects_prs += prs_dict['raw_effects_prs']
            pval_derived_effects_prs += prs_dict['pval_derived_effects_prs']
            if K_bins > 1:
                bk = 1
                while bk <= K_bins:
                    prs_dict_bins["pval_derived_effects_prs_bin_%d" %
                                  bk] += prs_dict[
                                      "pval_derived_effects_prs_bin_%d" % bk]
                    bk += 1

        true_phens = prs_dict['true_phens']

    raw_eff_corr = sp.corrcoef(raw_effects_prs, prs_dict['true_phens'])[0, 1]
    raw_eff_r2 = raw_eff_corr**2
    pval_eff_corr = sp.corrcoef(pval_derived_effects_prs,
                                prs_dict['true_phens'])[0, 1]
    pval_eff_r2 = pval_eff_corr**2

    print 'Final raw effects PRS correlation: %0.4f' % raw_eff_corr
    print 'Final raw effects PRS r2: %0.4f' % raw_eff_r2
    print 'Final LDpred-funct-inf PRS correlation: %0.4f' % pval_eff_corr
    print 'Final LDpred-funct-inf PRS r2: %0.4f' % pval_eff_r2

    if K_bins == 1:
        print "Since the selected/calculated number of bins is 1, LDpred-funct equals to LDpred-funct-inf"

    cv_effects_dir = {}
    if K_bins > 1:
        X = sp.ones((num_individs, 1))
        bk = 1
        while bk <= K_bins:
            prs_dict_bins["pval_derived_effects_prs_bin_%d" % bk].shape = (len(
                prs_dict_bins["pval_derived_effects_prs_bin_%d" % bk]), 1)
            X = sp.hstack(
                [X, prs_dict_bins["pval_derived_effects_prs_bin_%d" % bk]])
            #print(bk)
            #print(X[0:5,])
            bk += 1

        true_phens = sp.array(true_phens)
        true_phens.shape = (len(true_phens), 1)

        (betas, rss0, r, s) = linalg.lstsq(X, true_phens)

        ### In sample fit
        Y_pred = sp.dot(X, betas)
        Y_pred.shape = (len(true_phens), )
        # Report prediction accuracy
        bin_in_sample_eff_corr = sp.corrcoef(Y_pred, prs_dict['true_phens'])[0,
                                                                             1]
        bin_eff_r2 = bin_in_sample_eff_corr**2

        print 'Final in-sample LDpredfunct (%d bins) PRS correlation: %0.4f' % (
            K_bins, bin_in_sample_eff_corr)
        print 'Final in-sample LDpredfunct (%d bins) PRS R2: %0.4f' % (
            K_bins, bin_eff_r2)
        print 'Final in-sample LDpredfunct (%d bins) PRS adjusted-R2: %0.4f' % (
            K_bins, 1 - (1 - bin_eff_r2) * (len(true_phens) - 1) /
            (len(true_phens) - K_bins - 1))

        ###
        if cv_10fold:
            test_size = len(true_phens)
            cv_fold_size = int(test_size / 10)
            bound_cv_test = []
            for k in range(10):
                bound_cv_test.append(k * cv_fold_size)
            bound_cv_test.append(test_size - 1)
            bin_eff_r2_arr = []
            for cv_iter in range(10):
                Xtrain = sp.copy(X)
                Xtest = sp.copy(X)
                Ytrain = sp.copy(true_phens)
                Ytest = sp.copy(true_phens)

                Xtest = Xtest[bound_cv_test[cv_iter]:bound_cv_test[cv_iter +
                                                                   1], ]
                Ytest = Ytest[bound_cv_test[cv_iter]:bound_cv_test[cv_iter +
                                                                   1]]

                Xtrain = sp.delete(
                    Xtrain,
                    range(bound_cv_test[cv_iter], bound_cv_test[cv_iter + 1]),
                    0)
                Ytrain = sp.delete(
                    Ytrain,
                    range(bound_cv_test[cv_iter], bound_cv_test[cv_iter + 1]),
                    0)
                (betas, rss0, r, s) = linalg.lstsq(Xtrain, Ytrain)
                Y_pred = sp.dot(Xtest, betas)
                Y_pred.shape = (len(Ytest), )
                Ytest.shape = (len(Ytest), )
                # Report prediction accuracy
                bin_in_sample_eff_corr = sp.corrcoef(Y_pred, Ytest)[0, 1]
                bin_eff_r2 = bin_in_sample_eff_corr**2
                bin_eff_r2_arr.append(bin_eff_r2)
                if print_effects:
                    cv_effects_dir["cv_%d" % cv_iter] = bin_eff_r2_arr
                    parse_ldpred_res_bins_regularized(
                        weights_file,
                        weights_out_file=weights_file + "cv_%d.txt" % cv_iter,
                        weights=betas,
                        rs_id_map=rs_id_map)

            print 'Final 10-fold cross validation LDpredfunct (%d bins) PRS average R2 : %0.4f ' % (
                K_bins, sp.mean(bin_eff_r2_arr))

    res_dict = {'pred_r2': pval_eff_r2}

    raw_effects_prs.shape = (len(raw_effects_prs), 1)
    pval_derived_effects_prs.shape = (len(pval_derived_effects_prs), 1)
    true_phens = sp.array(true_phens)
    true_phens.shape = (len(true_phens), 1)

    # Store covariate weights, slope, etc.
    weights_dict = {}

    num_individs = len(prs_dict['pval_derived_effects_prs'])

    # Write PRS out to file.

    if out_file is not None:
        with open(out_file, 'w') as f:
            out_str = 'IID, true_phens, raw_effects_prs, pval_derived_effects_prs'
            if K_bins > 1:
                Kbins_str = ",".join("Bin_%d" % (1 + bin_i)
                                     for bin_i in range(K_bins))
                out_str = out_str + ', ' + Kbins_str
            out_str += '\n'
            f.write(out_str)
            for i in range(num_individs):
                out_str = '%s, %0.6e, %0.6e, %0.6e ' % (
                    prs_dict['iids'][i], prs_dict['true_phens'][i],
                    raw_effects_prs[i], pval_derived_effects_prs[i])
                bins_prs_ind_i = []
                if K_bins > 1:
                    bk = 1
                    while bk <= K_bins:
                        bins_prs_ind_i.append(
                            str(prs_dict_bins["pval_derived_effects_prs_bin_%d"
                                              % bk][i][0]))
                        bk += 1
                    Kbins_str = ', '.join(map(str, bins_prs_ind_i))
                    out_str = out_str + ', ' + Kbins_str
                out_str += '\n'
                f.write(out_str)

        # if weights_dict != None:
        #     oh5f = h5py.File(out_file + '.weights.hdf5', 'w')
        #     for k1 in weights_dict.keys():
        #         kg = oh5f.create_group(k1)
        #         for k2 in weights_dict[k1]:
        #             kg.create_dataset(k2, data=sp.array(weights_dict[k1][k2]))
        #     oh5f.close()
    return res_dict
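
# A minimal illustrative sketch (not from the original source) of the
# K_bins > 1 branch above: regress the phenotype on an intercept plus one
# PRS column per bin; the fitted betas re-weight the bins. Synthetic data:
import numpy as np
from scipy import linalg

rng = np.random.RandomState(1)
n, k_bins = 500, 4
prs_bins = rng.randn(n, k_bins)                          # per-bin scores
true_w = np.array([0.5, 0.3, 0.1, 0.05])
y = np.dot(prs_bins, true_w) + rng.randn(n)
X = np.hstack([np.ones((n, 1)), prs_bins])
betas = linalg.lstsq(X, y)[0]                            # [intercept, w_1..w_K]
r2 = np.corrcoef(np.dot(X, betas), y)[0, 1] ** 2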
Esempio n. 48
0
    def fit(self, X, y, sample_weight=None):
        """
        Fit linear model.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values. Will be cast to X's dtype if necessary

        sample_weight : array-like of shape (n_samples,), default=None
            Individual weights for each sample

            .. versionadded:: 0.17
               parameter *sample_weight* support to LinearRegression.

        Returns
        -------
        self : returns an instance of self.
        """

        n_jobs_ = self.n_jobs
        X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc', 'coo'],
                                   y_numeric=True, multi_output=True)

        if sample_weight is not None:
            sample_weight = _check_sample_weight(sample_weight, X,
                                                 dtype=X.dtype)

        X, y, X_offset, y_offset, X_scale = self._preprocess_data(
            X, y, fit_intercept=self.fit_intercept, normalize=self.normalize,
            copy=self.copy_X, sample_weight=sample_weight,
            return_mean=True)

        if sample_weight is not None:
            # Sample weight can be implemented via a simple rescaling.
            X, y = _rescale_data(X, y, sample_weight)

        if sp.issparse(X):
            X_offset_scale = X_offset / X_scale

            def matvec(b):
                return X.dot(b) - b.dot(X_offset_scale)

            def rmatvec(b):
                return X.T.dot(b) - X_offset_scale * np.sum(b)

            X_centered = sparse.linalg.LinearOperator(shape=X.shape,
                                                      matvec=matvec,
                                                      rmatvec=rmatvec)

            if y.ndim < 2:
                out = sparse_lsqr(X_centered, y)
                self.coef_ = out[0]
                self._residues = out[3]
            else:
                # sparse_lstsq cannot handle y with shape (M, K)
                outs = Parallel(n_jobs=n_jobs_)(
                    delayed(sparse_lsqr)(X_centered, y[:, j].ravel())
                    for j in range(y.shape[1]))
                self.coef_ = np.vstack([out[0] for out in outs])
                self._residues = np.vstack([out[3] for out in outs])
        else:
            self.coef_, self._residues, self.rank_, self.singular_ = \
                linalg.lstsq(X, y)
            self.coef_ = self.coef_.T

        if y.ndim == 1:
            self.coef_ = np.ravel(self.coef_)
        self._set_intercept(X_offset, y_offset, X_scale)
        return self
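
# A minimal standalone sketch (not from the original source) of the
# centering trick above: wrapping the sparse matrix in a LinearOperator
# applies the column offsets implicitly, so X is never densified:
# matvec(b) = (X - 1 * offset') b.
import numpy as np
from scipy import sparse
from scipy.sparse.linalg import LinearOperator, lsqr

X = sparse.random(50, 5, density=0.3, format='csr', random_state=0)
offset = np.asarray(X.mean(axis=0)).ravel()

X_centered = LinearOperator(
    shape=X.shape,
    matvec=lambda b: X.dot(b) - b.dot(offset),
    rmatvec=lambda b: X.T.dot(b) - offset * np.sum(b))

y = np.random.RandomState(0).randn(50)
coef = lsqr(X_centered, y - y.mean())[0]   # same fit as densely centering X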
Esempio n. 49
0
def calc_risk_scores(bed_file,
                     rs_id_map,
                     phen_map,
                     out_file=None,
                     split_by_chrom=False,
                     adjust_for_sex=False,
                     adjust_for_covariates=False,
                     adjust_for_pcs=False,
                     non_zero_chromosomes=None,
                     only_score=False,
                     verbose=False,
                     summary_dict=None):
    if verbose:
        print('Parsing PLINK bed file: %s' % bed_file)

    if split_by_chrom:
        num_individs = len(phen_map)
        assert num_individs > 0, 'No individuals found.  Problems parsing the phenotype file?'
        pval_derived_effects_prs = sp.zeros(num_individs)

        for i in range(1, 23):
            if non_zero_chromosomes is None or i in non_zero_chromosomes:
                genotype_file = bed_file + '_%i_keep' % i
                if os.path.isfile(genotype_file + '.bed'):
                    if verbose:
                        print('Working on chromosome %d' % i)
                    prs_dict = get_prs(genotype_file,
                                       rs_id_map,
                                       phen_map,
                                       only_score=only_score,
                                       verbose=verbose)

                    pval_derived_effects_prs += prs_dict[
                        'pval_derived_effects_prs']
            elif verbose:
                print('Skipping chromosome')

    else:
        prs_dict = get_prs(bed_file,
                           rs_id_map,
                           phen_map,
                           only_score=only_score,
                           verbose=verbose)
        num_individs = len(prs_dict['iids'])
        pval_derived_effects_prs = prs_dict['pval_derived_effects_prs']

    if only_score:
        write_only_scores_file(out_file, prs_dict, pval_derived_effects_prs)
        res_dict = {}
    elif sp.std(prs_dict['true_phens']) == 0:
        if verbose:
            print('No variance left to explain in phenotype.')
        res_dict = {'pred_r2': 0}
    else:
        # Report prediction accuracy
        assert len(
            phen_map
        ) > 0, 'No individuals found.  Problems parsing the phenotype file?'
        # Store covariate weights, slope, etc.
        weights_dict = {}

        # Store Adjusted predictions
        adj_pred_dict = {}

        #If there is no prediction, then output 0s.
        if sp.std(pval_derived_effects_prs) == 0:
            res_dict = {'pred_r2': 0}
            weights_dict['unadjusted'] = {
                'Intercept': 0,
                'ldpred_prs_effect': 0
            }
        else:
            pval_eff_corr = sp.corrcoef(pval_derived_effects_prs,
                                        prs_dict['true_phens'])[0, 1]
            pval_eff_r2 = pval_eff_corr**2

            res_dict = {'pred_r2': pval_eff_r2}

            pval_derived_effects_prs.shape = (len(pval_derived_effects_prs), 1)
            true_phens = sp.array(prs_dict['true_phens'])
            true_phens.shape = (len(true_phens), 1)

            # Direct effect
            Xs = sp.hstack(
                [pval_derived_effects_prs,
                 sp.ones((len(true_phens), 1))])
            (betas, rss00, r, s) = linalg.lstsq(sp.ones((len(true_phens), 1)),
                                                true_phens)
            (betas, rss, r, s) = linalg.lstsq(Xs, true_phens)
            pred_r2 = 1 - rss / rss00
            weights_dict['unadjusted'] = {
                'Intercept': betas[1][0],
                'ldpred_prs_effect': betas[0][0]
            }

            if verbose:
                print('PRS correlation: %0.4f' % pval_eff_corr)
            print('Variance explained (Pearson R2) by PRS: %0.4f' % pred_r2)

            # Adjust for sex
            if adjust_for_sex and 'sex' in prs_dict and len(
                    prs_dict['sex']) > 0:
                sex = sp.array(prs_dict['sex'])
                sex.shape = (len(sex), 1)
                (betas, rss0, r, s) = linalg.lstsq(
                    sp.hstack([sex, sp.ones((len(true_phens), 1))]),
                    true_phens)
                Xs = sp.hstack([
                    pval_derived_effects_prs, sex,
                    sp.ones((len(true_phens), 1))
                ])
                (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
                weights_dict['sex_adj'] = {
                    'Intercept': betas[2][0],
                    'ldpred_prs_effect': betas[0][0],
                    'sex': betas[1][0]
                }
                if verbose:
                    print(
                        'Fitted effects (betas) for PRS, sex, and intercept on true phenotype:',
                        betas)
                adj_pred_dict['sex_prs'] = sp.dot(Xs, betas)
                pred_r2 = 1 - rss_pd / rss0
                print(
                    'Variance explained (Pearson R2) by PRS adjusted for Sex: %0.4f (%0.6f)'
                    % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
                res_dict['Sex_adj_pred_r2'] = pred_r2
                pred_r2 = 1 - rss_pd / rss00
                print(
                    'Variance explained (Pearson R2) by PRS + Sex : %0.4f (%0.6f)'
                    % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
                res_dict['Sex_adj_pred_r2+Sex'] = pred_r2

            # Adjust for PCs
            if adjust_for_pcs and 'pcs' in prs_dict and len(
                    prs_dict['pcs']) > 0:
                pcs = prs_dict['pcs']
                (betas, rss0, r, s) = linalg.lstsq(
                    sp.hstack([pcs, sp.ones((len(true_phens), 1))]),
                    true_phens)
                Xs = sp.hstack([
                    pval_derived_effects_prs,
                    sp.ones((len(true_phens), 1)), pcs
                ])
                (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
                weights_dict['pc_adj'] = {
                    'Intercept': betas[1][0],
                    'ldpred_prs_effect': betas[0][0],
                    'pcs': betas[2][0]
                }
                adj_pred_dict['pc_prs'] = sp.dot(Xs, betas)
                pred_r2 = 1 - rss_pd / rss0
                print(
                    'Variance explained (Pearson R2) by PRS adjusted for PCs: %0.4f (%0.6f)'
                    % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
                res_dict['PC_adj_pred_r2'] = pred_r2
                pred_r2 = 1 - rss_pd / rss00
                print(
                    'Variance explained (Pearson R2) by PRS + PCs: %0.4f (%0.6f)'
                    % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
                res_dict['PC_adj_pred_r2+PC'] = pred_r2

                # Adjust for both PCs and Sex
                if adjust_for_sex and 'sex' in prs_dict and len(
                        prs_dict['sex']) > 0:
                    sex = sp.array(prs_dict['sex'])
                    sex.shape = (len(sex), 1)
                    (betas, rss0, r, s) = linalg.lstsq(
                        sp.hstack([sex, pcs,
                                   sp.ones((len(true_phens), 1))]), true_phens)
                    Xs = sp.hstack([
                        pval_derived_effects_prs, sex,
                        sp.ones((len(true_phens), 1)), pcs
                    ])
                    (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
                    weights_dict['sex_pc_adj'] = {
                        'Intercept': betas[2][0],
                        'ldpred_prs_effect': betas[0][0],
                        'sex': betas[1][0],
                        'pcs': betas[3][0]
                    }
                    adj_pred_dict['sex_pc_prs'] = sp.dot(Xs, betas)
                    pred_r2 = 1 - rss_pd / rss0
                    print(
                        'Variance explained (Pearson R2) by PRS adjusted for PCs and Sex: %0.4f (%0.6f)'
                        % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
                    res_dict['PC_Sex_adj_pred_r2'] = pred_r2
                    pred_r2 = 1 - rss_pd / rss00
                    print(
                        'Variance explained (Pearson R2) by PRS+PCs+Sex: %0.4f (%0.6f)'
                        % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
                    res_dict['PC_Sex_adj_pred_r2+PC_Sex'] = pred_r2

            # Adjust for covariates
            if adjust_for_covariates and 'covariates' in prs_dict and len(
                    prs_dict['covariates']) > 0:
                covariates = prs_dict['covariates']
                (betas, rss0, r, s) = linalg.lstsq(
                    sp.hstack([covariates,
                               sp.ones((len(true_phens), 1))]), true_phens)
                Xs = sp.hstack([
                    pval_derived_effects_prs, covariates,
                    sp.ones((len(true_phens), 1))
                ])
                (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
                adj_pred_dict['cov_prs'] = sp.dot(Xs, betas)
                pred_r2 = 1 - rss_pd / rss0
                print(
                    'Variance explained (Pearson R2) by PRS adjusted for Covariates: %0.4f (%0.6f)'
                    % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
                res_dict['Cov_adj_pred_r2'] = pred_r2
                pred_r2 = 1 - rss_pd / rss00
                print(
                    'Variance explained (Pearson R2) by PRS + Cov: %0.4f (%0.6f)'
                    % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
                res_dict['Cov_adj_pred_r2+Cov'] = pred_r2

                if adjust_for_pcs and 'pcs' in prs_dict and len(
                        prs_dict['pcs']) and 'sex' in prs_dict and len(
                            prs_dict['sex']) > 0:
                    pcs = prs_dict['pcs']
                    sex = sp.array(prs_dict['sex'])
                    sex.shape = (len(sex), 1)
                    (betas, rss0, r, s) = linalg.lstsq(
                        sp.hstack([
                            covariates, sex, pcs,
                            sp.ones((len(true_phens), 1))
                        ]), true_phens)
                    Xs = sp.hstack([
                        pval_derived_effects_prs, covariates, sex, pcs,
                        sp.ones((len(true_phens), 1))
                    ])
                    (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
                    adj_pred_dict['cov_sex_pc_prs'] = sp.dot(Xs, betas)
                    pred_r2 = 1 - rss_pd / rss0
                    print(
                        'Variance explained (Pearson R2) by PRS adjusted for Cov+PCs+Sex: %0.4f (%0.6f)'
                        % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
                    res_dict['Cov_PC_Sex_adj_pred_r2'] = pred_r2
                    pred_r2 = 1 - rss_pd / rss00
                    print(
                        'Variance explained (Pearson R2) by PRS+Cov+PCs+Sex: %0.4f (%0.6f)'
                        % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
                    res_dict['Cov_PC_Sex_adj_pred_r2+Cov_PC_Sex'] = pred_r2

            # Now calibration
            y_norm = (true_phens - sp.mean(true_phens)) / sp.std(true_phens)
            denominator = sp.dot(pval_derived_effects_prs.T,
                                 pval_derived_effects_prs)
            numerator = sp.dot(pval_derived_effects_prs.T, y_norm)
            regression_slope = (numerator / denominator)[0][0]
            if verbose:
                print(
                    'The slope for predictions with weighted effects is: %0.4f'
                    % regression_slope)

        num_individs = len(prs_dict['pval_derived_effects_prs'])

        # Write PRS out to file.
        if out_file != None:
            write_scores_file(out_file,
                              prs_dict,
                              pval_derived_effects_prs,
                              adj_pred_dict,
                              weights_dict=weights_dict,
                              verbose=verbose)

    return res_dict
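
# A minimal illustrative sketch (not from the original source) of the
# repeated pattern above: compare nested least-squares fits, rss0 from the
# covariates alone and rss_pd from covariates plus PRS; 1 - rss_pd / rss0 is
# the variance explained by the PRS on top of the covariates.
import numpy as np
from scipy import linalg

rng = np.random.RandomState(2)
n = 300
prs = rng.randn(n, 1)
sex = rng.randint(0, 2, (n, 1)).astype(float)
y = 0.4 * prs + 0.2 * sex + rng.randn(n, 1)
ones = np.ones((n, 1))
rss0 = linalg.lstsq(np.hstack([sex, ones]), y)[1]          # covariates only
rss_pd = linalg.lstsq(np.hstack([prs, sex, ones]), y)[1]   # + PRS
incremental_r2 = 1 - rss_pd[0] / rss0[0]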
Esempio n. 50
0
    def andersonAcceleration(self):

        maximalDimensionOfKrylovSpace = 5

        isMaximalDimensionOfKrylovSpaceReached = False
        dimensionOfKrylovSpace = 0
        b = 1.0

        #Start iteration
        G = self.x_0
        i = 0
        while i < self.numberOfIterations and not self.isInterrupted:
            if dimensionOfKrylovSpace < maximalDimensionOfKrylovSpace:
                dimensionOfKrylovSpace += 1
            else:
                isMaximalDimensionOfKrylovSpaceReached = True

            G_old = G  #save previous state
            G_new = self.fixPointOperator(G)

            # np.newaxis makes a 2D array that can be transposed; mathematically,
            # this interprets a vector in R^n as a matrix in R^{n x 1}
            G_new_AsColumnVector = G_new[np.newaxis].T

            #Save state G_new in Matrix K
            if not isMaximalDimensionOfKrylovSpaceReached:
                if dimensionOfKrylovSpace == 1:
                    #init K
                    K = G_new_AsColumnVector
                else:
                    #addColumnToMatrixByExtension
                    K = np.concatenate((K, G_new_AsColumnVector), axis=1)

            else:
                #addColumnToMatrixByShifting
                K = np.roll(K, -1, axis=1)
                K[:, -1] = G_new

            #save residuum r in Matrix D
            r = G_new - G_old
            r_AsColumnVector = r[np.newaxis].T  #Is now 2D array
            if not isMaximalDimensionOfKrylovSpaceReached:
                if dimensionOfKrylovSpace == 1:
                    #init D
                    D = r_AsColumnVector
                else:
                    #addColumnToMatrixByExtension
                    D = np.concatenate((D, r_AsColumnVector), axis=1)
            else:
                #addColumnToMatrixByShifting
                D = np.roll(D, -1, axis=1)
                D[:, -1] = r

            if dimensionOfKrylovSpace == 1:
                G = G_new
                i += 1
                continue

            #Only calculate W if needed
            if not isMaximalDimensionOfKrylovSpaceReached:
                W = self.calculateW(dimensionOfKrylovSpace)

            #Start solving least square problem
            D_reduced = np.dot(D, W)  #D_reduced = DW
            #solve argmin(a) ||D_reduced a + r ||
            a_reduced, resid, rank, sigma = linalg.lstsq(D_reduced, -r)
            a_AsColumnVector = np.dot(W, a_reduced[np.newaxis].T)
            a_AsColumnVector[-1] += 1.0
            G_AsColumnVector = np.dot(
                K, a_AsColumnVector) - (1.0 - b) * np.dot(D, a_AsColumnVector)
            G = G_AsColumnVector[:, 0]

            #We only check for convergence if K and D don't grow anymore
            if isMaximalDimensionOfKrylovSpaceReached:
                norm = np.linalg.norm(G_new)
                previousNorm = np.linalg.norm(G_old)
                relativeProgress = abs(previousNorm -
                                       norm) / (norm + np.finfo(float).eps)
                if relativeProgress < self.convergenceCriterion:  # defined in super class
                    print("Anderson converged after", i, "steps")
                    break

            i += 1
        #end while
        if i == self.numberOfIterations:
            print "Anderson did not converge after", self.numberOfIterations, "steps"

        self.derivePhysicalQuantitiesFromFixpoint(G)
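Stripped of the class bookkeeping, Anderson acceleration needs very little machinery. A self-contained toy sketch (not the method above; the sum-to-one constraint on the mixing coefficients is eliminated into an unconstrained lstsq):

import numpy as np
from scipy import linalg

def g(x):  # contractive fixed-point map; x* = g(x*) is the Dottie number
    return np.cos(x)

m = 5                  # history window, analogous to the Krylov dimension above
x = np.zeros(1)
X, R = [], []          # histories of iterates g(x_k) and residuals g(x_k) - x_k
for k in range(50):
    gx = g(x)
    r = gx - x
    X.append(gx); R.append(r)
    X, R = X[-m:], R[-m:]
    if len(R) > 1:
        # minimize ||sum_i alpha_i r_i|| subject to sum_i alpha_i = 1
        D = np.column_stack([ri - R[-1] for ri in R[:-1]])
        gamma = linalg.lstsq(D, -R[-1])[0]
        alpha = np.append(gamma, 1 - gamma.sum())
        x = np.column_stack(X).dot(alpha)
    else:
        x = gx
    if np.abs(r).max() < 1e-12:
        break
print(k, x)  # converges to ~0.739085 in a handful of steps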
Example n. 51
0
def LMqr(fun, pars, args,
         tau = 1e-3, eps1 = 1e-8, eps2 = 1e-8, kmax = 100,
         verbose = False):
    """Implementation of the Levenberg-Marquardt algorithm in pure
    Python. Instead of using the normal equations this version uses QR
    factorization for enhanced accuracy. Significantly slower (factor
    2)."""
    import time
    import numpy
    from numpy import inner, eye, diag, inf
    from numpy.linalg import norm
    from scipy.linalg import lstsq, solve
    import scipy.linalg

    p = pars
    f, J = fun(p, *args)

    A = inner(J, J)
    g = inner(J, f)

    I = eye(len(p))

    k = 0; nu = 2
    mu = tau * max(diag(A))
    stop = norm(g, inf) < eps1

    while not stop and k < kmax:
        k += 1

        if verbose:
            print("step %d: |f|: %9.3g mu: %g"%(k, norm(f), mu))

        tic = time.time()
        A = inner(J, J)
        g = inner(J, f)

        d = solve(A + mu*I, -g)
        print('XX', d, time.time() - tic)

        
        des = numpy.hstack((-f, numpy.zeros((len(p),))))
        Des = numpy.vstack((numpy.transpose(J),
                            numpy.sqrt(mu)*I))

        tic = time.time()
        d0, resids, rank, s = lstsq(Des, des)
        print('d0', d0, time.time() - tic)

        
        tic = time.time()
        #q, r = scipy.linalg.qr(Des, mode='economic')
        #d4   = solve(r, inner(numpy.transpose(q), des))
        _, r = scipy.linalg.qr(Des, mode='economic')
        d4 = scipy.linalg.cho_solve((r, False), -inner(J, f))
        print('d4', d4, time.time() - tic)

        
        

        tic = time.time()
        q, r = scipy.linalg.qr(numpy.transpose(J), mode='economic')
        d3 = solve( r + mu*numpy.linalg.inv(r.transpose()), -inner(numpy.transpose(q),f))
        #d3 = scipy.linalg.cho_solve( (r + mu*numpy.linalg.inv(r.transpose()), False),
        #                             -inner(numpy.transpose(q),f))
        print('d3', d3, time.time() - tic)

        print(d - d0)
        print(d3 - d0)
        print(d4 - d0)


        if norm(d) < eps2*(norm(p) + eps2):
            stop = True
            reason = 'small step'
            break

        pnew = p + d

        fnew, Jnew = fun(pnew, *args)
        # gain ratio: actual vs. predicted reduction (squared norms; the 1/2s cancel)
        rho = (norm(f)**2 - norm(fnew)**2)/inner(d, mu*d - g)

        if rho > 0:
            p = pnew
            #A = inner(Jnew, Jnew)
            #g = inner(Jnew, fnew)
            f = fnew
            J = Jnew
            if (norm(g, inf) < eps1): # or norm(fnew) < eps3
                stop = True
                reason = "small gradient"
                break
            mu = mu * max(1.0/3, 1 - (2*rho - 1)**3)
            nu = 2
        else:
            mu = mu * nu
            nu = 2*nu

    else:
        reason = "max iter reached"

    if verbose:
        print(reason)
    return p
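A quick sanity check (toy J and f, not part of the original routine) that the augmented least-squares system built from Des and des reproduces the normal-equation step:

import numpy as np
from scipy.linalg import lstsq, solve

rng = np.random.RandomState(1)
J = rng.randn(20, 3)   # Jacobian, one row per residual
f = rng.randn(20)      # residual vector
mu = 0.1

# normal equations: (J^T J + mu I) d = -J^T f
d_normal = solve(J.T.dot(J) + mu * np.eye(3), -J.T.dot(f))

# augmented system: argmin_d || [J; sqrt(mu) I] d - [-f; 0] ||
Des = np.vstack([J, np.sqrt(mu) * np.eye(3)])
des = np.hstack([-f, np.zeros(3)])
d_lstsq = lstsq(Des, des)[0]

print(np.allclose(d_normal, d_lstsq))  # True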
Example n. 52
0
def N4SID(u,y,NumRows,NumCols,NSig,require_stable=False):
    """
    A,B,C,D,Cov,Sigma = N4SID(u,y,NumRows,NumCols,n,require_stable=False)
    Let NumVals be the number of input and output values available
    In this case:
    u - NumInputs x NumVals array of inputs
    y - NumOutputs x NumVals array of outputs
    NumRows - Number of block rows in the past and future block Hankel matrices
    NumCols - Number of columns in the past and future block Hankel matrices
    NSig - desired state dimension.
    For the algorithm to work, you must have:
    NumVals >= 2*NumRows + NumCols - 1
    Returns
    A,B,C,D - the state space realization from inputs to outputs
    
    Cov - the joint covariance of the process and measurement noise
    Sigma - the singular values of the oblique projection of 
            row space of future outputs along row space of 
            future inputs on the row space of past inputs and outputs.
            
            Examining Sigma can be used to determine the required state 
            dimension
    require_stable - An optional boolean parameter. Default is False
                     If False, the standard N4SID algorithm is used
                     If True, the state matrix, A, 
                     will have spectral radius < 1.
                     In order to run with require_stable=True, cvxpy 
                     must be installed. 
           
    """
    NumInputs = u.shape[0]
    NumOutputs = y.shape[0]

    
    NumDict = {'Inputs': NumInputs,
               'Outputs': NumOutputs,
               'Dimension':NSig,
               'Rows':NumRows,
               'Columns':NumCols}
    
    GammaDict,S = preProcess(u,y,NumDict)


    GamData = GammaDict['Data']
    GamYData = GammaDict['DataY']

    if not require_stable:
        K = la.lstsq(GamData.T, GamYData.T)[0].T
    else:
        # stability enforced via a Lyapunov-type LMI (cvxpy >= 1.0 API)
        Kvar = cvx.Variable((NSig + NumOutputs, NSig + NumInputs * NumRows))

        Avar = Kvar[:NSig, :NSig]

        Pvar = cvx.Variable((NSig, NSig), PSD=True)

        LyapCheck = cvx.bmat([[Pvar, Avar],
                              [Avar.T, np.eye(NSig)]])

        Constraints = [LyapCheck >> 0, Pvar << np.eye(NSig)]

        diffVar = GamYData - Kvar @ GamData
        objFun = cvx.norm(diffVar, 'fro')
        Objective = cvx.Minimize(objFun)

        Prob = cvx.Problem(Objective, Constraints)

        result = Prob.solve()

        K = Kvar.value

    AID, BID, CID, DID, CovID = postProcess(K, GammaDict, NumDict)

    return AID, BID, CID, DID, CovID, S
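The unconstrained branch above is a plain multivariate regression: K minimizes ||GamYData - K GamData|| in the Frobenius norm. A toy illustration with synthetic matrices (names reused for clarity only):

import numpy as np
from scipy import linalg as la

rng = np.random.RandomState(2)
GamData = rng.randn(7, 100)    # regressors, one column per sample
K_true = rng.randn(4, 7)
GamYData = K_true.dot(GamData) + 0.01 * rng.randn(4, 100)

K = la.lstsq(GamData.T, GamYData.T)[0].T  # same call as in N4SID
print(np.abs(K - K_true).max())           # small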
Example n. 53
0
def _apply_rap_music(data,
                     info,
                     times,
                     forward,
                     noise_cov,
                     n_dipoles=2,
                     picks=None):
    """RAP-MUSIC for evoked data.

    Parameters
    ----------
    data : array, shape (n_channels, n_times)
        Evoked data.
    info : dict
        Measurement info.
    times : array
        Times.
    forward : instance of Forward
        Forward operator.
    noise_cov : instance of Covariance
        The noise covariance.
    n_dipoles : int
        The number of dipoles to estimate. The default value is 2.
    picks : array-like of int | None
        Indices (in info) of data channels. If None, MEG and EEG data channels
        (without bad channels) will be used.

    Returns
    -------
    dipoles : list of instances of Dipole
        The dipole fits.
    explained_data : array
        Data explained by the dipoles using a least-squares fit with the
        selected active dipoles and their estimated orientations.
    """
    is_free_ori, ch_names, proj, vertno, G = _prepare_beamformer_input(
        info, forward, label=None, picks=picks, pick_ori=None)

    gain = G.copy()

    # Handle whitening + data covariance
    whitener, _ = compute_whitener(noise_cov, info, picks)
    if info['projs']:
        whitener = np.dot(whitener, proj)

    # whiten the leadfield and the data
    G = np.dot(whitener, G)
    data = np.dot(whitener, data)

    eig_values, eig_vectors = linalg.eigh(np.dot(data, data.T))
    phi_sig = eig_vectors[:, -n_dipoles:]

    n_orient = 3 if is_free_ori else 1
    n_channels = G.shape[0]
    A = np.empty((n_channels, n_dipoles))
    gain_dip = np.empty((n_channels, n_dipoles))
    oris = np.empty((n_dipoles, 3))
    poss = np.empty((n_dipoles, 3))

    G_proj = G.copy()
    phi_sig_proj = phi_sig.copy()

    for k in range(n_dipoles):
        subcorr_max = -1.
        for i_source in range(G.shape[1] // n_orient):
            idx_k = slice(n_orient * i_source, n_orient * (i_source + 1))
            Gk = G_proj[:, idx_k]
            if n_orient == 3:
                Gk = np.dot(Gk, forward['source_nn'][idx_k])

            subcorr, ori = _compute_subcorr(Gk, phi_sig_proj)
            if subcorr > subcorr_max:
                subcorr_max = subcorr
                source_idx = i_source
                source_ori = ori
                if n_orient == 3 and source_ori[-1] < 0:
                    # make sure ori is relative to surface ori
                    source_ori *= -1  # XXX

                source_pos = forward['source_rr'][i_source]
                if n_orient == 1:
                    source_ori = forward['source_nn'][i_source]

        idx_k = slice(n_orient * source_idx, n_orient * (source_idx + 1))
        Ak = G[:, idx_k]
        if n_orient == 3:
            Ak = np.dot(Ak, np.dot(forward['source_nn'][idx_k], source_ori))

        A[:, k] = Ak.ravel()

        gain_k = gain[:, idx_k]
        if n_orient == 3:
            gain_k = np.dot(gain_k,
                            np.dot(forward['source_nn'][idx_k], source_ori))
        gain_dip[:, k] = gain_k.ravel()

        oris[k] = source_ori
        poss[k] = source_pos

        logger.info("source %s found: p = %s" % (k + 1, source_idx))
        if n_orient == 3:
            logger.info("ori = %s %s %s" % tuple(oris[k]))

        projection = _compute_proj(A[:, :k + 1])
        G_proj = np.dot(projection, G)
        phi_sig_proj = np.dot(projection, phi_sig)

    sol = linalg.lstsq(A, data)[0]

    explained_data = np.dot(gain_dip, sol)
    residual = data - np.dot(whitener, explained_data)
    gof = 1. - np.sum(residual**2, axis=0) / np.sum(data**2, axis=0)
    return _make_dipoles(times, poss, oris, sol, gof), explained_data
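The final amplitude fit above is ordinary least squares: given the gain columns A of the selected dipoles, the source time courses minimize ||data - A sol||. A toy version (synthetic channels and dipoles, not MNE data):

import numpy as np
from scipy import linalg

rng = np.random.RandomState(3)
A = rng.randn(64, 2)                   # 64 channels, 2 dipole gain columns
amps = rng.randn(2, 50)                # true time courses, 50 samples
data = A.dot(amps) + 0.05 * rng.randn(64, 50)

sol = linalg.lstsq(A, data)[0]         # shape (2, 50), as in the code above
gof = 1. - np.sum((data - A.dot(sol)) ** 2, axis=0) / np.sum(data ** 2, axis=0)
print(sol.shape, gof.mean())           # goodness of fit close to 1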
Example n. 54
0
def partial_corr(C):
    """
    Partial Correlation in Python (clone of Matlab's partialcorr)
    
    This uses the linear regression approach to compute the partial 
    correlation (might be slow for a huge number of variables). The 
    algorithm is detailed here:
    
    http://en.wikipedia.org/wiki/Partial_correlation#Using_linear_regression
    
    Taking X and Y two variables of interest and Z the matrix with all
    the variable minus {X, Y}, the algorithm can be summarized as
    
        1) perform a normal linear least-squares regression with X as the
           target and Z as the predictor
        2) calculate the residuals in Step #1
        3) perform a normal linear least-squares regression with Y as the
           target and Z as the predictor
        4) calculate the residuals in Step #3
        5) calculate the correlation coefficient between the residuals from
           Steps #2 and #4; 
    
        The result is the partial correlation between X and Y while controlling
        for the effect of Z.
    
    Date: Nov 2014
    Author: Fabian Pedregosa-Izquierdo, [email protected]
    Testing: Valentina Borghesani, [email protected]
    """
    """
    Returns the sample linear partial correlation coefficients between pairs of
    variables in C, controlling for the remaining variables in C.
    
    Parameters
    ----------
    C : array-like, shape (n, p)
        Array with the different variables. Each column of C is taken as a
        variable
    
    Returns
    -------
    P : array-like, shape (p, p)
        P[i, j] contains the partial correlation of C[:, i] and C[:, j]
        controlling for the remaining variables in C.
    """

    C = np.asarray(C)
    p = C.shape[1]
    P_corr = np.zeros((p, p), dtype=float)
    for i in range(p):
        P_corr[i, i] = 1
        for j in range(i + 1, p):
            idx = np.ones(p, dtype=bool)
            idx[i] = False
            idx[j] = False
            beta_i = linalg.lstsq(C[:, idx], C[:, j])[0]
            beta_j = linalg.lstsq(C[:, idx], C[:, i])[0]

            res_j = C[:, j] - C[:, idx].dot(beta_i)
            res_i = C[:, i] - C[:, idx].dot(beta_j)

            # corr = sp.pearsonr(res_i, res_j)[0]
            corr = sp.spearmanr(res_i, res_j, nan_policy='omit')[0]
            P_corr[i, j] = corr
            P_corr[j, i] = corr

    return P_corr
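A quick usage check (synthetic data; assumes the module-level imports np, linalg and sp used by the function are in place): x and y are driven by a common confounder z, so their raw correlation is high while the partial correlation controlling for z is near zero.

import numpy as np
from scipy import stats

rng = np.random.RandomState(4)
z = rng.randn(1000)
x = z + 0.1 * rng.randn(1000)
y = z + 0.1 * rng.randn(1000)
C = np.column_stack([x, y, z])

print(stats.pearsonr(x, y)[0])  # ~0.99, dominated by the confounder
print(partial_corr(C)[0, 1])    # ~0 once z is controlled for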
Example n. 55
0
def lin_leastsq(model, points, vals, errs=None, fullOutput=False, **keywords):
    ''' Performs linear least squares on a function & dataset
    
        @param model function to be fit to.
            Contains function basis(), which is an array of funcs
            that take points as arguments to form config matrix
            Or can be a list of said functions
        @param points[i][j] coordinates of data points, where
            points[i] are individual datapoints and 
            points[i][j] are components of datapoint position
        @param vals value of data and each position in points
        @param errs error in vals at each position in points
        @param fullOutput selects how much info to return
    
        @returns params, {chisq, covar}, isConverged
            where:
                params is a list of best-fit parameters
                chisq is the chi-squared (or equivalent Student-t) statistic
                covar is the covariance matrix
                isConverged is a bool describing convergence (always True)
    '''

    log.log(12, 'Entering lin_leastsq')

    ###################

    def makeMatrices(points, vals, errs):

        A = numpy.zeros((len(points), len(basis)))
        if errs is None:
            for i in range(len(points)):
                for j in range(len(basis)):
                    A[i, j] = basis[j](points[i])
            b = vals
        else:
            b = numpy.zeros((len(points), 1))
            for i in range(len(points)):
                for j in range(len(basis)):
                    A[i, j] = (basis[j](points[i])) / errs[i]
                b[i] = vals[i] / errs[i]
        return A, b
        return A, b

    #####################

    def calcCovarMatrix(params):
        # parameter covariance: (A^T A)^{-1} = V S^{-2} V^T from the SVD of A
        u, s, vh = linalg.svd(A)

        covar = numpy.zeros((len(params), len(params)))
        for i in range(len(params)):
            for j in range(len(params)):
                for k in range(len(params)):
                    covar[i, j] += vh[k, i] * vh[k, j] / s[k] ** 2
                covar[j, i] = covar[i, j]
        return covar

    #####################

    def calcResiduals(params, x, y, errs):
        predicted = model(x, params)
        if errs is None:
            return y - predicted
        else:
            return numpy.divide(y - predicted, errs)

    #####################

    if hasattr(model, '__getitem__'):
        basis = model
    else:
        basis = model.basis()

    points, vals, errs = _prepData(points, vals, errs)

    A, b = makeMatrices(points, vals, errs)

    (params, resids, rank, s) = linalg.lstsq(A, b)

    if fullOutput:

        covar = calcCovarMatrix(params)

        chisq = numpy.sum(calcResiduals(params, points, vals, errs)**2)

        log.log(12, 'Returning from lin_leastsq: fullOutput')

        return params, chisq, covar, True

    log.log(12, 'Returning from lin_leastsq')

    return params, True
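The heart of lin_leastsq is building the design matrix from the basis functions and handing it to lstsq. A standalone quadratic fit (the basis list here is illustrative, not part of the original API):

import numpy
from scipy import linalg

basis = [lambda p: 1.0, lambda p: p[0], lambda p: p[0] ** 2]
points = [(x,) for x in numpy.linspace(-1, 1, 30)]
vals = numpy.array([2.0 - 0.5 * x + 3.0 * x ** 2 for (x,) in points])

A = numpy.array([[bj(pt) for bj in basis] for pt in points])
params = linalg.lstsq(A, vals)[0]
print(params)  # approximately [2.0, -0.5, 3.0]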
Example n. 56
0
def nnmf_sparse(V0,
                XYZ0,
                W0,
                B0,
                S0,
                tolfun=1e-4,
                miniter=10,
                maxiter=100,
                timeseries_mean=1.0,
                timepoints=None,
                verbosity=1):
    '''
    cell detection via nonnegative matrix factorization with sparseness projection
    V0 = voxel_timeseries_valid
    XYZ0 = voxel_xyz_valid
    W0 = cell_weight_init_valid
    B0 = cell_neighborhood_valid
    S0 = cell_sparseness    
    '''

    import os
    import numpy as np
    from scipy import stats
    from scipy import linalg
    from skimage import measure
    from voluseg._tools.sparseness_projection import sparseness_projection

    os.environ['MKL_NUM_THREADS'] = '1'

    # CAUTION: variable is modified in-place to save memory
    V0 *= (timeseries_mean / V0.mean(1)[:, None])  # normalize voxel timeseries

    if timepoints is not None:
        V = V0[:, timepoints].astype(float)  # copy input signal
    else:
        V = V0.astype(float)  # copy input signal

    XYZ = XYZ0.astype(int)
    W = W0.astype(float)
    B = B0.astype(bool)
    S = S0.copy()

    # get dimensions
    n, t = V.shape
    n_, c = W.shape
    assert (n_ == n)

    H = np.zeros((c, t))  # zero timeseries array
    dnorm_prev = np.full(2, np.inf)  # last two d-norms
    for ii in range(maxiter):
        # save current states
        H_ = H.copy()

        # Alternate least squares with regularization
        H = np.maximum(linalg.lstsq(W, V)[0], 0)
        H *= (timeseries_mean / H.mean(1)[:, None])  # normalize component timeseries

        W = np.maximum(linalg.lstsq(H.T, V.T)[0].T, 0)  # least-squares update of W in V ~= W.dot(H)
        W[np.logical_not(B)] = 0  # restrict component boundaries
        for ci in range(c):
            W_ci = W[B[:, ci], ci]
            if np.any(W_ci) and (S[ci] > 0):
                # get relative dimensions of component
                XYZ_ci = XYZ[B[:, ci]] - XYZ[B[:, ci]].min(0)

                # enforce component sparseness and percentile threshold
                W_ci = sparseness_projection(W_ci,
                                             S[ci],
                                             at_least_as_sparse=True)

                # retain largest connected component (mode)
                L_ci = np.zeros(np.ptp(XYZ_ci, 0) + 1, dtype=bool)
                L_ci[tuple(zip(*XYZ_ci))] = W_ci > 0
                L_ci = measure.label(L_ci, connectivity=3)
                lci_mode = stats.mode(L_ci[L_ci > 0]).mode[0]
                W_ci[L_ci[tuple(zip(*XYZ_ci))] != lci_mode] = 0

                W[B[:, ci], ci] = W_ci

        # Get norm of difference and check for convergence
        dnorm = np.sqrt(np.mean(np.square(V - W.dot(H)))) / timeseries_mean
        diffh = np.sqrt(np.mean(np.square(H - H_))) / timeseries_mean
        if ((dnorm_prev.max(0) - dnorm) < tolfun) & (diffh < tolfun):
            if (ii >= miniter):
                break
        dnorm_prev[1] = dnorm_prev[0]
        dnorm_prev[0] = dnorm

        if verbosity:
            print((ii, dnorm, diffh))

    # Perform final regression on full input timeseries
    H = np.maximum(linalg.lstsq(W, V0)[0], 0)
    H *= (timeseries_mean / H.mean(1)[:, None])  # normalize component timeseries

    return (W, H, dnorm)
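The alternating core of the factorization, without the sparseness projection or connectivity pruning, fits in a few lines (toy data; a sketch of the update order used above, not the full routine):

import numpy as np
from scipy import linalg

rng = np.random.RandomState(5)
W_true = np.abs(rng.randn(200, 3))  # 200 voxels, 3 components
H_true = np.abs(rng.randn(3, 40))   # 40 timepoints
V = W_true.dot(H_true)

W = np.abs(rng.randn(200, 3))       # random nonnegative init
for _ in range(50):
    H = np.maximum(linalg.lstsq(W, V)[0], 0)        # update timeseries
    W = np.maximum(linalg.lstsq(H.T, V.T)[0].T, 0)  # update weights
print(np.sqrt(np.mean((V - W.dot(H)) ** 2)))        # reconstruction RMS error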
Example n. 57
0
    def simcond(self, xo, method='approx', i_unknown=None):
        """
        Simulate values conditionally on observed known values

        Parameters
        ----------
        xo : vector
            timeseries including missing data.
            (missing data must be NaN if i_unknown is not given)
            Assumption: The covariance of x is equal to self and have the
            same sample period.
        method : string
            defining method used in the conditional simulation. Options are:
            'approximate': Condition only on the closest points. Quite fast
            'exact' : Exact simulation. Slow for large data sets, may not
                return any result due to near singularity of the covariance
                matrix.
        i_unknown : integers
            indices to spurious or missing data in x

        Returns
        -------
        sample : ndarray
            a random sample of the missing values conditioned on the observed
            data.
        mu, sigma : ndarray
            mean and standard deviation, respectively, of the missing values
            conditioned on the observed data.

        Notes
        -----
        SIMCOND generates the missing values from x conditioned on the observed
        values assuming x comes from a multivariate Gaussian distribution
        with zero expectation and Auto Covariance function R.

        See also
        --------
        CovData1D.sim
        TimeSeries.reconstruct,
        rndnormnd

        References
        ----------
        Brodtkorb, P, Myrhaug, D, and Rue, H (2001)
        "Joint distribution of wave height and wave crest velocity from
        reconstructed data with application to ringing"
        Int. Journal of Offshore and Polar Engineering, Vol 11, No. 1,
        pp 23--32

        Brodtkorb, P, Myrhaug, D, and Rue, H (1999)
        "Joint distribution of wave height and wave crest velocity from
        reconstructed data"
        in Proceedings of 9th ISOPE Conference, Vol III, pp 66-73
        """
        x = atleast_1d(xo).ravel()
        acf = self._get_acf()

        num_x = len(x)
        num_acf = len(acf)

        if i_unknown is not None:
            x[i_unknown] = nan
        i_unknown = flatnonzero(isnan(x))
        num_unknown = len(i_unknown)

        mu1o = zeros((num_unknown,))
        mu1o_std = zeros((num_unknown,))
        sample = zeros((num_unknown,))
        if num_unknown == 0:
            warnings.warn('No missing data, no point to continue.')
            return sample, mu1o, mu1o_std
        if num_unknown == num_x:
            warnings.warn('All data missing, returning sample from' +
                          ' the apriori distribution.')
            mu1o_std = ones(num_unknown) * sqrt(acf[0])
            return self.sim(ns=num_unknown, cases=1)[:, 1], mu1o, mu1o_std

        i_known = flatnonzero(1 - isnan(x))

        if method.startswith('exac'):
            # exact but slow. It also may not return any result
            if num_acf > 0.3 * num_x:
                sigma = toeplitz(hstack((acf, zeros(num_x - num_acf))))
            else:
                acf[0] = acf[0] * 1.00001
                sigma = sptoeplitz(hstack((acf, zeros(num_x - num_acf))))
            soo, so1, s11 = self._split_cov(sigma, i_known, i_unknown)

            if issparse(sigma):
                so1 = so1.todense()
                s11 = s11.todense()
                s1o_sooinv = spsolve(soo + soo.T, 2 * so1).T
            else:
                sooinv_so1, _res, _rank, _s = lstsq(soo + soo.T, 2 * so1,
                                                    cond=1e-4)
                s1o_sooinv = sooinv_so1.T
            mu1o = s1o_sooinv.dot(x[i_known])
            sigma1o = s11 - s1o_sooinv.dot(so1)
            if (diag(sigma1o) < 0).any():
                raise ValueError('Failed to converge to a solution')

            mu1o_std = sqrt(diag(sigma1o))
            sample[:] = rndnormnd(mu1o, sigma1o, cases=1).ravel()

        elif method.startswith('appr'):
            # approximating by only condition on the closest points

            num_sig = min(2 * num_acf, num_x)

            sigma = toeplitz(hstack((acf, zeros(num_sig - num_acf))))
            overlap = int(num_sig / 4)
            # indices to the points used
            idx = r_[0:num_sig] + max(0, min(i_unknown[0] - overlap,
                                             num_x - num_sig))
            mask_unknown = zeros(num_x, dtype=bool)
            # temporary storage of indices to missing points
            mask_unknown[i_unknown] = True
            t_unknown = where(mask_unknown[idx])[0]
            t_known = where(1 - mask_unknown[idx])[0]
            ns = len(t_unknown)  # number of missing data in the interval

            num_restored = 0  # number of previously simulated points
            x2 = x.copy()

            while ns > 0:
                soo, so1, s11 = self._split_cov(sigma, t_known, t_unknown)
                if issparse(soo):
                    so1 = so1.todense()
                    s11 = s11.todense()
                    s1o_sooinv = spsolve(soo + soo.T, 2 * so1).T
                else:
                    sooinv_so1, _res, _rank, _s = lstsq(soo + soo.T, 2 * so1,
                                                        cond=1e-4)
                    s1o_sooinv = sooinv_so1.T
                sigma1o = s11 - s1o_sooinv.dot(so1)
                if (diag(sigma1o) < 0).any():
                    raise ValueError('Failed to converge to a solution')

                ix = slice((num_restored), (num_restored + ns))
                # standard deviation of the expected surface
                mu1o_std[ix] = np.maximum(mu1o_std[ix], sqrt(diag(sigma1o)))

                # expected surface conditioned on the closest known
                # observations from x
                mu1o[ix] = s1o_sooinv.dot(x2[idx[t_known]])
                # sample conditioned on the known observations from x
                mu1os = s1o_sooinv.dot(x[idx[t_known]])
                sample[ix] = rndnormnd(mu1os, sigma1o, cases=1)
                if idx[-1] == num_x - 1:
                    ns = 0  # no more points to simulate
                else:
                    x2[idx[t_unknown]] = mu1o[ix]  # expected surface
                    x[idx[t_unknown]] = sample[ix]  # sampled surface
                    # removing indices to data which has been simulated
                    mask_unknown[idx[:-overlap]] = False
                    # data we want to simulate once more
                    nw = sum(mask_unknown[idx[-overlap:]])
                    num_restored += ns - nw  # update # points simulated so far

                    idx = self._update_window(idx, i_unknown, num_x, num_acf,
                                              overlap, nw, num_restored)

                    # find new interval with missing data
                    t_unknown = flatnonzero(mask_unknown[idx])
                    t_known = flatnonzero(1 - mask_unknown[idx])
                    ns = len(t_unknown)  # # missing data in the interval
        return sample, mu1o, mu1o_std
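Both branches above rest on the conditional Gaussian formulas mu_{1|0} = S_{10} S_{00}^{-1} x_0 and Sigma_{1|0} = S_{11} - S_{10} S_{00}^{-1} S_{01}, with the inverse applied through a regularized lstsq. A toy version on a small Toeplitz covariance:

import numpy as np
from scipy.linalg import lstsq, toeplitz

acf = np.array([1.0, 0.6, 0.3, 0.1])
sigma = toeplitz(np.hstack([acf, np.zeros(4)]))  # 8 x 8 covariance
i_known = np.array([0, 1, 2, 3, 6, 7])
i_unknown = np.array([4, 5])

soo = sigma[np.ix_(i_known, i_known)]
so1 = sigma[np.ix_(i_known, i_unknown)]
s11 = sigma[np.ix_(i_unknown, i_unknown)]

x_known = np.random.RandomState(6).randn(6)
s1o_sooinv = lstsq(soo + soo.T, 2 * so1, cond=1e-4)[0].T  # as in simcond
mu1o = s1o_sooinv.dot(x_known)         # conditional mean
sigma1o = s11 - s1o_sooinv.dot(so1)    # conditional covariance
print(mu1o, np.sqrt(np.diag(sigma1o)))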
Example n. 58
0
    def test_check_finite(self):
        a = [[1, 20], [-30, 4]]
        for b in ([[1, 0], [0, 1]], [1, 0], [[2, 1], [-30, 4]]):
            x = lstsq(a, b, check_finite=False)[0]
            assert_array_almost_equal(dot(a, x), b)
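check_finite=False skips the NaN/Inf validation pass for speed; with the default check_finite=True, non-finite inputs raise instead of silently propagating:

import numpy as np
from scipy.linalg import lstsq

a = np.array([[1., 20.], [-30., 4.]])
b = np.array([1., np.nan])
try:
    lstsq(a, b)  # check_finite=True by default
except ValueError as e:
    print('rejected:', e)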
Example n. 59
0
    def fit(self,
            source,
            destination,
            order=4,
            reg=1e-5,
            center=True,
            match='oct5',
            verbose=None):
        """Fit the warp from source points to destination points.

        Parameters
        ----------
        source : array, shape (n_src, 3)
            The source points.
        destination : array, shape (n_dest, 3)
            The destination points.
        order : int
            Order of the spherical harmonic fit.
        reg : float
            Regularization of the TPS warp.
        center : bool
            If True, center the points by fitting a sphere to points
            that are in a reasonable region for head digitization.
        match : str
            The uniformly-spaced points to match on the two surfaces.
            Can be "ico#" or "oct#" where "#" is an integer.
            The default is "oct5".
        %(verbose)s

        Returns
        -------
        inst : instance of SphericalSurfaceWarp
            The warping object (for chaining).
        """
        from .bem import _fit_sphere
        from .source_space import _check_spacing
        match_rr = _check_spacing(match, verbose=False)[2]['rr']
        logger.info('Computing TPS warp')
        src_center = dest_center = np.zeros(3)
        if center:
            logger.info('    Centering data')
            hsp = np.array(
                [p for p in source if not (p[2] < -1e-6 and p[1] > 1e-6)])
            src_center = _fit_sphere(hsp, disp=False)[1]
            source = source - src_center
            hsp = np.array(
                [p for p in destination if not (p[2] < 0 and p[1] > 0)])
            dest_center = _fit_sphere(hsp, disp=False)[1]
            destination = destination - dest_center
            logger.info('    Using centers %s -> %s' % (np.array_str(
                src_center, None, 3), np.array_str(dest_center, None, 3)))
        self._fit_params = dict(n_src=len(source),
                                n_dest=len(destination),
                                match=match,
                                n_match=len(match_rr),
                                order=order,
                                reg=reg)
        assert source.shape[1] == destination.shape[1] == 3
        self._destination = destination.copy()
        # 1. Compute spherical coordinates of source and destination points
        logger.info('    Converting to spherical coordinates')
        src_rad_az_pol = _cart_to_sph(source).T
        dest_rad_az_pol = _cart_to_sph(destination).T
        match_rad_az_pol = _cart_to_sph(match_rr).T
        del match_rr
        # 2. Compute spherical harmonic coefficients for all points
        logger.info('    Computing spherical harmonic approximation with '
                    'order %s' % order)
        src_sph = _compute_sph_harm(order, *src_rad_az_pol[1:])
        dest_sph = _compute_sph_harm(order, *dest_rad_az_pol[1:])
        match_sph = _compute_sph_harm(order, *match_rad_az_pol[1:])
        # 3. Fit spherical harmonics to both surfaces to smooth them
        src_coeffs = linalg.lstsq(src_sph, src_rad_az_pol[0])[0]
        dest_coeffs = linalg.lstsq(dest_sph, dest_rad_az_pol[0])[0]
        # 4. Smooth both surfaces using these coefficients, and evaluate at
        #     the "shape" points
        logger.info('    Matching %d points (%s) on smoothed surfaces' %
                    (len(match_sph), match))
        src_rad_az_pol = match_rad_az_pol.copy()
        src_rad_az_pol[0] = np.abs(np.dot(match_sph, src_coeffs))
        dest_rad_az_pol = match_rad_az_pol.copy()
        dest_rad_az_pol[0] = np.abs(np.dot(match_sph, dest_coeffs))
        # 5. Convert matched points to Cartesian coordinates and put back
        source = _sph_to_cart(src_rad_az_pol.T)
        source += src_center
        destination = _sph_to_cart(dest_rad_az_pol.T)
        destination += dest_center
        # 6. Compute TPS warp of matched points from smoothed surfaces
        self._warp = _TPSWarp().fit(source, destination, reg)
        self._matched = np.array([source, destination])
        logger.info('[done]')
        return self
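Step 3 above is itself a linear fit: radius as a function of angle, expanded in spherical harmonics. A standalone sketch using scipy.special.sph_harm (MNE's internal _compute_sph_harm builds a real-valued basis; taking real parts here is illustrative and leaves the basis rank-deficient, which lstsq handles):

import numpy as np
from scipy import linalg
from scipy.special import sph_harm

rng = np.random.RandomState(7)
az = rng.uniform(0, 2 * np.pi, 200)  # azimuth
pol = rng.uniform(0, np.pi, 200)     # polar angle
rad = 1.0 + 0.1 * np.cos(pol)        # smooth "head" radius

order = 4
basis = np.column_stack([
    sph_harm(m, n, az, pol).real
    for n in range(order + 1) for m in range(-n, n + 1)])
coeffs = linalg.lstsq(basis, rad)[0]
print(np.abs(basis.dot(coeffs) - rad).max())  # small residual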
Example n. 60
0
def img_to_signals_maps(imgs, maps_img, mask_img=None):
    """Extract region signals from image.

    This function is applicable to regions defined by maps.

    Parameters
    ----------
    imgs: Niimg-like object
        See http://nilearn.github.io/manipulating_images/input_output.html
        Input images.

    maps_img: Niimg-like object
        See http://nilearn.github.io/manipulating_images/input_output.html
        regions definition as maps (array of weights).
        shape: imgs.shape + (region number, )

    mask_img: Niimg-like object
        See http://nilearn.github.io/manipulating_images/input_output.html
        mask to apply to regions before extracting signals. Every point
        outside the mask is considered as background (i.e. outside of any
        region).

    Returns
    -------
    region_signals: numpy.ndarray
        Signals extracted from each region.
        Shape is: (scans number, number of regions intersecting mask)

    labels: list
        maps_img[..., labels[n]] is the region that has been used to extract
        signal region_signals[:, n].

    See also
    --------
    nilearn.regions.img_to_signals_labels
    nilearn.regions.signals_to_img_maps
    """

    maps_img = _utils.check_niimg_4d(maps_img)
    imgs = _utils.check_niimg_4d(imgs)
    affine = imgs.affine
    shape = imgs.shape[:3]

    # Check shapes and affines.
    if maps_img.shape[:3] != shape:
        raise ValueError("maps_img and imgs shapes must be identical.")
    if abs(maps_img.affine - affine).max() > 1e-9:
        raise ValueError("maps_img and imgs affines must be identical")

    maps_data = _safe_get_data(maps_img, ensure_finite=True)

    if mask_img is not None:
        mask_img = _utils.check_niimg_3d(mask_img)
        if mask_img.shape != shape:
            raise ValueError("mask_img and imgs shapes must be identical.")
        if abs(mask_img.affine - affine).max() > 1e-9:
            raise ValueError("mask_img and imgs affines must be identical")
        maps_data, maps_mask, labels = \
                   _trim_maps(maps_data,
                              _safe_get_data(mask_img, ensure_finite=True),
                              keep_empty=True)
        maps_mask = _utils.as_ndarray(maps_mask, dtype=bool)
    else:
        maps_mask = np.ones(maps_data.shape[:3], dtype=bool)
        labels = np.arange(maps_data.shape[-1], dtype=int)

    data = _safe_get_data(imgs, ensure_finite=True)
    region_signals = linalg.lstsq(maps_data[maps_mask, :],
                                  data[maps_mask, :])[0].T

    return region_signals, list(labels)
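The signal extraction above reduces to one least-squares solve: every scan is regressed onto the region maps at once. A toy version (synthetic voxels and maps, not Niimg objects):

import numpy as np
from scipy import linalg

rng = np.random.RandomState(8)
maps = np.abs(rng.randn(1000, 5))  # 1000 voxels, 5 region maps
signals_true = rng.randn(5, 120)   # 120 scans
data = maps.dot(signals_true) + 0.01 * rng.randn(1000, 120)

region_signals = linalg.lstsq(maps, data)[0].T  # (scans, regions)
print(region_signals.shape)
print(np.abs(region_signals.T - signals_true).max())  # small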