Example no. 1
def get_resid(zscores, swld, wcor):
    """
    Regress out the average pleiotropic signal tagged by TWAS at the region

    :param zscores: numpy.ndarray TWAS zscores
    :param swld: numpy.ndarray intercept variable
    :param wcor: numpy.ndarray predicted expression correlation

    :return: tuple (residual TWAS zscores, intercept z-score)
    """
    m, _ = wcor.shape
    _, p = swld.shape

    # create mean factor
    intercept = swld.dot(np.ones(p))

    # estimate under the null for variance components, i.e. V = SW LD SW
    wcor_inv, rank = lin.pinvh(wcor, return_rank=True)

    numer = mdot([intercept.T, wcor_inv, zscores])
    denom = mdot([intercept.T, wcor_inv, intercept])
    alpha = numer / denom
    resid = zscores - intercept * alpha

    s2 = mdot([resid, wcor_inv, resid]) / (rank - 1)
    inter_se = np.sqrt(s2 / denom)
    inter_z = alpha / inter_se

    return resid, inter_z
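A minimal usage sketch (not from the original project): it assumes numpy as np, scipy.linalg as lin, and numpy.linalg.multi_dot as mdot are imported at module level, exactly as get_resid expects, and uses made-up identity matrices for the LD and weight structure.

import numpy as np
from numpy.linalg import multi_dot as mdot
from scipy import linalg as lin

m = 5
zscores = np.random.normal(size=m)   # toy TWAS z-scores for m genes in the region
swld = np.eye(m)                     # toy intercept variable
wcor = np.eye(m)                     # toy predicted-expression correlation

resid, inter_z = get_resid(zscores, swld, wcor)
print(resid, inter_z)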
Example no. 2
    def prepare_schur_complement(self):
        bundle = self.bundle
        nc = len(self.camera_ids)
        nt = len(self.track_ids)

        # Fill with zeros
        self.HCCs.fill(0.)
        self.HPPs.fill(0.)
        self.HCPs.fill(0.)
        self.bCs.fill(0.)
        self.bPs.fill(0.)

        # Compute various components
        linalg.init()
        for j, track_id in enumerate(self.track_ids):
            track = bundle.tracks[track_id]

            for i, camera_id in enumerate(self.camera_ids):
                if track.has_measurement(camera_id):
                    r = bundle.residual(camera_id, track_id)
                    Jc, Jp = bundle.Jresidual(camera_id, track_id)

                    # single-precision copies for the GPU
                    Jc_float = Jc.astype(np.float32)
                    Jc_T_float = Jc.T.astype(np.float32)
                    Jp_float = Jp.astype(np.float32)
                    Jp_T_float = Jp.T.astype(np.float32)
                    #r_float = r.astype(np.float32)

                    # earlier variant using explicit cuda.mem_alloc / memcpy_htod,
                    # kept here for reference:
                    #a_gpu = cuda.mem_alloc(Jc_float.nbytes)
                    #b_gpu = cuda.mem_alloc(Jp_float.nbytes)
                    #cuda.memcpy_htod(a_gpu, Jc_float)
                    #cuda.memcpy_htod(b_gpu, Jp_float)

                    a_gpu = gpuarray.to_gpu(Jc_float)
                    b_gpu = gpuarray.to_gpu(Jc_T_float)
                    c_gpu = gpuarray.to_gpu(Jp_float)
                    d_gpu = gpuarray.to_gpu(Jp_T_float)
                    #e_gpu = gpuarray.to_gpu(r_float)

                    # J^T J blocks accumulated on the GPU
                    self.HCCs[i] += linalg.mdot(b_gpu, a_gpu).get()
                    self.HPPs[j] += linalg.mdot(d_gpu, c_gpu).get()
                    #self.HCPs[i, j]  = linalg.mdot(b_gpu, c_gpu).get()
                    #self.bCs[i]     += linalg.mdot(b_gpu, e_gpu).get()
                    #self.bPs[j]     += linalg.mdot(d_gpu, e_gpu).get()
                    #self.HCCs[i]    += dots(Jc.T, Jc)
                    #self.HPPs[j]    += dots(Jp.T, Jp)

                    # remaining blocks computed on the CPU
                    self.HCPs[i, j] = dots(Jc.T, Jp)
                    self.bCs[i] += dots(Jc.T, r)
                    self.bPs[j] += dots(Jp.T, r)
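For context (not part of the original snippet): the HCC, HPP, HCP, bC, and bP blocks filled in above are the pieces of the bundle-adjustment normal equations, and they are typically combined in the Schur complement S = HCC - HCP * inv(HPP) * HCP^T to eliminate the point parameters before solving for the camera updates.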
Example no. 3
    def kernel(self, point, xmat, k):
        get_shape = np.array(xmat).shape[0]

        # same as distance_matrix(xmat, xmat)
        # Use for verification, continue below
        # self.distance_matrix_own(get_shape, xmat)

        # get upper triangular distance matrix p.7 (5)
        M = np.triu(distance_matrix(xmat, xmat), 0)
        # print(f"M shape {M.shape} \n {M[:5,][:5,]}")

        # get positive definite distance matrix p.7 (5)
        D = dot(M.T, M)
        # why is it different from the solution on the internet?
        # https://matrixcalc.org/en/#%7B%7B0,1,2,3%7D,%7B0,0,4,5%7D,%7B0,0,0,6%7D,%7B0,0,0,0%7D%7D%2A%7B%7B0,0,0,0%7D,%7B1,0,0,0%7D,%7B2,4,0,0%7D,%7B3,5,6,0%7D%7D
        # here: first row and column are zero instead of last row and column
        # print(f"D.dot {D[:5,][:5,]}")
        # diagonal weight matrix: one weight per training example
        w_k = np.zeros((get_shape, get_shape))

        for j in range(get_shape):
            diff = point - xmat[j]
            print(f"diff {diff}")
            # w_k[j, j] = np.exp(diff * diff.T / (-2.0 * k ** 2))

            w_k[j, j] = math.exp(-(1 / 2) * mdot([diff.T, D, diff]))

        return w_k
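A reading of the loop above (no new functionality): each training point xmat[j] receives the weight w_k[j, j] = exp(-0.5 * (point - xmat[j])^T D (point - xmat[j])), a Gaussian kernel whose metric is the positive semi-definite matrix D = M^T M built from the pairwise distance matrix, while the commented-out line is the more common isotropic variant with bandwidth k.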
Example no. 4
def comp_score_logit(df, is_sex_specific):
    logger.info("Model computation score")
    # Remove the sex covariate for sex-specific endpoints, otherwise
    # it will fail since there will be no females or no males.
    model = 'drug ~ yob + yob*yob + fg_endpoint_year + fg_endpoint_year*fg_endpoint_year'
    if not is_sex_specific:
        model += ' + female'
    # Compute score using Logistic model, predict using fixed values
    mod = logit(model, df)
    res = mod.fit(disp=False)  # fit() without displaying convergence messages
    predict_data = pd.DataFrame({
        "Intercept": [1.0],
        "yob": [PRED_YOB],
        "fg_endpoint_year": [PRED_FG_ENDPOINT_YEAR],
        "female": [PRED_FEMALE]
    })

    # Force "predict_data" and the "cov_params" matrix to use the same column
    # order, otherwise it will lead to a silent bug as their values are put
    # together when computing the std err with "mdot" below.
    col_order = res.cov_params().columns.values
    predict_data = predict_data.loc[:, col_order]

    # Compute the standard error of the prediction
    pred = res.predict(predict_data)
    pred_lin = np.log(pred / (1 - pred))  # to scale of the linear predictors
    stderr = np.sqrt(mdot([predict_data, res.cov_params(), predict_data.T]))
    real_stderr = stderr.flatten() * (np.abs(np.exp(pred_lin)) /
                                      (1 + np.exp(pred_lin))**2)

    return pred[0], real_stderr[0]
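A short note restating the last two statements: with linear predictor pred_lin = log(pred / (1 - pred)) and stderr = sqrt(x^T Cov(beta) x) from the mdot line, the delta method gives the standard error on the probability scale as stderr * exp(pred_lin) / (1 + exp(pred_lin))^2, which is exactly what the real_stderr expression computes.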
Example no. 5
def estimate_cor(wmat, ldmat, intercept=False):
    """
    Estimate the sample correlation structure for predicted expression.

    :param wmat: numpy.ndarray eQTL weight matrix for a risk region
    :param ldmat: numpy.ndarray LD matrix for a risk region
    :param intercept: bool to return the intercept variable or not

    :return: tuple (pred_expr correlation, intercept variable; None if intercept=False)
    """
    wcov = mdot([wmat.T, ldmat, wmat])
    scale = np.diag(1 / np.sqrt(np.diag(wcov)))
    wcor = mdot([scale, wcov, scale])

    if intercept:
        inter = mdot([scale, wmat.T, ldmat])
        return wcor, inter
    else:
        return wcor, None
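A minimal sanity-check sketch (the toy inputs are assumptions, not project data): rescaling by the inverse standard deviations should give a correlation matrix with unit diagonal.

import numpy as np
from numpy.linalg import multi_dot as mdot

p, g = 10, 3
wmat = np.random.normal(size=(p, g))   # toy eQTL weights for g genes over p SNPs
ldmat = np.eye(p)                      # toy LD matrix
wcor, inter = estimate_cor(wmat, ldmat, intercept=True)
assert np.allclose(np.diag(wcor), 1.0)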
Example no. 6
def _train_gblup(y, Z, X, include_ses=False, p_threshold=0.01):
    log = logging.getLogger(pyfocus.LOG)

    try:
        from limix.qc import normalise_covariance
    except ImportError as ie:
        log.error(
            "Training submodule requires limix>=2.0.0 and sklearn to be installed."
        )
        raise
    from numpy.linalg import multi_dot as mdot
    from scipy.linalg import pinvh

    log.debug("Initializing GBLUP model")

    attrs = dict()

    # estimate heritability using limix
    K_cis = np.dot(Z, Z.T)
    K_cis = normalise_covariance(K_cis)
    fe_var, s2u, s2e, logl, fixed_betas, pval = _fit_cis_herit(y, K_cis, X)
    yresid = y - np.dot(X, fixed_betas)

    if pval > p_threshold:
        log.info("h2g pvalue {} greater than threshold {}. Skipping".format(
            pval, p_threshold))
        return None

    attrs["h2g"] = s2u / (fe_var + s2u + s2e)
    attrs["h2g.logl"] = logl
    attrs["h2g.pvalue"] = pval

    # Total variance
    n, p = Z.shape

    # ridge solution (i.e. rrBLUP)
    # this will be slower than normal GBLUP when p > n but is a little bit more flexible
    ZtZpDinv = pinvh(np.dot(Z.T, Z) + np.eye(p) * (s2e / s2u))
    betas = mdot([ZtZpDinv, Z.T, yresid])

    if include_ses:
        # TODO: come back to this with matrix operations rather than list comprehensions
        # jack-knife standard-errors over the fast leave-one-out estimates using rrBLUP
        """
        h = np.array([mdot([Z[i], ZtZpDinv, Z[i]]) for i in range(n)])
        e = yresid - np.dot(Z, betas)
        beta_jk = [betas - np.dot(ZtZpDinv, Z[i] * e[i]) / (1 - h[i]) for i in range(n)]
        ses = np.sqrt(np.mean(beta_jk, axis=0) * (n - 1))
        """
        ses = None
    else:
        ses = None

    return betas, ses, attrs
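For reference, the ridge line above in equation form: the rrBLUP coefficients are betas = inv(Z^T Z + (s2e / s2u) * I) * Z^T * yresid, i.e. the ridge penalty is the ratio of the residual variance to the genetic variance component returned by _fit_cis_herit.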
Example no. 7
    def learn_beta(self, observation):
        """
        Own implementation of linear regression
        :param observation:
        :return:
        """
        observation = np.array(observation)

        X, y = observation[:, :4], observation[:, -1:]
        X = self.prepend_one(X)

        print(np.array(X.T).shape)

        # for point in X:
        #     self.kernel(point, X, 0.5)

        # when weights are computed use np.diag() to put w_k into W
        W = np.identity(np.array(X.T).shape[1])  # place holder for weights (not computed yet)
        print("W.shape: ", W.shape)
        beta_ = mdot([inv(mdot([X.T, W, X])), X.T, W, y])
        print(f"Beta_: {beta_}")
        return beta_
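A hypothetical follow-up sketch of what the placeholder W is meant to become: locally weighted regression swaps the identity matrix for diagonal Gaussian kernel weights around a query point and reuses the same normal-equations expression. The helper name, bandwidth default, and kernel choice are assumptions for illustration only.

import numpy as np
from numpy.linalg import inv, multi_dot as mdot

def wls_beta(X, y, point, k=0.5):
    # Gaussian kernel weights centred on the query point (one weight per row of X)
    diffs = X - point
    w = np.exp(-np.sum(diffs * diffs, axis=1) / (2.0 * k ** 2))
    W = np.diag(w)
    # same closed form as learn_beta, now with non-trivial weights
    return mdot([inv(mdot([X.T, W, X])), X.T, W, y])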
Example no. 8
    def grad(vars):
        g = np.zeros(r)
        V = sum(As[i] * vars[i] for i in range(r))
        P = pinvh(V)
        ztP = np.dot(zscores, P)
        Pz = ztP.T
        for i in range(r):
            Ai = As[i]
            g[i] = np.trace(P.dot(Ai)) - mdot([ztP, Ai, Pz])

        g = 0.5 * g
        print("||g|| = {}".format(norm(g)))
        return g
Example no. 9
    def hess(vars):
        AI = np.zeros((r, r))
        V = sum(As[i] * vars[i] for i in range(r))
        P = pinvh(V)
        ztP = np.dot(zscores.T, P)
        Pz = ztP.T
        for i in range(r):
            ztPAsi = np.dot(ztP, As[i])
            for j in range(i + 1):
                AI[i, j] = mdot([ztPAsi, P, As[j], Pz])
                AI[j, i] = AI[i, j]

        AI = 0.5 * AI
        return AI
Example no. 10
def assoc_test(weights, gwas, ldmat, heterogeneity=False):
    """
    TWAS association test.

    :param weights: numpy.ndarray of eQTL weights
    :param gwas: pyfocus.GWAS object
    :param ldmat: numpy.ndarray LD matrix
    :param heterogeneity:  bool estimate variance from multiplicative random effect

    :return: tuple (beta, se)
    """

    p = ldmat.shape[0]
    assoc = np.dot(weights, gwas.Z)
    if heterogeneity:
        resid = assoc - gwas.Z
        resid_var = mdot([resid, lin.pinvh(ldmat), resid]) / p
    else:
        resid_var = 1

    se = np.sqrt(resid_var * mdot([weights, ldmat, weights]))

    return assoc, se
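Downstream, the usual TWAS test statistic is simply assoc / se, i.e. w^T z / sqrt(w^T V w) with V the LD matrix; the heterogeneity branch only inflates the variance by the estimated residual term.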
Example no. 11
def compute_twas(gwas, coef, LD):
    """
    Compute the TWAS test statistics.

    :param gwas: pandas.DataFrame containing estimated GWAS beta and standard error
    :param coef: numpy.ndarray LASSO eQTL coefficients
    :param LD:  numpy.ndarray p x p LD matrix

    :return: (float, float) the TWAS score and variance estimates
    """
    # compute Z scores
    Z = gwas.beta.values / gwas.se.values

    # score and variance
    score = np.dot(coef, Z)
    within_var = mdot([coef, LD, coef])

    return score, within_var
Example no. 12
# split into features and labels
X, y = data[:, :2], data[:, 2]
print("X.shape:", X.shape)
print("y.shape:", y.shape)
# 3D plotting
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')  # the projection arg is important!
ax.scatter(X[:, 0], X[:, 1], y, color="red")
ax.set_title("raw data")
plt.draw()
# show, use plt.show() for blocking
# prep for linear reg.
X = prepend_one(X)
print("X.shape:", X.shape)
# Fit model/compute optimal parameters beta
beta_ = mdot([inv(dot(X.T, X)), X.T, y])
print("Optimal beta:", beta_)
# prep for prediction
X_grid = prepend_one(grid2d(-3, 3, num=30))
print("X_grid.shape:", X_grid.shape)
# Predict with trained model
y_grid = dot(X_grid, beta_)
print("Y_grid.shape", y_grid.shape)
# visualize the result
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')  # the projection part is important
ax.scatter(X_grid[:, 1], X_grid[:, 2], y_grid)  # skip the leading column of ones
ax.scatter(X[:, 1], X[:, 2], y, color="red")  # also show the real data
ax.set_title("predicted data")
plt.show()
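The helpers prepend_one and grid2d are used above but not defined in this snippet; plausible minimal definitions (assumptions, not the original code) are:

import numpy as np

def prepend_one(X):
    # add a leading column of ones for the intercept term
    return np.column_stack([np.ones(X.shape[0]), X])

def grid2d(start, end, num=50):
    # regular 2-D grid over [start, end] x [start, end], flattened to shape (num*num, 2)
    dom = np.linspace(start, end, num)
    X0, X1 = np.meshgrid(dom, dom)
    return np.column_stack([X0.ravel(), X1.ravel()])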
Example no. 13
def _impute(merged_snps, ref, annot, taus, gwas_n, obs, to_impute, obsZ, ridge, run_fizi):
    """
    This is the internal logic for the imputation.

    I refactored this into a separate function to improve flexibility for any changes downstream
    (e.g., MI, sampling, sketching, etc.).

    Testing out multiple imputation (MI) for the functional part of fizi: we could incorporate MI
    into the estimation of LD as well, but it might come with a big computational hit; one cool
    trick might be to use sketching to speed up LD estimation and maintain performance for MI.

    :param merged_snps: pyfizi.MergedPanel object containing merged GWAS and LDRef data
    :param ref: pyfizi.RefPanel object for reference genotype data at the region
    :param annot: pyfizi.Annot object representing the functional annotations at the region (default: None)
    :param taus: pyfizi.Tau object representing the prior variance terms for functional categories (default: None)
    :param gwas_n: numpy.ndarray or int GWAS sample size. If int assumes sample size is uniform at each SNP.
                    Not required if 'N' is column in GWAS data (default: None)
    :param obsZ: numpy.ndarray vector of observed Z-scores that have been flipped to match ref panel
    :param obs: numpy.ndarray boolean vector marking which rows in `merged_snps` have observed Z-scores
    :param to_impute: numpy.ndarray boolean vector marking which rows in `merged_snps` need to be imputed
    :param ridge: float Ridge term to regularize LD estimation (default=0.1)
    :param run_fizi: bool indicating if fizi or impg is run

    :return: (numpy.ndarray imputed_z, numpy.ndarray pvalues, numpy.ndarray r2blups)
    """

    from numpy.linalg import multi_dot as mdot
    from scipy.linalg import pinvh
    from scipy.stats import chi2

    log = logging.getLogger(pyfizi.LOG)
    nobs = np.sum(obs)
    nimp = np.sum(to_impute)

    # compute linkage-disequilibrium estimate
    log.debug("Estimating LD for {} SNPs".format(len(merged_snps)))
    LD = ref.estimate_ld(merged_snps, adjust=ridge)

    log.debug("Partitioning LD into quadrants")
    Voo_ld = LD[obs].T[obs].T
    Vuo_ld = LD[to_impute].T[obs].T
    Vou_ld = Vuo_ld.T
    Vuu_ld = LD[to_impute].T[to_impute].T

    if run_fizi:
        if taus is not None:
            A = annot.get_matrix(merged_snps, taus.names)
            estimates = taus.estimates
            D = np.diag(gwas_n * np.dot(A, estimates)) #/ np.power(np.median(merged_snps.SE.values[obs]), 2)
            Do = D.T[obs].T[obs]
            Du = D.T[to_impute].T[to_impute]
            uoV = Vuo_ld + mdot([Vuu_ld, Du, Vuo_ld]) + mdot([Vuo_ld, Do, Voo_ld])
            ooV = Voo_ld + mdot([Voo_ld, Do, Voo_ld]) + mdot([Vou_ld, Du, Vuo_ld])
            uuV = Vuu_ld + mdot([Vuu_ld, Du, Vuu_ld]) + mdot([Vuo_ld, Do, Vou_ld])
        else:
            A = annot.get_matrix(merged_snps)
            names = annot.names
            Ao = A[obs]
            flag = np.mean(Ao != 0, axis=0) > 0
            Ao = Ao.T[flag].T
            A = A.T[flag].T
            names = names[flag]

            log.debug("Starting inference for variance parameters")
            estimates = pyfizi.infer_taus(obsZ, Voo_ld, Ao)
            if estimates is not None:
                log.debug("Finished variance parameter inference")

                estimates, sigma2e = estimates
                # rescale estimates
                estimates = estimates * np.sum(Ao != 0, axis=0) / np.sum(A != 0, axis=0)

                # N gets inferred as part of the parameter
                D = np.diag(np.dot(A, estimates))
                Do = D.T[obs].T[obs]
                Du = D.T[to_impute].T[to_impute]
                uoV = Vuo_ld + mdot([Vuu_ld, Du, Vuo_ld]) + mdot([Vuo_ld, Do, Voo_ld])
                ooV = Voo_ld + mdot([Voo_ld, Do, Voo_ld]) + mdot([Vou_ld, Du, Vuo_ld])
                uuV = Vuu_ld + mdot([Vuu_ld, Du, Vuu_ld]) + mdot([Vuo_ld, Do, Vou_ld])
            else:
                log.warning("Variance parameter optimization failed. Defaulting to ImpG")
                # estimation failed... default to ImpG
                uoV = Vuo_ld
                ooV = Voo_ld
                uuV = Vuu_ld
    else:
        uoV = Vuo_ld
        ooV = Voo_ld
        uuV = Vuu_ld

    """
    TODO: consider replacing with the following more numerically stable and efficient code
    this is low priority but might be useful to explore at some point

    # method 1; no extra overhead, but additional solve cost
    ooL = cholesky(ooV, lower=True)
    uoVLinv = triangular_solve(ooL, uoV.T, lower=True)
    LinvZ = triangular_solve(ooL, obsZ, lower=True)

    impZs = uoVLinv.T @ LinvZ
    r2blup = np.sum(uoVLinv ** 2, axis=0) / np.diag(uuV)

    # method 2; extra memory, but cheaper solve cost
    ooL = cholesky(ooV, lower=True)
    tmp = triangular_solve(ooL, np.concatenate((obsZ[:,np.newaxis], uoV.T), axis=1), lower=True)
    uoVLinv = tmp.T[1:]
    LinvZ = tmp.T[0]

    impZs = uoVLinv.T @ LinvZ
    r2blup = np.sum(uoVLinv ** 2, axis=0) / np.diag(uuV)

    # method 3; use conjugate gradient with vec matrix ops over 'raw' LD and diagonal offset,
    # instead of adding offset to LD matrix and solving
    # or even just use raw genotype data vec matrix ops (if N_ref < SNP_obs)
    #    (X'X + Ilambda) @ candidate = X' @ (X @ candidate) + lambda * candidate
    uoVinv = cg(linear_op, uoV.T)
    impZs = uoVinv @ obsZ
    r2blup = np.diag(uoVinv @ uoV.T) / np.diag(uuV) # this can likely be further optimized with a product/sum op
    """
    log.debug("Computing inverse of variance-covariance matrix for {} observed SNPs".format(nobs))
    ooVinv = pinvh(ooV, check_finite=False)

    log.debug("Imputing {} SNPs from {} observed scores".format(nimp, nobs))
    impZs = mdot([uoV, ooVinv, obsZ])

    # compute r2-pred scores
    r2blup = np.diag(mdot([uoV, ooVinv, uoV.T])) / np.diag(uuV)

    # compute two-sided z-test for p-value
    pvals = chi2.sf(impZs ** 2, 1)

    return impZs, pvals, r2blup
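In equation form, restating the two mdot calls above: impZs is the conditional mean uoV * inv(ooV) * obsZ, and r2blup is diag(uoV * inv(ooV) * uoV^T) / diag(uuV), which is the standard ImpG imputation applied to the (optionally functionally re-weighted) LD quadrants.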
Example no. 14
def _impute(merged_snps, ref, annot, taus, gwas_n, obs, to_impute, obsZ, ridge,
            run_fizi):
    """
    This is the internal logic for the imputation.

    I refactored this into a separate function to improve flexibility for any changes downstream
    (e.g., MI, sampling, sketching, etc.).

    Testing out multiple imputation (MI) for the functional part of fizi: we could incorporate MI
    into the estimation of LD as well, but it might come with a big computational hit; one cool
    trick might be to use sketching to speed up LD estimation and maintain performance for MI.

    :param merged_snps: pyfizi.MergedPanel object containing merged GWAS and LDRef data
    :param ref: pyfizi.RefPanel object for reference genotype data at the region
    :param annot: pyfizi.Annot object representing the functional annotations at the region (default: None)
    :param taus: pyfizi.Tau object representing the prior variance terms for functional categories (default: None)
    :param gwas_n: numpy.ndarray or int GWAS sample size. If int assumes sample size is uniform at each SNP.
                    Not required if 'N' is column in GWAS data (default: None)
    :param obsZ: numpy.ndarray vector of observed Z-scores that have been flipped to match ref panel
    :param obs: numpy.ndarray boolean vector marking which rows in `merged_snps` have observed Z-scores
    :param to_impute: numpy.ndarray boolean vector marking which rows in `merged_snps` need to be imputed
    :param ridge: float Ridge term to regularize LD estimation (default=0.1)
    :param run_fizi: bool indicating if fizi or impg is run

    :return: (numpy.ndarray imputed_z, numpy.ndarray pvalues, numpy.ndarray r2blups)
    """

    from numpy.linalg import multi_dot as mdot
    from scipy.linalg import pinvh
    from scipy.stats import chi2

    log = logging.getLogger(pyfizi.LOG)
    nobs = np.sum(obs)
    nimp = np.sum(to_impute)

    # compute linkage-disequilibrium estimate
    log.debug("Estimating LD for {} SNPs".format(len(merged_snps)))
    LD = ref.estimate_ld(merged_snps, adjust=ridge)

    log.debug("Partitioning LD into quadrants")
    Voo_ld = LD[obs].T[obs].T
    Vuo_ld = LD[to_impute].T[obs].T
    Vou_ld = Vuo_ld.T
    Vuu_ld = LD[to_impute].T[to_impute].T

    if run_fizi:
        if taus is not None:
            A = annot.get_matrix(merged_snps, taus.names)
            estimates = taus.estimates
            D = np.diag(gwas_n * np.dot(A, estimates))
            Do = D.T[obs].T[obs]
            Du = D.T[to_impute].T[to_impute]
            uoV = Vuo_ld + mdot([Vuu_ld, Du, Vuo_ld]) + mdot(
                [Vuo_ld, Do, Voo_ld])
            ooV = Voo_ld + mdot([Voo_ld, Do, Voo_ld]) + mdot(
                [Vou_ld, Du, Vuo_ld])
            uuV = Vuu_ld + mdot([Vuu_ld, Du, Vuu_ld]) + mdot(
                [Vuo_ld, Do, Vou_ld])
        else:
            A = annot.get_matrix(merged_snps)
            names = annot.names
            Ao = A[obs]
            flag = np.mean(Ao != 0, axis=0) > 0
            Ao = Ao.T[flag].T
            A = A.T[flag].T
            names = names[flag]

            log.debug("Starting inference for variance parameters")
            estimates = pyfizi.infer_taus(obsZ, Voo_ld, Ao)
            if estimates is not None:
                log.debug("Finished variance parameter inference")

                estimates, sigma2e = estimates
                # rescale estimates
                estimates = estimates * np.sum(Ao != 0, axis=0) / np.sum(
                    A != 0, axis=0)

                # N gets inferred as part of the parameter
                D = np.diag(np.dot(A, estimates))
                Do = D.T[obs].T[obs]
                Du = D.T[to_impute].T[to_impute]
                uoV = Vuo_ld + mdot([Vuu_ld, Du, Vuo_ld]) + mdot(
                    [Vuo_ld, Do, Voo_ld])
                ooV = Voo_ld + mdot([Voo_ld, Do, Voo_ld]) + mdot(
                    [Vou_ld, Du, Vuo_ld])
                uuV = Vuu_ld + mdot([Vuu_ld, Du, Vuu_ld]) + mdot(
                    [Vuo_ld, Do, Vou_ld])
            else:
                log.warning(
                    "Variance parameter optimization failed. Defaulting to ImpG"
                )
                # estimation failed... default to ImpG
                uoV = Vuo_ld
                ooV = Voo_ld
                uuV = Vuu_ld
    else:
        uoV = Vuo_ld
        ooV = Voo_ld
        uuV = Vuu_ld

    log.debug(
        "Computing inverse of variance-covariance matrix for {} observed SNPs".
        format(nobs))
    ooVinv = pinvh(ooV, check_finite=False)

    log.debug("Imputing {} SNPs from {} observed scores".format(nimp, nobs))
    impZs = mdot([uoV, ooVinv, obsZ])

    # compute r2-pred scores
    r2blup = np.diag(mdot([uoV, ooVinv, uoV.T])) / np.diag(uuV)

    # compute two-sided z-test for p-value
    pvals = chi2.sf(impZs**2, 1)

    return impZs, pvals, r2blup
Example no. 15
def _infer_taus_reml(zscores, LD, A):
    """
    Infer the variance parameters (tau) using maximum likelihood over the summary statistics

    :param zscores:  numpy.ndarray of zscores
    :param LD: numpy.ndarray LD matrix representing correlation structure among `zscores`
    :param A: numpy.ndarray functional category annotation matrix

    :return: (numpy.ndarray taus, float sigma2e) tuple of the estimated taus and the residual variance
    """

    k, t = A.shape
    r = t + 1

    def get_component(LD, A, jdx):
        return np.dot(LD * A.T[jdx], LD)

    As = [get_component(LD, A, jdx) for jdx in range(t)] + [LD]

    # start from the null of all variance explained by LD/finite-sample
    init = np.zeros(r)
    sigma2e = mdot([zscores, pinvh(LD), zscores]) / k
    init[-1] = sigma2e

    # TODO: replace V and P terms as globals that the closures have access to
    # it should speed things up by only needing to compute V and P once per iteration

    # negative log-likelihood (NLL) of the zscores given the variance parameters
    def obj(vars):
        V = sum(As[i] * vars[i] for i in range(r))
        logL = -mvn.logpdf(zscores, cov=V, allow_singular=True)
        print("NLL({}) = {}".format(",".join(map(str, vars)), logL))
        return logL

    # gradient of the NLL
    def grad(vars):
        g = np.zeros(r)
        V = sum(As[i] * vars[i] for i in range(r))
        P = pinvh(V)
        ztP = np.dot(zscores, P)
        Pz = ztP.T
        for i in range(r):
            Ai = As[i]
            g[i] = np.trace(P.dot(Ai)) - mdot([ztP, Ai, Pz])

        g = 0.5 * g
        print("||g|| = {}".format(norm(g)))
        return g

    # average-information matrix of the NLL; not really the hessian...
    def hess(vars):
        AI = np.zeros((r, r))
        V = sum(As[i] * vars[i] for i in range(r))
        P = pinvh(V)
        ztP = np.dot(zscores.T, P)
        Pz = ztP.T
        for i in range(r):
            ztPAsi = np.dot(ztP, As[i])
            for j in range(i + 1):
                AI[i, j] = mdot([ztPAsi, P, As[j], Pz])
                AI[j, i] = AI[i, j]

        AI = 0.5 * AI
        return AI

    try:
        # trust-ncg should be more robust compared with ncg
        res = minimize(obj,
                       init,
                       method="trust-ncg",
                       jac=grad,
                       hess=hess,
                       options={"gtol": 1e-3})
        if res.success:
            result = (res.x[:-1], res.x[-1])
        else:
            result = None
    except Exception as exc:
        result = None

    return result
Example no. 16
File: Stats.py Project: parcap/nrp
    def port_vol(self, w, cov):
        return np.sqrt(mdot([w.T, cov, w]))
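A hypothetical usage sketch (the weights and covariance are made-up toy values); with a diagonal covariance the expression reduces to sqrt(sum_i w_i^2 * var_i).

import numpy as np
from numpy.linalg import multi_dot as mdot

w = np.array([0.5, 0.3, 0.2])        # toy portfolio weights
cov = np.diag([0.04, 0.09, 0.01])    # toy covariance of asset returns
vol = np.sqrt(mdot([w.T, cov, w]))   # same expression as port_vol above
print(vol)                           # approx 0.136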
Example no. 17
# %% [markdown]
# ## LinReg with numpy

# %%
X = np.random.random((5, 3))
y = np.random.random(5)
X.shape, y.shape

# %% [markdown]
# Calculate the optimal parameter:
# $\hat\beta = (X^T X)^{-1} X^T y$

# %%
XT = X.T  # transpose

beta_ = mdot([inv(XT @ X), XT, y])
beta_

# %%
XT = X.T  # transpose

beta_ = inv(XT @ X) @ XT @ y
beta_

# %% [markdown]
# The model $f$:


# %%
def f(X, beta):
    return X @ beta
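# %% [markdown]
# Predictions from the fitted parameters (a minimal usage sketch with the arrays defined above):

# %%
y_hat = f(X, beta_)
y_hat, y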