def _remma_epiDD_eff(y, xmat, zmat, gmat_lst, var_com, bed_file, snp_lst_0=None, var_app=1.0, p_cut=1.0e-5,
                     out_file='epiDD_eff'):
    """
    Estimate dominance-by-dominance epistatic effects with a random SNP-BLUP model.

    Steps performed here:

    1. Build V = I * var_com[-1] + sum_i Z G_i Z' * var_com[i] and invert it.
    2. Build P = V^-1 - V^-1 X (X' V^-1 X)^-1 X' V^-1 and the vector Z' P y.
    3. Hand Z' P y, the SNP list and the effect cut-off
       sqrt(chi2.isf(p_cut, 1) * var_app) to ``lib.remma_epiDD_eff_cpu``, which
       is given ``out_file + '.temp'`` as its output path.
    4. Copy that temporary table to ``out_file``, appending the columns
       ``chi_app`` (last column squared, divided by ``var_app``) and ``p_app``
       (chi-square survival function with 1 df), then delete the temporary file.

    :param y: phenotypic vector
    :param xmat: Designed matrix for fixed effect
    :param zmat: csr sparse matrix. Designed matrix for random effect.
    :param gmat_lst: A list for relationship matrix
    :param var_com: Estimated variances; the last element is used as the residual variance
    :param bed_file: the prefix for plink binary file
    :param snp_lst_0: the first SNP list for the SNP pairs. the min value is 0 and the max value is num_snp-2.
    The default value is None, which means list [0, num_snp-1)
    :param var_app: the approximate variances for estimated SNP effects.
    :param p_cut: p-value cut-off. default value is 1.0e-5.
    :param out_file: output file. default value is 'epiDD_eff'.
    :return: 0
    :raises SystemExit: with status 1 when snp_lst_0 is empty or out of range
    """
    logging.info("Calculate the phenotypic covariance matrix and inversion")
    y = np.array(y).reshape(-1, 1)
    n = y.shape[0]
    xmat = np.array(xmat).reshape(n, -1)
    # Force a float matrix: an integer residual variance would otherwise make
    # the in-place accumulation below fail on dtype casting.
    vmat = np.diag(np.full(n, var_com[-1], dtype=np.float64))
    # Index loop on purpose: no loop variable keeps a relationship matrix alive
    # after ``del gmat_lst``.
    for idx in range(len(gmat_lst)):
        vmat += zmat.dot((zmat.dot(gmat_lst[idx])).T) * var_com[idx]
    del gmat_lst
    gc.collect()
    vmat_inv = np.linalg.inv(vmat)

    logging.info("Calculate P matrix")
    vxmat = np.dot(vmat_inv, xmat)
    xvxmat = np.dot(xmat.T, vxmat)
    xvxmat = np.linalg.inv(xvxmat)
    pmat = reduce(np.dot, [vxmat, xvxmat, vxmat.T])
    pmat = vmat_inv - pmat
    # The C routine receives a raw ``double *``; guarantee a contiguous float64 buffer.
    pymat = np.ascontiguousarray(zmat.T.dot(np.dot(pmat, y)), dtype=np.float64)
    del vmat, vmat_inv, pmat
    gc.collect()

    # Only the row counts of the plink .bim/.fam files are needed.
    num_snp = pd.read_csv(bed_file + '.bim', header=None).shape[0]
    num_id = pd.read_csv(bed_file + '.fam', header=None).shape[0]
    if snp_lst_0 is None:
        snp_idx = range(num_snp - 1)
    else:
        # Materialise once so one-shot iterators survive the checks below.
        snp_idx = list(snp_lst_0)
        if not snp_idx:
            logging.error('snp_lst_0 is empty!')
            sys.exit(1)
        if max(snp_idx) >= num_snp - 1 or min(snp_idx) < 0:
            logging.error('snp_lst_0 is out of range!')
            sys.exit(1)

    logging.info("Convert python variates to C type")
    pbed_file = ffi.new("char[]", bed_file.encode('ascii'))
    pnum_id = ffi.cast("long long", num_id)
    pnum_snp = ffi.cast("long long", num_snp)
    # Keep the array bound to a local name so its buffer outlives the C call.
    snp_arr = np.array(list(snp_idx), dtype=np.longlong)
    psnp_lst_0 = ffi.cast("long long *", snp_arr.ctypes.data)
    plen_snp_lst_0 = ffi.cast("long long", len(snp_arr))
    ppymat = ffi.cast("double *", pymat.ctypes.data)
    chi_cut = chi2.isf(p_cut, 1)
    eff_cut = np.sqrt(chi_cut * var_app)
    peff_cut = ffi.cast("double", eff_cut)
    temp_file = out_file + '.temp'
    pout_file = ffi.new("char[]", temp_file.encode('ascii'))

    logging.info('Test')
    clock_t0 = time.perf_counter()
    cpu_t0 = time.process_time()
    lib.remma_epiDD_eff_cpu(pbed_file, pnum_id, pnum_snp, psnp_lst_0, plen_snp_lst_0, ppymat, peff_cut, pout_file)
    clock_t1 = time.perf_counter()
    cpu_t1 = time.process_time()
    logging.info("Running time: Clock time, {:.5f} sec; CPU time, {:.5f} sec.".format(clock_t1 - clock_t0,
                                                                                      cpu_t1 - cpu_t0))

    logging.info('Add the approximate P values')
    with open(temp_file) as fin, open(out_file, 'w') as fout:
        head_line = fin.readline().strip()
        fout.write(head_line + ' chi_app p_app\n')
        for line in fin:
            arr = line.split()
            if not arr:
                # Skip blank lines instead of failing on arr[-1].
                continue
            eff = float(arr[-1])  # the last column is treated as the estimated effect
            chi_app = eff * eff / var_app
            p_app = chi2.sf(chi_app, 1)
            fout.write(' '.join(arr + [str(chi_app), str(p_app)]) + '\n')
    # Removed only after a successful rewrite, so a failure above leaves the
    # raw results on disk.
    os.remove(temp_file)
    return 0
def remma_epiAA_eff_cpu_c(y, xmat, zmat, gmat_lst, var_com, bed_file, snp_lst_0=None, eff_cut=-999.0,
                          out_file='remma_epiAA_eff_cpu_c'):
    """
    Additive-by-additive epistasis test.

    Builds V = I * var_com[-1] + sum_i Z G_i Z' * var_com[i], the projection
    matrix P = V^-1 - V^-1 X (X' V^-1 X)^-1 X' V^-1 and the vector Z' P y, then
    passes that vector, the SNP list, ``eff_cut`` and ``out_file`` to the
    compiled routine ``lib.remma_epiAA_eff_cpu``.

    :param y: phenotypes
    :param xmat: design matrix for the fixed effects
    :param zmat: design matrix for the random effects, csr sparse matrix
    :param gmat_lst: list of genomic relationship matrices
    :param var_com: variance components; the last element is used as the residual variance
    :param bed_file: prefix of the plink binary files
    :param snp_lst_0: list of the first SNP of each interaction pair; the max value is num_snp-2 and the
    min value is 0. None means range(num_snp - 1).
    :param eff_cut: threshold by which interaction pairs are retained; passed unchanged to the C routine
    :param out_file: output file, passed to the C routine
    :return: 0
    :raises SystemExit: with status 1 when snp_lst_0 is empty or out of range
    """
    logging.info("计算V矩阵及其逆矩阵")
    y = np.array(y).reshape(-1, 1)
    n = y.shape[0]
    xmat = np.array(xmat).reshape(n, -1)
    # Force a float matrix: an integer residual variance would otherwise make
    # the in-place accumulation below fail on dtype casting.
    vmat = np.diag(np.full(n, var_com[-1], dtype=np.float64))
    # Index loop on purpose: no loop variable keeps a relationship matrix alive
    # after ``del gmat_lst``.
    for idx in range(len(gmat_lst)):
        vmat += zmat.dot((zmat.dot(gmat_lst[idx])).T) * var_com[idx]
    del gmat_lst
    gc.collect()
    vmat_inv = np.linalg.inv(vmat)

    logging.info("计算P矩阵")
    vxmat = np.dot(vmat_inv, xmat)
    xvxmat = np.dot(xmat.T, vxmat)
    xvxmat = np.linalg.inv(xvxmat)
    pmat = reduce(np.dot, [vxmat, xvxmat, vxmat.T])
    pmat = vmat_inv - pmat
    # The C routine receives a raw ``double *``; guarantee a contiguous float64 buffer.
    pymat = np.ascontiguousarray(zmat.T.dot(np.dot(pmat, y)), dtype=np.float64)
    del vmat, vmat_inv, pmat
    gc.collect()

    # Only the row counts of the plink .bim/.fam files are needed.
    num_snp = pd.read_csv(bed_file + '.bim', header=None).shape[0]
    num_id = pd.read_csv(bed_file + '.fam', header=None).shape[0]
    if snp_lst_0 is None:
        snp_idx = range(num_snp - 1)
    else:
        # Materialise once so one-shot iterators survive the checks below.
        snp_idx = list(snp_lst_0)
        if not snp_idx:
            logging.error('snp_lst_0 is empty!')
            sys.exit(1)
        if max(snp_idx) >= num_snp - 1 or min(snp_idx) < 0:
            logging.error('snp_lst_0 is out of range!')
            sys.exit(1)

    logging.info("python变量转化到C")
    pbed_file = ffi.new("char[]", bed_file.encode('ascii'))
    pnum_id = ffi.cast("long long", num_id)
    pnum_snp = ffi.cast("long long", num_snp)
    # Keep the array bound to a local name so its buffer outlives the C call.
    snp_arr = np.array(list(snp_idx), dtype=np.longlong)
    psnp_lst_0 = ffi.cast("long long *", snp_arr.ctypes.data)
    plen_snp_lst_0 = ffi.cast("long long", len(snp_arr))
    ppymat = ffi.cast("double *", pymat.ctypes.data)
    peff_cut = ffi.cast("double", eff_cut)
    pout_file = ffi.new("char[]", out_file.encode('ascii'))

    logging.info('检验')
    clock_t0 = time.perf_counter()
    cpu_t0 = time.process_time()
    lib.remma_epiAA_eff_cpu(pbed_file, pnum_id, pnum_snp, psnp_lst_0, plen_snp_lst_0, ppymat, peff_cut, pout_file)
    clock_t1 = time.perf_counter()
    cpu_t1 = time.process_time()
    logging.info(
        "Running time: Clock time, {:.5f} sec; CPU time, {:.5f} sec.".format(
            clock_t1 - clock_t0, cpu_t1 - cpu_t0))
    return 0