def main():
    """Entry point if called as an executable.

    Calls ``preprocessing.gcta_hsq`` with ``--reml`` on the all-SNP GRM once
    per phenotype in ``PHE_LIST``, passing every quantitative and qualitative
    covariate, then records the SNP count of that GRM in
    ``<HSQ_DIR>/hsq-all/all.nbSNPs.txt``.
    """
    # Covariate flags: one '--qcovar <file>' pair per quantitative covariate,
    # followed by one '--covar <file>' pair per qualitative covariate.
    # e.g. quantitative: os.path.join(PHE_DIR, 'age.txt'), PCS
    #      qualitative:  os.path.join(PHE_DIR, 'sex.txt'),
    #                    os.path.join(PHE_DIR, 'centre.txt')
    covariate_args = []
    for quant_file in QUANT_COVAR:
        covariate_args.extend(['--qcovar', quant_file])
    for qual_file in QUAL_COVAR:
        covariate_args.extend(['--covar', qual_file])

    # ========= 1. All SNPs, 10 PCs =============
    grm_prefix = os.path.join(GRM_DIR, 'grm-all', 'all')
    hsq_prefix = os.path.join(HSQ_DIR, 'hsq-all', 'all')

    for pheno_name in PHE_LIST:
        print('running h^2 gcta estimation for phenotype: ' + pheno_name)
        pheno_path = os.path.join(PHE_DIR, pheno_name + '.txt')
        preprocessing.gcta_hsq(
            in_file=grm_prefix,
            out_file=hsq_prefix + '.' + pheno_name,
            gcta=GCTA,
            other_gcta_par=covariate_args + ['--pheno', pheno_path, '--reml'],
            ncpus=NBPROC,
            mygcta=MYGCTA,
            sbatch=USE_SBATCH,
            sbatch_par_j="hsq-all")

    # Record the number of SNPs of the all-SNP GRM (a single 'all <n>' line).
    with open(hsq_prefix + '.nbSNPs.txt', 'w') as nb_file:
        snp_count = preprocessing.read_grm_bin_n(grm_prefix)
        nb_file.write('all' + ' ' + str(snp_count) + '\n')
def main():
    """Entry point if called as an executable.

    Same all-SNP ``--reml`` analysis as the standard run, but the entry of
    ``QUANT_COVAR`` equal to ``PCS`` is left out of the quantitative
    covariates. Results go to ``<HSQ_DIR>/hsq-nopca/nopca.<phenotype>``.
    """
    # Quantitative covariates, skipping the principal-components file.
    covariate_args = []
    for quant_file in QUANT_COVAR:
        if quant_file != PCS:
            covariate_args.extend(['--qcovar', quant_file])
    # Qualitative covariates.
    for qual_file in QUAL_COVAR:
        covariate_args.extend(['--covar', qual_file])

    # ========= 1bis. All SNPs, no PC =============
    grm_prefix = os.path.join(GRM_DIR, 'grm-all/all')
    hsq_prefix = os.path.join(HSQ_DIR, 'hsq-nopca', 'nopca')

    for pheno_name in PHE_LIST:
        print('running h^2 gcta estimation (no PCs) for phenotype: '
              + pheno_name)
        pheno_path = os.path.join(PHE_DIR, pheno_name + '.txt')
        preprocessing.gcta_hsq(
            in_file=grm_prefix,
            out_file=hsq_prefix + '.' + pheno_name,
            gcta=GCTA,
            mygcta=MYGCTA,
            ncpus=NBPROC,
            other_gcta_par=covariate_args + ['--pheno', pheno_path, '--reml'],
            sbatch=USE_SBATCH,
            sbatch_par_j="hsq-nopca")
def main():
    """Entry point if called as an executable.

    Partitions h^2 across minor-allele-frequency (MAF) bins:

    1. writes the GRM prefix of every bin to ``hsq-maf/maf.test.txt``
       (the list handed to GCTA through ``--mgrm-bin``);
    2. writes the SNP count of every bin to ``hsq-maf/maf.nbSNPs.txt``;
    3. calls ``preprocessing.gcta_hsq`` once per phenotype and per variance
       component tested with ``--reml-lrt``.
    """
    # Covariate flags: '--qcovar <file>' for quantitative covariates, then
    # '--covar <file>' for qualitative ones.
    var_par = []
    for qcov in QUANT_COVAR:
        var_par += ['--qcovar', qcov]
    for cov in QUAL_COVAR:
        var_par += ['--covar', cov]

    # ========= 4. MAF =============
    in_dir_grm_maf = os.path.join(GRM_DIR, 'grm-maf')
    out_hsq_maf = os.path.join(HSQ_DIR, 'hsq-maf')
    maf_intervals = [(0.05, 0.20), (0.20, 0.35), (0.35, 0.50)]
    if not os.path.exists(out_hsq_maf):
        os.makedirs(out_hsq_maf)

    nb_snps_path = os.path.join(out_hsq_maf, 'maf.nbSNPs.txt')
    mgrm_path = os.path.join(out_hsq_maf, 'maf.test.txt')

    # Context managers make sure both files are closed even when reading a
    # GRM fails half-way through the loop.
    with open(nb_snps_path, 'w') as nb_file, open(mgrm_path, 'w') as mgrm_file:
        for maf_int in maf_intervals:
            maf_int_char = str(maf_int[0]) + '-' + str(maf_int[1])
            grm_prefix = os.path.join(in_dir_grm_maf,
                                      'maf' + maf_int_char,
                                      'maf.' + maf_int_char)
            # GRM used to partition h^2
            mgrm_file.write(grm_prefix + '\n')
            # number of SNPs associated with this bin
            nsnp = preprocessing.read_grm_bin_n(grm_prefix)
            nb_file.write('maf.' + maf_int_char + ' ' + str(nsnp) + '\n')

    # One likelihood-ratio test per genetic variance component, i.e. per bin
    # (components are numbered from 1 in the order listed in maf.test.txt).
    lrt_components = range(1, len(maf_intervals) + 1)

    for pheno in PHE_LIST:
        phenopath = os.path.join(PHE_DIR, pheno + '.txt')
        for lrt in lrt_components:
            out_file = os.path.join(out_hsq_maf,
                                    'maf' + '.' + str(lrt) + '.' + pheno)
            pars = var_par + ['--pheno', phenopath,
                              '--reml-lrt', str(lrt),
                              '--reml']
            preprocessing.gcta_hsq(in_file=mgrm_path,
                                   out_file=out_file,
                                   gcta=GCTA,
                                   mygcta=MYGCTA,
                                   ncpus=NBPROC,
                                   other_gcta_par=pars,
                                   par_input='--mgrm-bin',
                                   sbatch=USE_SBATCH,
                                   sbatch_par_j="hsq-maf")
def main(dataset):
    """Entry point if called as an executable.

    For each gene set ('neurodev', 'cnsexpression'), partitions h^2 between
    three GRMs (SNPs in the gene set, genic SNPs outside the gene set, and
    non-genic SNPs), all taken with a margin of 50.

    For every gene set the function writes the list of GRM prefixes
    (``<set>-margin50.test.txt``) and their SNP counts
    (``<set>.nbSNPs.txt``) under ``<hsq_dir>/hsq-<set>``, then calls
    ``preprocessing.gcta_hsq`` with ``--mgrm-bin`` once per phenotype and per
    tested component (``--reml-lrt`` 1, 2, 3).

    Args:
        dataset: value forwarded to ``config_dataset.config_dataset`` to
            obtain the run configuration.
    """
    config = config_dataset.config_dataset(dataset)

    # Covariate flags: '--qcovar <file>' for quantitative covariates, then
    # '--covar <file>' for qualitative ones.
    var_par = []
    for qcov in config.quant_covar:
        var_par += ['--qcovar', qcov]
    for cov in config.qual_covar:
        var_par += ['--covar', cov]

    # ========= 4. gene lists =============
    margin = 50
    margin_tag = '-margin' + str(margin)

    for namesel in ('neurodev', 'cnsexpression'):
        grmsel = os.path.join(config.grm_dir, 'grm-' + namesel)  # + -0.025 ?
        print("Gene set name:", namesel)
        print("GRMs:", grmsel)

        in_genesel_margin = os.path.join(grmsel,
                                         namesel + margin_tag,
                                         namesel)
        in_nongenesel_margin = os.path.join(grmsel,
                                            'non' + namesel + margin_tag,
                                            'non' + namesel)
        in_nongenic_margin = os.path.join(config.grm_dir, 'grm-genic',
                                          'nongenic' + margin_tag,
                                          'nongenic' + margin_tag)
        grm_prefixes = [in_genesel_margin,
                        in_nongenesel_margin,
                        in_nongenic_margin]

        out_genesel = os.path.join(config.hsq_dir, 'hsq-' + namesel)
        out_genesel_margin = os.path.join(out_genesel, namesel + margin_tag)
        print("hsq outputs:", out_genesel)
        if not os.path.exists(out_genesel):
            os.makedirs(out_genesel)

        # List the GRMs (SNPs in the gene list, genic SNPs not in the gene
        # list, non-genic SNPs) used for variance partitioning.
        mgrm_path = out_genesel_margin + '.test.txt'
        with open(mgrm_path, 'w') as mgrm_file:
            for grm_prefix in grm_prefixes:
                mgrm_file.write(grm_prefix + '\n')

        # Save the number of SNPs associated with each subgroup. All counts
        # are read before the file is opened, as in the original flow.
        counts = [preprocessing.read_grm_bin_n(grm_prefix)
                  for grm_prefix in grm_prefixes]
        nb_snps_path = os.path.join(out_genesel, namesel + '.nbSNPs.txt')
        with open(nb_snps_path, 'w') as nb_file:
            for grm_prefix, count in zip(grm_prefixes, counts):
                nb_file.write('{} {}\n'.format(os.path.basename(grm_prefix),
                                               count))

        for pheno in config.phe_list:
            print('running h^2 gcta estimation for phenotype: ' + pheno)
            phenopath = os.path.join(config.phe_dir, pheno + '.txt')
            for lrt in [1, 2, 3]:
                out_file = out_genesel_margin + '.' + str(lrt) + '.' + pheno
                pars = var_par + ['--pheno', phenopath,
                                  '--reml-lrt', str(lrt),
                                  str(config.reml_call)]
                preprocessing.gcta_hsq(in_file=mgrm_path,
                                       out_file=out_file,
                                       gcta=config.gcta,
                                       mygcta=config.mygcta,
                                       ncpus=config.nbproc,
                                       other_gcta_par=pars,
                                       par_input='--mgrm-bin',
                                       sbatch=config.use_sbatch,
                                       sbatch_par_j="hsq-genesel")
def main():
    """Entry point if called as an executable.

    Partitions h^2 between genic and non-genic SNPs for gene margins of
    0, 20 and 50. For each margin the function writes the two GRM prefixes to
    ``hsq-genic/genic-margin<m>.test.txt``, appends their SNP counts to
    ``hsq-genic/genic.nbSNPs.txt`` and calls ``preprocessing.gcta_hsq`` with
    ``--mgrm-bin`` once per phenotype and per tested component
    (``--reml-lrt`` 1 and 2).
    """
    # Covariate flags: '--qcovar <file>' for quantitative covariates, then
    # '--covar <file>' for qualitative ones.
    var_par = []
    for qcov in QUANT_COVAR:
        var_par += ['--qcovar', qcov]
    for cov in QUAL_COVAR:
        var_par += ['--covar', cov]

    # ========= 3. Genic/non-genic =============
    in_genic = os.path.join(GRM_DIR, 'grm-genic')  # + -0.025 ?
    in_nongenic = os.path.join(GRM_DIR, 'grm-genic')  # + -0.025 ?
    out_genic = os.path.join(HSQ_DIR, 'hsq-genic')
    if not os.path.exists(out_genic):
        os.makedirs(out_genic)

    # The SNP-count file stays open for the whole run (as before); the
    # context manager guarantees it is closed if any step below raises.
    with open(os.path.join(out_genic, 'genic.nbSNPs.txt'), 'w') as in_filenb:
        for margin in [0, 20, 50]:
            genic_label = 'genic-margin' + str(margin)
            nongenic_label = 'nongenic-margin' + str(margin)
            out_genic_margin = os.path.join(out_genic, genic_label)
            in_genic_margin = os.path.join(in_genic, genic_label, genic_label)
            in_nongenic_margin = os.path.join(in_nongenic, nongenic_label,
                                              nongenic_label)

            # List both the genic and the non-genic GRM for variance
            # partitioning (no trailing newline after the last entry).
            mgrm_path = out_genic_margin + '.test.txt'
            print(mgrm_path)
            with open(mgrm_path, 'w') as mgrm_file:
                mgrm_file.write(in_genic_margin + '\n')
                mgrm_file.write(in_nongenic_margin)

            # Save the number of SNPs associated with each subgroup.
            lc_genic = preprocessing.read_grm_bin_n(in_genic_margin)
            lc_nongenic = preprocessing.read_grm_bin_n(in_nongenic_margin)
            in_filenb.write(genic_label + ' ' + str(lc_genic) + '\n')
            in_filenb.write(nongenic_label + ' ' + str(lc_nongenic) + '\n')

            for pheno in PHE_LIST:
                for lrt in [1, 2]:
                    # --reml-lrt 1
                    # Calculate the log likelihood of a reduced model with
                    # one or multiple genetic variance components dropped
                    # from the full model and calculate the LRT and p-value.
                    # By default, GCTA will always calculate and report the
                    # LRT for the first genetic variance component, i.e.
                    # --reml-lrt 1, unless you re-specify this option, e.g.
                    # --reml-lrt 2 assuming there are at least two genetic
                    # variance components included in the analysis. You can
                    # also test multiple components simultaneously, e.g.
                    # --reml-lrt 1 2 4. See FAQ #1 for more details.
                    out_file = out_genic_margin + '.' + str(lrt) + '.' + pheno
                    print(pheno)
                    phenopath = os.path.join(PHE_DIR, pheno + '.txt')
                    pars = var_par + ['--pheno', phenopath,
                                      '--reml-lrt', str(lrt),
                                      '--reml']
                    preprocessing.gcta_hsq(in_file=mgrm_path,
                                           out_file=out_file,
                                           gcta=GCTA,
                                           mygcta=MYGCTA,
                                           ncpus=NBPROC,
                                           other_gcta_par=pars,
                                           par_input='--mgrm-bin',
                                           sbatch=USE_SBATCH,
                                           sbatch_par_j="hsq-genic")
def main(config_file):
    """Entry point if called as an executable.

    Partitions h^2 across the minor-allele-frequency (MAF) bins listed in
    ``config.maf_intervals``:

    1. writes the GRM prefix of every bin to ``hsq-maf/maf.test.txt``
       (the list handed to GCTA through ``--mgrm-bin``);
    2. writes the SNP count of every bin to ``hsq-maf/maf.nbSNPs.txt``;
    3. calls ``preprocessing.gcta_hsq`` once per phenotype and per variance
       component tested with ``--reml-lrt``.

    Args:
        config_file: value forwarded to ``config_dataset.config_dataset`` to
            obtain the run configuration.
    """
    config = config_dataset.config_dataset(config_file)

    # Covariate flags: '--qcovar <file>' for quantitative covariates, then
    # '--covar <file>' for qualitative ones.
    var_par = []
    for qcov in config.quant_covar:
        var_par += ['--qcovar', qcov]
    for cov in config.qual_covar:
        var_par += ['--covar', cov]

    # ========= 4. MAF =============
    in_dir_grm_maf = os.path.join(config.grm_dir, 'grm-maf')
    out_hsq_maf = os.path.join(config.hsq_dir, 'hsq-maf')
    # Materialised so the number of bins can be counted below.
    maf_intervals = list(config.maf_intervals)
    if not os.path.exists(out_hsq_maf):
        os.makedirs(out_hsq_maf)

    nb_snps_path = os.path.join(out_hsq_maf, 'maf.nbSNPs.txt')
    mgrm_path = os.path.join(out_hsq_maf, 'maf.test.txt')

    # Context managers make sure both files are closed even when reading a
    # GRM fails half-way through the loop.
    with open(nb_snps_path, 'w') as nb_file, open(mgrm_path, 'w') as mgrm_file:
        for maf_int in maf_intervals:
            maf_int_char = str(maf_int[0]) + '-' + str(maf_int[1])
            grm_prefix = os.path.join(in_dir_grm_maf,
                                      'maf' + maf_int_char,
                                      'maf.' + maf_int_char)
            # GRM used to partition h^2
            mgrm_file.write(grm_prefix + '\n')
            # number of SNPs associated with this bin
            nsnp = preprocessing.read_grm_bin_n(grm_prefix)
            nb_file.write('maf.' + maf_int_char + ' ' + str(nsnp) + '\n')

    # One likelihood-ratio test per genetic variance component, i.e. per bin.
    # This used to be hard-coded to [1, 2, 3, 4], which only matches a
    # configuration with exactly four MAF bins.
    lrt_components = range(1, len(maf_intervals) + 1)

    for pheno in config.phe_list:
        phenopath = os.path.join(config.phe_dir, pheno + '.txt')
        for lrt in lrt_components:
            out_file = os.path.join(out_hsq_maf,
                                    'maf' + '.' + str(lrt) + '.' + pheno)
            pars = var_par + ['--pheno', phenopath,
                              '--reml-lrt', str(lrt),
                              str(config.reml_call)]
            preprocessing.gcta_hsq(in_file=mgrm_path,
                                   out_file=out_file,
                                   gcta=config.gcta,
                                   mygcta=config.mygcta,
                                   ncpus=config.nbproc,
                                   other_gcta_par=pars,
                                   par_input='--mgrm-bin',
                                   sbatch=config.use_sbatch,
                                   sbatch_par_j="hsq-maf")
def main(config_file):
    """Entry point if called as an executable.

    Per-chromosome h^2 analysis over chromosomes 1 to 22:

    1. writes the SNP count of every chromosome GRM to
       ``hsq-perchr/perchr.nbSNPs.txt``;
    2. calls ``preprocessing.gcta_hsq`` separately on each chromosome GRM for
       every phenotype (outputs ``chr<N>.<phenotype>``);
    3. writes all chromosome GRM prefixes to ``hsq-perchr/perchr.test.txt``
       and calls ``preprocessing.gcta_hsq`` with ``--mgrm-bin`` to fit them
       jointly for every phenotype (outputs ``allchr.<phenotype>``).

    Args:
        config_file: value forwarded to ``config_dataset.config_dataset`` to
            obtain the run configuration.
    """
    config = config_dataset.config_dataset(config_file)

    # Covariate flags: '--qcovar <file>' for quantitative covariates, then
    # '--covar <file>' for qualitative ones.
    var_par = []
    for qcov in config.quant_covar:
        var_par += ['--qcovar', qcov]
    for cov in config.qual_covar:
        var_par += ['--covar', cov]

    # ========= per chr =============
    cutoff = str(config.grm_cutoff)
    # Prefix shared by all chromosome GRMs; the chromosome number is appended.
    in_dir_grm_perchr = os.path.join(config.grm_dir,
                                     'grm-all-' + cutoff,
                                     'all-' + cutoff + '-chr')
    out_hsq_perchr = os.path.join(config.hsq_dir, 'hsq-perchr')
    if not os.path.exists(out_hsq_perchr):
        os.makedirs(out_hsq_perchr)

    chromosomes = range(1, 23)

    # Extract the number of SNPs per chromosome.
    nb_snps_path = os.path.join(out_hsq_perchr, 'perchr.nbSNPs.txt')
    with open(nb_snps_path, 'w') as nb_file:
        for chrom in chromosomes:
            nsnp = preprocessing.read_grm_bin_n(in_dir_grm_perchr + str(chrom))
            nb_file.write('chr' + str(chrom) + ' ' + str(nsnp) + '\n')

    # One GCTA run per phenotype and per chromosome.
    for pheno in config.phe_list:
        phenopath = os.path.join(config.phe_dir, pheno + '.txt')
        pars = var_par + ['--pheno', phenopath, str(config.reml_call)]
        for chrom in chromosomes:
            in_file = in_dir_grm_perchr + str(chrom)
            out_file = os.path.join(out_hsq_perchr,
                                    'chr' + str(chrom) + '.' + pheno)
            preprocessing.gcta_hsq(in_file=in_file,
                                   out_file=out_file,
                                   gcta=config.gcta,
                                   mygcta=config.mygcta,
                                   other_gcta_par=pars,
                                   ncpus=config.nbproc,
                                   sbatch=config.use_sbatch,
                                   sbatch_par_j="hsq-perchr")

    # Sum across chromosomes: list every chromosome GRM and fit them jointly.
    mgrm_path = os.path.join(out_hsq_perchr, 'perchr.test.txt')
    with open(mgrm_path, 'w') as mgrm_file:
        for chrom in chromosomes:
            mgrm_file.write(in_dir_grm_perchr + str(chrom) + '\n')

    for pheno in config.phe_list:
        out_file = os.path.join(out_hsq_perchr, 'allchr' + '.' + pheno)
        phenopath = os.path.join(config.phe_dir, pheno + '.txt')
        pars = var_par + ['--reml-maxit', str(200),
                          '--pheno', phenopath,
                          str(config.reml_call)]
        preprocessing.gcta_hsq(in_file=mgrm_path,
                               out_file=out_file,
                               gcta=config.gcta,
                               mygcta=config.mygcta,
                               other_gcta_par=pars,
                               par_input='--mgrm-bin',
                               ncpus=config.nbproc,
                               sbatch=config.use_sbatch,
                               sbatch_par_j="hsq-perchr")
def main():
    """Entry point if called as an executable.

    For each gene set ('neurodev', 'cnsexpression'), partitions h^2 between
    three GRMs (SNPs in the gene set, genic SNPs outside the gene set, and
    non-genic SNPs), all taken with a margin of 50.

    For every gene set the function writes the list of GRM prefixes
    (``<set>.test.txt``) and their SNP counts (``<set>.nbSNPs.txt``) under
    ``<HSQ_DIR>/hsq-<set>``, then calls ``preprocessing.gcta_hsq`` with
    ``--mgrm-bin`` once per phenotype and per tested component
    (``--reml-lrt`` 1, 2, 3).
    """
    # Covariate flags: '--qcovar <file>' for quantitative covariates, then
    # '--covar <file>' for qualitative ones.
    var_par = []
    for qcov in QUANT_COVAR:
        var_par += ['--qcovar', qcov]
    for cov in QUAL_COVAR:
        var_par += ['--covar', cov]

    # ========= 4. gene lists =============
    margin = 50
    margin_tag = '-margin' + str(margin)

    for namesel in ('neurodev', 'cnsexpression'):
        grmsel = os.path.join(GRM_DIR, 'grm-' + namesel)  # + -0.025 ?
        print("Gene set name:", namesel)
        print("GRMs:", grmsel)

        in_genesel_margin = os.path.join(grmsel,
                                         namesel + margin_tag,
                                         namesel)
        in_nongenesel_margin = os.path.join(grmsel,
                                            'non' + namesel + margin_tag,
                                            'non' + namesel)
        in_nongenic_margin = os.path.join(GRM_DIR, 'grm-genic',
                                          'nongenic' + margin_tag,
                                          'nongenic' + margin_tag)

        out_genesel = os.path.join(HSQ_DIR, 'hsq-' + namesel)
        print("hsq outputs:", out_genesel)
        if not os.path.exists(out_genesel):
            os.makedirs(out_genesel)

        # List the GRMs (SNPs in the gene list, genic SNPs not in the gene
        # list, non-genic SNPs) used for variance partitioning.
        mgrm_path = os.path.join(out_genesel, namesel + '.test.txt')
        with open(mgrm_path, 'w') as mgrm_file:
            mgrm_file.write(in_genesel_margin + '\n')
            mgrm_file.write(in_nongenesel_margin + '\n')
            mgrm_file.write(in_nongenic_margin + '\n')

        # Save the number of SNPs associated with each subgroup. All counts
        # are read before the file is opened, as in the original flow.
        lc_genesel = preprocessing.read_grm_bin_n(in_genesel_margin)
        lc_nongenesel = preprocessing.read_grm_bin_n(in_nongenesel_margin)
        lc_nongenic = preprocessing.read_grm_bin_n(in_nongenic_margin)
        nb_snps_path = os.path.join(out_genesel, namesel + '.nbSNPs.txt')
        with open(nb_snps_path, 'w') as nb_file:
            nb_file.write('non' + namesel + ' ' + str(lc_nongenesel) + '\n')
            nb_file.write(namesel + ' ' + str(lc_genesel) + '\n')
            nb_file.write('nongenic' + ' ' + str(lc_nongenic) + '\n')

        for pheno in PHE_LIST:
            print('running h^2 gcta estimation for phenotype: ' + pheno)
            phenopath = os.path.join(PHE_DIR, pheno + '.txt')
            for lrt in [1, 2, 3]:
                out_file = os.path.join(out_genesel,
                                        namesel + '.' + str(lrt) + '.' + pheno)
                pars = var_par + ['--pheno', phenopath,
                                  '--reml-lrt', str(lrt),
                                  '--reml']
                preprocessing.gcta_hsq(in_file=mgrm_path,
                                       out_file=out_file,
                                       gcta=GCTA,
                                       mygcta=MYGCTA,
                                       ncpus=NBPROC,
                                       other_gcta_par=pars,
                                       par_input='--mgrm-bin',
                                       sbatch=USE_SBATCH,
                                       sbatch_par_j="hsq-genesel")
def main():
    """Entry point if called as an executable.

    Bivariate REML on the all-SNP GRM for every unordered pair of distinct
    phenotypes in ``PHE_LIST``. Three analyses are submitted per pair through
    ``preprocessing.gcta_hsq``:

    * the plain bivariate model           -> ``hsq-biv/<p1>.<p2>``
    * the same with ``--reml-bivar-lrt-rg 0`` -> ``hsq-biv/all.rg=0.<p1>.<p2>``
    * the same with ``--reml-bivar-lrt-rg 1`` -> ``hsq-biv/all.rg=1.<p1>.<p2>``

    An analysis whose ``.hsq`` output already exists is skipped unless
    ``overwrite`` is set to True.
    """
    # Run again if the .hsq file was already computed?
    # If True, the hsq is computed again; otherwise it is not computed when
    # the .hsq file is present.
    overwrite = False

    # Covariate flags: '--qcovar <file>' for quantitative covariates, then
    # '--covar <file>' for qualitative ones.
    var_par = []
    for qcov in QUANT_COVAR:
        var_par += ['--qcovar', qcov]
    for cov in QUAL_COVAR:
        var_par += ['--covar', cov]

    # ========= 2. All SNPs, 10 PCs, bivariate analysis =============
    out_biv = os.path.join(HSQ_DIR, 'hsq-biv')
    in_file = os.path.join(GRM_DIR, 'grm-all-0.025/all-0.025')

    # Unordered pairs already handled. The previous bookkeeping stored
    # 'a_b' keys but looked up 'a.b' keys, so it never matched and every
    # pair was submitted twice (once as a.b and once as b.a).
    treated = set()
    for pheno1 in PHE_LIST:
        for pheno2 in PHE_LIST:
            pair = frozenset((pheno1, pheno2))
            if pheno1 == pheno2 or pair in treated:
                continue
            treated.add(pair)

            pheno1path = os.path.join(PHE_DIR, pheno1 + '.txt')
            pheno2path = os.path.join(PHE_DIR, pheno2 + '.txt')
            phenopair = (os.path.splitext(pheno1)[0] + '.'
                         + os.path.splitext(pheno2)[0])

            pars = var_par + ['--pheno', pheno1path,
                              '--pheno', pheno2path,
                              '--reml-maxit', str(200),
                              '--reml-bivar',
                              '--reml-bendV']

            # (output prefix, GCTA parameters) for the three analyses.
            analyses = [
                (os.path.join(out_biv, phenopair),
                 pars),
                (os.path.join(out_biv, 'all.rg=0.' + phenopair),
                 pars + ['--reml-bivar-lrt-rg', str(0)]),
                (os.path.join(out_biv, 'all.rg=1.' + phenopair),
                 pars + ['--reml-bivar-lrt-rg', str(1)]),
            ]

            for out_file, run_pars in analyses:
                # Previously 'not isfile and not overwrite', which made
                # overwrite=True disable every run instead of forcing it.
                if overwrite or not os.path.isfile(out_file + '.hsq'):
                    preprocessing.gcta_hsq(in_file=in_file,
                                           out_file=out_file,
                                           gcta=GCTA,
                                           mygcta=MYGCTA,
                                           other_gcta_par=run_pars,
                                           ncpus=NBPROC,
                                           sbatch=USE_SBATCH,
                                           sbatch_par_j="hsq-biv",
                                           sbatch_par_p="common",
                                           sbatch_par_qos="normal")
def main(config_file):
    """Entry point if called as an executable.

    Bivariate REML on the all-SNP GRM for every unordered pair of distinct
    phenotypes in ``config.phe_list``. For each pair, the model with
    ``--reml-bivar-lrt-rg 0`` is submitted through ``preprocessing.gcta_hsq``
    and written to ``hsq-biv/all.rg=0.<p1>.<p2>``.

    A pair whose ``.hsq`` output already exists is skipped unless
    ``overwrite`` is set to True.

    Args:
        config_file: value forwarded to ``config_dataset.config_dataset`` to
            obtain the run configuration.
    """
    # Run again if the .hsq file was already computed?
    # If True, the hsq is computed again; otherwise it is not computed when
    # the .hsq file is present.
    overwrite = False

    config = config_dataset.config_dataset(config_file)

    # Covariate flags: '--qcovar <file>' for quantitative covariates, then
    # '--covar <file>' for qualitative ones.
    var_par = []
    for qcov in config.quant_covar:
        var_par += ['--qcovar', qcov]
    for cov in config.qual_covar:
        var_par += ['--covar', cov]

    # ========= 2. All SNPs, 10 PCs, bivariate analysis =============
    out_biv = os.path.join(config.hsq_dir, 'hsq-biv')
    in_file = os.path.join(config.grm_dir, 'grm-all-0.025/all-0.025')

    # Unordered pairs already handled. The previous bookkeeping stored
    # 'a_b' keys but looked up 'a.b' keys, so it never matched and every
    # pair was submitted twice (once as a.b and once as b.a).
    treated = set()
    for pheno1 in config.phe_list:
        for pheno2 in config.phe_list:
            pair = frozenset((pheno1, pheno2))
            if pheno1 == pheno2 or pair in treated:
                continue
            treated.add(pair)

            pheno1path = os.path.join(config.phe_dir, pheno1 + '.txt')
            pheno2path = os.path.join(config.phe_dir, pheno2 + '.txt')
            phenopair = (os.path.splitext(pheno1)[0] + '.'
                         + os.path.splitext(pheno2)[0])

            pars = var_par + ['--pheno', pheno1path,
                              '--pheno', pheno2path,
                              '--reml-maxit', str(200),
                              '--reml-bivar']  # , '--reml-bendV']
            # The extra flag is only appended for the unconstrained variant.
            if config.reml_bivar_call == '--reml-bivar-no-constrain':
                pars.append(str(config.reml_bivar_call))

            # NOTE: the plain bivariate run (output hsq-biv/<p1>.<p2>, same
            # parameters without '--reml-bivar-lrt-rg') is currently disabled;
            # only the rg=0 likelihood-ratio test below is submitted.

            out_biv_rg0 = os.path.join(out_biv, 'all.rg=0.' + phenopair)
            pars_rg0 = pars + ['--reml-bivar-lrt-rg', str(0)]
            if overwrite or not os.path.isfile(out_biv_rg0 + '.hsq'):
                preprocessing.gcta_hsq(in_file=in_file,
                                       out_file=out_biv_rg0,
                                       gcta=config.gcta,
                                       mygcta=config.mygcta,
                                       ncpus=config.nbproc,
                                       other_gcta_par=pars_rg0,
                                       sbatch=config.use_sbatch,
                                       sbatch_par_j="hsq-biv",
                                       sbatch_par_p="dedicated",  # "common",
                                       sbatch_par_qos="fast")  # "normal")
def main(config_file):
    """Entry point if called as an executable.

    Partitions h^2 by gene proximity, in three families of models. Every
    model lists its GRM prefixes in ``hsq-genic/<model>.test.txt``, appends
    SNP counts to ``hsq-genic/genic.nbSNPs.txt`` and is fitted through
    ``preprocessing.gcta_hsq`` with ``--mgrm-bin``, once per phenotype and per
    tested component (``--reml-lrt``).

    * 3.1 genic / non-genic, for each margin in 0, 10, ..., 50
      (model ``genic-margin<m>``);
    * 3.2 genic (margin 0) / upstream-downstream band / non-genic, for each
      margin > 0 (model ``updown-margin<m>``);
    * 3.3 genic (margin 0) / band 0-20 / band 20-50 / non-genic (margin 50)
      (model ``updown-margin20-50``).

    Args:
        config_file: value forwarded to ``config_dataset.config_dataset`` to
            obtain the run configuration.
    """
    config = config_dataset.config_dataset(config_file)

    # Covariate flags: '--qcovar <file>' for quantitative covariates, then
    # '--covar <file>' for qualitative ones.
    var_par = []
    for qcov in config.quant_covar:
        var_par += ['--qcovar', qcov]
    for cov in config.qual_covar:
        var_par += ['--covar', cov]

    margins = [0, 10, 20, 30, 40, 50]

    in_genic = os.path.join(config.grm_dir, 'grm-genic')  # + -0.025 ?
    out_genic = os.path.join(config.hsq_dir, 'hsq-genic')
    if not os.path.exists(out_genic):
        os.makedirs(out_genic)

    def grm_prefix(label):
        """Return the GRM prefix <grm-genic>/<label>/<label>."""
        return os.path.join(in_genic, label, label)

    def write_mgrm(out_prefix, grm_prefixes):
        """Write the GRM list for one model and return the file path."""
        mgrm_path = out_prefix + '.test.txt'
        print(mgrm_path)
        with open(mgrm_path, 'w') as mgrm_file:
            # One prefix per line, no trailing newline after the last one.
            mgrm_file.write('\n'.join(grm_prefixes))
        return mgrm_path

    def write_counts(nb_file, grm_prefixes):
        """Append '<basename> <SNP count>' lines; return the counts."""
        counts = [preprocessing.read_grm_bin_n(prefix)
                  for prefix in grm_prefixes]
        for prefix, count in zip(grm_prefixes, counts):
            nb_file.write('{} {}\n'.format(os.path.basename(prefix), count))
        return counts

    def run_reml(mgrm_path, out_prefix, lrt_components):
        """Submit one GCTA run per phenotype and per tested component."""
        # --reml-lrt 1
        # Calculate the log likelihood of a reduced model with one or
        # multiple genetic variance components dropped from the full model
        # and calculate the LRT and p-value. By default, GCTA will always
        # calculate and report the LRT for the first genetic variance
        # component, i.e. --reml-lrt 1, unless you re-specify this option,
        # e.g. --reml-lrt 2 assuming there are at least two genetic variance
        # components included in the analysis. You can also test multiple
        # components simultaneously, e.g. --reml-lrt 1 2 4. See FAQ #1 for
        # more details.
        for pheno in config.phe_list:
            for lrt in lrt_components:
                out_file = out_prefix + '.' + str(lrt) + '.' + pheno
                print(pheno)
                phenopath = os.path.join(config.phe_dir, pheno + '.txt')
                pars = var_par + ['--pheno', phenopath,
                                  '--reml-lrt', str(lrt),
                                  str(config.reml_call)]
                preprocessing.gcta_hsq(in_file=mgrm_path,
                                       out_file=out_file,
                                       gcta=config.gcta,
                                       mygcta=config.mygcta,
                                       ncpus=config.nbproc,
                                       other_gcta_par=pars,
                                       par_input='--mgrm-bin',
                                       sbatch=config.use_sbatch,
                                       sbatch_par_j="hsq-genic")

    # The SNP-count file stays open for the whole run (as before); the
    # context manager guarantees it is closed if any step below raises.
    with open(os.path.join(out_genic, 'genic.nbSNPs.txt'), 'w') as in_filenb:

        # ========= 3.1 Genic/non-genic =============
        for margin in margins:
            grms = [grm_prefix('genic-margin' + str(margin)),
                    grm_prefix('nongenic-margin' + str(margin))]
            out_prefix = os.path.join(out_genic, 'genic-margin' + str(margin))
            mgrm_path = write_mgrm(out_prefix, grms)
            write_counts(in_filenb, grms)
            run_reml(mgrm_path, out_prefix, [1, 2])

        # ===== 3.2 Genic / xxk upstream and downstream / non-genic =====
        for margin in margins:
            if margin <= 0:
                continue
            grms = [grm_prefix('genic-margin' + str(0)),
                    grm_prefix('updown-margin' + str(margin)),
                    grm_prefix('nongenic-margin' + str(margin))]
            out_prefix = os.path.join(out_genic, 'updown-margin' + str(margin))
            mgrm_path = write_mgrm(out_prefix, grms)
            write_counts(in_filenb, grms)
            # NOTE(review): three GRMs are listed but only components 1 and 2
            # are tested here; kept as is, confirm whether 3 was intended.
            run_reml(mgrm_path, out_prefix, [1, 2])

        # == 3.3 Genic / 0-20k and 20-50k upstream and downstream / non-genic ==
        if 20 in margins and 50 in margins:
            in_updown2 = grm_prefix('updown-margin' + "20-50")
            grms = [grm_prefix('genic-margin' + str(0)),
                    grm_prefix('updown-margin' + str(20)),
                    in_updown2,
                    grm_prefix('nongenic-margin' + str(50))]
            out_prefix = os.path.join(out_genic, 'updown-margin' + "20-50")
            mgrm_path = write_mgrm(out_prefix, grms)
            # Only the 20-50 band is new here; the other three counts were
            # already written by the sections above.
            lc_updown2 = write_counts(in_filenb, [in_updown2])[0]
            print(lc_updown2)
            run_reml(mgrm_path, out_prefix, [1, 2, 3, 4])