コード例 #1
0
def main():
    """Run the GCTA h^2 estimation for each phenotype on the all-SNP GRM.

    Builds the covariate arguments from ``QUANT_COVAR`` and ``QUAL_COVAR``,
    issues one ``preprocessing.gcta_hsq`` call per phenotype of ``PHE_LIST``
    (option ``--reml``), then writes the value returned by
    ``preprocessing.read_grm_bin_n`` for the GRM to ``all.nbSNPs.txt``.
    """
    # Covariate arguments: '--qcovar <file>' for each quantitative covariable
    # (e.g. age, PCs) followed by '--covar <file>' for each qualitative one
    # (e.g. sex, centre).
    var_par = []
    for qcov in QUANT_COVAR:
        var_par += ['--qcovar', qcov]
    for cov in QUAL_COVAR:
        var_par += ['--covar', cov]

    # ========= 1. All SNPs, 10 PCs =============

    in_file = os.path.join(GRM_DIR, 'grm-all', 'all')
    out_hsq = os.path.join(HSQ_DIR, 'hsq-all', 'all')

    # The nbSNPs file below is written directly by this script, so make sure
    # its directory exists instead of relying on gcta_hsq to create it.
    out_dir = os.path.dirname(out_hsq)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    for pheno in PHE_LIST:
        print('running h^2 gcta estimation for phenotype: ' + pheno)
        phenopath = os.path.join(PHE_DIR, pheno + '.txt')
        pars = var_par + ['--pheno', phenopath, '--reml']

        preprocessing.gcta_hsq(in_file=in_file,
                               out_file=out_hsq + '.' + pheno,
                               gcta=GCTA,
                               other_gcta_par=pars,
                               ncpus=NBPROC,
                               mygcta=MYGCTA,
                               sbatch=USE_SBATCH,
                               sbatch_par_j="hsq-all")

    # Record the number of SNPs used to build the all-SNP GRM
    # (presumably what read_grm_bin_n returns -- confirm in preprocessing).
    nsnp = preprocessing.read_grm_bin_n(in_file)
    with open(out_hsq + '.nbSNPs.txt', 'w') as nb_file:
        nb_file.write('all' + ' ' + str(nsnp) + '\n')
コード例 #2
0
def main():
    """Run the GCTA h^2 estimation for each phenotype without the PCs.

    Same analysis as the all-SNP run, except that the ``PCS`` entry of
    ``QUANT_COVAR`` is left out of the quantitative covariables.
    """
    # Covariate arguments: quantitative ones first (PCs excluded), then the
    # qualitative ones.
    var_par = []
    for qcov in QUANT_COVAR:
        if qcov != PCS:
            var_par.extend(['--qcovar', qcov])
    for cov in QUAL_COVAR:
        var_par.extend(['--covar', cov])

    # ========= 1bis. All SNPs, no PC =============

    grm_prefix = os.path.join(GRM_DIR, 'grm-all/all')
    out_prefix = os.path.join(HSQ_DIR, 'hsq-nopca', 'nopca')

    for pheno in PHE_LIST:
        print('running h^2 gcta estimation (no PCs) for phenotype: ' + pheno)
        phenopath = os.path.join(PHE_DIR, pheno + '.txt')
        reml_args = var_par + ['--pheno', phenopath, '--reml']

        preprocessing.gcta_hsq(
            in_file=grm_prefix,
            out_file=out_prefix + '.' + pheno,
            gcta=GCTA,
            mygcta=MYGCTA,
            ncpus=NBPROC,
            other_gcta_par=reml_args,
            sbatch=USE_SBATCH,
            sbatch_par_j="hsq-nopca",
        )
コード例 #3
0
ファイル: 05-04.hsq_maf.py プロジェクト: r03ert0/hsq-analyses
def main():
    """Partition h^2 across MAF bins with GCTA, for each phenotype.

    Writes two files into ``<HSQ_DIR>/hsq-maf``: ``maf.test.txt`` (one GRM
    prefix per MAF interval, passed to GCTA through ``--mgrm-bin``) and
    ``maf.nbSNPs.txt`` (one ``maf.<interval> <count>`` line per interval).
    Then, for every phenotype of ``PHE_LIST`` and every GRM index, it issues
    one ``preprocessing.gcta_hsq`` call with ``--reml-lrt <index>``.
    """
    # Covariate arguments: '--qcovar <file>' pairs then '--covar <file>' pairs.
    var_par = []
    for qcov in QUANT_COVAR:
        var_par += ['--qcovar', qcov]
    for cov in QUAL_COVAR:
        var_par += ['--covar', cov]

    # ========= 4. MAF  =============

    in_dir_grm_maf = os.path.join(GRM_DIR, 'grm-maf')
    out_hsq_maf = os.path.join(HSQ_DIR, 'hsq-maf')
    maf_intervals = [(0.05, 0.20), (0.20, 0.35), (0.35, 0.50)]

    if not os.path.exists(out_hsq_maf):
        os.makedirs(out_hsq_maf)

    nb_path = os.path.join(out_hsq_maf, 'maf.nbSNPs.txt')
    mgrm_path = os.path.join(out_hsq_maf, 'maf.test.txt')

    # Context managers guarantee both files are closed even if reading a GRM
    # fails half-way through the loop.
    with open(nb_path, 'w') as in_filenb, open(mgrm_path, 'w') as in_file:
        for maf_int in maf_intervals:
            maf_int_char = str(maf_int[0]) + '-' + str(maf_int[1])
            in_file_grm_mafint = os.path.join(in_dir_grm_maf,
                                              'maf' + maf_int_char,
                                              'maf.' + maf_int_char)
            # GRM prefix used to partition h^2
            in_file.write(in_file_grm_mafint + '\n')

            # number of SNPs associated with this subgroup
            nsnp = preprocessing.read_grm_bin_n(in_file_grm_mafint)
            in_filenb.write('maf.' + maf_int_char + ' ' + str(nsnp) + '\n')

    # One likelihood-ratio test per GRM listed in the mgrm file, so the index
    # range always matches the number of MAF intervals.
    lrt_indices = range(1, len(maf_intervals) + 1)

    for pheno in PHE_LIST:
        phenopath = os.path.join(PHE_DIR, pheno + '.txt')

        for lrt in lrt_indices:
            out_file = os.path.join(out_hsq_maf,
                                    'maf' + '.' + str(lrt) + '.' + pheno)
            pars = var_par + [
                '--pheno', phenopath, '--reml-lrt',
                str(lrt), '--reml'
            ]

            preprocessing.gcta_hsq(in_file=mgrm_path,
                                   out_file=out_file,
                                   gcta=GCTA,
                                   mygcta=MYGCTA,
                                   ncpus=NBPROC,
                                   other_gcta_par=pars,
                                   par_input='--mgrm-bin',
                                   sbatch=USE_SBATCH,
                                   sbatch_par_j="hsq-maf")
コード例 #4
0
def main(dataset):
    """Partition h^2 between gene-list, non-gene-list and non-genic SNPs.

    ``dataset`` is forwarded to ``config_dataset.config_dataset``; the
    resulting object provides the covariables, directories, phenotype list
    and GCTA settings. For each gene set ('neurodev', 'cnsexpression') the
    function writes a ``.test.txt`` file listing three GRM prefixes and a
    ``.nbSNPs.txt`` file with one ``<name> <count>`` line per GRM, then issues
    one ``preprocessing.gcta_hsq`` call per phenotype and per GRM index
    (``--reml-lrt <index>``).
    """
    config = config_dataset.config_dataset(dataset)

    # Covariate arguments: '--qcovar <file>' pairs then '--covar <file>' pairs.
    var_par = []
    for qcov in config.quant_covar:
        var_par += ['--qcovar', qcov]
    for cov in config.qual_covar:
        var_par += ['--covar', cov]

    # ========= 4. gene lists =============

    margin = 50
    margin_tag = '-margin' + str(margin)

    for namesel in ('neurodev', 'cnsexpression'):
        in_genesel = os.path.join(config.grm_dir, 'grm-' + namesel)  # + -0.025 ?

        print("Gene set name:", namesel)
        print("GRMs:", in_genesel)

        out_genesel = os.path.join(config.hsq_dir, 'hsq-' + namesel)
        out_genesel_margin = os.path.join(out_genesel, namesel + margin_tag)

        # GRM prefixes, in the order GCTA will number the variance components:
        # SNPs in the gene list, genic SNPs outside the list, non-genic SNPs.
        grm_prefixes = [
            os.path.join(in_genesel, namesel + margin_tag, namesel),
            os.path.join(in_genesel, 'non' + namesel + margin_tag,
                         'non' + namesel),
            os.path.join(config.grm_dir, 'grm-genic', 'nongenic' + margin_tag,
                         'nongenic' + margin_tag),
        ]

        print("hsq outputs:", out_genesel)

        if not os.path.exists(out_genesel):
            os.makedirs(out_genesel)

        # list of GRMs handed to GCTA through --mgrm-bin
        mgrm_path = out_genesel_margin + '.test.txt'
        with open(mgrm_path, 'w') as in_file:
            for prefix in grm_prefixes:
                in_file.write(prefix + '\n')

        # save the number of SNPs associated with each subgroup
        snp_counts = [preprocessing.read_grm_bin_n(prefix)
                      for prefix in grm_prefixes]
        nb_path = os.path.join(out_genesel, namesel + '.nbSNPs.txt')
        with open(nb_path, 'w') as in_filenb:
            for prefix, count in zip(grm_prefixes, snp_counts):
                in_filenb.write('{} {}\n'.format(os.path.basename(prefix),
                                                 count))

        for pheno in config.phe_list:

            print('running h^2 gcta estimation for phenotype: ' + pheno)
            phenopath = os.path.join(config.phe_dir, pheno + '.txt')

            # one likelihood-ratio test per GRM listed in the mgrm file
            for lrt in range(1, len(grm_prefixes) + 1):
                out_file = out_genesel_margin + '.' + str(lrt) + '.' + pheno
                pars = var_par + ['--pheno', phenopath,
                                  '--reml-lrt', str(lrt),
                                  str(config.reml_call)]

                preprocessing.gcta_hsq(in_file=mgrm_path,
                                       out_file=out_file,
                                       gcta=config.gcta,
                                       mygcta=config.mygcta,
                                       ncpus=config.nbproc,
                                       other_gcta_par=pars,
                                       par_input='--mgrm-bin',
                                       sbatch=config.use_sbatch,
                                       sbatch_par_j="hsq-genesel")
コード例 #5
0
def main():
    """Partition h^2 between genic and non-genic SNPs for several margins.

    For each margin in (0, 20, 50) the function writes
    ``genic-margin<m>.test.txt`` (the genic and non-genic GRM prefixes, passed
    to GCTA through ``--mgrm-bin``), appends two ``<name> <count>`` lines to
    ``genic.nbSNPs.txt``, and issues one ``preprocessing.gcta_hsq`` call per
    phenotype of ``PHE_LIST`` and per GRM index (``--reml-lrt 1`` and ``2``).
    """
    # Covariate arguments: '--qcovar <file>' pairs then '--covar <file>' pairs.
    var_par = []
    for qcov in QUANT_COVAR:
        var_par += ['--qcovar', qcov]
    for cov in QUAL_COVAR:
        var_par += ['--covar', cov]

    # ========= 3. Genic/non-genic  =============

    # genic and non-genic GRMs live under the same directory
    in_genic = os.path.join(GRM_DIR, 'grm-genic')  # + -0.025 ?
    out_genic = os.path.join(HSQ_DIR, 'hsq-genic')

    if not os.path.exists(out_genic):
        os.makedirs(out_genic)

    # The SNP-count file stays open for the whole run; the context manager
    # makes sure it is closed even if a GRM read or a GCTA call raises.
    with open(os.path.join(out_genic, 'genic.nbSNPs.txt'), 'w') as in_filenb:

        for margin in [0, 20, 50]:
            genic_name = 'genic-margin' + str(margin)
            nongenic_name = 'nongenic-margin' + str(margin)

            out_genic_margin = os.path.join(out_genic, genic_name)
            in_genic_margin = os.path.join(in_genic, genic_name, genic_name)
            in_nongenic_margin = os.path.join(in_genic, nongenic_name,
                                              nongenic_name)

            # input both genic and non-genic grm for variance partitioning
            mgrm_path = out_genic_margin + '.test.txt'
            print(mgrm_path)
            with open(mgrm_path, 'w') as in_file:
                in_file.write(in_genic_margin + '\n')
                in_file.write(in_nongenic_margin)

            # save the number of SNPs associated with each subgroup
            lc_genic = preprocessing.read_grm_bin_n(in_genic_margin)
            lc_nongenic = preprocessing.read_grm_bin_n(in_nongenic_margin)
            in_filenb.write(genic_name + ' ' + str(lc_genic) + '\n')
            in_filenb.write(nongenic_name + ' ' + str(lc_nongenic) + '\n')

            for pheno in PHE_LIST:
                for lrt in [1, 2]:
                    # --reml-lrt  1
                    # Calculate the log likelihood of a reduced model with one
                    # or multiple genetic variance components dropped from the
                    # full model and calculate the LRT and p-value. By default,
                    # GCTA will always calculate and report the LRT for the
                    # first genetic variance component, i.e. --reml-lrt 1,
                    # unless you re-specify this option, e.g. --reml-lrt 2
                    # assuming there are at least two genetic variance
                    # components included in the analysis. You can also test
                    # multiple components simultaneously, e.g.
                    # --reml-lrt 1 2 4. See FAQ #1 for more details.
                    out_file = out_genic_margin + '.' + str(lrt) + '.' + pheno

                    print(pheno)
                    phenopath = os.path.join(PHE_DIR, pheno + '.txt')
                    pars = var_par + [
                        '--pheno', phenopath, '--reml-lrt',
                        str(lrt), '--reml'
                    ]

                    preprocessing.gcta_hsq(in_file=mgrm_path,
                                           out_file=out_file,
                                           gcta=GCTA,
                                           mygcta=MYGCTA,
                                           ncpus=NBPROC,
                                           other_gcta_par=pars,
                                           par_input='--mgrm-bin',
                                           sbatch=USE_SBATCH,
                                           sbatch_par_j="hsq-genic")
コード例 #6
0
def main(config_file):
    """Partition h^2 across the configured MAF bins, for each phenotype.

    ``config_file`` is forwarded to ``config_dataset.config_dataset``; the
    resulting object provides the covariables, directories, MAF intervals,
    phenotype list and GCTA settings. The function writes ``maf.test.txt``
    (one GRM prefix per MAF interval, passed to GCTA through ``--mgrm-bin``)
    and ``maf.nbSNPs.txt`` (one ``maf.<interval> <count>`` line per interval)
    into ``<hsq_dir>/hsq-maf``, then issues one ``preprocessing.gcta_hsq``
    call per phenotype and per GRM index (``--reml-lrt <index>``).
    """
    config = config_dataset.config_dataset(config_file)

    # Covariate arguments: '--qcovar <file>' pairs then '--covar <file>' pairs.
    var_par = []
    for qcov in config.quant_covar:
        var_par += ['--qcovar', qcov]
    for cov in config.qual_covar:
        var_par += ['--covar', cov]

    # ========= 4. MAF  =============

    in_dir_grm_maf = os.path.join(config.grm_dir, 'grm-maf')
    out_hsq_maf = os.path.join(config.hsq_dir, 'hsq-maf')
    # materialised so the number of intervals can drive the LRT indices below
    maf_intervals = list(config.maf_intervals)

    if not os.path.exists(out_hsq_maf):
        os.makedirs(out_hsq_maf)

    nb_path = os.path.join(out_hsq_maf, 'maf.nbSNPs.txt')
    mgrm_path = os.path.join(out_hsq_maf, 'maf.test.txt')

    # Context managers guarantee both files are closed even if reading a GRM
    # fails half-way through the loop.
    with open(nb_path, 'w') as in_filenb, open(mgrm_path, 'w') as in_file:
        for maf_int in maf_intervals:
            maf_int_char = str(maf_int[0]) + '-' + str(maf_int[1])
            in_file_grm_mafint = os.path.join(in_dir_grm_maf,
                                              'maf' + maf_int_char,
                                              'maf.' + maf_int_char)
            # GRM prefix used to partition h^2
            in_file.write(in_file_grm_mafint + '\n')

            # number of SNPs associated with this subgroup
            nsnp = preprocessing.read_grm_bin_n(in_file_grm_mafint)
            in_filenb.write('maf.' + maf_int_char + ' ' + str(nsnp) + '\n')

    # --reml-lrt takes the 1-based index of a variance component, i.e. of a
    # GRM listed in the mgrm file; deriving the indices from the interval
    # list keeps them valid whatever the configuration contains.
    lrt_indices = range(1, len(maf_intervals) + 1)

    for pheno in config.phe_list:
        phenopath = os.path.join(config.phe_dir, pheno + '.txt')

        for lrt in lrt_indices:
            out_file = os.path.join(out_hsq_maf,
                                    'maf' + '.' + str(lrt) + '.' + pheno)
            pars = var_par + ['--pheno', phenopath,
                              '--reml-lrt', str(lrt),
                              str(config.reml_call)]

            preprocessing.gcta_hsq(in_file=mgrm_path,
                                   out_file=out_file,
                                   gcta=config.gcta,
                                   mygcta=config.mygcta,
                                   ncpus=config.nbproc,
                                   other_gcta_par=pars,
                                   par_input='--mgrm-bin',
                                   sbatch=config.use_sbatch,
                                   sbatch_par_j="hsq-maf")
コード例 #7
0
def main(config_file):
    """Estimate h^2 per chromosome, then jointly across all chromosomes.

    ``config_file`` is forwarded to ``config_dataset.config_dataset``; the
    resulting object provides the covariables, directories, GRM cutoff,
    phenotype list and GCTA settings. For chromosomes 1 to 22 the function:

    1. writes ``perchr.nbSNPs.txt`` with one ``chr<N> <count>`` line each;
    2. issues one ``preprocessing.gcta_hsq`` call per phenotype and per
       chromosome on that chromosome's GRM;
    3. writes ``perchr.test.txt`` listing all 22 GRM prefixes and issues one
       ``--mgrm-bin`` call per phenotype on that list.
    """
    config = config_dataset.config_dataset(config_file)

    # Covariate arguments: '--qcovar <file>' pairs then '--covar <file>' pairs.
    var_par = []
    for qcov in config.quant_covar:
        var_par += ['--qcovar', qcov]
    for cov in config.qual_covar:
        var_par += ['--covar', cov]

    # ========= per chr =============

    cutoff = str(config.grm_cutoff)
    # prefix to which the chromosome number is appended
    in_dir_grm_perchr = os.path.join(config.grm_dir, 'grm-all-' + cutoff,
                                     'all-' + cutoff + '-chr')
    out_hsq_perchr = os.path.join(config.hsq_dir, 'hsq-perchr')
    chrom_grms = [(chrom, in_dir_grm_perchr + str(chrom))
                  for chrom in range(1, 23)]

    if not os.path.exists(out_hsq_perchr):
        os.makedirs(out_hsq_perchr)

    # extract number of SNPs per chromosome; the context manager closes the
    # file even if a GRM cannot be read
    nb_path = os.path.join(out_hsq_perchr, 'perchr.nbSNPs.txt')
    with open(nb_path, 'w') as in_filenb:
        for chrom, grm_prefix in chrom_grms:
            nsnp = preprocessing.read_grm_bin_n(grm_prefix)
            in_filenb.write('chr' + str(chrom) + ' ' + str(nsnp) + '\n')

    for pheno in config.phe_list:

        phenopath = os.path.join(config.phe_dir, pheno + '.txt')
        pars = var_par + ['--pheno', phenopath, str(config.reml_call)]

        for chrom, grm_prefix in chrom_grms:
            out_file = os.path.join(out_hsq_perchr,
                                    'chr' + str(chrom) + '.' + pheno)

            preprocessing.gcta_hsq(in_file=grm_prefix,
                                   out_file=out_file,
                                   gcta=config.gcta,
                                   mygcta=config.mygcta,
                                   other_gcta_par=pars,
                                   ncpus=config.nbproc,
                                   sbatch=config.use_sbatch,
                                   sbatch_par_j="hsq-perchr")

    # sum across chr: fit all per-chromosome GRMs in a single model
    mgrm_path = os.path.join(out_hsq_perchr, 'perchr.test.txt')
    with open(mgrm_path, 'w') as in_file_allchr:
        for _, grm_prefix in chrom_grms:
            in_file_allchr.write(grm_prefix + '\n')

    for pheno in config.phe_list:

        out_file = os.path.join(out_hsq_perchr, 'allchr' + '.' + pheno)

        phenopath = os.path.join(config.phe_dir, pheno + '.txt')
        pars = var_par + ['--reml-maxit', str(200),
                          '--pheno', phenopath,
                          str(config.reml_call)]

        preprocessing.gcta_hsq(in_file=mgrm_path,
                               out_file=out_file,
                               gcta=config.gcta,
                               mygcta=config.mygcta,
                               other_gcta_par=pars,
                               par_input='--mgrm-bin',
                               ncpus=config.nbproc,
                               sbatch=config.use_sbatch,
                               sbatch_par_j="hsq-perchr")
コード例 #8
0
def main():
    """Partition h^2 between gene-list, non-gene-list and non-genic SNPs.

    For each gene set ('neurodev', 'cnsexpression') the function writes
    ``<name>.test.txt`` (three GRM prefixes, passed to GCTA through
    ``--mgrm-bin``) and ``<name>.nbSNPs.txt`` (one ``<label> <count>`` line per
    subgroup) into ``<HSQ_DIR>/hsq-<name>``, then issues one
    ``preprocessing.gcta_hsq`` call per phenotype of ``PHE_LIST`` and per GRM
    index (``--reml-lrt 1``, ``2`` and ``3``).
    """
    # Covariate arguments: '--qcovar <file>' pairs then '--covar <file>' pairs.
    var_par = []
    for qcov in QUANT_COVAR:
        var_par += ['--qcovar', qcov]
    for cov in QUAL_COVAR:
        var_par += ['--covar', cov]

    # ========= 4. gene lists =============

    margin = 50
    margin_tag = '-margin' + str(margin)

    for namesel in ('neurodev', 'cnsexpression'):
        in_genesel = os.path.join(GRM_DIR, 'grm-' + namesel)  # + -0.025 ?

        print("Gene set name:", namesel)
        print("GRMs:", in_genesel)

        in_genesel_margin = os.path.join(in_genesel, namesel + margin_tag,
                                         namesel)
        in_nongenesel_margin = os.path.join(in_genesel,
                                            'non' + namesel + margin_tag,
                                            'non' + namesel)
        in_nongenic_margin = os.path.join(GRM_DIR, 'grm-genic',
                                          'nongenic' + margin_tag,
                                          'nongenic' + margin_tag)

        out_genesel = os.path.join(HSQ_DIR, 'hsq-' + namesel)
        print("hsq outputs:", out_genesel)

        if not os.path.exists(out_genesel):
            os.makedirs(out_genesel)

        # input both snp in gene lists and snp not in gene lists grm for
        # variance partitioning
        mgrm_path = os.path.join(out_genesel, namesel + '.test.txt')
        with open(mgrm_path, 'w') as in_file:
            in_file.write(in_genesel_margin + '\n')
            in_file.write(in_nongenesel_margin + '\n')
            in_file.write(in_nongenic_margin + '\n')

        # save the number of SNPs associated with each subgroup
        lc_genesel = preprocessing.read_grm_bin_n(in_genesel_margin)
        lc_nongenesel = preprocessing.read_grm_bin_n(in_nongenesel_margin)
        lc_nongenic = preprocessing.read_grm_bin_n(in_nongenic_margin)
        nb_path = os.path.join(out_genesel, namesel + '.nbSNPs.txt')
        with open(nb_path, 'w') as in_filenb:
            in_filenb.write('non' + namesel + ' ' + str(lc_nongenesel) + '\n')
            in_filenb.write(namesel + ' ' + str(lc_genesel) + '\n')
            in_filenb.write('nongenic' + ' ' + str(lc_nongenic) + '\n')

        for pheno in PHE_LIST:

            print('running h^2 gcta estimation for phenotype: ' + pheno)
            phenopath = os.path.join(PHE_DIR, pheno + '.txt')

            # one likelihood-ratio test per GRM listed in the mgrm file
            for lrt in [1, 2, 3]:

                out_file = os.path.join(
                    out_genesel, namesel + '.' + str(lrt) + '.' + pheno)
                pars = var_par + ['--pheno', phenopath,
                                  '--reml-lrt', str(lrt), '--reml']

                preprocessing.gcta_hsq(in_file=mgrm_path,
                                       out_file=out_file,
                                       gcta=GCTA,
                                       mygcta=MYGCTA,
                                       ncpus=NBPROC,
                                       other_gcta_par=pars,
                                       par_input='--mgrm-bin',
                                       sbatch=USE_SBATCH,
                                       sbatch_par_j="hsq-genesel")
コード例 #9
0
def main():
    """Run the bivariate GCTA analysis for every unordered phenotype pair.

    For each pair of distinct phenotypes of ``PHE_LIST`` up to three
    ``preprocessing.gcta_hsq`` calls are issued on the all-SNP GRM: the
    bivariate model itself, and the same model with
    ``--reml-bivar-lrt-rg 0`` and ``--reml-bivar-lrt-rg 1``. A call is skipped
    when its ``.hsq`` output already exists, unless ``overwrite`` is set.
    """
    # Re-run an analysis whose .hsq file is already present?
    # If True the hsq is computed again, otherwise existing results are kept.
    overwrite = False

    # Covariate arguments: '--qcovar <file>' pairs then '--covar <file>' pairs.
    var_par = []
    for qcov in QUANT_COVAR:
        var_par += ['--qcovar', qcov]
    for cov in QUAL_COVAR:
        var_par += ['--covar', cov]

    # ========= 2. All SNPs, 10 PCs, bivariate analysis =============

    out_biv = os.path.join(HSQ_DIR, 'hsq-biv')
    in_file = os.path.join(GRM_DIR, 'grm-all-0.025/all-0.025')
    treated = set()

    for pheno1 in PHE_LIST:
        for pheno2 in PHE_LIST:

            if pheno1 == pheno2:
                continue

            # Order-insensitive key: once (A, B) has been handled, the
            # reversed pair (B, A) is skipped.
            pair_key = frozenset((pheno1, pheno2))
            if pair_key in treated:
                continue
            treated.add(pair_key)

            pheno1path = os.path.join(PHE_DIR, pheno1 + '.txt')
            pheno2path = os.path.join(PHE_DIR, pheno2 + '.txt')
            phenopair = (os.path.splitext(pheno1)[0] + '.' +
                         os.path.splitext(pheno2)[0])

            pars = var_par + [
                '--pheno', pheno1path, '--pheno', pheno2path,
                '--reml-maxit',
                str(200), '--reml-bivar', '--reml-bendV'
            ]

            # (output prefix, GCTA parameters) for the plain bivariate model
            # and for the tests of rg = 0 and rg = 1
            runs = [
                (os.path.join(out_biv, phenopair), pars),
                (os.path.join(out_biv, 'all.rg=0.' + phenopair),
                 pars + ['--reml-bivar-lrt-rg', str(0)]),
                (os.path.join(out_biv, 'all.rg=1.' + phenopair),
                 pars + ['--reml-bivar-lrt-rg', str(1)]),
            ]

            for out_file, run_pars in runs:
                # run when the result is missing, or always when overwriting
                if overwrite or not os.path.isfile(out_file + '.hsq'):
                    preprocessing.gcta_hsq(in_file=in_file,
                                           out_file=out_file,
                                           gcta=GCTA,
                                           mygcta=MYGCTA,
                                           ncpus=NBPROC,
                                           other_gcta_par=run_pars,
                                           sbatch=USE_SBATCH,
                                           sbatch_par_j="hsq-biv",
                                           sbatch_par_p="common",
                                           sbatch_par_qos="normal")
コード例 #10
0
def main(config_file):
    """Run the bivariate rg = 0 test for every unordered phenotype pair.

    ``config_file`` is forwarded to ``config_dataset.config_dataset``; the
    resulting object provides the covariables, directories, phenotype list
    and GCTA settings. For each pair of distinct phenotypes one
    ``preprocessing.gcta_hsq`` call is issued on the all-SNP GRM with
    ``--reml-bivar`` and ``--reml-bivar-lrt-rg 0``; it is skipped when its
    ``.hsq`` output already exists, unless ``overwrite`` is set.
    """
    # Re-run an analysis whose .hsq file is already present?
    # If True the hsq is computed again, otherwise existing results are kept.
    overwrite = False

    config = config_dataset.config_dataset(config_file)

    # Covariate arguments: '--qcovar <file>' pairs then '--covar <file>' pairs.
    var_par = []
    for qcov in config.quant_covar:
        var_par += ['--qcovar', qcov]
    for cov in config.qual_covar:
        var_par += ['--covar', cov]

    # ========= 2. All SNPs, 10 PCs, bivariate analysis =============

    out_biv = os.path.join(config.hsq_dir, 'hsq-biv')
    in_file = os.path.join(config.grm_dir, 'grm-all-0.025/all-0.025')
    treated = set()

    for pheno1 in config.phe_list:
        for pheno2 in config.phe_list:

            if pheno1 == pheno2:
                continue

            # Order-insensitive key: once (A, B) has been handled, the
            # reversed pair (B, A) is skipped.
            pair_key = frozenset((pheno1, pheno2))
            if pair_key in treated:
                continue
            treated.add(pair_key)

            pheno1path = os.path.join(config.phe_dir, pheno1 + '.txt')
            pheno2path = os.path.join(config.phe_dir, pheno2 + '.txt')
            phenopair = (os.path.splitext(pheno1)[0] + '.' +
                         os.path.splitext(pheno2)[0])

            pars = var_par + ['--pheno', pheno1path,
                              '--pheno', pheno2path,
                              '--reml-maxit', str(200),
                              '--reml-bivar']  # , '--reml-bendV']
            # the unconstrained variant is only requested through the config
            if config.reml_bivar_call == '--reml-bivar-no-constrain':
                pars.append(str(config.reml_bivar_call))

            # NOTE: only the rg = 0 test is submitted here; the plain
            # bivariate run (output prefix <out_biv>/<phenopair>) is not.

            out_biv_rg0 = os.path.join(out_biv, 'all.rg=0.' + phenopair)
            pars_rg0 = pars + ['--reml-bivar-lrt-rg', str(0)]

            if not os.path.isfile(out_biv_rg0 + '.hsq') or overwrite:
                preprocessing.gcta_hsq(in_file=in_file,
                                       out_file=out_biv_rg0,
                                       gcta=config.gcta,
                                       mygcta=config.mygcta,
                                       ncpus=config.nbproc,
                                       other_gcta_par=pars_rg0,
                                       sbatch=config.use_sbatch,
                                       sbatch_par_j="hsq-biv",
                                       sbatch_par_p="dedicated",  # "common",
                                       sbatch_par_qos="fast")  # "normal")
コード例 #11
0
def _margin_grm(grm_dir, kind, margin):
    """Return the GRM prefix ``<grm_dir>/<kind>-margin<margin>/<kind>-margin<margin>``."""
    name = '{}-margin{}'.format(kind, margin)
    return os.path.join(grm_dir, name, name)


def _write_mgrm_list(list_path, grm_prefixes):
    """Write one GRM prefix per line (no trailing newline) and return the path.

    The resulting file is the one handed to ``preprocessing.gcta_hsq`` together
    with ``par_input='--mgrm-bin'``.
    """
    print(list_path)
    with open(list_path, 'w') as list_file:
        list_file.write('\n'.join(grm_prefixes))
    return list_path


def _write_snp_counts(count_file, grm_prefixes):
    """Append a ``<basename> <count>`` line to *count_file* for each GRM prefix.

    Counts come from ``preprocessing.read_grm_bin_n``; all of them are read
    before anything is written so a failing read leaves no partial record.
    """
    counts = [(os.path.basename(prefix), preprocessing.read_grm_bin_n(prefix))
              for prefix in grm_prefixes]
    for name, count in counts:
        count_file.write('{} {}\n'.format(name, count))


def _run_reml_lrt(config, var_par, mgrm_path, out_prefix, lrt_components):
    """Call ``preprocessing.gcta_hsq`` once per (phenotype, LRT component) pair.

    ``--reml-lrt k`` asks GCTA for the log likelihood of a reduced model with
    the k-th genetic variance component dropped from the full model, plus the
    LRT and p-value. GCTA defaults to ``--reml-lrt 1``; the option is passed
    explicitly so each listed component is tested in its own run.
    Results go to ``<out_prefix>.<k>.<pheno>``.
    """
    for pheno in config.phe_list:
        phenopath = os.path.join(config.phe_dir, pheno + '.txt')
        for lrt in lrt_components:
            out_file = out_prefix + '.' + str(lrt) + '.' + pheno

            print(pheno)
            # config.reml_call is forwarded verbatim as the last argument
            # (presumably the REML flag itself - TODO confirm against config).
            pars = var_par + ['--pheno', phenopath,
                              '--reml-lrt', str(lrt),
                              str(config.reml_call)]

            preprocessing.gcta_hsq(in_file=mgrm_path,
                                   out_file=out_file,
                                   gcta=config.gcta,
                                   mygcta=config.mygcta,
                                   ncpus=config.nbproc,
                                   other_gcta_par=pars,
                                   par_input='--mgrm-bin',
                                   sbatch=config.use_sbatch,
                                   sbatch_par_j="hsq-genic")


def main(config_file):
    """Entry point if called as an executable.

    Loads the dataset configuration from *config_file*, then runs three
    families of multi-GRM analyses through ``preprocessing.gcta_hsq``:

    3.1  genic / non-genic, for every margin;
    3.2  genic (margin 0) / up-downstream / non-genic, for every margin > 0;
    3.3  genic (margin 0) / updown 0-20 / updown 20-50 / non-genic (margin 50).

    For each analysis a ``<name>.test.txt`` file listing the GRM prefixes is
    written under ``<hsq_dir>/hsq-genic``, and the SNP count of the GRMs is
    appended to ``genic.nbSNPs.txt`` in the same directory.
    """
    config = config_dataset.config_dataset(config_file)

    # quantitative covariables first, then qualitative ones
    var_par = []
    for qcov in config.quant_covar:
        var_par += ['--qcovar', qcov]
    for cov in config.qual_covar:
        var_par += ['--covar', cov]

    margins = [0, 10, 20, 30, 40, 50]

    # genic, non-genic and updown GRMs all live under the same directory
    grm_genic_dir = os.path.join(config.grm_dir, 'grm-genic')  # + -0.025 ?
    out_genic = os.path.join(config.hsq_dir, 'hsq-genic')

    if not os.path.exists(out_genic):
        os.makedirs(out_genic)

    # The context manager guarantees the count file is flushed and closed even
    # if one of the GRM reads or GCTA submissions raises.
    with open(os.path.join(out_genic, 'genic.nbSNPs.txt'), 'w') as in_filenb:

        # ========= 3.1 Genic/non-genic  =============

        for margin in margins:
            in_genic_margin = _margin_grm(grm_genic_dir, 'genic', margin)
            in_nongenic_margin = _margin_grm(grm_genic_dir, 'nongenic', margin)
            out_genic_margin = os.path.join(out_genic,
                                            'genic-margin' + str(margin))

            # input both genic and non-genic grm for variance partitioning
            grms = [in_genic_margin, in_nongenic_margin]
            mgrm_path = _write_mgrm_list(out_genic_margin + '.test.txt', grms)

            # save the number of SNPs associated with each subgroup
            _write_snp_counts(in_filenb, grms)

            _run_reml_lrt(config, var_par, mgrm_path, out_genic_margin, [1, 2])

        # ========= 3.2 Genic / xxk upstream and downstream / non-genic  =====

        for margin in margins:
            if margin <= 0:
                continue

            in_genic_margin = _margin_grm(grm_genic_dir, 'genic', 0)
            in_updown = _margin_grm(grm_genic_dir, 'updown', margin)
            in_nongenic_margin = _margin_grm(grm_genic_dir, 'nongenic', margin)
            out_updown_margin = os.path.join(out_genic,
                                             'updown-margin' + str(margin))

            # input genic, genic +/- marginkb, and non-genic grm for variance
            # partitioning
            grms = [in_genic_margin, in_updown, in_nongenic_margin]
            mgrm_path = _write_mgrm_list(out_updown_margin + '.test.txt', grms)

            # save the number of SNPs associated with each subgroup
            # (the genic-margin0 line is repeated for every margin, as before)
            _write_snp_counts(in_filenb, grms)

            # NOTE(review): three components are supplied but only the first
            # two are LRT-tested here - confirm this is intentional.
            _run_reml_lrt(config, var_par, mgrm_path, out_updown_margin, [1, 2])

        # ========= 3.3 Genic / 0-20k and 20-50k upstream and downstream /
        #           non-genic  =============

        if 20 in margins and 50 in margins:
            in_genic_margin = _margin_grm(grm_genic_dir, 'genic', 0)
            in_updown1 = _margin_grm(grm_genic_dir, 'updown', 20)
            in_updown2 = _margin_grm(grm_genic_dir, 'updown', '20-50')
            in_nongenic_margin = _margin_grm(grm_genic_dir, 'nongenic', 50)
            out_updown_margin = os.path.join(out_genic,
                                             'updown-margin' + "20-50")

            # input genic, both updown bands, and non-genic grm for variance
            # partitioning
            mgrm_path = _write_mgrm_list(
                out_updown_margin + '.test.txt',
                [in_genic_margin, in_updown1, in_updown2, in_nongenic_margin])

            # only the 20-50 band is new here; the other counts were already
            # recorded by the sections above
            lc_updown2 = preprocessing.read_grm_bin_n(in_updown2)
            in_filenb.write('{} {}\n'.format(os.path.basename(in_updown2),
                                             lc_updown2))
            print(lc_updown2)

            _run_reml_lrt(config, var_par, mgrm_path, out_updown_margin,
                          [1, 2, 3, 4])