Beispiel #1
0
def snp_sel_v_neu_anavar(snp_sfs, snp_m, n_sfs, n_m, constraint, bootstrap, n, c, dfe, out_stem, fold, degree, search):
    """
    writes an anavar control file and runs anavar locally, once for the data
    as given (replicate 0) and once per bootstrap replicate, where both
    spectra are first passed through resample_replace
    :param snp_sfs: selected SNP frequency data, passed to resample_replace and sfs2counts
    :param snp_m: value paired with the selected SFS in the control file data
    :param n_sfs: neutral SNP frequency data, passed to resample_replace and sfs2counts
    :param n_m: value paired with the neutral SFS in the control file data
    :param constraint: passed to the control file's set_constraint
    :param bootstrap: int, number of resampled replicates to run after replicate 0
    :param n: passed to sfs2counts and to the control file's set_data
    :param c: passed to set_data
    :param dfe: passed to set_data
    :param out_stem: str, prefix for the per replicate control, results and log files
    :param fold: passed to set_data as snp_fold
    :param degree: forwarded to set_dfe_optional_opts only when it is not 50
    :param search: passed to set_alg_opts
    :return: None
    """

    anavar_path = '/shared/evolgen1/shared_data/program_files/sharc/'

    # list() keeps the concatenation valid where range() is not a list (python 3)
    for i in [0] + list(range(1, bootstrap + 1)):

        # sort sfs: replicate 0 uses the input as is, the rest are resampled
        if i == 0:
            sfs = snp_sfs
            sfs_n = n_sfs
        else:
            sfs = resample_replace(snp_sfs)
            sfs_n = resample_replace(n_sfs)

        # convert to correct format for anavar
        sfs = sfs2counts(sfs, n)
        sfs_n = sfs2counts(sfs_n, n)

        # sort file names
        ctl_name = out_stem + '.rep{}.control.txt'.format(i)
        result_name = out_stem + '.rep{}.results.txt'.format(i)
        log_name = out_stem + '.rep{}.log.txt'.format(i)

        # construct control file
        sfs_m = {'selected_SNP': (sfs, snp_m),
                 'neutral_SNP': (sfs_n, n_m)}
        ctl = an.SNPNeuSelControlFile()
        ctl.set_alg_opts(search=search, alg='NLOPT_LD_SLSQP')
        ctl.set_data(sfs_m, n, dfe=dfe, c=c,
                     gamma_r=(-5e5, 1e3), theta_r=(1e-10, 0.1), r_r=(0.01, 100),
                     snp_fold=fold)
        if degree != 50:
            ctl.set_dfe_optional_opts(degree=degree, optional=True)
        ctl.set_constraint(constraint)
        ctl_contents = ctl.construct()
        with open(ctl_name, 'w') as control:
            control.write(ctl_contents)

        # call anavar; an argument list (no shell) means file names containing
        # spaces or shell metacharacters are handed over unchanged
        rep_cmd = [anavar_path + 'anavar1.22', ctl_name, result_name, log_name]
        subprocess.call(rep_cmd)
def sel_v_neu_anavar_nonsense(vcf, call, constraint, n, c, dfe, alg, nnoimp,
                              maximp, out_stem, search, degree, spread,
                              evolgen, prem_files):
    """
    writes one anavar control file for the nonsense data, hands one anavar
    job per split to q_sub and then a held job that merges the split results
    :param vcf: str
    :param call: dict
    :param constraint: str
    :param n: int
    :param c: int
    :param dfe: str
    :param alg: str
    :param nnoimp: int
    :param maximp: int
    :param out_stem: str
    :param search: int
    :param degree: int
    :param spread: int
    :param evolgen: bool
    :param prem_files: list
    :return: None
    """

    anavar_exe = '/shared/evolgen1/shared_data/program_files/sharc/' + 'anavar1.22'
    control_path = out_stem + '.control.txt'

    # read in the nonsense data and build the sfs input for the control file
    prem_data = gather_chromo_prems(prem_files)
    sel_sfs, sel_m = prem_freqs_call(prem_data)
    sfs_input = prepare_nonsense_snp_sfs(vcf, call, n, sel_sfs, sel_m)

    # assemble the control file
    control_file = an.SNPNeuSelControlFile()

    algorithm_opts = {
        'search': search,
        'alg': alg,
        'key': 3,
        'epsabs': 1e-20,
        'epsrel': 1e-9,
        'rftol': 1e-9,
        'maxtime': 3600,
        'optional': True,
        'maximp': maximp,
        'nnoimp': nnoimp,
    }
    control_file.set_alg_opts(**algorithm_opts)

    parameter_ranges = {
        'gamma_r': (-5e4, 1e3),
        'theta_r': (1e-10, 0.1),
        'r_r': (0.01, 100),
        'scale_r': (0.1, 5000.0),
    }
    control_file.set_data(sfs_input, n, dfe=dfe, c=c, **parameter_ranges)

    # degree is only written out when it differs from 50
    if degree != 50:
        control_file.set_dfe_optional_opts(degree=degree, optional=True)
    control_file.set_constraint(constraint)

    control_text = control_file.construct()
    with open(control_path, 'w') as control_handle:
        control_handle.write(control_text)

    results_index = out_stem + '.allres.list.txt'
    submitted_ids = []
    with open(results_index, 'w') as index_handle:

        # one job per split, the split number doubles as the anavar seed
        for split_no in range(1, spread + 1):

            stem = out_stem + '.split' + str(split_no)
            split_results = stem + '.results.txt'
            split_log = stem + '.log.txt'

            # record where this split's results will be written
            index_handle.write(split_results + '\n')

            anavar_call = ' '.join([anavar_exe, control_path, split_results,
                                    split_log, str(split_no)])

            # job id is the final path component of the stem plus '.sh'
            job_id = stem.rsplit('/', 1)[-1] + '.sh'
            q_sub([anavar_call],
                  out=stem,
                  jid=job_id,
                  t=8,
                  evolgen=evolgen)
            submitted_ids.append(job_id)

    # merge job is held on every split job id collected above
    merged_results = out_stem + '.merged.results.txt'
    merge_cmd = 'cat {} | gather_searches.py {}'.format(results_index, merged_results)
    q_sub([merge_cmd], out=out_stem + '.merge', hold=submitted_ids, evolgen=evolgen)
def sel_v_neu_anavar(mode, vcf, call, sel_region, constraint, n, c, dfe, alg,
                     nnoimp, maximp, out_stem, search, degree, spread, evolgen,
                     start_index, given, ar_ref):
    """
    submits anavar jobs to cluster after writing required files etc

    One control file is written for all splits, one job per split is handed
    to q_sub, and a final job held on all split jobs merges their results.
    :param mode: str, 'snp' uses the SNP control file, anything else the indel one
    :param vcf: str
    :param call: dict
    :param sel_region: str, one of 'CDS', 'intron', 'intergenic', 'noncoding'
    :param constraint: str
    :param n: int
    :param c: int
    :param dfe: str
    :param alg: str
    :param nnoimp: int
    :param maximp: int
    :param out_stem: str, prefix for every file written by this function
    :param search: int
    :param degree: int, only written to the control file when it is not 50
    :param spread: int, number of split jobs to submit
    :param evolgen: bool
    :param start_index: int, number of the first split, also used as its seed
    :param given: bool, if True take starting values from the merged results
        of earlier runs, exiting if that file does not exist
    :param ar_ref: bool, only used for the indel data
    :return: None
    """

    anavar_path = '/shared/evolgen1/shared_data/program_files/sharc/'

    anavar_cmd = '{path}anavar1.4 {ctl} {rslts} {log} {seed}'

    # sort file names
    ctl_name = out_stem + '.control.txt'
    merge_out = out_stem + '.merged.results.txt'

    # catch given on first run
    init = ()
    if given:
        if not os.path.isfile(merge_out):
            sys.exit(
                'Given True but no previous runs completed to take besty res from'
            )

        # get best result from merged out, closing the file once it is parsed
        with open(merge_out) as merged_results:
            best_res = an.ResultsFile(merged_results).ml_estimate(
                as_string=True)
        init = tuple(best_res.split()[3:-1])

    # region combinations
    region_combs = {
        'CDS': ['CDS_frameshift', 'CDS_non_frameshift'],
        'intron': ['intron'],
        'intergenic': ['intergenic'],
        'noncoding': ['intergenic', 'intron']
    }

    # make control file
    if mode == 'snp':
        sfs_data = prepare_snp_sfs(vcf,
                                   call,
                                   n,
                                   sel_sfs_regions=region_combs[sel_region],
                                   call_sel_reg=sel_region)
        ctl = an.SNPNeuSelControlFile()

    else:
        sfs_data = prepare_indel_sfs(vcf,
                                     call,
                                     n,
                                     sel_sfs_regions=region_combs[sel_region],
                                     call_sel_reg=sel_region,
                                     ar_ref=ar_ref)
        ctl = an.IndelNeuSelControlFile()

    ctl.set_alg_opts(search=search,
                     alg=alg,
                     key=3,
                     epsabs=1e-20,
                     epsrel=1e-9,
                     rftol=1e-9,
                     maxtime=3600,
                     optional=True,
                     maximp=maximp,
                     nnoimp=nnoimp,
                     init=init)

    ctl.set_data(sfs_data,
                 n,
                 dfe=dfe,
                 c=c,
                 gamma_r=(-5e4, 1e5),
                 theta_r=(1e-14, 0.1),
                 r_r=(0.01, 100),
                 scale_r=(0.1, 5000.0))
    if degree != 50:
        ctl.set_dfe_optional_opts(degree=degree, optional=True)
    ctl.set_constraint(constraint)
    ctl_contents = ctl.construct()
    with open(ctl_name, 'w') as control:
        control.write(ctl_contents)

    res_file_list = out_stem + '.allres.list.txt'
    hjids = []

    # append mode: result files listed by earlier calls stay in the list
    with open(res_file_list, 'a') as res_list:

        # split into requested jobs
        for i in range(start_index, start_index + spread):

            split_stem = '{}.split{}'.format(out_stem, i)

            result_name = split_stem + '.results.txt'
            log_name = split_stem + '.log.txt'

            print(result_name, file=res_list)

            # call anavar, the split number is also the seed
            rep_cmd = anavar_cmd.format(path=anavar_path,
                                        ctl=ctl_name,
                                        rslts=result_name,
                                        log=log_name,
                                        seed=i)

            # job id is the last path component of the split stem plus '.sh'
            job_id = split_stem.split('/')[-1] + '.sh'
            q_sub([rep_cmd],
                  out=split_stem,
                  jid=job_id,
                  t=48,
                  evolgen=evolgen)
            hjids.append(job_id)

    # hold job to merge outputs
    gather = 'cat {} | ~/parus_indel/anavar_analyses/gather_searches.py {}'.format(
        res_file_list, merge_out)
    q_sub([gather], out=out_stem + '.merge', hold=hjids, evolgen=evolgen)
Beispiel #4
0
def sel_v_neu_anavar(sfs_dat, constraint, n, c, dfe, alg, nnoimp, maximp,
                     out_stem, search, degree, spread, start_index, given):

    """
    submits anavar jobs to cluster after writing required files etc

    One control file is written for all splits and one job per split is
    handed to q_sub with the SLURM scheduler.
    :param sfs_dat: dict
    :param constraint: str
    :param n: int
    :param c: int
    :param dfe: str
    :param alg: str
    :param nnoimp: int
    :param maximp: int
    :param out_stem: str, prefix for every file written by this function
    :param search: int
    :param degree: int, only written to the control file when it is not 50
    :param spread: int, number of split jobs to submit
    :param start_index: int, number of the first split, also used as its seed
    :param given: bool, if True take starting values from the merged results
        of earlier runs, exiting if that file does not exist
    :return: None
    """

    # empty path: the anavar executable is looked up by its bare name
    anavar_path = ''

    anavar_cmd = '{path}anavar {ctl} {rslts} {log} {seed}'

    # sort file names
    ctl_name = out_stem + '.control.txt'
    merge_out = out_stem + '.merged.results.txt'

    # catch given on first run
    init = ()
    if given:
        if not os.path.isfile(merge_out):
            sys.exit('Given True but no previous runs completed to take besty res from')

        # get best result from merged out, closing the file once it is parsed
        with open(merge_out) as merged_results:
            best_res = an.ResultsFile(merged_results).ml_estimate(as_string=True)
        init = tuple(best_res.split()[3:-1])

    # make control file
    ctl = an.SNPNeuSelControlFile()

    ctl.set_alg_opts(search=search, alg=alg, key=3,
                     epsabs=1e-20, epsrel=1e-9, rftol=1e-9,
                     maxtime=3600, optional=True,
                     maximp=maximp, nnoimp=nnoimp, init=init)

    ctl.set_data(sfs_dat, n, dfe=dfe, c=c, gamma_r=(-500, 100), theta_r=(1e-14, 0.1), r_r=(0.01, 100),
                 scale_r=(0.1, 5000.0), snp_fold=False)
    if degree != 50:
        ctl.set_dfe_optional_opts(degree=degree, optional=True)
    ctl.set_constraint(constraint)
    ctl_contents = ctl.construct()
    with open(ctl_name, 'w') as control:
        control.write(ctl_contents)

    res_file_list = out_stem + '.allres.list.txt'

    # append mode: result files listed by earlier calls stay in the list
    with open(res_file_list, 'a') as res_list:

        # split into requested jobs
        for i in range(start_index, start_index+spread):

            split_stem = '{}.split{}'.format(out_stem, i)

            result_name = split_stem + '.results.txt'
            log_name = split_stem + '.log.txt'

            print(result_name, file=res_list)

            # call anavar, the split number is also the seed
            rep_cmd = anavar_cmd.format(path=anavar_path, ctl=ctl_name, rslts=result_name, log=log_name, seed=i)

            q_sub([rep_cmd], out=split_stem, jid=split_stem.split('/')[-1] + '.sh', t=48, scheduler='SLURM')