def snp_sel_v_neu_anavar(snp_sfs, snp_m, n_sfs, n_m, constraint, bootstrap, n, c, dfe, out_stem, fold, degree,
                         search):
    """
    runs anavar locally on the given SNP data and on ``bootstrap`` resampled replicates

    Replicate 0 uses ``snp_sfs`` and ``n_sfs`` as passed in, replicates 1..bootstrap use
    ``resample_replace`` on each of them. For every replicate a control file is written to
    ``<out_stem>.rep<i>.control.txt`` and anavar is called through the shell with
    ``<out_stem>.rep<i>.results.txt`` and ``<out_stem>.rep<i>.log.txt`` as its other arguments.

    :param snp_sfs: selected SNP frequency data, handed to sfs2counts (after resampling for i > 0)
    :param snp_m: value paired with the selected spectrum under the 'selected_SNP' key
    :param n_sfs: neutral SNP frequency data, handled in the same way as snp_sfs
    :param n_m: value paired with the neutral spectrum under the 'neutral_SNP' key
    :param constraint: passed to the control file's set_constraint
    :param bootstrap: int, number of resampled replicates to run on top of replicate 0
    :param n: passed to sfs2counts and set_data - presumably the sample size, confirm with callers
    :param c: passed to set_data as ``c``
    :param dfe: passed to set_data as ``dfe``
    :param out_stem: str, prefix for every file written
    :param fold: passed to set_data as ``snp_fold``
    :param degree: passed to set_dfe_optional_opts only when it is not 50
    :param search: passed to set_alg_opts as ``search``
    :return: None
    """

    anavar_path = '/shared/evolgen1/shared_data/program_files/sharc/'
    anavar_cmd = '{path}anavar1.22 {ctl} {rslts} {log}'

    # range() is wrapped in list() because a Python 3 range object cannot be
    # concatenated onto a list; on Python 2 the result is unchanged
    replicates = [0] + list(range(1, bootstrap + 1))

    for i in replicates:

        # sort sfs: replicate 0 keeps the input data, later replicates are resampled
        if i == 0:
            sfs = snp_sfs
            sfs_n = n_sfs
        else:
            sfs = resample_replace(snp_sfs)
            sfs_n = resample_replace(n_sfs)

        # convert to correct format for anavar
        sfs = sfs2counts(sfs, n)
        sfs_n = sfs2counts(sfs_n, n)

        # sort file names
        ctl_name = out_stem + '.rep{}.control.txt'.format(i)
        result_name = out_stem + '.rep{}.results.txt'.format(i)
        log_name = out_stem + '.rep{}.log.txt'.format(i)

        # construct control file
        sfs_m = {'selected_SNP': (sfs, snp_m), 'neutral_SNP': (sfs_n, n_m)}
        ctl = an.SNPNeuSelControlFile()
        ctl.set_alg_opts(search=search, alg='NLOPT_LD_SLSQP')
        ctl.set_data(sfs_m, n, dfe=dfe, c=c, gamma_r=(-5e5, 1e3), theta_r=(1e-10, 0.1), r_r=(0.01, 100),
                     snp_fold=fold)

        # the optional dfe section is only written when degree differs from 50
        if degree != 50:
            ctl.set_dfe_optional_opts(degree=degree, optional=True)
        ctl.set_constraint(constraint)
        ctl_contents = ctl.construct()
        with open(ctl_name, 'w') as control:
            control.write(ctl_contents)

        # call anavar, one replicate at a time (subprocess.call waits for it to finish)
        rep_cmd = anavar_cmd.format(path=anavar_path, ctl=ctl_name, rslts=result_name, log=log_name)
        subprocess.call(rep_cmd, shell=True)
def sel_v_neu_anavar_nonsense(vcf, call, constraint, n, c, dfe, alg, nnoimp, maximp, out_stem, search, degree, spread,
                              evolgen, prem_files):
    """
    writes a single anavar control file from the data gathered out of ``prem_files``, queues
    ``spread`` anavar jobs that all use it (seeded 1..spread) and queues a final job, held on
    all of them, that pipes the list of result files into gather_searches.py

    :param vcf: str
    :param call: dict
    :param constraint: str
    :param n: int
    :param c: int
    :param dfe: str
    :param alg: str
    :param nnoimp: int
    :param maximp: int
    :param out_stem: str
    :param search: int
    :param degree: int
    :param spread: int
    :param evolgen: bool
    :param prem_files: list
    :return: None
    """

    # selected site data comes from the prem files, then is combined with the vcf and call data
    prem_data = gather_chromo_prems(prem_files)
    sel_sfs, sel_m = prem_freqs_call(prem_data)
    data_for_ctl = prepare_nonsense_snp_sfs(vcf, call, n, sel_sfs, sel_m)

    # one control file shared by every split job
    control_file = an.SNPNeuSelControlFile()
    control_file.set_alg_opts(search=search, alg=alg, key=3,
                              epsabs=1e-20, epsrel=1e-9, rftol=1e-9,
                              maxtime=3600, optional=True,
                              maximp=maximp, nnoimp=nnoimp)
    control_file.set_data(data_for_ctl, n, dfe=dfe, c=c,
                          gamma_r=(-5e4, 1e3), theta_r=(1e-10, 0.1),
                          r_r=(0.01, 100), scale_r=(0.1, 5000.0))
    if degree != 50:
        control_file.set_dfe_optional_opts(degree=degree, optional=True)
    control_file.set_constraint(constraint)

    control_path = out_stem + '.control.txt'
    contents = control_file.construct()
    with open(control_path, 'w') as control:
        control.write(contents)

    # queue the searches, recording each result file name and job id as we go
    list_path = out_stem + '.allres.list.txt'
    split_job_ids = []
    with open(list_path, 'w') as res_list:
        for split_no in range(1, spread + 1):
            stem = '{}.split{}'.format(out_stem, split_no)
            results_path = stem + '.results.txt'
            job_id = stem.rsplit('/', 1)[-1] + '.sh'

            print(results_path, file=res_list)

            # the split number doubles as the seed handed to anavar
            job = '{path}anavar1.22 {ctl} {rslts} {log} {seed}'.format(
                path='/shared/evolgen1/shared_data/program_files/sharc/',
                ctl=control_path, rslts=results_path, log=stem + '.log.txt', seed=split_no)
            q_sub([job], out=stem, jid=job_id, t=8, evolgen=evolgen)
            split_job_ids.append(job_id)

    # merging job, held until every split job above is done
    merged_path = out_stem + '.merged.results.txt'
    merge_cmd = 'cat {} | gather_searches.py {}'.format(list_path, merged_path)
    q_sub([merge_cmd], out=out_stem + '.merge', hold=split_job_ids, evolgen=evolgen)
def sel_v_neu_anavar(mode, vcf, call, sel_region, constraint, n, c, dfe, alg, nnoimp, maximp, out_stem, search, degree,
                     spread, evolgen, start_index, given, ar_ref):
    """
    submits anavar jobs to cluster after writing required files etc

    Writes ``<out_stem>.control.txt``, appends the result file name of every queued job to
    ``<out_stem>.allres.list.txt``, queues one anavar1.4 job per index in
    ``start_index .. start_index + spread - 1`` (the index is also the seed) and finally queues
    a job, held on all of those, that merges the results into ``<out_stem>.merged.results.txt``.

    NOTE(review): another ``sel_v_neu_anavar`` with a different signature appears later in this
    source; if both live in the same module the later definition replaces this one - confirm.

    :param mode: str, 'snp' builds a SNP control file, anything else an INDEL one
    :param vcf: str
    :param call: dict
    :param sel_region: str, one of 'CDS', 'intron', 'intergenic' or 'noncoding'
    :param constraint: str
    :param n: int
    :param c: int
    :param dfe: str
    :param alg: str
    :param nnoimp: int
    :param maximp: int
    :param out_stem: str
    :param search: int
    :param degree: int
    :param spread: int
    :param evolgen: bool
    :param start_index: int
    :param given: bool, if True start from the best estimate in the existing merged results file
    :param ar_ref: bool, only used for INDEL data
    :return: None
    :raises KeyError: if sel_region is not one of the listed regions
    :raises SystemExit: if given is True but the merged results file does not exist
    """

    anavar_path = '/shared/evolgen1/shared_data/program_files/sharc/'
    anavar_cmd = '{path}anavar1.4 {ctl} {rslts} {log} {seed}'

    # sort file names
    ctl_name = out_stem + '.control.txt'
    merge_out = out_stem + '.merged.results.txt'

    # catch given on first run
    init = ()
    if given:
        if not os.path.isfile(merge_out):
            sys.exit('Given True but no previous runs completed to take besty res from')

        # get best result from merged out; the with block closes the handle, which the
        # previous bare open() call never did
        with open(merge_out) as merged_results:
            best_res = an.ResultsFile(merged_results).ml_estimate(as_string=True)

        # drops the first three and the last whitespace separated fields of the estimate line
        init = tuple(best_res.split()[3:-1])

    # region combinations
    region_combs = {
        'CDS': ['CDS_frameshift', 'CDS_non_frameshift'],
        'intron': ['intron'],
        'intergenic': ['intergenic'],
        'noncoding': ['intergenic', 'intron']
    }

    # make control file
    if mode == 'snp':
        sfs_data = prepare_snp_sfs(vcf, call, n,
                                   sel_sfs_regions=region_combs[sel_region],
                                   call_sel_reg=sel_region)
        ctl = an.SNPNeuSelControlFile()
    else:
        sfs_data = prepare_indel_sfs(vcf, call, n,
                                     sel_sfs_regions=region_combs[sel_region],
                                     call_sel_reg=sel_region,
                                     ar_ref=ar_ref)
        ctl = an.IndelNeuSelControlFile()

    ctl.set_alg_opts(search=search, alg=alg, key=3,
                     epsabs=1e-20, epsrel=1e-9, rftol=1e-9,
                     maxtime=3600, optional=True,
                     maximp=maximp, nnoimp=nnoimp, init=init)

    ctl.set_data(sfs_data, n, dfe=dfe, c=c,
                 gamma_r=(-5e4, 1e5), theta_r=(1e-14, 0.1),
                 r_r=(0.01, 100), scale_r=(0.1, 5000.0))

    # the optional dfe section is only written when degree differs from 50
    if degree != 50:
        ctl.set_dfe_optional_opts(degree=degree, optional=True)
    ctl.set_constraint(constraint)
    ctl_contents = ctl.construct()
    with open(ctl_name, 'w') as control:
        control.write(ctl_contents)

    res_file_list = out_stem + '.allres.list.txt'
    hjids = []

    # opened in append mode, so result names from earlier calls stay in the list
    with open(res_file_list, 'a') as res_list:

        # split into requested jobs
        for i in range(start_index, start_index + spread):

            split_stem = '{}.split{}'.format(out_stem, i)
            result_name = split_stem + '.results.txt'
            log_name = split_stem + '.log.txt'
            job_id = split_stem.split('/')[-1] + '.sh'

            print(result_name, file=res_list)

            # call anavar
            rep_cmd = anavar_cmd.format(path=anavar_path, ctl=ctl_name, rslts=result_name, log=log_name, seed=i)

            q_sub([rep_cmd], out=split_stem, jid=job_id, t=48, evolgen=evolgen)
            hjids.append(job_id)

    # hold job to merge outputs
    gather = 'cat {} | ~/parus_indel/anavar_analyses/gather_searches.py {}'.format(res_file_list, merge_out)
    q_sub([gather], out=out_stem + '.merge', hold=hjids, evolgen=evolgen)
def sel_v_neu_anavar(sfs_dat, constraint, n, c, dfe, alg, nnoimp, maximp, out_stem, search, degree, spread,
                     start_index, given):
    """
    submits anavar jobs to cluster after writing required files etc

    Writes ``<out_stem>.control.txt`` from the ready made ``sfs_dat``, appends the result file
    name of every queued job to ``<out_stem>.allres.list.txt`` and queues one ``anavar`` job per
    index in ``start_index .. start_index + spread - 1`` (the index is also the seed) with the
    SLURM scheduler. No merging job is queued here.

    NOTE(review): another ``sel_v_neu_anavar`` with a different signature appears earlier in
    this source; if both live in the same module this definition replaces it - confirm.

    :param sfs_dat: dict
    :param constraint: str
    :param n: int
    :param c: int
    :param dfe: str
    :param alg: str
    :param nnoimp: int
    :param maximp: int
    :param out_stem: str
    :param search: int
    :param degree: int
    :param spread: int
    :param start_index: int
    :param given: bool, if True start from the best estimate in the existing merged results file
    :return: None
    :raises SystemExit: if given is True but the merged results file does not exist
    """

    # empty path: the anavar executable is looked up by the shell that runs the job
    anavar_path = ''
    anavar_cmd = '{path}anavar {ctl} {rslts} {log} {seed}'

    # sort file names
    ctl_name = out_stem + '.control.txt'
    merge_out = out_stem + '.merged.results.txt'

    # catch given on first run
    init = ()
    if given:
        if not os.path.isfile(merge_out):
            sys.exit('Given True but no previous runs completed to take besty res from')

        # get best result from merged out; the with block closes the handle, which the
        # previous bare open() call never did
        with open(merge_out) as merged_results:
            best_res = an.ResultsFile(merged_results).ml_estimate(as_string=True)

        # drops the first three and the last whitespace separated fields of the estimate line
        init = tuple(best_res.split()[3:-1])

    # make control file
    ctl = an.SNPNeuSelControlFile()

    ctl.set_alg_opts(search=search, alg=alg, key=3,
                     epsabs=1e-20, epsrel=1e-9, rftol=1e-9,
                     maxtime=3600, optional=True,
                     maximp=maximp, nnoimp=nnoimp, init=init)

    ctl.set_data(sfs_dat, n, dfe=dfe, c=c,
                 gamma_r=(-500, 100), theta_r=(1e-14, 0.1),
                 r_r=(0.01, 100), scale_r=(0.1, 5000.0), snp_fold=False)

    # the optional dfe section is only written when degree differs from 50
    if degree != 50:
        ctl.set_dfe_optional_opts(degree=degree, optional=True)
    ctl.set_constraint(constraint)
    ctl_contents = ctl.construct()
    with open(ctl_name, 'w') as control:
        control.write(ctl_contents)

    res_file_list = out_stem + '.allres.list.txt'

    # opened in append mode, so result names from earlier calls stay in the list
    with open(res_file_list, 'a') as res_list:

        # split into requested jobs
        for i in range(start_index, start_index + spread):

            split_stem = '{}.split{}'.format(out_stem, i)
            result_name = split_stem + '.results.txt'
            log_name = split_stem + '.log.txt'

            print(result_name, file=res_list)

            # call anavar
            rep_cmd = anavar_cmd.format(path=anavar_path, ctl=ctl_name, rslts=result_name, log=log_name, seed=i)

            q_sub([rep_cmd], out=split_stem, jid=split_stem.split('/')[-1] + '.sh', t=48, scheduler='SLURM')