def evaluate(tree_file, ref_msa, out_dir):
    """Run the RAxML executable in ``--evaluate`` mode on a tree and alignment.

    Both input files must exist (checked via ``util.expect_file_exists``) and
    ``out_dir`` is created via ``util.make_path``. The executable configured
    as ``common.raxml`` is started with ``out_dir`` as working directory and
    its standard output discarded; a non-zero exit status raises
    ``CalledProcessError``.

    Args:
        tree_file: path of the tree passed to ``--tree``.
        ref_msa: path of the alignment passed to ``--msa``.
        out_dir: directory the run is executed in.

    Returns:
        The path ``<out_dir>/eval.raxml.bestModel``. The path is only
        composed here; the file is expected to be written by the run.
    """
    util.expect_file_exists(tree_file)
    util.expect_file_exists(ref_msa)
    util.make_path(out_dir)

    run_name = "eval"

    # NOTE(review): the process runs with cwd=out_dir, so relative input
    # paths would be resolved against out_dir by the child -- callers
    # presumably pass absolute paths; confirm.
    # NOTE(review): the thread count is read from common.iqtree_threads.
    raxml_call = [
        common.raxml,
        '--evaluate',
        '--msa', ref_msa,
        '--model', common.subst_model,
        '--tree', tree_file,
        '--prefix', run_name,
        '--threads', str(common.iqtree_threads),
        '--blopt', 'nr_safe',
        '--redo',
        '--force', 'perf_threads',
        "--blmin", common.raxml_min_bl,
    ]
    sub.check_call(raxml_call, cwd=out_dir, stdout=sub.DEVNULL)

    return os.path.join(out_dir, run_name + ".raxml.bestModel")
def convert(in_type, out_type, in_file, out_file):
    """Convert ``in_file`` to ``out_file`` with the genesis convert tool.

    ``in_file`` must exist. ``out_file`` is first passed to
    ``util.clean_file`` (presumably removing a stale result -- confirm
    against ``util``). The executable ``common.genesis_convert`` is then
    called with the arguments ``in_type out_type in_file out_file``.

    Raises:
        subprocess.CalledProcessError: if the tool exits with non-zero status.
    """
    util.expect_file_exists(in_file)
    util.clean_file(out_file)

    subprocess.check_call(
        [common.genesis_convert, in_type, out_type, in_file, out_file]
    )
def remove_duplicates(input_msa, output_msa, output_json):
    """Run the genesis duplicate-reduction tool on an alignment.

    ``input_msa`` must exist. Both output paths are passed through
    ``util.clean_file`` before the tool ``common.genesis_reduce_duplicates``
    is invoked with ``input_msa output_msa output_json`` as arguments.

    Raises:
        subprocess.CalledProcessError: if the tool exits with non-zero status.
    """
    util.expect_file_exists(input_msa)
    for stale_output in (output_msa, output_json):
        util.clean_file(stale_output)

    subprocess.check_call(
        [common.genesis_reduce_duplicates, input_msa, output_msa, output_json]
    )
def launch_raxml(alignment, model, output_dir, seed, starting_trees=1, parsimony=False, bs_trees=0, cores=16, debug=False, parse=False):
    """Assemble an MPI RAxML command line and hand it to ``launcher.submit``.

    The output prefix encodes the run configuration:
    ``<output_dir>/raxml_<model without '+'>[_rand<N>|_pars<N>][_bs<M>]_seed<seed>``.

    Args:
        alignment: path of the alignment (must exist), passed to ``--msa``.
        model: model string passed to ``--model``.
        output_dir: directory for the run; created via ``util.make_path``.
        seed: value passed to ``--seed`` and appended to the prefix.
        starting_trees: number of starting trees; when ``<= 0`` no ``--tree``
            option is emitted at all.
        parsimony: use ``pars{N}`` instead of ``rand{N}`` starting trees.
        bs_trees: number of bootstrap replicates; ``> 0`` adds
            ``--bootstrap --bs-trees <M>``.
        cores: process count given to ``mpiexec -np`` and to the launcher.
        debug: forwarded unchanged to ``launcher.submit``.
        parse: add the ``--parse`` flag.
    """
    util.expect_file_exists(alignment)
    util.make_path(output_dir)

    prefix = os.path.join(output_dir, "raxml") + "_" + model.replace("+", "")

    cmd = [
        "mpiexec", "-np", str(cores),
        common.raxml,
        "--msa", alignment,
        "--model", model,
        "--seed", str(seed),
        "--blmin", common.raxml_min_bl,
    ]

    if parse:
        cmd.append("--parse")

    # Only emit --tree together with its value. A bare "--tree" (as happened
    # for starting_trees <= 0) would consume the following option as its
    # argument.
    if starting_trees > 0:
        tree_kind = "pars" if parsimony else "rand"
        cmd.extend(["--tree", tree_kind + "{" + str(starting_trees) + "}"])
        prefix += "_" + tree_kind + str(starting_trees)

    if bs_trees > 0:
        # The option was previously misspelled "--boostrap".
        cmd.extend(["--bootstrap", "--bs-trees", str(bs_trees)])
        prefix += "_bs" + str(bs_trees)

    prefix += "_seed" + str(seed)

    cmd.extend(["--prefix", prefix, "--force", "perf_threads"])

    launcher.submit(prefix, cmd, cores, debug)
def outgroup_check(jplace_files, out_dir):
    """Run the genesis outgroup check on jplace files and capture its output.

    Every entry of ``jplace_files`` must exist. ``out_dir`` is created via
    ``util.make_path`` and the standard output of
    ``common.genesis_outgroup_check <jplace_files...>`` is written to
    ``<out_dir>/outgroup_check.txt``.

    Returns:
        Path of the written ``outgroup_check.txt`` file.

    Raises:
        subprocess.CalledProcessError: if the tool exits with non-zero status.
    """
    for jplace in jplace_files:
        util.expect_file_exists(jplace)

    util.make_path(out_dir)

    call = [common.genesis_outgroup_check]
    call.extend(jplace_files)

    report_path = os.path.join(out_dir, "outgroup_check.txt")
    with open(report_path, "w+") as report:
        sub.check_call(call, stdout=report)

    return report_path
def trim_separate_align(input_fasta, mode, runsdir, version_dir):
    """Run the ``common.preanalysis1`` script inside a fresh ``runsdir``.

    ``input_fasta`` must exist. ``runsdir`` is passed to ``util.clean_dir``
    and then ``util.mkdirp`` before the script is started with ``runsdir``
    as its working directory. The full command line is printed to stdout
    before execution.

    Arguments given to the script, in order: ``input_fasta``, ``mode``,
    ``common.scripts_dir``, ``common.mafft``, ``common.outgroup_spec``,
    ``version_dir`` and ``common.available_cores`` (as a string).

    Raises:
        subprocess.CalledProcessError: if the script exits with non-zero status.
    """
    util.expect_file_exists(input_fasta)
    util.clean_dir(runsdir)
    util.mkdirp(runsdir)

    call = [
        common.preanalysis1,
        input_fasta,
        mode,
        common.scripts_dir,
        common.mafft,
        common.outgroup_spec,
        version_dir,
        str(common.available_cores),
    ]

    print(" ".join(call))
    subprocess.check_call(call, cwd=runsdir)
def split_alignment_outgroups(input_msa, outgroup_spec, out_dir):
    """Split an alignment into reference and query parts via genesis.

    Both ``input_msa`` and ``outgroup_spec`` must exist; ``out_dir`` is
    created via ``util.make_path``. The tool
    ``common.genesis_remove_sequences`` runs with ``out_dir`` as working
    directory and is given the relative output names ``reference.fasta``
    and ``query.fasta``.

    Returns:
        Tuple ``(<out_dir>/reference.fasta, <out_dir>/query.fasta)``.

    Raises:
        subprocess.CalledProcessError: if the tool exits with non-zero status.
    """
    util.expect_file_exists(input_msa)
    util.expect_file_exists(outgroup_spec)
    util.make_path(out_dir)

    # Output names are relative: the tool is executed inside out_dir.
    ref_name = "reference.fasta"
    query_name = "query.fasta"

    sub.check_call(
        [common.genesis_remove_sequences, input_msa, outgroup_spec, ref_name, query_name],
        cwd=out_dir,
    )

    return os.path.join(out_dir, ref_name), os.path.join(out_dir, query_name)
# Make the pipeline's helper modules importable; 'scripts' is a relative
# entry, so it is resolved against the current working directory.
sys.path.insert(0, 'scripts')
import common
import placement
import convert
import util
import raxml_launcher

# Path collection for this run, built from the command-line arguments.
paths = common.Paths( sys.argv )

# tree = paths.raxml_best_tree
modelfile = paths.raxml_best_model
epa_out_dir = paths.epa_runs_dir
hmmer_out_dir = paths.hmmer_runs_dir

# This stage needs the file of credible ML trees. If the check fails, print a
# hint about the prerequisite stage and re-raise the original exception.
try:
    util.expect_file_exists( paths.raxml_credible_ml_trees )
except Exception as e:
    print("ERROR: Must run iqtree_tests stage of pipeline first")
    raise e

# ================================================================
# start by ensuring we have the outgroups aligned against the ref
# ================================================================
# presumably (re)creates epa_out_dir as an empty directory -- confirm in util
util.make_path_clean( epa_out_dir )

# If the dataset has outgroups, use the current final alignment as the
# reference and the separate outgroups file as the query.
# NOTE(review): ref_msa / query_msa are only assigned inside this branch.
if paths.dataset_has_outgroups:
    ref_msa = paths.alignment
    query_msa = paths.outgroups_file
    # ref_msa, query_msa = placement.split_alignment_outgroups( paths.alignment, common.outgroup_spec, epa_out_dir )
# Read the list of credible ML trees (one entry per line) and close the file
# right away, so it is not kept open while the placements run.
with open(paths.raxml_credible_ml_trees) as ml_trees_file:
    ml_trees = ml_trees_file.readlines()

# Index of the last tree; only used for the progress output below.
total = len(ml_trees) - 1

# Only the position of each tree in the list is used: all inputs are fetched
# from the per-tree result directories of the earlier rooting stage.
for i, tree in enumerate(ml_trees):
    print(i, " / ", total)

    # subdirs per tree
    cur_outdir = os.path.join(runs_dir, str(i))
    epa_result_subdir = os.path.join(paths.epa_rooting_dir, str(i))
    util.expect_dir_exists(epa_result_subdir)

    util.make_path_clean(cur_outdir)

    # get the same treefile as used in the normal epa runs
    tree_file = os.path.join(epa_result_subdir, "tree.newick")
    util.expect_file_exists(tree_file)

    # fetch the previously created model file
    cur_modelfile = os.path.join(epa_result_subdir, "eval.raxml.bestModel")
    util.expect_file_exists(cur_modelfile)

    # place the query sequences on this tree (thorough mode)
    placement.launch_epa(tree_file, cur_modelfile, ref_msa, query_msa, cur_outdir, thorough=True)

    result_files.append(os.path.join(cur_outdir, "epa_result.jplace"))