Example #1
0
def evaluate(tree_file, ref_msa, out_dir):
    """Run the raxml binary in `--evaluate` mode on a fixed tree.

    Args:
        tree_file: Path of the tree to evaluate; checked with
            ``util.expect_file_exists`` before anything is run.
        ref_msa: Path of the reference alignment; checked the same way.
        out_dir: Directory passed to ``util.make_path`` and then used as
            the working directory of the run.

    Returns:
        ``<out_dir>/eval.raxml.bestModel``. The path is assembled from the
        run prefix; this function does not check that the file exists.

    Raises:
        Whatever ``util.expect_file_exists`` and ``sub.check_call`` raise.
    """
    for required in (tree_file, ref_msa):
        util.expect_file_exists(required)
    util.make_path(out_dir)

    # The prefix is relative on purpose: the command runs with cwd=out_dir,
    # so the prefixed output files end up inside out_dir.
    run_name = "eval"

    # NOTE(review): the thread count is read from common.iqtree_threads even
    # though this is a raxml invocation -- confirm that is intended.
    raxml_args = [
        common.raxml,
        '--evaluate',
        '--msa', ref_msa,
        '--model', common.subst_model,
        '--tree', tree_file,
        '--prefix', run_name,
        '--threads', str(common.iqtree_threads),
        '--blopt', 'nr_safe',
        '--redo',
        '--force', 'perf_threads',
        "--blmin", common.raxml_min_bl,
    ]

    # stdout is discarded; a failing exit status surfaces through check_call.
    sub.check_call(raxml_args, cwd=out_dir, stdout=sub.DEVNULL)

    return os.path.join(out_dir, run_name + ".raxml.bestModel")
Example #2
0
def convert(in_type, out_type, in_file, out_file):
  """Run the genesis convert tool on a single file.

  Args:
    in_type: Input format identifier, passed through to the tool.
    out_type: Output format identifier, passed through to the tool.
    in_file: Source file; checked with ``util.expect_file_exists``.
    out_file: Destination file; handed to ``util.clean_file`` first.

  Raises:
    Whatever ``util.expect_file_exists`` and ``subprocess.check_call``
    raise (the latter on a non-zero exit status).
  """
  util.expect_file_exists(in_file)
  util.clean_file(out_file)

  # Positional order expected by the tool: types first, then the two files.
  argv = [common.genesis_convert, in_type, out_type, in_file, out_file]
  subprocess.check_call(argv)
def remove_duplicates(input_msa, output_msa, output_json):
    """Run the genesis reduce-duplicates tool on an alignment.

    Args:
        input_msa: Alignment to process; checked with
            ``util.expect_file_exists``.
        output_msa: Alignment output path; handed to ``util.clean_file``
            before the run.
        output_json: JSON output path; handed to ``util.clean_file``
            before the run.

    Raises:
        Whatever ``util.expect_file_exists`` and ``subprocess.check_call``
        raise (the latter on a non-zero exit status).
    """
    util.expect_file_exists(input_msa)

    for stale_output in (output_msa, output_json):
        util.clean_file(stale_output)

    argv = [
        common.genesis_reduce_duplicates,
        input_msa,
        output_msa,
        output_json,
    ]
    subprocess.check_call(argv)
Example #4
0
def launch_raxml(alignment,
                 model,
                 output_dir,
                 seed,
                 starting_trees=1,
                 parsimony=False,
                 bs_trees=0,
                 cores=16,
                 debug=False,
                 parse=False):
    """Assemble an `mpiexec` raxml command line and submit it via the launcher.

    The output prefix encodes the run configuration:
    ``<output_dir>/raxml_<model without '+'>[_rand<N>|_pars<N>][_bs<M>]_seed<seed>``.

    Args:
        alignment: Alignment file; checked with ``util.expect_file_exists``.
        model: Substitution model string, passed to ``--model``.
        output_dir: Directory for the run; handed to ``util.make_path``.
        seed: Random seed, passed to ``--seed`` and appended to the prefix.
        starting_trees: Number of starting trees. When positive,
            ``--tree rand{N}`` (or ``pars{N}`` if ``parsimony``) is added;
            otherwise no ``--tree`` option is emitted at all.
        parsimony: Use parsimony instead of random starting trees.
        bs_trees: Number of bootstrap replicates. When positive,
            ``--bootstrap --bs-trees M`` is added.
        cores: MPI rank count for ``mpiexec -np``; also forwarded to
            ``launcher.submit``.
        debug: Forwarded unchanged to ``launcher.submit``.
        parse: When true, ``--parse`` is added to the command.

    Returns:
        None. The command is handed to ``launcher.submit``.
    """
    util.expect_file_exists(alignment)
    util.make_path(output_dir)

    prefix = os.path.join(output_dir, "raxml")
    prefix += "_" + model.replace("+", "")

    cmd = [
        "mpiexec", "-np", str(cores),
        common.raxml,
        "--msa", alignment,
        "--model", model,
        "--seed", str(seed),
        "--blmin", common.raxml_min_bl,
    ]
    if parse:
        cmd.append("--parse")

    # Emit `--tree` only together with its value. A bare `--tree` would
    # consume whichever option follows it as the tree specification.
    if starting_trees > 0:
        tree_kind = "pars" if parsimony else "rand"
        cmd += ["--tree", tree_kind + "{" + str(starting_trees) + "}"]
        prefix += "_" + tree_kind + str(starting_trees)

    if bs_trees > 0:
        # `--bootstrap` is the command; `--bs-trees` gives the replicate count.
        cmd += ["--bootstrap", "--bs-trees", str(bs_trees)]
        prefix += "_bs" + str(bs_trees)

    prefix += "_seed" + str(seed)
    cmd += ["--prefix", prefix, "--force", "perf_threads"]

    launcher.submit(prefix, cmd, cores, debug)
def outgroup_check(jplace_files, out_dir):
  """Run the genesis outgroup-check tool and capture its stdout in a file.

  Args:
    jplace_files: Iterable of jplace file paths; each one is checked with
      ``util.expect_file_exists`` and then passed to the tool in order.
    out_dir: Directory handed to ``util.make_path``; the report is
      written inside it.

  Returns:
    Path of the report, ``<out_dir>/outgroup_check.txt``.

  Raises:
    Whatever ``util.expect_file_exists`` and ``sub.check_call`` raise.
  """
  for jplace in jplace_files:
    util.expect_file_exists(jplace)
  util.make_path(out_dir)

  argv = [common.genesis_outgroup_check]
  argv.extend(jplace_files)

  report_path = os.path.join(out_dir, "outgroup_check.txt")
  # The tool's stdout is the result, so it is redirected into the report.
  with open(report_path, "w+") as report:
    sub.check_call(argv, stdout=report)

  return report_path
def trim_separate_align(input_fasta, mode, runsdir, version_dir):
    """Run the `common.preanalysis1` command inside `runsdir`.

    Args:
        input_fasta: Input sequence file; checked with
            ``util.expect_file_exists``.
        mode: Mode argument, passed through to the command.
        runsdir: Working directory of the run. It is handed to
            ``util.clean_dir`` and then ``util.mkdirp`` before the command
            starts.
        version_dir: Passed through to the command.

    Raises:
        Whatever ``util.expect_file_exists`` and ``subprocess.check_call``
        raise (the latter on a non-zero exit status).
    """
    util.expect_file_exists(input_fasta)
    util.clean_dir(runsdir)
    util.mkdirp(runsdir)

    argv = [
        common.preanalysis1,
        input_fasta,
        mode,
        common.scripts_dir,
        common.mafft,
        common.outgroup_spec,
        version_dir,
        str(common.available_cores),
    ]
    # Echo the exact command line before running it.
    print(" ".join(argv))
    subprocess.check_call(argv, cwd=runsdir)
def split_alignment_outgroups(input_msa, outgroup_spec, out_dir):
  """Run the genesis remove-sequences tool to split an alignment in two.

  Args:
    input_msa: Alignment to split; checked with
      ``util.expect_file_exists``.
    outgroup_spec: Outgroup specification file; checked the same way.
    out_dir: Directory handed to ``util.make_path`` and used as the
      working directory of the run.

  Returns:
    Tuple ``(<out_dir>/reference.fasta, <out_dir>/query.fasta)``. The paths
    are assembled here; their existence is not checked.

  Raises:
    Whatever ``util.expect_file_exists`` and ``sub.check_call`` raise.
  """
  for required in (input_msa, outgroup_spec):
    util.expect_file_exists(required)
  util.make_path(out_dir)

  # Output names are relative; they resolve against cwd=out_dir below.
  # NOTE(review): relative input paths would also resolve against out_dir --
  # presumably callers pass absolute paths; verify.
  ref_name = "reference.fasta"
  query_name = "query.fasta"

  argv = [
    common.genesis_remove_sequences,
    input_msa,
    outgroup_spec,
    ref_name,
    query_name,
  ]
  sub.check_call(argv, cwd=out_dir)

  ref_path = os.path.join(out_dir, ref_name)
  query_path = os.path.join(out_dir, query_name)
  return ref_path, query_path
Example #8
0
sys.path.insert(0, 'scripts')
import common
import placement
import convert
import util
import raxml_launcher

# Build the project's path container from the raw command-line arguments.
paths = common.Paths( sys.argv )

# tree = paths.raxml_best_tree
# Shorthand names for the paths this script reads from `paths`.
modelfile = paths.raxml_best_model
epa_out_dir = paths.epa_runs_dir
hmmer_out_dir = paths.hmmer_runs_dir

# Precondition check: the credible-ML-trees file must be present. On failure a
# hint is printed and the same exception is re-raised, so the script stops.
# NOTE(review): the hint names the "iqtree_tests" stage while the checked
# attribute is `raxml_credible_ml_trees` -- confirm the message is current.
try:
  util.expect_file_exists( paths.raxml_credible_ml_trees )
except Exception as e:
  print("ERROR: Must run iqtree_tests stage of pipeline first")
  raise e

# ================================================================
# start by ensuring we have the outgroups aligned against the ref
# ================================================================

# Hand the EPA output directory to util.make_path_clean before any run.
util.make_path_clean( epa_out_dir )

# if outgroup is included in the alignment, separate the two from the alignment currently seen as final
# As written, no split is performed here: the reference is taken to be
# paths.alignment and the queries paths.outgroups_file. The call that would
# split the alignment is left commented out below.
if paths.dataset_has_outgroups:
  ref_msa = paths.alignment
  query_msa = paths.outgroups_file
  # ref_msa, query_msa = placement.split_alignment_outgroups( paths.alignment, common.outgroup_spec, epa_out_dir )
Example #9
0
# Run one thorough EPA placement per line of the credible-ML-trees file,
# reusing the tree and model files found in the matching numbered
# subdirectory of paths.epa_rooting_dir.
# `paths`, `runs_dir`, `ref_msa`, `query_msa` and `result_files` are defined
# outside this fragment.
with open(paths.raxml_credible_ml_trees) as ml_trees_file:
    ml_trees = ml_trees_file.readlines()
    # Highest zero-based index; only used for the progress print below.
    total = len(ml_trees) - 1
    i = 0
    # The line content (`tree`) is not used in the loop body; only the line
    # count drives the loop, with `i` as the running index.
    for tree in ml_trees:
        print(i, " / ", total)
        # subdirs per tree
        cur_outdir = os.path.join(runs_dir, str(i))
        epa_result_subdir = os.path.join(paths.epa_rooting_dir, str(i))
        util.expect_dir_exists(epa_result_subdir)
        # Both per-tree paths are already built, so the index advances here.
        i += 1
        util.make_path_clean(cur_outdir)

        # get the same treefile as used in the normal epa runs
        tree_file = os.path.join(epa_result_subdir, "tree.newick")
        util.expect_file_exists(tree_file)

        # fetch the previously created model file
        cur_modelfile = os.path.join(epa_result_subdir, "eval.raxml.bestModel")
        util.expect_file_exists(cur_modelfile)

        # place the wuhan seq
        placement.launch_epa(tree_file,
                             cur_modelfile,
                             ref_msa,
                             query_msa,
                             cur_outdir,
                             thorough=True)

        # Record where this run's placement result is expected to be.
        result_files.append(os.path.join(cur_outdir, "epa_result.jplace"))