Beispiel #1
0
def max_entropy_thinning(paths, input_alignment, taxa_number,
                         output_alignment):
    """Run the max-entropy thinning tool and collect its pruned alignment.

    The run directory ``paths.me_thinning_runs_dir`` is handed to
    ``util.make_path_clean`` first. The executable referenced by
    ``common.genesis_max_entropy`` is then called with the input alignment,
    ``taxa_number`` (as a string) and an output prefix located in that run
    directory. Finally ``<prefix>_pruned_alignment.fasta`` is moved to
    ``output_alignment``.

    Raises:
        subprocess.CalledProcessError: if the tool exits with a non-zero
            status.
    """
    runs_dir = paths.me_thinning_runs_dir
    util.make_path_clean(runs_dir)

    # All files produced by the tool share this prefix.
    run_prefix = os.path.join(runs_dir, "me_thinning")
    argv = [
        common.genesis_max_entropy,
        input_alignment,
        str(taxa_number),
        run_prefix,
    ]

    # Echo the exact command line before running it.
    print(" ".join(argv))
    subprocess.check_call(argv)

    pruned_alignment = run_prefix + "_pruned_alignment.fasta"
    shutil.move(pruned_alignment, output_alignment)
Beispiel #2
0
def clade_compression_thinning(paths, input_tree_filename, input_alignment, taxa_number, output_alignment):
  """Run the clade-compression thinning tool and collect its pruned alignment.

  The run directory ``paths.cc_thinning_runs_dir`` is handed to
  ``util.make_path_clean`` first. The executable referenced by
  ``common.genesis_clade_compression`` is then called with the tree file, the
  input alignment, ``taxa_number`` (as a string), the fixed value ``10000``
  and an output prefix located in that run directory. Finally
  ``<prefix>_pruned_alignment.fasta`` is moved to ``output_alignment``.

  Raises ``subprocess.CalledProcessError`` if the tool exits with a non-zero
  status.
  """
  runs_dir = paths.cc_thinning_runs_dir
  util.make_path_clean(runs_dir)

  # All files produced by the tool share this prefix.
  run_prefix = os.path.join(runs_dir, "cc_thinning")
  argv = [
    common.genesis_clade_compression,
    input_tree_filename,
    input_alignment,
    str(taxa_number),
    # NOTE(review): the meaning of this fixed fourth argument is not visible
    # here -- confirm against the tool's usage text.
    "10000",
    run_prefix,
  ]

  # Echo the exact command line before running it.
  print(" ".join(argv))
  subprocess.check_call(argv)

  pruned_alignment = run_prefix + "_pruned_alignment.fasta"
  shutil.move(pruned_alignment, output_alignment)
def launch_pargenes(alignment, model, output_dir, seed, rand_trees, pars_trees,
                    bs_trees, cores):
    """Prepare a ParGenes run below ``output_dir`` and submit it.

    Steps, in order:

    1. ``output_dir`` is handed to ``util.make_path_clean`` and an
       ``alignments`` sub-directory is created in it.
    2. ``raxml_options.txt`` is written with the ``--model``, ``--blmin`` and
       ``--precision`` options (the latter two taken from ``common``).
    3. ``alignment`` is linked into the ``alignments`` directory under the
       name ``common.pargenes_ali_name`` via ``relative_symlink``.
    4. The ParGenes command line is assembled, echoed to stdout and handed
       to ``launcher.submit`` together with ``cores``.

    ``seed``, ``rand_trees``, ``pars_trees``, ``bs_trees`` and ``cores`` are
    stringified and passed to the ``--seed``, ``-s``, ``-p``, ``-b`` and
    ``-c`` options respectively.
    """
    util.make_path_clean(output_dir)

    alignment_dir = os.path.join(output_dir, "alignments")
    util.mkdirp(alignment_dir)

    # Options file forwarded to ParGenes through "-r"; every option is
    # followed by a single space, all on one line.
    raxml_options_file = os.path.join(output_dir, "raxml_options.txt")
    with open(raxml_options_file, "w") as writer:
        for flag, value in (
            ("--model", model),
            ("--blmin", common.raxml_min_bl),
            ("--precision", str(common.raxml_precision)),
        ):
            writer.write(flag + " " + value + " ")

    # ParGenes is pointed at a directory ("-a"), so the alignment is linked
    # into it rather than passed directly.
    relative_symlink(
        alignment, os.path.join(alignment_dir, common.pargenes_ali_name))

    cmd = [
        "python", common.pargenes,
        "-a", alignment_dir,
        "-o", os.path.join(output_dir, "pargenes_output"),
        "-r", raxml_options_file,
        "--seed", str(seed),
        "-s", str(rand_trees),
        "-p", str(pars_trees),
        "-b", str(bs_trees),
        "-c", str(cores),
        "--core-assignment", "low",
    ]

    print(" ".join(cmd))
    # The last argument is the launcher's debug switch, always off here.
    launcher.submit(os.path.join(output_dir, "pargenes"), cmd, cores, False)
Beispiel #4
0
# Stage setup: collect the paths this stage works with.
# (disabled) tree = paths.raxml_best_tree
modelfile = paths.raxml_best_model
epa_out_dir = paths.epa_runs_dir
hmmer_out_dir = paths.hmmer_runs_dir

# Precondition: the credible ML trees file must already exist. Any exception
# raised by the check is reported with a hint about the missing pipeline stage
# and then re-raised, so the script aborts here.
try:
  util.expect_file_exists( paths.raxml_credible_ml_trees )
except Exception as e:
  print("ERROR: Must run iqtree_tests stage of pipeline first")
  raise e

# ================================================================
# start by ensuring we have the outgroups aligned against the ref
# ================================================================

# Prepare the EPA run directory before anything is written to it.
util.make_path_clean( epa_out_dir )

# If the dataset comes with outgroups, the alignment is used as the reference
# MSA and the outgroups file as the query MSA.
if paths.dataset_has_outgroups:
  ref_msa = paths.alignment
  query_msa = paths.outgroups_file
  # (disabled) former approach: split the outgroups out of the alignment
  # ref_msa, query_msa = placement.split_alignment_outgroups( paths.alignment, common.outgroup_spec, epa_out_dir )
else:
  # No outgroups in the dataset: the alignment is still the reference.
  ref_msa = paths.alignment

  # create the outgroup alignment using hmmer, in its own run directory
  util.make_path_clean( hmmer_out_dir )

  # create the hmm profile from the reference alignment
  hmm_profile = placement.launch_hmmbuild( ref_msa, hmmer_out_dir )
  # align outgroups against it
Beispiel #5
0
# Build the pipeline's path configuration from the command-line arguments.
paths = common.Paths(sys.argv)

# Precondition: the EPA rooting directory must already exist. Any exception
# raised by the check is reported with a hint about the missing pipeline stage
# and then re-raised, so the script aborts here.
try:
    util.expect_dir_exists(paths.epa_rooting_dir)
except Exception as e:
    print("ERROR: Must run placement stage of pipeline first")
    raise e

# get a separate workdir for this task
runs_dir = paths.wuhan_placement_runs_dir
# also a separate results dir
result_dir = paths.wuhan_placement_dir

# Prepare both directories before anything is written to them.
util.make_path_clean(runs_dir)
util.make_path_clean(result_dir)

# get the wuhan sequence out of the master raw file into its own file in
# the runs dir
wuhan_fasta = os.path.join(runs_dir, "sequence.fasta")

# "EPI_ISL_406801" is the identifier of the sequence to extract.
placement.extract_sequence(paths.raw_sequences, "EPI_ISL_406801", wuhan_fasta)

ref_msa = paths.alignment

# check if there already is a hmmprofile (should be the case for *msan runs)
hmm_profile = os.path.join(paths.hmmer_runs_dir, "reference.hmm")
# build it from the reference alignment if it doesn't exist; from here on
# hmm_profile is whatever launch_hmmbuild returns
if not os.path.isfile(hmm_profile):
    hmm_profile = placement.launch_hmmbuild(ref_msa, paths.hmmer_runs_dir)
def _write_ll_table(file_name, comment_lines, column_names, *ll_columns):
  """Write parallel log-likelihood columns to `file_name` as a small CSV table.

  `comment_lines` are written first (one per line, verbatim), followed by the
  comma-separated `column_names` and one row per entry of the zipped
  `ll_columns`, each value formatted with three decimals. As with `zip`, rows
  stop at the shortest column.
  """
  with open(file_name, "w") as writer:
    for comment in comment_lines:
      writer.write(comment + '\n')
    writer.write(','.join(column_names) + '\n')
    for row in zip(*ll_columns):
      writer.write(','.join('%.3f' % ll for ll in row) + '\n')


def evaluate_all_trees(paths):
  """
  This will evaluate all given trees with the given model description.
  For each tree, the model and branch lengths will be optimized while
  fixing the topology.
  This contrasts iqtree_tests, as a model optimization is performed
  for each tree separately.

  Three comparisons are produced, each written as a CSV table:

  * `paths.raxml_iqtree_ll_all`: the likelihoods read from
    `paths.raxml_all_ml_trees_ll` next to iqtree evaluations under
    `common.subst_model`.
  * `paths.gamma_ll_all`: RAxML-ng, iqtree and RAxML evaluations under
    GTR+GAMMA.
  * `paths.gamma_median_ll_all`: RAxML-ng and iqtree evaluations for the
    GTR+GAMMA "median rates" variant.

  The iqtree evaluations run in a process pool of `common.available_cores`
  workers; the pool is released when the function returns or raises.
  """
  # Working directories, one per evaluation flavour.
  iqtree_eval_dir_model = os.path.join(paths.runs_dir, 'iqtree_eval_model')
  util.mkdirp(iqtree_eval_dir_model)
  iqtree_eval_dir_gamma = os.path.join(paths.runs_dir, 'iqtree_eval_gamma')
  util.mkdirp(iqtree_eval_dir_gamma)
  # NOTE(review): only this directory goes through make_path_clean instead of
  # mkdirp -- presumably RAxML needs an empty output location; confirm.
  raxml_eval_dir_gamma = os.path.join(paths.runs_dir, 'raxml_eval_gamma')
  util.make_path_clean(raxml_eval_dir_gamma)
  raxmlng_eval_dir_gamma = os.path.join(paths.runs_dir, 'raxmlng_eval_gamma')
  util.mkdirp(raxmlng_eval_dir_gamma)
  iqtree_eval_dir_gamma_median = os.path.join(paths.runs_dir, 'iqtree_eval_gamma_median')
  util.mkdirp(iqtree_eval_dir_gamma_median)
  raxmlng_eval_dir_gamma_median = os.path.join(paths.runs_dir, 'raxmlng_eval_gamma_median')
  util.mkdirp(raxmlng_eval_dir_gamma_median)

  # Progress messages end without a newline; flush them so they are visible
  # while the (long-running) step they announce is still in progress.
  print('Comparing LLHs for model %s' % common.subst_model)
  print('Loading RAxML-ng LLHs... ', end='', flush=True)
  # The likelihood is the first space-separated field of every line.
  with open(paths.raxml_all_ml_trees_ll) as reader:
    raxmlng_lls = [float(line.split(' ')[0]) for line in reader]
  print('done.')

  print('Evaluating trees with iqtree (including model & brlen optimization)... ', end='', flush=True)
  # Write a separate newick file for every tree (one tree per input line).
  tree_files = []
  with open(paths.raxml_all_ml_trees) as trees_file:
    for i, tree_str in enumerate(trees_file):
      tree_file_name = os.path.join(iqtree_eval_dir_model, 'tree_%d.newick' % i)
      with open(tree_file_name, 'w') as tree_file:
        tree_file.write(tree_str)
      tree_files.append(tree_file_name)

  # The pool is shared by all three iqtree rounds. Using it as a context
  # manager guarantees the worker processes are released on every exit path.
  with mp.Pool(common.available_cores) as pool:
    # Evaluate trees with model & brlen optimization. The fourth argument is
    # presumably iqtree_eval's output prefix (here the tree file itself) --
    # confirm against iqtree_eval.
    iqtree_lls = pool.starmap(iqtree_eval,
      [(paths.alignment, common.subst_model, tree_file_name, tree_file_name)
        for tree_file_name in tree_files])
    print('done')

    _write_ll_table(
      paths.raxml_iqtree_ll_all,
      ['# this file contains the likelihood of all ML trees at the end of the raxml-ng run as',
       '# well as the likelihood as evaluated by iqtree (with model & brlen optimization under',
       '# fixed tree topology'],
      ['raxmlng', 'iqtree'],
      raxmlng_lls, iqtree_lls)

    # Evaluate using GTR+GAMMA Model
    print('Comparing LLHs for GTR+GAMMA model')

    print('Evaluating trees with RAxML-ng (including model & brlen optimization)... ', end='', flush=True)
    raxmlng_lls = raxmlng_eval_all(paths.alignment, 'GTR+FO+G', paths.raxml_all_ml_trees, os.path.join(raxmlng_eval_dir_gamma, 'eval'))
    print('done')

    print('Evaluating trees with iqtree (including model & brlen optimization)... ', end='', flush=True)
    # Same tree files as before, but results are redirected into the gamma
    # directory by swapping the directory part of the path.
    iqtree_lls = pool.starmap(iqtree_eval,
      [(paths.alignment, 'GTR+FO+G', tree_file_name, tree_file_name.replace(iqtree_eval_dir_model, iqtree_eval_dir_gamma))
        for tree_file_name in tree_files])
    print('done')

    print('Evaluating trees with RAxML (including model & brlen optimization)... ', end='', flush=True)
    raxml_lls = raxml_eval_all(paths.alignment, 'GTRGAMMAX', paths.raxml_all_ml_trees, os.path.join(raxml_eval_dir_gamma, 'eval'))
    print('done')

    _write_ll_table(
      paths.gamma_ll_all,
      ['# this file contains the likelihood of all ML trees optimized and evaluated',
       '# under GTR+F0+G and fixed tree topology'],
      ['raxmlng', 'iqtree', 'raxml'],
      raxmlng_lls, iqtree_lls, raxml_lls)

    # Evaluate using GTR+GAMMA Model with median rates
    print('Comparing LLHs for GTR+GAMMA model with median rates')

    print('Evaluating trees with RAxML-ng (including model & brlen optimization)... ', end='', flush=True)
    raxmlng_lls = raxmlng_eval_all(paths.alignment, 'GTR+FO+GA', paths.raxml_all_ml_trees, os.path.join(raxmlng_eval_dir_gamma_median, 'eval'))
    print('done')

    print('Evaluating trees with iqtree (including model & brlen optimization)... ', end='', flush=True)
    # NOTE(review): the model string stays 'GTR+FO+G' here; the trailing True
    # presumably switches iqtree_eval to median rates -- confirm.
    iqtree_lls = pool.starmap(iqtree_eval,
      [(paths.alignment, 'GTR+FO+G', tree_file_name, tree_file_name.replace(iqtree_eval_dir_model, iqtree_eval_dir_gamma_median), True)
        for tree_file_name in tree_files])
    print('done')

  _write_ll_table(
    paths.gamma_median_ll_all,
    ['# this file contains the likelihood of all ML trees optimized and evaluated',
     '# under GTR+F0+GA and fixed tree topology'],
    ['raxmlng', 'iqtree'],
    raxmlng_lls, iqtree_lls)