def get_parent_tip_ranks(self, tax_tree): rank_tips = {} rank_parent = {} for node in tax_tree.traverse("postorder"): if node.is_leaf() or node.is_root(): continue tax_path = node.name ranks = Taxonomy.split_rank_uid(tax_path) rank_lvl = Taxonomy.lowest_assigned_rank_level(ranks) if rank_lvl < 2: continue parent_ranks = Taxonomy.split_rank_uid(node.up.name) parent_lvl = Taxonomy.lowest_assigned_rank_level(parent_ranks) if parent_lvl < 1: continue rank_seqs = node.get_leaf_names() rank_size = len(rank_seqs) if rank_size < 2 or rank_size > self.reftree_size-4: continue # print rank_lvl, "\t", tax_path, "\t", rank_seqs, "\n" rank_tips[tax_path] = node.get_leaf_names() rank_parent[tax_path] = parent_ranks return rank_parent, rank_tips
def get_parent_tip_ranks(self, tax_tree): rank_tips = {} rank_parent = {} for node in tax_tree.traverse("postorder"): if node.is_leaf() or node.is_root(): continue tax_path = node.name ranks = Taxonomy.split_rank_uid(tax_path) rank_lvl = Taxonomy.lowest_assigned_rank_level(ranks) if rank_lvl < 2: continue parent_ranks = Taxonomy.split_rank_uid(node.up.name) parent_lvl = Taxonomy.lowest_assigned_rank_level(parent_ranks) if parent_lvl < 1: continue rank_seqs = node.get_leaf_names() rank_size = len(rank_seqs) if rank_size < 2 or rank_size > self.reftree_size - 4: continue # print rank_lvl, "\t", tax_path, "\t", rank_seqs, "\n" rank_tips[tax_path] = node.get_leaf_names() rank_parent[tax_path] = parent_ranks return rank_parent, rank_tips
def get_orig_ranks(self, seq_name): nodes = self.tax_tree.get_leaves_by_name(seq_name) if len(nodes) != 1: print "FATAL ERROR: Sequence %s is not found in the taxonomic tree, or is present more than once!" % seq_name sys.exit() seq_node = nodes[0] orig_ranks = Taxonomy.split_rank_uid(seq_node.up.name) return orig_ranks
def run_leave_subtree_out_test(self): job_name = self.cfg.subst_name("l1out_rank_%NAME%") # if self.jplace_fname: # jp = EpaJsonParser(self.jplace_fname) # else: #create file with subtrees rank_parent, rank_tips = self.get_parent_tip_ranks(self.tax_tree) subtree_list = list(rank_tips.items()) if len(subtree_list) == 0: return 0 subtree_list_file = self.cfg.tmp_fname("treelist_%NAME%.txt") with open(subtree_list_file, "w") as fout: for rank_name, tips in subtree_list: fout.write("%s\n" % " ".join(tips)) jp_list = self.raxml.run_epa(job_name, self.refalign_fname, self.reftree_fname, self.optmod_fname, mode="l1o_subtree", subtree_fname=subtree_list_file) subtree_count = 0 for jp in jp_list: placements = jp.get_placement() for place in placements: ranks, lws = self.classify_seq(place) tax_path = subtree_list[subtree_count][0] orig_ranks = Taxonomy.split_rank_uid(tax_path) rank_level = Taxonomy.lowest_assigned_rank_level(orig_ranks) rank_prefix = self.tax_code.guess_rank_level_name( orig_ranks, rank_level)[0] rank_name = orig_ranks[rank_level] if not rank_name.startswith(rank_prefix): rank_name = rank_prefix + rank_name parent_ranks = rank_parent[tax_path] # print orig_ranks, "\n", parent_ranks, "\n", ranks, "\n" mis_rec = self.check_rank_tax_labels(rank_name, parent_ranks, ranks, lws) if mis_rec: self.misrank_conf_map[tax_path] = mis_rec['conf'] subtree_count += 1 return subtree_count
def run_leave_subtree_out_test(self): job_name = self.cfg.subst_name("l1out_rank_%NAME%") # if self.jplace_fname: # jp = EpaJsonParser(self.jplace_fname) # else: #create file with subtrees rank_parent, rank_tips = get_parent_tip_ranks(self.tax_tree) subtree_list = rank_tips.items() if len(subtree_list) == 0: return 0 subtree_list_file = self.cfg.tmp_fname("treelist_%NAME%.txt") with open(subtree_list_file, "w") as fout: for rank_name, tips in subtree_list: fout.write("%s\n" % " ".join(tips)) jp_list = self.raxml.run_epa(job_name, self.refalign_fname, self.reftree_fname, self.optmod_fname, mode="l1o_subtree", subtree_fname=subtree_list_file) subtree_count = 0 for jp in jp_list: placements = jp.get_placement() for place in placements: ranks, lws = self.classify_seq(place) tax_path = subtree_list[subtree_count][0] orig_ranks = Taxonomy.split_rank_uid(tax_path) rank_level = Taxonomy.lowest_assigned_rank_level(orig_ranks) rank_prefix = self.guess_rank_level_name(orig_ranks, rank_level)[0] rank_name = orig_ranks[rank_level] if not rank_name.startswith(rank_prefix): rank_name = rank_prefix + rank_name parent_ranks = rank_parent[tax_path] # print orig_ranks, "\n", parent_ranks, "\n", ranks, "\n" mis_rec = self.check_rank_tax_labels(rank_name, parent_ranks, ranks, lws) if mis_rec: self.misrank_conf_map[tax_path] = mis_rec['conf'] subtree_count += 1 return subtree_count
def run_leave_subtree_out_test(self): job_name = self.cfg.subst_name("l1out_rank_%NAME%") # if self.jplace_fname: # jp = EpaJsonParser(self.jplace_fname) # else: #create file with subtrees rank_tips = {} rank_parent = {} for node in self.tax_tree.traverse("postorder"): if node.is_leaf() or node.is_root(): continue tax_path = node.name ranks = Taxonomy.split_rank_uid(tax_path) rank_lvl = Taxonomy.lowest_assigned_rank_level(ranks) if rank_lvl < 2: continue parent_ranks = Taxonomy.split_rank_uid(node.up.name) parent_lvl = Taxonomy.lowest_assigned_rank_level(parent_ranks) if parent_lvl < 1: continue rank_seqs = node.get_leaf_names() rank_size = len(rank_seqs) if rank_size < 2 or rank_size > self.reftree_size-4: continue # print rank_lvl, "\t", tax_path, "\t", rank_seqs, "\n" rank_tips[tax_path] = node.get_leaf_names() rank_parent[tax_path] = parent_ranks subtree_list = rank_tips.items() if len(subtree_list) == 0: return 0 subtree_list_file = self.cfg.tmp_fname("treelist_%NAME%.txt") with open(subtree_list_file, "w") as fout: for rank_name, tips in subtree_list: fout.write("%s\n" % " ".join(tips)) jp_list = self.raxml.run_epa(job_name, self.refalign_fname, self.reftree_fname, self.optmod_fname, mode="l1o_subtree", subtree_fname=subtree_list_file) subtree_count = 0 for jp in jp_list: placements = jp.get_placement() for place in placements: ranks, lws = self.classify_seq(place) tax_path = subtree_list[subtree_count][0] orig_ranks = Taxonomy.split_rank_uid(tax_path) rank_level = Taxonomy.lowest_assigned_rank_level(orig_ranks) rank_prefix = self.guess_rank_level_name(orig_ranks, rank_level)[0] rank_name = orig_ranks[rank_level] if not rank_name.startswith(rank_prefix): rank_name = rank_prefix + rank_name parent_ranks = rank_parent[tax_path] # print orig_ranks, "\n", parent_ranks, "\n", ranks, "\n" mis_rec = self.check_rank_tax_labels(rank_name, parent_ranks, ranks, lws) if mis_rec: self.misrank_conf_map[tax_path] = mis_rec['conf'] subtree_count += 1 return subtree_count
def test_rank_uid(self): tax = self.taxonomy for sid in tax.get_map().iterkeys(): self.assertEqual(tax.get_seq_ranks(sid), Taxonomy.split_rank_uid(tax.seq_rank_id(sid)))
def run_leave_subtree_out_test(self): job_name = self.cfg.subst_name("l1out_rank_%NAME%") # if self.jplace_fname: # jp = EpaJsonParser(self.jplace_fname) # else: #create file with subtrees rank_tips = {} rank_parent = {} for node in self.tax_tree.traverse("postorder"): if node.is_leaf() or node.is_root(): continue tax_path = node.name ranks = Taxonomy.split_rank_uid(tax_path) rank_lvl = Taxonomy.lowest_assigned_rank_level(ranks) if rank_lvl < 2: continue parent_ranks = Taxonomy.split_rank_uid(node.up.name) parent_lvl = Taxonomy.lowest_assigned_rank_level(parent_ranks) if parent_lvl < 1: continue rank_seqs = node.get_leaf_names() rank_size = len(rank_seqs) if rank_size < 2 or rank_size > self.reftree_size - 4: continue # print rank_lvl, "\t", tax_path, "\t", rank_seqs, "\n" rank_tips[tax_path] = node.get_leaf_names() rank_parent[tax_path] = parent_ranks subtree_list = rank_tips.items() if len(subtree_list) == 0: return 0 subtree_list_file = self.cfg.tmp_fname("treelist_%NAME%.txt") with open(subtree_list_file, "w") as fout: for rank_name, tips in subtree_list: fout.write("%s\n" % " ".join(tips)) jp_list = self.raxml.run_epa(job_name, self.refalign_fname, self.reftree_fname, self.optmod_fname, mode="l1o_subtree", subtree_fname=subtree_list_file) subtree_count = 0 for jp in jp_list: placements = jp.get_placement() for place in placements: ranks, lws = self.classify_seq(place) tax_path = subtree_list[subtree_count][0] orig_ranks = Taxonomy.split_rank_uid(tax_path) rank_level = Taxonomy.lowest_assigned_rank_level(orig_ranks) rank_prefix = self.guess_rank_level_name( orig_ranks, rank_level)[0] rank_name = orig_ranks[rank_level] if not rank_name.startswith(rank_prefix): rank_name = rank_prefix + rank_name parent_ranks = rank_parent[tax_path] # print orig_ranks, "\n", parent_ranks, "\n", ranks, "\n" mis_rec = self.check_rank_tax_labels(rank_name, parent_ranks, ranks, lws) if mis_rec: self.misrank_conf_map[tax_path] = mis_rec['conf'] subtree_count += 1 return subtree_count