def _reroot(self, tree, outgroup_node, max_support=100):
    """Reroot tree taking proper care of bootstrap values.

    Support values are recorded per bipartition before the tree is
    reseeded, then written back to the node carrying the same
    bipartition afterwards. Finally the tree is rerooted on the edge
    subtending the outgroup, splitting that edge in half.

    Parameters
    ----------
    tree : Tree
        Dendropy tree to reroot; modified in place.
    outgroup_node : Node
        Node whose subtending edge receives the new root.
    max_support : int or float
        Support assigned to leaves and, when the outgroup is a leaf,
        to the internal child of the new root.

    Returns
    -------
    Tree
        The tree passed in, after rerooting.
    """

    # determine support values for each bipartition
    tree.encode_bipartitions()
    support_values = {}
    for nd in tree:
        support, _taxon, _aux_info = parse_label(nd.label)
        if nd.is_leaf():
            support_values[nd.bipartition] = max_support
        elif support is not None:
            support_values[nd.bipartition] = float(support)
        else:
            support_values[nd.bipartition] = None

    # move support values for desired re-rooting
    new_root = outgroup_node.parent_node
    tree.reseed_at(new_root)
    tree.encode_bipartitions()
    for nd in tree:
        _, taxon, aux_info = parse_label(nd.label)
        nd.label = create_label(
            support_values.get(nd.bipartition, "not_specified"),
            taxon,
            aux_info)
    tree.seed_node.edge.length = None

    # do a hard re-rooting of the tree
    # (this invalidates the previous bipartitions, so must be done separately)
    tree.is_rooted = True

    # A tree without branch lengths reports an edge length of None;
    # pass None through instead of failing on the multiplication.
    edge_length = outgroup_node.edge_length
    half_length = None if edge_length is None else 0.5 * edge_length
    tree.reroot_at_edge(outgroup_node.edge,
                        length1=half_length,
                        length2=half_length)

    # determine bootstrap for new node
    for child in tree.seed_node.child_node_iter():
        if outgroup_node.is_leaf():
            if not child.is_leaf():
                _support, taxon, aux_info = parse_label(child.label)
                child.label = create_label(max_support, taxon, aux_info)
        elif child != outgroup_node:
            # the sister of an internal outgroup takes the outgroup's support
            support, _taxon, _aux_info = parse_label(outgroup_node.label)
            _support, taxon, aux_info = parse_label(child.label)
            child.label = create_label(support, taxon, aux_info)

    return tree
def _assign_taxon_labels(self, fmeasure_for_taxa):
    """Write taxon names onto the nodes of unambiguously placed taxa.

    Only taxa with exactly one candidate node are labelled; a taxon is
    appended to any taxon label already on the node using '; '.

    Parameters
    ----------
    fmeasure_for_taxa : d[taxon] -> [(Node, F-measure, precision, recall), ...]
        Node with highest F-measure for each taxon.

    Returns
    -------
    set
        Taxon labels placed in tree.
    """

    placed_taxon = set()
    ordered_taxa = Taxonomy().sort_taxa(list(fmeasure_for_taxa.keys()))
    for taxon in ordered_taxa:
        candidates = fmeasure_for_taxa[taxon]
        if len(candidates) != 1:
            # ambiguous (or missing) placement; not handled here
            continue

        placed_taxon.add(taxon)
        node, _fmeasure, _precision, _recall = candidates[0]

        support, existing_label, aux_info = parse_label(node.label)
        combined_label = existing_label + '; ' + taxon if existing_label else taxon
        node.label = create_label(support, combined_label, aux_info)

    return placed_taxon
def _assign_taxon_labels(self, fmeasure_for_taxa):
    """Write taxon names onto the nodes of unambiguously placed taxa.

    Only taxa with exactly one candidate node are labelled; a taxon is
    appended to any taxon label already on the node using ';'.

    Parameters
    ----------
    fmeasure_for_taxa : d[taxon] -> [(Node, F-measure, precision, recall), ...]
        Node with highest F-measure for each taxon.

    Returns
    -------
    set
        Taxon labels placed in tree.
    """

    placed_taxon = set()
    ordered_taxa = Taxonomy().sort_taxa(fmeasure_for_taxa.keys())
    for taxon in ordered_taxa:
        candidates = fmeasure_for_taxa[taxon]
        if len(candidates) != 1:
            # ambiguous (or missing) placement; not handled here
            continue

        placed_taxon.add(taxon)
        node, _fmeasure, _precision, _recall = candidates[0]

        support, existing_label, aux_info = parse_label(node.label)
        combined_label = existing_label + ';' + taxon if existing_label else taxon
        node.label = create_label(support, combined_label, aux_info)

    return placed_taxon
def _reroot(self, tree, outgroup_node, max_support=100):
    """Reroot tree taking proper care of bootstrap values.

    Support values are keyed by bipartition before the tree is reseeded
    and re-attached to the node with the matching bipartition afterwards.
    The tree is then rerooted on the edge subtending the outgroup, with
    that edge split evenly on either side of the new root.

    Parameters
    ----------
    tree : Tree
        Dendropy tree to reroot; modified in place.
    outgroup_node : Node
        Node whose subtending edge receives the new root.
    max_support : int or float
        Support assigned to leaves and, when the outgroup is a leaf,
        to the internal child of the new root.

    Returns
    -------
    Tree
        The tree passed in, after rerooting.
    """

    # determine support values for each bipartition
    tree.encode_bipartitions()
    support_values = {}
    for nd in tree:
        support, _taxon, _aux_info = parse_label(nd.label)
        if nd.is_leaf():
            node_support = max_support
        elif support is not None:
            node_support = float(support)
        else:
            node_support = None
        support_values[nd.bipartition] = node_support

    # move support values for desired re-rooting
    new_root = outgroup_node.parent_node
    tree.reseed_at(new_root)
    tree.encode_bipartitions()
    for nd in tree:
        _, taxon, aux_info = parse_label(nd.label)
        moved_support = support_values.get(nd.bipartition, "not_specified")
        nd.label = create_label(moved_support, taxon, aux_info)
    tree.seed_node.edge.length = None

    # do a hard re-rooting of the tree
    # (this invalidates the previous bipartitions, so must be done separately)
    tree.is_rooted = True

    # Trees without branch lengths report None here; forward None rather
    # than raising a TypeError on the multiplication.
    outgroup_length = outgroup_node.edge_length
    if outgroup_length is None:
        half_length = None
    else:
        half_length = 0.5 * outgroup_length
    tree.reroot_at_edge(outgroup_node.edge,
                        length1=half_length,
                        length2=half_length)

    # determine bootstrap for new node
    outgroup_is_leaf = outgroup_node.is_leaf()
    for child in tree.seed_node.child_node_iter():
        if outgroup_is_leaf:
            if not child.is_leaf():
                _support, taxon, aux_info = parse_label(child.label)
                child.label = create_label(max_support, taxon, aux_info)
        elif child != outgroup_node:
            # the sister of an internal outgroup takes the outgroup's support
            support, _taxon, _aux_info = parse_label(outgroup_node.label)
            _support, taxon, aux_info = parse_label(child.label)
            child.label = create_label(support, taxon, aux_info)

    return tree
def bootstrap_support(input_tree, replicate_trees, output_tree):
    """Annotate a tree with support computed from replicate trees.

    The input tree and all replicates are read as unrooted Newick trees
    sharing one taxon namespace. Support is expressed as a percentage
    and stored in the label of every internal node, keeping any taxon
    or auxiliary information already present in the label.

    Parameters
    ----------
    input_tree : str
        Tree inferred from complete data.
    replicate_trees : iterable
        Files containing replicate trees.
    output_tree: str
        Name of output tree with support values.
    """

    import dendropy

    # options shared by every Newick read in this function
    newick_read_opts = {
        'schema': 'newick',
        'rooting': "force-unrooted",
        'preserve_underscores': True,
    }

    # read tree and bootstrap replicates as unrooted, and
    # calculate bootstrap support
    orig_tree = dendropy.Tree.get_from_path(input_tree, **newick_read_opts)
    orig_tree.bipartitions = True
    orig_tree.encode_bipartitions()

    rep_trees = dendropy.TreeArray(
        taxon_namespace=orig_tree.taxon_namespace,
        is_rooted_trees=False,
        ignore_edge_lengths=True,
        ignore_node_ages=True,
        use_tree_weights=False)

    rep_trees.read_from_files(
        files=replicate_trees,
        taxon_namespace=orig_tree.taxon_namespace,
        **newick_read_opts)

    rep_trees.summarize_splits_on_tree(
        orig_tree,
        is_bipartitions_updated=True,
        add_support_as_node_attribute=True,
        support_as_percentages=True)

    for node in orig_tree.internal_nodes():
        if not node.label:
            # unlabelled node: the label becomes the integer support
            node.label = str(int(node.support))
            continue

        _old_support, taxon, aux_info = parse_label(node.label)
        node.label = create_label(node.support, taxon, aux_info)

    orig_tree.write_to_path(
        output_tree,
        schema='newick',
        suppress_rooting=True,
        unquoted_underscores=True)
def _strip_taxon_labels(self, tree):
    """Remove any previous taxon labels.

    Internal nodes whose label carries a support value are relabelled
    with that support value alone. Nodes without a support value are
    left untouched.

    Parameters
    ----------
    tree : Tree
        Dendropy Tree; modified in place.
    """

    for node in tree.internal_nodes():
        support, _taxon, _aux_info = parse_label(node.label)
        # Compare against None rather than truthiness: a support of 0 is
        # a legitimate value and its node must be stripped as well.
        if support is not None:
            node.label = create_label(support, None, None)
def _check_fractional_bootstraps(self, tree):
    """Rescale support values from [0, 1] to [0, 100] when required.

    Support values are treated as fractions only if no node has a
    support greater than 1.0. In that case each support is multiplied
    by 100 and rounded half up to an integer; otherwise the tree is
    left unchanged.
    """

    def _exceeds_one(node):
        support, _label, _aux_info = parse_label(node.label)
        return support is not None and support > 1.0

    # any value above 1.0 means supports are already percentages
    if any(_exceeds_one(n) for n in tree.preorder_node_iter()):
        return

    for n in tree.preorder_node_iter():
        support, label, aux_info = parse_label(n.label)
        if support is None:
            continue
        n.label = create_label(int(support*100 + 0.5), label, aux_info)
def _resolve_ambiguous_placements(self, fmeasure_for_taxa, median_rank_rd, max_rd_diff=0.1):
    """Pick a single node for every taxon with several candidate nodes.

    For each such taxon the candidate whose relative divergence is
    closest to the median for the taxon's rank is chosen, the taxon is
    added to that node's label, and the candidate list is reduced to
    the chosen entry.

    Parameters
    ----------
    fmeasure_for_taxa : d[taxon] -> [(Node, F-measure, precision, recall)]
        Node with highest F-measure for each taxon; modified in place.
    median_rank_rd : d[rank_index] -> float
        Median relative divergence for each taxonomic rank.
    max_rd_diff : float
        Maximum difference in relative divergence for assigning a taxonomic label.
    """

    # Taxa are processed from most to least specific rank so that a
    # label is never placed below a more specific label already on the
    # tree.
    ordered_taxa = Taxonomy().sort_taxa(list(fmeasure_for_taxa.keys()), reverse=True)
    for taxon in ordered_taxa:
        candidates = fmeasure_for_taxa[taxon]
        if len(candidates) == 1:
            continue

        rank_index = Taxonomy.rank_prefixes.index(taxon[0:3])
        target_rd = median_rank_rd[rank_index]

        # Scan candidates (stored in preorder) for the one nearest the
        # median relative divergence of this rank.
        best_index = None
        best_node = None
        best_diff = 1e9
        for index, entry in enumerate(candidates):
            node = entry[0]

            _support, node_taxa, _aux_info = parse_label(node.label)
            if node_taxa:
                last_prefix = node_taxa.split(';')[-1].strip()[0:3]
                node_rank_index = Taxonomy.rank_prefixes.index(last_prefix)
            else:
                node_rank_index = -1

            if node_rank_index > rank_index:
                # a more specific label sits on this node, so stop here;
                # use this node only if nothing better was found above
                if best_index is None:
                    best_index, best_node = index, node
                break

            diff = abs(target_rd - node.rel_dist)
            if diff <= max_rd_diff and diff < best_diff:
                best_diff, best_index, best_node = diff, index, node

        if best_index is None:
            # no candidate is within the accepted distance, so be
            # conservative and use the last candidate in the list
            best_index = len(candidates) - 1
            best_node = candidates[best_index][0]

        # add label to node
        support, node_taxa, aux_info = parse_label(best_node.label)
        if node_taxa:
            merged_taxa = [t.strip() for t in node_taxa.split(';')]
            merged_taxa.append(taxon)
            taxa_str = '; '.join(Taxonomy().sort_taxa(merged_taxa))
        else:
            taxa_str = taxon
        best_node.label = create_label(support, taxa_str, aux_info)

        # remove other potential node assignments
        fmeasure_for_taxa[taxon] = [candidates[best_index]]
def _resolve_ambiguous_placements(self, tree, fmeasure_for_taxa, median_rank_rd, max_rd_diff=0.1):
    """Resolve ambiguous taxon label placements using median relative divergences.

    For each taxon with more than one candidate node, the candidate
    whose relative divergence is closest to the median for the taxon's
    rank is chosen, the taxon is added to that node's label, and the
    candidate list is reduced to the chosen entry.

    Parameters
    ----------
    tree : Tree
        Dendropy tree. Not used by this method.
    fmeasure_for_taxa : d[taxon] -> [(Node, F-measure, precision, recall)]
        Node with highest F-measure for each taxon; modified in place.
    median_rank_rd : d[rank_index] -> float
        Median relative divergence for each taxonomic rank.
    max_rd_diff : float
        Difference in relative divergence a node must stay strictly
        below to be considered for a taxonomic label.

    Raises
    ------
    SystemExit
        With status 1 if the fallback node is not a leaf.
    """

    # For ambiguous nodes place them closest to median for rank
    # and within accepted relative divergence distance. Taxon labels
    # are placed in reverse taxonomic order (species to domain) and
    # this ordering used to ensure taxonomic consistency.
    for taxon in Taxonomy().sort_taxa(fmeasure_for_taxa.keys(), reverse=True):
        candidates = fmeasure_for_taxa[taxon]
        if len(candidates) == 1:
            continue

        rank_prefix = taxon[0:3]
        rank_index = Taxonomy.rank_prefixes.index(rank_prefix)
        rd = median_rank_rd[rank_index]

        # Find node closest to median distance, but making sure
        # taxon is not placed below a more specific taxon label.
        # The 'fmeasure_for_taxa' stores node information in preorder.
        closest_index = None
        closest_dist = 1e9
        closest_node = None
        for i, d in enumerate(candidates):
            cur_node = d[0]

            cur_rank_index = -1
            _support, cur_taxon, _aux_info = parse_label(cur_node.label)
            if cur_taxon:
                cur_prefix = cur_taxon.split(';')[-1][0:3]
                cur_rank_index = Taxonomy.rank_prefixes.index(cur_prefix)

            if cur_rank_index > rank_index:
                # reached a node with a more specific label so
                # label should be appended to this node or
                # placed above it
                if closest_index is None:
                    closest_index = i
                    closest_node = cur_node
                break

            rd_diff = abs(rd - cur_node.rel_dist)
            if rd_diff < max_rd_diff and rd_diff < closest_dist:
                closest_dist = rd_diff
                closest_index = i
                closest_node = cur_node

        if closest_index is None:
            # no node is within an acceptable relative divergence distance
            # for this label so it should be placed at the most extant node
            # which should be a leaf node
            closest_index = len(candidates) - 1
            closest_node = candidates[closest_index][0]
            if not closest_node.is_leaf():
                self.logger.error('Leaf node expected!')
                # exit with a failure status so the error is visible
                # to the calling process
                sys.exit(1)

        # add label to node
        support, cur_taxon, aux_info = parse_label(closest_node.label)
        if not cur_taxon:
            taxa_str = taxon
        else:
            taxa = cur_taxon.split(';') + [taxon]
            taxa_str = ';'.join(Taxonomy().sort_taxa(taxa))

        closest_node.label = create_label(support, taxa_str, aux_info)

        # remove other potential node assignments
        fmeasure_for_taxa[taxon] = [candidates[closest_index]]