Example #1
0
    def _reroot(self, tree, outgroup_node, max_support=100):
        """Reroot tree on the outgroup while carrying support values along.

        Support values are recorded per bipartition, re-attached to the
        nodes after the tree is reseeded at the parent of the outgroup,
        and finally the tree is rerooted on the outgroup edge with the
        edge length split evenly between the two new root branches.

        Parameters
        ----------
        tree : Tree
          Dendropy tree; modified in place.
        outgroup_node : Node
          Node whose edge becomes the position of the new root.
        max_support : number
          Support assigned to leaves and, when the outgroup is a leaf,
          to the internal child of the new root.

        Returns
        -------
        Tree
          The same tree object that was passed in.
        """

        # Pass 1: remember the support associated with every bipartition.
        tree.encode_bipartitions()
        support_by_bipartition = {}
        for node in tree:
            support, _taxon, _aux = parse_label(node.label)
            if node.is_leaf():
                recorded = max_support
            elif support is not None:
                recorded = float(support)
            else:
                recorded = None
            support_by_bipartition[node.bipartition] = recorded

        # Pass 2: reseed at the outgroup's parent and rewrite each label with
        # the support recorded for its (re-encoded) bipartition.
        tree.reseed_at(outgroup_node.parent_node)
        tree.encode_bipartitions()
        for node in tree:
            _, taxon, aux_info = parse_label(node.label)
            moved_support = support_by_bipartition.get(node.bipartition,
                                                       "not_specified")
            node.label = create_label(moved_support, taxon, aux_info)
        tree.seed_node.edge.length = None

        # Pass 3: hard re-rooting of the tree
        # (this invalidates the previous bipartitions, so must be done
        # separately from the support transfer above)
        tree.is_rooted = True
        half_length = 0.5 * outgroup_node.edge_length
        tree.reroot_at_edge(outgroup_node.edge,
                            length1=half_length,
                            length2=half_length)

        # Pass 4: set the support of the node created opposite the outgroup.
        outgroup_is_leaf = outgroup_node.is_leaf()
        for child in tree.seed_node.child_node_iter():
            if outgroup_is_leaf:
                if child.is_leaf():
                    continue
                _, taxon, aux_info = parse_label(child.label)
                child.label = create_label(max_support, taxon, aux_info)
            elif child != outgroup_node:
                # the sibling of the outgroup takes the outgroup's support
                support, _, _ = parse_label(outgroup_node.label)
                _, taxon, aux_info = parse_label(child.label)
                child.label = create_label(support, taxon, aux_info)

        return tree
Example #2
0
    def _assign_taxon_labels(self, fmeasure_for_taxa):
        """Write unambiguous taxon labels onto their nodes.

        A taxon is placed only when exactly one candidate node is listed
        for it. If the node already has a taxon label, the new taxon is
        appended after a '; ' separator.

        Parameters
        ----------
        fmeasure_for_taxa : d[taxon] -> [(Node, F-measure, precision, recall), ...]
          Node with highest F-measure for each taxon.

        Returns
        -------
        set
            Taxon labels placed in tree.
        """

        placed_taxon = set()
        ordered_taxa = Taxonomy().sort_taxa(list(fmeasure_for_taxa.keys()))
        for taxon in ordered_taxa:
            candidates = fmeasure_for_taxa[taxon]
            if len(candidates) != 1:
                # zero or several candidate nodes: not handled here
                continue

            placed_taxon.add(taxon)
            node, _fmeasure, _precision, _recall = candidates[0]

            support, existing, aux_info = parse_label(node.label)
            combined = (existing + '; ' + taxon) if existing else taxon
            node.label = create_label(support, combined, aux_info)

        return placed_taxon
Example #3
0
    def _assign_taxon_labels(self, fmeasure_for_taxa):
        """Label every node that is the sole candidate for a taxon.

        Taxa with exactly one candidate node are written to that node's
        label; a taxon is joined to any label already present with ';'.
        Taxa with any other number of candidates are skipped.

        Parameters
        ----------
        fmeasure_for_taxa : d[taxon] -> [(Node, F-measure, precision, recall), ...]
          Node with highest F-measure for each taxon.

        Returns
        -------
        set
            Taxon labels placed in tree.
        """

        placed_taxon = set()
        for taxon in Taxonomy().sort_taxa(fmeasure_for_taxa.keys()):
            entries = fmeasure_for_taxa[taxon]
            if len(entries) == 1:
                placed_taxon.add(taxon)
                target, _fmeasure, _precision, _recall = entries[0]

                support, current, aux_info = parse_label(target.label)
                pieces = [current, taxon] if current else [taxon]
                target.label = create_label(support,
                                            ';'.join(pieces),
                                            aux_info)

        return placed_taxon
Example #4
0
    def _reroot(self, tree, outgroup_node, max_support=100):
        """Reroot tree on the outgroup edge, preserving bootstrap values.

        Parameters
        ----------
        tree : Tree
          Dendropy tree; modified in place.
        outgroup_node : Node
          Node defining the outgroup; the new root is placed on its edge.
        max_support : number
          Support given to leaves and, for a leaf outgroup, to the
          internal child of the new root.

        Returns
        -------
        Tree
          The tree passed in, after rerooting.
        """

        def _recorded_support(node):
            """Support to remember for the bipartition of this node."""
            support, _taxon, _aux = parse_label(node.label)
            if node.is_leaf():
                return max_support
            return None if support is None else float(support)

        # determine support values for each bipartition
        tree.encode_bipartitions()
        support_values = {}
        for node in tree:
            value = _recorded_support(node)
            support_values[node.bipartition] = value

        # move support values for desired re-rooting
        tree.reseed_at(outgroup_node.parent_node)
        tree.encode_bipartitions()
        for node in tree:
            _, taxon, aux_info = parse_label(node.label)
            new_support = support_values.get(node.bipartition, "not_specified")
            node.label = create_label(new_support, taxon, aux_info)
        tree.seed_node.edge.length = None

        # do a hard re-rooting of the tree
        # (this invalidates the previous bipartitions, so must be done separately)
        tree.is_rooted = True
        split_length = 0.5 * outgroup_node.edge_length
        tree.reroot_at_edge(outgroup_node.edge,
                            length1=split_length,
                            length2=split_length)

        # determine bootstrap for new node
        leaf_outgroup = outgroup_node.is_leaf()
        for child in tree.seed_node.child_node_iter():
            if leaf_outgroup and not child.is_leaf():
                _, taxon, aux_info = parse_label(child.label)
                child.label = create_label(max_support, taxon, aux_info)
            elif not leaf_outgroup and child != outgroup_node:
                # sibling of the outgroup inherits the outgroup's support
                support, _, _ = parse_label(outgroup_node.label)
                _, taxon, aux_info = parse_label(child.label)
                child.label = create_label(support, taxon, aux_info)

        return tree
Example #5
0
def bootstrap_support(input_tree, replicate_trees, output_tree):
    """Annotate a tree with support calculated from replicate trees.

    The input tree and all replicates are read as unrooted newick trees
    sharing one taxon namespace. Support for each split of the input tree
    is summarized from the replicates and written into the labels of the
    internal nodes, keeping any taxon and auxiliary information already
    present in a label.

    Parameters
    ----------
    input_tree : str
      Tree inferred from complete data.
    replicate_trees : iterable
      Files containing replicate trees.
    output_tree: str
      Name of output tree with support values.
    """

    import dendropy

    # every tree (reference and replicates) is parsed with the same options
    newick_unrooted = {'schema': 'newick',
                       'rooting': "force-unrooted",
                       'preserve_underscores': True}

    reference = dendropy.Tree.get_from_path(input_tree, **newick_unrooted)
    reference.bipartitions = True
    reference.encode_bipartitions()
    namespace = reference.taxon_namespace

    replicates = dendropy.TreeArray(taxon_namespace=namespace,
                                    is_rooted_trees=False,
                                    ignore_edge_lengths=True,
                                    ignore_node_ages=True,
                                    use_tree_weights=False)
    replicates.read_from_files(files=replicate_trees,
                               taxon_namespace=namespace,
                               **newick_unrooted)

    # attaches a 'support' attribute to the nodes of the reference tree
    replicates.summarize_splits_on_tree(reference,
                                        is_bipartitions_updated=True,
                                        add_support_as_node_attribute=True,
                                        support_as_percentages=True)

    for node in reference.internal_nodes():
        if not node.label:
            # unlabeled node: the label is just the integer support
            node.label = str(int(node.support))
            continue

        _old_support, taxon, aux_info = parse_label(node.label)
        node.label = create_label(node.support, taxon, aux_info)

    reference.write_to_path(output_tree,
                            schema='newick',
                            suppress_rooting=True,
                            unquoted_underscores=True)
Example #6
0
    def _strip_taxon_labels(self, tree):
        """Remove any previous taxon labels.

        Every internal node whose label carries a support value has its
        label rebuilt from that support value alone, which drops the taxon
        and auxiliary information. Internal nodes without a support value
        are left unchanged.

        Parameters
        ----------
        tree : Tree
          Dendropy Tree; labels are modified in place.
        """

        for node in tree.internal_nodes():
            support, _taxon, _aux_info = parse_label(node.label)
            # Compare against None instead of relying on truthiness: a
            # support of 0 is a valid value and such nodes must be stripped
            # like any other supported node.
            if support is not None:
                node.label = create_label(support, None, None)
Example #7
0
 def _strip_taxon_labels(self, tree):
     """Remove any previous taxon labels.

     Internal nodes that have a support value get a label consisting of
     that support value only; taxon and auxiliary information are
     discarded. Internal nodes with no support value keep their label.

     Parameters
     ----------
     tree : Tree
       Dendropy Tree; labels are modified in place.
     """

     for node in tree.internal_nodes():
         support, _taxon, _aux_info = parse_label(node.label)
         # A support of 0 is falsy but still a real support value, so the
         # presence test must be an explicit comparison with None.
         if support is not None:
             node.label = create_label(support, None, None)
Example #8
0
 def _check_fractional_bootstraps(self, tree):
     """Rescale bootstrap values from [0, 1] to [0, 100] when needed.

     The tree is considered to hold fractional bootstraps when no node
     has a support value greater than 1.0. In that case every support
     value is multiplied by 100 and rounded to the nearest integer;
     otherwise the tree is left untouched.
     """

     def _exceeds_one(node):
         """True if the node has a support value above 1.0."""
         support, _taxon, _aux = parse_label(node.label)
         return support is not None and support > 1.0

     # a single value above 1.0 means supports are already on [0, 100]
     if any(_exceeds_one(node) for node in tree.preorder_node_iter()):
         return

     for node in tree.preorder_node_iter():
         support, taxon, aux_info = parse_label(node.label)
         if support is None:
             continue
         # adding 0.5 before truncation rounds to the nearest integer
         node.label = create_label(int(support*100 + 0.5), taxon, aux_info)
Example #9
0
    def _resolve_ambiguous_placements(self,
                                      fmeasure_for_taxa,
                                      median_rank_rd,
                                      max_rd_diff=0.1):
        """Pick a single node for each taxon that has several candidates.

        For every taxon whose candidate list does not hold exactly one
        entry, the chosen node is labeled with the taxon and the candidate
        list in 'fmeasure_for_taxa' is reduced to that one entry.

        Parameters
        ----------
        fmeasure_for_taxa : d[taxon] -> [(Node, F-measure, precision, recall)]
          Node with highest F-measure for each taxon.
        median_rank_rd : d[rank_index] -> float
          Median relative divergence for each taxonomic rank.
        max_rd_diff : float
          Maximum difference in relative divergence for assigning a taxonomic label.
        """

        # Ambiguous taxa are placed at the candidate closest to the median
        # relative divergence of their rank, provided it lies within the
        # accepted distance. Taxa are processed in reverse taxonomic order
        # (species to domain) and this ordering is used to ensure taxonomic
        # consistency.
        ordered_taxa = Taxonomy().sort_taxa(list(fmeasure_for_taxa.keys()),
                                            reverse=True)
        for taxon in ordered_taxa:
            candidates = fmeasure_for_taxa[taxon]
            if len(candidates) == 1:
                continue

            rank_index = Taxonomy.rank_prefixes.index(taxon[0:3])
            target_rd = median_rank_rd[rank_index]

            # Scan candidates for the one nearest the median, making sure
            # the taxon is not placed below a more specific taxon label.
            # The 'fmeasure_for_taxa' stores node information in preorder.
            best_index = None
            best_node = None
            best_diff = 1e9
            for index, entry in enumerate(candidates):
                node = entry[0]

                _support, node_taxa, _aux_info = parse_label(node.label)
                if node_taxa:
                    # rank of the last taxon in the node's label
                    last_prefix = node_taxa.split(';')[-1].strip()[0:3]
                    node_rank_index = Taxonomy.rank_prefixes.index(last_prefix)
                else:
                    node_rank_index = -1

                if node_rank_index > rank_index:
                    # reached a node with a more specific label so
                    # label should be appended to this node or
                    # placed above it
                    if best_index is None:
                        best_index, best_node = index, node
                    break

                diff = abs(target_rd - node.rel_dist)
                if diff <= max_rd_diff and diff < best_diff:
                    best_diff = diff
                    best_index, best_node = index, node

            if best_index is None:
                # no node is within an acceptable relative divergence distance
                # for this label so it should be placed at the most extant node
                # in order to be conservative
                best_index = len(candidates) - 1
                best_node = candidates[best_index][0]

            # add label to node
            support, existing_taxa, aux_info = parse_label(best_node.label)
            if existing_taxa:
                merged = [t.strip() for t in existing_taxa.split(';')]
                merged.append(taxon)
                taxa_str = '; '.join(Taxonomy().sort_taxa(merged))
            else:
                taxa_str = taxon

            best_node.label = create_label(support, taxa_str, aux_info)

            # remove other potential node assignments
            fmeasure_for_taxa[taxon] = [candidates[best_index]]
Example #10
0
    def _resolve_ambiguous_placements(self, tree, fmeasure_for_taxa, median_rank_rd,
                                      max_rd_diff=0.1):
        """Resolve ambiguous taxon label placements using median relative divergences.

        For every taxon with other than exactly one candidate node, one
        candidate is selected, the taxon is added to that node's label,
        and the entry in 'fmeasure_for_taxa' is reduced to the selected
        candidate.

        Parameters
        ----------
        tree : Tree
          Dendropy tree. Not referenced by this method; kept for
          compatibility with existing callers.
        fmeasure_for_taxa : d[taxon] -> [(Node, F-measure, precision, recall)]
          Node with highest F-measure for each taxon. Modified in place.
        median_rank_rd : d[rank_index] -> float
          Median relative divergence for each taxonomic rank.
        max_rd_diff : float
          A candidate is only considered when its relative divergence
          differs from the rank median by strictly less than this value.
          Defaults to the previously hard-coded 0.1.

        Raises
        ------
        SystemExit
          With a non-zero status if no candidate qualifies and the last
          candidate is not a leaf node.
        """

        # For ambiguous nodes place them closest to median for rank
        # and within accepted relative divergence distance. Taxon labels
        # are placed in reverse taxonomic order (species to domain) and
        # this ordering used to ensure taxonomic consistency.
        for taxon in Taxonomy().sort_taxa(fmeasure_for_taxa.keys(), reverse=True):
            candidates = fmeasure_for_taxa[taxon]
            if len(candidates) == 1:
                continue

            rank_prefix = taxon[0:3]
            rank_index = Taxonomy.rank_prefixes.index(rank_prefix)
            rd = median_rank_rd[rank_index]

            # Find node closest to median distance, but making sure
            # taxon is not placed below a more specific taxon label.
            # The 'fmeasure_for_taxa' stores node information in preorder.
            closest_index = None
            closest_dist = 1e9
            closest_node = None
            for i, d in enumerate(candidates):
                cur_node = d[0]

                cur_rank_index = -1
                _support, cur_taxon, _aux_info = parse_label(cur_node.label)
                if cur_taxon:
                    # strip so that labels written as 'a; b' (with a space
                    # after the separator) still yield a valid rank prefix
                    cur_prefix = cur_taxon.split(';')[-1].strip()[0:3]
                    cur_rank_index = Taxonomy.rank_prefixes.index(cur_prefix)

                if cur_rank_index > rank_index:
                    # reached a node with a more specific label so
                    # label should be appended to this node or
                    # placed above it
                    if closest_index is None:
                        closest_index = i
                        closest_node = cur_node
                    break

                rd_diff = abs(rd - cur_node.rel_dist)
                if rd_diff < max_rd_diff and rd_diff < closest_dist:
                    closest_dist = rd_diff
                    closest_index = i
                    closest_node = cur_node

            if closest_index is None:
                # no node is within an acceptable relative divergence distance
                # for this label so it should be placed at the most extant node
                # which should be a leaf node
                closest_index = len(candidates) - 1
                closest_node = candidates[closest_index][0]

                if not closest_node.is_leaf():
                    self.logger.error('Leaf node expected!')
                    # exit with a failure status; a bare sys.exit() would
                    # report success to the calling process
                    sys.exit(1)

            # add label to node
            support, cur_taxon, aux_info = parse_label(closest_node.label)
            if not cur_taxon:
                taxa_str = taxon
            else:
                # tolerate whitespace around the separators of an existing label
                taxa = [t.strip() for t in cur_taxon.split(';')] + [taxon]
                taxa_str = ';'.join(Taxonomy().sort_taxa(taxa))

            closest_node.label = create_label(support, taxa_str, aux_info)

            # remove other potential node assignments
            fmeasure_for_taxa[taxon] = [candidates[closest_index]]