Ejemplo n.º 1
0
def generate_skbio_tree(classification, existing_tree=None):
    """Build (or extend) a scikit-bio tree from a classification's table.

    Each row of ``classification.results()['table']`` becomes a ``TreeNode``
    named after the row's ``tax_id``, with ``tax_name`` and ``rank``
    attributes attached. Rows may arrive in any order: a node whose parent is
    not in the tree yet is parked in a side table until the parent shows up.

    Parameters
    ----------
    classification
        Object whose ``results()`` returns a mapping with a ``'table'`` entry:
        an iterable of dict-like rows providing ``'tax_id'`` and
        ``'parent_tax_id'`` and, optionally, ``'name'`` and ``'rank'``.
    existing_tree : skbio.tree.TreeNode, optional
        Tree to extend. New nodes are appended to it in place. When omitted,
        a fresh root named ``'1'`` (``tax_name='Root'``, ``rank='no rank'``)
        is created.

    Returns
    -------
    skbio.tree.TreeNode
        ``existing_tree`` if one was given, otherwise the newly created root.

    Raises
    ------
    AssertionError
        If, after all rows are processed, some nodes are still waiting for a
        parent that never appeared.
    """
    from skbio.tree import MissingNodeError, TreeNode

    otus = classification.results()['table']
    if existing_tree is None:
        tree = TreeNode(name='1', length=1)
        tree.tax_name = 'Root'
        tree.rank = 'no rank'
    else:
        tree = existing_tree

    # parent tax_id -> nodes created before their parent was in the tree
    unlinked = defaultdict(list)

    for otu in otus:
        tax_id = otu['tax_id']

        # skip rows whose node is already in the tree
        try:
            tree.find(tax_id)
        except MissingNodeError:
            pass
        else:
            continue

        # Look for the parent. Parked children are only merged in when the
        # parent exists, so that no subtrees are built inside `unlinked`.
        parent_id = otu['parent_tax_id']
        try:
            parent = tree.find(parent_id)
            # NOTE(review): _merge_unlinked is defined elsewhere; presumably
            # it removes the entries it consumes from `unlinked` -- confirm.
            children = _merge_unlinked(tax_id, unlinked)
        except MissingNodeError:
            parent = None
            children = None

        node = TreeNode(name=tax_id, length=1, children=children)
        node.tax_name = otu.get('name', '')
        node.rank = otu.get('rank', 'no rank')

        # attach now if possible, otherwise park until the parent is in tree
        if parent is not None:
            parent.append(node)
        else:
            unlinked[parent_id].append(node)

    # Raised explicitly (rather than via `assert`) so the check still runs
    # under `python -O`; the exception type and message are unchanged.
    if len(unlinked) != 0:
        raise AssertionError(
            'some unlinked nodes were not included in the tree')

    return tree
Ejemplo n.º 2
0
    def tree_build(self):
        """Assemble a tree out of the rows of ``self.taxonomy``.

        Intended for `ClassificationsDataFrame` or `SampleCollection`.

        One node is created per label in ``self.taxonomy.index`` and then
        attached to the node named by its ``parent_tax_id``. A node whose
        parent is not among the labels is left unattached; a warning is
        issued for it unless its tax ID is ``"1"``.

        Returns
        -------
        `skbio.tree.TreeNode`, the node with tax ID ``"1"``, i.e. the root of a tree holding the
        taxa in the current analysis and their parents leading back to it.

        Raises
        ------
        KeyError
            If ``"1"`` is not one of the tax IDs in ``self.taxonomy.index``.
        """
        from skbio.tree import TreeNode

        taxonomy = self.taxonomy
        names = taxonomy["name"]
        ranks = taxonomy["rank"]
        parent_ids = taxonomy["parent_tax_id"]

        # first pass: one detached node per tax ID
        by_tax_id = {}

        for tax_id in taxonomy.index:
            current = TreeNode(name=tax_id, length=1)
            current.tax_name = names[tax_id]
            current.rank = ranks[tax_id]
            current.parent_tax_id = parent_ids[tax_id]

            by_tax_id[tax_id] = current

        # second pass: hang every node under its parent where one exists
        for tax_id in taxonomy.index:
            child = by_tax_id[tax_id]
            parent = by_tax_id.get(child.parent_tax_id)

            if parent is not None:
                parent.append(child)
            elif tax_id != "1":
                # the root is expected to be parentless; anything else is odd
                warnings.warn(
                    "tax_id={} has parent_tax_id={} which is not in tree".format(
                        tax_id, child.parent_tax_id
                    )
                )

        return by_tax_id["1"]
Ejemplo n.º 3
0
    def tree_build(self):
        """Construct the taxonomy tree for this `ClassificationsDataFrame` or
        `SampleCollection`.

        Nodes are created for every tax ID in ``self.taxonomy.index`` and then
        linked to the node named by their ``parent_tax_id``. Nodes whose
        parent is not present stay unlinked, and a warning is emitted for each
        of them except tax ID ``"1"``.

        Returns
        -------
        `skbio.tree.TreeNode`, the node with tax ID ``"1"``, serving as the root of a tree that
        contains the taxa in the current analysis and their parents.

        Raises
        ------
        KeyError
            If no row of ``self.taxonomy`` has tax ID ``"1"``.
        """
        from skbio.tree import TreeNode

        def _new_node(tax_id):
            # detached node carrying the taxonomy columns as attributes
            fresh = TreeNode(name=tax_id, length=1)
            fresh.tax_name = self.taxonomy["name"][tax_id]
            fresh.rank = self.taxonomy["rank"][tax_id]
            fresh.parent_tax_id = self.taxonomy["parent_tax_id"][tax_id]
            return fresh

        # build all the nodes up front so links can be made in any order
        lookup = {tax_id: _new_node(tax_id) for tax_id in self.taxonomy.index}

        # wire each node to its parent
        for tax_id in self.taxonomy.index:
            child = lookup[tax_id]

            try:
                parent = lookup[child.parent_tax_id]
            except KeyError:
                # a missing parent is only expected for the root ("1")
                if tax_id != "1":
                    message = (
                        "tax_id={} has parent_tax_id={} which is not in tree"
                        "".format(tax_id, child.parent_tax_id)
                    )
                    warnings.warn(message)
            else:
                parent.append(child)

        return lookup["1"]