def generate_skbio_tree(classification, existing_tree=None):
    """Build (or extend) a scikit-bio tree from a classification's taxonomy rows.

    Parameters
    ----------
    classification
        Object whose ``results()`` call returns a mapping with a ``'table'``
        entry. That entry is iterated as dict-like rows; each row is read for
        ``'tax_id'`` and ``'parent_tax_id'`` and, optionally, ``'name'`` and
        ``'rank'``.
    existing_tree : skbio.tree.TreeNode, optional
        Tree to extend in place. When omitted, a new root node named ``'1'``
        (``tax_name='Root'``, ``rank='no rank'``) is created.

    Returns
    -------
    skbio.tree.TreeNode
        ``existing_tree`` when one was passed, otherwise the new root node.

    Raises
    ------
    AssertionError
        If, after every row has been processed, some nodes are still waiting
        for a parent that never appeared in the tree.
    """
    from skbio.tree import MissingNodeError, TreeNode

    otus = classification.results()['table']

    if existing_tree is None:
        tree = TreeNode(name='1', length=1)
        tree.tax_name = 'Root'
        tree.rank = 'no rank'
    else:
        tree = existing_tree

    # nodes whose parent is not in the tree yet, keyed by that parent's tax ID
    unlinked = defaultdict(list)

    for otu in otus:
        tax_id = otu['tax_id']

        # skip nodes already in the tree
        try:
            tree.find(tax_id)
        except MissingNodeError:
            pass
        else:
            continue

        # try to find a parent (if it exists)
        parent_id = otu['parent_tax_id']
        try:
            parent = tree.find(parent_id)
            # the children are merged out here (only if we have a parent) to
            # make sure we're not creating trees inside unlinked itself.
            # NOTE(review): _merge_unlinked is defined elsewhere; presumably it
            # removes and returns the nodes waiting on this tax_id -- confirm.
            children = _merge_unlinked(tax_id, unlinked)
        except MissingNodeError:
            parent = None
            children = None

        # create the node
        node = TreeNode(name=tax_id, length=1, children=children)
        node.tax_name = otu.get('name', '')
        node.rank = otu.get('rank', 'no rank')

        # either add the node to its parent or keep track of it until its
        # parent is "in tree" too
        if parent is not None:
            parent.append(node)
        else:
            unlinked[parent_id].append(node)

    # Raised explicitly (rather than via an `assert` statement) so the check
    # still runs when Python is started with -O; the exception type and
    # message are the same as before.
    if len(unlinked) != 0:
        raise AssertionError(
            'some unlinked nodes were not included in the tree')

    return tree
def tree_build(self):
    """Assemble a tree from the taxonomy data held by this object.

    Intended for use with `ClassificationsDataFrame` or `SampleCollection`.

    Returns
    -------
    `skbio.tree.TreeNode`, the node for tax ID ``"1"``. Every taxon listed in
    ``self.taxonomy`` whose parent is also listed hangs beneath that parent.

    Raises
    ------
    KeyError
        If ``self.taxonomy`` has no entry for tax ID ``"1"``.
    """
    from skbio.tree import TreeNode

    # first pass: one detached node per taxon, annotated from the table
    by_tax_id = {}
    for tax_id in self.taxonomy.index:
        taxon = TreeNode(name=tax_id, length=1)
        taxon.tax_name = self.taxonomy["name"][tax_id]
        taxon.rank = self.taxonomy["rank"][tax_id]
        taxon.parent_tax_id = self.taxonomy["parent_tax_id"][tax_id]
        by_tax_id[tax_id] = taxon

    # second pass: attach each node to its parent, when that parent exists
    for tax_id in self.taxonomy.index:
        child = by_tax_id[tax_id]
        try:
            parent = by_tax_id[child.parent_tax_id]
        except KeyError:
            # the root is expected to have no parent; anything else is
            # reported and left detached
            if tax_id != "1":
                warnings.warn(
                    "tax_id={} has parent_tax_id={} which is not in tree".format(
                        tax_id, child.parent_tax_id
                    )
                )
        else:
            parent.append(child)

    return by_tax_id["1"]
def tree_build(self):
    """Construct a tree out of the taxonomy data stored on this
    `ClassificationsDataFrame` or `SampleCollection`.

    Returns
    -------
    `skbio.tree.TreeNode`, the node for tax ID ``"1"``, with each taxon from
    ``self.taxonomy`` linked under its parent whenever that parent is also
    present in the table.

    Raises
    ------
    KeyError
        If tax ID ``"1"`` does not appear in ``self.taxonomy``.
    """
    from skbio.tree import TreeNode

    # create an unattached, annotated node for every taxon
    lookup = {}
    for tax_id in self.taxonomy.index:
        entry = TreeNode(name=tax_id, length=1)
        entry.tax_name = self.taxonomy["name"][tax_id]
        entry.rank = self.taxonomy["rank"][tax_id]
        entry.parent_tax_id = self.taxonomy["parent_tax_id"][tax_id]
        lookup[tax_id] = entry

    # wire children to parents
    for tax_id in self.taxonomy.index:
        current = lookup[tax_id]
        parent_id = current.parent_tax_id

        if parent_id in lookup:
            lookup[parent_id].append(current)
            continue

        # no parent available: silent for the root, a warning for any
        # other taxon
        if tax_id != "1":
            warnings.warn(
                "tax_id={} has parent_tax_id={} which is not in tree".format(
                    tax_id, parent_id
                )
            )

    return lookup["1"]