def testCanonicalizeByNumberOfTipsAheadOfName(self):
    """
    Canonicalization must order child nodes by their number of tips
    before it considers their names.
    """
    def subtree(name, tipCount):
        # A named node carrying ``tipCount`` anonymous tips.
        return TreeNode(name=name,
                        children=[TreeNode() for _ in range(tipCount)])

    njtree = NJTree()
    njtree.tree = TreeNode(children=[
        subtree('a', 3),
        subtree('b', 1),
        subtree('c', 2),
    ])

    namesBefore = [child.name for child in njtree.tree.children]
    self.assertEqual(['a', 'b', 'c'], namesBefore)

    namesAfter = [child.name
                  for child in njtree.canonicalize().tree.children]
    self.assertEqual(['b', 'c', 'a'], namesAfter)
def append_taxa(tree, clusters, at_tip=True):
    """Attach extra taxa next to existing tips, forming polytomic clades.

    Parameters
    ----------
    tree : skbio.TreeNode
        tree to add taxa to; it is copied, not modified
    clusters : dict of iterable of str
        map of existing tip name to extra taxon name(s)
    at_tip : bool
        if True, the existing tip becomes an unnamed internal node whose
        children are a new tip carrying the original name plus one tip per
        extra taxon; if False, the extra taxa are appended to the parent of
        the existing tip, each with the same branch length as that tip

    Returns
    -------
    skbio.TreeNode
        resulting tree

    Raises
    ------
    ValueError
        if any key of `clusters` is not the name of a tip in `tree`
    """
    core_taxa = set(clusters)
    tip_names = [tip.name for tip in tree.tips()]
    if not core_taxa.issubset(tip_names):
        raise ValueError('Error: Some core taxa are absent from the tree.')

    # work on a copy so the caller's tree stays untouched
    res = tree.copy()
    for core, extra in clusters.items():
        tip = res.find(core)
        if not at_tip:
            # siblings of the existing tip, sharing its branch length
            tip.parent.extend([TreeNode(x, tip.length) for x in extra])
            continue
        # push the original name down to a new child, then add the extras
        tip.append(TreeNode(tip.name))
        tip.extend([TreeNode(x) for x in extra])
        tip.name = None
    return res
def make_consensus_tree(cons_split, check_for_rank=True, tips=None):
    """Build a tree from consensus lineages, plus a name -> node lookup.

    Parameters
    ----------
    cons_split : iterable of iterable of str
        One lineage per entry; the position of a name within its lineage
        is recorded as the ``Rank`` of the node created for it. The
        iterable is consumed in a single pass, so a one-shot iterator is
        acceptable, and an empty iterable yields a bare root.
    check_for_rank : bool
        If True, names that contain ``'__'`` but have an empty string
        right after the first ``'__'`` (e.g. ``'g__'``) are left out of
        the lookup.
    tips : sequence, optional
        If given, a node named ``tips[i]`` is appended beneath the last
        node of the i-th lineage.

    Returns
    -------
    tuple of (TreeNode, dict)
        The unnamed root node, and a dict mapping node name to node. When
        the same name occurs on several nodes, the one visited last by
        ``traverse()`` wins.
    """
    god_node = TreeNode(name=None)
    god_node.Rank = None
    # ChildLookup maps child name -> child node so that we don't have to
    # iterate over .children for every name.
    god_node.ChildLookup = {}

    # for every consensus string, start at the "god" node
    for idx, con in enumerate(cons_split):
        cur_node = god_node

        # walk down the lineage, creating any node not seen before
        for rank, name in enumerate(con):
            try:
                cur_node = cur_node.ChildLookup[name]
            except KeyError:
                new_node = TreeNode(name=name)
                new_node.Rank = rank
                new_node.ChildLookup = {}
                cur_node.append(new_node)
                cur_node.ChildLookup[name] = new_node
                cur_node = new_node

        if tips is not None:
            cur_node.append(TreeNode(name=tips[idx]))

    # build an assist lookup dict
    lookup = {}
    for node in god_node.traverse():
        if node.name is None:
            continue
        if (check_for_rank and '__' in node.name
                and node.name.split('__')[1] == ''):
            continue
        lookup[node.name] = node

    return god_node, lookup
def testRootByOneNodeName(self):
    """
    Passing a single node name to root() must be accepted.
    """
    njtree = NJTree()
    njtree.tree = TreeNode(children=[
        TreeNode(name=name) for name in ('c', 'd', 'b', 'a')])

    rooted = njtree.root(['a'])
    self.assertEqual(
        ['c', 'd', 'b', 'a'],
        [child.name for child in rooted.tree.children])
def testCountCladesOneChild(self):
    """
    A tree whose root has a single child must yield exactly one clade.
    """
    njtree = NJTree()
    njtree.tree = TreeNode(children=[TreeNode(name='a')])

    expected = {frozenset(['a']): 1}
    self.assertEqual(expected, njtree.countClades())
def testRootByInexistentNodeNameMustRaiseError(self):
    """
    Asking root() for a node name that is not in the tree must raise
    MissingNodeError.
    """
    njtree = NJTree()
    njtree.tree = TreeNode(children=[
        TreeNode(name=name) for name in ('c', 'd', 'b', 'a')])

    error = 'Node f is not in self'
    with six.assertRaisesRegex(self, MissingNodeError, error):
        njtree.root(['f'])
def test_to_newick_single_node(self):
    """to_newick on a lone node, with/without a name and semicolon."""
    # (TreeNode kwargs, to_newick kwargs, expected newick string)
    cases = (
        ({}, {}, ';'),
        ({}, {'semicolon': False}, ''),
        ({'name': 'brofist'}, {}, 'brofist;'),
        ({'name': 'brofist'}, {'semicolon': False}, 'brofist'),
    )
    for node_kwargs, newick_kwargs, exp in cases:
        obs = TreeNode(**node_kwargs).to_newick(**newick_kwargs)
        self.assertEqual(obs, exp)
def test_adopt(self):
    """_adopt must set the parent link of the adopted node"""
    root = self.simple_t
    first, second, third = (TreeNode(name=label)
                            for label in ('n1', 'n2', 'n3'))

    root._adopt(first)
    last_child = root.children[-1]
    last_child._adopt(second)
    second._adopt(third)

    # _adopt leaves .children alone, so the root still has two children
    self.assertEqual(len(root.children), 2)
    self.assertIs(first.parent, root)
    self.assertIs(second.parent, root.children[-1])
    self.assertIs(third.parent, second)
def root_above(node, name=None):
    """Re-root a tree between a given node and its parent.

    Parameters
    ----------
    node : skbio.TreeNode
        node above which the new root will be placed
    name : str, optional
        name of the new root

    Returns
    -------
    skbio.TreeNode
        resulting rooted tree

    Notes
    -----
    Unlike scikit-bio's `root_at` function which actually generates an
    unrooted tree, this function generates a rooted tree (the root of
    which has exactly two children).
    """
    # one copy walking down from the node, one walking up from its parent
    left = walk_copy(node, node.parent)
    right = walk_copy(node.parent, node)

    # midpoint: each basal branch gets half of the original branch length
    half = node.length / 2
    left.length = half
    right.length = half

    # the two copies become the only children of the new root
    res = TreeNode(name, children=[left, right])
    res.support = None
    return res
def iter_node(node):
    """Recursively attach the taxdump children of `node` as tree nodes.

    Relies on `taxdump` and `branch_length` from the enclosing scope.
    """
    def sort_key(taxid):
        # fewest children first, ties broken by numeric taxid, so that
        # results are replicable
        return len(taxdump[taxid]['children']), int(taxid)

    for cid in sorted(taxdump[node.name]['children'], key=sort_key):
        child = TreeNode(cid, branch_length)
        node.extend([child])
        iter_node(child)
def setUp(self):
    """Build the tree fixtures and list-mutating helpers used by the tests"""
    self.simple_t = TreeNode.from_newick("((a,b)i1,(c,d)i2)root;")

    # hand-wired tree: a -> (b -> (c -> (d, e, f -> (g))), h)
    nodes = {label: TreeNode(label) for label in 'abcdefgh'}
    edges = (('a', 'b'), ('b', 'c'), ('c', 'd'), ('c', 'e'),
             ('c', 'f'), ('f', 'g'), ('a', 'h'))
    for parent, child in edges:
        nodes[parent].append(nodes[child])
    self.TreeNode = nodes
    self.TreeRoot = nodes['a']

    def rev_f(items):
        # reverse the list in place
        items.reverse()

    def rotate_f(items):
        # move the last element to the front, in place
        last = items[-1]
        items[1:] = items[:-1]
        items[0] = last

    self.rev_f = rev_f
    self.rotate_f = rotate_f
    self.complex_tree = TreeNode.from_newick("(((a,b)int1,(x,y,(w,z)int2,"
                                             "(c,d)int3)int4),(e,f)int5);")
def testCountCladesEmptyTree(self):
    """
    A tree consisting of a lone, childless node must yield no clades.
    """
    njtree = NJTree()
    njtree.tree = TreeNode()

    expected = Counter()
    self.assertEqual(expected, njtree.countClades())
def build_taxdump_tree(taxdump):
    """Build NCBI taxdump tree.

    Parameters
    ----------
    taxdump : dict of dict
        attributes of each taxid, see read_taxdump

    Returns
    -------
    skbio.TreeNode
        a tree representing taxdump, rooted at taxid '1'
    """
    def attach_children(parent):
        # recursively hang each listed child (and its descendants) on parent
        for cid in taxdump[parent.name]['children']:
            child = TreeNode(cid)
            parent.extend([child])
            attach_children(child)

    # the tree is grown downwards from the root, taxid '1'
    tree = TreeNode('1')
    attach_children(tree)
    return tree
def iter_node(node):
    """Recursively attach the children of `node` listed in `taxon2children`.

    `taxon2children` comes from the enclosing scope and is indexed by node
    name. A node whose name has no entry is left without children.
    """
    # Guard only the lookup: a KeyError raised while building or attaching
    # a child is a genuine error and must not be swallowed.
    try:
        children = taxon2children[node.name]
    except KeyError:
        return
    for x in children:
        child = TreeNode(x)
        node.extend([child])
        iter_node(child)
def testCanonicalizeByNodeLength(self):
    """
    Canonicalization must order child nodes by their length.
    """
    njtree = NJTree()
    njtree.tree = TreeNode(children=[
        TreeNode(length=length) for length in (13, 11, 18, 14)])

    lengthsBefore = [child.length for child in njtree.tree.children]
    self.assertEqual([13, 11, 18, 14], lengthsBefore)

    lengthsAfter = [child.length
                    for child in njtree.canonicalize().tree.children]
    self.assertEqual([11, 13, 14, 18], lengthsAfter)
def _setup_balanced_binary(self, kwargs_list):
    """Build a balanced binary tree from seven TreeNode kwargs dicts.

    Nodes 2-5 become the tips, nodes 0 and 1 their parents, and node 6
    the root, which is returned.
    """
    nodes = [TreeNode(**kwargs) for kwargs in kwargs_list]
    left, right, root = nodes[0], nodes[1], nodes[6]

    left.extend([nodes[2], nodes[3]])
    right.extend([nodes[4], nodes[5]])
    root.extend([left, right])
    return root
def _setup_tree(self, kwargs_list):
    """Build a small unbalanced tree from six TreeNode kwargs dicts.

    Node 4 receives nodes 2 and 3; node 5 is the root with children
    0, 1 and 4, and is returned.
    """
    nodes = [TreeNode(**kwargs) for kwargs in kwargs_list]
    inner, root = nodes[4], nodes[5]

    inner.extend([nodes[2], nodes[3]])
    root.extend([nodes[0], nodes[1], inner])
    return root
def testCanonicalizeByNodeName(self):
    """
    When node lengths and tip counts are equal, canonicalization must
    order child nodes by name.
    """
    njtree = NJTree()
    njtree.tree = TreeNode(children=[
        TreeNode(name=name) for name in ('c', 'd', 'b', 'a')])

    namesBefore = [child.name for child in njtree.tree.children]
    self.assertEqual(['c', 'd', 'b', 'a'], namesBefore)

    namesAfter = [child.name
                  for child in njtree.canonicalize().tree.children]
    self.assertEqual(['a', 'b', 'c', 'd'], namesAfter)
def _setup_linked_list(self, kwargs_list):
    """Chain one node per kwargs dict into a single-child "linked list".

    Each new node adopts the previously built node as its only child, so
    the node built from the last kwargs dict is the root. Returns that
    root, or None when `kwargs_list` is empty.
    """
    head = None
    for kwargs in kwargs_list:
        node = TreeNode(**kwargs)
        if head is not None:
            node.append(head)
        head = node
    return head
def test_gops(self):
    """Naming, lengths and appends must show up in the str() of a tree"""
    root = TreeNode()
    self.assertEqual(str(root), ';')

    root.name = 'abc'
    self.assertEqual(str(root), 'abc;')

    # the branch length of the root is not suppressed
    root.length = 3
    self.assertEqual(str(root), 'abc:3;')

    inner = TreeNode()
    root.append(inner)
    self.assertEqual(str(root), '()abc:3;')

    leaf = TreeNode()
    inner.append(leaf)
    self.assertEqual(str(root), '(())abc:3;')

    leaf.name = 'xyz'
    self.assertEqual(str(root), '((xyz))abc:3;')

    inner.length = 2
    self.assertEqual(str(root), '((xyz):2)abc:3;')
def testRootByTwoTreeNodes(self):
    """
    Passing two TreeNode instances to root() must be accepted.
    """
    njtree = NJTree()
    njtree.tree = TreeNode(children=[
        TreeNode(name=name) for name in ('c', 'd', 'b', 'a')])

    outgroup = [njtree.tree.find('a'), njtree.tree.find('b')]
    rooted = njtree.root(outgroup)
    self.assertEqual(
        ['c', 'd', 'b', 'a'],
        [child.name for child in rooted.tree.children])
def test_siblings(self):
    """siblings() must return the other children of a node's parent"""
    tree = self.simple_t

    # the root has no siblings
    self.assertEqual(tree.siblings(), [])

    obs = tree.children[0].siblings()
    self.assertEqual([o.name for o in obs], ['i2'])

    obs = tree.children[1].children[1].siblings()
    self.assertEqual([o.name for o in obs], ['c'])

    # siblings appended later must be reported as well
    for label in ("foo", "bar"):
        tree.append(TreeNode(name=label))
    obs = tree.children[1].siblings()
    self.assertEqual([o.name for o in obs], ['i1', 'foo', 'bar'])
def tree_nodify(G, node, added_tips=None):
    """Recursively convert the part of graph `G` below `node` to a TreeNode.

    Parameters
    ----------
    G : graph
        Object exposing ``out_edges(node)``, whose items have the child at
        index 1 (presumably a networkx directed graph -- confirm with the
        caller).
    node : graph node
        Node to convert. Leaves are named after ``node.char_string``,
        internal nodes after ``node.name``.
    added_tips : dict, optional
        Map of tip name to the number of times it has been encountered.
        Updated in place. A fresh dict is created when omitted, so that
        separate top-level calls do not share state.

    Returns
    -------
    tuple of (TreeNode or None, dict)
        The converted subtree and the updated `added_tips`. The subtree is
        None for a leaf whose name was already seen, and for an internal
        node all of whose children were dropped.
    """
    if added_tips is None:
        added_tips = {}

    children = [edge[1] for edge in G.out_edges(node)]

    if not children:
        # Leaf: keep only the first occurrence of each tip name.
        name = node.char_string
        if name in added_tips:
            added_tips[name] += 1
            return None, added_tips
        added_tips[name] = 1
        tip = TreeNode(name=name, length=None, parent=None, children=[])
        return tip, added_tips

    name = str(node.name)
    if name == 'state-node':
        # every 'state-node' gets a random integer label instead
        name = np.random.randint(MAX_RAND)

    # Make sure we do not keep repeated tips, condense
    clean_children = []
    for child in children:
        subtree, added_tips = tree_nodify(G, child, added_tips)
        if subtree is not None:
            clean_children.append(subtree)

    # If this node has no clean children, remove it as well
    if not clean_children:
        return None, added_tips

    internal = TreeNode(name=str(name), parent=None, children=clean_children)
    return internal, added_tips
def test_pre_and_postorder_no_children(self):
    """pre_and_postorder on a single, childless node"""
    lone = TreeNode('brofist')

    # with include_self (the default) the node itself is yielded
    visited = [n.name for n in lone.pre_and_postorder()]
    self.assertEqual(visited, ['brofist'])

    # without include_self there is nothing left to visit
    visited = list(lone.pre_and_postorder(include_self=False))
    self.assertEqual(visited, [])
def testCountCladesTwoChildren(self):
    """
    A root with two children, one of which itself has two children,
    must yield two clades.
    """
    inner = TreeNode(children=[TreeNode(name='a'), TreeNode(name='b')])
    njtree = NJTree()
    njtree.tree = TreeNode(children=[inner, TreeNode(name='c')])

    expected = {
        frozenset(['a', 'b']): 1,
        frozenset(['a', 'b', 'c']): 1,
    }
    self.assertEqual(expected, njtree.countClades())
def test_missing_tip_name(self):
    """from_newick should produce the correct tree when a tip has no name"""
    obs = TreeNode.from_newick(missing_tip_name)

    # expected tree: two unnamed internal nodes holding (a, b) and
    # (c, <unnamed>) respectively
    left, right = TreeNode(), TreeNode()
    exp = TreeNode()
    exp.append(left)
    exp.append(right)
    left.append(TreeNode(name='a'))
    left.append(TreeNode(name='b'))
    right.append(TreeNode(name='c'))
    right.append(TreeNode())

    self.assertEqual(str(obs), str(exp))
def test_nonames(self):
    """from_newick should produce the correct tree when nothing is named"""
    obs = TreeNode.from_newick(no_names)

    # expected tree: two internal nodes with two tips each, all unnamed
    left, right = TreeNode(), TreeNode()
    exp = TreeNode()
    exp.append(left)
    exp.append(right)
    for parent in (left, right):
        parent.append(TreeNode())
        parent.append(TreeNode())

    self.assertEqual(str(obs), str(exp))
def _canonicalize(node):
    """
    Return a canonical copy of C{node}: tips are copied as they are,
    internal nodes are rebuilt with their canonicalized children sorted
    by C{_key}.
    """
    if node.is_tip():
        return node.copy()

    # This is very inefficient: the key function computes sorted lists
    # of node tip names repeatedly in a naive way. Working from the
    # leaves to the root and combining tip names would be faster, but
    # for now the slow, simple approach is used.
    ordered = sorted((_canonicalize(child) for child in node.children),
                     key=_key)
    result = TreeNode(children=ordered, length=node.length,
                      name=node.name)

    # carry the support value over when the node has one
    try:
        result.support = node.support
    except AttributeError:
        pass
    return result
def test_remove(self):
    """remove() reports whether the given node was actually a child"""
    tree = self.simple_t

    # removing an existing child succeeds and shrinks .children
    self.assertTrue(tree.remove(tree.children[0]))
    self.assertEqual(len(tree.children), 1)

    # a node that was never attached cannot be removed
    stranger = TreeNode()
    self.assertFalse(tree.remove(stranger))
def test_minimal(self):
    """from_newick should produce the correct minimal tree"""
    parsed = TreeNode.from_newick(minimal)

    # expected: an unnamed root with a single unnamed child
    expected = TreeNode()
    expected.append(TreeNode())

    self.assertEqual(str(parsed), str(expected))