def test_redundant_set(self):
    """Re-setting the same children must leave the child list unchanged."""
    root = dendropy.Node(label="parent")
    children = [dendropy.Node(label=name) for name in ("c1", "c2", "c3")]
    # Assign the identical list twice in a row ...
    for _ in range(2):
        root.set_child_nodes(children)
    # ... and then once more with every child listed twice.
    root.set_child_nodes(children + children)
    self.assertEqual(root._child_nodes, children)
    for child in root.child_node_iter():
        self.assertIs(child.parent_node, root)
def test_edge_tail_node_setting(self):
    """Setting ``edge.tail_node`` on each child must attach it to the parent."""
    parent = dendropy.Node(label="parent")
    assigned_ch = [dendropy.Node(label=c) for c in ["c1", "c2", "c3"]]
    for ch in assigned_ch:
        ch.edge.tail_node = parent
    # The checks below do not depend on any per-child loop variable, so
    # they are run once instead of once per child.
    self.assertEqual(parent._child_nodes, assigned_ch)
    for nd in parent.child_node_iter():
        self.assertIs(nd.parent_node, parent)
        self.assertIs(nd.edge.tail_node, parent)
        self.assertIs(nd.edge.head_node, nd)
def test_new_child_at_pos(self):
    """``insert_new_child`` must place the new child at every valid position."""
    new_child_labels = ["c1", "c2", "c3"]
    insert_ch_label = "x1"
    for pos in range(len(new_child_labels) + 1):
        parent = dendropy.Node(label="parent")
        assigned_ch = [dendropy.Node(label=c) for c in new_child_labels]
        parent.set_child_nodes(assigned_ch)
        parent.insert_new_child(pos, label=insert_ch_label)
        # Compare whole label sequences so that a missing or extra child
        # is detected, not only a wrong label at a visited index.
        expected = (
            new_child_labels[:pos]
            + [insert_ch_label]
            + new_child_labels[pos:]
        )
        observed = [ch.label for ch in parent._child_nodes]
        self.assertEqual(observed, expected)
def test_remove_child(self):
    """``remove_child`` returns the removed node and keeps the other children in order."""
    labels = ["c1", "c2", "c3"]
    for victim_idx, _ in enumerate(labels):
        parent = dendropy.Node(label="parent")
        parent.set_child_nodes([dendropy.Node(label=lbl) for lbl in labels])
        # Snapshot of the children before removal; edited by hand below to
        # form the expected result.
        expected = list(parent._child_nodes)
        victim = expected[victim_idx]
        returned = parent.remove_child(victim)
        self.assertIs(victim, returned)
        expected.remove(victim)
        remaining = list(parent._child_nodes)
        self.assertEqual(expected, remaining)
def resolveTree(startTree):
    """Graft branches recorded on subtrees onto ``startTree`` and return it.

    Every edge of ``startTree`` is visited in postorder. Each edge is expected
    to carry a ``desc`` mapping of ``subtree -> bitmask``. For every
    (subtree, bitmask) pair that has not been handled yet, the branches stored
    in ``subtree.fullSubEdgeMap`` under that bitmask and under its complement
    (``bitmask ^ subtree.startTreeBitmask``) are re-attached around the edge.

    ``startTree`` is modified in place; the same object is returned.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-mangled source -- confirm the extent of the ``else`` branch
    against the upstream file.
    """
    # (subtree, bitmask) pairs already processed, including complements.
    used = set()
    for edge in startTree.postorder_edge_iter():
        head, tail = edge.head_node, edge.tail_node
        for subtree, bitmask in edge.desc.items():
            if (subtree, bitmask) in used:
                continue
            # Mark both the bitmask and its complement as handled.
            used.add((subtree, bitmask))
            used.add((subtree, bitmask ^ subtree.startTreeBitmask))
            # When the key exists, .get() returns the list stored in the map,
            # so the extend() below mutates that stored list.
            edgeSet = subtree.fullSubEdgeMap.get(bitmask, [])
            edgeSet.extend(
                reversed(
                    subtree.fullSubEdgeMap.get(
                        bitmask ^ subtree.startTreeBitmask, [])))
            for branch in edgeSet:
                if branch.tail_node in subtree.nodeMap:
                    # Reuse the node already mapped for this branch's tail.
                    joinPoint = subtree.nodeMap[branch.tail_node]
                else:
                    # Splice a new node into the current edge, directly
                    # above ``head``.
                    joinPoint = dendropy.Node()
                    if tail is not None:
                        tail.remove_child(head)
                        tail.add_child(joinPoint)
                    joinPoint.add_child(head)
                    # Later splices for this edge go above the new node.
                    head = joinPoint
                    joinPoint.edge.desc = dict(edge.desc)
                # Move the branch's head node from its old tail to joinPoint.
                branch.tail_node.remove_child(branch.head_node)
                joinPoint.add_child(branch.head_node)
    return startTree
def json_to_dendropy_sub(json, node, taxon_set):
    """Recursively populate ``node`` (and its descendants) from ``json``.

    Every entry of ``json`` other than ``'children'`` becomes an attribute of
    ``node``: values convertible with ``float()`` are stored as floats, the
    string ``'undefined'`` is stored as ``None``, anything else is stored
    unchanged. Each element of ``json['children']`` becomes a new child node,
    built by a recursive call. The child's edge length is
    ``child.xvalue - node.xvalue`` when the child has an ``xvalue``, else the
    child's ``branch_length`` when present, else ``1.0``.

    A node that ends up with no children is treated as a leaf: it gets a
    taxon labelled with the upper-cased ``json['strain']``, a ``strain``
    attribute, and the taxon is added to ``taxon_set``.

    Parameters
    ----------
    json : dict
        Description of the current node (note: shadows the ``json`` module
        inside this function).
    node : dendropy node
        Node to populate; modified in place.
    taxon_set : object with ``add_taxon``
        Collects the taxa created for leaves.
    """
    # xvalue is set first so children can compute their edge length from it,
    # whatever order the dictionary is iterated in.
    if 'xvalue' in json:
        node.xvalue = float(json['xvalue'])
    # .items() works on both Python 2 and Python 3 (iteritems is 2-only).
    for attr, val in json.items():
        if attr == 'children':
            for sub_json in val:
                child_node = dendropy.Node()
                json_to_dendropy_sub(sub_json, child_node, taxon_set)
                if hasattr(child_node, 'xvalue'):
                    edge_length = child_node.xvalue - node.xvalue
                elif hasattr(child_node, 'branch_length'):
                    edge_length = child_node.branch_length
                else:
                    edge_length = 1.0
                node.add_child(child_node, edge_length=edge_length)
        else:
            # Only a failed numeric conversion triggers the fallback; other
            # errors are no longer silently swallowed.
            try:
                value = float(val)
            except (TypeError, ValueError, OverflowError):
                value = None if val == 'undefined' else val
            setattr(node, attr, value)
    if len(node.child_nodes()) == 0:
        node.taxon = dendropy.Taxon(label=json['strain'].upper())
        node.strain = json['strain']
        taxon_set.add_taxon(node.taxon)
def nx_tree_to_dd_tree(T):
    """Convert the networkx tree ``T`` into a dendropy tree.

    A new node labelled ``'my_root'`` is inserted in the middle of one edge
    of ``T`` and used as the root of the result. ``T`` itself is modified in
    place by this insertion.
    """
    root_label = 'my_root'
    T.add_node(root_label)
    # Pick one edge (first node in iteration order and its first neighbour)
    # and replace it by two edges through the new root.
    first = next(iter(T.nodes()))
    neighbour = next(iter(T.neighbors(first)))
    T.remove_edge(first, neighbour)
    T.add_edge(first, root_label)
    T.add_edge(neighbour, root_label)

    # One dendropy node per networkx node, keyed by its label.
    by_label = {}
    for label in T.nodes():
        by_label[label] = dd.Node(label=label)

    result = dd.Tree()
    result.seed_node = by_label[root_label]
    # Breadth-first traversal from the root yields (parent, children) pairs.
    for parent_label, child_labels in nx.bfs_successors(T, root_label):
        parent = by_label[parent_label]
        for child_label in child_labels:
            parent.add_child(by_label[child_label])
    return result
def mergeLineages(lin1, lin2):
    """Return a new node with ``lin1`` and ``lin2`` as its children.

    The new node gets ``gens = 0`` and an edge length of ``0.0``.
    """
    parent = dendropy.Node()
    parent.gens = 0
    parent.edge_length = 0.0
    for lineage in (lin1, lin2):
        parent.add_child(lineage)
    return parent
def test_edge_head_node_setting(self):
    """Assigning a new edge to a node makes the node that edge's head."""
    nd = dendropy.Node(label="x")
    original_edge = nd.edge  # read before replacement; not otherwise used
    replacement = dendropy.Edge()
    nd.edge = replacement
    self.assertIs(nd.edge, replacement)
    self.assertIs(nd.edge.head_node, nd)
def _build_lineage_queue(
    self,
    lineage_collection,
    max_time,
    is_drop_extinct,
    node_attr,
    label_template,
):
    """Build a ``_LineageQueue`` from ``lineage_collection``.

    Each lineage that is kept gets a fresh node stored on it under the
    attribute name ``node_attr`` and is pushed onto the queue. When
    ``is_drop_extinct`` is set, extinct lineages are only registered by
    reference and get no node.

    The node's ``_time`` is ``max_time`` when ``is_drop_extinct`` is set or
    the lineage has no extinction time; otherwise it is the lineage's
    extinction time.
    """
    queue = ProtractedSpeciationProcess._LineageQueue()
    for lineage in lineage_collection:
        if is_drop_extinct and lineage.is_extinct:
            queue.register_lineage_reference(lineage=lineage)
            continue
        tip = dendropy.Node()
        if is_drop_extinct or lineage.extinction_time is None:
            tip._time = max_time
        else:
            tip._time = lineage.extinction_time
        tip._taxon_label = label_template.format(
            species_id=lineage.species_id,
            lineage_id=lineage.lineage_id,
        )
        tip._lineage_id = lineage.lineage_id
        tip._species_id = lineage.species_id
        setattr(lineage, node_attr, tip)
        queue.push_lineage(lineage=lineage, is_copy=True)
    return queue
def join_nodes_in_one_tree(tree1, nodeAinT1, cladeA, tree2, nodeBinT2, cladeB):
    """Join clade A and clade B in a single tree.

    1. Re-root tree 1 as (A,X); and extract (A,X); and A
    2. Re-root tree 2 as (B,Y); and extract (B,Y); and B
    3. Build new tree 2 as (A,(B,Y));
    4. Return tree 1 and tree 2

    Parameters
    ----------
    tree1 : dendropy tree object
    nodeAinT1 : dendropy node object
    cladeA : list of str
        taxon labels below node A
    tree2 : dendropy tree object
    nodeBinT2 : dendropy node object
    cladeB : list of str
        taxon labels below node B

    Returns
    -------
    list
        ``[tree1, tree2]``, both dendropy tree objects; ``tree2`` is the
        newly built rooted tree.
    """
    tree1, nodeAinT1 = extract_nodes_from_split(tree1, nodeAinT1, cladeA)
    tree2, nodeBinT2 = extract_nodes_from_split(tree2, nodeBinT2, cladeB)

    # TODO: Remove deep copies!
    copy_of_a = deepcopy(nodeAinT1)

    new_root = dendropy.Node()
    new_root.add_child(copy_of_a)
    new_root.add_child(tree2.seed_node)

    tree2 = dendropy.Tree(seed_node=new_root)
    tree2.is_rooted = True
    return [tree1, tree2]
def pure_kingman_tree(taxon_namespace, pop_size=1, rng=None):
    """
    Generates a tree under the unconstrained Kingman's coalescent process.

    Parameters
    ----------
    taxon_namespace: |TaxonNamespace| instance
        A pre-populated |TaxonNamespace| where the contained |Taxon| instances
        represent the genes or individuals sampled from the population.
    pop_size : numeric
        The size of the population from which the coalescent process is
        sampled.
    rng : random number generator, optional
        Source of randomness; ``GLOBAL_RNG`` is used when omitted.

    Returns
    -------
    t : |Tree|
        A tree sampled from the Kingman's neutral coalescent.
    """
    generator = GLOBAL_RNG if rng is None else rng
    # One tip per taxon in the namespace.
    tips = []
    for taxon in taxon_namespace:
        tips.append(dendropy.Node(taxon=taxon))
    coalesced = coalesce_nodes(
        nodes=tips,
        pop_size=pop_size,
        period=None,
        rng=generator,
        use_expected_tmrca=False,
    )
    # The first element of the result is used as the root of the tree.
    return dendropy.Tree(taxon_namespace=taxon_namespace, seed_node=coalesced[0])
def pure_kingman_tree_shape(num_leaves, pop_size=1, rng=None):
    """
    Like :func:`dendropy.model.pure_kingman_tree`, but does not assign taxa
    to tips.

    Parameters
    ----------
    num_leaves : int
        Number of individuals/genes sampled.
    pop_size : numeric
        The size of the population from which the coalescent process is
        sampled.
    rng : random number generator, optional
        Source of randomness; ``GLOBAL_RNG`` is used when omitted.

    Returns
    -------
    t : |Tree|
        A tree sampled from the Kingman's neutral coalescent.
    """
    generator = GLOBAL_RNG if rng is None else rng
    # Unlabelled tips: one bare node per requested leaf.
    tips = []
    for _ in range(num_leaves):
        tips.append(dendropy.Node())
    coalesced = coalesce_nodes(
        nodes=tips,
        pop_size=pop_size,
        period=None,
        rng=generator,
        use_expected_tmrca=False,
    )
    return dendropy.Tree(seed_node=coalesced[0])
def extend_node(node, model='LRF'):
    """Split ``node`` by moving a random subset of its children under a new child.

    The children of ``node`` are shuffled; the first ``k`` of them (``k``
    drawn uniformly from ``2 .. len(children) - 1``) are re-attached below a
    newly created child of ``node``, the remaining ones are re-attached to
    ``node`` itself. ``node`` is modified in place; nothing is returned.

    Parameters
    ----------
    node : dendropy node
        Node to extend.
    model : str
        Either ``'LRF'`` or ``'ELRF'``.

    Raises
    ------
    ValueError
        If ``model`` is not supported, or if ``node`` has too few adjacent
        nodes (at most 3 for a node with a parent, at most 2 otherwise).
    """
    if model not in ('LRF', 'ELRF'):
        raise ValueError('unsupported model')
    degree = len(node.adjacent_nodes())
    if (degree <= 3 and node.parent_node is not None) or degree <= 2:
        print(node.adjacent_nodes(), node.incident_edges())
        raise ValueError("Insufficient degree. Should not happen ")
    # store the children in a temporary list and shuffle it
    children = node.child_nodes()
    random.shuffle(children)
    node.clear_child_nodes()
    # NOTE(review): ``newlab`` is computed but never used -- the new node
    # below always copies ``node.label``. The statement is kept because
    # ``random.choice`` advances the RNG state for the 'LRF' model; confirm
    # whether ``label=newlab`` was intended.
    newlab = node.label if model == 'ELRF' else random.choice(
        ['speciation', 'duplication'])
    new = node.add_child(dendropy.Node(label=node.label))
    # choose between 2 and n-1 children to be connected to the new node
    k = random.randint(2, len(children) - 1)
    for child in children[:k]:
        new.add_child(child)
    for child in children[k:]:
        node.add_child(child)
def _add_node(tree, time, regraft_node):
    """Attach a new node next to the regraft target and refresh the tree.

    ``regraft_node`` is indexed as a pair: element 0 is a node, element 1 a
    value subtracted from ``time`` to give the new edge length. The new node
    becomes a child of element 0's parent. Afterwards the tree's taxa are
    re-indexed and its splits updated.

    Returns the newly added node.
    """
    target = regraft_node[0]
    attach_to = target.parent_node
    length = time - regraft_node[1]
    added = attach_to.add_child(dpy.Node(), edge_length=length)
    tree.reindex_subcomponent_taxa()
    tree.update_splits()
    return added
def generate_star_tree2(num_tips=10, branch_length=1):
    """Build a star tree whose root is taxon ``s0`` with ``num_tips`` tips.

    The taxon namespace holds ``s0 .. s<num_tips>``. The root gets ``X = 0``
    and ``time = 0``. Each tip ``s1 .. s<num_tips>`` is attached directly to
    the root with edge length ``branch_length``, ``time = branch_length`` and
    a trait value ``X`` drawn from ``random.gauss(0, 1)``.

    Parameters
    ----------
    num_tips : int
        Number of tips attached to the root (default 10).
    branch_length : numeric
        Edge length and ``time`` of every tip (default 1).

    Returns
    -------
    dendropy tree
    """
    names = ["s" + str(i) for i in range(num_tips + 1)]
    taxon_namespace = dendropy.TaxonNamespace(names)
    tree = dendropy.Tree(taxon_namespace=taxon_namespace)

    root = tree.seed_node
    root.taxon = taxon_namespace.get_taxon("s0")
    root.X = 0
    root.time = 0

    # Index 0 is the root; every other index becomes a tip. (Previously the
    # index was never advanced, so no tip was ever created.)
    for index in range(1, num_tips + 1):
        node = dendropy.Node(
            taxon=taxon_namespace.get_taxon("s" + str(index)))
        node.edge_length = branch_length
        node.X = random.gauss(0, 1)
        node.time = branch_length
        root.add_child(node)
    return tree
def insert(placed_edge, query_name, x_1, x_2):
    """Insert a query leaf on ``placed_edge`` by splitting that edge.

    A new internal node replaces the edge's head under the edge's tail; the
    old head and a new leaf (taxon ``query_name``, edge length ``x_1``)
    become its children. The original edge length is divided between the new
    node's edge and the old head's edge using ``max(x_2, 0)``; which side
    gets which part depends on how the edge lies relative to the
    module-level ``master_edge``.
    """
    parent = placed_edge.tail_node
    old_head = placed_edge.head_node
    parent.remove_child(old_head)

    joint = dy.Node()
    joint.add_child(old_head)
    query = dy.Node(taxon=dy.Taxon(query_name))
    joint.add_child(query)
    query.edge_length = x_1
    parent.add_child(joint)

    offset = max(x_2, 0)
    # Evaluated after the re-wiring above, as the ancestor list depends on
    # the current tree structure.
    toward_master = (
        placed_edge.head_node in list(master_edge.head_node.ancestor_iter())
        or master_edge == placed_edge
    )
    if toward_master:
        # placed_edge.length is read before old_head's length is overwritten.
        joint.edge_length = placed_edge.length - offset
        old_head.edge_length = offset
    else:
        joint.edge_length = offset
        old_head.edge_length = placed_edge.length - offset
def convert_trees(input, output=None):
    """Convert bracketed split lists, one tree per line, to Newick.

    For each line the first and last characters are dropped, every
    ``[...]`` group is read as one merge of the numbered taxa it lists, and
    the largest number on the line gives the number of taxa (labelled
    ``t1 .. tN``). Merges are applied in order, each one unit higher than the
    previous; heights are then turned into edge lengths and the tree is
    written in Newick format.

    Parameters
    ----------
    input : str
        Path of the file to read (note: shadows the ``input`` builtin inside
        this function).
    output : str, optional
        Path of the file to write; standard output is used when omitted.
    """
    handle = open(output, 'w') if output else sys.stdout
    try:
        with open(input, 'r') as fp:
            for line in fp:
                line = line.rstrip('\n').rstrip('\r')
                line = line[1:len(line) - 1]
                labels = re.findall(r'\d+', line)
                if not labels:
                    # Blank line (or one without taxa): nothing to convert.
                    continue
                splits = re.findall(r'\[[^\]]+\]', line)
                num_taxa = max(int(x) for x in labels)
                taxon_namespace = dendropy.TaxonNamespace(
                    ['t' + str(i + 1) for i in range(num_taxa)])
                tree = dendropy.Tree(taxon_namespace=taxon_namespace)

                # Maps a taxon number (as str) to the topmost node that
                # currently contains it.
                nodes = {}
                for idx in range(num_taxa):
                    leaf = dendropy.Node(edge_length=0.0)
                    leaf.taxon = taxon_namespace.get_taxon(
                        't' + str(idx + 1))
                    nodes[str(idx + 1)] = leaf

                current_height = 0.0
                for split in splits:
                    current_height += 1.0
                    idxs = re.findall(r'\d+', split)
                    # Distinct nodes to merge, in first-seen order so the
                    # output is deterministic.
                    members = []
                    seen = set()
                    for i in idxs:
                        member = nodes[i]
                        if id(member) not in seen:
                            seen.add(id(member))
                            members.append(member)
                    merged = dendropy.Node(edge_length=current_height)
                    for member in members:
                        merged.add_child(member)
                    for i in idxs:
                        nodes[i] = merged
                tree.seed_node = nodes['1']

                # Edge lengths hold heights so far; postorder guarantees a
                # parent's height is still intact when its children are
                # converted to branch lengths.
                for nd in tree.postorder_node_iter():
                    if nd.parent_node is None:
                        nd.edge.length = 0.0
                    else:
                        nd.edge.length = (
                            nd.parent_node.edge.length - nd.edge.length)
                handle.write(tree.as_string(schema="newick"))
    finally:
        # Close the output file even if conversion fails part-way.
        if handle is not sys.stdout:
            handle.close()
def makeTaxTree(splits, contigTax, outname):
    """Write a Newick taxonomy tree covering contigs and their splits.

    Each taxonomy list in ``contigTax`` is rewritten IN PLACE: a rank prefix
    (``k_``, ``p_``, ...) is prepended to each of the first seven entries and
    parentheses are replaced by square brackets. The tree chains the ranks
    below the root, then the contig, then each split. Splits whose contig
    has one of two known problematic taxonomies are skipped.

    Parameters
    ----------
    splits : iterable of str
        Split names of the form ``<contig>_split...``.
    contigTax : dict
        Maps contig name to its list of rank names (modified in place).
    outname : str
        Path of the Newick file to write.

    Returns
    -------
    set
        The splits that were skipped.
    """
    RANK_PREFIXES = ['k', 'p', 'c', 'o', 'f', 'g', 's']

    # Prefix the ranks and collect every name that needs a node.
    names = set()
    contigTax2 = {}
    for contig, ranks in contigTax.items():
        for i, prefix in enumerate(RANK_PREFIXES):
            labelled = '{}_{}'.format(prefix, ranks[i])
            # Parentheses will break newick
            ranks[i] = labelled.replace('(', '[').replace(')', ']')
        contigTax2[contig] = ranks
    for contig, ranks in contigTax2.items():
        names.update([contig] + ranks)
    # We want to have the contigs AND the splits in our tree
    names.update(splits)

    nodes = {name: dendropy.Node() for name in names}
    taxa = []
    for name, node in nodes.items():
        node.taxon = dendropy.Taxon(name)
        taxa.append(node.taxon)
    namespace = dendropy.TaxonNamespace()
    namespace.add_taxa(taxa)

    # Create and populate tree
    tree = dendropy.Tree(taxon_namespace=namespace)
    parents = {}
    # This shouldn't be needed but since we have taxonomy problems do it
    # for now.
    removedSplits = set()
    for split in splits:
        contig = split.rsplit('_split', 1)[0]
        tax = contigTax2[contig]
        weird_species = tax[-1] == 's_Firmicutes bacterium'
        if weird_species or tax[4] == 'f_Clostridia bacterium [no family in NCBI]':
            # Weird taxonomy, find solution, avoid for now!
            print(contig)
            removedSplits.add(split)
            continue
        tree.seed_node.add_child(nodes[tax[0]])
        for upper, lower in zip(tax, tax[1:]):
            nodes[upper].add_child(nodes[lower])
            parents.setdefault(lower, set()).add(upper)
        nodes[tax[-1]].add_child(nodes[contig])
        nodes[contig].add_child(nodes[split])

    # All nodes should have only one parent!
    for name, seen_parents in parents.items():
        if len(seen_parents) > 1:
            print(name, seen_parents)

    newick = tree.as_string('newick').replace('\'', '')
    with open(outname, 'w') as outfile:
        outfile.write(newick)
    return removedSplits
def tree_test_simple():
    """Simulate one lineage on a two-deme tree and print the scaled result.

    Builds a root with a single child, attaches a ``Deme`` to each, runs
    ``History.simulate(1)``, checks exactly one lineage is returned, scales
    the resulting tree by generations and prints it followed by ``;``.
    """
    root = dendropy.Node()
    child = dendropy.Node()
    root.add_child(child)
    root.deme = Deme(3.0, 0.0, 0.1, 0.0, 5.0)
    child.deme = Deme(1.0, 0.9, 0.1, 5.0, 6.0)
    child.deme.Nsampled = 10000
    tree = dendropy.Tree(seed_node=root)
    hist = History(tree)
    lins = hist.simulate(1)
    assert len(lins) == 1
    tree = dendropy.Tree(seed_node=lins[0])
    gentree = scaleByGenerations(tree)
    tree.randomly_assign_taxa()
    gentree.randomly_assign_taxa()
    # Single-argument call form: valid on both Python 2 and Python 3.
    print(str(gentree) + ';')
def merge_children(children, **kwargs):
    """Create a parent node for ``children`` and return it.

    Extra keyword arguments are forwarded to the node constructor. When every
    child has a ``mask`` attribute, the parent's ``mask`` is the element-wise
    logical OR of the children's masks.

    Raises
    ------
    ValueError
        If ``children`` is empty.
    """
    if len(children) == 0:
        raise ValueError
    parent = dendropy.Node(**kwargs)
    for kid in children:
        parent.add_child(kid)
    every_child_masked = all(hasattr(kid, 'mask') for kid in children)
    if every_child_masked:
        masks = (kid.mask for kid in children)
        parent.mask = reduce(np.logical_or, masks)
    return parent
def test_basic_construction(self):
    """Constructor arguments end up on the node and on its edge."""
    z_taxon = dendropy.Taxon("z")
    node = dendropy.Node(taxon=z_taxon, label="x", edge_length=1)
    # Node-level attributes.
    self.assertIs(node.taxon, z_taxon)
    self.assertEqual(node.label, "x")
    # The subtending edge carries the length and points at the node.
    subtending = node.edge
    self.assertEqual(subtending.length, 1)
    self.assertIs(subtending.head_node, node)
    self.assertIs(subtending.tail_node, None)
def leaf(self, taxon, **kwargs):
    """Create a leaf node for ``taxon`` carrying its mask.

    ``taxon`` is first passed through ``self._convert_label``. Extra keyword
    arguments are forwarded to the node constructor (any ``taxon`` keyword is
    overridden by the converted value).

    Raises
    ------
    ValueError
        If the converted taxon is not contained in ``self``.
    """
    converted = self._convert_label(taxon)
    if converted not in self:
        raise ValueError("Given taxon must be in the TaxaMetadata object")
    kwargs['taxon'] = converted
    new_leaf = dendropy.Node(**kwargs)
    new_leaf.mask = self.taxon2mask(converted)
    return new_leaf
def garli_safe_add_child(par, child):
    """Adds an extra node so that the tree stays binary.

    A new "attacher" node is added below ``par`` with edge length 0.0. It
    receives ``child`` (edge length 0.0) and the last existing child of
    ``par``, which keeps its previous edge length.

    Assumes ``par`` has exactly two children on entry; this is not
    enforced -- TODO confirm with callers before turning it into a check.

    Returns
    -------
    tuple
        ``(attacher_nd, r_anc, r_anc_e_len)``: the new node, the child of
        ``par`` that was moved below it, and that child's edge length.
    """
    attacher_nd = dendropy.Node()
    # The last child of ``par`` is the one moved under the attacher.
    r_anc = par.child_nodes()[-1]
    r_anc_e_len = r_anc.edge.length
    par.add_child(attacher_nd, edge_length=0.0)
    attacher_nd.add_child(child, edge_length=0.0)
    par.remove_child(r_anc)
    attacher_nd.add_child(r_anc, edge_length=r_anc_e_len)
    return attacher_nd, r_anc, r_anc_e_len
def test_set_child_nodes(self):
    """``set_child_nodes`` wires parent/edge links and keeps grandchildren."""

    def check_attached(node, expected_parent):
        # A node is properly attached when its parent pointer and both ends
        # of its edge agree.
        self.assertIs(node._parent_node, expected_parent)
        self.assertIs(node.edge.tail_node, expected_parent)
        self.assertIs(node.edge.head_node, node)

    parent = dendropy.Node(label="parent")
    assigned_ch = [dendropy.Node(label=c) for c in ["c1", "c2", "c3"]]
    # Give every child two children of its own before attaching it.
    for nd in assigned_ch:
        grandchildren = [dendropy.Node(label=c) for c in ["s1", "s2"]]
        nd.set_child_nodes(grandchildren)
        nd._expected_children = grandchildren
    parent.set_child_nodes(assigned_ch)

    for ch in parent._child_nodes:
        self.assertIn(ch, assigned_ch)
        check_attached(ch, parent)
        self.assertEqual(len(ch._child_nodes), len(ch._expected_children))
        for sch in ch._child_nodes:
            self.assertIn(sch, ch._expected_children)
            check_attached(sch, ch)
    for ch in assigned_ch:
        self.assertTrue(ch in parent._child_nodes)
def setUp(self):
    """Build a small annotated node fixture.

    Structure: ``p1`` is the parent of ``n0``; ``n0`` has children ``c1`` and
    ``c2``; ``c2`` has child ``c3``. Nodes at even positions of
    ``self.nodes`` get an edge label and length. Every node and every edge
    gets one new annotation plus bound-attribute annotations.
    """
    self.taxa = [
        dendropy.Taxon(label=label) for label in ["a", "b", "c", "d"]
    ]
    self.n0 = dendropy.Node(label="0", taxon=self.taxa[0])
    self.c1 = dendropy.Node(label="1", taxon=None)
    self.c2 = dendropy.Node(label=None, taxon=self.taxa[1])
    self.c3 = dendropy.Node(label=None, taxon=self.taxa[2])
    self.p1 = dendropy.Node(label="-1", taxon=self.taxa[3])
    self.n0.parent_node = self.p1
    self.n0.set_child_nodes([self.c1, self.c2])
    self.c2.set_child_nodes([self.c3])
    self.nodes = [self.n0, self.c1, self.c2, self.c3, self.p1]
    for idx, nd in enumerate(self.nodes):
        if idx % 2 == 0:
            nd.edge.label = "E{}".format(idx)
            nd.edge.length = idx
        # Node annotations: one new value, one bound to the node's label,
        # and one bound to the new annotation's own name.
        an1 = nd.annotations.add_new(
            "a{}".format(idx),
            "{}{}{}".format(nd.label, nd.taxon, idx))
        nd.annotations.add_bound_attribute("label")
        an1.annotations.add_bound_attribute("name")
        # Same pattern for the node's edge.
        ae1 = nd.edge.annotations.add_new(
            "a{}".format(idx),
            "{}{}".format(nd.edge.label, idx))
        nd.edge.annotations.add_bound_attribute("label")
        ae1.annotations.add_bound_attribute("name")
    self.e0 = self.n0._edge
def add_trifurication(tree):
    """Add three new tips (X1, X2, X3) below the parent of the first leaf.

    The taxa are registered in the tree's namespace; the tips get edge
    lengths 1.234, 1.234 and 4.123 respectively. ``tree`` is modified in
    place.
    """
    parent_node = list(tree.leaf_node_iter())[0].parent_node
    labels = ('X1', 'X2', 'X3')
    lengths = (1.234, 1.234, 4.123)

    new_taxa = [dendropy.Taxon(label) for label in labels]
    for taxon in new_taxa:
        tree.taxon_namespace.add_taxon(taxon)

    new_tips = [dendropy.Node(edge_length=length) for length in lengths]
    for tip, taxon in zip(new_tips, new_taxa):
        tip.taxon = taxon
    for tip in new_tips:
        parent_node.add_child(tip)
def test_redundant_insert_child_at_pos(self):
    """Re-inserting an existing child moves it and never duplicates it."""
    labels = ["c1", "c2", "c3"]
    positions = range(len(labels))
    for source_idx in positions:
        for insertion_idx in positions:
            parent = dendropy.Node(label="parent")
            assigned_ch = [dendropy.Node(label=lbl) for lbl in labels]
            parent.set_child_nodes(assigned_ch)
            self.assertEqual(parent._child_nodes, assigned_ch)

            moved = assigned_ch[source_idx]
            parent.insert_child(insertion_idx, moved)

            current = parent._child_nodes
            # Same number of children, all distinct.
            self.assertEqual(len(current), len(assigned_ch))
            self.assertEqual(len(set(current)), len(current))
            for idx, ch in enumerate(current):
                if idx == insertion_idx:
                    self.assertIs(ch, moved)
                self.assertIn(ch, assigned_ch)
            # Every original child is still present exactly once.
            for ch in assigned_ch:
                self.assertIn(ch, parent._child_nodes)
                self.assertEqual(parent._child_nodes.count(ch), 1)
def mean_kingman_tree(taxon_namespace, pop_size=1, rng=None):
    """
    Returns a tree with coalescent intervals given by the expected times
    under Kingman's neutral coalescent.

    One tip is created per taxon in ``taxon_namespace``. ``GLOBAL_RNG`` is
    used when ``rng`` is omitted.
    """
    generator = GLOBAL_RNG if rng is None else rng
    tips = []
    for taxon in taxon_namespace:
        tips.append(dendropy.Node(taxon=taxon))
    coalesced = coalesce_nodes(
        nodes=tips,
        pop_size=pop_size,
        period=None,
        rng=generator,
        use_expected_tmrca=True,
    )
    # The first element of the result is used as the root of the tree.
    return dendropy.Tree(taxon_namespace=taxon_namespace, seed_node=coalesced[0])
def generate_star_tree():
    """Build an approximately star-shaped tree with 100 tips.

    Tips are named ``s0 .. s99``; each has edge length ``branch_length`` (1)
    and a trait ``X`` drawn from ``random.gauss(0, branch_length)``. The tips
    are joined by a ladder of internal nodes whose edges have the tiny length
    ``fake_step`` (1e-9); every internal node gets ``X = 0``. The finished
    tree is passed through ``calculate_times`` (defined elsewhere) and the
    result is returned.
    """
    num_tips = 100
    branch_length = 1
    fake_step = 0.000000001
    names = []
    for i in range(num_tips):
        names.append("s" + str(i))
    taxon_namespace = dendropy.TaxonNamespace(names)
    tree = dendropy.Tree(taxon_namespace=taxon_namespace)
    # Top of the ladder built so far (not yet attached to the tree).
    current_seed = dendropy.Node()
    current_seed.edge_length = fake_step
    # ``i`` is the index of the next tip to create; it is advanced manually
    # because the first branch consumes two tips.
    i = 0
    while i < num_tips:
        if i == 0:
            # First step: the bottom ladder node receives tips s0 and s1.
            node1 = dendropy.Node(taxon=taxon_namespace.get_taxon("s" + str(i)))
            node1.edge_length = branch_length
            node1.X = random.gauss(0, branch_length)
            current_seed.add_child(node1)
            i = i + 1
            node2 = dendropy.Node(taxon=taxon_namespace.get_taxon("s" + str(i)))
            node2.edge_length = branch_length
            node2.X = random.gauss(0, branch_length)
            current_seed.add_child(node2)
            current_seed.X = 0
            i = i + 1
        elif i == num_tips - 1:
            # Last tip: attach it and the whole ladder to the tree's root.
            node = dendropy.Node(taxon=taxon_namespace.get_taxon("s" + str(i)))
            node.edge_length = branch_length
            node.X = random.gauss(0, branch_length)
            i = i + 1
            tree.seed_node.X = 0
            tree.seed_node.add_child(node)
            tree.seed_node.add_child(current_seed)
        else:
            # Middle tips: add one ladder rung holding the new tip and the
            # ladder built so far.
            current_seed2 = dendropy.Node()
            current_seed2.edge_length = fake_step
            node = dendropy.Node(taxon=taxon_namespace.get_taxon("s" + str(i)))
            node.edge_length = branch_length
            node.X = random.gauss(0, branch_length)
            i = i + 1
            current_seed2.add_child(node)
            current_seed2.add_child(current_seed)
            current_seed = current_seed2
            current_seed.X = 0
    # NOTE(review): ``i`` equals num_tips here, so the labels requested below
    # (s100, s101, ...) are not among the names created above -- confirm what
    # get_taxon returns for them and whether this is intended.
    for node in tree.internal_nodes():
        node.taxon = taxon_namespace.get_taxon("s" + str(i))
        i = i + 1
    tree = calculate_times(tree)
    return tree