def __init__(self, tree=None, length_attr="length", *args, **kwargs):
    """
    Parameters
    ----------
    tree : either a PhyloNode or TreeNode instance
        when given, a node of this class is built recursively for every
        child of tree, and the params (copied) and name of tree are reused
    length_attr : str
        name of the attribute to use for length, defaults to 'length'
    *args, **kwargs
        forwarded to the constructor of every child node; when tree is
        None, kwargs are passed to PhyloNode.__init__ instead
    """
    if tree is not None:
        # length_attr is forwarded explicitly: it is a named parameter, so
        # it is never part of args/kwargs and would otherwise be reset to
        # its default (or taken from args[0]) in every descendant
        children = [
            type(self)(child, length_attr, *args, **kwargs)
            for child in tree.children
        ]
        PhyloNode.__init__(
            self,
            params=tree.params.copy(),
            children=children,
            name=tree.name,
        )
    else:
        PhyloNode.__init__(self, **kwargs)

    # todo do we need to validate the length_attr key exists?
    self._length = length_attr
    self._node_space = 1.3
    # remaining attributes are initialised to placeholder values here
    self._start = None
    self._end = None
    self._y = None
    self._x = None
    self._tip_rank = None
    self._max_x = 0
    self._min_x = 0
    self._max_y = 0
    self._min_y = 0
    self._theta = 0
    self._num_tips = 1
def test_missing_tip_name(self):
    """DndParser should produce the correct tree when missing a name"""
    parsed = DndParser(missing_tip_name)

    # expected topology: a root with two internal nodes, each holding two
    # tips; the tips are named a, b, c and one is left unnamed
    expected = PhyloNode()
    for _ in range(2):
        expected.append(PhyloNode())

    first = expected.children[0]
    for tip_name in ("a", "b"):
        first.append(PhyloNode(name=tip_name))

    second = expected.children[1]
    second.append(PhyloNode(name="c"))
    second.append(PhyloNode())

    self.assertEqual(str(parsed), str(expected))
def test_nonames(self):
    """DndParser should produce the correct tree when there are no names"""
    parsed = DndParser(no_names)

    # expected topology: a root with two internal nodes, each holding two
    # tips, and no node carries a name
    expected = PhyloNode()
    for _ in range(2):
        expected.append(PhyloNode())
    for index in range(2):
        internal = expected.children[index]
        for _ in range(2):
            internal.append(PhyloNode())

    self.assertEqual(str(parsed), str(expected))
def condense_node_order(matrix, smallest_index, node_order):
    """Merge two entries of node_order into a single parent node.

    Used to grow a tree while a distance matrix is being condensed (see
    condense_matrix); smallest_index is expected to come from
    find_smallest_index.

    Parameters
    ----------
    matrix
        distance matrix, indexed as ``matrix[i, j]``
    smallest_index
        pair of indices ``(index1, index2)`` of the nodes to merge
    node_order
        list of nodes; it is modified in place

    Returns
    -------
    node_order, in which position index1 now holds a new PhyloNode whose
    children are the two merged nodes and position index2 holds None.

    Notes
    -----
    Half of ``matrix[index1, index2]`` is stored as the TipLength of each
    merged node. The length of a merged node is that half distance, reduced
    by the TipLength of its first child when the node already has children.
    """
    first_index, second_index = smallest_index
    first = node_order[first_index]
    second = node_order[second_index]

    # each merged node sits half of the pairwise distance below the new parent
    half_distance = matrix[first_index, second_index] / 2.0
    for member in (first, second):
        member.length = (
            half_distance - member.children[0].TipLength
            if member.children
            else half_distance
        )
        member.TipLength = half_distance

    # join the pair under a fresh parent node
    merged = PhyloNode()
    merged.children.append(first)
    merged.children.append(second)
    first.parent = merged
    second.parent = merged

    # the merged node takes the first slot, the second slot is vacated
    node_order[first_index] = merged
    node_order[second_index] = None
    return node_order
def test_gops(self):
    """Basic PhyloNode operations should work as expected"""
    root = PhyloNode()

    def check(expected):
        # compare the current string form of the root node
        self.assertEqual(str(root), expected)

    check(";")
    root.name = "abc"
    check("abc;")
    root.length = 3
    check("abc:3;")  # don't suppress branch from root

    inner = PhyloNode()
    root.append(inner)
    check("()abc:3;")

    leaf = PhyloNode()
    inner.append(leaf)
    check("(())abc:3;")

    leaf.name = "xyz"
    check("((xyz))abc:3;")

    inner.length = 2
    check("((xyz):2)abc:3;")
def test_minimal(self):
    """DndParser should produce the correct minimal tree"""
    parsed = DndParser(minimal)

    # expected tree: a root holding a single unnamed child
    expected = PhyloNode()
    only_child = PhyloNode()
    expected.append(only_child)

    self.assertEqual(str(parsed), str(expected))
def get_tree(self, just_members=True):
    """returns the gene tree with tip names as gene stableid's

    Parameters
    ----------
    just_members : bool
        limits tips to just members of self, by taking the sub tree
        spanning the stable IDs of ``self.members``

    Returns
    -------
    The node selected as root (see the note in the body), or the sub tree
    derived from it when just_members is True.
    """
    # all nodes belonging to this gene tree
    gtrn = self.compara.ComparaDb.get_table("gene_tree_node")
    in_this_tree = gtrn.c.root_id == list(self._gene_tree_root)[0]
    query = sql.select(
        [
            gtrn.c.node_id,
            gtrn.c.parent_id,
            gtrn.c.distance_to_parent,
            gtrn.c.seq_member_id,
        ],
        whereclause=in_this_tree,
    )
    records = query.execute().fetchall()

    # get the gene stable IDs, via join of seq_member with gene_member
    # on gene_member_id
    seqmem_ids = [r["seq_member_id"] for r in records]
    seqmem = self.compara.ComparaDb.get_table("seq_member")
    genmem = self.compara.ComparaDb.get_table("gene_member")
    # built once and reused as the join's ON clause
    same_gene_member = genmem.c.gene_member_id == seqmem.c.gene_member_id
    joined = seqmem.join(genmem, same_gene_member)
    query = (
        sql.select(
            [
                joined.c.seq_member_seq_member_id,
                joined.c.gene_member_stable_id,
            ]
        )
        .where(joined.c.seq_member_seq_member_id.in_(seqmem_ids))
        .select_from(joined)
    )
    # maps seq_member_id -> gene stable ID
    gene_ids = dict(query.execute().fetchall())

    # one PhyloNode per record, grouped under the ID of its parent; nodes
    # whose seq_member_id has no stable ID are left unnamed
    nodes = {}
    parents = defaultdict(list)
    for record in records:
        node = PhyloNode(
            length=record["distance_to_parent"],
            name=gene_ids.get(record["seq_member_id"]),
        )
        nodes[record["node_id"]] = node
        parents[record["parent_id"]].append(node)

    # NOTE(review): the root is taken to be the last parent that has exactly
    # one child; confirm this matches how the tree root is stored
    root = None
    for parent, children in parents.items():
        if parent not in nodes:
            # parent ID without a record of its own gets a placeholder node
            nodes[parent] = PhyloNode(name="root")

        node = nodes[parent]
        for child in children:
            child.parent = node

        if len(children) == 1:
            root = node

    if just_members:
        stableids = [g.stableid for g in self.members]
        root = root.get_sub_tree(stableids)

    return root
def get_species_tree(self, just_members=True):
    """returns the species tree

    Parameters
    ----------
    just_members : bool
        limits tips to just members of self, by taking the sub tree
        spanning ``self.species``
    """
    # grab the Ensembl species tree root ID
    sptr = self.ComparaDb.get_table("species_tree_root")
    query = sql.select([sptr.c.root_id], whereclause=sptr.c.label == "Ensembl")
    records = query.execute().fetchall()
    assert len(records) == 1, records
    root_id = records[0]["root_id"]

    # get the tree nodes
    sptn = self.ComparaDb.get_table("species_tree_node")
    query = sql.select([sptn], whereclause=sql.and_(sptn.c.root_id == root_id))
    records = query.execute().fetchall()

    # get the genome db -> name map
    gen_db = self.ComparaDb.get_table("genome_db")
    db_ids = [record["genome_db_id"] for record in records]
    query = sql.select(
        [gen_db.c.genome_db_id, gen_db.c.name],
        whereclause=gen_db.c.genome_db_id.in_(db_ids),
    )
    id_name = dict(query.execute().fetchall())
    # replace each database name by its species name; the string "None"
    # is stored as a real None
    for db_id in id_name:
        species_name = _Species.get_species_name(id_name[db_id])
        id_name[db_id] = None if species_name == "None" else species_name

    # one PhyloNode per record, grouped under the ID of its parent
    nodes = {}
    parents = defaultdict(list)
    for record in records:
        parent_id = record["parent_id"]
        node_id = record["node_id"]
        length = record["distance_to_parent"]
        label = record["node_name"]
        genome_id = record["genome_db_id"]
        # a species name, when available, takes precedence over node_name
        if genome_id is not None:
            species_name = id_name[genome_id]
            if species_name is not None:
                label = species_name

        node = PhyloNode(length=length, name=label)
        nodes[node_id] = node
        parents[parent_id].append(node)

    # attach children; the last parent with exactly one child becomes root
    root = None
    for parent_id, offspring in parents.items():
        if parent_id not in nodes:
            nodes[parent_id] = PhyloNode(name="root")

        parent_node = nodes[parent_id]
        for child in offspring:
            child.parent = parent_node

        if len(offspring) == 1:
            root = parent_node

    # convert tip-names to match genome db names
    if just_members:
        root = root.get_sub_tree(self.species, tipsonly=True)

    return root