class TestNodeDescendantsFunctionality(unittest.TestCase):
    """Checks how a ``Node`` stores its descendants and renders them."""

    # NOTE(review): ``@data`` below looks like ddt's decorator; the matching
    # class-level decorator is not visible in this snippet -- confirm it exists.

    def setUp(self):
        """Create the root node, a spare descendant and the branch lengths."""
        self.lengths = ["2.0", "3.0", "4.0"]
        self.test_descendant = Node("D", "2.0")
        self.test_obj = Node("A", "1.0")

    @data(["D1.1", "D1.2", "D1.3"], ["D", "", ""], ["", "", ""])
    def test_node_representation_with_deeper_descendants(self, test_data):
        """
        :param test_data: names of descendants

        Procedure:
          1. Build a small tree in which the root has one descendant that
             itself holds two further descendants.
          2. Verify that the root's ``newick`` string equals the expected
             string assembled from the same names and lengths.
        :return:
        """
        named_lengths = list(zip(test_data, self.lengths))

        single_nodes_reprs = []
        for name, length in named_lengths:
            single_nodes_reprs.append("{0}:{1}".format(name, length))
        proper_result = "(({1},{2}){0})A:1.0".format(*single_nodes_reprs)

        parent, first_child, second_child = (
            Node(name, length) for name, length in named_lengths
        )
        for child in (first_child, second_child):
            parent.add_descendant(child)
        self.test_obj.add_descendant(parent)

        self.assertEqual(proper_result, self.test_obj.newick)

    def test_node_as_descendants_list(self):
        """A node added with ``add_descendant`` shows up in ``descendants``."""
        added = self.test_descendant
        self.test_obj.add_descendant(added)
        self.assertListEqual([added], self.test_obj.descendants)
class TestNodeDescendantsFunctionality(unittest.TestCase):
    """Tests covering descendants of a ``Node`` and their Newick rendering."""

    # NOTE(review): ``@data`` below looks like ddt's decorator; the matching
    # class-level decorator is not visible in this snippet -- confirm it exists.

    def setUp(self):
        """Prepare a root node, one candidate descendant and three lengths."""
        self.test_obj = Node("A", "1.0")
        self.lengths = ["2.0", "3.0", "4.0"]
        self.test_descendant = Node("D", "2.0")

    @data(["D1.1", "D1.2", "D1.3"], ["D", "", ""], ["", "", ""])
    def test_node_representation_with_deeper_descendants(self, test_data):
        """
        :param test_data: names of descendants

        Procedure:
          1. Create three nodes from the given names and ``self.lengths``;
             attach the second and third to the first, and the first to the
             root.
          2. Compare the root's ``newick`` string with the expected string
             formatted from the same name/length pairs.
        :return:
        """
        pairs = tuple(zip(test_data, self.lengths))

        single_nodes_reprs = ["{0}:{1}".format(*pair) for pair in pairs]
        proper_result = "(({1},{2}){0})A:1.0".format(*single_nodes_reprs)

        inner, leaf_a, leaf_b = [Node(*pair) for pair in pairs]
        inner.add_descendant(leaf_a)
        inner.add_descendant(leaf_b)
        self.test_obj.add_descendant(inner)

        self.assertEqual(proper_result, self.test_obj.newick)

    def test_node_as_descendants_list(self):
        """``descendants`` lists exactly the node that was added."""
        self.test_obj.add_descendant(self.test_descendant)
        expected = [self.test_descendant]
        self.assertListEqual(expected, self.test_obj.descendants)
def newick_node(self, nodes=None, template=None):
    """
    Build a `Node` for this object and, recursively, for its children.

    :param nodes: if not `None`, children are looked up with \
    `self.children_from_nodemap(nodes)` instead of `self.children`.
    :param template: format string receiving this object as `l`; a falsy value \
    selects `self._newick_default_template`.
    :return: the `Node` for this object, with children attached sorted by name.
    """
    label_template = template if template else self._newick_default_template
    result = Node(name=label_template.format(l=self), length='1')  # noqa: E741

    if nodes is None:
        children = self.children
    else:
        children = self.children_from_nodemap(nodes)

    # The resolved template is handed down so every level uses the same one.
    for child in sorted(children, key=lambda child: child.name):
        subtree = child.newick_node(nodes=nodes, template=label_template)
        result.add_descendant(subtree)
    return result
def test_Node_custom_length(self):
    """Exercise custom length formatters and a disabled length parser."""
    # A formatter given to Node.create controls how the length is rendered.
    formatted_root = Node.create(
        length=100., length_formatter="{:0.1e}".format)
    self.assertEqual(formatted_root.newick, ':1.0e+02')

    # With length_parser=None the unusual length spellings round-trip as-is.
    weird_numbers_tree = "((a:1.e2,b:3j),(c:0x0BEFD6B0,d:003))"
    parsed_trees = loads(weird_numbers_tree, length_parser=None)
    self.assertEqual(weird_numbers_tree, parsed_trees[0].newick)

    # The test expects ValueError for a formatter that emits parentheses.
    with self.assertRaises(ValueError):
        bad_root = Node.create(
            length=1., length_formatter="({:0.1e})".format)
        bad_root.newick
def newick_node(self, nodes=None):
    """
    Build a `Node` for this object and, recursively, for its children.

    The node label is the name (with `,`, `(` and `)` replaced) followed by the
    id in square brackets, optionally the iso code in square brackets and a
    `-l-` marker when the level equals `Level.language`; the whole label is
    wrapped in single quotes.

    :param nodes: if not `None`, children are looked up with \
    `self.children_from_nodemap(nodes)` instead of `self.children`.
    :return: the `Node` for this object, with children attached sorted by name.
    """
    safe_name = self.name.replace(',', '/').replace('(', '{').replace(')', '}')
    label_parts = ['{0} [{1}]'.format(safe_name, self.id)]
    if self.iso:
        label_parts.append('[%s]' % self.iso)
    if self.level == Level.language:
        label_parts.append('-l-')
    result = Node(name="'{0}'".format(''.join(label_parts)), length='1')

    if nodes is None:
        children = self.children
    else:
        children = self.children_from_nodemap(nodes)

    for child in sorted(children, key=lambda child: child.name):
        result.add_descendant(child.newick_node(nodes=nodes))
    return result
def test_Node(self):
    """Check name validation, traversal orders and ancestor links."""
    with self.assertRaises(ValueError):
        Node(name='A)')

    root = loads('(A,B,(C,D)E)F;')[0]

    default_order = [n.name for n in root.walk()]
    self.assertEqual(default_order, ['F', 'A', 'B', 'E', 'C', 'D'])

    leaf_names = [n.name for n in root.walk() if n.is_leaf]
    self.assertEqual(leaf_names, ['A', 'B', 'C', 'D'])

    postorder = [n.name for n in root.walk(mode='postorder')]
    self.assertEqual(postorder, ['A', 'B', 'C', 'D', 'E', 'F'])

    self.assertEqual(root.ancestor, None)
    first_child = root.descendants[0]
    self.assertEqual(first_child.ancestor, root)

    # For an unnamed tree, every non-root node must report the subtree of
    # its parent as ``ancestor.newick``.
    root = loads('(((a,b),(c,d)),e);')[0]
    ancestor_reprs = [n.ancestor.newick for n in root.walk() if n.ancestor]
    expected_reprs = [
        '(((a,b),(c,d)),e)',
        '((a,b),(c,d))',
        '(a,b)',
        '(a,b)',
        '((a,b),(c,d))',
        '(c,d)',
        '(c,d)',
        '(((a,b),(c,d)),e)']
    self.assertEqual(ancestor_reprs, expected_reprs)
def test_node_with_parameters(self, test_set):
    """
    Construct a ``Node`` from keyword arguments and check name and length.

    :param test_set: keyword arguments for ``Node``; must contain ``"name"``.
        When ``"length"`` is present the expected length is 3.0, else 0.0.
    """
    proper_length = 3.0 if "length" in test_set else 0.0

    created = Node(**test_set)

    self.assertEqual(test_set["name"], created.name)
    self.assertEqual(proper_length, created.length)
def upgma(distance_matrix, names=None):
    """Cluster based on a distance matrix using UPGMA.

    That is, the Unweighted Pair Group Method with Arithmetic Mean algorithm.
    In each round the pair of clusters with the smallest distance is merged;
    the merged row and column are the plain mean (weights 0.5 / 0.5) of the
    two originals.

    The input is copied into a float array before clustering, so the caller's
    matrix is never modified and integer matrices are averaged without
    truncation.

    If node names are given (not None), they must be a sequence of the same
    length as the size of the square distance_matrix.

    The edge lengths in the tree are not useful for the time being.

    :param distance_matrix: square matrix of pairwise distances.
    :param names: optional node names; when falsy, the row indices are used.
    :return: the root ``Node`` of the resulting tree.
    """
    # Private float copy: the merge step below assigns into rows and columns,
    # which would otherwise leak into the caller's array (and truncate the
    # averaged values when that array has an integer dtype).
    distance_matrix = numpy.array(distance_matrix, dtype=float)

    # Initialize nodes
    nodes = [Node(name) for name in (names or range(len(distance_matrix)))]

    # Iterate until a single cluster is left
    nc = len(distance_matrix)
    while nc > 1:
        # Search the upper triangle for the pair with the least distance.
        # Default to the first pair so a round in which no entry is below
        # infinity (all inf / NaN) still merges a valid pair.
        minimum_distance = numpy.inf
        indices = 0, 1
        for i in range(nc - 1):
            for j in range(i + 1, nc):
                dis = distance_matrix[i, j]
                if dis < minimum_distance:
                    minimum_distance = dis
                    indices = i, j

        # Merge these two nodes into one new node stored at position i
        i, j = indices
        cluster = nodes[i], nodes[j]
        distance_matrix[i] = 0.5 * (distance_matrix[i]) + 0.5 * (
            distance_matrix[j])
        distance_matrix[:, i] = 0.5 * (distance_matrix[:, i]) + 0.5 * (
            distance_matrix[:, j])
        nodes[i] = Node.create(descendants=cluster)
        for c in cluster:
            c.length = distance_matrix[i, i]

        # Drop row/column j; numpy.delete returns a new, smaller array
        distance_matrix = numpy.delete(distance_matrix, j, 0)
        distance_matrix = numpy.delete(distance_matrix, j, 1)
        del nodes[j]
        nc -= 1

    return nodes[0]
def rename_none_node(node_to_rename: newick.Node, counter):
    """
    Give an unnamed node a unique name so it differs from other unnamed nodes.

    A node whose name is ``None`` is renamed to ``"None_<counter>"`` and the
    counter is advanced by one; a named node is returned untouched together
    with the unchanged counter.

    :param node_to_rename: node to be checked
    :param counter: int; counter for none nodes
    :return: (Node, int)
    """
    if node_to_rename.name is not None:
        return node_to_rename, counter

    name_parts = (str(node_to_rename.name), str(counter))
    node_to_rename.name = "_".join(name_parts)
    return node_to_rename, counter + 1
def newick_node(self, nodes=None, template=None, maxlevel=None, level=0) -> Node:
    """
    Return a `newick.Node` representing the subtree of the Glottolog classification
    starting at the languoid.

    :param nodes: if not `None`, children are looked up with \
    `self.children_from_nodemap(nodes)` instead of `self.children`.
    :param template: Python format string accepting the `Languoid` instance as single \
    variable named `l`, used to format node labels.
    :param maxlevel: when truthy, limits the subtree: a `config.LanguoidLevel` skips \
    children whose `level` is greater; any other value skips children once the \
    recursion depth `level` is greater than it.
    :param level: recursion depth of this call; incremented by one per generation.
    """
    label_template = template if template else self._newick_default_template
    result = Node(name=label_template.format(l=self), length='1')  # noqa: E741

    if nodes is None:
        children = self.children
    else:
        children = self.children_from_nodemap(nodes)

    for child in sorted(children, key=lambda child: child.name):
        if maxlevel:
            if isinstance(maxlevel, config.LanguoidLevel):
                beyond_limit = child.level > maxlevel
            else:
                beyond_limit = level > maxlevel
            if beyond_limit:
                continue
        subtree = child.newick_node(
            nodes=nodes,
            template=label_template,
            maxlevel=maxlevel,
            level=level + 1)
        result.add_descendant(subtree)
    return result
def phytree_from_groups(groups: List[set]):
    """
    Build a ``PhyTree`` from a list of groups (sets of member names).

    Each group becomes a node named after its sorted members; a group that is
    a proper superset of another group gets that group's node as a
    descendant. Nodes that end up nested are removed from the top-level list
    passed to ``PhyTree``.

    :param groups: the groups to arrange into a tree.
    :return: ``PhyTree`` built from the remaining top-level nodes.
    """

    class TreeBlock:
        """A group together with the names of the groups it strictly contains."""
        name: set
        contains: List[str]

        def __init__(self, name: set):
            self.contains = []
            self.name = name

        def name_to_str(self) -> str:
            """Render the sorted members as ``{a b c}``."""
            listed = str(sorted(self.name))
            unpunctuated = re.sub(r"[,']", "", listed)
            return unpunctuated.replace("[", "{").replace("]", "}")

        def __str__(self):
            return f"Group: {self.name}, Contains: {self.contains}"

    # Keep only members whose beginning matches the alphanumeric pattern.
    cluster_sets = [
        {member for member in set(c) if re.match(r'[a-zA-Z0-9]+', member)}
        for c in groups
    ]
    cluster_sets.sort(key=len, reverse=True)

    blocks = [TreeBlock(s) for s in cluster_sets]
    for block in blocks:
        for candidate in blocks:
            if block.name > candidate.name:
                block.contains.append(candidate.name_to_str())

    # Blocks containing fewer groups come first, so a nested group's node
    # already exists when the enclosing block is processed.
    blocks.sort(key=lambda block: len(block.contains))

    nodes: List[Node] = []
    for block in blocks:
        node = Node(block.name_to_str())
        for contained_name in block.contains:
            found = next(
                (known for known in nodes if known.name == contained_name),
                None)
            if found is None:
                continue
            node.descendants.append(found)
            node.descendants = sorted(
                node.descendants, key=lambda child: child.name)
            nodes.remove(found)
        nodes.append(node)
    return PhyTree(nodes)
def _convert_to_phyloxml(self, seq_id_to_seq_name: Dict[SequenceID, str] = None) -> str:
    """
    Serialise the tree in ``self.nodes`` with ``dumps``.

    Nodes are labelled with the sequence name (or the sequence id value when
    no name map is given) if they hold exactly one sequence, and with
    ``"Consensus <consensus_id>"`` otherwise. The root gets length ``"1"``;
    every other node gets a length derived from the difference between its
    own and its parent's ``mincomp`` root value.

    :param seq_id_to_seq_name: optional mapping from sequence id to the name
        used as label for single-sequence nodes.
    :return: the string produced by ``dumps``, or ``None`` when
        ``self.nodes`` is empty.
    """
    if not self.nodes:
        return None

    # NOTE(review): this round-trip writes 'drzewko.xml' into the working
    # directory and neither `tree` nor `tree_xml` feeds into the returned
    # value -- confirm whether it is still needed.
    newick_str = self._convert_to_newick(seq_id_to_seq_name)
    tree = Phylo.read(StringIO(newick_str), 'newick')
    Phylo.write(tree, 'drzewko.xml', 'phyloxml')
    tree_xml = Phylo.PhyloXMLIO.read("drzewko.xml")

    # After sorting, a node's consensus_id is used as its list index below
    # (presumably ids are 0..n-1 -- verify against the tree builder).
    sorted_nodes = sorted(self.nodes, key=lambda x: x.consensus_id)
    nodes_to_process = [(None, sorted_nodes[0])]
    newick_tree = None
    while nodes_to_process:
        node_parent_label, node = nodes_to_process.pop()

        if len(node.sequences_ids) == 1:
            seq_id = node.sequences_ids[0]
            if seq_id_to_seq_name:
                label = seq_id_to_seq_name[seq_id]
            else:
                label = seq_id.value
        else:
            # Same spelling in both cases; the name-map branch used to
            # produce the misspelled "Consenses".
            label = f"Consensus {node.consensus_id}"

        if node.parent_node_id is None:
            length = "1"
        else:
            parent_minComp = sorted_nodes[
                node.parent_node_id].mincomp.root_value().value
            length = str((1 - parent_minComp) -
                         (1 - node.mincomp.root_value().value))

        newick_node = Node(name=label, length=length)
        if newick_tree is None:
            newick_tree = newick_node
        else:
            # The parent is found again by its label.
            parent_node = newick_tree.get_node(node_parent_label)
            parent_node.add_descendant(newick_node)

        for child in node.children_nodes_ids:
            nodes_to_process.append((label, sorted_nodes[child]))
    return dumps(newick_tree)
def test_node_representation_with_deeper_descendants(self, test_data):
    """
    :param test_data: names of descendants

    Procedure:
      1. Build a small tree in which the root has one descendant that itself
         holds two further descendants.
      2. Verify that the root's ``newick`` string equals the expected string
         assembled from the same names and lengths.
    :return:
    """
    named_lengths = list(zip(test_data, self.lengths))

    single_nodes_reprs = []
    for name, length in named_lengths:
        single_nodes_reprs.append("{0}:{1}".format(name, length))
    proper_result = "(({1},{2}){0})A:1.0".format(*single_nodes_reprs)

    parent, first_child, second_child = (
        Node(name, length) for name, length in named_lengths
    )
    for child in (first_child, second_child):
        parent.add_descendant(child)
    self.test_obj.add_descendant(parent)

    self.assertEqual(proper_result, self.test_obj.newick)
def test_Node():
    """Check name validation, traversal orders and ancestor links."""
    with pytest.raises(ValueError):
        Node(name='A)')

    root = loads('(A,B,(C,D)E)F;')[0]

    default_order = [n.name for n in root.walk()]
    assert default_order == ['F', 'A', 'B', 'E', 'C', 'D']

    leaf_names = [n.name for n in root.walk() if n.is_leaf]
    assert leaf_names == ['A', 'B', 'C', 'D']

    postorder = [n.name for n in root.walk(mode='postorder')]
    assert postorder == ['A', 'B', 'C', 'D', 'E', 'F']

    assert root.ancestor is None
    first_child = root.descendants[0]
    assert first_child.ancestor == root

    # For an unnamed tree, every non-root node must report the subtree of
    # its parent as ``ancestor.newick``.
    root = loads('(((a,b),(c,d)),e);')[0]
    ancestor_reprs = [n.ancestor.newick for n in root.walk() if n.ancestor]
    expected_reprs = [
        '(((a,b),(c,d)),e)',
        '((a,b),(c,d))',
        '(a,b)',
        '(a,b)',
        '((a,b),(c,d))',
        '(c,d)',
        '(c,d)',
        '(((a,b),(c,d)),e)']
    assert ancestor_reprs == expected_reprs
def clone_node(n):
    """
    Recursively copy a node and its descendants.

    Only the ``name`` of each node is carried over to the copy.

    :param n: node to copy; must expose ``name`` and ``descendants``.
    :return: a new ``Node`` mirroring the structure below ``n``.
    """
    copy = Node(name=n.name)
    for cloned_child in map(clone_node, n.descendants):
        copy.add_descendant(cloned_child)
    return copy
def test_node_length_changeability(self):
    """A length given as a string compares equal to its number, also after reassignment."""
    node_under_test = Node(length="10")
    self.assertEqual(10, node_under_test.length)

    node_under_test.length = "12"
    self.assertEqual(12, node_under_test.length)
def test_node_parameters_changeability(self):
    """The name set at construction can be read back and reassigned."""
    node_under_test = Node(name="A")
    self.assertEqual("A", node_under_test.name)

    node_under_test.name = "B"
    self.assertEqual("B", node_under_test.name)
def test_node_newick_representation_with_length(self):
    """A named node with a length renders as ``name:length``."""
    node_under_test = Node(name="A", length="3")
    rendered = node_under_test.newick
    self.assertEqual("A:3", rendered)
def setUp(self):
    """Create the default-constructed ``Node`` stored on ``self.test_obj``."""
    self.test_obj = Node()
def test_Node_custom_length():
    """Custom ``length_parser`` / ``length_formatter`` callables are honoured."""
    # The parser's output is what ``length`` reports.
    parsed_root = Node.create(
        length='1e2', length_parser=lambda raw: raw + 'i')
    assert parsed_root.length == '1e2i'

    # The formatter always yields 5, whatever length is assigned.
    formatted_root = Node.create(length_formatter=lambda _value: 5)
    formatted_root.length = 10
    assert formatted_root.length == pytest.approx(5)
def test_repr():
    """``repr`` of a named node has the form ``Node("<name>")``."""
    named = Node(name="A")
    rendered = repr(named)
    assert rendered == 'Node("A")'
def setUp(self):
    """Create the root node, a spare descendant and the branch lengths."""
    self.lengths = ["2.0", "3.0", "4.0"]
    self.test_descendant = Node("D", "2.0")
    self.test_obj = Node("A", "1.0")
def _convert_to_newick(self, seq_id_to_metadata: Dict[SequenceID, str] = None) -> str:
    """
    Render the tree in ``self.nodes`` as a Newick string with NHX annotations.

    A ``Node`` tree labelled with consensus ids is built first; the nested
    ``newick_nhx`` helper then renders it, appending an ``[&&NHX:...]`` block
    to every node that has a length.

    :param seq_id_to_metadata: optional mapping from sequence id to metadata;
        the values are indexed with ``"name"`` and ``"group"`` below.
    :return: the rendered tree wrapped in one extra pair of parentheses, or
        ``None`` when ``self.nodes`` is empty.
    """

    def newick_nhx(newick_tree):
        """The representation of the Node in Newick format."""
        label = newick_tree.name or ''
        if newick_tree._length:
            # Find the tree node whose consensus id matches this label and
            # build its NHX metadata. There is no break: every node is
            # scanned and the last match determines ``metadata``.
            for cn in self.nodes:
                if str(cn.consensus_id) == newick_tree.name:
                    if seq_id_to_metadata:
                        if len(cn.sequences_ids) == 1:
                            # Single sequence: take name and group from the
                            # supplied metadata.
                            name = seq_id_to_metadata[
                                cn.sequences_ids[0]]["name"]
                            group = seq_id_to_metadata[
                                cn.sequences_ids[0]]["group"]
                            seqid = cn.sequences_ids[0]
                            metadata = f"[&&NHX:name={name}:group={group}:seqid={seqid}:mincomp={cn.mincomp}]"
                        else:
                            name = f"Consensus {cn.consensus_id}"
                            metadata = f"[&&NHX:name={name}:mincomp={cn.mincomp}]"
                    else:
                        # No metadata supplied: fall back to the raw
                        # sequence id for single-sequence nodes.
                        if len(cn.sequences_ids) == 1:
                            name = cn.sequences_ids[0]
                        else:
                            name = f"Consensus {cn.consensus_id}"
                        mincomp = cn.mincomp
                        metadata = f"[&&NHX:name={name}:mincomp={mincomp}]"
            # NOTE(review): ``metadata`` is only bound when some node
            # matched above -- confirm a match is always guaranteed.
            label += ':' + newick_tree._length + metadata
        descendants = ','.join(
            [newick_nhx(n) for n in newick_tree.descendants])
        if descendants:
            descendants = '(' + descendants + ')'
        return descendants + label

    if not self.nodes:
        return None
    # After sorting, consensus ids / parent ids / child ids are used as list
    # indices (presumably ids are 0..n-1 -- verify against the tree builder).
    sorted_nodes = sorted(self.nodes, key=lambda x: x.consensus_id)
    # Work list of (parent label, node) pairs, starting at the first node.
    nodes_to_process = [(None, sorted_nodes[0])]
    newick_tree = None
    while nodes_to_process:
        n = nodes_to_process.pop()
        node_parent_label = n[0]
        node = n[1]
        label = str(node.consensus_id)
        if node.parent_node_id is None:
            length = "1"
        else:
            # Length is derived from the parent's and this node's mincomp
            # root values.
            parent_minComp = sorted_nodes[
                node.parent_node_id].mincomp.root_value().value
            length = str((1 - parent_minComp) -
                         (1 - node.mincomp.root_value().value))
        newick_node = Node(name=label, length=length)
        if newick_tree is None:
            # First processed node becomes the root.
            newick_tree = newick_node
        else:
            parent_node = newick_tree.get_node(node_parent_label)
            parent_node.add_descendant(newick_node)
        for child in node.children_nodes_ids:
            nodes_to_process.append((label, sorted_nodes[child]))
    return "(" + newick_nhx(newick_tree) + ")"
def test_repr(self):
    """``repr`` of a named node has the form ``Node("<name>")``."""
    named = Node(name="A")
    rendered = repr(named)
    self.assertEqual(rendered, 'Node("A")')
def node():
    """Return a new default-constructed ``Node``."""
    # NOTE(review): presumably a pytest fixture whose decorator is outside
    # this snippet -- confirm.
    return Node()
def _add_leaf(node: Node, target: str, leaf: str):
    """
    Add ``leaf`` next to ``target`` somewhere in the subtree rooted at ``node``.

    Takes no ``self``/``cls`` and recurses through ``PhyTree._add_leaf``
    (presumably a static method of ``PhyTree`` -- decorator not visible here).

    :param node: subtree root to modify in place.
    :param target: name of the group or leaf the new leaf is attached to.
    :param leaf: bare name of the new leaf; it is wrapped in braces when a
        node is created for it.
    :raises ValueError: when the target cannot be found, an unexpected leaf
        is reached, or ``node`` is neither a group nor a leaf.
    """
    if PhyTree._is_group(node.name):
        if node.name == target:
            # The group itself is the target: attach the new leaf directly.
            node.add_descendant(Node(f"{{{leaf}}}"))
        else:
            # Otherwise descend into the child that covers the target.
            if PhyTree._is_group(target):
                expected_leaves = set(PhyTree._get_group_leaves(target))
            else:
                expected_leaves = {target}
            for child in node.descendants:
                if PhyTree._is_group(child.name):
                    child_leaves = set(
                        PhyTree._get_group_leaves(child.name))
                    if expected_leaves <= child_leaves:
                        PhyTree._add_leaf(child, target, leaf)
                        break
                elif PhyTree._is_leaf(child.name):
                    if child.name in expected_leaves:
                        PhyTree._add_leaf(child, target, leaf)
            # NOTE(review): only the group branch above breaks out of the
            # loop; confirm which statement this ``else`` is meant to pair
            # with and whether the leaf branch should break as well.
            else:
                raise ValueError(f"Couldn't find {target}")
    elif PhyTree._is_leaf(node.name):
        if node.name == target:
            if node.name != '{}':
                # The leaf gets two children: one carrying its current name
                # and one for the new leaf; it is renamed at the end.
                node.add_descendant(Node(node.name))
                node.add_descendant(Node(f"{{{leaf}}}"))
            else:
                # An empty leaf simply takes over the new leaf's name.
                node.name = f"{{{leaf}}}"
                return
        else:
            raise ValueError(f"Unexpected leaf: {node.name}")
    else:
        raise ValueError(f"Couldn't recognize {leaf} as a leaf or a group")
    # Extend this node's own name so that it lists the new leaf too.
    node.name = node.name.replace("}", f" {leaf}}}")
def test_Node_custom_length(self):
    """Custom ``length_parser`` / ``length_formatter`` callables are honoured."""
    # The parser's output is what ``length`` reports.
    parsed_root = Node.create(
        length='1e2', length_parser=lambda raw: raw + 'i')
    self.assertEqual(parsed_root.length, '1e2i')

    # The formatter always yields 5, whatever length is assigned.
    formatted_root = Node.create(length_formatter=lambda _value: 5)
    formatted_root.length = 10
    self.assertAlmostEqual(formatted_root.length, 5)