def test_4_find_cutoff_with_so_far_values(self, expected_cutoff,
                                          compatibilities, so_far_cutoffs):
    """Checks the cutoff found when previously used cutoffs are supplied.

    Args:
        expected_cutoff: Raw value the found cutoff is expected to equal.
        compatibilities: Raw values wrapped into graph.Compatibility and
            passed as the first argument of _find_node_cutoff.
        so_far_cutoffs: Raw values wrapped into graph.Compatibility and
            passed as the second argument of _find_node_cutoff.
    """
    wrapped_compatibilities = [*map(graph.Compatibility, compatibilities)]
    wrapped_so_far_cutoffs = [*map(graph.Compatibility, so_far_cutoffs)]

    result = at_builders._find_node_cutoff(wrapped_compatibilities,
                                           wrapped_so_far_cutoffs)

    self.assertEqual(expected_cutoff, result.cutoff.value)
def _convert_consensus_paths_to_affinity_tree_nodes():
    """Builds flat Affinity Tree nodes from the available consensus paths.

    Reads ``consensus_paths`` and ``p`` from the surrounding scope. One
    node is created per consensus path (ID = consensus ID + 1, parent = 0),
    followed by a single extra node that collects every sequence not
    assigned to any consensus.

    Returns:
        List of tree.AffinityNode objects: one per consensus path, in the
        iteration order of ``consensus_paths``, and the node for unassigned
        sequences as the last element.

    Raises:
        ValueError: If a consensus has assigned sequences but none of them
            appears in the compatibilities computed for that consensus
            (``min`` of an empty collection).
    """
    # The sequence IDs are needed for every consensus and for the final
    # "unassigned" node, so fetch them once.
    all_seq = p.get_sequences_ids()

    at_nodes = []
    # A set keeps the "is this sequence assigned anywhere?" check O(1).
    assigned_sequences = set()

    for c_id, c_info in consensus_paths.items():
        assigned_ids = c_info.assigned_sequences_ids
        assigned_sequences.update(assigned_ids)

        compatibilities = p.get_compatibilities(all_seq, c_info.path)

        if assigned_ids:
            assigned_lookup = set(assigned_ids)
            mincomp = min(c for seq_id, c in compatibilities.items()
                          if seq_id in assigned_lookup)
        else:
            # Same fallback type as used for the unassigned-sequences node.
            mincomp = graph.Compatibility(0)

        at_nodes.append(
            tree.AffinityNode(id_=tree.AffinityNodeID(c_id + 1),
                              parent=tree.AffinityNodeID(0),
                              sequences=assigned_ids,
                              mincomp=mincomp,
                              compatibilities=compatibilities,
                              consensus=c_info.path,
                              children=[]))

    unassigned_ids = [seq_id for seq_id in all_seq
                      if seq_id not in assigned_sequences]
    at_nodes.append(
        tree.AffinityNode(parent=tree.AffinityNodeID(0),
                          sequences=unassigned_ids,
                          id_=tree.AffinityNodeID(len(at_nodes) + 1),
                          mincomp=graph.Compatibility(0),
                          children=[]))
    return at_nodes
def __init__(self,
             id_: AffinityNodeID,
             parent: Optional[AffinityNodeID] = None,
             children: Optional[List[AffinityNodeID]] = None,
             sequences: Optional[List[msa.SequenceID]] = None,
             mincomp: Optional[graph.Compatibility] = None,
             compatibilities: Optional[Dict[msa.SequenceID,
                                            graph.Compatibility]] = None,
             consensus: Optional[graph.SeqPath] = None):
    """Creates an Affinity Tree node.

    Args:
        id_: ID of this node.
        parent: ID of the parent node; stored as given, so it stays None
            when no parent is passed.
        children: IDs of the child nodes. A falsy value (None or an empty
            list) is replaced by a new empty list.
        sequences: IDs of the sequences assigned to this node. A falsy
            value is replaced by a new empty list.
        mincomp: Compatibility stored for this node. A falsy value is
            replaced by graph.Compatibility(0).
        compatibilities: Mapping of sequence ID to compatibility. A falsy
            value is replaced by a new empty dict.
        consensus: Consensus path of this node; stored as given, so it
            stays None when no consensus is passed.
    """
    self.id_: AffinityNodeID = id_
    # parent and consensus are stored without a fallback, so both may be
    # None; the annotations reflect that.
    self.parent: Optional[AffinityNodeID] = parent
    self.children: List[AffinityNodeID] = children or []
    self.sequences: List[msa.SequenceID] = sequences or []
    self.mincomp: graph.Compatibility = mincomp or graph.Compatibility(0)
    self.compatibilities: Dict[msa.SequenceID,
                               graph.Compatibility] = compatibilities or {}
    self.consensus: Optional[graph.SeqPath] = consensus
def as_newick(self,
              seq_id_to_metadata: Optional[Dict[msa.SequenceID,
                                                graph.SequenceMetadata]] = None,
              separate_leaves=False) -> str:
    """Returns Affinity Tree in Newick format.

    Every node is written as ``<id>:<length>[&&NHX:...]``. The length of a
    node without a parent is "1"; for other nodes it is
    ``(1 - parent mincomp) - (1 - node mincomp)`` computed on base values.
    The traversal starts from the node with the smallest ID and uses node
    IDs as positions in the ID-sorted node list, so IDs are assumed to be
    consecutive integers starting at 0.

    Args:
        seq_id_to_metadata: Dictionary of sequence IDs to their metadata.
            For a node with exactly one sequence, the "name" and "group"
            entries of its metadata are written to the NHX annotation
            (the sequence ID is used when "name" is missing or empty).
            When None or empty, only name and mincomp are written.
        separate_leaves: A switch to control if tree leaves with multiple
            sequences assigned should get appended singleton child leaves,
            one per assigned sequence. The extra leaves exist only in the
            returned string; the tree nodes are restored afterwards.

    Returns:
        A string with the Affinity Tree converted to newick format.
        https://en.wikipedia.org/wiki/Newick_format
        If the tree has no nodes, an empty string is returned.

    Raises:
        KeyError: If seq_id_to_metadata is given but lacks the sequence of
            a single-sequence node.
    """

    def _get_sequence_attr_if_exists(seq_metadata: graph.SequenceMetadata,
                                     attr: str) -> str:
        """Returns the metadata value as string, or "" if attr is absent."""
        return str(seq_metadata[attr]) if attr in seq_metadata else ""

    def _nhx_metadata(cn: "AffinityNode") -> str:
        """Builds the NHX annotation for a single affinity node."""
        if len(cn.sequences) == 1:
            seqid = cn.sequences[0]
            if seq_id_to_metadata:
                seq_metadata = seq_id_to_metadata[seqid]
                name = _get_sequence_attr_if_exists(seq_metadata, "name")
                if name == "":
                    name = seqid
                group = _get_sequence_attr_if_exists(seq_metadata, "group")
                return (f"[&&NHX:name={name}:group={group}:seqid={seqid}"
                        f":mincomp={cn.mincomp}]")
            name = seqid
        elif len(cn.sequences) == 0:
            name = f"EmptyAffinityNode {cn.id_}"
        else:
            name = f"AffinityNode {cn.id_}"
        return f"[&&NHX:name={name}:mincomp={cn.mincomp}]"

    def _newick_nhx(newick_node: newick.Node) -> str:
        """Converts newick tree to newick string with NHX annotations."""
        node_label = newick_node.name or ''
        if newick_node._length:
            cn = nodes_by_label.get(newick_node.name)
            # A label without a matching affinity node is left bare
            # instead of failing the whole conversion.
            if cn is not None:
                node_label += ':' + newick_node._length + _nhx_metadata(cn)
        descendants = ','.join(
            [_newick_nhx(n) for n in newick_node.descendants])
        if descendants:
            descendants = '(' + descendants + ')'
        return descendants + node_label

    if not self.nodes:
        return ""

    sorted_nodes = sorted(self.nodes, key=lambda x: x.id_)

    # Leaves that temporarily receive singleton children; they are reset in
    # the finally clause so a failure cannot leave the tree modified.
    expanded_leaves = []
    try:
        if separate_leaves:
            next_leaf_id = len(self.nodes)
            for node in self.nodes:
                if len(node.children) == 0 and len(node.sequences) > 1:
                    expanded_leaves.append(node)
                    for seq_id in node.sequences:
                        node.children.append(next_leaf_id)
                        sorted_nodes.append(
                            AffinityNode(id_=AffinityNodeID(next_leaf_id),
                                         parent=node.id_,
                                         children=[],
                                         sequences=[seq_id],
                                         mincomp=graph.Compatibility(1.0)))
                        next_leaf_id += 1

        # Depth-first traversal with an explicit stack of
        # (parent label, affinity node) pairs.
        nodes_to_process = [(None, sorted_nodes[0])]
        newick_tree = None
        while nodes_to_process:
            node_parent_label, node = nodes_to_process.pop()
            label = str(node.id_)
            if node.parent is None:
                length = "1"
            else:
                parent_mincomp = sorted_nodes[
                    node.parent].mincomp.base_value().value
                length = str((1 - parent_mincomp) -
                             (1 - node.mincomp.base_value().value))
            newick_node = newick.Node(name=label, length=length)
            if newick_tree is None:
                newick_tree = newick_node
            else:
                parent_node = newick_tree.get_node(node_parent_label)
                parent_node.add_descendant(newick_node)
            nodes_to_process.extend(
                [(label, sorted_nodes[child]) for child in node.children])
    finally:
        for node in expanded_leaves:
            node.children = []

    # Built once so that labelling each newick node is a single lookup
    # rather than a scan over all affinity nodes.
    nodes_by_label = {str(cn.id_): cn for cn in sorted_nodes}
    return "(" + _newick_nhx(newick_tree) + ")"
class AffinityTreeGenerationTests(unittest.TestCase):
    """Tests of at_builders._find_node_cutoff.

    Covers the influence of the P parameter on the found cutoff, cutoff
    search with and without previously used cutoffs, and the error raised
    for an empty compatibilities list.
    """

    @data((at_params.P(0.5), graph.Compatibility(0.836660026534076)),
          (at_params.P(1), graph.Compatibility(0.7)),
          (at_params.P(4), graph.Compatibility(0.6561)))
    @unpack
    def test_1_p_parameter_influence(self,
                                     p: at_params.P,
                                     expected_cutoff: graph.Compatibility):
        """Checks the cutoff found for compatibilities computed with p."""
        nodes = [
            graph.Node(node_id=nid(i), base=b(base), aligned_to=None)
            for i, base in enumerate("TAGACACGTA")
        ]

        # Each sequence is registered under the same ID it carries itself.
        paths = {
            'seq0': [10, 11, 12, 13, 14, 15, 16, 17, 18, 9],
            'seq1': [10, 11, 12, 13, 14, 15, 16, 17, 8, 9],
            'seq2': [10, 11, 12, 13, 14, 15, 16, 7, 8, 9],
            'seq3': [10, 11, 12, 3, 4, 5, 6, 7, 8, 9],
            'seq4': [10, 11, 2, 3, 4, 5, 6, 7, 8, 9],
        }
        sequences = {
            msa.SequenceID(name):
            graph.Sequence(msa.SequenceID(name),
                           [graph.SeqPath([*map(nid, path)])],
                           graph.SequenceMetadata({}))
            for name, path in paths.items()
        }

        poagraph = graph.Poagraph(nodes, sequences)
        consensus_path = graph.SeqPath(
            [*map(nid, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19])])
        compatibilities = poagraph.get_compatibilities(
            poagraph.get_sequences_ids(), consensus_path, p)

        actual_cutoff = at_builders._find_node_cutoff(
            list(compatibilities.values()), []).cutoff

        self.assertAlmostEqual(expected_cutoff.value, actual_cutoff.value)

    @data(
        # single compatibility value
        (0.5, [graph.Compatibility(0.5)]),

        # two compatibilities values
        (0.7, [graph.Compatibility(0.5), graph.Compatibility(0.7)]),
        (1, [graph.Compatibility(1), graph.Compatibility(0.45)]),
        (0.9, [graph.Compatibility(0.9), graph.Compatibility(0.5)]),

        # repeated values
        (0.7, [*map(graph.Compatibility, [0.5, 0.7, 0.7])]),
        (0.9, [*map(graph.Compatibility, [0.9, 0.5, 0.5])]),
        (1, [*map(graph.Compatibility, [0.45, 1, 0.45, 0.45])]),

        # many unique compatibilities values
        (.8, [*map(graph.Compatibility, [.3, .4, .8])]),
        (0.91, [*map(graph.Compatibility,
                     [0.31, 0.32, 0.91, 0.92, 0.93, 0.97])]),
        (0.91, [*map(graph.Compatibility,
                     [0.29, 0.3, 0.33, 0.91, 0.92, 0.93, 0.97])]),
        (1, [*map(graph.Compatibility, [0.81, 0.75, 0.8, 0.81, 1])]),
        (0.9, [*map(graph.Compatibility, [0.5, 0.9, 0.99])]),
        (0.7, [*map(graph.Compatibility, [0.2, 0.85, 0.7, 0.8])]),
        (0.99, [*map(graph.Compatibility, [0.99, 0.9, 0.99])]),
        (0.99, [*map(graph.Compatibility, [0.99])]),

        # repeated distance between values
        (.4, [*map(graph.Compatibility, [.3, .4, .5])]),

        # all the same values
        (.1, [*map(graph.Compatibility, [.1, .1, .1])]))
    @unpack
    def test_2_find_cutoff_no_so_far_values(
            self,
            expected_cutoff: float,
            compatibilities: List[graph.Compatibility]):
        """Checks the cutoff found when no previous cutoffs are given."""
        actual_cutoff = at_builders._find_node_cutoff(compatibilities,
                                                      []).cutoff
        self.assertEqual(expected_cutoff, actual_cutoff.value)

    def test_3_find_cutoff_no_compatibilities(self):
        """Checks that an empty compatibilities list raises ValueError."""
        with self.assertRaises(ValueError) as err:
            _ = at_builders._find_node_cutoff([], []).cutoff

        # The message is checked after the with-block (code following the
        # raising call inside it would never run) and with whitespace
        # collapsed, so line breaks in the message do not matter.
        self.assertEqual(" ".join(str(err.exception).split()),
                         "Empty compatibilities list. Cannot find cutoff.")

    @data(
        # guard <= all compatibilities
        (0.2, [0.2, 0.7, 0.8, 0.85], [0.1, 0.01, 0]),
        (0.7, [0.7, 0.85, 0.7, 0.8], [0.1, 0.01, 0]),
        (0.8, [0.7, 0.7, 0.85, 0.8], [0.85, 0.91, 1.0]),

        # guard > all compatibilities
        (0.6, [0.3, 0.6, 0.61, 0.61], [0.99]),  # big distance to guard
        (0.9, [0.2, 0.97, 0.98, 0.9], [0.99]),  # small distance to guard

        # guard between compatibilities
        (0.5, [0.2, 0.57, 0.58, 0.5], [0.55]),  # take smaller than guard
        (0.58, [0.2, 0.27, 0.58, 0.2], [0.55]),  # take greater than guard
        (0.55, [0.2, 0.58, 0.27, 0.55], [0.55])  # take equal to guard
    )
    @unpack
    def test_4_find_cutoff_with_so_far_values(self, expected_cutoff,
                                              compatibilities,
                                              so_far_cutoffs):
        """Checks the cutoff found when previous cutoffs are supplied."""
        compatibilities = [graph.Compatibility(c) for c in compatibilities]
        so_far_cutoffs = [graph.Compatibility(c) for c in so_far_cutoffs]
        actual_cutoff = at_builders._find_node_cutoff(
            compatibilities, so_far_cutoffs).cutoff
        self.assertEqual(expected_cutoff, actual_cutoff.value)