def test_4_find_cutoff_with_so_far_values(self, expected_cutoff,
                                          compatibilities, so_far_cutoffs):
    """Check the cutoff chosen when earlier cutoffs are supplied.

    Args:
        expected_cutoff: Raw value the found cutoff is expected to hold.
        compatibilities: Raw values; each is wrapped in graph.Compatibility.
        so_far_cutoffs: Raw values of the cutoffs found so far; each is
            wrapped in graph.Compatibility as well.
    """
    wrapped_compatibilities = list(map(graph.Compatibility, compatibilities))
    wrapped_so_far = list(map(graph.Compatibility, so_far_cutoffs))

    result = at_builders._find_node_cutoff(wrapped_compatibilities,
                                           wrapped_so_far)

    self.assertEqual(expected_cutoff, result.cutoff.value)
Beispiel #2
0
    def _convert_consensus_paths_to_affinity_tree_nodes():
        """Builds Affinity Tree nodes out of the consensus paths.

        Reads `consensus_paths` and `p` from the enclosing scope
        (presumably a mapping of consensus ID to consensus info and the
        poagraph - confirm against the enclosing function).

        Returns:
            A list with one tree.AffinityNode per consensus path (node ID
            is the consensus ID + 1, parent is node 0), followed by one
            extra node collecting every sequence that was not assigned to
            any consensus.
        """
        # The sequence IDs do not change between iterations, so fetch them
        # once; list() guards against a one-shot iterable being reused.
        all_seq = list(p.get_sequences_ids())
        at_nodes = []
        # A set keeps the "is this sequence assigned?" checks O(1).
        # Assumes sequence IDs are hashable (they are used as dict keys in
        # the compatibilities mapping below).
        assigned_sequences = set()

        for c_id, c_info in consensus_paths.items():
            assigned_ids = c_info.assigned_sequences_ids
            assigned_sequences.update(assigned_ids)
            compatibilities = p.get_compatibilities(all_seq, c_info.path)
            if assigned_ids:
                assigned_lookup = set(assigned_ids)
                # The node's mincomp is the weakest compatibility among the
                # sequences assigned to this consensus.
                mincomp = min(c
                              for seq_id, c in compatibilities.items()
                              if seq_id in assigned_lookup)
            else:
                # Same "no compatibility" value as the unassigned node below.
                mincomp = graph.Compatibility(0)
            at_nodes.append(
                tree.AffinityNode(id_=tree.AffinityNodeID(c_id + 1),
                                  parent=tree.AffinityNodeID(0),
                                  sequences=assigned_ids,
                                  mincomp=mincomp,
                                  compatibilities=compatibilities,
                                  consensus=c_info.path,
                                  children=[]))

        unassigned_sequences = [seq_id
                                for seq_id in all_seq
                                if seq_id not in assigned_sequences]
        at_nodes.append(
            tree.AffinityNode(parent=tree.AffinityNodeID(0),
                              sequences=unassigned_sequences,
                              id_=tree.AffinityNodeID(len(at_nodes) + 1),
                              mincomp=graph.Compatibility(0),
                              children=[]))
        return at_nodes
Beispiel #3
0
 def __init__(self,
              id_: AffinityNodeID,
              parent: Optional[AffinityNodeID] = None,
              children: Optional[List[AffinityNodeID]] = None,
              sequences: Optional[List[msa.SequenceID]] = None,
              mincomp: Optional[graph.Compatibility] = None,
              compatibilities: Optional[Dict[msa.SequenceID,
                                             graph.Compatibility]] = None,
              consensus: Optional[graph.SeqPath] = None):
     """Creates a node, filling in defaults for omitted values.

     Args:
         id_: ID of this node.
         parent: ID of the parent node; stored as given (may be None).
         children: IDs of the child nodes.
         sequences: IDs of the sequences assigned to this node.
         mincomp: Compatibility stored as the node's mincomp.
         compatibilities: Mapping of sequence ID to compatibility.
         consensus: Consensus path; stored as given (may be None).

     Any falsy `children`, `sequences`, `mincomp` or `compatibilities`
     (None, but also an empty container) is replaced by a fresh empty
     list / dict or by graph.Compatibility(0) respectively.
     """
     # Values stored exactly as passed in.
     self.id_: AffinityNodeID = id_
     self.parent: Optional[AffinityNodeID] = parent
     self.consensus: Optional[graph.SeqPath] = consensus

     # Values with a fallback for falsy input.
     self.children: List[AffinityNodeID] = children or []
     self.sequences: List[msa.SequenceID] = sequences or []
     self.mincomp: graph.Compatibility = mincomp or graph.Compatibility(0)
     self.compatibilities: Dict[msa.SequenceID,
                                graph.Compatibility] = compatibilities or {}
Beispiel #4
0
    def as_newick(self,
                  seq_id_to_metadata: Dict[msa.SequenceID,
                                           graph.SequenceMetadata] = None,
                  separate_leaves=False) -> str:
        """Returns Affinity Tree in Newick format with NHX annotations.

        Args:
            seq_id_to_metadata: Optional dictionary of sequence IDs to
                their metadata. When given, a node holding exactly one
                sequence is annotated with the "name" and "group" entries
                of that sequence's metadata (a missing "name" falls back
                to the sequence ID, a missing "group" to an empty string).
                For example (assuming SequenceMetadata wraps a dict):
                    {SequenceID('KM0123'): SequenceMetadata(
                        {'name': 'cat', 'group': 'pets'})}
            separate_leaves: A switch to control if tree leaves having
                multiple sequences assigned should get appended children:
                one singleton leaf per assigned sequence. The extra
                leaves exist only in the returned string; the children
                lists of self.nodes are reset afterwards.

        Returns:
            A string with the Affinity Tree converted to newick format.
            https://en.wikipedia.org/wiki/Newick_format
            If the tree has no nodes, an empty string is returned.

        Raises:
            KeyError: If seq_id_to_metadata is given but lacks the single
                sequence of some node.
        """
        if not self.nodes:
            return ""

        def _get_sequence_attr_if_exists(seq_metadata: graph.SequenceMetadata,
                                         attr: str) -> str:
            """Returns str(seq_metadata[attr]), or "" if attr is absent."""

            return str(seq_metadata[attr]) if attr in seq_metadata else ""

        def _nhx_annotation(at_node) -> str:
            """Builds the [&&NHX:...] annotation of one affinity node."""

            seq_count = len(at_node.sequences)
            if seq_count == 1:
                seq_id = at_node.sequences[0]
                if seq_id_to_metadata:
                    seq_metadata = seq_id_to_metadata[seq_id]
                    name = _get_sequence_attr_if_exists(seq_metadata, "name")
                    if name == "":
                        name = seq_id
                    group = _get_sequence_attr_if_exists(seq_metadata,
                                                         "group")
                    return (f"[&&NHX:name={name}:group={group}"
                            f":seqid={seq_id}:mincomp={at_node.mincomp}]")
                name = seq_id
            elif seq_count == 0:
                name = f"EmptyAffinityNode {at_node.id_}"
            else:
                name = f"AffinityNode {at_node.id_}"
            return f"[&&NHX:name={name}:mincomp={at_node.mincomp}]"

        def _newick_nhx(newick_node: newick.Node) -> str:
            """Converts newick tree to newick string"""

            node_label = newick_node.name or ''
            if newick_node._length:
                at_node = nodes_by_label.get(newick_node.name)
                # Every newick node is created from an affinity node below,
                # so the lookup is expected to succeed; if it ever does
                # not, keep the branch length and skip the annotation
                # instead of hiding the problem behind a debug print.
                annotation = ('' if at_node is None
                              else _nhx_annotation(at_node))
                node_label += f":{newick_node._length}{annotation}"
            descendants = ','.join(
                _newick_nhx(n) for n in newick_node.descendants)
            if descendants:
                descendants = '(' + descendants + ')'
            return descendants + node_label

        # NOTE: sorted_nodes is indexed by node ID below, which assumes
        # the IDs are the consecutive integers 0..len(self.nodes) - 1 and
        # that the root has ID 0 - TODO confirm against tree construction.
        sorted_nodes = sorted(self.nodes, key=lambda x: x.id_)
        # Leaves whose children list is temporarily extended.
        expanded_leaves = []
        try:
            if separate_leaves:
                next_leaf_id = len(self.nodes)
                for node in self.nodes:
                    if not node.children and len(node.sequences) > 1:
                        expanded_leaves.append(node)
                        for seq_id in node.sequences:
                            node.children.append(next_leaf_id)
                            sorted_nodes.append(
                                AffinityNode(
                                    id_=AffinityNodeID(next_leaf_id),
                                    parent=node.id_,
                                    children=[],
                                    sequences=[seq_id],
                                    mincomp=graph.Compatibility(1.0)))
                            next_leaf_id += 1

            # Label -> affinity node; on duplicate labels the last node
            # wins, as a front-to-back scan would.
            nodes_by_label = {str(n.id_): n for n in sorted_nodes}

            newick_tree = None
            # Stack of (parent label, affinity node) pairs still to convert.
            nodes_to_process = [(None, sorted_nodes[0])]
            while nodes_to_process:
                node_parent_label, node = nodes_to_process.pop()

                label = str(node.id_)
                if node.parent is None:
                    length = "1"
                else:
                    parent_mincomp = sorted_nodes[
                        node.parent].mincomp.base_value().value
                    length = str((1 - parent_mincomp) -
                                 (1 - node.mincomp.base_value().value))

                newick_node = newick.Node(name=label, length=length)

                if newick_tree is None:
                    newick_tree = newick_node
                else:
                    parent_node = newick_tree.get_node(node_parent_label)
                    parent_node.add_descendant(newick_node)

                for child in node.children:
                    nodes_to_process.append((label, sorted_nodes[child]))

            return "(" + _newick_nhx(newick_tree) + ")"
        finally:
            # Undo the temporary expansion even when the conversion fails,
            # so self.nodes never keeps IDs of nodes that do not exist.
            for node in expanded_leaves:
                node.children = []
class AffinityTreeGenerationTests(unittest.TestCase):
    """Tests of at_builders._find_node_cutoff.

    The parametrised tests rely on the `data` / `unpack` decorators: every
    tuple passed to `data` is unpacked into the test method's arguments.
    """

    @data((at_params.P(0.5), graph.Compatibility(0.836660026534076)),
          (at_params.P(1), graph.Compatibility(0.7)),
          (at_params.P(4), graph.Compatibility(0.6561)))
    @unpack
    def test_1_p_parameter_influence(self, p: at_params.P,
                                     expected_cutoff: graph.Compatibility):
        """Checks how the P parameter changes the cutoff that is found."""
        nodes = [
            graph.Node(node_id=nid(node_index), base=b(base), aligned_to=None)
            for node_index, base in enumerate('TAGACACGTA')
        ]

        # Each following sequence shares fewer nodes with the consensus
        # path [10..19] used below.
        paths = {
            'seq0': [10, 11, 12, 13, 14, 15, 16, 17, 18, 9],
            'seq1': [10, 11, 12, 13, 14, 15, 16, 17, 8, 9],
            'seq2': [10, 11, 12, 13, 14, 15, 16, 7, 8, 9],
            'seq3': [10, 11, 12, 3, 4, 5, 6, 7, 8, 9],
            'seq4': [10, 11, 2, 3, 4, 5, 6, 7, 8, 9],
        }
        # Building key and Sequence from the same name keeps the two IDs
        # in sync for every entry.
        sequences = {
            msa.SequenceID(name):
            graph.Sequence(msa.SequenceID(name),
                           [graph.SeqPath([*map(nid, path)])],
                           graph.SequenceMetadata({}))
            for name, path in paths.items()
        }

        poagraph = graph.Poagraph(nodes, sequences)

        consensus_path = graph.SeqPath(
            [*map(nid, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19])])
        compatibilities = poagraph.get_compatibilities(
            poagraph.get_sequences_ids(), consensus_path, p)

        actual_cutoff = at_builders._find_node_cutoff(
            list(compatibilities.values()), []).cutoff
        self.assertAlmostEqual(expected_cutoff.value, actual_cutoff.value)

    @data(
        # single compatibility value
        (0.5, [graph.Compatibility(0.5)]),

        # two compatibilities values
        (0.7, [graph.Compatibility(0.5),
               graph.Compatibility(0.7)]),
        (1, [graph.Compatibility(1),
             graph.Compatibility(0.45)]),
        (0.9, [graph.Compatibility(0.9),
               graph.Compatibility(0.5)]),

        # repeated values
        (0.7, [*map(graph.Compatibility, [0.5, 0.7, 0.7])]),
        (0.9, [*map(graph.Compatibility, [0.9, 0.5, 0.5])]),
        (1, [*map(graph.Compatibility, [0.45, 1, 0.45, 0.45])]),

        # many unique compatibilities values
        (.8, [*map(graph.Compatibility, [.3, .4, .8])]),
        (0.91,
         [*map(graph.Compatibility, [0.31, 0.32, 0.91, 0.92, 0.93, 0.97])]),
        (0.91, [
            *map(graph.Compatibility,
                 [0.29, 0.3, 0.33, 0.91, 0.92, 0.93, 0.97])
        ]),
        (1, [*map(graph.Compatibility, [0.81, 0.75, 0.8, 0.81, 1])]),
        (0.9, [*map(graph.Compatibility, [0.5, 0.9, 0.99])]),
        (0.7, [*map(graph.Compatibility, [0.2, 0.85, 0.7, 0.8])]),
        (0.99, [*map(graph.Compatibility, [0.99, 0.9, 0.99])]),
        (0.99, [*map(graph.Compatibility, [0.99])]),

        # repeated distance between values
        (.4, [*map(graph.Compatibility, [.3, .4, .5])]),

        # all the same values
        (.1, [*map(graph.Compatibility, [.1, .1, .1])]))
    @unpack
    def test_2_find_cutoff_no_so_far_values(
            self, expected_cutoff: float,
            compatibilities: List[graph.Compatibility]):
        """Checks the cutoff found when no earlier cutoffs are given."""
        actual_cutoff = at_builders._find_node_cutoff(compatibilities,
                                                      []).cutoff
        self.assertEqual(expected_cutoff, actual_cutoff.value)

    def test_3_find_cutoff_no_compatibilities(self):
        """Checks that an empty compatibilities list raises ValueError."""
        with self.assertRaises(ValueError) as err:
            _ = at_builders._find_node_cutoff([], []).cutoff

        # The message check has to sit outside the `with` block: nothing
        # placed after the raising call inside the block is ever executed.
        # Whitespace is normalised so line wrapping of the message does
        # not matter.
        actual_message = " ".join(str(err.exception).split())
        self.assertEqual("Empty compatibilities list. Cannot find cutoff.",
                         actual_message)

    @data(
        # guard <= all compatibilities
        (0.2, [0.2, 0.7, 0.8, 0.85], [0.1, 0.01, 0]),
        (0.7, [0.7, 0.85, 0.7, 0.8], [0.1, 0.01, 0]),
        (0.8, [0.7, 0.7, 0.85, 0.8], [0.85, 0.91, 1.0]),

        # guard > all compatibilities
        (0.6, [0.3, 0.6, 0.61, 0.61], [0.99]),  # big distance to guard
        (0.9, [0.2, 0.97, 0.98, 0.9], [0.99]),  # small distance to guard

        # guard between compatibilities
        (0.5, [0.2, 0.57, 0.58, 0.5], [0.55]),  # take smaller than guard
        (0.58, [0.2, 0.27, 0.58, 0.2], [0.55]),  # take greater than guard
        (0.55, [0.2, 0.58, 0.27, 0.55], [0.55])  # take equal to guard
    )
    @unpack
    def test_4_find_cutoff_with_so_far_values(self, expected_cutoff,
                                              compatibilities, so_far_cutoffs):
        """Checks the cutoff found when earlier cutoffs are supplied."""
        compatibilities = [graph.Compatibility(c) for c in compatibilities]
        so_far_cutoffs = [graph.Compatibility(c) for c in so_far_cutoffs]
        actual_cutoff = at_builders._find_node_cutoff(compatibilities,
                                                      so_far_cutoffs).cutoff
        self.assertEqual(expected_cutoff, actual_cutoff.value)