Ejemplo n.º 1
0
    def test_consensus_smaller_than_min_match_len(self):
        """
        Check partitioning when the whole consensus is shorter than min_match_length.

        A match run shorter than min_match_length normally counts as non-match,
        but when the entire consensus string is shorter than min_match_length
        it is still reported as a single match interval. The same short string
        containing a "*" is reported as a single non-match interval instead.
        """
        # Five-character consensus with no "*": expected to be one match.
        fully_matching = IntervalPartitioner(
            "TTATT", min_match_length=7, alignment=MSA([])
        )
        matches, non_matches, _ = fully_matching.get_intervals()
        expected_match = make_typed_intervals([[0, 4]], Match)
        self.assertEqual(matches, expected_match)
        self.assertEqual(non_matches, [])

        # Same length, but a "*" at index 1: expected to be one non-match.
        with_star = IntervalPartitioner(
            "T*ATT", min_match_length=7, alignment=MSA([])
        )
        matches, non_matches, _ = with_star.get_intervals()
        self.assertEqual(matches, [])
        expected_non_match = make_typed_intervals([[0, 4]], NonMatch)
        self.assertEqual(non_matches, expected_non_match)
Ejemplo n.º 2
0
 def test_GivenUnorderedIds_SubalignmentStillInSequenceOrder(self):
     """
     Ids passed out of order still produce a sub-alignment in alignment order.

     "s3" is requested before "s1", yet the expected sub-alignment lists
     record 0 of self.alignment before record 2.
     """
     shuffled_ids = ["s3", "s1"]
     sub_alignment = PrgBuilder.get_sub_alignment_by_list_id(
         shuffled_ids, self.alignment
     )
     records_in_order = [self.alignment[0], self.alignment[2]]
     self.assertTrue(msas_equal(MSA(records_in_order), sub_alignment))
Ejemplo n.º 3
0
 def test_ambiguous_sequences_in_short_interval_separate_clusters(self):
     """
     Two sequences with non-ACGT characters each get their own cluster.

     The records contain "R" and "W" (the "ambiguous" sequences of the test
     name). The interval [0, 3] is clustered with a third argument of 5,
     presumably the k-mer size, which exceeds the sequence length.
     """
     records = [
         SeqRecord(Seq("ARAT"), id="s1"),
         SeqRecord(Seq("WAAT"), id="s2"),
     ]
     msa = MSA(records)
     clusters = kmeans_cluster_seqs_in_interval([0, 3], msa, 5)
     self.assertEqual([["s1"], ["s2"]], clusters)
Ejemplo n.º 4
0
 def test_get_subalignment_with_interval(self):
     """
     Selecting ids together with an interval also trims the sequences.

     Records "s2" and "s3" are requested with interval [0, 2]; the expected
     records are three characters long.
     """
     wanted_ids = ["s2", "s3"]
     interval = [0, 2]
     trimmed = PrgBuilder.get_sub_alignment_by_list_id(
         wanted_ids, self.alignment, interval
     )
     expected_records = [
         SeqRecord(Seq("C--"), id="s2"),
         SeqRecord(Seq("AAT"), id="s3"),
     ]
     self.assertTrue(msas_equal(MSA(expected_records), trimmed))
Ejemplo n.º 5
0
 def test_TwoIdenticalSequencesClusteredTogether(self):
     """
     Identical sequences share a cluster; the differing one is on its own.

     "s1" and "s2" are both "AAAT", while "s3" is "C-CC".
     """
     records = [
         SeqRecord(Seq("AAAT"), id="s1"),
         SeqRecord(Seq("AAAT"), id="s2"),
         SeqRecord(Seq("C-CC"), id="s3"),
     ]
     msa = MSA(records)
     clusters = kmeans_cluster_seqs_in_interval([0, 3], msa, 1)
     self.assertEqual([["s1", "s2"], ["s3"]], clusters)
Ejemplo n.º 6
0
 @classmethod
 def setUpClass(cls):
     """
     Build the four-record alignment stored on the class as ``alignment``.

     unittest invokes ``setUpClass`` on the class with no arguments, so it
     must be a classmethod; without the decorator the call raises TypeError.
     """
     cls.alignment = MSA(
         [
             SeqRecord(Seq("AAAT"), id="s1"),
             SeqRecord(Seq("C--C"), id="s2"),
             SeqRecord(Seq("AATT"), id="s3"),
             SeqRecord(Seq("GNGG"), id="s4"),
         ]
     )
Ejemplo n.º 7
0
 def test_end_in_non_match(self):
     """
     A consensus that starts and ends with "*" yields non-match end intervals.

     "**ATT**AAA*C" with min_match_length=3 is expected to give matches at
     [2, 4] and [7, 9]. The trailing "*C" is expected to form the non-match
     interval [10, 11].
     """
     partitioner = IntervalPartitioner(
         "**ATT**AAA*C", min_match_length=3, alignment=MSA([])
     )
     matches, non_matches, _ = partitioner.get_intervals()

     expected_matches = make_typed_intervals([[2, 4], [7, 9]], Match)
     self.assertEqual(matches, expected_matches)

     expected_non_matches = make_typed_intervals(
         [[0, 1], [5, 6], [10, 11]], NonMatch
     )
     self.assertEqual(non_matches, expected_non_matches)
Ejemplo n.º 8
0
 def test_match_non_match_match(self):
     """
     A match / non-match / match consensus is split and returned in order.

     Besides the separate match and non-match lists, the third value from
     get_intervals() must hold all intervals sorted by position.
     """
     partitioner = IntervalPartitioner(
         "ATT**AAAC", min_match_length=3, alignment=MSA([])
     )
     matches, non_matches, all_intervals = partitioner.get_intervals()

     want_matches = make_typed_intervals([[0, 2], [5, 8]], Match)
     want_non_matches = make_typed_intervals([[3, 4]], NonMatch)
     self.assertEqual(matches, want_matches)
     self.assertEqual(non_matches, want_non_matches)

     # The combined list interleaves both kinds by start position.
     want_sorted = [want_matches[0], want_non_matches[0], want_matches[1]]
     self.assertEqual(all_intervals, want_sorted)
Ejemplo n.º 9
0
    def test_one_long_one_short_sequence_separate_and_ordered_clusters(self):
        """
        A long and a mostly-gapped sequence are clustered separately, in input order.

        The same two records are clustered twice, once as given and once with
        the alignment reversed; the cluster order is expected to follow the
        record order of the alignment that was passed in.
        """
        msa = MSA([
            SeqRecord(Seq("AATTAATTATATAATAAC"), id="s1"),
            SeqRecord(Seq("A--------------AAT"), id="s2"),
        ])
        num_columns = len(msa[0])

        forward_clusters = kmeans_cluster_seqs_in_interval(
            [0, num_columns], msa, 5
        )
        self.assertEqual(forward_clusters, [["s1"], ["s2"]])

        reversed_clusters = kmeans_cluster_seqs_in_interval(
            [0, num_columns], msa[::-1], 5
        )
        self.assertEqual(reversed_clusters, [["s2"], ["s1"]])
Ejemplo n.º 10
0
 def test_GivenAllSequencesBelowKmerSize_NoKMeansAndIdenticalSequencesClustered(
         self, mockKMeans):
     """
     No KMeans call is made, and sequences identical without gaps are grouped.

     "s1" ("AA---AT") and "s4" ("A-A--AT") have the same non-gap characters
     and are expected to share a cluster.
     """
     # NOTE(review): mockKMeans is presumably injected by a mock.patch
     # decorator that is not visible in this view -- confirm.
     records = [
         SeqRecord(Seq("AA---AT"), id="s1"),
         SeqRecord(Seq("AA---TT"), id="s2"),
         SeqRecord(Seq("CA--CAT"), id="s3"),
         SeqRecord(Seq("A-A--AT"), id="s4"),
     ]
     msa = MSA(records)
     whole_interval = [0, len(msa[0])]
     clusters = kmeans_cluster_seqs_in_interval(whole_interval, msa, 6)
     mockKMeans.assert_not_called()
     self.assertEqual([["s1", "s4"], ["s2"], ["s3"]], clusters)
Ejemplo n.º 11
0
 def test_GivenOrderedIds_SubalignmentInSequenceOrder(self):
     """
     Ids passed in alignment order produce a sub-alignment in that same order.

     "s1" and "s3" are requested; the expected sub-alignment holds records
     0 and 2 of self.alignment.
     """
     ordered_ids = ["s1", "s3"]
     sub_alignment = PrgBuilder.get_sub_alignment_by_list_id(
         ordered_ids, self.alignment
     )
     records_in_order = [self.alignment[0], self.alignment[2]]
     self.assertTrue(msas_equal(MSA(records_in_order), sub_alignment))
Ejemplo n.º 12
0
 def test_short_match_counted_as_non_match(self):
     """
     A match run shorter than min_match_length is merged into the non-match.

     In "AT***" the leading "AT" has length 2, below min_match_length=3, so
     the whole string is expected to be one non-match interval [0, 4].
     """
     partitioner = IntervalPartitioner(
         "AT***", min_match_length=3, alignment=MSA([])
     )
     matches, non_matches, _ = partitioner.get_intervals()
     self.assertEqual(matches, [])
     expected_non_matches = make_typed_intervals([[0, 4]], NonMatch)
     self.assertEqual(non_matches, expected_non_matches)
Ejemplo n.º 13
0
 def test_all_match(self):
     """
     A consensus without any "*" is one match interval covering every position.

     "ATATAAA" is expected to give the single match [0, 6] and no non-match
     intervals.
     """
     partitioner = IntervalPartitioner(
         "ATATAAA", min_match_length=3, alignment=MSA([])
     )
     matches, non_matches, _ = partitioner.get_intervals()
     expected_matches = make_typed_intervals([[0, 6]], Match)
     self.assertEqual(matches, expected_matches)
     self.assertEqual(non_matches, [])
Ejemplo n.º 14
0
 def test_one_seq_returns_single_id(self):
     """
     Clustering an alignment with a single record reports no clustering.

     The assertion checks that the result's ``no_clustering`` attribute is
     truthy.
     """
     lone_record = SeqRecord(Seq("AAAT"), id="s1")
     msa = MSA([lone_record])
     clustering = kmeans_cluster_seqs_in_interval([0, 3], msa, 1)
     self.assertTrue(clustering.no_clustering)