Ejemplo n.º 1
0
 def test_avoid_empty_alleles_previous_non_match_merged(self):
     """Collapsed-match edge case (part 2): a short match is merged away.

     The 'TTA' run at columns 2-4 sits between two non-match stretches. The
     test expects it to end up inside one non-match interval [0, 6], leaving
     only the trailing 'TTT' as a match interval [7, 9].
     """
     aligned = make_alignment(["CCTTAGGTTT", "AATTA--TTT"])
     partitioner = IntervalPartitioner(
         "**TTA**TTT", min_match_length=3, alignment=aligned
     )
     found_match, found_non_match, _ = partitioner.get_intervals()

     expected_match = make_typed_intervals([[7, 9]], Match)
     self.assertEqual(found_match, expected_match)
     expected_non_match = make_typed_intervals([[0, 6]], NonMatch)
     self.assertEqual(found_non_match, expected_non_match)
Ejemplo n.º 2
0
 def test_nonmatch_interval_switching_Ns(self):
     """A non-match interval is expected to flip to match when 'N's are involved.

     Per the original note, sequences containing 'N' get removed; presumably
     that leaves a single distinct sequence, so the interval is no longer
     variable. The helper is expected to update both lists in place.
     """
     msa = make_alignment(["ANAAA", "ATAAT"])
     # NOTE(review): [0, 5] on a 5-column alignment looks one past the last
     # column if interval ends are inclusive -- confirm the intended convention.
     matches, non_matches = make_intervals([], [[0, 5]])

     IntervalPartitioner.enforce_multisequence_nonmatch_intervals(
         matches, non_matches, msa
     )

     expected_matches = make_typed_intervals([[0, 5]], Match)
     self.assertEqual(matches, expected_matches)
     self.assertEqual(non_matches, [])
Ejemplo n.º 3
0
 def test_nonmatch_interval_switching_indels(self):
     """A non-match interval is expected to flip to match for gap-only differences.

     Both rows spell 'AA' once gaps are dropped; they differ only in where the
     gaps are placed, so the interval is expected to be reclassified as match.
     The helper is expected to update both lists in place.
     """
     msa = make_alignment(["A---A", "A-A--"])
     # NOTE(review): [0, 5] on a 5-column alignment looks one past the last
     # column if interval ends are inclusive -- confirm the intended convention.
     matches, non_matches = make_intervals([], [[0, 5]])

     IntervalPartitioner.enforce_multisequence_nonmatch_intervals(
         matches, non_matches, msa
     )

     expected_matches = make_typed_intervals([[0, 5]], Match)
     self.assertEqual(matches, expected_matches)
     self.assertEqual(non_matches, [])
Ejemplo n.º 4
0
 def test_end_in_non_match(self):
     """A consensus ending in a non-match stretch yields a trailing non-match.

     Expected partition of '**ATT**AAA*C' with min_match_length=3:
     matches at [2, 4] and [7, 9]; non-matches at [0, 1], [5, 6] and [10, 11].
     """
     partitioner = IntervalPartitioner(
         "**ATT**AAA*C", min_match_length=3, alignment=MSA([])
     )
     found_match, found_non_match, _ = partitioner.get_intervals()

     expected_match = make_typed_intervals([[2, 4], [7, 9]], Match)
     self.assertEqual(found_match, expected_match)
     expected_non_match = make_typed_intervals(
         [[0, 1], [5, 6], [10, 11]], NonMatch
     )
     self.assertEqual(found_non_match, expected_non_match)
Ejemplo n.º 5
0
 def test_avoid_empty_alleles_short_match(self):
     """Padding the non-match region makes the leading match too short.

     The non-match stretch is expected to be padded with preceding match
     sequence. The leading match then falls below min_match_length and
     collapses, so columns [0, 4] are expected to form one non-match interval
     and only the trailing 'TTT' at [5, 7] remains a match.
     """
     aligned = make_alignment(["TTAGGTTT", "TTA--TTT"])
     partitioner = IntervalPartitioner(
         "TTA**TTT", min_match_length=3, alignment=aligned
     )
     found_match, found_non_match, _ = partitioner.get_intervals()

     expected_match = make_typed_intervals([[5, 7]], Match)
     self.assertEqual(found_match, expected_match)
     expected_non_match = make_typed_intervals([[0, 4]], NonMatch)
     self.assertEqual(found_non_match, expected_non_match)
Ejemplo n.º 6
0
 def test_avoid_empty_alleles_long_match(self):
     """The non-match region is padded so no allele in the PRG is empty.

     A non-match interval of just [4, 5] would give an empty allele (one row
     is all gaps there), so it is expected to be extended backwards into the
     preceding match, giving [3, 5]. The leading match shrinks to [0, 2],
     which still satisfies min_match_length=3.
     """
     aligned = make_alignment(["TTAAGGTTT", "TTAA--TTT"])
     partitioner = IntervalPartitioner(
         "TTAA**TTT", min_match_length=3, alignment=aligned
     )
     found_match, found_non_match, _ = partitioner.get_intervals()

     expected_match = make_typed_intervals([[0, 2], [6, 8]], Match)
     self.assertEqual(found_match, expected_match)
     expected_non_match = make_typed_intervals([[3, 5]], NonMatch)
     self.assertEqual(found_non_match, expected_non_match)
Ejemplo n.º 7
0
 def test_match_non_match_match(self):
     """Match / non-match / match consensus, including the combined ordering.

     Besides the two typed lists, the third return value is expected to hold
     every interval sorted by position: match, non-match, match.
     """
     partitioner = IntervalPartitioner(
         "ATT**AAAC", min_match_length=3, alignment=MSA([])
     )
     found_match, found_non_match, found_all = partitioner.get_intervals()

     expected_matches = make_typed_intervals([[0, 2], [5, 8]], Match)
     expected_non_matches = make_typed_intervals([[3, 4]], NonMatch)
     self.assertEqual(found_match, expected_matches)
     self.assertEqual(found_non_match, expected_non_matches)

     # The combined list must interleave both kinds in positional order.
     expected_order = [
         expected_matches[0],
         expected_non_matches[0],
         expected_matches[1],
     ]
     self.assertEqual(found_all, expected_order)
Ejemplo n.º 8
0
    def __init__(
        self,
        msa_file,
        alignment_format="fasta",
        max_nesting=2,
        nesting_level=1,
        min_match_length=3,
        site=5,
        alignment=None,
        interval=None,
        prg_file=None,
    ):
        """Set up the builder state and obtain the PRG string.

        Args:
            msa_file: Alignment file; read with ``load_alignment_file`` only
                when ``alignment`` is not supplied.
            alignment_format: Format passed to ``load_alignment_file``.
            max_nesting: Stored on the instance (not used in this method).
            nesting_level: Stored on the instance (not used in this method).
            min_match_length: Passed to ``IntervalPartitioner``.
            site: Stored on the instance (not used in this method).
            alignment: Pre-loaded alignment; when given, ``msa_file`` is not read.
            interval: Stored on the instance (not used in this method).
            prg_file: If not None, the PRG is read from this file instead of
                being built with ``self._get_prg()``.
        """
        self.msa_file = msa_file
        self.alignment_format = alignment_format
        self.max_nesting = max_nesting
        self.nesting_level = nesting_level
        self.min_match_length = min_match_length
        self.site = site

        # Only touch the file system when no alignment was handed in.
        if alignment is None:
            alignment = load_alignment_file(msa_file, alignment_format)
        self.alignment: MSA = alignment

        self.interval = interval
        self.consensus = self.get_consensus(self.alignment)
        self.length = len(self.consensus)

        # Split the consensus into match / non-match intervals.
        partitioner = IntervalPartitioner(
            self.consensus, self.min_match_length, self.alignment
        )
        intervals = partitioner.get_intervals()
        self.match_intervals, self.non_match_intervals, self.all_intervals = intervals
        logging.info(
            "match intervals: %s; non_match intervals: %s",
            self.match_intervals,
            self.non_match_intervals,
        )

        # properties for stats
        self.subAlignedSeqs = {}

        # make prg: set these before _get_prg() runs, in case it reads them
        self.delim_char = " "
        self.prg = ""
        if prg_file is None:
            self.prg = self._get_prg()
        else:
            logging.info(
                "Reading from a PRG file which already exists. To regenerate, delete it."
            )
            with open(prg_file, "r") as f:
                self.prg = f.read()
Ejemplo n.º 9
0
    def test_consensus_smaller_than_min_match_len(self):
        """Whole consensus shorter than min_match_length.

        A match shorter than min_match_length normally counts as non-match.
        The exception checked here: when the entire consensus is shorter than
        min_match_length and fully matching, it is kept as one match interval.
        With a non-match character present, the whole span is non-match.
        """
        # Fully matching consensus: expected to stay a match despite its length.
        all_matching = IntervalPartitioner(
            "TTATT", min_match_length=7, alignment=MSA([])
        )
        found_match, found_non_match, _ = all_matching.get_intervals()
        expected_match = make_typed_intervals([[0, 4]], Match)
        self.assertEqual(found_match, expected_match)
        self.assertEqual(found_non_match, [])

        # One '*' in the consensus: expected to be entirely non-match.
        with_variation = IntervalPartitioner(
            "T*ATT", min_match_length=7, alignment=MSA([])
        )
        found_match, found_non_match, _ = with_variation.get_intervals()
        self.assertEqual(found_match, [])
        expected_non_match = make_typed_intervals([[0, 4]], NonMatch)
        self.assertEqual(found_non_match, expected_non_match)
Ejemplo n.º 10
0
 def test_short_match_counted_as_non_match(self):
     """A leading match shorter than min_match_length is treated as non-match.

     'AT' (2 columns, below min_match_length=3) is expected to be folded into
     the following non-match stretch, giving one non-match interval [0, 4].
     """
     partitioner = IntervalPartitioner(
         "AT***", min_match_length=3, alignment=MSA([])
     )
     found_match, found_non_match, _ = partitioner.get_intervals()

     self.assertEqual(found_match, [])
     expected_non_match = make_typed_intervals([[0, 4]], NonMatch)
     self.assertEqual(found_non_match, expected_non_match)
Ejemplo n.º 11
0
 def test_all_match(self):
     """A consensus with no non-match characters gives one match interval."""
     partitioner = IntervalPartitioner(
         "ATATAAA", min_match_length=3, alignment=MSA([])
     )
     found_match, found_non_match, _ = partitioner.get_intervals()

     expected_match = make_typed_intervals([[0, 6]], Match)
     self.assertEqual(found_match, expected_match)
     self.assertEqual(found_non_match, [])
Ejemplo n.º 12
0
 def test_bijection_respected_passes(self):
     """A valid partition must be accepted without raising.

     With inclusive ends, [0, 2], [3, 4] and [5, 10] cover each of the 11
     positions exactly once. The test passes if the check raises nothing.
     """
     matches, non_matches = make_intervals([[0, 2], [5, 10]], [[3, 4]])
     alignment_length = 11
     IntervalPartitioner.enforce_alignment_interval_bijection(
         matches, non_matches, alignment_length
     )
Ejemplo n.º 13
0
 def test_position_in_match_and_nonmatch_intervals_fails(self):
     """Overlapping intervals must be rejected with PartitioningError.

     Position 2 is covered by both the match interval [0, 2] and the
     non-match interval [2, 3].
     """
     matches, non_matches = make_intervals([[0, 2]], [[2, 3]])
     self.assertRaises(
         PartitioningError,
         IntervalPartitioner.enforce_alignment_interval_bijection,
         matches,
         non_matches,
         4,
     )
Ejemplo n.º 14
0
 def test_position_in_no_interval_fails(self):
     """An uncovered position must be rejected with PartitioningError.

     Only [0, 1] is supplied for an alignment of length 3, so position 2
     belongs to no interval.
     """
     matches = make_typed_intervals([[0, 1]], Match)
     self.assertRaises(
         PartitioningError,
         IntervalPartitioner.enforce_alignment_interval_bijection,
         matches,
         [],
         3,
     )