コード例 #1
0
    def test_single_substitution_in_long_text(self):
        # The haystack is the literal below with every row stripped and the
        # rows glued together, so the literal's indentation and newlines
        # never reach the search.
        needle = b('PATTERN')
        rows = '''\
            FySijRLMtLLWkMnWxTbzIWuxOUbfAahWYKUlOZyhoQhfExJPOSwXxBLrlqdoUwpRW
            FEtHFiepnOTbkttuagADQaUTvkvKzvqaFaMnAPfolPpmXitKLDQhAqDOJwFzdcKmk
            cfVStxZGDUbrHjrDwVVRihbklyfqLJjrzGuhVGDzgSpCHXvaGPHebbcUAnAgfqqpA
            uMOowtptcoQUeAbdqJAmieLDxCrOPivbSwmriQwfFCDTXbswFqClZPnSkDkCyvPCi
            bmAjVGnuVsrZlPypglXlVVQKzMpQuWQynOLGDqwrAnsvYTcArkEhFpEgahWVQGOvv
            CTvbYZRVqqPCDRsyWeTVgANxZIyVAtENnndbsHzpEcPUfqCBUroIGRNEIMHYIZANy
            LeeVKEwihbvWZVOWPeAlmNKnhhoEPIcpDJDzPOYHSltxhSsZeeWMqtAnuSoFOIrqB
            EPUFIlKkpamljHylnTIWqaESoWbYESVPEeZtlAzpInuwFaNIYUvzpJNIlPtuOjUuT
            efaGnOXvQeHdaRPrdHCepPATXERNDdnkzuLHQcVWKpgHhGifBySAkWkthrzfZDHDU
            HJxjpLXseKuldLRftyctGvVKyrRTUCRAakjwTSWivGdksOZabnkBoRtMstlNwXcwg
            UCFLaWFxjqjasOfNeThrbubVGtyYRROYUOTMUmeSdJcBKxVXiaWDZoHyKtQRXwpVO
            pEmlpdzKWkFpDtHHdImhDJIXwxzjwyNLaTgPLHmcyhJGqncCblxALMdPEDaRtGFMg
            BskUxPGATTLKMFeIjgFJpudyMWlASyFSiaDWrOCgRfwjfpMYfuNQIqzvZbguWsnaq
            tRaXcxavobetBbbfMDjstQLjoJLwiajVRKhFVspIdgrmTMEBbjtpMnSpTkmFcRBZZ
            GUOWnesGgZeKkIQhlxlRPTtjUbbpaPlmxeiBdUKHHApgvEybUwWwXCoXFsauNiINm
            AGATFdcaHzgoRpbBFhKdJkLMF'''.splitlines()
        haystack = b(''.join(row.strip() for row in rows))
        wanted = [Match(start=541, end=548, dist=1)]

        # Same needle and haystack under two sets of trailing positional
        # arguments; both runs must report exactly the one match above.
        for limits in [(1, 0, 0, 1), (1, 1, 1, 1)]:
            self.assertEqual(
                self.search(needle, haystack, *limits),
                wanted,
            )
コード例 #2
0
    def test_protein_search2(self):
        # see:
        # * BioPython archives from March 14th, 2014
        #   http://lists.open-bio.org/pipermail/biopython/2014-March/009030.html
        # * https://github.com/taleinat/fuzzysearch/issues/3
        pieces = '''\
            XXXXXXXXXXXXXXXXXXXGGGTTVTTSSAAAAAAAAAAAAAGGGTTVTTSSAAAAAAAAAAA
            AAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBGGGTTLTTSS
        '''.split()
        text = b(''.join(pieces))
        pattern = b("GGGTTLTTSS")

        # One exact occurrence near the end, plus two occurrences that
        # differ from the pattern by one substitution.
        exact = Match(start=99, end=109, dist=0)
        near = [
            Match(start=19, end=29, dist=1),
            Match(start=42, end=52, dist=1),
        ]

        for max_subs, wanted in [
            (0, [exact]),
            (1, near + [exact]),
            (2, near + [exact]),
        ]:
            found = self.search(pattern, text, max_subs=max_subs)
            self.expectedOutcomes(found, wanted)
コード例 #3
0
 def test_missing_second_item_complex(self):
     # Every reported match must be one of the three candidates below; the
     # subset check does not require all of them to be present.
     found = set(self.search(b('bde'), b('abcdefg'), 1, 1, 1, 1))
     allowed = [
         Match(start=1, end=5, dist=1),
         Match(start=2, end=5, dist=1),
         Match(start=3, end=5, dist=1),
     ]
     self.assertTrue(found.issubset(allowed))
コード例 #4
0
    def test_short_substring(self):
        # A two-byte needle that occurs verbatim once in the haystack,
        # searched for with all four trailing arguments set to zero.
        needle, haystack = b('XY'), b('abcdefXYghij')
        wanted = [Match(start=6, end=8, dist=0)]

        found = self.search(needle, haystack, 0, 0, 0, 0)
        self.assertEqual(found, wanted)
コード例 #5
0
    def test_substring(self):
        # The needle occurs verbatim once, ten bytes into the haystack; the
        # search runs with all four trailing arguments set to zero.
        needle = b('PATTERN')
        haystack = b('aaaaaaaaaaPATTERNaaaaaaaaa')
        wanted = [Match(start=10, end=17, dist=0)]

        found = self.search(needle, haystack, 0, 0, 0, 0)
        self.assertEqual(found, wanted)
コード例 #6
0
    def test_one_missing_in_middle(self):
        # The haystack holds 'PATERN' (one 'T' fewer than the needle); no
        # match is expected for any of the max_subs values tried.
        needle, haystack = b('PATTERN'), b('aaaaaaaaaaPATERNaaaaaaaaa')

        for max_subs in range(3):
            found = self.search(needle, haystack, max_subs=max_subs)
            self.expectedOutcomes(found, [])
コード例 #7
0
    def test_null_bytes(self):
        # A NUL byte appears first only in the haystack, then inside the
        # needle as well; each case expects a single exact match.
        cases = [
            ('abc', 'xx\0abcxx', Match(start=3, end=6, dist=0)),
            ('a\0b', 'xxa\0bcxx', Match(start=2, end=5, dist=0)),
        ]
        for needle, haystack, wanted in cases:
            found = self.search(b(needle), b(haystack), 0, 0, 0, 0)
            self.assertEqual(found, [wanted])
コード例 #8
0
    def test_two_identical(self):
        # Two exact copies of the needle, first adjacent and then separated
        # by one byte; each copy is expected as its own zero-distance match.
        cases = [
            ('abcabc', [Match(start=0, end=3, dist=0),
                        Match(start=3, end=6, dist=0)]),
            ('abcXabc', [Match(start=0, end=3, dist=0),
                         Match(start=4, end=7, dist=0)]),
        ]
        for haystack, wanted in cases:
            found = self.search(b('abc'), b(haystack), max_subs=1)
            self.expectedOutcomes(found, wanted)
コード例 #9
0
 def test_max_substitutions_gte_subseq_len(self):
     # With a one-byte needle and a limit of at least one, every position
     # of the three-byte haystack is expected as a match.
     for max_subs in (1, 2, 5):
         found = self.search(b('b'), b('abc'), max_subs)
         self.expectedOutcomes(
             found,
             [Match(0, 1, 1), Match(1, 2, 0), Match(2, 3, 1)],
         )

     # Needle identical to the haystack, with a limit at or above the
     # needle length: only the single exact match is expected.
     word = 'PATTERN'
     for extra_subs in (0, 1, 7):
         found = self.search(b(word), b(word), len(word) + extra_subs)
         self.expectedOutcomes(found, [Match(0, len(word), 0)])
コード例 #10
0
    def test_double_first_item(self):
        # The haystack repeats the needle's first byte ('dd') right before
        # the exact occurrence.
        needle, haystack = b('def'), b('abcddefg')
        exact = Match(start=4, end=7, dist=0)
        one_earlier = Match(start=3, end=6, dist=2)

        for max_subs, wanted in [(1, [exact]), (2, [one_earlier, exact])]:
            found = self.search(needle, haystack, max_subs=max_subs)
            self.expectedOutcomes(found, wanted)
コード例 #11
0
    def test_subseq_length_less_than_max_l_dist(self):
        # All four trailing arguments get the same value, which is larger
        # than the needle length in every case; ValueError is expected.
        for limit in (2, 5):
            with self.assertRaises(ValueError):
                self.search(b('b'), b('abc'), limit, limit, limit, limit)

        word = 'PATTERN'
        for excess in (1, 7):
            with self.assertRaises(ValueError):
                limit = len(word) + excess
                self.search(b(word), b(word), limit, limit, limit, limit)
コード例 #12
0
    def test_dna_search(self):
        # see: http://stackoverflow.com/questions/19725127/
        chunks = '''\
            GACTAGCACTGTAGGGATAACAATTTCACACAGGTGGACAATTACATTGAAAATCACAGATTGGT
            CACACACACATTGGACATACATAGAAACACACACACATACATTAGATACGAACATAGAAACACAC
            ATTAGACGCGTACATAGACACAAACACATTGACAGGCAGTTCAGATGATGACGCCCGACTGATAC
            TCGCGTAGTCGTGGGAGGCAAGGCACACAGGGGATAGG
            '''.split()
        text = b(''.join(chunks))
        pattern = b('TGCACTGTAGGGATAACAAT')

        # A single match at distance one is expected even though up to two
        # substitutions are allowed.
        found = self.search(pattern, text, max_subs=2)
        self.expectedOutcomes(found, [Match(start=4, end=24, dist=1)])
コード例 #13
0
    def test_dna_search(self):
        # see: http://stackoverflow.com/questions/19725127/
        chunks = '''\
            GACTAGCACTGTAGGGATAACAATTTCACACAGGTGGACAATTACATTGAAAATCACAGATTGGT
            CACACACACATTGGACATACATAGAAACACACACACATACATTAGATACGAACATAGAAACACAC
            ATTAGACGCGTACATAGACACAAACACATTGACAGGCAGTTCAGATGATGACGCCCGACTGATAC
            TCGCGTAGTCGTGGGAGGCAAGGCACACAGGGGATAGG
            '''.split()
        text = b(''.join(chunks))
        pattern = b('TGCACTGTAGGGATAACAAT')

        # A single match at distance one is expected; its matched text is
        # the corresponding slice of the haystack.
        found = self.search(pattern, text, max_subs=2)
        wanted = Match(start=4, end=24, dist=1, matched=text[4:24])
        self.expectedOutcomes(found, [wanted])
コード例 #14
0
    def test_all_different(self):
        # Needle and haystack share no byte, so a match is expected only
        # once max_subs reaches the needle length of four.
        needle, haystack = b('AAAA'), b('ZZZZ')

        phases = [
            ((0, 1, 2, 3), []),
            ((4, 5), [Match(start=0, end=4, dist=4)]),
        ]
        for limits, wanted in phases:
            for max_subs in limits:
                found = self.search(needle, haystack, max_subs=max_subs)
                self.expectedOutcomes(found, wanted)
コード例 #15
0
    def test_all_different(self):
        # Needle and haystack share no byte, so a match is expected only
        # once max_subs reaches the needle length of four.
        needle, haystack = b('AAAA'), b('ZZZZ')

        phases = [
            ((0, 1, 2, 3), []),
            ((4, 5), [Match(start=0, end=4, dist=4, matched=b('ZZZZ'))]),
        ]
        for limits, wanted in phases:
            for max_subs in limits:
                found = self.search(needle, haystack, max_subs=max_subs)
                self.expectedOutcomes(found, wanted)
コード例 #16
0
    def test_one_changed_in_middle2(self):
        # The haystack contains 'PATtERN': the needle with one byte changed
        # to lower case.
        needle = b('PATTERN')
        haystack = b('aaaaaaaaaaPATtERNaaaaaaaaa')
        near = Match(start=10, end=17, dist=1, matched=b('PATtERN'))

        for max_subs, wanted in [(0, []), (1, [near]), (2, [near])]:
            found = self.search(needle, haystack, max_subs=max_subs)
            self.expectedOutcomes(found, wanted)
コード例 #17
0
    def test_substring(self):
        # An exact occurrence must be reported as the only match whether
        # zero, one or two substitutions are allowed.
        needle = b('PATTERN')
        haystack = b('aaaaaaaaaaPATTERNaaaaaaaaa')
        wanted = [Match(start=10, end=17, dist=0)]

        for max_subs in (0, 1, 2):
            found = self.search(needle, haystack, max_subs=max_subs)
            self.expectedOutcomes(found, wanted)
コード例 #18
0
 def test_invalid_none_arguments(self):
     # check that an exception is raised when max_l_dist is None as well as
     # at least one other limitation
     combos = [
         (None, 0, 0),
         (0, None, 0),
         (0, 0, None),
         (0, None, None),
         (None, 0, None),
         (None, None, 0),
         (None, None, None),
     ]
     label = 'max_subs={0}, max_ins={1}, max_dels={2}, max_l_dist=None'
     for max_subs, max_ins, max_dels in combos:
         # One sub-test per combination, so that a failure names the
         # offending arguments.
         with self.subTest(label.format(max_subs, max_ins, max_dels)), \
                 self.assertRaises(ValueError):
             self.search(b('a'), b('b'), max_subs, max_ins, max_dels, None)
コード例 #19
0
    def test_substring(self):
        # An exact occurrence must be reported as the only match whether
        # zero, one or two substitutions are allowed.
        needle = b('PATTERN')
        haystack = b('aaaaaaaaaaPATTERNaaaaaaaaa')
        wanted = [Match(start=10, end=17, dist=0, matched=b('PATTERN'))]

        for max_subs in (0, 1, 2):
            found = self.search(needle, haystack, max_subs=max_subs)
            self.expectedOutcomes(found, wanted)
コード例 #20
0
    def test_missing_second_item_complex(self):
        # With a final argument of 1 the outcome is checked through
        # expectedOutcomes against these three matches.
        strict = self.search(b('bde'), b('abcdefg'), 1, 1, 1, 1)
        self.expectedOutcomes(
            strict,
            [
                Match(start=1, end=5, dist=1),
                Match(start=2, end=5, dist=1),
                Match(start=3, end=5, dist=1),
            ],
        )

        # With a final argument of 3 the result only has to contain the
        # matches listed here; additional matches are tolerated.
        required = set([
            Match(start=1, end=5, dist=1),
            Match(start=2, end=5, dist=1),
            Match(start=3, end=5, dist=1),
            Match(start=2, end=5, dist=3),
        ])
        relaxed = set(self.search(b('bde'), b('abcdefg'), 1, 1, 1, 3))
        self.assertTrue(required.issubset(relaxed))
コード例 #21
0
        def test_missing_second_item_complex(self):
            # With a final argument of 1 the result must equal this list
            # exactly, including order.
            strict = self.search(b('bde'), b('abcdefg'), 1, 1, 1, 1)
            self.assertEqual(
                strict,
                [
                    Match(start=1, end=5, dist=1),
                    Match(start=2, end=5, dist=1),
                    Match(start=3, end=5, dist=1),
                ],
            )

            # With a final argument of 3 the result only has to contain the
            # matches listed here; additional matches are tolerated.
            required = set([
                Match(start=1, end=5, dist=1),
                Match(start=2, end=5, dist=1),
                Match(start=3, end=5, dist=1),
                Match(start=2, end=5, dist=3),
            ])
            relaxed = set(self.search(b('bde'), b('abcdefg'), 1, 1, 1, 3))
            self.assertTrue(required.issubset(relaxed))
コード例 #22
0
    def test_one_changed_in_middle(self):
        # Needle and haystack have the same length and differ in exactly
        # one position ('d' vs 'X').
        needle = b('abcdefg')
        haystack = b('abcXefg')
        whole = Match(start=0, end=7, dist=1)

        for max_subs, wanted in [(0, []), (1, [whole]), (2, [whole])]:
            found = self.search(needle, haystack, max_subs=max_subs)
            self.expectedOutcomes(found, wanted)
コード例 #23
0
 def test_valid_none_arguments_with_defined_max_l_dist(self):
     # expect no exception when max_l_dist is not None and some or all other
     # values are None
     combos = [
         (None, 0, 0),
         (0, None, 0),
         (0, 0, None),
         (0, None, None),
         (None, 0, None),
         (None, None, 0),
         (None, None, None),
     ]
     label = 'max_subs={0}, max_ins={1}, max_dels={2}, max_l_dist=0'
     for max_subs, max_ins, max_dels in combos:
         with self.subTest(label.format(max_subs, max_ins, max_dels)):
             # 'a' never occurs in 'b', so the expected result is empty.
             found = self.search(
                 b('a'), b('b'), max_subs, max_ins, max_dels, 0)
             self.assertEqual(found, [])
コード例 #24
0
    def test_one_changed_in_middle(self):
        # Needle and haystack have the same length and differ in exactly
        # one position ('d' vs 'X'); the expected matched text is the whole
        # haystack.
        needle = b('abcdefg')
        haystack = b('abcXefg')
        whole = Match(start=0, end=7, dist=1, matched=haystack)

        for max_subs, wanted in [(0, []), (1, [whole]), (2, [whole])]:
            found = self.search(needle, haystack, max_subs=max_subs)
            self.expectedOutcomes(found, wanted)
コード例 #25
0
    def test_protein_search1(self):
        # see:
        # * BioPython archives from March 14th, 2014
        #   http://lists.open-bio.org/pipermail/biopython/2014-March/009030.html
        # * https://github.com/taleinat/fuzzysearch/issues/3
        pieces = '''\
            XXXXXXXXXXXXXXXXXXXGGGTTVTTSSAAAAAAAAAAAAAGGGTTLTTSSAAAAAAAAAAAA
            AAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBGGGTTLTTSS
        '''.split()
        text = b(''.join(pieces))
        pattern = b("GGGTTLTTSS")

        # Two exact occurrences plus one that differs by a substitution;
        # each expected matched text is the matching slice of the haystack.
        near = Match(start=19, end=29, dist=1, matched=text[19:29])
        first_exact = Match(start=42, end=52, dist=0, matched=text[42:52])
        last_exact = Match(start=99, end=109, dist=0, matched=text[99:109])

        for max_subs, wanted in [
            (0, [first_exact, last_exact]),
            (1, [near, first_exact, last_exact]),
            (2, [near, first_exact, last_exact]),
        ]:
            found = self.search(pattern, text, max_subs=max_subs)
            self.expectedOutcomes(found, wanted)
コード例 #26
0
    def test_missing_second_item(self):
        # Each row pairs the four trailing search arguments with the exact
        # result list expected for them.
        needle, haystack = b('bde'), b('abcdefg')
        cases = [
            ((0, 1, 0, 1), [Match(start=1, end=5, dist=1)]),
            ((0, 0, 0, 0), []),
            ((1, 0, 0, 1), [Match(start=2, end=5, dist=1)]),
            ((0, 0, 1, 1), [Match(start=3, end=5, dist=1)]),
        ]
        for limits, wanted in cases:
            self.assertEqual(self.search(needle, haystack, *limits), wanted)
コード例 #27
0
    def test_file_openers(self):
        import codecs
        import io

        needle = 'PATTERN'
        haystack = '---PATERN---'

        # The temporary file is created with delete=False so it survives the
        # ``with`` block; the registered cleanup removes it afterwards.
        with tempfile.NamedTemporaryFile(mode='wb', delete=False) as tmp:
            filename = tmp.name
            tmp.write(b(haystack))
        self.addCleanup(os.remove, filename)

        def check_bytes(fileobj):
            found = find_near_matches_in_file(b(needle), fileobj, max_l_dist=1)
            self.assertEqual(found, [Match(3, 9, 1, b('PATERN'))])

        def check_unicode(fileobj):
            found = find_near_matches_in_file(u(needle), fileobj, max_l_dist=1)
            self.assertEqual(found, [Match(3, 9, 1, u('PATERN'))])

        # The built-in open() in 'r' mode is checked with the bytes needle
        # when PY2 is true and with the unicode needle otherwise.
        builtin_text_check = check_bytes if PY2 else check_unicode

        for opener, mode, check in [
            (open, 'rb', check_bytes),
            (open, 'r', builtin_text_check),
            (codecs.open, 'rb', check_bytes),
            (codecs.open, 'r', check_unicode),
            (io.open, 'rb', check_bytes),
            (io.open, 'r', check_unicode),
        ]:
            with opener(filename, mode) as fileobj:
                check(fileobj)
コード例 #28
0
    def test_double_first_item(self):
        # Each row pairs the four trailing search arguments with the matches
        # expected for them; the haystack repeats the needle's first byte
        # right before the exact occurrence.
        needle, haystack = b('def'), b('abcddefg')
        exact = Match(start=4, end=7, dist=0)
        cases = [
            ((0, 0, 0, 0), [exact]),
            ((1, 0, 0, 1), [exact]),
            ((0, 0, 1, 1), [exact, Match(start=5, end=7, dist=1)]),
            ((0, 1, 0, 1), [Match(start=3, end=7, dist=1), exact]),
        ]
        for limits, wanted in cases:
            found = self.search(needle, haystack, *limits)
            self.expectedOutcomes(found, wanted)
コード例 #29
0
    def test_subseq_length_less_than_max_l_dist(self):
        # Each case passes one value for all four trailing arguments; that
        # value is larger than the needle length, and ValueError is expected.
        cases = [
            ('b', 'abc', 2),
            ('b', 'abc', 5),
            ('PATTERN', 'PATTERN', len('PATTERN') + 1),
            ('PATTERN', 'PATTERN', len('PATTERN') + 7),
        ]
        for needle, haystack, limit in cases:
            with self.assertRaises(ValueError):
                self.search(b(needle), b(haystack), *([limit] * 4))
コード例 #30
0
    def test_file_openers(self):
        import codecs
        import io

        needle = 'PATTERN'
        haystack = '---PATERN---'

        # The temporary file is created with delete=False so it survives the
        # ``with`` block; the registered cleanup removes it afterwards.
        with tempfile.NamedTemporaryFile(mode='wb', delete=False) as tmp:
            filename = tmp.name
            tmp.write(b(haystack))
        self.addCleanup(os.remove, filename)

        def check_bytes(fileobj):
            found = find_near_matches_in_file(b(needle), fileobj, max_l_dist=1)
            self.assertEqual(found, [Match(3, 9, 1)])

        def check_unicode(fileobj):
            found = find_near_matches_in_file(u(needle), fileobj, max_l_dist=1)
            self.assertEqual(found, [Match(3, 9, 1)])

        # The built-in open() in 'r' mode is checked with the bytes needle
        # when PY2 is true and with the unicode needle otherwise.
        builtin_text_check = check_bytes if PY2 else check_unicode

        for opener, mode, check in [
            (open, 'rb', check_bytes),
            (open, 'r', builtin_text_check),
            (codecs.open, 'rb', check_bytes),
            (codecs.open, 'r', check_unicode),
            (io.open, 'rb', check_bytes),
            (io.open, 'r', check_unicode),
        ]:
            with opener(filename, mode) as fileobj:
                check(fileobj)
コード例 #31
0
 def test_match_identical_sequence(self):
     # Needle and haystack are the same bytes, so one match spanning the
     # whole haystack is expected.
     word = 'PATTERN'
     found = self.search(b(word), b(word), max_subs=0)
     whole = Match(start=0, end=len(word), dist=0, matched=b(word))
     self.expectedOutcomes(found, [whole])
コード例 #32
0
 def test_empty_sequence(self):
     # Searching an empty haystack is expected to yield no matches.
     found = self.search(b('PATTERN'), b(''), max_subs=0)
     self.expectedOutcomes(found, [])
コード例 #33
0
 def test_missing_at_beginning(self):
     # No match is expected for "ATTEST" in "TESTOSTERONE" with at most
     # two substitutions.
     needle, haystack = b("ATTEST"), b("TESTOSTERONE")
     found = self.search(needle, haystack, max_subs=2)
     self.expectedOutcomes(found, [])
コード例 #34
0
 def test_empty_subsequence_exeption(self):
     # Searching for an empty needle is expected to raise ValueError.
     with self.assertRaises(ValueError):
         empty_needle = b('')
         self.search(empty_needle, b('TEXT'), max_subs=0)
コード例 #35
0
ファイル: test_memmem.py プロジェクト: taleinat/fuzzysearch
 def search(self, subsequence, sequence):
     # Pass both arguments through b() and delegate to simple_memmem.
     needle = b(subsequence)
     haystack = b(sequence)
     return simple_memmem(needle, haystack)
コード例 #36
0
 def test_empty_sequence(self):
     # Searching an empty haystack is expected to yield no matches.
     needle, haystack = b('PATTERN'), b('')
     self.expectedOutcomes(
         self.search(needle, haystack, max_subs=0),
         [],
     )
コード例 #37
0
 def test_match_identical_sequence(self):
     # Needle and haystack are the same bytes, so one match spanning the
     # whole haystack is expected.
     word = 'PATTERN'
     found = self.search(b(word), b(word), max_subs=0)
     self.expectedOutcomes(found, [Match(start=0, end=len(word), dist=0)])
コード例 #38
0
 def test_file_bytes(f):
     # ``self`` and ``needle`` are not parameters; they are resolved from
     # an enclosing scope that is not shown here.
     found = find_near_matches_in_file(b(needle), f, max_l_dist=1)
     self.assertEqual(found, [Match(3, 9, 1, b('PATERN'))])
コード例 #39
0
 def test_match_identical_sequence(self):
     # Needle and haystack are the same bytes and all four trailing
     # arguments are zero: one match covering the whole haystack.
     needle = b('PATTERN')
     haystack = b('PATTERN')
     found = self.search(needle, haystack, 0, 0, 0, 0)
     whole = Match(start=0, end=7, dist=0, matched=b('PATTERN'))
     self.assertEqual(found, [whole])
コード例 #40
0
ファイル: test_common.py プロジェクト: taleinat/fuzzysearch
 def count_diffs(self, seq1, seq2, max_diffs):
     # Pass both sequences through b() and delegate to
     # count_differences_with_maximum_byteslike, forwarding max_diffs as is.
     first = b(seq1)
     second = b(seq2)
     return count_differences_with_maximum_byteslike(first, second, max_diffs)
コード例 #41
0
 def test_empty_sequence(self):
     # Searching an empty haystack must return an empty list.
     found = self.search(b('PATTERN'), b(''), 0, 0, 0, 0)
     self.assertEqual(found, [])
コード例 #42
0
 def test_missing_second_item_complex(self):
     # Only requires the search result to be truthy; the individual
     # matches are not inspected.
     found = self.search(b('bde'), b('abcdefg'), 1, 1, 1, 1)
     self.assertTrue(found)
コード例 #43
0
    def test_missing_second_item_complex(self):
        # With a final argument of 1 the outcome is checked through
        # expectedOutcomes against these three matches.
        strict = self.search(b('bde'), b('abcdefg'), 1, 1, 1, 1)
        self.expectedOutcomes(
            strict,
            [
                Match(start=1, end=5, dist=1, matched=b('bcde')),
                Match(start=2, end=5, dist=1, matched=b('cde')),
                Match(start=3, end=5, dist=1, matched=b('de')),
            ],
        )

        # With a final argument of 3 the result only has to contain the
        # matches listed here; additional matches are tolerated.
        # NOTE(review): the last entry pairs start=2, end=5 with
        # matched=b('bcd'), but b('abcdefg')[2:5] is b('cde') -- confirm
        # which value is intended.
        required = {
            Match(start=1, end=5, dist=1, matched=b('bcde')),
            Match(start=2, end=5, dist=1, matched=b('cde')),
            Match(start=3, end=5, dist=1, matched=b('de')),
            Match(start=2, end=5, dist=2, matched=b('bcd')),
        }
        relaxed = set(self.search(b('bde'), b('abcdefg'), 1, 1, 1, 3))
        self.assertTrue(required.issubset(relaxed))
コード例 #44
0
 def test_match_identical_sequence(self):
     # Needle and haystack are the same bytes and all four trailing
     # arguments are zero: one match covering the whole haystack.
     needle = b('PATTERN')
     haystack = b('PATTERN')
     found = self.search(needle, haystack, 0, 0, 0, 0)
     self.assertEqual(found, [Match(start=0, end=7, dist=0)])
コード例 #45
0
 def test_empty_subsequence_exeption(self):
     # Searching for an empty needle is expected to raise ValueError.
     with self.assertRaises(ValueError):
         needle, haystack = b(''), b('TEXT')
         self.search(needle, haystack, max_subs=0)
コード例 #46
0
 def test_empty_sequence(self):
     # Searching an empty haystack must return an empty list.
     needle, haystack = b('PATTERN'), b('')
     self.assertEqual(
         self.search(needle, haystack, 0, 0, 0, 0),
         [],
     )
コード例 #47
0
 def test_missing_at_beginning(self):
     # No match is expected for "ATTEST" in "TESTOSTERONE" with at most
     # two substitutions.
     found = self.search(b("ATTEST"), b("TESTOSTERONE"), max_subs=2)
     no_matches = []
     self.expectedOutcomes(found, no_matches)
コード例 #48
0
    def test_subsequence_split_between_chunks(self):
        # Plant a (near-)occurrence of the needle at offsets around
        # ``chunk_size`` in an otherwise zero-filled file, then search that
        # file in binary and text modes with ``_chunk_size`` set to
        # ``chunk_size`` and to half of it.
        with tempfile.NamedTemporaryFile(mode='wb', delete=False) as f:
            filename = f.name
        self.addCleanup(os.remove, filename)

        # Each case: (needle, bytes planted in the file, max_l_dist, matches
        # expected relative to the start of the planted bytes).
        for needle, haystack_match, max_l_dist, expected_matches in [
            (b('PATTERN'), b('PATERN'), 0, []),
            (b('PATTERN'), b('PATERN'), 1, [Match(0, 6, 1, b('PATERN'))]),
            (b('PATTERN'), b('PATERN'), 2, [Match(0, 6, 1, b('PATERN'))]),
            (b('PATTERN'), b('PATTERN'), 0, [Match(0, 7, 0, b('PATTERN'))]),
        ]:
            for chunk_size, delta in product(
                    [100, 2**10, 2**12, 2**18, 2**20],
                    sorted({-len(needle), -len(needle) + 1, -4, -2, -1, 0,
                            1})):
                if len(needle) // (max_l_dist + 1) < 3 and chunk_size > 2**10:
                    # no ngrams search, so skip long searches which will be
                    # slow
                    continue
                with self.subTest(
                        needle=needle,
                        haystack_match=haystack_match,
                        max_l_dist=max_l_dist,
                        chunk_size=chunk_size,
                        delta=delta,
                ):
                    # Absolute file offset at which the planted bytes start.
                    offset = chunk_size + delta

                    haystack = bytearray(chunk_size + 100)
                    haystack[offset:offset + len(haystack_match)] = \
                        haystack_match
                    with open(filename, 'wb') as f:
                        f.write(haystack)

                    # Expected matches shifted to absolute offsets. The
                    # matched text is the planted bytes themselves, decoded
                    # for text-mode files unless PY2 is true. It must not be
                    # the whole ``haystack`` buffer.
                    expected_bytes = [
                        attr.evolve(
                            match,
                            start=match.start + offset,
                            end=match.end + offset,
                            matched=haystack_match)
                        for match in expected_matches
                    ]
                    matched_text = (haystack_match if PY2
                                    else haystack_match.decode('utf-8'))
                    expected_text = [
                        attr.evolve(
                            match,
                            start=match.start + offset,
                            end=match.end + offset,
                            matched=matched_text)
                        for match in expected_matches
                    ]

                    with open(filename, 'rb') as f:
                        self.assertEqual(
                            find_near_matches_in_file(
                                needle, f,
                                max_l_dist=max_l_dist,
                                _chunk_size=chunk_size),
                            expected_bytes)

                        # Rewind and repeat with half the chunk size.
                        f.seek(0)

                        self.assertEqual(
                            find_near_matches_in_file(
                                needle, f,
                                max_l_dist=max_l_dist,
                                _chunk_size=chunk_size // 2),
                            expected_bytes)

                    with open(filename, 'r') as f:
                        _needle = needle if PY2 else needle.decode('utf-8')
                        self.assertEqual(
                            find_near_matches_in_file(
                                _needle, f,
                                max_l_dist=max_l_dist,
                                _chunk_size=chunk_size),
                            expected_text)

                    with io.open(filename, 'r', encoding='ascii') as f:
                        self.assertEqual(
                            find_near_matches_in_file(
                                needle.decode('ascii'), f,
                                max_l_dist=max_l_dist,
                                _chunk_size=chunk_size),
                            expected_text)

                        # Rewind and repeat with half the chunk size.
                        f.seek(0)

                        self.assertEqual(
                            find_near_matches_in_file(
                                needle.decode('ascii'), f,
                                max_l_dist=max_l_dist,
                                _chunk_size=chunk_size // 2),
                            expected_text)
コード例 #49
0
ファイル: test_memmem.py プロジェクト: taleinat/fuzzysearch
 def search(self, subsequence, sequence):
     # Pass both arguments through b() and delegate to wordlen_memmem.
     needle = b(subsequence)
     haystack = b(sequence)
     return wordlen_memmem(needle, haystack)