def test_constructor(self):
    """Check the accepted ways of building a ``SearcherCollection``.

    The collection is built from two positional searchers, from a single
    list of searchers, and from one lone searcher. Each time it must
    report ``six.text_type`` as its ``match_type`` and iterate over
    exactly the searchers it was given, in order.
    """
    first, second = TextSearcher(u('')), TextSearcher(u(''))

    # Searchers supplied as separate positional arguments.
    collection = SearcherCollection(first, second)
    self.assertEqual(six.text_type, collection.match_type)
    self.assertEqual([first, second], list(collection))

    # Searchers supplied as one list.
    collection = SearcherCollection([first, second])
    self.assertEqual(six.text_type, collection.match_type)
    self.assertEqual([first, second], list(collection))

    # A single searcher on its own.
    collection = SearcherCollection(first)
    self.assertEqual([first], list(collection))
    self.assertEqual(six.text_type, collection.match_type)
 def test_multi_regex_multi_match(self):
     """Search a mixed regex/text collection in which every searcher hits.

     Each of the four patterns occurs somewhere in the input. The asserted
     result is the match for ``pi`` (index 2 in the collection), covering
     offsets 0 to 2, i.e. the occurrence at the very start of the text.
     """
     searchers = [
         RegexSearcher(u('omicron')),
         RegexSearcher(u('[eu]psilon')),
         TextSearcher(u('pi')),
         TextSearcher(u('iota')),
     ]
     uut = SearcherCollection(searchers)
     haystack = u('pi iota epsilon upsilon omicron')

     found = uut.search(haystack)

     self.assertIsNotNone(found)
     self.assertEqual(2, uut.index(found.searcher))
     self.assertEqual(0, found.start)
     self.assertEqual(2, found.end)
    def test_unicode_combining_characters(self):
        """Match text regardless of composed or decomposed Unicode form.

        An accented character can be stored either as one precomposed code
        point or as a base letter followed by a combining code point (see
        https://docs.python.org/2/library/unicodedata.html). This test uses
        capital C with a cedilla in both spellings and expects a searcher
        built from either spelling to find a match in either spelling.
        """
        # U+00C7 is the precomposed letter; U+0043 followed by U+0327 is
        # "C" plus the combining cedilla.
        precomposed = six.unichr(0xC7)
        decomposed = six.unichr(0x43) + six.unichr(0x0327)
        spellings = (precomposed, decomposed)

        # Exercise every pattern/input pairing of the two spellings.
        for pattern in spellings:
            searcher = TextSearcher(pattern)
            for haystack in spellings:
                self.assertIsNotNone(searcher.search(haystack))
    def test_constructor_invalid(self):
        """Check that ``SearcherCollection`` rejects unusable arguments.

        Expected failures: ``ValueError`` for an empty list and for a mix
        of a text searcher with a bytes searcher; ``TypeError`` for the
        integer ``1`` and for objects that define only one of
        ``match_type`` and ``search``.
        """
        # Stand-in classes that each define just one of the two attributes.
        class NoSearchSearcher(object):
            match_type = None

        class NoMatchTypeSearcher(object):
            search = None

        # No searchers at all.
        with self.assertRaises(ValueError):
            SearcherCollection([])

        # Something that is neither a searcher nor a sequence of them.
        with self.assertRaises(TypeError):
            SearcherCollection(1)

        # Text and bytes searchers combined in one collection.
        with self.assertRaises(ValueError):
            SearcherCollection(TextSearcher(u('')), BytesSearcher(b''))

        # Object with ``match_type`` but without ``search``.
        with self.assertRaises(TypeError):
            SearcherCollection(NoSearchSearcher())

        # Object with ``search`` but without ``match_type``.
        with self.assertRaises(TypeError):
            SearcherCollection(NoMatchTypeSearcher())
 def test_fail_using_bytes(self):
     """``TextSearcher`` must raise ``TypeError`` for a bytes pattern."""
     # Callable form of assertRaises: the constructor is invoked with the
     # bytes literal and the TypeError is expected to come from that call.
     self.assertRaises(TypeError, TextSearcher, b'bytes type')
 def test_text_constructor(self):
     """A ``TextSearcher`` built from text reports ``six.text_type``."""
     reported_type = TextSearcher(u('some unicode')).match_type
     self.assertEqual(reported_type, six.text_type)
 def test_repr(self):
     """``repr()`` of a ``SequenceMatch`` must not raise."""
     # Smoke test only: the resulting string itself is not inspected.
     repr(SequenceMatch(TextSearcher(u('rho')), 'rho', 0, 3))
 def test_repr(self):
     """``repr()`` of a ``SearcherCollection`` must not raise."""
     members = [
         TextSearcher(u('epsilon')),
         RegexSearcher(u('[eu]psilon')),
     ]
     # Smoke test only: the resulting string itself is not inspected.
     repr(SearcherCollection(members))
 def test_repr(self):
     """``repr()`` of a ``TextSearcher`` must not raise."""
     # Smoke test only: the resulting string itself is not inspected.
     repr(TextSearcher(u('rho')))
 def test_multi_match(self):
     """With several occurrences, the asserted match is the first one.

     ``one`` appears at both ends of the input; the expected span is
     offsets 0 to 3.
     """
     haystack = u('one two three two one')
     found = TextSearcher(u('one')).search(haystack)

     self.assertIsNotNone(found)
     self.assertEqual(0, found.start)
     self.assertEqual(3, found.end)
 def test_single_match(self):
     """A pattern occurring once mid-text is found at offsets 11 to 14."""
     haystack = u('the number one appears once')
     found = TextSearcher(u('one')).search(haystack)

     self.assertIsNotNone(found)
     self.assertEqual(11, found.start)
     self.assertEqual(14, found.end)
 def test_no_match(self):
     """Searching text that lacks the pattern must return ``None``."""
     uut = TextSearcher(u('I will never match'))
     # assertIsNone checks identity with None, so a result object that
     # merely compares equal to None cannot slip through. It also mirrors
     # the assertIsNotNone checks used by the matching tests.
     self.assertIsNone(uut.search(u('alpha beta gamma')))
 def test_not_patterns(self):
     """``TextSearcher`` must raise ``TypeError`` for ``None`` or an int."""
     for bogus in (None, 5):
         with self.assertRaises(TypeError):
             TextSearcher(bogus)