def test_unicode_combining_characters(self):
        # The same visible character can be encoded in more than one way: as
        # a single precomposed code point (accent baked in), or as the plain
        # base letter followed by a combining code point. See
        # https://docs.python.org/3/library/unicodedata.html.
        # Both forms below spell a capital C with a cedilla: first the
        # precomposed U+00C7, then "C" (U+0043) plus the combining cedilla
        # (U+0327).
        precomposed = six.unichr(0xC7)
        decomposed = six.unichr(0x43) + six.unichr(0x0327)
        variants = (precomposed, decomposed)

        # Each form used as the search pattern must be found in each form
        # used as the subject text (four pattern/subject pairings in total).
        for pattern in variants:
            searcher = TextSearcher(pattern)
            for subject in variants:
                self.assertIsNotNone(searcher.search(subject))
 def test_single_match(self):
     # The pattern occurs exactly once in the subject (the trailing "once"
     # does not contain "one"), so the match must start at offset 11 and
     # end at offset 14.
     searcher = TextSearcher(u('one'))
     subject = u('the number one appears once')
     found = searcher.search(subject)
     self.assertIsNotNone(found)
     self.assertEqual(11, found.start)
     self.assertEqual(14, found.end)
 def test_multi_match(self):
     # The pattern occurs at both ends of the subject; the match returned
     # by search() is expected to be the leading one (offsets 0 to 3).
     searcher = TextSearcher(u('one'))
     subject = u('one two three two one')
     found = searcher.search(subject)
     self.assertIsNotNone(found)
     self.assertEqual(0, found.start)
     self.assertEqual(3, found.end)
 def test_no_match(self):
     # A pattern that is absent from the subject must produce no match.
     # assertIsNone checks identity with None, whereas assertEqual(None, x)
     # would also accept any object that merely compares equal to None; it
     # also mirrors the assertIsNotNone checks used by the other tests.
     uut = TextSearcher(u('I will never match'))
     self.assertIsNone(uut.search(u('alpha beta gamma')))