def test_unicode_combining_characters(self):
    # The same visible character can be encoded in more than one way: an
    # accented letter may be one precomposed code point, or the plain
    # letter followed by a combining code point.
    # See https://docs.python.org/2/library/unicodedata.html.
    #
    # Here we use a capital C with a cedilla in both encodings.
    composite = six.unichr(0xC7)  # single precomposed code point
    combining = six.unichr(0x43) + six.unichr(0x0327)  # 'C' + combining cedilla
    forms = (composite, combining)

    # Every pairing of search term and searched text must produce a match,
    # regardless of which encoding either side uses.
    for pattern in forms:
        searcher = TextSearcher(pattern)
        for haystack in forms:
            self.assertIsNotNone(searcher.search(haystack))
def test_single_match(self):
    # The search term occurs exactly once in the text; the reported span
    # must cover that occurrence (offsets 11 to 14).
    needle = u('one')
    searcher = TextSearcher(needle)
    haystack = u('the number one appears once')

    found = searcher.search(haystack)

    self.assertIsNotNone(found)
    self.assertEqual(11, found.start)
    self.assertEqual(14, found.end)
def test_multi_match(self):
    # The search term occurs twice in the text (at the start and at the
    # end); the expected span, offsets 0 to 3, is the first occurrence.
    needle = u('one')
    searcher = TextSearcher(needle)
    haystack = u('one two three two one')

    found = searcher.search(haystack)

    self.assertIsNotNone(found)
    self.assertEqual(0, found.start)
    self.assertEqual(3, found.end)
def test_no_match(self):
    # A search term that does not occur anywhere in the text must yield
    # None rather than a match object.
    uut = TextSearcher(u('I will never match'))
    # assertIsNone checks identity with None (mirroring the assertIsNotNone
    # checks in the sibling tests) instead of relying on equality, which a
    # result type could override.
    self.assertIsNone(uut.search(u('alpha beta gamma')))