def test_constructor(self):
    """SearcherCollection accepts varargs, a single list, or one searcher.

    In every case the collection must report the text match type and
    iterate over exactly the searchers it was given, in order.
    """
    first, second = TextSearcher(u('')), TextSearcher(u(''))

    # Same two searchers, once as positional arguments and once as a list.
    for ctor_args in ((first, second), ([first, second],)):
        collection = SearcherCollection(*ctor_args)
        self.assertEqual(six.text_type, collection.match_type)
        self.assertEqual([first, second], list(collection))

    # A lone searcher is accepted without wrapping it in a list.
    single = SearcherCollection(first)
    self.assertEqual([first], list(single))
    self.assertEqual(six.text_type, single.match_type)
def test_multi_regex_multi_match(self):
    """Several searchers match the text; 'pi' at offset 0 is reported.

    The searched text contains a hit for every searcher in the
    collection. The test expects the returned match to belong to the
    searcher at index 2 (the 'pi' text searcher) and to span 0-2.
    """
    members = [
        RegexSearcher(u('omicron')),
        RegexSearcher(u('[eu]psilon')),
        TextSearcher(u('pi')),
        TextSearcher(u('iota')),
    ]
    uut = SearcherCollection(members)

    found = uut.search(u('pi iota epsilon upsilon omicron'))

    self.assertIsNotNone(found)
    self.assertEqual(2, uut.index(found.searcher))
    self.assertEqual(0, found.start)
    self.assertEqual(2, found.end)
def test_unicode_combining_characters(self):
    """Equivalent Unicode representations must match each other.

    Some Unicode characters can be written in more than one way: an
    accented character may be a single code point with the accent baked
    in, or the plain letter followed by a combining code point. See
    https://docs.python.org/2/library/unicodedata.html.

    The test uses a capital C with a cedilla, both as the composite
    character (U+00C7) and as 'C' (U+0043) followed by the combining
    cedilla (U+0327).
    """
    composite = six.unichr(0xC7)
    combining = six.unichr(0x43) + six.unichr(0x0327)
    representations = (composite, combining)

    # Every pattern form must be found in every text form.
    for pattern in representations:
        searcher = TextSearcher(pattern)
        for haystack in representations:
            self.assertIsNotNone(searcher.search(haystack))
def test_constructor_invalid(self):
    """SearcherCollection rejects invalid constructor arguments.

    Checked cases:
      * an empty list                          -> ValueError
      * an int instead of a searcher           -> TypeError
      * a text searcher mixed with a bytes one -> ValueError
      * an object lacking ``search``           -> TypeError
      * an object lacking ``match_type``       -> TypeError
    """
    with self.assertRaises(ValueError):
        SearcherCollection([])

    with self.assertRaises(TypeError):
        SearcherCollection(1)

    with self.assertRaises(ValueError):
        SearcherCollection(TextSearcher(u('')), BytesSearcher(b''))

    # Throwaway classes that each provide only one of the two attributes
    # the other is missing.
    incomplete_specs = (
        ('NoSearchSearcher', {'match_type': None}),
        ('NoMatchTypeSearcher', {'search': None}),
    )
    for class_name, attributes in incomplete_specs:
        incomplete_cls = type(class_name, (object,), attributes)
        with self.assertRaises(TypeError):
            SearcherCollection(incomplete_cls())
def test_fail_using_bytes(self):
    """Constructing a TextSearcher from a bytes pattern raises TypeError."""
    self.assertRaises(TypeError, TextSearcher, b'bytes type')
def test_text_constructor(self):
    """A TextSearcher built from unicode text reports the text match type."""
    self.assertEqual(
        TextSearcher(u('some unicode')).match_type,
        six.text_type,
    )
def test_repr(self):
    """repr() of a SequenceMatch must not raise; its output is not checked."""
    rho_searcher = TextSearcher(u('rho'))
    repr(SequenceMatch(rho_searcher, 'rho', 0, 3))
def test_repr(self):
    """repr() of a SearcherCollection must not raise; output is not checked."""
    members = [TextSearcher(u('epsilon')), RegexSearcher(u('[eu]psilon'))]
    repr(SearcherCollection(members))
def test_repr(self):
    """repr() of a TextSearcher must not raise; its output is not checked."""
    repr(TextSearcher(u('rho')))
def test_multi_match(self):
    """With two occurrences of the pattern, the one at offset 0 is reported."""
    finder = TextSearcher(u('one'))
    haystack = u('one two three two one')

    found = finder.search(haystack)

    self.assertIsNotNone(found)
    self.assertEqual(0, found.start)
    self.assertEqual(3, found.end)
def test_single_match(self):
    """The match for 'one' in the sentence is reported at span 11-14."""
    finder = TextSearcher(u('one'))
    haystack = u('the number one appears once')

    found = finder.search(haystack)

    self.assertIsNotNone(found)
    self.assertEqual(11, found.start)
    self.assertEqual(14, found.end)
def test_no_match(self):
    """Searching text that lacks the pattern yields None."""
    finder = TextSearcher(u('I will never match'))
    outcome = finder.search(u('alpha beta gamma'))
    self.assertEqual(None, outcome)
def test_not_patterns(self):
    """Non-text patterns (None and an int) are rejected with TypeError."""
    for bad_pattern in (None, 5):
        with self.assertRaises(TypeError):
            TextSearcher(bad_pattern)