def test_character_series(self):
    """
        Test the CharacterSeries, Letters and Alphanumerics parser classes.

        An ASCII-only CharacterSeries is checked first.  Letters and
        Alphanumerics are then run over the same Unicode samples; they are
        expected to differ only for a string of digits, where Letters
        yields no output and Alphanumerics yields the digits unchanged.
    """
    # ASCII series built from an explicit set of accepted characters.
    ascii_series = parser.CharacterSeries(string.ascii_letters)
    self._test_multiple(ascii_series, [("abc", ["abc"])],
                        must_finish=False)

    # Samples shared by the Letters and Alphanumerics checks.
    shared_cases = [
        # Unicode string with ASCII-only content
        (u"abc", [u"abc"]),
        # Accented (non-ascii) characters, alone and mixed with ascii
        (u"éèàâêùôöëäïüû", [u"éèàâêùôöëäïüû"]),
        (u"touché", [u"touché"]),
    ]
    digits = string.digits

    # Letters: the digits input is expected to produce an empty output list.
    letters_cases = shared_cases + [(digits, [])]
    self._test_multiple(parser.Letters(), letters_cases, must_finish=False)

    # Alphanumerics: the digits input is expected to come back whole.
    alphanumeric_cases = shared_cases + [(digits, [digits])]
    self._test_multiple(parser.Alphanumerics(), alphanumeric_cases,
                        must_finish=False)
def test_other_alphabets(self):
    """
        Test that Letters and Alphanumerics accept non-Latin alphabets.

        The DNS and WSR engines are mostly limited to western European
        languages (i.e. windows-1252), but that restriction need not apply
        to the parser itself, so a run of Cyrillic letters is expected to
        be returned as a single match by both classes.
    """
    cyrillic = u"йцукенгшщзхъфывапролджэячсмитьбю"
    cases = [(cyrillic, [cyrillic])]

    # Both classes are exercised with the same samples, Letters first.
    for element_class in (parser.Letters, parser.Alphanumerics):
        self._test_multiple(element_class(), cases, must_finish=False)
def test_character_series(self):
    """
        Test CharacterSeries parser class.

        Checks an ASCII-only CharacterSeries and the Letters class against
        the simple input "abc", given as a plain and as a unicode literal.
    """
    # NOTE(review): a method with this same name is defined earlier in
    # this file.  If both live in the same class, this later definition
    # replaces the earlier one and the earlier tests never run -- confirm
    # which of the two should remain.

    # Test with ascii characters.
    # string.ascii_letters is used instead of string.letters: the latter
    # is locale-dependent on Python 2 and does not exist on Python 3.
    self._test_multiple(
        parser.CharacterSeries(string.ascii_letters),
        [
            ("abc", ["abc"]),
        ],
        must_finish=False
    )

    # Test with Unicode characters
    self._test_multiple(
        parser.Letters(),
        [
            (u"abc", [u"abc"]),
        ],
        must_finish=False
    )
def test_repetition(self):
    """
        Test the Repetition parser class.

        The element under test repeats an alternative of Letters and
        Whitespace, so each input is expected to be split into alternating
        runs of letters and whitespace.
    """
    # Repetition over "letters or whitespace".
    repeated = parser.Repetition(
        parser.Alternative((parser.Letters(), parser.Whitespace()))
    )

    # Inputs made of ascii letters.
    ascii_cases = (
        ("abc", ["abc"]),
        ("abc abc", ["abc", " ", "abc"]),
        ("abc abc\t\t\n cba", ["abc", " ", "abc", "\t\t\n ", "cba"]),
    )

    # The same shapes of input using non-ascii letters.
    non_ascii_cases = (
        (u"êùö", [u"êùö"]),
        (u"êùö êùö", [u"êùö", u" ", u"êùö"]),
        (u"êùö êùö\t\t\n öùê", [u"êùö", " ", u"êùö", u"\t\t\n ", u"öùê"]),
    )

    for cases in (ascii_cases, non_ascii_cases):
        self._test_single(repeated, cases)