def run_test(self, name, text, parse, match2, match3, error, config=lambda x: None, **kargs):
    '''
    Drive the `parse<name>` and `match<name>` entry points of `Any`-based
    matchers against `text` and compare the stringified results.

    `parse` is the expected text for an open-ended repeat, `match2` and
    `match3` are the expected texts for repeats of exactly 2 and 3, and
    `error` is compared against a `FullFirstMatchException` if a bare
    `Any()` raises one.  `config` is applied to every matcher before use;
    `kargs` are forwarded to the parser call.
    '''
    # Open-ended repeat, via the parse entry point.
    greedy = Any()[:, ...]
    config(greedy)
    parse_method = getattr(greedy, 'parse' + name)
    assert_str(str(parse_method(text, **kargs)), parse)

    # Fixed-count repeats, via the match entry point.
    for count, expected in ((2, match2), (3, match3)):
        repeated = Any()[count, ...]
        repeated.config.no_full_first_match()
        config(repeated)
        match_method = getattr(repeated, 'match' + name)
        assert_str(str(list(match_method(text, **kargs))), expected)

    # A single Any(), where only a raised error is checked.
    single = Any()
    config(single)
    parse_method = getattr(single, 'parse' + name)
    try:
        parse_method(text, **kargs)
    except FullFirstMatchException as e:
        assert_str(e, error)
def test_complex(self):
    '''
    Parse 'ab' with a `Consumer` wrapped around a two-matcher sequence.

    Sub-matchers must be evaluated via trampolining for this to work; a
    failure here may point to a problem with generator_matcher.
    '''
    sequence = Any() & Any('b')
    parser = Consumer(sequence).get_parse()
    result = parser('ab')
    assert ['a', 'b'] == result, result
def test_post_condition(self):
    '''
    Check `PostCondition` around a three-character repeat: predicates that
    return false must fail, predicates that return true must match 'abc'.
    '''
    def three():
        # A fresh matcher for every case, as each case builds its own.
        return Any()[3, ...]

    self.assert_fail('abc', PostCondition(three(), lambda x: False))
    self.assert_fail('abc', PostCondition(three(), lambda x: len(x[0]) == 2))
    self.assert_literal('abc', PostCondition(three(), lambda x: True))
    self.assert_literal('abc', PostCondition(three(), lambda x: len(x[0]) == 3))
def UnsignedReal(decimal='.'):
    '''
    Build a matcher for a run of digits with an optional decimal point, so
    that integer and float values are both accepted.

    `decimal` gives the character(s) accepted as the decimal point.
    '''
    # Optional leading digits, a decimal point, then digits.
    with_fraction = Join(Optional(UnsignedInteger()), Any(decimal),
                         UnsignedInteger())
    # Digits with an optional trailing decimal point.
    without_fraction = Join(UnsignedInteger(), Optional(Any(decimal)))
    return Or(with_fraction, without_fraction)
def UnsignedFloat(decimal='.'):
    '''
    Build a matcher for a run of digits that must contain a decimal point,
    i.e. real values that are not integers.

    `decimal` gives the character(s) accepted as the decimal point.
    '''
    # Optional leading digits, a decimal point, then digits.
    with_fraction = Join(Optional(UnsignedInteger()), Any(decimal),
                         UnsignedInteger())
    # Digits followed by a mandatory decimal point.
    trailing_point = Join(UnsignedInteger(), Any(decimal))
    return Or(with_fraction, trailing_point)
def assert_separator(self, stream, start, stop, algorithm, target):
    '''
    Match `stream` with a comma-separated repeat of `Any('abc')` and check
    that the joined text of each result equals `target`, in order.

    `start`, `stop` and `algorithm` are passed straight to `Repeat`.
    '''
    item = Any('abc')
    comma = Any(',')
    matcher = Repeat(item, start, stop, algorithm=algorithm, separator=comma)
    matcher.config.no_full_first_match()
    result = []
    for tokens, _remaining in matcher.match_string(stream):
        result.append(''.join(tokens))
    assert target == result, result
def test_exclude_sequence(self):
    '''
    Check `ExcludeSequence(eq, 'abc')`: `parse_null` must raise a
    `FilterException` with the expected message, while `parse_string`
    must return the filtered text.
    '''
    # For debug logging: basicConfig(level=DEBUG)
    stream = 'ababcdababcabcdbcd'
    matcher = ExcludeSequence(eq, 'abc')
    try:
        matcher(Any()[:, ...]).parse_null(stream)
    except FilterException as error:
        assert str(error) == 'Can only filter LocationStream instances.'
    else:
        assert False, 'expected error'
    result = matcher(Any()[:, ...]).parse_string(stream)
    assert result == ['abdabdbcd'], result
# Check `Columns` through `assert_direct`: three (start, stop) column specs,
# each paired with an `Any()` repeat, are applied with `[2]` and expected to
# produce the six listed fields in a single result list.
# NOTE(review): this definition appears whitespace-collapsed onto one line;
# the triple-quoted input presumably spanned two text lines.  Confirm the
# original layout before reformatting, since the literal is runtime data.
def test_table(self): #basicConfig(level=DEBUG) self.assert_direct( '''0123456789 abcdefghij ''', Columns(((0,3), Any()[3:,...]), ((0,4), Any()[4:,...]), ((5,8), Any()[3:,...]))[2], [['012', '0123', '567', 'abc', 'abcd', 'fgh']])
def SkipString(quote='"', escape='\\', ignore='\n', empty='', join=__add__):
    '''
    Build a matcher for a quoted string that may contain the `ignore`
    characters (newlines by default), which are dropped from the result.

    `quote` delimits the string; if `escape` is truthy, an escaped quote is
    accepted inside the string (the escape itself is dropped).  The matched
    pieces are combined by `Repeat` using `reduce=(empty, join)`.
    '''
    delimiter = Literal(quote)
    # Anything that is neither the delimiter nor an ignored character.
    char = AnyBut(Or(delimiter, Any(ignore)))
    if escape:
        char = Or(char, And(Drop(escape), delimiter))
    # Ignored characters are consumed but contribute nothing.
    char_or_skip = Or(char, Drop(Any(ignore)))
    body = Repeat(char_or_skip, reduce=(empty, join))
    return And(Drop(delimiter), body, Drop(delimiter))
def test_add(self):
    '''
    Check that `+` between `Any()` matchers joins the matched items of a
    list stream, and that a stream too short for the matcher gives nothing.
    '''
    # For debug logging: basicConfig(level=DEBUG)
    joined = dict(sub_list=False, join=''.join)
    self.assert_list(['1','2'], Any() + Any(), [['12']], **joined)
    self.assert_list(['1','2','3'], Any() + Any(), [['12']], **joined)
    self.assert_list(['1','2','3'], Any() + Any() + Any(), [['123']],
                     **joined)
    self.assert_list(['1'], Any() + Any(), [])
def UnsignedEFloat(decimal='.', exponent='eE'):
    '''
    Like `UnsignedEReal`, except that the value must include a decimal
    point or an exponent, so plain integers are not accepted.

    `decimal` and `exponent` give the characters accepted as the decimal
    point and as the exponent marker.
    '''
    # An UnsignedReal with a mandatory exponent...
    with_exponent = Join(UnsignedReal(decimal), Any(exponent),
                         SignedInteger())
    # ...or a value with a mandatory decimal point.
    with_point = UnsignedFloat(decimal)
    return Or(with_exponent, with_point)
def UnsignedEReal(decimal='.', exponent='eE'):
    '''
    Build a matcher for an `UnsignedReal` with an optional exponent
    (e+02 etc), so that integer and float values are both accepted.

    `decimal` and `exponent` give the characters accepted as the decimal
    point and as the exponent marker.
    '''
    mantissa = UnsignedReal(decimal)
    exponent_part = Optional(And(Any(exponent), SignedInteger()))
    return Join(mantissa, exponent_part)
def __init__(self):
    '''
    Configure the alphabet to run from chr(0) to chr(maxunicode), with
    matchers for escaped characters (hex escapes via x, u and U) and for
    the s / S whitespace ranges.
    '''
    from lepl.matchers.core import Any
    from lepl.matchers.combine import Or

    def mkhex(char, n):
        # `char` followed by n hex digits, converted to the character
        # with that code point.
        from lepl.matchers.derived import Drop
        return Drop(Any(char)) + Any('0123456789abcdefABCDEF')[n,...] >> \
            (lambda x: chr(int(x, 16)))

    def mkchr(char, chars, invert=False):
        # Map the literal `char` to a Character built from `chars`
        # (or from its inverse when `invert` is set).
        from lepl.matchers.core import Literal
        from lepl.matchers.derived import Map
        from lepl.regexp.core import Character
        intervals = lmap(lambda x: (x, x), chars)
        # The lambda delays the call to self.invert until after self has
        # been created.
        func = lambda _: Character(
            self.invert(intervals) if invert else intervals, self)
        return Map(Literal(char), func)

    upper = chr(maxunicode)
    whitespace_ranges = Or(mkchr('s', _WHITESPACE),
                           mkchr('S', _WHITESPACE, invert=True))
    escaped = Any(ILLEGAL) | mkhex('x', 2) | mkhex('u', 4) | mkhex('U', 8)
    super(UnicodeAlphabet, self).__init__(chr(0), upper, escaped=escaped,
                                          range=whitespace_ranges)
def test_depth(self):
    '''
    With a cleared configuration, an open-ended `Any()` repeat over 'abc'
    must yield the longest match first and the empty match last.
    '''
    greedy = Any()[:,...]
    greedy.config.clear()
    match_string = greedy.get_match_string()
    results = []
    for found, _remaining in match_string('abc'):
        results.append(found)
    assert results == [['abc'], ['ab'], ['a'], []], results
def SignedEFloat(decimal='.', exponent='eE'):
    '''
    Build a matcher for a `SignedFloat` with an optional exponent
    (e+02 etc).

    `decimal` and `exponent` give the characters accepted as the decimal
    point and as the exponent marker.
    '''
    mantissa = SignedFloat(decimal)
    exponent_part = Optional(Join(Any(exponent), SignedInteger()))
    return Join(mantissa, exponent_part)
# Exercise the `/` and `//` operators between two `Any('ab')` matchers via
# `assert_direct`.  Per the expectations below, 'ab' matches with `/` but
# gives no results with `//`, and the space-separated input yields the space
# as its own item with both operators.
# NOTE(review): the repeated 'a b' cases look like string literals whose
# internal whitespace was collapsed (presumably one- and two-space variants);
# confirm against the original before deduplicating or reformatting.
def test_slash(self): ab = Any('ab') self.assert_direct('ab', ab / ab, [['a', 'b']]) self.assert_direct('a b', ab / ab, [['a', ' ', 'b']]) self.assert_direct('a b', ab / ab, [['a', ' ', 'b']]) self.assert_direct('ab', ab // ab, []) self.assert_direct('a b', ab // ab, [['a', ' ', 'b']]) self.assert_direct('a b', ab // ab, [['a', ' ', 'b']])
def test_double(self):
    '''
    Two transforms stacked on one matcher must both be applied, and with
    `compose_transforms` end up as two functions on a single wrapper.
    '''
    inner = Any() > append('x')
    matcher = inner > append('y')
    matcher.config.clear().compose_transforms()
    parser = matcher.get_parse()
    result = parser('a')[0]
    assert result == 'axy', result
    compiled = parser.matcher
    assert isinstance(compiled, TransformableWrapper)
    assert len(compiled.wrapper.functions) == 2
def test_loop(self):
    '''
    A transform applied to a self-referencing `Delayed` matcher must still
    be applied, and the compiled top-level matcher must remain a `Delayed`.
    '''
    matcher = Delayed()
    body = (Any() | matcher) > append('x')
    matcher += body
    matcher.config.clear().compose_transforms()
    parser = matcher.get_parse()
    result = parser('a')[0]
    assert result == 'ax', result
    assert isinstance(parser.matcher, Delayed)
def test_liberal(self):
    '''
    With `optimize_or(False)`, the first alternative of a left-recursive
    `Delayed` must be a `Delayed` before compilation and a
    `TransformableWrapper` afterwards.
    '''
    matcher = Delayed()
    matcher += matcher | Any()

    def first_alternative():
        # Looked up afresh on each call, as the test inspects it both
        # before and after compilation.
        return matcher.matcher.matchers[0]

    assert isinstance(first_alternative(), Delayed)
    matcher.config.clear().optimize_or(False)
    matcher.get_parse_string()
    # TODO - better test
    assert isinstance(first_alternative(), TransformableWrapper)
def AnyBut(exclude=None):
    '''
    Build a matcher for any single token that is not excluded.

    `exclude` is either a matcher (the token is accepted only where that
    matcher fails) or a list of tokens / string of characters, which is
    coerced to an `Any`.  Documented upstream as accepting every token
    when omitted.
    '''
    excluded = coerce_(exclude, Any)
    # Negative lookahead on the exclusion, then consume one token.
    return And(~Lookahead(excluded), Any())
def test_all(self):
    '''
    Match '1.e3' against "float with exponent" or "plain float" under the
    default configuration, expecting the longer match first.
    '''
    with_exponent = Join(UnsignedFloat(), Any('eE'), SignedInteger())
    plain = UnsignedFloat()
    alternatives = Or(with_exponent, plain)
    alternatives.config.default()  # wrong order
    # Notes on other configurations tried here:
    #   compile_to_dfa()        gives 1.e3 only
    #   compile_to_nfa()        wrong order
    #   no_compile_to_regexp()  ok
    #   clear()                 ok
    self.assert_direct('1.e3', alternatives, [['1.e3'], ['1.']])
def SingleLineString(quote='"', escape='\\', exclude='\n'):
    '''
    Build a matcher for a quoted string that cannot contain the `exclude`
    characters (newlines by default), so it stays on one line.

    `quote` delimits the string; if `escape` is truthy, an escaped quote is
    accepted inside the string (the escape itself is dropped).
    '''
    delimiter = Literal(quote)
    # Anything that is neither the delimiter nor an excluded character.
    char = AnyBut(Or(delimiter, Any(exclude)))
    if escape:
        char = Or(char, And(Drop(escape), delimiter))
    body = Repeat(char, add_=True)
    return And(Drop(delimiter), body, Drop(delimiter))
def test_separator_mixin(self):
    '''
    Check comma-separated repeats written with slice syntax, for the 'd'
    and 'b' algorithms, against the expected result order.
    '''
    # For debug logging: basicConfig(level=DEBUG)
    abc = Any('abc')
    # (start, stop, algorithm, input, expected results in order)
    cases = (
        (1, 1, 'd', 'a', ['a']),
        (1, 1, 'b', 'a', ['a']),
        (1, 2, 'd', 'a,b', ['a,b', 'a']),
        (1, 2, 'b', 'a,b', ['a', 'a,b']),
        (2, 3, 'd', 'a,b,c,a', ['a,b,c', 'a,b']),
        (2, 3, 'b', 'a,b,c,a', ['a,b', 'a,b,c']),
    )
    for start, stop, algorithm, text, expected in cases:
        self.assert_separator_mixin(abc[start:stop:algorithm, ','],
                                    text, expected)
def test_exclude(self):
    '''
    Check that `Exclude(vowel)` filters vowels out of the stream seen by
    the wrapped matcher, and that the stream returned after a match sits
    at the expected position in the underlying text.
    '''
    # For debug logging: basicConfig(level=DEBUG)
    def vowel(x):
        return x in 'aeiou'

    def parser(matcher):
        matcher.config.no_full_first_match()
        return matcher.get_match_string()

    stream1 = 'abcdef\nghijklm\n'

    # Open-ended repeat: 15 characters less the vowels a, e and i.
    (match, _stream) = next(parser(Exclude(vowel)(Any()[:]))(stream1))
    assert match[0:2] == ['b', 'c'], match[0:2]
    assert len(match) == 12, len(match)

    # Matching nothing leaves the stream at the start.
    (_result, stream) = next(parser(Exclude(vowel)(Any()[0]))(stream1))
    assert stream[0] == 'a', stream[0]

    # A single match consumes 'b', leaving 'c' next.
    (_result, stream) = next(parser(Exclude(vowel)(Any()))(stream1))
    assert stream[0] == 'c', stream[0]

    # Five matches end at the start of the second line.  The assert
    # messages carry the observed values so a failure is informative.
    (_result, stream) = next(parser(Exclude(vowel)(Any()[5]))(stream1))
    assert stream.line_number == 2, stream.line_number
    assert stream.line_offset == 0, stream.line_offset
def SignedEReal(decimal='.', exponent='eE'):
    '''
    Build a matcher for a `SignedReal` with an optional exponent
    (e+02 etc), so that integer and float values are both accepted.

    With the default `decimal` and `exponent` a single `NfaRegexp` is
    returned instead of the combinator form.
    '''
    if decimal != '.' or exponent != 'eE':
        mantissa = SignedReal(decimal)
        exponent_part = Optional(Join(Any(exponent), SignedInteger()))
        return Join(mantissa, exponent_part)
    # hack to faster direct implementation for now
    return NfaRegexp(
        r'[\+\-]?(?:[0-9]*\.[0-9]+|[0-9]+\.|[0-9]+)(?:[eE][\+\-]?[0-9]+)?')
def __init__(self, clean_html=True):
    '''
    Store the `clean_html` flag, the punctuation and context-exception
    sets, and build the LEPL matchers for real numbers (including
    comma-separated thousands groups).
    '''
    self.clean_html = clean_html
    self._punctuation = '!"#&\'()*+,.;<=>?@[\\]^_`{|}~'

    # Context exceptions, stored as sets of whitespace-separated words.
    self._lctx_1_exceptions = set('/ :'.split())
    self._lctx_2_exceptions = set('discount redeem voucher'.split())
    self._rctx_1_exceptions = set('/ : th am pm hour hours %'.split())
    self._rctx_2_exceptions = set('discount redeem voucher'.split())

    # LEPL Real Number Matchers (w/thousands)
    # Any number of ",ddd" groups, with the comma dropped.
    comma_groups = Join(Drop(','), Add(Digit()[3]))[:]
    thousands = Or(
        Join(comma_groups, Any('.'), UnsignedInteger()),
        Join(comma_groups, Optional(Any('.'))))
    # Either leading digits plus thousands groups, or a plain real;
    # the result is converted with float.
    real = Or(Join(UnsignedInteger(), thousands), UnsignedReal()) >> float
    filler = Join(Star(AnyBut(real)))

    self._real_partition_matcher = Star(And(filler, real, filler))
    # Reals separated by (dropped) runs of whitespace, commas or hyphens.
    separator = Drop(Star(Or(Whitespace(), Any(',-'))))
    self._real_simple_matcher = real[:, separator]
def SignedEFloat(decimal='.', exponent='eE'):
    '''
    Like `SignedEReal`, except that the value must include a decimal point
    or an exponent, so plain integers are not accepted.

    With the default `decimal` and `exponent` a single `NfaRegexp` is
    returned instead of the combinator form.
    '''
    if decimal != '.' or exponent != 'eE':
        with_exponent = Join(SignedReal(decimal), Any(exponent),
                             SignedInteger())
        with_point = SignedFloat(decimal)
        return Or(with_exponent, with_point)
    # hack to faster direct implementation for now
    return NfaRegexp(
        r'[\+\-]?(?:[0-9]*\.[0-9]+(?:[eE][\+\-]?[0-9]+)?|[0-9]+\.(?:[eE][\+\-]?[0-9]+)?|[0-9]+[eE][\+\-]?[0-9]+)'
    )
# Check the text that `TraceVariables` writes to a `StringIO` when a matcher
# bound to the name `bar` is created and matched against 'abc'; the captured
# output is compared with the expected trace text using `assert_str`.
# NOTE(review): this definition appears whitespace-collapsed onto one line.
# Neither the extent of the `with` block nor the line breaks and column
# spacing inside the expected-text literal can be determined from here;
# confirm against the original before reformatting.
def test_context(self): #basicConfig(level=DEBUG) output = StringIO() with TraceVariables(out=output): bar = Any() bar.config.no_full_first_match() repr(bar) list(bar.match('abc')) text = output.getvalue() assert_str( text, ''' bar = ['a'] stream = 'bc' bar failed stream = 'abc' ''')
def SingleLineString(quote='"', escape='\\', exclude='\n', empty='', join=__add__):
    '''
    Build a matcher for a quoted string that cannot contain the `exclude`
    characters (newlines by default), so it stays on one line.

    `quote` delimits the string; if `escape` is truthy, an escaped quote is
    accepted inside the string (the escape itself is dropped).  The matched
    pieces are combined by `Repeat` using `reduce=(empty, join)`.
    '''
    delimiter = Literal(quote)
    # Anything that is neither the delimiter nor an excluded character.
    char = AnyBut(Or(delimiter, Any(exclude)))
    if escape:
        char = Or(char, And(Drop(escape), delimiter))
    body = Repeat(char, reduce=(empty, join))
    return And(Drop(delimiter), body, Drop(delimiter))
# Build a small grammar in which `Any('1')` is labelled 'number' and wrapped
# in a local `Node` subclass `Term`, then check that parsing '1' (with
# `compose_transforms` configured) returns a `Term` whose first item is '1'
# and whose `str26` rendering equals the expected tree text.
# NOTE(review): this definition appears whitespace-collapsed onto one line;
# the expected tree literal presumably contained line breaks.  Confirm the
# original layout before reformatting, since the literal is runtime data.
def test_node(self): class Term(Node): pass number = Any('1') > 'number' term = number > Term factor = term | Drop(Optional(term)) factor.config.clear().compose_transforms() p = factor.get_parse_string() ast = p('1')[0] assert type(ast) == Term, type(ast) assert ast[0] == '1', ast[0] assert str26(ast) == """Term `- number '1'""", ast