def test_and(self):
    '''
    Apply append('x') to an And of Any and Optional(Any), compose the
    transforms, and check that 'a' parses to 'ax' while the parser's
    matcher still passes is_child(..., And).
    '''
    conjunction = Any() & Optional(Any())
    matcher = conjunction > append('x')
    matcher.config.clear().compose_transforms()
    parser = matcher.get_parse()
    result = parser('a')[0]
    assert result == 'ax', result
    root = parser.matcher
    assert is_child(root, And), type(root)
def test_add(self):
    '''
    Force NFA compilation of Add(And(Any('a'), Any('b'))) and check that
    'abq' yields the single match ['ab'] with 'q' remaining.
    '''
    pattern = Add(And(Any('a'), Any('b')))
    pattern.config.clear().compile_to_nfa(force=True)
    match = pattern.get_match_null()
    found = list(match('abq'))
    assert found == [(['ab'], 'q')], found
    # the configured matcher should have been replaced by an NFA regexp
    assert isinstance(match.matcher, NfaRegexp), match.matcher
def test_common_child(self):
    '''
    Build a graph in which the same Any('a') instance is reachable along
    several paths, then run the clone and relative checks on it.
    '''
    shared = Any('a')
    pair = shared | Any('b')
    triple = shared | pair | Any('c')
    matcher = shared | pair | triple
    self.assert_clone(matcher)
    self.assert_relative(matcher)
def test_safety(self):
    '''
    Build the same matcher before, inside and after a Separator block, and
    inside a second thread, then compare each parse of 'cababab' with the
    value expected for that context.
    '''
    # NOTE(review): the nesting below is reconstructed from the expected
    # values (matcher2 and matcher5 expect 'cbbb', matcher6 expects the
    # unmodified text) - confirm against the original layout.
    matcher3 = Delayed()
    matcher4 = Delayed()
    matcher1 = Any()[::'b', ...] & Eos()
    with Separator(Drop(Any('a')[:])):
        matcher2 = Any()[::'b', ...] & Eos()

        # pylint: disable-msg=W0613
        def target(matcher3=matcher3, matcher4=matcher4):
            matcher3 += Any()[::'b', ...] & Eos()
            with Separator(Drop(Any('b')[:])):
                matcher4 += Any()[::'b', ...] & Eos()

        worker = Thread(target=target)
        worker.start()
        worker.join()
        matcher5 = Any()[::'b', ...] & Eos()
    matcher6 = Any()[::'b', ...] & Eos()

    text = 'cababab'
    expectations = (
        (matcher1, text),
        (matcher2, 'cbbb'),
        (matcher3, text),
        (matcher4, 'caaa'),
        (matcher5, 'cbbb'),
        (matcher6, text),
    )
    for matcher, expected in expectations:
        assert expected == matcher.parse_string(text)[0], \
            matcher.parse_string(text)
def test_complex_loop(self):
    '''
    Run the clone and relative checks on a graph that references two
    Delayed instances from several places.
    '''
    first = Delayed()
    second = Delayed()
    alternatives = Any('a') | Any('b')[1:2, ...] | first
    sequence = first & second
    # '|' binds tighter than '>', so the label applies to the whole Or
    matcher = alternatives | sequence | first | second > 'foo'
    self.assert_clone(matcher)
    self.assert_relative(matcher)
def test_expr_with_functions(self):
    '''
    Expression with function calls and appropriate binding.

    Parses '1 + 2*sin(3+ 4) - 5' with a token-based grammar and compares
    the string form of the resulting tree with the expected text.
    '''
    # pylint: disable-msg=C0111, C0321
    class Call(Node):
        pass

    class Term(Node):
        pass

    class Factor(Node):
        pass

    class Expression(Node):
        pass

    # tokens
    value = Token(Float()) > 'value'
    name = Token('[a-z]+')
    symbol = Token('[^a-zA-Z0-9\\. ]')

    # grammar
    expr = Delayed()
    open_ = ~symbol('(')
    close = ~symbol(')')
    funcn = name > 'name'
    call = funcn & open_ & expr & close > Call
    term = call | value | open_ & expr & close > Term
    muldiv = symbol(Any('*/')) > 'operator'
    factor = term & (muldiv & term)[:] > Factor
    addsub = symbol(Any('+-')) > 'operator'
    expr += factor & (addsub & factor)[:] > Expression
    line = expr & Eos()

    line.config.trace(True).lexer()
    parser = line.get_parse_string()
    results = str26(parser('1 + 2*sin(3+ 4) - 5')[0])
    # NOTE(review): the expected text is kept exactly as it appears in this
    # copy of the file; confirm whether the literal originally spanned
    # several lines.
    assert results == """Expression +- Factor | `- Term | `- value '1' +- operator '+' +- Factor | +- Term | | `- value '2' | +- operator '*' | `- Term | `- Call | +- name 'sin' | `- Expression | +- Factor | | `- Term | | `- value '3' | +- operator '+' | `- Factor | `- Term | `- value '4' +- operator '-' `- Factor `- Term `- value '5'""", '[' + results + ']'
def test_or(self):
    '''
    Force NFA compilation of Any('a') | Any('b') and check that each
    alternative matches a single character, leaving 'q' unconsumed.
    '''
    pattern = Any('a') | Any('b')
    pattern.config.clear().compile_to_nfa(force=True)
    match = pattern.get_match_null()

    found = list(match('bq'))
    assert found == [(['b'], 'q')], found

    found = list(match('aq'))
    assert found == [(['a'], 'q')], found

    assert isinstance(match.matcher, NfaRegexp)
def test_node(self):
    '''
    Parse '1 + 2 * (3 + 4 - 5)' into Node subclasses and compare the first
    result with the expected tree text via assert_str.
    '''
    class Term(Node):
        pass

    class Factor(Node):
        pass

    class Expression(Node):
        pass

    expression = Delayed()
    number = Digit()[1:, ...] > 'number'
    term = (number | '(' / expression / ')') > Term
    muldiv = Any('*/') > 'operator'
    factor = (term / (muldiv / term)[0::]) > Factor
    addsub = Any('+-') > 'operator'
    expression += (factor / (addsub / factor)[0::]) > Expression

    parse = expression.get_parse_string()
    ast = parse('1 + 2 * (3 + 4 - 5)')
    # NOTE(review): the expected text is kept exactly as it appears in this
    # copy of the file; confirm whether the literal originally spanned
    # several lines.
    assert_str(
        ast[0],
        """Expression +- Factor | +- Term | | `- number '1' | `- ' ' +- operator '+' +- ' ' `- Factor +- Term | `- number '2' +- ' ' +- operator '*' +- ' ' `- Term +- '(' +- Expression | +- Factor | | +- Term | | | `- number '3' | | `- ' ' | +- operator '+' | +- ' ' | +- Factor | | +- Term | | | `- number '4' | | `- ' ' | +- operator '-' | +- ' ' | `- Factor | `- Term | `- number '5' `- ')'""")
def test_depth(self):
    '''
    With full_first_match(False), Any()[:,...] on 'abc' should yield the
    results longest first, ending with the empty match.
    '''
    repeat = Any()[:, ...]
    repeat.config.full_first_match(False)
    match = repeat.get_match_string()
    results = []
    for (matched, _stream) in match('abc'):
        results.append(matched)
    assert results == [['abc'], ['ab'], ['a'], []], results
def left_token(self, contents=False):
    '''
    Return a Delayed matcher defined as Optional(itself) & Token(Any()).

    When `contents` is true the token is first specialised by calling it
    with Or('a', 'b').
    '''
    token = Token(Any())
    if contents:
        token = token(Or('a', 'b'))
    matcher = Delayed()
    matcher += Optional(matcher) & token
    return matcher
def test_dynamic(self):
    '''
    Use an IntVar, set from a leading unsigned integer, as the `stop`
    value of a Repeat; "3abcd" should then give ['abc'].
    '''
    size = IntVar()
    header = Apply(UnsignedInteger(), size.setter())
    body = Repeat(Any(), stop=size, add_=True)
    matcher = ~header & body
    matcher.config.no_compile_to_regexp().no_full_first_match()
    matches = matcher.match_string("3abcd")
    result = next(matches)[0]
    assert result == ['abc'], result
def test_mixed(self):
    '''
    Cannot mix tokens and non-tokens at same level.

    Building a parser for Token(Any()) & Any() must raise LexerError with
    the message checked below; any other exception propagates unchanged.
    '''
    bad = Token(Any()) & Any()
    try:
        bad.get_parse()
    except LexerError as err:
        assert str(err) == (
            'The grammar contains a mix of Tokens and '
            'non-Token matchers at the top level. If '
            'Tokens are used then non-token matchers '
            'that consume input must only appear "inside" '
            'Tokens. The non-Token matchers include: '
            'Any(None).'), str(err)
    else:
        # reached only when get_parse() raised nothing at all
        assert False, 'expected failure'
def test_double(self):
    '''
    Chain two append transforms on Any(), compose them, and check that
    'a' parses to 'axy'.
    '''
    once = Any() > append('x')
    matcher = once > append('y')
    matcher.config.clear().compose_transforms()
    parser = matcher.get_parse()
    result = parser('a')[0]
    assert result == 'axy', result
    # TODO - better test
    assert isinstance(parser.matcher, TransformableWrapper)
def test_error(self):
    '''
    Check the SyntaxError messages produced by a grammar that uses
    make_error / node_throw for three kinds of malformed input: a stray
    ')', a missing ')', and unexpected text.
    '''
    class Term(Node):
        pass

    class Factor(Node):
        pass

    class Expression(Node):
        pass

    expression = Delayed()
    number = Digit()[1:, ...] > 'number'

    # alternatives for a term, in the order they are tried by Or
    unexpected = AnyBut(Space() | Digit() | '(')[1:, ...] \
        ^ 'unexpected text: {results[0]}'
    plain = number > Term
    no_open = number**make_error("no ( before '{stream_out}'") / ')' \
        >> node_throw
    bracketed = '(' / expression / ')' > Term
    no_close = ('(' / expression / Eos())**make_error(
        "no ) for '{stream_in}'") >> node_throw
    term = Or(unexpected, plain, no_open, bracketed, no_close)

    muldiv = Any('*/') > 'operator'
    factor = (term / (muldiv / term)[0:, r'\s*']) > Factor
    addsub = Any('+-') > 'operator'
    expression += (factor / (addsub / factor)[0:, r'\s*']) > Expression
    line = expression / Eos()
    parser = line.get_parse_string()

    try:
        parser('1 + 2 * 3 + 4 - 5)')[0]
    except SyntaxError as e:
        assert e.msg == "no ( before ')'", e.msg
    else:
        assert False, 'expected error'

    try:
        parser('1 + 2 * (3 + 4 - 5')
    except SyntaxError as e:
        assert e.msg == "no ) for '(3 + 4 - 5'", e.msg
    else:
        assert False, 'expected error'

    try:
        parser('1 + 2 * foo')
    except SyntaxError as e:
        assert e.msg == "unexpected text: foo", e.msg
    else:
        assert False, 'expected error'
def test_ok(self):
    '''
    Any('a') on input 'a' should give the same single match whether
    full_first_match is configured with eos=False or eos=True.
    '''
    matcher = Any('a')
    for eos in (False, True):
        matcher.config.full_first_match(eos=eos)
        result = list(matcher.match_null('a'))
        assert result == [(['a'], '')], result
def test_any(self):
    '''
    Force NFA compilation of a bare Any() and check that it consumes
    exactly one character of 'abc'.
    '''
    single = Any()
    single.config.clear().compile_to_nfa(force=True)
    match = single.get_match_null()
    found = list(match('abc'))
    assert found == [(['a'], 'bc')], found
    assert isinstance(match.matcher, NfaRegexp)
def test_loop(self):
    '''
    Compose transforms on a self-referencing Delayed and check that 'a'
    parses to 'ax' while the parser's matcher is still a Delayed.
    '''
    matcher = Delayed()
    alternatives = Any() | matcher
    matcher += alternatives > append('x')
    matcher.config.clear().compose_transforms()
    parser = matcher.get_parse()
    result = parser('a')[0]
    assert result == 'ax', result
    assert isinstance(parser.matcher, Delayed)
def test_stream(self):
    '''
    With a cleared configuration, Any('a') should give no results for
    'b', whether called via match() on the text or via match_null() on
    a stream built by facade_factory.
    '''
    matcher = Any('a')
    matcher.config.clear()

    from_text = list(matcher.match('b'))
    assert from_text == [], from_text

    (stream, _memory) = facade_factory('b')
    from_stream = list(matcher.match_null(stream))
    assert from_stream == [], from_stream
def test_incomplete(self):
    '''
    A token is not completely consumed (this doesn't raise error messages,
    it just fails to match).

    The same inputs are then parsed with complete=False, where a partial
    match of the token contents is accepted.
    '''
    token = Token('[a-z]+')(Any())
    token.config.no_full_first_match()
    parser = token.get_parse_string()
    assert parser('a') == ['a'], parser('a')
    # even though this matches the token, the Any() sub-matcher doesn't
    # consume all the contents
    # (identity check: comparison with None should use "is", not "==")
    assert parser('ab') is None, parser('ab')

    token = Token('[a-z]+')(Any(), complete=False)
    token.config.no_full_first_match()
    parser = token.get_parse_string()
    assert parser('a') == ['a'], parser('a')
    # whereas this is fine, since complete=False
    assert parser('ab') == ['a'], parser('ab')
def test_location(self):
    '''
    FullFirstMatch around Any('a')[:] & Eos() should raise
    FullFirstMatchException for 'aab', with a message locating the 'b'.
    '''
    matcher = FullFirstMatch(Any('a')[:] & Eos())
    matcher.config.clear()
    try:
        list(matcher.match_string('aab'))
    except FullFirstMatchException as e:
        # NOTE(review): the expected text is kept exactly as it appears in
        # this copy of the file; confirm whether the literal originally
        # spanned several lines.
        assert str(e) == """The match failed at 'b', Line 1, character 2 of str: 'aab'.""", str(e)
    else:
        assert False, 'expected error'
def test_liberal(self):
    '''
    Check the type of the first alternative of a left-recursive Or before
    and after building a parser with optimize_or(False).
    '''
    matcher = Delayed()
    matcher += matcher | Any()
    # before configuration the first alternative is the Delayed itself
    assert isinstance(matcher.matcher.matchers[0], Delayed)

    matcher.config.clear().optimize_or(False)
    matcher.get_parse_string()
    # TODO - better test
    assert isinstance(matcher.matcher.matchers[0], TransformableWrapper)
def test_eos(self):
    '''
    With full_first_match(eos=True), Optional(Any('a')) on 'b' should
    raise FullFirstMatchException with a message locating the 'b'.
    '''
    matcher = Optional(Any('a'))
    matcher.config.full_first_match(eos=True)
    try:
        list(matcher.match('b'))
    except FullFirstMatchException as e:
        # NOTE(review): the expected text is kept exactly as it appears in
        # this copy of the file; confirm whether the literal originally
        # spanned several lines.
        assert str(e) == """The match failed at 'b', Line 1, character 0 of str: 'b'.""", str(e)
    else:
        assert False, 'expected error'
def test_list(self):
    '''
    Parse '1 + 2 * (3 + 4 - 5)' with each grammar level sent to `list`
    and compare the result with the expected nested lists.
    '''
    expression = Delayed()
    number = Digit()[1:, ...] > 'number'
    term = (number | '(' / expression / ')') > list
    muldiv = Any('*/') > 'operator'
    factor = (term / (muldiv / term)[0:]) > list
    addsub = Any('+-') > 'operator'
    expression += (factor / (addsub / factor)[0:]) > list

    ast = expression.parse_string('1 + 2 * (3 + 4 - 5)')
    expected = [
        [
            [[('number', '1')], ' '],
            ('operator', '+'),
            ' ',
            [
                [('number', '2')],
                ' ',
                ('operator', '*'),
                ' ',
                [
                    '(',
                    [
                        [[('number', '3')], ' '],
                        ('operator', '+'),
                        ' ',
                        [[('number', '4')], ' '],
                        ('operator', '-'),
                        ' ',
                        [[('number', '5')]],
                    ],
                    ')',
                ],
            ],
        ],
    ]
    assert ast == expected, ast
def test_transformed_etc(self):
    '''
    Run the clone and relative checks on an arithmetic grammar built from
    Node subclasses, then compare it with the matcher held by a parser
    built after most rewriting options have been switched off.
    '''
    class Term(Node):
        pass

    class Factor(Node):
        pass

    class Expression(Node):
        pass

    expression = Delayed()
    number = Digit()[1:, ...] > 'number'
    term = (number | '(' / expression / ')') > Term
    muldiv = Any('*/') > 'operator'
    factor = (term / (muldiv / term)[0::]) > Factor
    addsub = Any('+-') > 'operator'
    expression += (factor / (addsub / factor)[0::]) > Expression

    self.assert_clone(expression)
    self.assert_relative(expression)

    config = expression.config
    config.no_full_first_match().no_compile_to_regexp()
    config = expression.config
    config.no_compose_transforms().no_direct_eval()
    expression.config.no_flatten()
    parser_matcher = expression.get_parse_string().matcher
    self._assert_clone(expression, parser_matcher)
def test_complex(self):
    '''
    Compile 'foo' | ('ba' + one or more 'a') to an NFA and check the
    matches for 'foo', 'baaaaax' and 'ba'.
    '''
    pattern = Literal('foo') | (Literal('ba') + Any('a')[1:, ...])
    pattern.config.compile_to_nfa().no_full_first_match()
    match = pattern.get_match_null()

    found = list(match('foo'))
    assert found == [(['foo'], '')], found

    found = list(match('baaaaax'))
    expected = [
        (['baaaaa'], 'x'),
        (['baaaa'], 'ax'),
        (['baaa'], 'aax'),
        (['baa'], 'aaax'),
    ]
    assert found == expected, found

    # 'ba' alone lacks the trailing 'a' that the repeat requires
    found = list(match('ba'))
    assert found == [], found

    assert isinstance(match.matcher, NfaRegexp), match.matcher
def test_left2(self):
    '''
    Left-memoize `seq = letter | (seq & letter)` and check that 'abcdef'
    gives six results, shortest first.
    '''
    letter = Any()
    seq = Delayed()
    seq += letter | (seq & letter)
    seq.config.clear().left_memoize().trace_stack(True)
    match = seq.get_match_string()
    results = list(match('abcdef'))
    assert len(results) == 6, len(results)
    first, second = results[0][0], results[1][0]
    assert first == ['a'], first
    assert second == ['a', 'b'], second
def test_left1a(self):
    '''
    Left-memoize `seq = Optional(seq) & letter` and check that 'ab' gives
    two results, longest first.
    '''
    letter = Any()
    seq = Delayed()
    seq += Optional(seq) & letter
    seq.config.clear().left_memoize().trace_stack(True)
    match = seq.get_match()
    results = list(match('ab'))
    assert len(results) == 2, len(results)
    first, second = results[0][0], results[1][0]
    assert first == ['a', 'b'], first
    assert second == ['a'], second
def test_node(self):
    '''
    With composed transforms, `term | Drop(Optional(term))` on '1' should
    give a Term node whose first child is '1'.
    '''
    class Term(Node):
        pass

    number = Any('1') > 'number'
    term = number > Term
    factor = term | Drop(Optional(term))
    factor.config.clear().compose_transforms()
    parse = factor.get_parse_string()
    ast = parse('1')[0]
    assert type(ast) == Term, type(ast)
    assert ast[0] == '1', ast[0]
    # NOTE(review): the expected text is kept exactly as it appears in this
    # copy of the file; confirm whether the literal originally spanned
    # several lines.
    assert str26(ast) == """Term `- number '1'""", ast
def test_dfs(self):
    '''
    Any()[:, ...] on 'abcd' should give the same longest-first results
    with a cleared configuration and after compiling to an NFA.
    '''
    expected = [
        (['abcd'], ''),
        (['abc'], 'd'),
        (['ab'], 'cd'),
        (['a'], 'bcd'),
        ([], 'abcd'),
    ]
    pattern = Any()[:, ...]

    # do un-rewritten to check whether [] or [''] is correct
    pattern.config.clear()
    match = pattern.get_match_null()
    found = list(match('abcd'))
    assert found == expected, found

    pattern.config.compile_to_nfa()
    match = pattern.get_match_null()
    found = list(match('abcd'))
    assert found == expected, found
    assert isinstance(match.matcher, NfaRegexp), match.matcher
def test_right(self):
    '''
    Right-memoize `seq = letter & Optional(seq)` and check that 'ab'
    gives two results, longest first.
    '''
    letter = Any()
    seq = Delayed()
    seq += letter & Optional(seq)
    seq.config.clear().right_memoize().trace_stack(True)
    match = seq.get_match_string()
    results = list(match('ab'))
    assert len(results) == 2, len(results)
    first, second = results[0][0], results[1][0]
    assert first == ['a', 'b'], first
    assert second == ['a'], second