def test_literal(self):
    '''
    Simple literal should compile directly.
    '''
    token = Token(Literal('abc'))
    token.compile()
    assert token.regexp == 'abc', repr(token.regexp)
def test_float(self):
    '''
    A float is more complex, but still compiles.
    '''
    token = Token(Float())
    token.compile()
    assert token.regexp == \
        '(?:[\\+\\-])?(?:(?:[0-9](?:[0-9])*)?\\.[0-9](?:[0-9])*' \
        '|[0-9](?:[0-9])*(?:\\.)?)(?:[Ee](?:[\\+\\-])?[0-9](?:[0-9])*)?', \
        repr(token.regexp)
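
# Since the compiled pattern is a plain regular expression, a follow-on
# sanity check (a sketch, not part of the original suite; requires
# Python 3.4+ for fullmatch) can confirm it behaves as a float
# recogniser under Python's own re module:
def sketch_check_float_regexp():
    import re
    token = Token(Float())
    token.compile()
    pattern = re.compile(token.regexp)
    assert pattern.fullmatch('-1.5e3')   # sign, fraction and exponent
    assert pattern.fullmatch('42')       # bare integer form
    assert not pattern.fullmatch('abc')  # non-numeric input is rejected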
def test_string_arg(self):
    '''
    Skip anything (not just spaces).
    '''
    words = Token('[a-z]+')[:]
    words.config.lexer(discard='.')
    parser = words.get_parse()
    results = parser('abc defXghi')
    assert results == ['abc', 'def', 'ghi'], results
def test_expr_with_functions(self):
    '''
    Expression with function calls and appropriate binding.
    '''
    #basicConfig(level=DEBUG)
    # pylint: disable-msg=C0111, C0321
    class Call(Node): pass
    class Term(Node): pass
    class Factor(Node): pass
    class Expression(Node): pass

    value = Token(Float()) > 'value'
    name = Token('[a-z]+')
    symbol = Token('[^a-zA-Z0-9\\. ]')

    expr = Delayed()
    open_ = ~symbol('(')
    close = ~symbol(')')
    funcn = name > 'name'
    call = funcn & open_ & expr & close > Call
    term = call | value | open_ & expr & close > Term
    muldiv = symbol(Any('*/')) > 'operator'
    factor = term & (muldiv & term)[:] > Factor
    addsub = symbol(Any('+-')) > 'operator'
    expr += factor & (addsub & factor)[:] > Expression
    line = expr & Eos()

    line.config.trace(True).lexer()
    parser = line.get_parse_string()
    results = str26(parser('1 + 2*sin(3+ 4) - 5')[0])
    assert results == """Expression
 +- Factor
 |   `- Term
 |       `- value '1'
 +- operator '+'
 +- Factor
 |   +- Term
 |   |   `- value '2'
 |   +- operator '*'
 |   `- Term
 |       `- Call
 |           +- name 'sin'
 |           `- Expression
 |               +- Factor
 |               |   `- Term
 |               |       `- value '3'
 |               +- operator '+'
 |               `- Factor
 |                   `- Term
 |                       `- value '4'
 +- operator '-'
 `- Factor
     `- Term
         `- value '5'""", '[' + results + ']'
def test_ambiguity(self):
    '''
    A (signed) integer will consume a - sign.
    '''
    tokens = (Token(Integer()) | Token(r'\-'))[:] & Eos()
    self.examples([(lambda: list(tokens.parse_all('1-2')),
                    "[['1', '-2']]")])
    matchers = (Integer() | Literal('-'))[:] & Eos()
    self.examples([(lambda: list(matchers.parse_all('1-2')),
                    "[['1', '-2'], ['1', '-', '2']]")])
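
# The lexer's longest-match rule is what removes the second reading
# above; a sketch (assuming lepl's UnsignedInteger matcher) shows that
# an unsigned token restores it, since the sign can no longer attach
# to the number:
def sketch_unsigned_ambiguity():
    unsigned = (Token(UnsignedInteger()) | Token(r'\-'))[:] & Eos()
    assert list(unsigned.parse_all('1-2')) == [['1', '-', '2']]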
def test_impossible(self):
    '''
    Cannot compile arbitrary functions.
    '''
    try:
        token = Token(Float() > (lambda x: x))
        token.compile()
        assert False, 'Expected error'
    except LexerError:
        pass
def test_float(self):
    '''
    A float is more complex, but still compiles.
    '''
    token = Token(Float())
    token.compile()
    assert token.regexp == \
        '([\\+\\-]|)(([0-9]([0-9])*|)\\.[0-9]([0-9])*|' \
        '[0-9]([0-9])*(\\.|))([Ee]([\\+\\-]|)[0-9]([0-9])*|)', \
        repr(token.regexp)
def test_complete(self):
    '''
    The complete flag indicates whether the entire token must be
    consumed.
    '''
    #basicConfig(level=DEBUG)
    abc = Token('abc')
    incomplete = abc(Literal('ab'))
    incomplete.config.no_full_first_match()
    self.examples([(lambda: incomplete.parse('abc'), "None")])
    abc = Token('abc')
    incomplete = abc(Literal('ab'), complete=False)
    incomplete.config.no_full_first_match()
    self.examples([(lambda: incomplete.parse('abc'), "['ab']")])
def test_good_error_msg(self):
    '''
    Better error message with streams.
    '''
    #basicConfig(level=DEBUG)
    words = Token('[a-z]+')[:]
    words.config.lexer()
    parser = words.get_parse_string()
    try:
        parser('abc defXghi')
        assert False, 'expected error'
    except RuntimeLexerError as err:
        assert str(err) == 'No lexer for \'Xghi\' at line 1 character 7 ' \
            'of str: \'abc defXghi\'.', str(err)
def test_bad_error_msg(self):
    '''
    An ugly error message (can't we improve this?)
    '''
    #basicConfig(level=DEBUG)
    words = Token('[a-z]+')[:]
    words.config.lexer()
    parser = words.get_parse()
    try:
        parser('abc defXghi')
        assert False, 'expected error'
    except RuntimeLexerError as err:
        assert str(err) == "No lexer for 'Xghi' at line 1 " \
            "character 7 of str: 'abc defXghi'.", str(err)
def test_bad_space(self):
    '''
    An unexpected character fails to match.
    '''
    token = Token('a')
    token.config.clear().lexer(discard='b')
    parser = token.get_parse()
    assert parser('a') == ['a'], parser('a')
    assert parser('b') == None, parser('b')
    try:
        parser('c')
        assert False, 'expected failure'
    except RuntimeLexerError as err:
        assert str(err) == "No lexer for 'c' at line 1 " \
            "character 0 of str: 'c'.", str(err)
def left_token(self, contents=False):
    '''
    Build a left-recursive grammar of tokens (a test helper).
    '''
    matcher = Delayed()
    inner = Token(Any())
    if contents:
        inner = inner(Or('a', 'b'))
    matcher += Optional(matcher) & inner
    return matcher
def test_mixed(self):
    '''
    Cannot mix tokens and non-tokens at the same level.
    '''
    bad = Token(Any()) & Any()
    try:
        bad.get_parse()
        assert False, 'expected failure'
    except LexerError as err:
        assert str(err) == 'The grammar contains a mix of Tokens and ' \
            'non-Token matchers at the top level. If ' \
            'Tokens are used then non-token matchers ' \
            'that consume input must only appear "inside" ' \
            'Tokens. The non-Token matchers include: ' \
            'Any(None).', str(err)
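
# A sketch of the fix the message suggests: keep character-level
# matchers inside Tokens, so the grammar is purely token-based at the
# top level and compiles cleanly.
def sketch_tokens_only():
    good = Token(Any()) & Token(Any())
    good.config.no_full_first_match()
    good.get_parse()  # compiles without raising LexerError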
def test_incomplete(self):
    '''
    A token is not completely consumed (this doesn't raise error
    messages, it just fails to match).
    '''
    token = Token('[a-z]+')(Any())
    token.config.no_full_first_match()
    parser = token.get_parse_string()
    assert parser('a') == ['a'], parser('a')
    # even though this matches the token, the Any() sub-matcher doesn't
    # consume all the contents
    assert parser('ab') == None, parser('ab')
    token = Token('[a-z]+')(Any(), complete=False)
    token.config.no_full_first_match()
    parser = token.get_parse_string()
    assert parser('a') == ['a'], parser('a')
    # whereas this is fine, since complete=False
    assert parser('ab') == ['a'], parser('ab')
def test_defaults(self):
    '''
    Basic configuration.
    '''
    reals = (Token(Float()) >> float)[:]
    reals.config.lexer()
    parser = reals.get_parse()
    results = parser('1 2.3')
    assert results == [1.0, 2.3], results
def test_none_discard(self):
    '''
    If discard is '', discard nothing.
    '''
    token = Token('a')
    token.config.lexer(discard='').no_full_first_match()
    parser = token[1:].get_parse()
    result = parser('aa')
    assert result == ['a', 'a'], result
    try:
        parser(' a')
        assert False, 'expected error'
    except RuntimeLexerError as error:
        assert str26(error) == "No discard for ' a'.", str26(error)
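
# For contrast, a sketch with the lexer's default discard (which skips
# whitespace), so the same leading-space input parses instead of
# raising RuntimeLexerError:
def sketch_default_discard():
    token = Token('a')
    token.config.lexer()  # default discard skips whitespace
    parser = token.get_parse()
    assert parser(' a') == ['a']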
        n.context.start_line = start_line
        n.context.end_line = end_line
        return n
    return wrapper

def set_and_return(obj, **kwargs):
    for name, value in kwargs.items():
        setattr(obj, name, value)
    return obj

base = Token('[a-zA-Z][a-zA-Z0-9_]*')
function = base
identifier = base >> Symbol
variable_identifier = identifier
data_identifier = identifier
sequential_identifier = identifier
parallel_identifier = identifier

symbol = Token('[^0-9a-zA-Z \t\r\n]')
keyword = Token('[a-z]+')

semi = symbol(';')
colon = symbol(':')
comma = symbol(',')
dot = symbol('.')

property = ~dot & (function_call | identifier)
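
# set_and_return is a convenience for mutating an object inside an
# expression (e.g. a parser action); an illustrative sketch with a
# hypothetical Box class:
def sketch_set_and_return():
    class Box(object):
        pass
    box = set_and_return(Box(), name='x', value=1)
    assert box.name == 'x' and box.value == 1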
class TypeSpecParser:
    int_tok = Token(r'int')
    float_tok = Token(r'float')
    str_tok = Token(r'str')
    unicode_tok = Token(r'unicode')
    bool_tok = Token(r'bool')
    unit_tok = Token(r'unit')
    var_tok = Token(r"'[a-zA-Z0-9]+")

    list_start = Token(r'\[')
    list_end = Token(r'\]')
    tuple_start = Token(r'\(')
    tuple_div = Token(r',')
    tuple_end = Token(r'\)')
    arrow_div = Token(r'\->')

    tight_typ = Delayed()
    typ = Delayed()

    num_typ = int_tok | float_tok  # | long_tok | complex_tok
    str_typ = str_tok | unicode_tok
    base_typ = num_typ | str_typ | bool_tok | unit_tok | var_tok

    lst = ~list_start & typ & ~list_end > Lst

    empty_tup = ~tuple_start & ~tuple_end > Tup
    comma_tup = ~tuple_start & (typ & ~tuple_div)[1:] & ~tuple_end > Tup
    no_comma_tup = ~tuple_start & (typ & ~tuple_div)[1:] & typ & ~tuple_end > Tup
    tup = empty_tup | comma_tup | no_comma_tup

    arr = tight_typ & ~arrow_div & typ > Arr

    parens = ~tuple_start & typ & ~tuple_end

    tight_typ += base_typ | lst | tup | parens
    typ += arr | tight_typ

    @staticmethod
    def parse(s):
        try:
            return TypeSpecParser.typ.parse(s)[0]
        except (RuntimeLexerError, FullFirstMatchException):
            raise TypeIncorrectlySpecifiedError(s)

    @staticmethod
    def print_parse(s):
        try:
            return better_sexpr_to_tree(TypeSpecParser.typ.parse(s)[0])
        except (RuntimeLexerError, FullFirstMatchException):
            raise TypeIncorrectlySpecifiedError(s)
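
# A usage sketch (the spec strings are illustrative; Lst, Tup, Arr and
# TypeIncorrectlySpecifiedError come from the surrounding module):
def sketch_type_spec_usage():
    arr = TypeSpecParser.parse("'a -> [int]")        # an Arr node
    tree = TypeSpecParser.print_parse("(int, str)")  # rendered Tup tree
    try:
        TypeSpecParser.parse("[int")  # malformed: unclosed list
    except TypeIncorrectlySpecifiedError:
        pass  # lexer/parser errors are wrapped in the module's own error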
def test_calculation(self):
    '''
    We could do evaluation directly in the parser actions, but by
    using the nodes instead we allow future expansion into a full
    interpreter.
    '''
    # pylint: disable-msg=C0111, C0321
    class BinaryExpression(Node):
        op = lambda x, y: None
        def __float__(self):
            return self.op(float(self[0]), float(self[1]))
    class Sum(BinaryExpression): op = add
    class Difference(BinaryExpression): op = sub
    class Product(BinaryExpression): op = mul
    class Ratio(BinaryExpression): op = truediv
    class Call(Node):
        funs = {'sin': sin, 'cos': cos}
        def __float__(self):
            return self.funs[self[0]](self[1])

    # we use an unsigned number then handle negative values explicitly;
    # this lets us handle the ambiguity between subtraction and
    # negation, which requires context (not available to the lexer)
    # to resolve correctly.
    number = Token(UnsignedReal())
    name = Token('[a-z]+')
    symbol = Token('[^a-zA-Z0-9\\. ]')

    expr = Delayed()
    factor = Delayed()

    real_ = Or(number >> float,
               ~symbol('-') & number >> (lambda x: -float(x)))

    open_ = ~symbol('(')
    close = ~symbol(')')
    trig = name(Or('sin', 'cos'))
    call = trig & open_ & expr & close > Call
    parens = open_ & expr & close
    value = parens | call | real_

    ratio = value & ~symbol('/') & factor > Ratio
    prod = value & ~symbol('*') & factor > Product
    factor += prod | ratio | value

    diff = factor & ~symbol('-') & expr > Difference
    sum_ = factor & ~symbol('+') & expr > Sum
    expr += sum_ | diff | factor | value

    line = expr & Eos()
    parser = line.get_parse()

    def calculate(text):
        return float(parser(text)[0])

    self.examples([(lambda: calculate('1'), '1.0'),
                   (lambda: calculate('1 + 2*3'), '7.0'),
                   (lambda: calculate('-1 - 4 / (3 - 1)'), '-3.0'),
                   (lambda: calculate('1 -4 / (3 -1)'), '-1.0'),
                   (lambda: str(calculate('1 + 2*sin(3+ 4) - 5'))[:5],
                    '-2.68')])
def test_expression2(self):
    '''
    As before, but with evaluation.
    '''
    #basicConfig(level=DEBUG)
    # we could do evaluation directly in the parser actions, but by
    # using the nodes instead we allow future expansion into a full
    # interpreter
    # pylint: disable-msg=C0111, C0321
    class BinaryExpression(Node):
        op = lambda x, y: None
        def __float__(self):
            return self.op(float(self[0]), float(self[1]))
    class Sum(BinaryExpression): op = add
    class Difference(BinaryExpression): op = sub
    class Product(BinaryExpression): op = mul
    class Ratio(BinaryExpression): op = truediv
    class Call(Node):
        funs = {'sin': sin, 'cos': cos}
        def __float__(self):
            return self.funs[self[0]](self[1])

    # we use an unsigned float then handle negative values explicitly;
    # this lets us handle the ambiguity between subtraction and
    # negation, which requires context (not available to the lexer)
    # to resolve correctly.
    number = Token(UnsignedFloat())
    name = Token('[a-z]+')
    symbol = Token('[^a-zA-Z0-9\\. ]')

    expr = Delayed()
    factor = Delayed()

    float_ = Or(number >> float,
                ~symbol('-') & number >> (lambda x: -float(x)))

    open_ = ~symbol('(')
    close = ~symbol(')')
    trig = name(Or('sin', 'cos'))
    call = trig & open_ & expr & close > Call
    parens = open_ & expr & close
    value = parens | call | float_

    ratio = value & ~symbol('/') & factor > Ratio
    prod = value & ~symbol('*') & factor > Product
    factor += prod | ratio | value

    diff = factor & ~symbol('-') & expr > Difference
    sum_ = factor & ~symbol('+') & expr > Sum
    expr += sum_ | diff | factor | value

    line = expr & Eos()
    parser = line.get_parse()

    def myeval(text):
        return float(parser(text)[0])

    self.assertAlmostEqual(myeval('1'), 1)
    self.assertAlmostEqual(myeval('1 + 2*3'), 7)
    self.assertAlmostEqual(myeval('1 - 4 / (3 - 1)'), -1)
    self.assertAlmostEqual(myeval('1 -4 / (3 -1)'), -1)
    self.assertAlmostEqual(myeval('1 + 2*sin(3+ 4) - 5'), -2.68602680256)
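
# The comment about negation deserves a concrete illustration; a
# standalone sketch of how a signed Float token would steal the minus
# sign that the grammar needs to see as subtraction:
def sketch_signed_vs_unsigned():
    signed = (Token(Float()) | Token('[^0-9. ]'))[:] & Eos()
    unsigned = (Token(UnsignedFloat()) | Token('[^0-9. ]'))[:] & Eos()
    # the lexer's longest match attaches the sign to the number...
    assert signed.parse('1 -2') == ['1', '-2']
    # ...while an unsigned token leaves '-' as a separate symbol
    assert unsigned.parse('1 -2') == ['1', '-', '2']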
def generate_type_tokens(iterable):
    for type_name in iterable:
        yield Token(type_name) >> Type
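
# A hypothetical usage sketch (Type is assumed to be the node class
# used by the surrounding module):
def sketch_type_tokens():
    int_tok, float_tok = generate_type_tokens(['int', 'float'])
    number_type = int_tok | float_tok  # matcher for either type name
    return number_type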