def test_multiline_pattern(self):
    """Check the code generated for ``/(?x).../`` patterns.

    Two grammars are compiled from their trimmed text, and the code
    generated for the first element of the first rule's sequence is
    compared with the expected ``self._pattern(...)`` call text.
    """
    # NOTE(review): the triple-quoted literals below appear on a single
    # line in this view; the expected values contain '\n', so the literals
    # presumably spanned several lines -- confirm against the upstream file.
    grammar = r''' start = /(?x) foo bar / $ ; '''
    model = compile(grammar=trim(grammar))
    # Printed so the generated code is visible in the test output.
    print(codegen(model.rules[0].exp.sequence[0]))
    self.assertEqual(
        codegen(model.rules[0].exp.sequence[0]),
        urepr("self._pattern('(?x)\nfoo\nbar\n')").strip('"\'')
    )

    # Second case: the pattern text itself contains a literal ``\n`` escape.
    grammar = r''' start = /(?x)foo\nbar blort/ $ ; '''
    model = compile(grammar=trim(grammar))
    print(codegen(model.rules[0].exp.sequence[0]))
    self.assertEqual(
        trim(codegen(model.rules[0].exp.sequence[0])),
        urepr("self._pattern('(?x)foo\\nbar\nblort')").strip(r'"\.')
    )
def _to_str(self, lean=False):
    """Render this rule as grammar text through ``str_template``.

    When ``lean`` is true the parameter section is left empty.  Otherwise
    positional and keyword parameters are combined as ``(a, k=v)``,
    ``(k=v)``, ``::a`` (exactly one positional parameter) or ``(a, b)``.
    The rule body is rendered with the same ``lean`` flag and indented.
    """
    comments = self.comments_str()

    # NOTE(review): the keyword-parameter handling is read as belonging to
    # the non-lean branch; confirm against the file's real indentation.
    signature = ''
    if not lean:
        positional = ''
        if self.params:
            positional = ', '.join(self.param_repr(p) for p in self.params)

        keyword = ''
        if self.kwparams:
            keyword = ', '.join(
                '%s=%s' % (key, self.param_repr(value))
                for (key, value) in self.kwparams.items()
            )

        if positional and keyword:
            signature = '(%s, %s)' % (positional, keyword)
        elif keyword:
            signature = '(%s)' % (keyword)
        elif positional:
            # A single positional parameter uses the ``::`` shorthand.
            single = len(self.params) == 1
            signature = ('::%s' if single else '(%s)') % positional

    if self.base:
        base = ' < %s' % ustr(self.base.name)
    else:
        base = ''

    layout = trim(self.str_template)
    return layout.format(
        name=self.name,
        base=base,
        params=signature,
        exp=indent(self.exp._to_str(lean=lean)),
        comments=comments,
        is_name='@name\n' if self.is_name else '',
    )
def test_right_join(self):
    """Check the ``(op)>{number}+`` construct.

    The grammar must round-trip through ``str(model)`` (modulo surrounding
    whitespace), code generation must run, and parsing ``1 + 2 - 3 + 4``
    must yield tuples nested towards the right.
    """
    # NOTE(review): the triple-quoted literal appears on a single line in
    # this view; confirm its intended line breaks against the upstream file.
    grammar = r''' start = (op)>{number}+ $ ; op = '+' | '-' ; number = /\d+/ ; '''
    text = '1 + 2 - 3 + 4'
    model = compile(grammar, "test")
    self.assertEqual(trim(grammar).strip(), str(model).strip())
    # Only exercised for errors; the generated code is not inspected.
    codegen(model)
    ast = model.parse(text)
    self.assertEqual(('+', '1', ('-', '2', ('+', '3', '4'))), ast)
def test_whitespace_no_newlines(self):
    """Check a ``@@whitespace`` directive that excludes newlines.

    The expected AST pairs each list of tokens with the ``"\\n"`` that
    follows it, so newlines must be matched by the grammar's ``/\\n/``
    pattern rather than skipped as whitespace.
    """
    # NOTE(review): both triple-quoted literals appear on a single line in
    # this view (including what was presumably a ``\`` line continuation);
    # confirm their intended line breaks against the upstream file.
    grammar = """ @@whitespace :: /[\t ]+/ # this is just a token with any character but space and newline # it should finish before it capture space or newline character token = /[^ \n]+/; # expect whitespace to capture spaces between tokens, but newline # should be captured afterwards token2 = {token}* /\n/; # document is just list of this strings of tokens document = {@+:token2}* $; """
    text = trim("""\ a b c d e f """)
    expected = [
        (["a", "b"], "\n"),
        (["c", "d"], "\n"),
        (["e", "f"], "\n"),
    ]
    model = tatsu.compile(grammar, "document")
    ast = model.parse(text, start='document')
    self.assertEqual(expected, ast)
def test_pattern_concatenation(self):
    """Check patterns joined with ``+``.

    Parsing ``abc123 def456`` must return each letters-plus-digits run as
    one string, and ``model.pretty()`` must equal the trimmed ``pretty``
    text, which writes the ``?"..."`` patterns in ``/.../`` form.
    """
    # NOTE(review): the triple-quoted literals appear on a single line in
    # this view; confirm their intended line breaks against the upstream file.
    grammar = ''' start = {letters_digits}+ ; letters_digits = ?"[a-z]+" + ?'[0-9]+' ; '''
    pretty = ''' start = {letters_digits}+ ; letters_digits = /[a-z]+/ + /[0-9]+/ ; '''
    model = compile(grammar=grammar)
    ast = model.parse('abc123 def456')
    self.assertEqual(['abc123', 'def456'], ast)
    print(model.pretty())
    self.assertEqual(trim(pretty), model.pretty())
def test_numbers_and_unicode(self):
    """Check that numeric and non-ASCII rule parameters round-trip.

    A second rule is appended to the grammar depending on ``PY3``, and the
    trimmed grammar text must equal ``ustr(model)``.
    """
    # NOTE(review): the triple-quoted literals appear on a single line in
    # this view; confirm their intended line breaks against the upstream file.
    grammar = ''' rúle(1, -23, 4.56, 7.89e-11, Añez) = 'a' ; '''
    rule2 = ''' rulé::Añez = '\\xf1' ; '''
    rule3 = ''' rúlé::Añez = 'ñ' ; '''
    # rule3 spells the character literally; rule2 uses an escaped form.
    if PY3:
        grammar += rule3
    else:
        grammar += rule2
    model = compile(grammar, "test")
    self.assertEqual(trim(grammar), ustr(model))
def parse(self, ctx):
    """Return ``self.literal``, interpolated against ``ctx.ast`` if it is text.

    A non-string literal is returned unchanged.  A string literal is
    evaluated as an f-string whose names resolve against a copy of
    ``ctx.ast``; text containing a newline is passed through ``trim`` first.
    """
    literal = self.literal
    if not isinstance(literal, str):
        return literal

    source = trim(literal) if '\n' in literal else literal
    # NOTE(review): eval executes the expressions embedded in the literal;
    # presumably the literal comes from a trusted grammar -- verify.
    return eval('f' + repr(source), {}, dict(ctx.ast))  # pylint: disable=eval-used
def _to_str(self, lean=False):
    """Render the wrapped expression inside ``[...]``.

    A ``Choice`` is rendered through the trimmed ``str_template``; a
    ``Group`` contributes its inner expression so the brackets replace the
    group's own delimiters; anything else is wrapped as ``[text]``.
    """
    inner = self.exp
    rendered = ustr(inner._to_str(lean=lean))

    if isinstance(inner, Choice):
        return trim(self.str_template) % rendered
    if isinstance(inner, Group):
        # NOTE(review): the inner expression is formatted via ``%s`` rather
        # than ``_to_str(lean=lean)`` -- confirm ``lean`` is meant to be
        # ignored on this path.
        return '[%s]' % inner.exp
    return '[%s]' % rendered
def test_include(self):
    """Check that ``MockIncludeBuffer`` expands an ``#include`` directive.

    The buffer is built from the trimmed text and its ``text`` attribute
    must contain ``INCLUDED "something"`` in place of the directive.
    """
    # NOTE(review): the triple-quoted literal appears on a single line in
    # this view, leaving ``\`` followed by a space where a line continuation
    # presumably stood; confirm against the upstream file.
    text = '''\ first #include :: "something" last\ '''
    buf = MockIncludeBuffer(trim(text))
    self.assertEqual('first\n\nINCLUDED "something"\nlast', buf.text)
def _to_str(self, lean=False):
    """Render the wrapped expression inside ``[...]``.

    A ``Choice`` is rendered through the trimmed ``str_template``; a
    ``Group`` contributes its inner expression so the brackets replace the
    group's own delimiters; anything else is wrapped as ``[text]``.
    """
    inner = self.exp
    rendered = ustr(inner._to_str(lean=lean))

    if isinstance(inner, Choice):
        return trim(self.str_template) % rendered
    if isinstance(inner, Group):
        # NOTE(review): the inner expression is formatted via ``%s`` rather
        # than ``_to_str(lean=lean)`` -- confirm ``lean`` is meant to be
        # ignored on this path.
        return '[%s]' % inner.exp
    return '[%s]' % rendered
def render_fields(self, fields):
    """Fill ``fields`` with the values used to render the whole grammar.

    Values are taken from ``self.node`` and its ``directives`` mapping:
    one abstract stub per rule, the rendered rules, a version tuple derived
    from ``timestamp()``, the sorted keywords, and source-text forms of
    the whitespace, nameguard, comment, and other directive settings.
    """
    node = self.node
    directives = node.directives

    stub = trim(self.abstract_rule_template)
    abstract_rules = indent(
        '\n'.join(stub.format(name=safe_name(rule.name)) for rule in node.rules)
    )

    # Whitespace is emitted as source text: 'None', the repr of an already
    # compiled pattern, or a ``re.compile(...)`` call around the raw value.
    whitespace = node.whitespace or directives.get('whitespace')
    if not whitespace:
        whitespace = 'None'
    elif isinstance(whitespace, RETYPE):
        whitespace = repr(whitespace)
    else:
        whitespace = 're.compile({0})'.format(repr(whitespace))

    # An explicit setting on the node wins over the directive.
    nameguard = 'None'
    if node.nameguard is not None:
        nameguard = repr(node.nameguard)
    else:
        directive_nameguard = directives.get('nameguard')
        if directive_nameguard is not None:
            nameguard = directive_nameguard

    comments_re = repr(directives.get('comments'))
    eol_comments_re = repr(directives.get('eol_comments'))
    ignorecase = directives.get('ignorecase', 'None')
    left_recursion = directives.get('left_recursion', True)
    parseinfo = directives.get('parseinfo', True)
    namechars = repr(directives.get('namechars') or '')

    rendered_rules = '\n'.join(
        self.get_renderer(rule).render() for rule in node.rules
    )

    version_parts = tuple(int(n) for n in str(timestamp()).split('.'))
    version = str(version_parts)

    keywords = '\n'.join(" %s," % repr(k) for k in sorted(self.keywords))
    if keywords:
        keywords = '\n%s\n' % keywords

    fields.update(
        rules=indent(rendered_rules),
        start=node.rules[0].name,
        abstract_rules=abstract_rules,
        version=version,
        whitespace=whitespace,
        nameguard=nameguard,
        ignorecase=ignorecase,
        comments_re=comments_re,
        eol_comments_re=eol_comments_re,
        left_recursion=left_recursion,
        parseinfo=parseinfo,
        keywords=keywords,
        namechars=namechars,
    )
def test_36_params_and_keyword_params(self):
    """Check that a rule with positional and keyword parameters round-trips.

    The trimmed grammar text must equal ``str(model)``.
    """
    # NOTE(review): the triple-quoted literal appears on a single line in
    # this view; confirm its intended line breaks against the upstream file.
    grammar = ''' rule(A, kwdB=B) = 'a' ; '''
    model = compile(grammar, "test")
    self.assertEqual(trim(grammar), str(model))
def test_35_only_keyword_params(self):
    """Check that a rule with only keyword parameters round-trips.

    The trimmed grammar text must equal ``ustr(model)``.
    """
    # NOTE(review): the triple-quoted literal appears on a single line in
    # this view; confirm its intended line breaks against the upstream file.
    grammar = ''' rule(kwdA=A, kwdB=B) = 'a' ; '''
    model = compile(grammar, "test")
    self.assertEqual(trim(grammar), ustr(model))
def test_slashed_pattern(self):
    """Check a ``?"..."`` pattern whose text contains a ``/``.

    ``abc/123`` must parse to itself, and ``model.pretty()`` must equal the
    trimmed grammar, i.e. the pattern keeps its ``?"..."`` form.
    """
    # NOTE(review): the triple-quoted literal appears on a single line in
    # this view; confirm its intended line breaks against the upstream file.
    grammar = ''' start = ?"[a-z]+/[0-9]+" $ ; '''
    model = compile(grammar=grammar)
    ast = model.parse('abc/123')
    self.assertEqual('abc/123', ast)
    print(model.pretty())
    self.assertEqual(trim(grammar), model.pretty())
def test_raw_string(self):
    """Check how a raw-string token ``r'...'`` is pretty-printed.

    ``model.pretty()`` must equal the trimmed ``pretty`` text, in which the
    token appears as a plain quoted string with the backslash doubled.
    """
    # NOTE(review): the triple-quoted literals appear on a single line in
    # this view; confirm their intended line breaks against the upstream file.
    grammar = r''' start = r'am\nraw' ; '''
    pretty = r''' start = 'am\\nraw' ; '''
    model = compile(grammar, "start")
    print(model.pretty())
    self.assertEqual(trim(pretty), model.pretty())
def render_fields(self, fields):
    """Fill ``fields`` for rendering a choice between options.

    Each option is rendered, indented, and placed into the trimmed
    ``option_template``.  The error text lists the first element of every
    non-empty entry of the node's sorted lookahead, or reports that no
    options are available.
    """
    option_layout = trim(self.option_template)
    rendered_options = '\n'.join(
        option_layout.format(option=indent(self.rend(o)))
        for o in self.node.options
    )

    first_tokens = [f[0] for f in sorted(self.node.lookahead()) if f]
    firstset = ' '.join(first_tokens)
    error = 'expecting one of: ' + firstset if firstset else 'no available options'

    fields.update(
        n=self.counter(),
        options=indent(rendered_options),
        error=repr(error),
    )
def render_fields(self, fields):
    """Fill ``fields`` with the pieces needed to render one rule.

    Sets ``params`` (positional and keyword parameters joined with commas),
    ``defines`` (the ``define_template`` block, or '' when the rule defines
    no names), ``check_name``, ``leftrec`` and ``nomemo``.  The renderer's
    counter is reset first.
    """
    self.reset_counter()

    params = kwparams = ''
    if self.node.params:
        params = ', '.join(
            self.param_repr(self.rend(p)) for p in self.node.params
        )
    if self.node.kwparams:
        # NOTE(review): the guard reads ``self.node.kwparams`` but the loop
        # reads ``self.kwparams``; presumably both resolve to the same
        # mapping -- verify.
        kwparams = ', '.join(
            '%s=%s' % (k, self.param_repr(self.rend(v)))
            for k, v in self.kwparams.items()
        )

    if params and kwparams:
        params = params + ', ' + kwparams
    elif kwparams:
        params = kwparams

    fields.update(params=params)

    defines = compress_seq(self.defines())
    ldefs = set(safe_name(name) for name, flagged in defines if flagged)
    sdefs = set(
        safe_name(name)
        for name, flagged in defines
        if not flagged and name not in ldefs
    )

    if not (sdefs or ldefs):
        sdefines = ''
    else:
        sdefs = '[%s]' % ', '.join(urepr(d) for d in sorted(sdefs))
        ldefs = '[%s]' % ', '.join(urepr(d) for d in sorted(ldefs))
        # The previous code tested ``if not ldefs`` at this point to choose
        # a compact one-line form, but ``ldefs`` is by now a string of at
        # least '[]' and therefore always truthy.  That branch could never
        # run and has been removed; the rendered output is unchanged.
        sdefines = indent(
            '\n' + trim(self.define_template % (sdefs, ldefs))
        )

    fields.update(defines=sdefines)
    fields.update(
        check_name='\n self._check_name()' if self.is_name else '',
    )

    leftrec = self.node.is_leftrec
    fields.update(leftrec='\n@leftrec' if leftrec else '')
    fields.update(
        nomemo='\n@nomemo' if not self.node.is_memoizable and not leftrec else ''
    )
def make_defines_declaration(self):
    """Return the rendered ``define_template`` block for this rule.

    Returns '' when the rule defines no names.  Otherwise both name groups
    are formatted as bracketed, sorted lists of reprs, substituted into
    ``define_template``, trimmed, and indented after a leading newline.
    """
    defines = compress_seq(self.defines())
    ldefs = oset(safe_name(name) for name, flagged in defines if flagged)
    sdefs = oset(
        safe_name(name)
        for name, flagged in defines
        if not flagged and name not in ldefs
    )

    if not (sdefs or ldefs):
        return ''

    sdefs = '[%s]' % ', '.join(sorted(repr(d) for d in sdefs))
    ldefs = '[%s]' % ', '.join(sorted(repr(d) for d in ldefs))
    # The previous code tested ``if not ldefs`` here to return a compact
    # one-line form, but ``ldefs`` is by now a string of at least '[]' and
    # therefore always truthy.  That branch could never run and has been
    # removed; the returned text is unchanged.
    return indent('\n' + trim(self.define_template % (sdefs, ldefs)))
def test_numbers_and_unicode(self):
    """Check that numeric and non-ASCII rule parameters round-trip.

    The trimmed grammar text must equal ``str(model)``.
    """
    # NOTE(review): the triple-quoted literal appears on a single line in
    # this view; confirm its intended line breaks against the upstream file.
    grammar = ''' rúle(1, -23, 4.56, 7.89e-11, Añez) = 'a' ; rúlé::Añez = 'ñ' ; '''
    model = compile(grammar, "test")
    self.assertEqual(trim(grammar), str(model))
def render(self, template=None, **fields):
    """Format a template with ``fields`` and this object's public attributes.

    The template is chosen in this order: a non-``None`` result of
    ``render_fields(fields)``, then the ``template`` argument, then
    ``self.template``.  It is trimmed before formatting.

    Raises:
        KeyError: with ``(key, type(self))`` naming the first template
            field missing from ``fields``; if none is identified, the
            original ``KeyError`` is re-raised.
    """
    fields.update(__class__=self.__class__.__name__)
    for attr, value in vars(self).items():
        if not attr.startswith('_'):
            fields[attr] = value

    chosen = self.render_fields(fields)
    if chosen is None:
        chosen = self.template if template is None else template

    try:
        return self._formatter.format(trim(chosen), **fields)
    except KeyError:
        # find the missing key
        for parsed in self._formatter.parse(chosen):
            key = parsed[1]
            if key and key not in fields:
                raise KeyError(key, type(self))
        raise
def render_fields(self, fields):
    """Fill ``fields`` for rendering a choice between options.

    Each option is rendered, indented, and placed into the trimmed
    ``option_template``.  ``error`` becomes a list of reprs: a heading
    followed by the node's lookahead text wrapped at 40 columns, or a
    single entry when there is no lookahead text.
    """
    option_layout = trim(self.option_template)
    rendered_options = '\n'.join(
        option_layout.format(option=indent(self.rend(o)))
        for o in self.node.options
    )

    firstset = self.node.lookahead_str()
    if not firstset:
        message = ['no available options']
    else:
        message = ['expecting one of: ']
        message.extend(textwrap.wrap(firstset, width=40))

    fields.update(
        n=self.counter(),
        options=indent(rendered_options),
        error=[repr(part) for part in message],
    )
def render(self, template=None, **fields):
    """Format a template with ``fields`` and this object's public attributes.

    The template is chosen in this order: a non-``None`` result of
    ``render_fields(fields)``, then the ``template`` argument, then
    ``self.template``.  It is trimmed before formatting.

    Raises:
        KeyError: with ``(key, type(self))`` naming the first template
            field missing from ``fields``; if none is identified, the
            original ``KeyError`` is re-raised.
    """
    fields.update(__class__=self.__class__.__name__)
    for attr, value in vars(self).items():
        if not attr.startswith('_'):
            fields[attr] = value

    chosen = self.render_fields(fields)
    if chosen is None:
        chosen = self.template if template is None else template

    try:
        return self._formatter.format(trim(chosen), **fields)
    except KeyError:
        # find the missing key
        for parsed in self._formatter.parse(chosen):
            key = parsed[1]
            if key and key not in fields:
                raise KeyError(key, type(self))
        raise
def render(self, **fields):
    """Format a template with ``fields`` and this object's public attributes.

    A ``template`` keyword, if given, is removed from ``fields`` and used
    as the caller's template.  The template is chosen in this order: a
    non-``None`` result of ``render_fields(fields)``, then the caller's
    template, then ``self.template``.  It is trimmed before formatting.

    Raises:
        KeyError: with ``(key, type(self))`` naming the first template
            field missing from ``fields``, chained to the formatter's
            error; if none is identified, the original error is re-raised.
    """
    template = fields.pop('template', None)

    fields.update(__class__=self.__class__.__name__)
    for attr, value in vars(self).items():
        if not attr.startswith('_'):
            fields[attr] = value

    chosen = self.render_fields(fields)  # pylint: disable=assignment-from-none
    if chosen is None:
        chosen = self.template if template is None else template

    try:
        return self._formatter.format(trim(chosen), **fields)
    except KeyError as e:
        # find the missing key
        for parsed in self._formatter.parse(chosen):
            key = parsed[1]
            if key and key not in fields:
                raise KeyError(key, type(self)) from e
        raise
def test_based_rule(self):
    """Check a rule declared with a base rule (``b < a``).

    Parsing ``abb`` with ``nameguard=False`` must yield ``['a', 'b', 'b']``,
    and the trimmed grammar text must equal ``ustr(model)``.
    """
    # NOTE(review): the triple-quoted literal appears on a single line in
    # this view, leaving ``\`` followed by a space where a line continuation
    # presumably stood; confirm against the upstream file.
    grammar = '''\ start = b $ ; a = @:'a' ; b < a = {@:'b'} ; '''
    model = compile(grammar, "test")
    ast = model.parse("abb", nameguard=False)
    self.assertEqual(['a', 'b', 'b'], ast)
    self.assertEqual(trim(grammar), ustr(model))
def _to_str(self, lean=False):
    """Render this rule as grammar text through ``str_template``.

    When ``lean`` is true the parameter section is left empty.  Otherwise
    positional and keyword parameters are combined as ``(a, k=v)``,
    ``(k=v)``, ``::a`` (exactly one positional parameter) or ``(a, b)``.
    The rule body is rendered with the same ``lean`` flag and indented.
    """
    comments = self.comments_str()

    # NOTE(review): the keyword-parameter handling is read as belonging to
    # the non-lean branch; confirm against the file's real indentation.
    signature = ''
    if not lean:
        positional = ''
        if self.params:
            positional = ', '.join(self.param_repr(p) for p in self.params)

        keyword = ''
        if self.kwparams:
            keyword = ', '.join(
                '%s=%s' % (key, self.param_repr(value))
                for (key, value) in self.kwparams.items()
            )

        if positional and keyword:
            signature = '(%s, %s)' % (positional, keyword)
        elif keyword:
            signature = '(%s)' % (keyword)
        elif positional:
            # A single positional parameter uses the ``::`` shorthand.
            single = len(self.params) == 1
            signature = ('::%s' if single else '(%s)') % positional

    if self.base:
        base = ' < %s' % ustr(self.base.name)
    else:
        base = ''

    layout = trim(self.str_template)
    return layout.format(
        name=self.name,
        base=base,
        params=signature,
        exp=indent(self.exp._to_str(lean=lean)),
        comments=comments,
        is_name='@name\n' if self.is_name else '',
    )
def test_36_param_combinations(self):
    """Check positional, keyword, and mixed rule parameters.

    The grammar must pretty-print to ``pretty``; the pretty form must
    compile and parse ``a b c`` with and without a semantics object, whose
    methods assert the parameter values they receive; finally code
    generation is run on the model.
    """
    def assert_equal(target, value):
        # Lets the nested semantics class use this test case's assertEqual.
        self.assertEqual(target, value)

    class TC36Semantics:
        """Check all rule parameters for expected types and values"""

        def rule_positional(self, ast, p1, p2, p3, p4):
            assert_equal("ABC", p1)
            assert_equal(123, p2)
            assert_equal('=', p3)
            assert_equal("+", p4)
            return ast

        # NOTE(review): the grammar names this rule ``rule_keywords`` while
        # the method is ``rule_keyword``; presumably it is never matched to
        # the rule -- verify.
        def rule_keyword(self, ast, k1, k2, k3, k4):
            assert_equal("ABC", k1)
            assert_equal(123, k2)
            assert_equal('=', k3)
            assert_equal('+', k4)
            return ast

        def rule_all(self, ast, p1, p2, p3, p4, k1, k2, k3, k4):
            assert_equal("DEF", p1)
            assert_equal(456, p2)
            assert_equal('=', p3)
            assert_equal("+", p4)
            assert_equal("HIJ", k1)
            assert_equal(789, k2)
            assert_equal('=', k3)
            assert_equal('+', k4)
            return ast

    # NOTE(review): the triple-quoted literals appear on a single line in
    # this view; confirm their intended line breaks against the upstream file.
    grammar = ''' @@ignorecase::False @@nameguard start = {rule_positional | rule_keywords | rule_all} $ ; rule_positional('ABC', 123, '=', '+') = 'a' ; rule_keywords(k1=ABC, k3='=', k4='+', k2=123) = 'b' ; rule_all('DEF', 456, '=', '+', k1=HIJ, k3='=', k4='+', k2=789) = 'c' ; '''
    pretty = ''' @@ignorecase :: False @@nameguard :: True start = {rule_positional | rule_keywords | rule_all} $ ; rule_positional(ABC, 123, '=', '+') = 'a' ; rule_keywords(k1=ABC, k3='=', k4='+', k2=123) = 'b' ; rule_all(DEF, 456, '=', '+', k1=HIJ, k3='=', k4='+', k2=789) = 'c' ; '''
    model = compile(grammar, 'RuleArguments')
    self.assertEqual(trim(pretty), str(model))
    # The pretty-printed form must itself be a valid grammar.
    model = compile(pretty, 'RuleArguments')
    ast = model.parse("a b c")
    self.assertEqual(['a', 'b', 'c'], ast)
    semantics = TC36Semantics()
    ast = model.parse("a b c", semantics=semantics)
    self.assertEqual(['a', 'b', 'c'], ast)
    codegen(model)
def trim(self, item, tabwidth=4):
    """Render ``item`` with ``self.rend`` and pass the result to ``trim``.

    ``tabwidth`` is forwarded unchanged to the ``trim`` function.
    """
    rendered = self.rend(item)
    return trim(rendered, tabwidth=tabwidth)
def trim(self, item, tabwidth=4):
    """Render ``item`` with ``self.rend`` and pass the result to ``trim``.

    ``tabwidth`` is forwarded unchanged to the ``trim`` function.
    """
    rendered = self.rend(item)
    return trim(rendered, tabwidth=tabwidth)
def _to_str(self, lean=False):
    """Render the wrapped expression inside parentheses.

    Text spanning more than one line is indented and placed on its own
    lines between the parentheses; otherwise it is trimmed and wrapped
    inline as ``(text)``.
    """
    body = self.exp._to_ustr(lean=lean)
    if len(body.splitlines()) <= 1:
        return '(%s)' % trim(body)
    return '(\n%s\n)' % indent(body)
def _to_str(self, lean=False):
    """Render the wrapped expression inside parentheses.

    Text spanning more than one line is indented and placed on its own
    lines between the parentheses; otherwise it is trimmed and wrapped
    inline as ``(text)``.
    """
    body = self.exp._to_ustr(lean=lean)
    if len(body.splitlines()) <= 1:
        return '(%s)' % trim(body)
    return '(\n%s\n)' % indent(body)
def test_pretty(self):
    """Check ``pretty()`` and ``pretty_lean()`` on a small grammar.

    ``pretty()`` must equal the expected text and ``str(model)``.  The
    expected lean text carries no ``::Name`` parameters and no ``name:``
    prefixes on rule elements.
    """
    # NOTE(review): the triple-quoted literals appear on a single line in
    # this view; confirm their intended line breaks against the upstream file.
    grammar = r''' start = lisp ; lisp = sexp | list | symbol; sexp::SExp = '(' cons:lisp '.' ~ cdr:lisp ')' ; list::List = '(' elements:{sexp}* ')' ; symbol::Symbol = value:/[^\s().]+/ ; '''
    pretty = trim(r''' start = lisp ; lisp = sexp | list | symbol ; sexp::SExp = '(' cons:lisp '.' ~ cdr:lisp ')' ; list::List = '(' elements:{sexp} ')' ; symbol::Symbol = value:/[^\s().]+/ ; ''')
    pretty_lean = trim(r''' start = lisp ; lisp = sexp | list | symbol ; sexp = '(' lisp '.' ~ lisp ')' ; list = '(' {sexp} ')' ; symbol = /[^\s().]+/ ; ''')
    model = compile(grammar=grammar)
    self.assertEqual(pretty, model.pretty())
    self.assertEqual(str(model), model.pretty())
    self.assertEqual(pretty_lean, model.pretty_lean())