Esempio n. 1
0
    def test_multiline_pattern(self):
        """Check the code generated for patterns written across several lines.

        Two grammars are compiled. For each, the code generated for the first
        element of the first rule's sequence is printed and compared with the
        expected ``self._pattern(...)`` call.
        """
        verbose_source = r'''
            start =
            /(?x)
            foo
            bar
            / $ ;
        '''
        verbose_model = compile(grammar=trim(verbose_source))
        verbose_node = verbose_model.rules[0].exp.sequence[0]
        print(codegen(verbose_node))
        self.assertEqual(
            codegen(verbose_node),
            urepr("self._pattern('(?x)\nfoo\nbar\n')").strip('"\'')
        )

        mixed_source = r'''
            start =
            /(?x)foo\nbar
            blort/ $ ;
        '''
        mixed_model = compile(grammar=trim(mixed_source))
        mixed_node = mixed_model.rules[0].exp.sequence[0]
        print(codegen(mixed_node))
        self.assertEqual(
            trim(codegen(mixed_node)),
            urepr("self._pattern('(?x)foo\\nbar\nblort')").strip(r'"\.')
        )
Esempio n. 2
0
    def _to_str(self, lean=False):
        """Return this rule as text by filling in ``str_template``.

        When ``lean`` is true the parameter list is omitted. Otherwise a
        single positional parameter is written as ``::param`` and every other
        combination of positional and keyword parameters as a parenthesized,
        comma-separated list.
        """
        comments = self.comments_str()

        signature = ''
        if not lean:
            positional = ''
            if self.params:
                positional = ', '.join(
                    self.param_repr(p) for p in self.params
                )

            keyword = ''
            if self.kwparams:
                keyword = ', '.join(
                    '%s=%s' % (key, self.param_repr(value))
                    for (key, value) in self.kwparams.items()
                )

            if positional and keyword:
                signature = '(%s, %s)' % (positional, keyword)
            elif keyword:
                signature = '(%s)' % (keyword)
            elif positional and len(self.params) == 1:
                signature = '::%s' % positional
            elif positional:
                signature = '(%s)' % positional

        if self.base:
            base = ' < %s' % ustr(self.base.name)
        else:
            base = ''

        template = trim(self.str_template)
        return template.format(
            name=self.name,
            base=base,
            params=signature,
            exp=indent(self.exp._to_str(lean=lean)),
            comments=comments,
            is_name='@name\n' if self.is_name else '',
        )
Esempio n. 3
0
    def test_right_join(self):
        """Check the ``(op)>{number}+`` right-join expression.

        The compiled model must print back as the trimmed grammar, code
        generation must run on it, and parsing ``1 + 2 - 3 + 4`` must give a
        right-nested tuple.
        """
        source = r'''
            start
                =
                (op)>{number}+ $
                ;


            op
                =
                '+' | '-'
                ;


            number
                =
                /\d+/
                ;
        '''
        expected_tree = ('+', '1', ('-', '2', ('+', '3', '4')))

        compiled = compile(source, "test")
        self.assertEqual(trim(source).strip(), str(compiled).strip())
        codegen(compiled)

        parsed = compiled.parse('1 + 2 - 3 + 4')
        self.assertEqual(expected_tree, parsed)
Esempio n. 4
0
    def test_whitespace_no_newlines(self):
        """Check parsing when ``@@whitespace`` covers only tabs and spaces.

        Each input line must come back as a pair of its tokens and the
        newline that ends it.
        """
        source = """
            @@whitespace :: /[\t ]+/
            # this is just a token with any character but space and newline
            # it should finish before it capture space or newline character
            token = /[^ \n]+/;
            # expect whitespace to capture spaces between tokens, but newline
            # should be captured afterwards
            token2 = {token}* /\n/;
            # document is just list of this strings of tokens
            document = {@+:token2}* $;
        """
        document_text = trim("""\
            a b
            c d
            e f
        """)

        compiled = tatsu.compile(source, "document")
        parsed = compiled.parse(document_text, start='document')

        self.assertEqual(
            [
                (["a", "b"], "\n"),
                (["c", "d"], "\n"),
                (["e", "f"], "\n"),
            ],
            parsed,
        )
Esempio n. 5
0
    def test_pattern_concatenation(self):
        """Check patterns joined with ``+`` in a grammar.

        The model must parse letter/digit runs as single tokens, and its
        pretty-printed form must use ``/.../`` patterns in place of the
        ``?"..."`` notation of the input grammar.
        """
        source = '''
            start
                =
                {letters_digits}+
                ;


            letters_digits
                =
                ?"[a-z]+"
                + ?'[0-9]+'
                ;
        '''
        expected_pretty = '''
            start
                =
                {letters_digits}+
                ;


            letters_digits
                =
                /[a-z]+/
                + /[0-9]+/
                ;
        '''
        compiled = compile(grammar=source)

        parsed = compiled.parse('abc123 def456')
        self.assertEqual(['abc123', 'def456'], parsed)

        print(compiled.pretty())
        self.assertEqual(trim(expected_pretty), compiled.pretty())
Esempio n. 6
0
    def test_numbers_and_unicode(self):
        """Check that numeric and non-ASCII rule parameters print back as written.

        A second rule is appended to the grammar; which of the two variants is
        used depends on ``PY3``.
        """
        source = '''
            rúle(1, -23, 4.56, 7.89e-11, Añez)
                =
                'a'
                ;
        '''
        escaped_rule = '''

            rulé::Añez
                =
                '\\xf1'
                ;
        '''
        literal_rule = '''

            rúlé::Añez
                =
                'ñ'
                ;
        '''
        source += literal_rule if PY3 else escaped_rule

        compiled = compile(source, "test")
        self.assertEqual(trim(source), ustr(compiled))
Esempio n. 7
0
 def parse(self, ctx):
     """Return the literal, interpolating it as an f-string when it is a ``str``.

     A non-string literal is returned unchanged. A string literal that
     contains a newline is first passed through ``trim``; the text is then
     evaluated as an f-string whose local names are the entries of
     ``ctx.ast``.
     """
     literal = self.literal
     if not isinstance(literal, str):
         return literal

     source = trim(literal) if '\n' in literal else literal
     names = dict(ctx.ast)
     # NOTE(review): the literal is evaluated as Python code; confirm that it
     # can only originate from trusted input.
     return eval('f' + repr(source), {}, names)  # pylint: disable=eval-used
 def _to_str(self, lean=False):
     """Return the wrapped expression rendered between square brackets.

     A ``Choice`` is laid out with ``str_template`` instead of the inline
     ``[%s]`` form. A ``Group`` is unwrapped so that its inner expression is
     bracketed directly.

     ``lean`` is forwarded to whichever expression ends up being rendered.
     """
     inner = self.exp
     template = '[%s]'
     if isinstance(inner, Choice):
         template = trim(self.str_template)
     elif isinstance(inner, Group):
         # Render the group's inner expression explicitly: interpolating the
         # object itself would fall back to its default string conversion
         # and lose the ``lean`` flag.
         inner = inner.exp
     return template % ustr(inner._to_str(lean=lean))
Esempio n. 9
0
 def test_include(self):
     """Check that ``MockIncludeBuffer`` expands an ``#include`` line."""
     source = '''\
         first
             #include :: "something"
         last\
     '''
     expected = 'first\n\nINCLUDED "something"\nlast'
     buffer = MockIncludeBuffer(trim(source))
     self.assertEqual(expected, buffer.text)
Esempio n. 10
0
 def _to_str(self, lean=False):
     """Return the wrapped expression rendered between square brackets.

     A ``Choice`` is laid out with ``str_template`` instead of the inline
     ``[%s]`` form. A ``Group`` is unwrapped so that its inner expression is
     bracketed directly.

     ``lean`` is forwarded to whichever expression ends up being rendered.
     """
     inner = self.exp
     template = '[%s]'
     if isinstance(inner, Choice):
         template = trim(self.str_template)
     elif isinstance(inner, Group):
         # Render the group's inner expression explicitly: interpolating the
         # object itself would fall back to its default string conversion
         # and lose the ``lean`` flag.
         inner = inner.exp
     return template % ustr(inner._to_str(lean=lean))
Esempio n. 11
0
    def render_fields(self, fields):
        """Add the grammar-level template values to ``fields``.

        The values are the rendered rules, one abstract rule per grammar rule,
        a version tuple derived from ``timestamp()``, the keyword list, and
        settings read from the node and its directives (whitespace, nameguard,
        ignorecase, comment patterns, left recursion, parseinfo, namechars).
        """
        node = self.node
        directives = node.directives

        abstract_template = trim(self.abstract_rule_template)
        abstract_rules = indent('\n'.join(
            abstract_template.format(name=safe_name(rule.name))
            for rule in node.rules
        ))

        # Whitespace is emitted as source text: 'None', the repr of an
        # already-compiled pattern, or a re.compile(...) call.
        whitespace = node.whitespace or directives.get('whitespace')
        if whitespace:
            if isinstance(whitespace, RETYPE):
                whitespace = repr(whitespace)
            else:
                whitespace = 're.compile({0})'.format(repr(whitespace))
        else:
            whitespace = 'None'

        # The node's own setting takes precedence over the directive.
        nameguard = 'None'
        if node.nameguard is not None:
            nameguard = repr(node.nameguard)
        elif directives.get('nameguard') is not None:
            nameguard = directives.get('nameguard')

        comments_re = repr(directives.get('comments'))
        eol_comments_re = repr(directives.get('eol_comments'))
        ignorecase = directives.get('ignorecase', 'None')
        left_recursion = directives.get('left_recursion', True)
        parseinfo = directives.get('parseinfo', True)
        namechars = repr(directives.get('namechars') or '')

        rendered_rules = '\n'.join(
            self.get_renderer(rule).render() for rule in node.rules
        )

        version_parts = str(timestamp()).split('.')
        version = str(tuple(int(part) for part in version_parts))

        keyword_lines = ["    %s," % repr(k) for k in sorted(self.keywords)]
        keywords = '\n'.join(keyword_lines)
        if keywords:
            keywords = '\n%s\n' % keywords

        fields.update(
            rules=indent(rendered_rules),
            start=node.rules[0].name,
            abstract_rules=abstract_rules,
            version=version,
            whitespace=whitespace,
            nameguard=nameguard,
            ignorecase=ignorecase,
            comments_re=comments_re,
            eol_comments_re=eol_comments_re,
            left_recursion=left_recursion,
            parseinfo=parseinfo,
            keywords=keywords,
            namechars=namechars,
        )
Esempio n. 12
0
 def test_36_params_and_keyword_params(self):
     """Check that a rule with positional and keyword parameters prints back unchanged."""
     source = '''
         rule(A, kwdB=B)
             =
             'a'
             ;
     '''
     compiled = compile(source, "test")
     expected = trim(source)
     self.assertEqual(expected, str(compiled))
Esempio n. 13
0
 def test_35_only_keyword_params(self):
     """Check that a rule with only keyword parameters prints back unchanged."""
     source = '''
         rule(kwdA=A, kwdB=B)
             =
             'a'
             ;
     '''
     compiled = compile(source, "test")
     expected = trim(source)
     self.assertEqual(expected, ustr(compiled))
Esempio n. 14
0
 def test_slashed_pattern(self):
     """Check a ``?"..."`` pattern that contains a slash.

     The model must parse ``abc/123`` and its pretty-printed form must equal
     the trimmed grammar.
     """
     source = '''
         start
             =
             ?"[a-z]+/[0-9]+" $
             ;
     '''
     compiled = compile(grammar=source)

     parsed = compiled.parse('abc/123')
     self.assertEqual('abc/123', parsed)

     print(compiled.pretty())
     self.assertEqual(trim(source), compiled.pretty())
Esempio n. 15
0
 def test_raw_string(self):
     """Check how an ``r'...'`` grammar string is pretty-printed.

     The expected output shows the backslash of the raw string doubled.
     """
     source = r'''
         start = r'am\nraw' ;
     '''
     expected_pretty = r'''
         start
             =
             'am\\nraw'
             ;
     '''
     compiled = compile(source, "start")
     print(compiled.pretty())
     self.assertEqual(trim(expected_pretty), compiled.pretty())
Esempio n. 16
0
 def render_fields(self, fields):
     """Add the counter, the rendered options and the error text to ``fields``.

     The error text lists the first item of every non-empty entry of the
     node's sorted lookahead, or says that no options are available.
     """
     option_template = trim(self.option_template)
     rendered_options = '\n'.join(
         option_template.format(option=indent(self.rend(option)))
         for option in self.node.options
     )

     first_items = [f[0] for f in sorted(self.node.lookahead()) if f]
     firstset = ' '.join(first_items)
     error = 'expecting one of: ' + firstset if firstset else 'no available options'

     fields.update(
         n=self.counter(),
         options=indent(rendered_options),
         error=repr(error),
     )
Esempio n. 17
0
    def render_fields(self, fields):
        """Add the rule-level template values to ``fields``.

        Resets the counter, then sets:

        * ``params``: the rendered positional and keyword parameters joined
          with commas;
        * ``defines``: the text declaring the names this rule defines, or an
          empty string when it defines none;
        * ``check_name``, ``leftrec`` and ``nomemo``: optional text fragments
          that are empty when the corresponding condition does not hold.
        """
        self.reset_counter()

        params = kwparams = ''
        if self.node.params:
            params = ', '.join(
                self.param_repr(self.rend(p))
                for p in self.node.params
            )
        if self.node.kwparams:
            # NOTE(review): this iterates self.kwparams while the guard above
            # checks self.node.kwparams; confirm both name the same mapping.
            kwparams = ', '.join(
                '%s=%s'
                %
                (k, self.param_repr(self.rend(v)))
                for k, v in self.kwparams.items()
            )

        if params and kwparams:
            params = params + ', ' + kwparams
        elif kwparams:
            params = kwparams

        fields.update(params=params)

        defines = compress_seq(self.defines())
        ldefs = {safe_name(d) for d, is_list in defines if is_list}
        sdefs = {
            safe_name(d)
            for d, is_list in defines
            if not is_list and d not in ldefs
        }

        if not (sdefs or ldefs):
            sdefines = ''
        else:
            sdefs_text = '[%s]' % ', '.join(urepr(d) for d in sorted(sdefs))
            ldefs_text = '[%s]' % ', '.join(urepr(d) for d in sorted(ldefs))
            # Always go through define_template: the bracketed strings built
            # above are never empty, so a shorter form selected by testing
            # the list text for emptiness could never be reached.
            sdefines = indent(
                '\n' +
                trim(self.define_template % (sdefs_text, ldefs_text))
            )

        fields.update(defines=sdefines)
        fields.update(
            check_name='\n    self._check_name()' if self.is_name else '',
        )
        leftrec = self.node.is_leftrec
        fields.update(leftrec='\n@leftrec' if leftrec else '')
        fields.update(nomemo='\n@nomemo' if not self.node.is_memoizable and not leftrec else '')
Esempio n. 18
0
    def make_defines_declaration(self):
        """Return the text that declares the names defined by this node.

        The names returned by ``self.defines()`` are split into those flagged
        as lists and the rest, each rendered as a sorted, bracketed list of
        reprs and substituted into ``define_template``. An empty string is
        returned when there are no names at all.
        """
        defines = compress_seq(self.defines())
        ldefs = oset(safe_name(d) for d, is_list in defines if is_list)
        sdefs = oset(
            safe_name(d)
            for d, is_list in defines
            if not is_list and d not in ldefs
        )

        if not (sdefs or ldefs):
            return ''

        sdefs_text = '[%s]' % ', '.join(sorted(repr(d) for d in sdefs))
        ldefs_text = '[%s]' % ', '.join(sorted(repr(d) for d in ldefs))
        # Always go through define_template: the bracketed strings built above
        # are never empty, so a shorter form selected by testing the list text
        # for emptiness could never be reached.
        return indent(
            '\n' + trim(self.define_template % (sdefs_text, ldefs_text))
        )
Esempio n. 19
0
    def test_numbers_and_unicode(self):
        """Check that numeric and non-ASCII names and parameters print back unchanged."""
        source = '''
            rúle(1, -23, 4.56, 7.89e-11, Añez)
                =
                'a'
                ;


            rúlé::Añez
                =
                'ñ'
                ;
        '''

        compiled = compile(source, "test")
        expected = trim(source)
        self.assertEqual(expected, str(compiled))
Esempio n. 20
0
    def render(self, template=None, **fields):
        """Format the selected template with ``fields`` and return the text.

        ``fields`` is extended with ``__class__`` (this object's class name)
        and with every instance attribute whose name does not start with
        ``_``. ``render_fields`` may then add more entries; if it returns a
        value other than ``None``, that value is used as the template in
        preference to both ``template`` and ``self.template``.

        Raises:
            KeyError: when the template refers to a field that is not in
                ``fields``. The error carries the missing name and this
                object's type and is chained to the formatter's own error.
        """
        fields.update(__class__=self.__class__.__name__)
        fields.update({k: v for k, v in vars(self).items() if not k.startswith('_')})

        override = self.render_fields(fields)
        if override is not None:
            template = override
        elif template is None:
            template = self.template

        try:
            return self._formatter.format(trim(template), **fields)
        except KeyError as e:
            # Report the first template field that has no value, keeping the
            # formatter's error as the explicit cause.
            keys = (p[1] for p in self._formatter.parse(template))
            for key in keys:
                if key and key not in fields:
                    raise KeyError(key, type(self)) from e
            # No missing field could be identified: re-raise unchanged.
            raise
Esempio n. 21
0
 def render_fields(self, fields):
     """Add the counter, the rendered options and the error lines to ``fields``.

     The error is a list of reprs: a heading followed by the node's lookahead
     text wrapped at 40 columns, or a single line saying that no options are
     available.
     """
     option_template = trim(self.option_template)
     rendered_options = '\n'.join(
         option_template.format(option=indent(self.rend(option)))
         for option in self.node.options
     )

     firstset = self.node.lookahead_str()
     if firstset:
         messages = ['expecting one of: ']
         messages.extend(textwrap.wrap(firstset, width=40))
     else:
         messages = ['no available options']
     error_lines = [repr(message) for message in messages]

     fields.update(
         n=self.counter(),
         options=indent(rendered_options),
         error=error_lines,
     )
    def render(self, template=None, **fields):
        """Format the selected template with ``fields`` and return the text.

        ``fields`` is extended with ``__class__`` (this object's class name)
        and with every instance attribute whose name does not start with
        ``_``. ``render_fields`` may then add more entries; if it returns a
        value other than ``None``, that value is used as the template in
        preference to both ``template`` and ``self.template``.

        Raises:
            KeyError: when the template refers to a field that is not in
                ``fields``. The error carries the missing name and this
                object's type and is chained to the formatter's own error.
        """
        fields.update(__class__=self.__class__.__name__)
        fields.update({k: v for k, v in vars(self).items() if not k.startswith('_')})

        override = self.render_fields(fields)
        if override is not None:
            template = override
        elif template is None:
            template = self.template

        try:
            return self._formatter.format(trim(template), **fields)
        except KeyError as e:
            # Report the first template field that has no value, keeping the
            # formatter's error as the explicit cause.
            keys = (p[1] for p in self._formatter.parse(template))
            for key in keys:
                if key and key not in fields:
                    raise KeyError(key, type(self)) from e
            # No missing field could be identified: re-raise unchanged.
            raise
Esempio n. 23
0
    def render(self, **fields):
        """Format the selected template with ``fields`` and return the text.

        An optional ``template`` entry is removed from ``fields`` and used as
        the template unless ``render_fields`` returns one; ``self.template``
        is the fallback. ``fields`` is extended with ``__class__`` and with
        every instance attribute whose name does not start with ``_``.

        Raises:
            KeyError: when the template refers to a field that is not in
                ``fields``; the error carries the missing name and this
                object's type.
        """
        requested = fields.pop('template', None)
        fields.update(__class__=self.__class__.__name__)
        for attr, value in vars(self).items():
            if not attr.startswith('_'):
                fields[attr] = value

        override = self.render_fields(fields)  # pylint: disable=assignment-from-none
        if override is not None:
            template = override
        elif requested is None:
            template = self.template
        else:
            template = requested

        try:
            return self._formatter.format(trim(template), **fields)
        except KeyError as e:
            # Look for the first template field that has no value.
            for parsed in self._formatter.parse(template):
                field_name = parsed[1]
                if field_name and field_name not in fields:
                    raise KeyError(field_name, type(self)) from e
            raise
Esempio n. 24
0
    def test_based_rule(self):
        """Check a rule declared with a base rule (``b < a``).

        Parsing ``abb`` with ``nameguard=False`` must give ``a`` followed by
        two ``b``, and the model must print back as the trimmed grammar.
        """
        source = '''\
            start
                =
                b $
                ;


            a
                =
                @:'a'
                ;


            b < a
                =
                {@:'b'}
                ;
            '''
        compiled = compile(source, "test")

        parsed = compiled.parse("abb", nameguard=False)
        self.assertEqual(['a', 'b', 'b'], parsed)

        self.assertEqual(trim(source), ustr(compiled))
Esempio n. 25
0
    def _to_str(self, lean=False):
        """Return this rule as text by filling in ``str_template``.

        When ``lean`` is true the parameter list is omitted. Otherwise a
        single positional parameter is written as ``::param`` and every other
        combination of positional and keyword parameters as a parenthesized,
        comma-separated list.
        """
        comments = self.comments_str()

        signature = ''
        if not lean:
            positional = ''
            if self.params:
                positional = ', '.join(
                    self.param_repr(p) for p in self.params
                )

            keyword = ''
            if self.kwparams:
                keyword = ', '.join(
                    '%s=%s' % (key, self.param_repr(value))
                    for (key, value) in self.kwparams.items()
                )

            if positional and keyword:
                signature = '(%s, %s)' % (positional, keyword)
            elif keyword:
                signature = '(%s)' % (keyword)
            elif positional and len(self.params) == 1:
                signature = '::%s' % positional
            elif positional:
                signature = '(%s)' % positional

        if self.base:
            base = ' < %s' % ustr(self.base.name)
        else:
            base = ''

        template = trim(self.str_template)
        return template.format(
            name=self.name,
            base=base,
            params=signature,
            exp=indent(self.exp._to_str(lean=lean)),
            comments=comments,
            is_name='@name\n' if self.is_name else '',
        )
Esempio n. 26
0
    def test_36_param_combinations(self):
        """Check rules with positional, keyword and mixed parameters.

        The grammar must pretty-print to the expected text, the pretty text
        must itself compile, and parsing ``a b c`` must succeed both without
        and with a semantics object that checks the parameters each rule
        passes. Code generation is run on the final model.
        """
        def assert_equal(target, value):
            self.assertEqual(target, value)

        class TC36Semantics:

            """Check all rule parameters for expected types and values"""

            def rule_positional(self, ast, p1, p2, p3, p4):
                assert_equal("ABC", p1)
                assert_equal(123, p2)
                assert_equal('=', p3)
                assert_equal("+", p4)
                return ast

            # Named after the grammar rule ``rule_keywords`` so that these
            # keyword-parameter checks are actually run during the parse.
            def rule_keywords(self, ast, k1, k2, k3, k4):
                assert_equal("ABC", k1)
                assert_equal(123, k2)
                assert_equal('=', k3)
                assert_equal('+', k4)
                return ast

            def rule_all(self, ast, p1, p2, p3, p4, k1, k2, k3, k4):
                assert_equal("DEF", p1)
                assert_equal(456, p2)
                assert_equal('=', p3)
                assert_equal("+", p4)
                assert_equal("HIJ", k1)
                assert_equal(789, k2)
                assert_equal('=', k3)
                assert_equal('+', k4)
                return ast

        grammar = '''
            @@ignorecase::False
            @@nameguard

            start
                = {rule_positional | rule_keywords | rule_all} $ ;
            rule_positional('ABC', 123, '=', '+')
                = 'a' ;
            rule_keywords(k1=ABC, k3='=', k4='+', k2=123)
                = 'b' ;
            rule_all('DEF', 456, '=', '+', k1=HIJ, k3='=', k4='+', k2=789)
                = 'c' ;
        '''

        pretty = '''
            @@ignorecase :: False
            @@nameguard :: True

            start
                =
                {rule_positional | rule_keywords | rule_all} $
                ;


            rule_positional(ABC, 123, '=', '+')
                =
                'a'
                ;


            rule_keywords(k1=ABC, k3='=', k4='+', k2=123)
                =
                'b'
                ;


            rule_all(DEF, 456, '=', '+', k1=HIJ, k3='=', k4='+', k2=789)
                =
                'c'
                ;
        '''

        model = compile(grammar, 'RuleArguments')
        self.assertEqual(trim(pretty), str(model))
        model = compile(pretty, 'RuleArguments')

        ast = model.parse("a b c")
        self.assertEqual(['a', 'b', 'c'], ast)
        semantics = TC36Semantics()
        ast = model.parse("a b c", semantics=semantics)
        self.assertEqual(['a', 'b', 'c'], ast)
        codegen(model)
 def trim(self, item, tabwidth=4):
     """Render ``item`` and pass the result to the module-level ``trim``."""
     rendered = self.rend(item)
     return trim(rendered, tabwidth=tabwidth)
Esempio n. 28
0
 def trim(self, item, tabwidth=4):
     """Render ``item`` and pass the result to the module-level ``trim``."""
     rendered = self.rend(item)
     return trim(rendered, tabwidth=tabwidth)
Esempio n. 29
0
 def _to_str(self, lean=False):
     """Return the inner expression wrapped in parentheses.

     A rendering of more than one line is indented and placed on its own
     lines between the parentheses; otherwise it is trimmed and kept inline.
     """
     inner = self.exp._to_ustr(lean=lean)
     spans_lines = len(inner.splitlines()) > 1
     if not spans_lines:
         return '(%s)' % trim(inner)
     return '(\n%s\n)' % indent(inner)
Esempio n. 30
0
 def _to_str(self, lean=False):
     """Return the inner expression wrapped in parentheses.

     A rendering of more than one line is indented and placed on its own
     lines between the parentheses; otherwise it is trimmed and kept inline.
     """
     inner = self.exp._to_ustr(lean=lean)
     spans_lines = len(inner.splitlines()) > 1
     if not spans_lines:
         return '(%s)' % trim(inner)
     return '(\n%s\n)' % indent(inner)
Esempio n. 31
0
    def test_pretty(self):
        """Check the full and lean pretty-printed forms of a small grammar.

        ``pretty()`` must match the expected text and ``str(model)``;
        ``pretty_lean()`` must match the expected text that has no rule
        parameters and no element names.
        """
        source = r'''
            start = lisp ;
            lisp = sexp | list | symbol;
            sexp::SExp = '(' cons:lisp '.' ~ cdr:lisp ')' ;
            list::List = '(' elements:{sexp}* ')' ;
            symbol::Symbol = value:/[^\s().]+/ ;
        '''

        expected_full = trim(r'''
            start
                =
                lisp
                ;


            lisp
                =
                sexp | list | symbol
                ;


            sexp::SExp
                =
                '(' cons:lisp '.' ~ cdr:lisp ')'
                ;


            list::List
                =
                '(' elements:{sexp} ')'
                ;


            symbol::Symbol
                =
                value:/[^\s().]+/
                ;
        ''')

        expected_lean = trim(r'''
            start
                =
                lisp
                ;


            lisp
                =
                sexp | list | symbol
                ;


            sexp
                =
                '(' lisp '.' ~ lisp ')'
                ;


            list
                =
                '(' {sexp} ')'
                ;


            symbol
                =
                /[^\s().]+/
                ;
        ''')

        compiled = compile(grammar=source)

        self.assertEqual(expected_full, compiled.pretty())
        self.assertEqual(str(compiled), compiled.pretty())

        self.assertEqual(expected_lean, compiled.pretty_lean())