コード例 #1
0
    def compile_rules(self, context):
        """
        Pass to turn the lexer DSL into our internal regexp objects.

        Imports all named patterns into a ``RegexpCollection``, builds one NFA
        per lexer rule, and finally links them all from a fresh start state.

        :param context: Compilation context. Must have ``nfa_start`` set to
            None on entry; on exit it holds the start ``NFAState`` of the
            combined NFA for all lexer rules.
        """
        assert context.nfa_start is None

        regexps = RegexpCollection()

        # Import patterns into regexps
        for name, pattern, loc in self.patterns:
            with Context('In definition of lexer pattern {}'.format(name),
                         loc):
                regexps.add_pattern(name, pattern)

        def check(token):
            # Check that actions never emit Termination and LexingFailure
            # tokens. These tokens are supposed to be emitted by the lexing
            # engine only.
            check_source_language(
                token not in (self.tokens.Termination,
                              self.tokens.LexingFailure),
                '{} is reserved for automatic actions only'.format(
                    token.dsl_name))

        # Now turn each rule into a NFA
        nfas = []

        for i, a in enumerate(self.rules):
            assert isinstance(a, RuleAssoc)

            # Validate the tokens emitted by this rule's action, whatever
            # kind of action it is.
            if isinstance(a.action, Case.CaseAction):
                for alt in a.action.all_alts:
                    check(alt.send)
            elif isinstance(a.action, Ignore):
                pass
            else:
                assert isinstance(a.action, TokenAction)
                check(a.action)

            with Context('In definition of lexer rules', a.location):
                nfa_start, nfa_end = regexps.nfa_for(a.matcher.regexp)
            nfas.append(nfa_start)

            # The first rule that was added must have precedence when multiple
            # rules compete for the longest match. To implement this behavior,
            # we associate increasing ids to each token action.
            nfa_end.label = (i, a.action)

        # Create a big OR for all possible accepted patterns
        context.nfa_start = NFAState()
        for nfa in nfas:
            context.nfa_start.add_transition(None, nfa)
コード例 #2
0
ファイル: test.py プロジェクト: yakobowski/langkit
        # Ranges
        r'[]',
        r'[a]',
        r'[a-c]',
        r'[^a-c]',
        r'[^]',
        r'[a^]',
        r'[a-]',
        r'[-b]',
        r'[a-c-]',

        # Escape sequences in ranges
        r'[\]]',
        r'[\u1234]',
        r'[\u1234-\u1243]',
]:

    print('== {} =='.format(regexp))

    # Try to parse each regexp with a fresh collection: invalid ones are
    # expected to raise DiagnosticError (silently accepted here, since the
    # diagnostic itself is the interesting output), valid ones print their
    # parsed representation.
    lexer = RegexpCollection()
    try:
        parser = lexer._parse(regexp)
    except DiagnosticError:
        pass
    else:
        print(parser)
    print('')

print('Done')