Example #1
0
    def match(self, lexer: Lexer, debug: int = 0, partial=True):
        """
        Try to match the lexer's current tokens against each alternative
        production list in ``self.productions``.

        return AST instance: one production list matched (or Epsilon)
        return None: every production list failed
        """
        tree = Ast(self.name, lexer.current_position(), grammars=[])
        recursive_productions = []
        matched = False
        for production_list in self.productions:
            # drop a rewind point before attempting this alternative
            lexer.anchor()
            if debug:
                print(' ' * (
                    debug - 1) + f'### {self.name}.match() with production_list: {production_list}')
            # productions: [[G1, G2], [G3, G4], ...] <-> G1 G2 | G3 G4 | ...
            # A failed alternative rewinds the lexer (inside build_ast) and
            # the loop moves on to the next production_list.
            # Passing debug straight through is equivalent to the default
            # when debug == 0.
            outcome = self.build_ast(tree, lexer, production_list,
                                     recursive_productions, debug)
            if outcome is False:
                # this alternative failed; try the next one
                continue
            # outcome is True (full match) or None (Epsilon): both succeed
            if debug:
                print(
                    ' ' * (debug - 1) + f'+++ {self.name}.match() SUCCESS')
            matched = True
            break

        if not matched:
            if debug:
                print(' ' * (debug - 1) + f'--- {self.name}.match() FAILED')
            return None

        # one production_list is fully matched, pop anchor stack by one
        lexer.release_anchor()

        if lexer.current_token is None or tree.children or partial:
            return tree
        return None
Example #2
0
    def match(self, lexer: Lexer, debug: int = 0, partial=True):
        """
        Match this Group's inner grammar repeatedly, honouring the
        (min, max) bounds in ``self.repeat`` (max == -1 means unbounded).

        return AST instance: at least the minimum repetitions matched
        return None: fewer than the minimum repetitions matched
        """
        if debug:
            print(' ' * (debug - 1) +
                  f'### Group {self.name}.match(), calling super\'s match()')

        tree = Ast(self.name, lexer.current_position(), grammars=[])
        if lexer.current_token is None\
            or lexer.current_token.spelling == '_EOF':
            # nothing left to consume: succeed only if zero repetitions
            # are allowed (grammar* / grammar{0,...} / [grammar])
            if self.repeat[0] == 0:
                return tree
            return None

        lexer.anchor()
        minimum, maximum = self.repeat
        repetition = 0
        # maximum == -1 covers grammar* | grammar+ | grammar{a,} (no cap);
        # otherwise stop once the cap is reached:
        # grammar{a, b} | grammar{a} | [grammar]
        while maximum == -1 or repetition < maximum:
            nodes = super().match(lexer, debug)
            if nodes is None:
                break
            tree.extend(nodes)
            repetition += 1
            if lexer.current_token is None:
                break

        if repetition < minimum:
            # actual repetition count fell short of the required minimum

            if debug:
                print(
                    ' ' * (debug - 1) +
                    f'--- Group {self.name}.match() FAILED in minimal repetition)'
                )

            lexer.backward()

            if debug:
                print(
                    f'<<< lexer backwarded, current token: {lexer.current_token}'
                )

            return None

        if debug:
            print(' ' * (debug - 1) + f'+++ Group {self.name}.match() SUCCESS')
        lexer.release_anchor()
        return tree
Example #3
0
    def match(self, lexer: Lexer, debug: int = 0):
        """
        The most fundamental match(): compare the current token's
        spelling against this Literal's regex text.

        return AST instance: token matched, lexer advanced one token
        return None: no match (lexer position untouched)
        """
        if lexer.current_token is None\
            or lexer.current_token.spelling == '_EOF':
            return None

        # reserve epsilon expression match to real 'EPSILON_EXPR' instance
        if self.name != 'EPSILON_EXPR'\
                and lexer.current_token.spelling == '_e':
            return None

        # if a TEXT grammar meets a text-literal token, return an ast node
        # directly
        if self.name == 'TEXT'\
                and lexer.current_token.type == TokenType.TEXTLITERAL:
            node = Ast(self.name, lexer.current_token.position,
                       grammar=lexer.current_token.spelling)
            if debug:
                print(' ' * (
                    debug - 1) + f'+++ {self.name}.match() {repr(self.regex)} with TEXT {lexer.current_token.spelling} finished')
            lexer.forward()
            if debug:
                print(
                    f'>>> lexer forwarded, current token: {lexer.current_token}')
            return node

        # if a NAME grammar meets a token with reserved word, return None
        # directly a reserved word can be only matched by a STRING grammar
        if self.name == 'NAME'\
                and lexer.current_token.spelling in ReservedNames.names:
            return None

        # a text-literal token cannot match any other grammars
        if lexer.current_token.type != TokenType.TEXTLITERAL:
            regex_obj = re.compile(self.regex)
            match_result = regex_obj.match(lexer.current_token.spelling)

            if self.name == 'STRING' and not (
                        match_result and match_result.span()[1] == len(
                        lexer.current_token.spelling)):
                # if atom `STRING` match failed, try a special case.
                # FIX: the pattern below used to be a non-raw string full of
                # invalid escape sequences (`\(`, `\[`, `\*`, ...), which
                # emit SyntaxWarning on Python 3.12+ and will become errors.
                # This raw string has the byte-identical value: it matches
                # the literal text  '('[\w\W]*?'|"[\w\W]*?")'  (every regex
                # metacharacter is escaped, so parens/brackets are literal).
                match_result = re.match(
                    r'\'\(\'\[\\w\\W\]\*\?\'\|\"\[\\w\\W\]\*\?\"\)\'',
                    lexer.current_token.spelling)

            # NOTE: match() + full-span check is deliberately NOT replaced
            # with fullmatch(): with alternation, match() may commit to a
            # shorter branch and the span test then rejects the token,
            # which is the established behavior here.
            if match_result and match_result.span()[1] == len(
                    lexer.current_token.spelling):
                # matched, build AST node for this token's spelling, move
                # lexer to next token
                node = Ast(self.name, lexer.current_token.position,
                           grammar=lexer.current_token.spelling)
                if debug:
                    print(' ' * (
                        debug - 1) + f'+++ {self.name}.match() {repr(self.regex)} with token <{lexer.current_token}> SUCCESS')
                lexer.forward()
                if debug:
                    print(
                        f'>>> lexer forwarded, current token: {lexer.current_token}')
                return node

            if debug:
                print(' ' * (
                    debug - 1) + f'--- {self.name}.match() {repr(self.regex)} with token <{lexer.current_token}> FAILED')

        return None
Example #4
0
    def build_ast(self, tree: Ast, lexer: Lexer, production_list: list,
                  recursive_productions: list = None, debug: int = 0):
        """
        build ast tree on the given instance (parameter `tree`)

        return True: SUCCESS, tree is appended with nodes
        return False: FAILED, and tree is untouched
        return None: Epsilon
        """
        if debug:
            print(' ' * (debug - 1) + f'### {self.name}.build_ast()')

        for grammar in production_list:
            # only pass the increased indent level when debugging; with
            # debug == 0 the sub-match must receive its default (0), not 4
            nodes = grammar.match(lexer,
                                  debug + 4) if debug else grammar.match(lexer)

            # this grammar not matched:
            # 1. abandon whole production_list and skipped loop
            # 2. lexer setup rollback flag
            if nodes is None:

                if debug:
                    print(' ' * (
                        debug - 1) + f'--- {self.name}.build_ast() with grammar <{grammar}> FAILED')

                tree.empty()
                lexer.backward()

                if debug:
                    print(
                        f'<<< lexer backwarded, current token: {lexer.current_token}')

                return False

            if nodes == '_E':
                # Epsilon
                if debug:
                    print(' ' * (debug - 1) + f'+++ Epsilon match')
                # BUG FIX: the Epsilon node was previously appended inside
                # the `if debug:` guard, so the resulting tree differed
                # depending on the debug flag; record it unconditionally
                tree.append(Ast('Epsilon', lexer.current_token.position,
                                grammar='_e'))
                return None

            if isinstance(grammar, Group):
                # grammar is a Group: nodes is a sequence of children,
                # filtered through ignore_set when one is configured
                if not self.ignore_set:
                    tree.extend(nodes)
                else:
                    for node in nodes:
                        if node.name not in self.ignore_set:
                            tree.append(node)
            else:
                # grammar is a Token, only one production hence nodes is
                # actually a `node`
                if not self.ignore_set or nodes.name not in self.ignore_set:
                    tree.append(nodes)

        # all grammar in current production_list matched, operation SUCCESS
        if debug:
            print(' ' * (debug - 1) + f'+++ {self.name}.build_ast() SUCCESS')

        return True