# Example #1
0
class SyntaxAnalyzer(object):
    """Table-driven predictive (LL(1)-style) syntax analyzer.

    The analyzer tokenizes a source file on construction, is given a grammar
    through ``set_grammar`` and a parsing table through ``load_table``, and
    then checks the token stream with ``perform_analysis``.  Syntax errors
    are collected in ``self.errors`` instead of being raised.
    """

    def __init__(self, code_file):
        """Tokenize ``code_file`` and prepare an empty analyzer state.

        code_file: code to compile (handed to ``Tokenizer``).

        The grammar is not loaded here; call ``set_grammar`` before
        ``make_table``, ``load_table`` or ``perform_analysis``.
        """
        self.tokenizer = Tokenizer(code_file)
        self.tokenizer.make_tokens()
        self.make_token_sequence()

        # Parsing table: {non_terminal: {terminal: production or None}}.
        self.table = None

        # Errors recorded by add_error().
        self.errors = []

    def set_grammar(self, grammar_file, initial=None):
        """Load the grammar used by the analyzer.

        grammar_file: grammar source handed to ``Grammar``.
        initial: optional initial symbol, forwarded to ``Grammar``.
        """
        self.grammar_full = Grammar(grammar_file, initial)
        self.grammar = self.grammar_full.productions

    def make_token_sequence(self):
        """Merge the tokenizer's tokens and symbols into one ordered list.

        At each position the next token is taken when its ``pos`` equals that
        position; otherwise the next symbol is taken.  Both tokenizer lists
        are consumed (emptied) in the process.  An end-of-input token is
        appended last.

        Raises IndexError if a symbol is required but none is left.
        """
        self.token_sequence = []
        tokens = self.tokenizer.tokens
        symbols = self.tokenizer.symbols
        total = len(tokens) + len(symbols)

        for position in range(total):
            if tokens and tokens[0].pos == position:
                self.token_sequence.append(tokens.pop(0))
            else:
                self.token_sequence.append(symbols.pop(0))

        # Sentinel marking the end of the input for the parser.
        self.token_sequence.append(
            Token(
                line=0,
                lexeme=constants.END_SYMBOL,
                token=constants.END_SYMBOL,
                pos=-1
                )
            )

    def make_table(self):
        """Create an empty parsing table.

        Every grammar key gets its own row mapping each terminal in
        ``constants.TERMINALS`` to ``None``.
        """
        blank_row = dict.fromkeys(constants.TERMINALS)
        # Each row must be an independent dict so cells can be set per row.
        self.table = {key: dict(blank_row) for key in self.grammar}

    def load_table(self, table_file):
        """Fill the parsing table from ``table_file``.

        The file is read with ``CSVTableReader.dict_from_table``.  Each
        non-None cell is used as an index/key into the productions of the
        row's non-terminal, and the selected production is stored in
        ``self.table``.  The table is created first if it does not exist.
        """
        if not self.table:
            self.make_table()

        loaded_table = CSVTableReader.dict_from_table(table_file)

        for production, row in loaded_table.items():
            for terminal, production_choice in row.items():
                if production_choice is not None:
                    self.table[production][terminal] = \
                        self.grammar[production][production_choice]

    def perform_analysis(self):
        """Run the stack-based analysis over the token sequence.

        The token sequence is consumed in place (``self.token_sequence`` is
        the list being popped).  Syntax errors are appended to
        ``self.errors`` through ``add_error``; nothing is returned.

        NOTE(review): every step prints the stack and the remaining input
        and then waits on ``raw_input()``; this interactive trace is kept
        exactly as it was.
        """
        stack = [constants.END_SYMBOL, self.grammar_full.initial]
        if not self.token_sequence:
            self.make_token_sequence()

        sequence = self.token_sequence
        # Symbols of the most recently expanded production that have not
        # been matched yet (next expected symbol last); used for recovery.
        curr_prod = None

        while stack and sequence:
            # Single-argument print is valid in both Python 2 and 3 and
            # produces the same text as the former print statements.
            print('STACK: %s' % (stack,))
            print('SEQUENCE %s' % ([a.get_terminal() for a in sequence],))
            raw_input()

            first_terminal = sequence[0].get_terminal()
            top = stack[-1]

            # Ask if a new production needs to be pushed onto the stack
            if top in self.grammar_full.non_terminals:
                # Ask if such a production exists in the table
                if self.table[top][first_terminal]:
                    production = list(self.table[stack.pop()][first_terminal])
                    print(production)
                    curr_prod = production[::-1]
                    # Ask if it isn't an empty production
                    if not production == constants.EMPTY_PRODUCTION:
                        # Push reversed so the first symbol ends up on top.
                        stack.extend(curr_prod)

                else:
                    # No table entry: report and skip the offending token.
                    self.add_error(top, first_terminal, sequence[0].line)
                    sequence.pop(0)

            elif top == first_terminal:
                stack.pop()
                if curr_prod:
                    curr_prod.pop()
                sequence.pop(0)  # sequence is NOT a stack, pops from the front

            else:
                self.add_error(top, first_terminal, sequence[0].line)
                recovered = False

                # Drop what is left of the current production.  The stack
                # check prevents popping from an already empty stack.
                while curr_prod and stack:
                    curr_prod.pop()
                    stack.pop()
                    recovered = True

                if len(stack) == 1 and len(sequence) > 1:
                    stack.append(constants.WILDCARD_PROD)
                    recovered = True

                if not recovered:
                    # Neither the stack nor the input changed; without this
                    # the loop would repeat the same mismatch forever.
                    sequence.pop(0)

    def add_error(self, expected, actual, line):
        """Record a syntax error.

        expected: grammar symbol that was on top of the stack.
        actual: terminal found in the input.
        line: line number reported for the error.

        A literal '$' as ``expected`` is reported as
        ``constants.END_OF_FILE``.
        """
        # Compare by value; identity of equal strings is not guaranteed.
        if expected == '$':
            expected = constants.END_OF_FILE
        self.errors.append(
            SyntaxError(
                line=line,
                error=constants.SYNTAX_ERROR % {
                    'expected': expected,
                    'actual': actual
                    }
                )
            )

    def print_all(self):
        """Delegate to the tokenizer's ``print_all``."""
        self.tokenizer.print_all()