Esempio n. 1
0
File: ll1.py Progetto: xtracool/yacv
 def parse(self, string):
     """Parse a sequence of terminals with the LL(1) table.

     ``string`` is a sequence of terminal symbols. The end marker '$' is
     added when it is missing. The input is copied first, so the caller's
     sequence is never modified.

     Returns the first node popped from the parse stack, which is normally
     the tree rooted at the augmented start symbol S'.

     Raises:
         YACVError: if the grammar is not LL(1), or if input remains after
             the stack has been reduced to the end marker.
         ValueError: if the stack top is a terminal that does not match the
             current input symbol, or the table holds an error entry.
     """
     log = logging.getLogger('yacv')
     if not self.is_ll1:
         log.debug(self.grammar.nonterminals)
         raise YACVError('Grammar is not LL(1). The parsing cannot proceed')
     # Work on a private copy; an empty input is treated as just '$'.
     tokens = list(string)
     if not tokens or tokens[-1] != '$':
         tokens.append('$')
     pos = 0  # index of the current lookahead in tokens
     stack = [AbstractSyntaxTree('S\'')]
     popped_stack = []
     while stack[-1].root != '$':
         # node is the very object on the stack, so children appended to
         # it below stay attached to the tree being built.
         node = stack[-1]
         a = tokens[pos]
         if node.root == a:
             # Terminal on the stack matches the lookahead: consume both.
             popped_stack.append(stack.pop())
             pos += 1
             continue
         if node.root in self.grammar.terminals:
             raise ValueError(
                 'Error because top = {}, terminal'.format(node.root))
         entry = self.parsing_table.at[node.root, a]
         if entry == YACV_ERROR:
             raise ValueError('Error because parsing table errored out')
         # NOTE(review): a YACV_ACCEPT entry leaves stack and input
         # untouched, exactly as before; confirm the LL(1) table never
         # contains one, otherwise this loop would not terminate.
         if entry != YACV_ACCEPT:
             prod = entry[0]
             node.prod_id = self.grammar.prods.index(prod)
             log.debug('Expanding production : {}'.format(prod))
             desc_list = []
             for symbol in prod.rhs:
                 child = AbstractSyntaxTree(symbol)
                 node.desc.append(child)
                 desc_list.append(child)
             popped_stack.append(stack.pop())
             if prod.rhs[0] != YACV_EPSILON:
                 # Push right-to-left so the leftmost symbol ends on top.
                 stack.extend(reversed(desc_list))
             log.debug(list(reversed(stack)))
             log.debug('End of iteration' + 16 * '-')
     remainder = tokens[pos:]
     if remainder and remainder[0] != '$':
         raise YACVError('Cannot parse the remainder of string {}'.format(
             ''.join(remainder)))
     return popped_stack[0]
Esempio n. 2
0
 def __init__(self, fname='simple-grammar.txt'):
     """Load a grammar from ``fname`` and derive its symbol tables.

     Each non-empty line must have the form ``LHS -> sym sym ...`` with
     symbols separated by spaces; the symbol ``''`` denotes epsilon. The
     grammar is augmented with ``S' -> <first LHS> $`` as production 0.

     Sets ``self.prods``, ``self.nonterminals`` (an OrderedDict keyed by
     nonterminal) and ``self.terminals`` (a sorted list that includes
     '$'), then calls ``build_first()`` and ``build_follow()``.

     Raises:
         YACVError: if a line does not contain exactly one '->', or the
             file defines no productions at all.
     """
     # The context manager closes the file even if reading fails.
     with open(fname) as grammar_file:
         lines = [x.strip() for x in grammar_file]
     self.prods = []  # list containing all the productions
     all_symbols = set()
     for line in lines:
         if line == '':
             continue
         try:
             lhs, rhs = line.split('->')
         except ValueError as e:
             raise YACVError('Invalid grammar file') from e
         lhs = lhs.strip()
         # Drop empty pieces left by repeated spaces and map the textual
         # epsilon marker to the internal constant.
         rhs = [YACV_EPSILON if x == "''" else x
                for x in rhs.split(' ') if x]
         self.prods.append(Production(lhs, rhs))
         all_symbols.update(rhs)
     if not self.prods:
         # Without this check the augmentation below fails with a bare
         # IndexError on an empty grammar file.
         raise YACVError('Invalid grammar file')
     # Augment the grammar
     self.prods.insert(0, Production('S\'', [self.prods[0].lhs, '$']))
     # Accumulate nonterminal information
     self.nonterminals = OrderedDict()
     for i, prod in enumerate(self.prods):
         lhs = prod.lhs
         if lhs not in self.nonterminals:
             self.nonterminals[lhs] = {
                 # indices of the productions with this nonterminal as LHS
                 'prods_lhs': [i],
                 # (production index, position) of every RHS occurrence
                 'prods_rhs': [],
                 'first': set(),
                 'follow': set(),
                 'nullable': False
             }
         else:
             self.nonterminals[lhs]['prods_lhs'].append(i)
     # Every RHS symbol that is neither a nonterminal nor epsilon is a
     # terminal; the end marker is always one.
     terminals = all_symbols - set(self.nonterminals)
     terminals.discard(YACV_EPSILON)
     terminals.add('$')
     self.terminals = sorted(terminals)
     # Record where each nonterminal occurs on a right-hand side
     for prodno, prod in enumerate(self.prods):
         for i, symbol in enumerate(prod.rhs):
             if symbol in self.nonterminals:
                 self.nonterminals[symbol]['prods_rhs'].append((prodno, i))
     self.build_first()
     self.build_follow()
Esempio n. 3
0
File: ll1.py Progetto: xtracool/yacv
    def __init__(self, fname='ll1-expression-grammar.txt'):
        """Load the grammar in ``fname`` and build its LL(1) parsing table.

        Raises YACVError when a production starts with its own left-hand
        side. Otherwise creates ``self.parsing_table`` (rows: nonterminals,
        columns: terminals) filled with YACV_ERROR, sets ``self.is_ll1`` to
        True and calls ``build_parsing_table()``.
        """
        self.grammar = Grammar(fname)
        # Reject productions whose first RHS symbol is their own LHS.
        for production in self.grammar.prods:
            if production.lhs != production.rhs[0]:
                continue
            message = (
                'The grammar is not LL(1) due to left recursion in production {}'
                .format(production))
            raise YACVError(message)

        self.is_ll1 = True
        column_labels = self.grammar.terminals
        row_labels = self.grammar.nonterminals.keys()
        table = pd.DataFrame(columns=column_labels, index=row_labels)
        # Every cell starts as an error entry.
        table.loc[:, :] = YACV_ERROR
        self.parsing_table = table
        self.build_parsing_table()
Esempio n. 4
0
 def build_parsing_table(self):
     """Fill ``self.parsing_table`` from the LR automaton.

     For every state, reduce items add an ``r<prod_id>`` entry under each
     of the item's lookaheads (production 0 yields YACV_ACCEPT under '$'),
     and every outgoing transition adds a shift entry ``s<state>`` for a
     terminal or a goto entry ``<state>`` for any other symbol. Cells that
     receive entries hold a list; a cell with more than one entry marks
     the grammar as invalid via ``self.is_valid``.

     Returns None. Does nothing but log a warning when the table has
     already been built.

     Raises:
         YACVError: if the automaton has not been built yet.
     """
     log = logging.getLogger('yacv')
     if self.parsing_table_built:
         # Logger.warn is a deprecated alias; use warning like below.
         log.warning('Parsing table is already built!')
         return
     if not self.automaton_built:
         raise YACVError('LR state automaton must be built before building parsing table')
     terminals = self.grammar.terminals
     for state_id, transitions in self.automaton_transitions.items():
         state = self.automaton_states[state_id]
         if len(state.reduce_items) > 0:
             # TODO: iterate state.reduce_items directly instead of
             # filtering state.items
             for item in state.items:
                 if item.reduce:
                     prod = item.production
                     prod_id = self.grammar.prods.index(prod)
                     if prod_id == 0:
                         # Reducing the augmented production means accept.
                         col = (YACV_ACTION, '$')
                         self.parsing_table.at[state_id, col] = YACV_ACCEPT
                         continue
                     entry = 'r' + str(prod_id)
                     for symbol in item.lookaheads:
                         col = (YACV_ACTION, symbol)
                         if self.parsing_table.at[state_id, col] == YACV_ERROR:
                             self.parsing_table.at[state_id, col] = []
                         self.parsing_table.at[state_id, col].append(entry)
                         # More than one action in a cell is a conflict.
                         if len(self.parsing_table.at[state_id, col]) > 1:
                             self.is_valid = False
         for symbol, new_state_id in transitions.items():
             if symbol in terminals:
                 entry = 's' + str(new_state_id)
                 col = (YACV_ACTION, symbol)
             else:
                 entry = str(new_state_id)
                 col = (YACV_GOTO, symbol)
             if self.parsing_table.at[state_id, col] == YACV_ERROR:
                 self.parsing_table.at[state_id, col] = []
             self.parsing_table.at[state_id, col].append(entry)
             if len(self.parsing_table.at[state_id, col]) > 1:
                 self.is_valid = False
     self.parsing_table_built = True
     if not self.is_valid:
         log.warning('Grammar is not valid')
     else:
         log.info('Parsing table built successfully')
Esempio n. 5
0
    def build_parsing_table(self):
        """Fill ``self.parsing_table`` from the LR automaton, SLR(1) style.

        An accepting state gets YACV_ACCEPT under '$'. Otherwise each
        reduce item adds ``YACV_REDUCE + <prod_id>`` under every symbol in
        the FOLLOW set of the production's left-hand side. Every outgoing
        transition adds ``YACV_SHIFT + <state>`` for a terminal or a goto
        entry ``<state>`` for any other symbol. Cells that receive entries
        hold a list; a cell with more than one entry clears
        ``self.is_valid``.

        Returns None. Does nothing but log a warning when the table has
        already been built.

        Raises:
            YACVError: if the automaton has not been built yet.
        """
        log = logging.getLogger('yacv')
        if self.parsing_table_built:
            # Logger.warn is a deprecated alias; use warning like below.
            log.warning('Parsing table is already built!')
            return
        if not self.automaton_built:
            raise YACVError('LR state automaton must be built before building parsing table')
        terminals = self.grammar.terminals
        for state_id, transitions in self.automaton_transitions.items():
            state = self.automaton_states[state_id]
            if state.accept:
                col = (YACV_ACTION, '$')
                self.parsing_table.at[state_id, col] = YACV_ACCEPT
            elif len(state.reduce_items) > 0:
                for item in state.items:
                    if item.reduce:
                        lhs = item.production.lhs
                        # Reduce on every symbol in FOLLOW(lhs).
                        follow = self.grammar.nonterminals[lhs]['follow']
                        prod_id = self.grammar.prods.index(item.production)
                        entry = YACV_REDUCE + str(prod_id)
                        for symbol in follow:
                            col = (YACV_ACTION, symbol)
                            if self.parsing_table.at[state_id, col] == YACV_ERROR:
                                self.parsing_table.at[state_id, col] = []
                            self.parsing_table.at[state_id, col].append(entry)
                            # More than one action in a cell is a conflict.
                            if len(self.parsing_table.at[state_id, col]) > 1:
                                self.is_valid = False
            for symbol, new_state_id in transitions.items():
                if symbol in terminals:
                    entry = YACV_SHIFT + str(new_state_id)
                    col = (YACV_ACTION, symbol)
                else:
                    entry = str(new_state_id)
                    col = (YACV_GOTO, symbol)
                if self.parsing_table.at[state_id, col] == YACV_ERROR:
                    self.parsing_table.at[state_id, col] = []
                self.parsing_table.at[state_id, col].append(entry)
                if len(self.parsing_table.at[state_id, col]) > 1:
                    self.is_valid = False

        self.parsing_table_built = True
        if not self.is_valid:
            log.warning('Grammar is not SLR(1)')
        else:
            log.info('Parsing table built successfully')
Esempio n. 6
0
 def parse(self, string):
     """Run the table-driven LR parse over ``string`` and return the tree.

     ``string`` is a non-empty sequence of terminal symbols. The end
     marker '$' is added when it is missing. The input is copied first,
     so the caller's sequence is never modified.

     The stack alternates state numbers and tree nodes, starting with
     state 0. When a table cell holds a list, its first entry is used.

     Returns the tree node left on the stack when the accept entry is
     reached.

     Raises:
         YACVError: if the grammar is not valid for this parser, the
             table holds an error entry for the current state and input
             symbol, the stack runs empty, or an unknown entry is found.
     """
     log = logging.getLogger('yacv')
     if not self.is_valid:
         raise YACVError('Given grammar is not valid for chosen parsing algorithm. Parsing will not continue')
     # page 7 at below link is really helpful
     # https://www2.cs.duke.edu/courses/spring02/cps140/lects/sectlrparseS.pdf
     assert self.parsing_table_built
     assert len(string) > 0
     # Work on a private copy and advance a cursor instead of popping
     # from the front of the caller's list.
     tokens = list(string)
     if tokens[-1] != '$':
         tokens.append('$')
     pos = 0
     stack = [0]
     while True:
         top = stack[-1]
         a = tokens[pos]
         entry = self.parsing_table.at[top, (YACV_ACTION, a)]
         if entry == YACV_ERROR:
             log.error('Parse error')
             raise YACVError('YACV_ERROR entry for top = {}, a = {}'.format(top, a))
         if isinstance(entry, list):
             entry = entry[0]
         log.debug('stack top = {}, a = {}, entry = {}'.format(top, a, entry))
         if entry[0] == 's':
             # Shift: push the terminal's node and the target state.
             stack.append(AbstractSyntaxTree(a))
             stack.append(int(entry[1:]))
             pos += 1
         elif entry[0] == 'r':
             prod_id = int(entry[1:])
             prod = self.grammar.prods[prod_id]
             new_tree = AbstractSyntaxTree(prod.lhs)
             new_tree.prod_id = prod_id
             popped_list = []
             if prod.rhs[0] != YACV_EPSILON:
                 for _ in range(len(prod.rhs)):
                     if not stack:
                         raise YACVError('Stack prematurely empty')
                     stack.pop()  # pops the state number
                     if not stack:
                         raise YACVError('Stack prematurely empty')
                     popped_list.append(stack.pop())  # pops the symbol
             else:
                 new_tree.desc.append(AbstractSyntaxTree(YACV_EPSILON))
             # Nodes were popped right-to-left; attach them left-to-right.
             for child in reversed(popped_list):
                 new_tree.desc.append(child)
             new_top = stack[-1]
             nonterminal = prod.lhs
             new_state = self.parsing_table.at[new_top, (YACV_GOTO, nonterminal)]
             stack.append(new_tree)
             if isinstance(new_state, list):
                 new_state = new_state[0]
             stack.append(int(new_state))
         elif entry == YACV_ACCEPT:
             prod = self.grammar.prods[0]
             assert prod.rhs[-1] == '$' and len(prod.rhs) == 2
             # The stack is non-empty here (top was just read from it).
             stack.pop()  # pops the state number
             if not stack:
                 raise YACVError('Stack prematurely empty')
             tree = stack.pop()
             log.info('Parse successful')
             log.debug('Final tree = {}'.format(tree))
             return tree
         else:
             raise YACVError('Unknown error while parsing')