def parse(self, string):
    """Parse a list of terminals with the LL(1) table and return the tree.

    A stack of ``AbstractSyntaxTree`` nodes is seeded with the augmented
    start symbol ``S'``. On every iteration the node on top of the stack is
    either matched against the next input token (when their symbols are
    equal) or expanded with the production stored in
    ``self.parsing_table`` for ``(top symbol, next token)``. The loop stops
    once ``'$'`` is on top of the stack.

    Args:
        string: List of terminal symbols. The list is modified in place:
            ``'$'`` is appended when it is not already the last element and
            matched tokens are removed from the front.

    Returns:
        The first node that was removed from the stack (the ``S'`` node
        when the first step is the expansion of ``S'``).

    Raises:
        YACVError: If the grammar is not LL(1), or if input other than
            ``'$'`` is left over once the stack is exhausted.
        ValueError: If a terminal on the stack does not match the next
            token, or the table holds ``YACV_ERROR`` for the lookup.
    """
    log = logging.getLogger('yacv')
    if not self.is_ll1:
        # Dump the nonterminal table to stdout before refusing to parse.
        print(self.grammar.nonterminals)
        raise YACVError('Grammar is not LL(1). The parsing cannot proceed')
    # string: list of terminals
    if string[-1] != '$':
        string.append('$')
    stack = [AbstractSyntaxTree('S\'')]
    popped_stack = []
    while stack[-1].root != '$':
        # `top` is only an alias for the node object; the node itself stays
        # on the stack until it is explicitly popped below.
        top = stack[-1]
        a = string[0]
        if top.root == a:
            # Terminal matches the lookahead: consume both.
            popped_stack.append(stack.pop())
            string.pop(0)
        elif top.root in self.grammar.terminals:
            # The original message formatted an undefined name here, which
            # turned this ValueError into a NameError.
            raise ValueError(
                'Error because top = {}, terminal'.format(top.root))
        else:
            entry = self.parsing_table.at[top.root, a]
            if entry == YACV_ERROR:
                raise ValueError('Error because parsing table errored out')
            # NOTE(review): a YACV_ACCEPT cell leaves both the stack and the
            # input untouched, so the loop would make no progress - confirm
            # that the LL(1) table never stores YACV_ACCEPT.
            if entry != YACV_ACCEPT:
                prod = entry[0]
                top.prod_id = self.grammar.prods.index(prod)
                log.debug('Expanding production : {}'.format(prod))
                children = []
                for symbol in prod.rhs:
                    child = AbstractSyntaxTree(symbol)
                    top.desc.append(child)
                    children.append(child)
                popped_stack.append(stack.pop())
                if prod.rhs[0] != YACV_EPSILON:
                    # Push right-to-left so the leftmost symbol ends up on
                    # top of the stack and is processed first.
                    stack.extend(reversed(children))
        log.debug(list(reversed(stack)))
        log.debug('End of iteration' + 16 * '-')
    if string and string[0] != '$':
        raise YACVError('Cannot parse the remainder of string {}'.format(
            ''.join(string)))
    return popped_stack[0]
def __init__(self, fname='simple-grammar.txt'):
    """Load a grammar from ``fname`` and derive its symbol tables.

    The file holds one production per line in the form
    ``LHS -> sym sym ...`` with symbols separated by spaces; ``''`` on the
    right-hand side stands for epsilon and blank lines are skipped. The
    grammar is augmented with ``S' -> <first LHS> $`` as production 0.

    After construction the instance exposes:

    * ``prods`` - list of ``Production`` objects, augmented production
      first.
    * ``nonterminals`` - ``OrderedDict`` keyed by left-hand-side symbol;
      each value is a dict with ``prods_lhs`` (indices of productions the
      symbol heads), ``prods_rhs`` (``(production index, position)`` pairs
      where it occurs on a right-hand side), ``first``, ``follow`` and
      ``nullable``.
    * ``terminals`` - sorted list of right-hand-side symbols that are not
      nonterminals, without epsilon and with ``'$'`` added.

    ``build_first()`` and ``build_follow()`` are called at the end.

    Args:
        fname: Path of the grammar file.

    Raises:
        YACVError: If a non-blank line does not split into exactly two
            parts around ``->``, or the file contains no productions.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(fname) as grammar_file:
        lines = [line.strip() for line in grammar_file]

    self.prods = []  # list containing all the productions
    all_symbols = set()
    for line in lines:
        if line == '':
            continue
        try:
            lhs, rhs = line.split('->')
        except ValueError as err:
            raise YACVError('Invalid grammar file') from err
        lhs = lhs.strip()
        # '' in the grammar file denotes epsilon.
        rhs = [YACV_EPSILON if symbol == "''" else symbol
               for symbol in rhs.split(' ') if symbol]
        self.prods.append(Production(lhs, rhs))
        all_symbols.update(rhs)

    if not self.prods:
        # Without this check the augmentation below fails with a bare
        # IndexError on self.prods[0].
        raise YACVError('Invalid grammar file: no productions found')

    # Augment the grammar
    self.prods.insert(0, Production('S\'', [self.prods[0].lhs, '$']))

    # Accumulate nonterminal information
    self.nonterminals = OrderedDict()
    for i, prod in enumerate(self.prods):
        lhs = prod.lhs
        if lhs not in self.nonterminals:
            self.nonterminals[lhs] = {
                # indices of the productions this nonterminal is the LHS of
                'prods_lhs': [i],
                # (production index, position) pairs where this
                # nonterminal appears on a RHS
                'prods_rhs': [],
                'first': set(),
                'follow': set(),
                'nullable': False
            }
        else:
            self.nonterminals[lhs]['prods_lhs'].append(i)

    # Terminals are the RHS symbols that never head a production.
    terminals = all_symbols.difference(self.nonterminals)
    terminals.discard(YACV_EPSILON)
    terminals.add('$')
    self.terminals = sorted(terminals)

    # Record every RHS occurrence of each nonterminal.
    for prodno, prod in enumerate(self.prods):
        for i, symbol in enumerate(prod.rhs):
            if symbol in self.nonterminals:
                self.nonterminals[symbol]['prods_rhs'].append((prodno, i))

    self.build_first()
    self.build_follow()
def __init__(self, fname='ll1-expression-grammar.txt'):
    """Create an LL(1) parser for the grammar stored in ``fname``.

    The grammar is loaded, checked for productions whose first right-hand
    side symbol equals their left-hand side, and an error-filled table with
    one row per nonterminal and one column per terminal is allocated before
    ``build_parsing_table()`` is invoked.

    Args:
        fname: Path of the grammar file handed to ``Grammar``.

    Raises:
        YACVError: If some production starts with its own left-hand side.
    """
    grammar = Grammar(fname)
    self.grammar = grammar

    # Check for left recursion: find the first production, if any, that
    # begins with its own left-hand side.
    offending = next(
        (candidate for candidate in grammar.prods
         if candidate.lhs == candidate.rhs[0]),
        None)
    if offending is not None:
        message = 'The grammar is not LL(1) due to left recursion in production {}'
        raise YACVError(message.format(offending))

    # Rows are nonterminals, columns are terminals; every cell starts out
    # as an error entry.
    table = pd.DataFrame(
        columns=grammar.terminals,
        index=grammar.nonterminals.keys())
    table.loc[:, :] = YACV_ERROR
    self.parsing_table = table

    self.is_ll1 = True
    self.build_parsing_table()
def build_parsing_table(self):
    """Fill ``self.parsing_table`` with ACTION and GOTO entries.

    For every state that is a key of ``self.automaton_transitions``:

    * each reduce item of production 0 writes ``YACV_ACCEPT`` under
      ``(YACV_ACTION, '$')``;
    * every other reduce item adds ``'r<production index>'`` under
      ``(YACV_ACTION, symbol)`` for each symbol in the item's lookaheads;
    * each transition adds ``'s<target state>'`` under ACTION when the
      symbol is a terminal, otherwise ``'<target state>'`` under GOTO.

    Cells hold lists of entries. A cell that ends up with more than one
    entry is a conflict and sets ``self.is_valid`` to ``False``.

    Returns:
        None. Returns early, after logging a warning, when the table has
        already been built.

    Raises:
        YACVError: If the automaton has not been built yet.
    """
    log = logging.getLogger('yacv')
    if self.parsing_table_built:
        # Logger.warn is a deprecated alias; use warning() as the rest of
        # this method already does.
        log.warning('Parsing table is already built!')
        return
    if not self.automaton_built:
        raise YACVError(
            'LR state automaton must be built before building parsing table')

    table = self.parsing_table
    terminals = self.grammar.terminals

    def add_entry(state_id, col, entry):
        # Append `entry` to the cell, replacing the error placeholder with
        # a fresh list first, and flag a conflict on a second entry.
        if table.at[state_id, col] == YACV_ERROR:
            table.at[state_id, col] = []
        cell = table.at[state_id, col]
        cell.append(entry)
        if len(cell) > 1:
            self.is_valid = False

    for state_id, transitions in self.automaton_transitions.items():
        state = self.automaton_states[state_id]
        if len(state.reduce_items) > 0:
            # TODO: iterate state.reduce_items directly instead of
            # filtering state.items (carried over from the original code).
            for item in state.items:
                if not item.reduce:
                    continue
                prod_id = self.grammar.prods.index(item.production)
                if prod_id == 0:
                    # Reducing the augmented production means acceptance.
                    table.at[state_id, (YACV_ACTION, '$')] = YACV_ACCEPT
                    continue
                entry = 'r' + str(prod_id)
                for symbol in item.lookaheads:
                    add_entry(state_id, (YACV_ACTION, symbol), entry)
        for symbol, new_state_id in transitions.items():
            if symbol in terminals:
                add_entry(state_id, (YACV_ACTION, symbol),
                          's' + str(new_state_id))
            else:
                add_entry(state_id, (YACV_GOTO, symbol), str(new_state_id))

    self.parsing_table_built = True
    if not self.is_valid:
        log.warning('Grammar is not valid')
    else:
        log.info('Parsing table built successfully')
def build_parsing_table(self):
    """Fill ``self.parsing_table`` using FOLLOW sets for reductions.

    For every state that is a key of ``self.automaton_transitions``:

    * an accepting state writes ``YACV_ACCEPT`` under
      ``(YACV_ACTION, '$')``;
    * otherwise each reduce item adds ``YACV_REDUCE + <production index>``
      under ``(YACV_ACTION, symbol)`` for every symbol in the ``follow``
      set of the production's left-hand side;
    * each transition adds ``YACV_SHIFT + <target state>`` under ACTION
      when the symbol is a terminal, otherwise ``'<target state>'`` under
      GOTO.

    Cells hold lists of entries. A cell that ends up with more than one
    entry is a conflict and sets ``self.is_valid`` to ``False``.

    Returns:
        None. Returns early, after logging a warning, when the table has
        already been built.

    Raises:
        YACVError: If the automaton has not been built yet.
    """
    log = logging.getLogger('yacv')
    if self.parsing_table_built:
        # Logger.warn is a deprecated alias; use warning() as the rest of
        # this method already does.
        log.warning('Parsing table is already built!')
        return
    if not self.automaton_built:
        raise YACVError(
            'LR state automaton must be built before building parsing table')

    table = self.parsing_table
    terminals = self.grammar.terminals

    def add_entry(state_id, col, entry):
        # Append `entry` to the cell, replacing the error placeholder with
        # a fresh list first, and flag a conflict on a second entry.
        if table.at[state_id, col] == YACV_ERROR:
            table.at[state_id, col] = []
        cell = table.at[state_id, col]
        cell.append(entry)
        if len(cell) > 1:
            self.is_valid = False

    for state_id, transitions in self.automaton_transitions.items():
        state = self.automaton_states[state_id]
        if state.accept:
            table.at[state_id, (YACV_ACTION, '$')] = YACV_ACCEPT
        elif len(state.reduce_items) > 0:
            for item in state.items:
                if not item.reduce:
                    continue
                lhs = item.production.lhs
                follow = self.grammar.nonterminals[lhs]['follow']
                prod_id = self.grammar.prods.index(item.production)
                entry = YACV_REDUCE + str(prod_id)
                # Reduce on every terminal that may follow the LHS.
                for symbol in follow:
                    add_entry(state_id, (YACV_ACTION, symbol), entry)
        for symbol, new_state_id in transitions.items():
            if symbol in terminals:
                add_entry(state_id, (YACV_ACTION, symbol),
                          YACV_SHIFT + str(new_state_id))
            else:
                add_entry(state_id, (YACV_GOTO, symbol), str(new_state_id))

    self.parsing_table_built = True
    if not self.is_valid:
        log.warning('Grammar is not SLR(1)')
    else:
        log.info('Parsing table built successfully')
def parse(self, string):
    """Run the table-driven shift-reduce parse and return the syntax tree.

    The stack alternates state numbers and ``AbstractSyntaxTree`` nodes and
    starts as ``[0]``. On each step the ACTION cell for
    ``(state on top, next token)`` is read; when the cell is a list its
    first element is used:

    * an entry starting with ``'s'`` pushes a leaf for the token and the
      target state, and consumes the token;
    * an entry starting with ``'r'`` pops one ``(state, node)`` pair per
      right-hand-side symbol (none for an epsilon production, which gets a
      single epsilon child instead), builds a node for the left-hand side
      and pushes it with the state from the GOTO cell;
    * ``YACV_ACCEPT`` pops the top state and returns the node beneath it.

    Args:
        string: Non-empty list of terminal symbols. The list is modified in
            place: ``'$'`` is appended when it is not already the last
            element and shifted tokens are removed from the front.

    Returns:
        The ``AbstractSyntaxTree`` node left on the stack at acceptance.

    Raises:
        YACVError: If the grammar is not valid for this parser, the parsing
            table has not been built, ``string`` is empty, the ACTION cell
            is ``YACV_ERROR`` or unrecognised, or the stack runs empty.
    """
    log = logging.getLogger('yacv')
    if not self.is_valid:
        raise YACVError(
            'Given grammar is not valid for chosen parsing algorithm. '
            'Parsing will not continue')
    # page 7 at below link is really helpful
    # https://www2.cs.duke.edu/courses/spring02/cps140/lects/sectlrparseS.pdf
    # Explicit checks instead of asserts, which disappear under `python -O`.
    if not self.parsing_table_built:
        raise YACVError('Parsing table must be built before parsing')
    if len(string) == 0:
        raise YACVError('Cannot parse an empty string')
    if string[-1] != '$':
        string.append('$')

    stack = [0]
    while True:
        top = stack[-1]
        a = string[0]
        entry = self.parsing_table.at[top, (YACV_ACTION, a)]
        if entry == YACV_ERROR:
            log.error('Parse error')
            raise YACVError(
                'YACV_ERROR entry for top = {}, a = {}'.format(top, a))
        if isinstance(entry, list):
            # Cells hold lists of candidate actions; take the first one.
            entry = entry[0]
        log.debug('stack top = {}, a = {}, entry = {}'.format(top, a, entry))

        if entry[0] == 's':
            # Shift: push the token as a leaf, then the target state.
            stack.append(AbstractSyntaxTree(a))
            stack.append(int(entry[1:]))
            string.pop(0)
        elif entry[0] == 'r':
            prod_id = int(entry[1:])
            prod = self.grammar.prods[prod_id]
            new_tree = AbstractSyntaxTree(prod.lhs)
            new_tree.prod_id = prod_id
            children = []
            if prod.rhs[0] != YACV_EPSILON:
                for _ in prod.rhs:
                    if not stack:
                        raise YACVError('Stack prematurely empty')
                    stack.pop()  # pops the state number
                    if not stack:
                        raise YACVError('Stack prematurely empty')
                    children.append(stack.pop())  # pops the symbol
            else:
                new_tree.desc.append(AbstractSyntaxTree(YACV_EPSILON))
            # Nodes were popped right-to-left; attach them left-to-right.
            for child in reversed(children):
                new_tree.desc.append(child)
            if not stack:
                raise YACVError('Stack prematurely empty')
            new_state = self.parsing_table.at[
                stack[-1], (YACV_GOTO, prod.lhs)]
            stack.append(new_tree)
            if isinstance(new_state, list):
                new_state = new_state[0]
            stack.append(int(new_state))
        elif entry == YACV_ACCEPT:
            prod = self.grammar.prods[0]
            assert prod.rhs[-1] == '$' and len(prod.rhs) == 2
            # Need the top state plus the tree node underneath it.
            if len(stack) < 2:
                raise YACVError('Stack prematurely empty')
            stack.pop()
            tree = stack.pop()
            log.info('Parse successful')
            log.debug('Final tree = {}'.format(tree))
            return tree
        else:
            raise YACVError('Unknown error while parsing')