def teardown(self, g: grammar.Grammar):
    """Refresh cached token metrics after this transform introduced nodes.

    FIXME: Increasingly min tokens looks like it shouldn't be part of
    initial grammar creation.
    """
    # Recompute in the original order (min before pot) in case the
    # potential-token pass reads the min-token results.
    for refresh in (g._calc_min_tokens, g._calc_pot_tokens):
        refresh()
def parse(srcfile: TextIO, len_based_size=False) -> Grammar:
    """Interface function to LL parser of BNF.

    Populates TERMINALS and NONTERMINALS.

    Args:
        srcfile: open text stream containing the BNF source.
        len_based_size: stored into the module-wide config so the
            Grammar uses length-based sizing.

    Returns:
        The finalized Grammar built from the stream.
    """
    import os

    config.LEN_BASED_SIZE = len_based_size  # update global accordingly to be used in grammar.
    stream = TokenStream(srcfile)
    # os.path.basename handles the platform's separators (ntpath also
    # accepts '/'), unlike the previous rpartition('/') which kept the
    # full Windows path.  Streams without a .name attribute (e.g.
    # io.StringIO) fall back to a placeholder instead of raising.
    gram_name = os.path.basename(getattr(srcfile, "name", "<stream>"))
    gram = Grammar(gram_name)
    _grammar(stream, gram)
    gram.finalize()
    return gram
def teardown(self, g: grammar.Grammar):
    """Prune orphan symbols left behind by this transform."""
    # The start symbol must survive even if it was collected as a unit symbol.
    self.unit_symbols.discard(g.start)
    # Snapshot the items so we can delete from g.symbols while scanning.
    for name, sym in list(g.symbols.items()):
        if sym in self.unit_symbols:
            del g.symbols[name]
    g._calc_min_tokens()
    g._calc_pot_tokens()
def _bnf_seq(stream: TokenStream, gram: Grammar) -> RHSItem:
    """Sequence of rhs items."""
    # An immediate terminator means the sequence is empty.
    if stream.peek().kind == TokenCat.TERMINATOR:
        return gram.seq()
    head = _bnf_primary(stream, gram)
    # A lone item is returned bare rather than wrapped in a sequence.
    if stream.peek().kind not in FIRST_SYM:
        return head
    items = gram.seq()
    items.append(head)
    while stream.peek().kind in FIRST_SYM:
        items.append(_bnf_primary(stream, gram))
    return items
def _lex_rhs(stream: TokenStream, gram: Grammar) -> _Literal:
    """Parse the right-hand side of a lexical production.

    FIXME: How should we define lexical productions?

    Returns:
        A literal RHS item built from a string or number token.

    Raises:
        InputError: if the next token is not a string literal or integer.
    """
    token = stream.take()
    if token.kind in (TokenCat.STRING, TokenCat.NUMBER):
        return gram.literal(token.value)
    # The old message was an f-string with nothing interpolated; include
    # the offending token so the user can locate the problem.
    raise InputError(
        f"Lexical RHS should be string literal or integer, not '{token.value}'")
def _bnf_symbol(stream: TokenStream, gram: Grammar) -> RHSItem:
    """A single identifier or literal, or a parenthesized group."""
    # Parenthesized group: parse a full rhs and require the closing paren.
    if stream.peek().kind == TokenCat.LPAREN:
        stream.take()
        group = _bnf_rhs(stream, gram)
        require(stream, TokenCat.RPAREN, consume=True)
        return group
    token = stream.take()
    if token.kind in (TokenCat.STRING, TokenCat.CHAR):
        # Strip the surrounding quote characters before storing.
        return gram.literal(token.value[1:-1])
    if token.kind == TokenCat.IDENT:
        return gram.symbol(token.value)
    raise InputError(f"Unexpected input token {token.value}")
def _bnf_primary(stream: TokenStream, gram: Grammar) -> RHSItem:
    """A symbol or group, possibly with kleene star.

    Returns:
        The parsed item, wrapped in a kleene node when followed by '*'.
    """
    item = _bnf_symbol(stream, gram)
    if stream.peek().kind == TokenCat.KLEENE:
        # Consume the kleene token; its value is not needed (the old code
        # bound it to an unused local).
        stream.take()
        return gram.kleene(item)
    return item
def _statement(stream: TokenStream, gram: Grammar):
    """_statement == production | merge
    (left-factored for lookahead)

    Raises:
        InputError: if the token after the LHS identifier is neither a
            production nor a merge operator, or the terminator is missing.
    """
    require(stream, TokenCat.IDENT, desc="Statement should begin with symbol")
    lhs_ident = stream.take().value
    prod_type = stream.take()
    if prod_type.kind == TokenCat.BNFPROD:
        lhs_sym = gram.symbol(lhs_ident)
        rhs = _bnf_rhs(stream, gram)
        gram.add_cfg_prod(lhs_sym, rhs)
    elif prod_type.kind == TokenCat.BNFMERGE:
        merge_list = _merge_symbols(stream)
        # Merges are symmetric, so order doesn't matter
        merge_list.append(lhs_ident)
        gram.merge_symbols(merge_list)
    else:
        # Previously any other token fell through silently and failed later
        # on the terminator check with a misleading message.
        raise InputError(
            f"Expected production or merge operator after {lhs_ident}, "
            f"got '{prod_type.value}'")
    require(stream, TokenCat.TERMINATOR, "Statement must end with terminator", consume=True)
def _bnf_rhs(stream: TokenStream, gram: Grammar) -> RHSItem:
    """One or more alternatives separated by the disjunction token."""
    first = _bnf_seq(stream, gram)
    # Special case: a single alternative is returned bare, not wrapped
    # in a choice node.
    if stream.peek().kind != TokenCat.DISJUNCT:
        return first
    alternatives = gram.choice()
    alternatives.append(first)
    while stream.peek().kind == TokenCat.DISJUNCT:
        stream.take()
        alternatives.append(_bnf_seq(stream, gram))
    return alternatives