def create_bnf_langlet(bnf_grammar_file, lexer_file):
    ''' Construct an ad-hoc langlet from a BNF grammar file.

    Builds a parser SymbolObject from the normalized BNF rules, a lexer
    SymbolObject from the lexer definition file, wires both into a fresh
    LangletObject and attaches the generated NFAs and keywords.
    '''
    # parser-rules: normalize each rule to single-spaced text, one per line
    cst = bnfreader.parse_file(bnf_grammar_file)
    normalized = [
        " ".join(bnfreader.unparse(node)[:-1].split()) + "\n"
        for node in find_all(cst, bnfreader.symbol.rule)
    ]
    grammar_text = "".join(normalized)
    langlet_id = 1000 * 100
    parse_symbol = SymbolObject(langlet_id)
    parse_symbol.create(normalized)
    # lexer-rules: round-trip through ls_grammar to canonicalize the file
    with open(lexer_file) as lex_src:
        token_rules = ls_grammar.unparse(ls_grammar.parse(lex_src.read())).split("\n")
    lex_symbol = SymbolObject(langlet_id, 100)
    lex_symbol.create(token_rules)
    # create NFAs but don't compute properties. This won't work because
    # left recursion prevents first-sets ( reachables ) to be derived.
    langlet = LangletObject(langlet_id, parse_symbol, lex_symbol)
    generator = NFAGenerator(langlet)
    langlet.nfas = generator.create_all(grammar_text)
    langlet.keywords = generator.keywords
    return langlet
def __init__(self, langlet, rule): try: self.rule = rule.tokstream except AttributeError: self.rule = rule self.nfagenerator = NFAGenerator(langlet, "Parser") R = [s[1] for s in self.rule] self.nfa = self.nfagenerator.from_ebnf(" ".join(R))
def create_grammar(self): symobject = SymbolObject(ls_grammar.langlet_id) R = [' '.join([g[1] for g in R1]).strip() for R1 in self.rules] symobject.create(R) self.langlet.parse_symbol = symobject self.nfagenerator = NFAGenerator(self.langlet, "Parser") self.nfagenerator.create_all("\n".join(R)+"\n") if self.nfagenerator.nfas: self.nfagenerator.derive_properties() self.nfagenerator.expand_nfas()
def create_grammar(self, report=False, expansion=True): symobject = SymbolObject(self.langlet.langlet_id) R = [' '.join([g[1] for g in R1]).strip() for R1 in self.rules] symobject.create(R) self.langlet.parse_symbol = self.langlet.symbol = symobject self.nfagenerator = NFAGenerator(self.langlet, "Parser") self.grammar = "\n".join(R) + "\n" # print grammar self.nfagenerator.create_all(self.grammar) if self.nfagenerator.nfas: self.nfagenerator.derive_properties() if expansion: self.nfagenerator.expand_nfas(report=report) self.langlet.parse_nfa = self.nfagenerator.nfadata self.langlet.keywords = self.langlet.parse_nfa.keywords
def terminate(self, fit_val): return False def mutate(self, individual): try: R = individual.rule op = ["insert", "subst", "delete"][random.randrange(0,3)] fn = getattr(self, op) print ' '.join([s[1] for s in fn(R)]) ro = RuleObject(self.rule_langlet, fn(R)) return ro except (GrammarError, NodeCycleError): pass except (KeyError, RuntimeError): self.display_individual(ro, 0) raise if __name__ == '__main__': python = langscape.load_langlet("python") nfagen = NFAGenerator(python) import pprint nfa = nfagen.from_ebnf("file_input: (NEWLINE | stmt)* ENDMARKER") pprint.pprint(nfa) rgen = ReverseGen(python, nfa) print rgen.names rgen.evolve(size = 20, generations = 250)