class GrammarObject(object):
    """Builds parser tables (symbols, NFAs, keywords) for a langlet from
    grammar rules supplied as token streams."""

    def __init__(self, rules):
        '''
        @param rules: a list of grammar rules in tokenized form i.e. grammar
                      rules as token streams.
        '''
        self.rules = rules
        self.langlet = GrammarLanglet()
        self.nfagenerator = None
        # default langlet id; callers may override via set_langlet_id()
        self.langlet.langlet_id = ls_grammar.langlet_id
        self.grammar = ""

    def set_langlet_id(self, ll_id):
        # Replace the default id taken from ls_grammar.
        self.langlet.langlet_id = ll_id

    def create_grammar(self, report=False, expansion=True):
        """Reassemble the token streams into rule text, create the symbol
        table and generate (and optionally expand) the parser NFAs."""
        symobject = SymbolObject(self.langlet.langlet_id)
        # Each token stream becomes one flat rule string.
        rule_texts = [' '.join(tok[1] for tok in stream).strip()
                      for stream in self.rules]
        symobject.create(rule_texts)
        self.langlet.parse_symbol = self.langlet.symbol = symobject
        self.nfagenerator = NFAGenerator(self.langlet, "Parser")
        self.grammar = "\n".join(rule_texts) + "\n"
        # print grammar
        self.nfagenerator.create_all(self.grammar)
        if self.nfagenerator.nfas:
            self.nfagenerator.derive_properties()
            if expansion:
                self.nfagenerator.expand_nfas(report=report)
            self.langlet.parse_nfa = self.nfagenerator.nfadata
            self.langlet.keywords = self.langlet.parse_nfa.keywords

    def get_nfas(self):
        return self.nfagenerator.nfas

    def get_start_symbol(self):
        return self.nfagenerator.nfadata.start_symbols[0]

    @classmethod
    def grammar_from_rule(cls, grammar_rule, report=False):
        """Build a GrammarObject from a single rule string, synthesizing a
        terminal rule for every NAME on its right-hand side so the grammar
        is self-contained."""
        rule_streams = [ls_grammar.tokenize(grammar_rule)]
        main_stream = rule_streams[0]
        names = set()
        strings = set()
        # Skip the rule head (first two tokens) and collect RHS tokens.
        for tok in main_stream[2:]:
            if tok[0] == ls_grammar.parse_token.NAME:
                names.add(tok[1])
            elif tok[0] == ls_grammar.parse_token.STRING:
                strings.add(tok[1])
        for idx, nm in enumerate(names):
            width = 1
            suffix = str(idx) * width
            # Lengthen the suffix until the synthesized terminal clashes
            # with neither an existing NAME nor a quoted STRING.
            while nm + suffix in names or "'" + nm + suffix + "'" in strings:
                width += 1
                suffix = str(idx) * width
            rule_streams.append(
                ls_grammar.tokenize("%s: '%s'" % (nm, nm + suffix)))
        go = GrammarObject(rule_streams)
        go.create_grammar(report=report)
        return go
class GrammarObject(object):
    """Builds parser tables (symbols, NFAs, keywords) for a langlet from
    grammar rules supplied as token streams."""

    def __init__(self, rules):
        '''
        @param rules: a list of grammar rules in tokenized form i.e. grammar rules as token streams.
        '''
        self.rules = rules
        self.langlet = GrammarLanglet()
        self.nfagenerator = None
        # Default langlet id; may be overridden with set_langlet_id().
        self.langlet.langlet_id = ls_grammar.langlet_id
        self.grammar = ""

    def set_langlet_id(self, ll_id):
        # Override the default langlet id taken from ls_grammar.
        self.langlet.langlet_id = ll_id

    def create_grammar(self, report=False, expansion=True):
        """Create symbol table and parser NFAs from the stored rules.

        @param report: forwarded to expand_nfas.
        @param expansion: when True, expand the generated NFAs.
        """
        symobject = SymbolObject(self.langlet.langlet_id)
        # Join each token stream back into one flat rule string.
        R = [' '.join([g[1] for g in R1]).strip() for R1 in self.rules]
        symobject.create(R)
        self.langlet.parse_symbol = self.langlet.symbol = symobject
        self.nfagenerator = NFAGenerator(self.langlet, "Parser")
        self.grammar = "\n".join(R) + "\n"
        # print grammar
        self.nfagenerator.create_all(self.grammar)
        # NOTE(review): assumes the property derivation and parse_nfa
        # assignments only apply when NFAs were produced — confirm nesting
        # against upstream source.
        if self.nfagenerator.nfas:
            self.nfagenerator.derive_properties()
            if expansion:
                self.nfagenerator.expand_nfas(report=report)
            self.langlet.parse_nfa = self.nfagenerator.nfadata
            self.langlet.keywords = self.langlet.parse_nfa.keywords

    def get_nfas(self):
        # NFAs produced by the most recent create_grammar() call.
        return self.nfagenerator.nfas

    def get_start_symbol(self):
        # First start symbol of the generated NFA data.
        return self.nfagenerator.nfadata.start_symbols[0]

    @classmethod
    def grammar_from_rule(cls, grammar_rule, report=False):
        """Build a GrammarObject from a single rule string.

        For every NAME on the rule's right-hand side a terminal rule is
        synthesized (``name: 'name<k>'``), choosing a suffix that collides
        with no existing NAME or quoted STRING token.
        """
        rules = []
        R = ls_grammar.tokenize(grammar_rule)
        rules.append(R)
        names = set()
        strings = set()
        # Skip the rule head (first two tokens); classify RHS tokens.
        for t in R[2:]:
            if t[0] == ls_grammar.parse_token.NAME:
                names.add(t[1])
            elif t[0] == ls_grammar.parse_token.STRING:
                strings.add(t[1])
        for i, name in enumerate(names):
            n = 1
            k = str(i) * n
            # Grow the suffix until it is unique among names and strings.
            while (name + k in names or "'" + name + k + "'" in strings):
                n += 1
                k = str(i) * n
            rules.append(ls_grammar.tokenize("%s: '%s'" % (name, name + k)))
        go = GrammarObject(rules)
        go.create_grammar(report=report)
        return go
def create_bnf_langlet(bnf_grammar_file, lexer_file):
    '''
    Construct an ad-hoc langlet from a BNF grammar file.
    '''
    # --- parser rules ------------------------------------------------
    cst = bnfreader.parse_file(bnf_grammar_file)
    # Normalize every rule to single-spaced text terminated by a newline.
    parser_rules = [
        " ".join(bnfreader.unparse(node)[:-1].split()) + "\n"
        for node in find_all(cst, bnfreader.symbol.rule)
    ]
    bnf_grammar = "".join(parser_rules)
    langlet_id = 1000 * 100
    parse_symbol = SymbolObject(langlet_id)
    parse_symbol.create(parser_rules)
    # --- lexer rules -------------------------------------------------
    with open(lexer_file) as lexer_src:
        roundtripped = ls_grammar.unparse(ls_grammar.parse(lexer_src.read()))
    lexer_rules = roundtripped.split("\n")
    lex_symbol = SymbolObject(langlet_id, 100)
    lex_symbol.create(lexer_rules)
    # create NFAs but don't compute properties. This won't work because
    # left recursion prevents first-sets ( reachables ) to be derived.
    langlet = LangletObject(langlet_id, parse_symbol, lex_symbol)
    nfagen = NFAGenerator(langlet)
    langlet.nfas = nfagen.create_all(bnf_grammar)
    langlet.keywords = nfagen.keywords
    return langlet
class GrammarObject(object):
    """Minimal grammar wrapper: holds tokenized rules and produces the
    parser NFAs for them."""

    def __init__(self, rules):
        # rules: grammar rules as token streams (one stream per rule)
        self.rules = rules
        self.langlet = GrammarLanglet()
        self.nfagenerator = None

    def create_grammar(self):
        """Create the symbol table and expanded parser NFAs."""
        symobject = SymbolObject(ls_grammar.langlet_id)
        # Flatten each token stream back into one rule string.
        rule_texts = [' '.join(tok[1] for tok in stream).strip()
                      for stream in self.rules]
        symobject.create(rule_texts)
        self.langlet.parse_symbol = symobject
        self.nfagenerator = NFAGenerator(self.langlet, "Parser")
        self.nfagenerator.create_all("\n".join(rule_texts) + "\n")
        if self.nfagenerator.nfas:
            self.nfagenerator.derive_properties()
            self.nfagenerator.expand_nfas()
def create_bnf_langlet(bnf_grammar_file, lexer_file): ''' Construct an ad-hoc langlet from a BNF grammar file. ''' # parser-rules cst = bnfreader.parse_file(bnf_grammar_file) parser_rules = [] # do some normalization of rules of the grammar file for rule in find_all(cst, bnfreader.symbol.rule): ls_rule = " ".join(bnfreader.unparse(rule)[:-1].split()) + "\n" parser_rules.append(ls_rule) bnf_grammar = "".join(parser_rules) langlet_id = 1000 * 100 parse_symbol = SymbolObject(langlet_id) parse_symbol.create(parser_rules) # lexer-rules with open(lexer_file) as f_lex: lexer_rules = ls_grammar.unparse(ls_grammar.parse( f_lex.read())).split("\n") lex_symbol = SymbolObject(langlet_id, 100) lex_symbol.create(lexer_rules) # create NFAs but don't compute properties. This won't work because # left recursion prevents first-sets ( reachables ) to be derived. langlet = LangletObject(langlet_id, parse_symbol, lex_symbol) nfagen = NFAGenerator(langlet) nfas = nfagen.create_all(bnf_grammar) langlet.nfas = nfas langlet.keywords = nfagen.keywords return langlet