Example #1
def create_bnf_langlet(bnf_grammar_file, lexer_file):
    '''
    Construct an ad-hoc langlet from a BNF grammar file.
    '''
    # parser-rules
    cst = bnfreader.parse_file(bnf_grammar_file)
    parser_rules = []
    # normalize the rules of the grammar file: collapse whitespace, one rule per line
    for rule in find_all(cst, bnfreader.symbol.rule):
        ls_rule = " ".join(bnfreader.unparse(rule)[:-1].split()) + "\n"
        parser_rules.append(ls_rule)
    bnf_grammar = "".join(parser_rules)
    langlet_id = 1000 * 100
    parse_symbol = SymbolObject(langlet_id)
    parse_symbol.create(parser_rules)

    # lexer-rules
    with open(lexer_file) as f_lex:
        lexer_rules = ls_grammar.unparse(ls_grammar.parse(
            f_lex.read())).split("\n")
    lex_symbol = SymbolObject(langlet_id, 100)
    lex_symbol.create(lexer_rules)
    # Create the NFAs but don't compute their properties: left recursion
    # in the grammar would prevent the first-sets (reachables) from being
    # derived.
    langlet = LangletObject(langlet_id, parse_symbol, lex_symbol)
    nfagen = NFAGenerator(langlet)
    nfas = nfagen.create_all(bnf_grammar)
    langlet.nfas = nfas
    langlet.keywords = nfagen.keywords
    return langlet
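
A minimal usage sketch for the function above. The file names are placeholders, and bnfreader, ls_grammar, SymbolObject, NFAGenerator and LangletObject are assumed to be imported from the surrounding Langscape package, exactly as the snippet itself assumes:

langlet = create_bnf_langlet("mylang.bnf", "mylang.lex")  # placeholder paths
print langlet.keywords    # keywords collected by the NFA generator
print len(langlet.nfas)   # number of generated rule NFAs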
Example #2
def __init__(self, langlet, rule):
    try:
        # a RuleObject-like argument carries its tokens in .tokstream
        self.rule = rule.tokstream
    except AttributeError:
        # otherwise the argument is assumed to be a token stream already
        self.rule = rule
    self.nfagenerator = NFAGenerator(langlet, "Parser")
    # join the token values into a single EBNF rule string
    R = [s[1] for s in self.rule]
    self.nfa = self.nfagenerator.from_ebnf(" ".join(R))
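
Judging from Example #5 below, this looks like the constructor of RuleObject. A hedged sketch of how it might be called, reusing ls_grammar.tokenize from Example #4; the rule text and the langlet variable are placeholders:

tokens = ls_grammar.tokenize("expr: term ('+' term)*")
ro = RuleObject(langlet, tokens)  # a plain token stream takes the AttributeError branch
print ro.nfa                      # the NFA derived from the joined EBNF rule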
Example #3
def create_grammar(self):
    symobject = SymbolObject(ls_grammar.langlet_id)
    # flatten each tokenized rule back into its source text
    R = [' '.join([g[1] for g in R1]).strip() for R1 in self.rules]
    symobject.create(R)
    self.langlet.parse_symbol = symobject
    self.nfagenerator = NFAGenerator(self.langlet, "Parser")
    self.nfagenerator.create_all("\n".join(R) + "\n")
    if self.nfagenerator.nfas:
        self.nfagenerator.derive_properties()
        self.nfagenerator.expand_nfas()
Example #4
class GrammarObject(object):
    def __init__(self, rules):
        '''
        @param rules: a list of grammar rules in tokenized form, i.e.
                      grammar rules given as token streams.
        '''
        self.rules = rules
        self.langlet = GrammarLanglet()
        self.nfagenerator = None
        self.langlet.langlet_id = ls_grammar.langlet_id
        self.grammar = ""

    def set_langlet_id(self, ll_id):
        self.langlet.langlet_id = ll_id

    def create_grammar(self, report=False, expansion=True):
        symobject = SymbolObject(self.langlet.langlet_id)
        # flatten each tokenized rule back into its source text
        R = [' '.join([g[1] for g in R1]).strip() for R1 in self.rules]
        symobject.create(R)
        self.langlet.parse_symbol = self.langlet.symbol = symobject
        self.nfagenerator = NFAGenerator(self.langlet, "Parser")
        self.grammar = "\n".join(R) + "\n"
        self.nfagenerator.create_all(self.grammar)
        if self.nfagenerator.nfas:
            self.nfagenerator.derive_properties()
            if expansion:
                self.nfagenerator.expand_nfas(report=report)
        self.langlet.parse_nfa = self.nfagenerator.nfadata
        self.langlet.keywords = self.langlet.parse_nfa.keywords

    def get_nfas(self):
        return self.nfagenerator.nfas

    def get_start_symbol(self):
        return self.nfagenerator.nfadata.start_symbols[0]

    @classmethod
    def grammar_from_rule(cls, grammar_rule, report=False):
        rules = []
        R = ls_grammar.tokenize(grammar_rule)
        rules.append(R)
        names = set()
        strings = set()
        # collect the NAME and STRING tokens of the right-hand side
        for t in R[2:]:
            if t[0] == ls_grammar.parse_token.NAME:
                names.add(t[1])
            elif t[0] == ls_grammar.parse_token.STRING:
                strings.add(t[1])
        # for each name, synthesize a terminal rule whose right-hand side
        # is a fresh string that collides with neither the existing names
        # nor the existing strings
        for i, name in enumerate(names):
            n = 1
            k = str(i) * n
            while name + k in names or "'" + name + k + "'" in strings:
                n += 1
                k = str(i) * n
            rules.append(ls_grammar.tokenize("%s: '%s'" % (name, name + k)))
        go = GrammarObject(rules)
        go.create_grammar(report=report)
        return go
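
A hedged usage sketch for the grammar_from_rule classmethod above; the rule text is a placeholder and the accessors are those defined on the class:

go = GrammarObject.grammar_from_rule("expr: term ('+' factor)*")
print go.grammar              # normalized rule text plus the synthesized terminal rules
print go.get_start_symbol()   # start symbol of the derived grammar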
Example #5
    def terminate(self, fit_val):
        return False

    def mutate(self, individual):
        ro = None
        try:
            R = individual.rule
            # pick one of the three mutation operators at random
            op = random.choice(["insert", "subst", "delete"])
            fn = getattr(self, op)
            # apply the operator once and reuse the result, so the printed
            # rule is the one actually wrapped in the RuleObject
            mutated = fn(R)
            print ' '.join([s[1] for s in mutated])
            ro = RuleObject(self.rule_langlet, mutated)
            return ro
        except (GrammarError, NodeCycleError):
            pass
        except (KeyError, RuntimeError):
            if ro is not None:
                self.display_individual(ro, 0)
            raise


if __name__ == '__main__':
    python = langscape.load_langlet("python")
    nfagen = NFAGenerator(python)
    import pprint
    nfa = nfagen.from_ebnf("file_input: (NEWLINE | stmt)* ENDMARKER")
    pprint.pprint(nfa)

    rgen = ReverseGen(python, nfa)
    print rgen.names
    rgen.evolve(size=20, generations=250)