コード例 #1
0
ファイル: grammar.py プロジェクト: UniGrammar/wisent
    def write_example(self, fd=sys.stdout, params={}):
        """Write an example program for the generated parser to `fd`.

        A random sample input is derived first: starting from the
        symbol at ``self.rules[-1][1]``, nonterminals are replaced by
        the right-hand side of a randomly chosen rule until no usable
        rule is left.  Rules with more than one nonterminal on the
        right-hand side are preferred, and a rule that has been applied
        is retired with probability ``0.1 * len(word)``, so every
        applied rule is retired once the word has ten or more symbols.
        Each symbol of the resulting word is then expanded through
        ``self.shortcuts()`` and emitted as a one-element tuple.

        `params` must provide the keys ``example_name``, ``date`` and
        ``version`` for the file header; ``fname`` and ``parser_name``
        are optional.  The import line names the parser module only if
        ``parser_name`` ends in ``.py``; otherwise ``...`` is written.
        """
        nonterminals = self.nonterminals
        sentence = [self.rules[-1][1]]
        pending = set(self.rules.keys())

        def nonterminal_count(key):
            # number of nonterminal symbols on the right-hand side
            return sum(1 for sym in self.rules[key][1:] if sym in nonterminals)

        while pending:
            # collect every (position, rule) pair that can still be applied
            candidates = []
            for pos, sym in enumerate(sentence):
                if sym in nonterminals:
                    usable = set(k for k, l in self.rule_from_head[sym]) & pending
                    candidates.extend((pos, key) for key in usable)
            # prefer rules which keep the derivation branching
            branching = [c for c in candidates if nonterminal_count(c[1]) > 1]
            try:
                pos, key = choice(branching or candidates)
            except IndexError:
                # nothing left to expand
                break
            sentence[pos:pos + 1] = self.rules[key][1:]
            if uniform(0, 1) < 0.1 * len(sentence):
                pending.discard(key)

        short = self.shortcuts()
        tokens = [repr((term, )) for sym in sentence for term in short[sym]]

        parser_file = params.get("parser_name", "")
        if parser_file.endswith(".py"):
            module = basename(parser_file)[:-3]
        else:
            module = "..."

        banner = """
        #! /usr/bin/env python
        # %(example_name)s - illustrate the use of a Wisent-generated parser
        # example code autogenerated on %(date)s
        # generator: wisent %(version)s, http://seehuhn.de/pages/wisent
        """ % params
        driver = """
        p = Parser()
        try:
            tree = p.parse(input)
        except p.ParseErrors, e:
            for token,expected in e.errors:
                if token[0] == p.EOF:
                    print >>stderr, "unexpected end of file"
                    continue

                found = repr(token[0])
                if len(expected) == 1:
                    msg = "missing %s (found %s)"%(repr(expected[0]), found)
                else:
                    msg1 = "parse error before %s, "%found
                    l = sorted([ repr(s) for s in expected ])
                    msg2 = "expected one of "+", ".join(l)
                    msg = msg1+msg2
                print >>stderr, msg
            raise SystemExit(1)
        """

        write_block(fd, 0, banner, first=True)
        if 'fname' in params:
            fd.write("# source: %(fname)s\n" % params)

        for line in ('\n',
                     'from sys import stderr\n',
                     '\n',
                     'from %s import Parser\n' % module):
            fd.write(line)

        write_block(fd, 0, getsource(template.print_tree))

        fd.write('\n')
        for line in split_it(tokens, start1="input = [ ", end2=" ]"):
            fd.write(line + '\n')
        write_block(fd, 0, driver)
        fd.write('\n')
        fd.write('print_tree(tree, p.terminals)\n')
コード例 #2
0
ファイル: grammar.py プロジェクト: Lalufu/wisent
    def write_example(self, fd=sys.stdout, params={}):
        """Emit an example script demonstrating the generated parser.

        The script is written to the file-like object `fd`.  Its sample
        input is produced by a random derivation: the list of symbols
        starts as ``[self.rules[-1][1]]`` and, while unused rules
        remain, one nonterminal occurrence is replaced by the
        right-hand side of a randomly chosen unused rule for it.  Rules
        whose right-hand side contains more than one nonterminal take
        precedence whenever any are applicable.  After each step the
        applied rule is dropped from the unused set with probability
        ``0.1 * len(symbols)``.  The final symbols are mapped through
        ``self.shortcuts()`` and written as a list of 1-tuples.

        `params` is used to fill in the header (keys ``example_name``,
        ``date``, ``version``, and optionally ``fname``) and to find
        the parser module name (``parser_name``, used only if it ends
        in ``.py``).
        """
        symbols = [ self.rules[-1][1] ]
        unused = set(self.rules.keys())
        nts = self.nonterminals

        def rhs_nonterminals(rule_id):
            # count the nonterminals on the right-hand side of the rule
            total = 0
            for s in self.rules[rule_id][1:]:
                if s in nts:
                    total += 1
            return total

        while unused:
            options = []
            for idx, head in enumerate(symbols):
                if head not in nts:
                    continue
                head_rules = set(k for k,l in self.rule_from_head[head])&unused
                options += [ (idx, rule_id) for rule_id in head_rules ]
            multi = [ opt for opt in options if rhs_nonterminals(opt[1])>1 ]
            if multi:
                options = multi
            try:
                idx, rule_id = choice(options)
            except IndexError:
                # no nonterminal can be expanded any more
                break
            symbols[idx:idx+1] = self.rules[rule_id][1:]
            if uniform(0,1) < 0.1*len(symbols):
                unused.discard(rule_id)

        expansions = self.shortcuts()
        items = []
        for head in symbols:
            for leaf in expansions[head]:
                items.append(repr((leaf,)))

        parser = params.get("parser_name", "")
        parser = basename(parser)[:-3] if parser.endswith(".py") else "..."

        # file header of the example script
        header = """
        #! /usr/bin/env python
        # %(example_name)s - illustrate the use of a Wisent-generated parser
        # example code autogenerated on %(date)s
        # generator: wisent %(version)s, http://seehuhn.de/pages/wisent
        """%params
        write_block(fd, 0, header, first=True)
        if 'fname' in params:
            fd.write("# source: %(fname)s\n"%params)

        # imports needed by the example script
        fd.write('\n')
        fd.write('from sys import stderr\n')
        fd.write('\n')
        fd.write('from %s import Parser\n'%parser)

        tree_printer = getsource(template.print_tree)
        write_block(fd, 0, tree_printer)

        # the sample input, followed by the code which parses it
        fd.write('\n')
        for text in split_it(items, start1="input = [ ", end2=" ]"):
            fd.write(text+'\n')
        main_code = """
        p = Parser()
        try:
            tree = p.parse(input)
        except p.ParseErrors, e:
            for token,expected in e.errors:
                if token[0] == p.EOF:
                    print >>stderr, "unexpected end of file"
                    continue

                found = repr(token[0])
                if len(expected) == 1:
                    msg = "missing %s (found %s)"%(repr(expected[0]), found)
                else:
                    msg1 = "parse error before %s, "%found
                    l = sorted([ repr(s) for s in expected ])
                    msg2 = "expected one of "+", ".join(l)
                    msg = msg1+msg2
                print >>stderr, msg
            raise SystemExit(1)
        """
        write_block(fd, 0, main_code)
        fd.write('\n')
        fd.write('print_tree(tree, p.terminals)\n')
コード例 #3
0
    def write_parser(self, fd, options={}):
        """Emit Python code implementing the parser.

        A complete, stand-alone Python source file implementing the
        parser is written to the file-like object `fd`, each line of
        the output is prefixed with the string `prefix`.
        """
        self.check()

        params = options.copy()

        from time import strftime
        params.setdefault('date', strftime("%Y-%m-%d %H:%M:%S"))
        params['version'] = VERSION

        write_block(fd,
                    0,
                    """# LR(1) parser, autogenerated on %(date)s
# generator: wisent %(version)s, http://seehuhn.de/pages/wisent
        """ % params,
                    first=True)
        if 'fname' in params:
            fd.write("# source: %(fname)s\n" % params)

        write_block(
            fd, 0, """
# All parts of this file which are not taken verbatim from the input grammar
# are covered by the following notice:
#""")
        fd.write(getcomments(template))

        fd.write('\n')
        fd.write('from itertools import chain\n')

        write_block(fd, 0, getsource(Unique))
        fd.write('\n')

        fd.write('class Parser(object):\n\n')

        fd.write('    """LR(1) parser class.\n')
        if params.get("parser_debugprint", False):
            write_block(
                fd, 4, """
            Instances of this class print additional debug messages and are
            not suitable for production use.
            """)
        fd.write('\n')
        self.g.write_terminals(fd, "    ")
        fd.write('\n')
        self.g.write_nonterminals(fd, "    ")
        fd.write('\n')
        self.g.write_productions(fd, "    ")
        if self.replace_nonterminals:
            write_block(
                fd, 4, """
            In the returned parse trees, nonterminal symbols are
            replaced by numbers.  You can use the dictionary
            `Parser.nonterminals` to map back the numeric codes to the
            corresponding symbols.
            """)
        fd.write('    """\n')

        if "parser_comment" in params:
            fd.write('\n')
            self.write_transition_table(fd)
            fd.write('\n')
            self.write_parser_states(fd)

        write_block(fd, 4, getsource(template.Parser.ParseErrors))

        fd.write('\n')
        tt = map(repr, sorted(self.g.terminals - set([self.g.EOF])))
        for l in split_it(tt,
                          padding="    ",
                          start1="terminals = [ ",
                          end2=" ]"):
            fd.write(l + '\n')
        nt_tab = self.nt_tab
        transparent = params.get("transparent_tokens", set())
        transparent &= self.g.nonterminals
        if self.replace_nonterminals:
            symbols = self.g.nonterminals - set([self.g.start]) - transparent
            nonterminals = sorted(symbols)
            tt = ["%d: %s" % (nt_tab[X], repr(X)) for X in nonterminals]
            for l in split_it(tt,
                              padding="    ",
                              start1="nonterminals = { ",
                              end2=" }"):
                fd.write(l + '\n')
        if transparent:
            tt = [repr(nt_tab[X]) for X in sorted(transparent)]
            for l in split_it(tt,
                              padding="    ",
                              start1="_transparent = [ ",
                              end2=" ]"):
                fd.write(l + '\n')

        fd.write("    EOF = Unique('EOF')\n")
        fd.write("    S = Unique('S')\n")

        # halting state
        fd.write('\n')
        fd.write("    _halting_state = %s\n" % self.halting_state)

        # reduce actions
        rtab = self.rtab
        r_items = [
            "%s: %s" % (repr(key), repr(rtab[key]))
            for key in sorted(self.rtab)
        ]
        fd.write("    _reduce = {\n")
        for l in split_it(r_items, padding="        "):
            fd.write(l + '\n')
        fd.write("    }\n")

        # goto table
        gtab = self.gtab
        g_items = [
            "%s: %s" % (repr(key), repr(gtab[key]))
            for key in sorted(self.gtab)
        ]
        fd.write("    _goto = {\n")
        for l in split_it(g_items, padding="        "):
            fd.write(l + '\n')
        fd.write("    }\n")

        # shift table
        stab = self.stab
        s_items = [
            "%s: %s" % (repr(key), repr(stab[key]))
            for key in sorted(self.stab)
        ]
        fd.write("    _shift = {\n")
        for l in split_it(s_items, padding="        "):
            fd.write(l + '\n')
        fd.write("    }\n")

        write_block(fd, 4, getsource(template.Parser.__init__), params)
        write_block(fd, 4, getsource(template.Parser.leaves), params)
        write_block(fd, 4, getsource(template.Parser._parse), params)
        write_block(fd, 4, getsource(template.Parser._try_parse), params)
        write_block(fd, 4, getsource(template.Parser.parse), params)
コード例 #4
0
ファイル: automaton.py プロジェクト: Lalufu/wisent
    def write_parser(self, fd, options={}):
        """Emit Python code implementing the parser.

        A complete, stand-alone Python source file implementing the
        parser is written to the file-like object `fd`, each line of
        the output is prefixed with the string `prefix`.
        """
        self.check()

        params = options.copy()

        from time import strftime
        params.setdefault('date', strftime("%Y-%m-%d %H:%M:%S"))
        params['version'] = VERSION

        write_block(fd, 0, """# LR(1) parser, autogenerated on %(date)s
# generator: wisent %(version)s, http://seehuhn.de/pages/wisent
        """%params, first=True)
        if 'fname' in params:
            fd.write("# source: %(fname)s\n"%params)

        write_block(fd, 0, """
# All parts of this file which are not taken verbatim from the input grammar
# are covered by the following notice:
#""")
        fd.write(getcomments(template))

        fd.write('\n')
        fd.write('from itertools import chain\n')

        write_block(fd, 0, getsource(Unique))
        fd.write('\n')

        fd.write('class Parser(object):\n\n')

        fd.write('    """LR(1) parser class.\n')
        if params.get("parser_debugprint", False):
            write_block(fd, 4, """
            Instances of this class print additional debug messages and are
            not suitable for production use.
            """)
        fd.write('\n')
        self.g.write_terminals(fd, "    ")
        fd.write('\n')
        self.g.write_nonterminals(fd, "    ")
        fd.write('\n')
        self.g.write_productions(fd, "    ")
        if self.replace_nonterminals:
            write_block(fd, 4, """
            In the returned parse trees, nonterminal symbols are
            replaced by numbers.  You can use the dictionary
            `Parser.nonterminals` to map back the numeric codes to the
            corresponding symbols.
            """)
        fd.write('    """\n')

        if "parser_comment" in params:
            fd.write('\n')
            self.write_transition_table(fd)
            fd.write('\n')
            self.write_parser_states(fd)

        write_block(fd, 4, getsource(template.Parser.ParseErrors))

        fd.write('\n')
        tt = map(repr, sorted(self.g.terminals-set([self.g.EOF])))
        for l in split_it(tt, padding="    ", start1="terminals = [ ",
                          end2=" ]"):
            fd.write(l+'\n')
        nt_tab = self.nt_tab
        transparent = params.get("transparent_tokens", set())
        transparent &= self.g.nonterminals
        if self.replace_nonterminals:
            symbols = self.g.nonterminals-set([self.g.start])-transparent
            nonterminals = sorted(symbols)
            tt = [ "%d: %s"%(nt_tab[X],repr(X)) for X in nonterminals ]
            for l in split_it(tt, padding="    ",
                              start1="nonterminals = { ", end2=" }"):
                fd.write(l+'\n')
        if transparent:
            tt = [ repr(nt_tab[X]) for X in sorted(transparent) ]
            for l in split_it(tt, padding="    ",
                              start1="_transparent = [ ", end2=" ]"):
                fd.write(l+'\n')

        fd.write("    EOF = Unique('EOF')\n")
        fd.write("    S = Unique('S')\n")

        # halting state
        fd.write('\n')
        fd.write("    _halting_state = %s\n"%self.halting_state)

        # reduce actions
        rtab = self.rtab
        r_items = [ "%s: %s"%(repr(key),repr(rtab[key]))
                    for key in sorted(self.rtab) ]
        fd.write("    _reduce = {\n")
        for l in split_it(r_items, padding="        "):
            fd.write(l+'\n')
        fd.write("    }\n")

        # goto table
        gtab = self.gtab
        g_items = [ "%s: %s"%(repr(key),repr(gtab[key]))
                    for key in sorted(self.gtab) ]
        fd.write("    _goto = {\n")
        for l in split_it(g_items, padding="        "):
            fd.write(l+'\n')
        fd.write("    }\n")

        # shift table
        stab = self.stab
        s_items = [ "%s: %s"%(repr(key),repr(stab[key]))
                    for key in sorted(self.stab) ]
        fd.write("    _shift = {\n")
        for l in split_it(s_items, padding="        "):
            fd.write(l+'\n')
        fd.write("    }\n")

        write_block(fd, 4, getsource(template.Parser.__init__), params)
        write_block(fd, 4, getsource(template.Parser.leaves), params)
        write_block(fd, 4, getsource(template.Parser._parse), params)
        write_block(fd, 4, getsource(template.Parser._try_parse), params)
        write_block(fd, 4, getsource(template.Parser.parse), params)