Example #1
def main():
    import sys, PgenParser, pprint, DFAParser, StdTokenizer

    # ____________________________________________________________
    # Generate a test parser
    grammarST = PgenParser.parseFile("Grammar")
    pgenObj = PyPgen()
    grammarObj = pgenObj(grammarST)
    if "-py" in sys.argv:
        print "# %s" % ("_" * 70)
        print "# This was automatically generated by PyPgen."
        print "# Hack at your own risk."
        print
        print "grammarObj =",
    pprint.pprint(grammarObj)
    if "-i" in sys.argv:
        # __________________________________________________
        # Parse some input
        if len(sys.argv) > 1:
            inputFile = sys.argv[1]
            fileObj = open(inputFile)
        else:
            inputFile = "<stdin>"
            fileObj = sys.stdin
        tokenizer = StdTokenizer.StdTokenizer().tokenize(fileObj)
        # 257 is presumably the file_input start symbol (CPython-style
        # grammar numbering).
        parseTree = DFAParser.parsetok(tokenizer, grammarObj, 257)
        fileObj.close()
        # __________________________________________________
        # Show the result
        if __BASIL__:
            from basil.visuals.TreeBox import showTree

            showTree(parseTree).mainloop()
        else:
            pprint.pprint(parseTree)
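main() relies on two names it does not define: the PyPgen class (presumably
defined earlier in the same module) and a module-level __BASIL__ flag.  A
minimal sketch of that scaffolding follows; the Basil import probe and the
entry-point guard are assumptions for illustration, not the original source.

# Hypothetical scaffolding for main(); the probe below is an assumption.
try:
    import basil
    __BASIL__ = True
except ImportError:
    __BASIL__ = False

if __name__ == "__main__":
    main()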
Example #2
def _seq2st (seqobj):
    """_seq2st()
    This recursively translates the more svelte sequence syntax tree to the
    much more wasteful syntax tree representation returned by the parser
    machinery.
    This returns a pair consisting of a pair where the first item is the
    translated tree and the second item is the line number for the tree (this
    is used in an attempt to recursively reconstruct line number information
    for nonterminal symbols).
    """
    symbol_no = seqobj[0]
    if symbol_no >= token.NT_OFFSET:
        assert len(seqobj) > 1
        # This is going to create a list of pairs of (node, line_no) data:
        child_data = [_seq2st(child) for child in seqobj[1:]]
        children = [x for (x, y) in child_data]
        # Validate the children against the DFA for the non-terminal.
        dfa = DFAParser.findDFA(pygrammar, symbol_no)
        _validateChildren(dfa, children)
        # Compute a line number and create the actual node object.
        line_no = min([y for (x, y) in child_data])
        node = ((symbol_no, '', line_no), children)
    else:
        if len(seqobj) == 3:
            node = (seqobj, [])
            line_no = seqobj[2]
        elif len(seqobj) == 2:
            node = ((symbol_no, seqobj[1], 0), [])
            line_no = 0
        else:
            raise ParserError("terminal nodes must have 2 or 3 entries")
    return node, line_no
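A hedged usage sketch for _seq2st() on a terminal node; terminals take the
second branch above, so this does not touch pygrammar (the token values are
invented for illustration):

import token

# Hypothetical input: a 3-entry terminal sequence (symbol, text, line).
node, line_no = _seq2st((token.NAME, 'spam', 3))
# node    == ((token.NAME, 'spam', 3), [])
# line_no == 3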
Example #3
def _doParse (source, start):
    """_doParse()
    Ignore the function behind the curtain!  Even if it is kinda like the
    CPython PyParser_SimpleParseString() (I think).
    """
    global pygrammar
    tokenizer = PyTokenizer.PyTokenizer().tokenizeString(source)
    return STType(DFAParser.parsetok(tokenizer, pygrammar, start))
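A hedged usage sketch; the start symbol number follows CPython's graminit
convention (257 is file_input), which is an assumption about this grammar:

# Hypothetical usage of the internal entry point:
st = _doParse("x = 1\n", 257)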
Example #4
def _validateChildren (dfa, children):
    """_validateChildren()
    """
    global pygrammar
    classify = lambda sym, name : DFAParser.classify(pygrammar, sym, name)
    symbol_no, symbol_name, initial, states, first = dfa
    crnt_state = states[initial]
    for child in children:
        ((child_symbol, child_text, child_line_no), grandchildren) = child
        ilabel = classify(child_symbol, child_text)
        arcs, accel, accept = crnt_state
        next_state = None
        for (arc_label, arc_state) in arcs:
            if ilabel == arc_label:
                next_state = states[arc_state]
                break
        if next_state is None:
            raise ParserError("symbol %d should be in %s" %
                              (ilabel, str(arcs)))
        else:
            crnt_state = next_state
    if crnt_state[2] != 1:
        raise ParserError("incomplete sequence of children (ended with %s)" %
                          str(child[0]))
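A hedged sketch of how a caller might drive this check; the symbol number
and the children list are invented for illustration:

# Hypothetical: fetch the DFA for a nonterminal and validate its children.
dfa = DFAParser.findDFA(pygrammar, 257)    # 257: assumed nonterminal number
try:
    _validateChildren(dfa, children)       # children: assumed node list
except ParserError, exc:
    print "invalid child sequence:", exc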
Example #5
as if st2tuple() was already called on the output.  Its only claim to fame
is that it is done using Pure Python(tm) with none of those icky C
extensions making it run fast.
"""
# ______________________________________________________________________
# Module imports

import token, exceptions, compiler
import PyTokenizer, PyGrammar, DFAParser

# ______________________________________________________________________
# XXX What I really want to do is parameterize this module over an
# input grammar object.  Perhaps this can be done using an external
# class and then pyparser just calls into an instance of this class.

pygrammar = DFAParser.addAccelerators(PyGrammar.grammarObj)
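One hedged sketch of the parameterization imagined above; the class and
method names are hypothetical, not part of this module:

# Hypothetical sketch only:
class GrammarParser (object):
    """Class GrammarParser
    Wraps a grammar object so parsing is not tied to the module-level
    pygrammar.
    """
    def __init__ (self, grammarObj):
        self.grammar = DFAParser.addAccelerators(grammarObj)

    def parseString (self, source, start):
        tokenizer = PyTokenizer.PyTokenizer().tokenizeString(source)
        return STType(DFAParser.parsetok(tokenizer, self.grammar, start))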

# ______________________________________________________________________
# ParserError exception

class ParserError (exceptions.Exception):
    """Class ParserError
    Exception class for parser errors (I assume).
    """
    pass

# ______________________________________________________________________

class STType (object):
    """Class STType
    """