def main():
    """Build a parser from the "Grammar" file and optionally try it out.

    The grammar is parsed with PgenParser.parseFile(), turned into a grammar
    object by a PyPgen instance, and pretty-printed to standard output.

    Command-line flags (recognised anywhere in sys.argv):

    -py  Precede the dump with a header comment and "grammarObj =".
    -i   Tokenize and parse an input with the new grammar and show the
         resulting parse tree.  The input is the first argument that is
         not one of these flags; standard input is read when there is none.
    """
    import sys, PgenParser, pprint, DFAParser
    # ____________________________________________________________
    # Generate a test parser
    grammarST = PgenParser.parseFile("Grammar")
    pgenObj = PyPgen()
    grammarObj = pgenObj(grammarST)
    if "-py" in sys.argv:
        # No newline after "grammarObj =": the pretty-printed grammar that
        # follows continues the same line, so the output reads as an
        # assignment statement.
        sys.stdout.write(
            "# %s\n"
            "# This was automatically generated by PyPgen.\n"
            "# Hack at your own risk.\n"
            "\n"
            "grammarObj =" % ("_" * 70))
    # NOTE(review): the dump is assumed to run unconditionally, with -py
    # only adding the prefix above -- confirm against the intended layout.
    pprint.pprint(grammarObj)
    if "-i" in sys.argv:
        # __________________________________________________
        # Parse some input
        # Flags may appear in any position, so sys.argv[1] is not
        # necessarily a file name; take the first non-flag argument.
        inputArgs = [arg for arg in sys.argv[1:] if arg not in ("-py", "-i")]
        if inputArgs:
            fileObj = open(inputArgs[0])
        else:
            fileObj = sys.stdin
        try:
            tokenizer = StdTokenizer.StdTokenizer().tokenize(fileObj)
            # 257 is passed in the start-symbol position of parsetok();
            # presumably the grammar's first nonterminal -- TODO confirm.
            parseTree = DFAParser.parsetok(tokenizer, grammarObj, 257)
        finally:
            # Release the file even when parsing fails, but leave the
            # process-wide standard input open.
            if fileObj is not sys.stdin:
                fileObj.close()
        # __________________________________________________
        # Show the result
        if __BASIL__:
            from basil.visuals.TreeBox import showTree
            showTree(parseTree).mainloop()
        else:
            pprint.pprint(parseTree)
def _seq2st (seqobj):
    """_seq2st()
    Recursively translate the compact sequence syntax tree into the more
    verbose node representation returned by the parser machinery.

    seqobj is a sequence whose first item is a symbol number.  For a
    nonterminal (symbol number >= token.NT_OFFSET) the remaining items are
    child sequences.  For a terminal the remaining items are the token
    text and, optionally, a line number.

    Returns a pair (node, line_no).  node is a pair
    ((symbol_no, text, line_no), children); a terminal given with three
    entries is reused as-is as the first item of its node.  line_no is the
    terminal's own line number (0 when it has none), or the smallest line
    number among a nonterminal's children, which is how line information is
    reconstructed for nonterminal symbols.

    Raises ParserError when a nonterminal has no children, when its
    children are rejected by _validateChildren(), or when a terminal does
    not have 2 or 3 entries.
    """
    symbol_no = seqobj[0]
    if symbol_no >= token.NT_OFFSET:
        # Raise rather than assert: asserts vanish under -O, and an empty
        # child list would otherwise fail later with an unrelated error.
        if len(seqobj) < 2:
            raise ParserError("nonterminal nodes must have at least one child")
        # This is going to create a list of pairs of (node, line_no) data:
        child_data = [_seq2st(child) for child in seqobj[1:]]
        children = [child_node for child_node, _ in child_data]
        # Validate the children against the DFA for the non-terminal.
        dfa = DFAParser.findDFA(pygrammar, symbol_no)
        _validateChildren(dfa, children)
        # Compute a line number and create the actual node object.
        line_no = min([child_line for _, child_line in child_data])
        node = ((symbol_no, '', line_no), children)
    elif len(seqobj) == 3:
        node = (seqobj, [])
        line_no = seqobj[2]
    elif len(seqobj) == 2:
        # No line number supplied; 0 stands in for "unknown".
        node = ((symbol_no, seqobj[1], 0), [])
        line_no = 0
    else:
        raise ParserError("terminal nodes must have 2 or 3 entries")
    return node, line_no
def _doParse (source, start):
    """_doParse()
    Tokenize source with PyTokenizer.tokenizeString(), run DFAParser over
    the resulting tokens using the module-level pygrammar and the start
    symbol start, and return the parser's result wrapped in an STType.

    (The original author likens this to CPython's
    PyParser_SimpleParseString().)
    """
    token_stream = PyTokenizer.PyTokenizer().tokenizeString(source)
    raw_tree = DFAParser.parsetok(token_stream, pygrammar, start)
    return STType(raw_tree)
def _validateChildren (dfa, children):
    """_validateChildren()
    Check that a sequence of child nodes is accepted by a nonterminal's DFA.

    dfa is a 5-tuple (symbol_no, symbol_name, initial, states, first).
    Each entry of states is a triple (arcs, accel, accept), where arcs is a
    sequence of (label, state_index) pairs.  children is a sequence of
    nodes shaped like ((symbol, text, line_no), grandchildren).

    Starting from states[initial], every child is turned into a grammar
    label with DFAParser.classify() and must match the label of one of the
    current state's arcs; the matching arc selects the next state.

    Returns None.  Raises ParserError when a child matches no arc, or when
    the state reached after the last child does not have accept == 1.
    """
    symbol_no, symbol_name, initial, states, first = dfa
    crnt_state = states[initial]
    last_child = None
    for child in children:
        (child_symbol, child_text, child_line_no), grandchildren = child
        ilabel = DFAParser.classify(pygrammar, child_symbol, child_text)
        arcs, accel, accept = crnt_state
        for arc_label, arc_state in arcs:
            if ilabel == arc_label:
                crnt_state = states[arc_state]
                break
        else:
            # No arc of the current state carries this child's label.
            raise ParserError("symbol %d should be in %s" %
                              (ilabel, str(arcs)))
        last_child = child
    if crnt_state[2] != 1:
        # With no children at all there is no "last child" to report, so
        # that case gets its own message instead of an unbound-name error.
        if last_child is None:
            raise ParserError("incomplete sequence of children "
                              "(no children given)")
        raise ParserError("incomplete sequence of children (ended with %s)" %
                          str(last_child[0]))
as if st2tuple() was already called on the output.  Its only claim to
fame is that it is done using Pure Python(tm) with none of those icky C
extensions making it run fast.
"""
# ______________________________________________________________________
# Module imports

import token, exceptions, compiler
import PyTokenizer, PyGrammar, DFAParser

# ______________________________________________________________________
# XXX What I really want to do is parameterize this module over an
# input grammar object.  Perhaps this can be done using an external
# class and then pyparser just calls into an instance of this class.

# Module-wide grammar: PyGrammar.grammarObj as returned by
# DFAParser.addAccelerators().  The parsing and validation helpers in
# this module read it as a global.
pygrammar = DFAParser.addAccelerators(PyGrammar.grammarObj)

# ______________________________________________________________________
# ParserError exception

class ParserError (exceptions.Exception):
    """Class ParserError
    Exception raised by this module when a syntax tree cannot be
    accepted: a terminal node that does not have 2 or 3 entries, or a
    sequence of children that does not match the DFA of its nonterminal.
    """
    pass

# ______________________________________________________________________

class STType (object):
    """Class STType
    """