def inifile_BNF():
    """Return the shared pyparsing grammar for INI-style text.

    The grammar is assembled on the first call and stored in the
    module-level ``inibnf``; later calls return that stored object.

    A file is a series of ``[section]`` headers, each followed by
    ``key = value`` lines.  Text from a ``;`` to the end of the line is
    ignored as a comment.
    """
    global inibnf

    # grammar already built by an earlier call: hand it back unchanged
    if inibnf:
        return inibnf

    def trimKey(toks):
        # strip any leading or trailing blanks from the key
        toks[0] = toks[0].strip()

    # punctuation: brackets and '=' are matched but kept out of the results
    openBracket = Literal("[").suppress()
    closeBracket = Literal("]").suppress()
    assignOp = Literal("=").suppress()
    commentLine = Literal(";") + Optional(restOfLine)

    # every printable except the terminating character, plus blank and tab
    sectionChars = printables.replace("]", "") + " \t"
    keyChars = printables.replace("=", "") + " \t"

    sectionHeader = openBracket + Word(sectionChars) + closeBracket
    keyValue = ~openBracket + Word(keyChars) + assignOp + empty + restOfLine
    keyValue.setParseAction(trimKey)

    # using Dict allows retrieval of named data fields as attributes of
    # the parsed results
    sectionBody = Dict(ZeroOrMore(Group(keyValue)))
    inibnf = Dict(ZeroOrMore(Group(sectionHeader + sectionBody)))
    inibnf.ignore(commentLine)
    return inibnf
def getLogLineBNF():
    """Return the pyparsing grammar for one HTTP server access-log line.

    The grammar is built on the first call and cached in the module-level
    ``logLineBNF``; every later call returns that same object.

    Results names set on a parsed line: ``ipAddr``, ``auth``,
    ``timestamp`` (a group holding the date/time text and the time-zone
    offset), ``cmd`` (post-processed by ``getCmdFields``), ``statusCode``,
    ``numBytesSent``, and ``referrer`` / ``clientSfw`` (both passed
    through ``removeQuotes``).
    """
    global logLineBNF

    if logLineBNF is None:
        integer = Word(nums)
        ipAddress = delimitedList(integer, ".", combine=True)

        timeZoneOffset = Word("+-", nums)
        # Three-letter month: one capital followed by lowercase letters.
        # string.uppercase / string.lowercase exist only on Python 2; the
        # ascii_* constants are available on both Python 2 and Python 3.
        month = Word(string.ascii_uppercase, string.ascii_lowercase, exact=3)
        serverDateTime = Group(
            Suppress("[")
            + Combine(integer + "/" + month + "/" + integer
                      + ":" + integer + ":" + integer + ":" + integer)
            + timeZoneOffset
            + Suppress("]"))

        logLineBNF = (
            ipAddress.setResultsName("ipAddr")
            + Suppress("-")
            + ("-" | Word(alphas + nums + "@._")).setResultsName("auth")
            + serverDateTime.setResultsName("timestamp")
            + dblQuotedString.setResultsName("cmd").setParseAction(getCmdFields)
            # status code and byte count may each be logged as a bare "-"
            + (integer | "-").setResultsName("statusCode")
            + (integer | "-").setResultsName("numBytesSent")
            + dblQuotedString.setResultsName("referrer").setParseAction(
                removeQuotes)
            + dblQuotedString.setResultsName("clientSfw").setParseAction(
                removeQuotes))
    return logLineBNF
def parser():
    """Return the grammar for regular-expression source text, built once.

    The first call assembles the grammar and stores it in the module-level
    ``_parser``; later calls return the stored object.

    Building the grammar calls
    ``ParserElement.setDefaultWhitespaceChars("")``, so whitespace is not
    skipped by the elements created here.
    """
    global _parser
    if _parser is not None:
        return _parser

    ParserElement.setDefaultWhitespaceChars("")
    lbrack, rbrack, lbrace, rbrace, lparen, rparen = [
        Literal(ch) for ch in "[]{}()"
    ]

    # \d, \w and \s are macros; a backslash before any other printable
    # character is an escaped character
    reMacro = Combine("\\" + oneOf(list("dws")))
    escapedChar = ~reMacro + Combine("\\" + oneOf(list(printables)))

    # literal characters: printables minus the regex punctuation, plus
    # blank and tab
    reserved = r"\[]{}().*?+|"
    reLiteralChar = "".join(ch for ch in printables if ch not in reserved) + " \t"

    reRange = Combine(lbrack + SkipTo(rbrack, ignore=escapedChar) + rbrack)
    reLiteral = escapedChar | oneOf(list(reLiteralChar))
    reDot = Literal(".")

    # repetition is {n}, {m,n}, or one of the operators * + ?
    exactCount = lbrace + Word(nums).setResultsName("count") + rbrace
    boundedCount = (lbrace + Word(nums).setResultsName("minCount") + ","
                    + Word(nums).setResultsName("maxCount") + rbrace)
    repetition = exactCount | boundedCount | oneOf(list("*+?"))

    # attach the handlers in the same order the elements are listed
    for element, action in (
        (reRange, handleRange),
        (reLiteral, handleLiteral),
        (reMacro, handleMacro),
        (reDot, handleDot),
    ):
        element.setParseAction(action)

    reTerm = reLiteral | reRange | reMacro | reDot
    _parser = operatorPrecedence(reTerm, [
        (repetition, 1, opAssoc.LEFT, handleRepetition),
        (None, 2, opAssoc.LEFT, handleSequence),
        (Suppress('|'), 2, opAssoc.LEFT, handleAlternative),
    ])
    return _parser
def parser(self):
    """
    Build the search-query grammar and return its ``parseString`` method.

    The grammar is meant to resemble most full text search engines
    (Google, Tsearch, Lucene).

    Grammar:
    - a query consists of alphanumeric words, each optionally ending in
      a '*' wildcard
    - a sequence of words between double quotes is a literal string
    - words can be combined with the operators 'and' and 'or'
    - words with operators can be grouped with parentheses
    - a word or group of words can be preceded by a 'not' operator
    - 'and' binds more tightly than 'or'
    - when no operator is given between terms, 'and' is used
    """
    orExpr = Forward()

    # single word, with or without a trailing '*'
    wildcardWord = Group(
        Combine(Word(alphanums) + Suppress('*'))).setResultsName('wordwildcard')
    plainWord = Group(Word(alphanums)).setResultsName('word')
    word = wildcardWord | plainWord

    # one or more words, used as the content of a quoted string
    quotedWords = Forward()
    quotedWords << ((word + quotedWords) | word)

    quoted = Group(
        Suppress('"') + quotedWords + Suppress('"')).setResultsName("quotes")
    quotedOrWord = quoted | word

    grouped = Group(
        Suppress("(") + orExpr + Suppress(")")).setResultsName("parenthesis")
    groupedOrQuoted = grouped | quotedOrWord

    notExpr = Forward()
    negated = Group(
        Suppress(Keyword("not", caseless=True)) + notExpr).setResultsName("not")
    notExpr << (negated | groupedOrQuoted)

    # 'and' is either written out or implied by adjacent terms
    andExpr = Forward()
    explicitAnd = Group(
        notExpr + Suppress(Keyword("and", caseless=True)) + andExpr
    ).setResultsName("and")
    impliedAnd = Group(
        notExpr + OneOrMore(~oneOf("and or") + andExpr)
    ).setResultsName("and")
    andExpr << (explicitAnd | impliedAnd | notExpr)

    explicitOr = Group(
        andExpr + Suppress(Keyword("or", caseless=True)) + orExpr
    ).setResultsName("or")
    orExpr << (explicitOr | andExpr)

    return orExpr.parseString
def BNF():
    """
    Return the cached arithmetic-expression grammar, building it on the
    first call and storing it in the module-level ``bnf``.

    expop   :: '^'
    multop  :: '*' | '/'
    addop   :: '+' | '-'
    integer :: ['+' | '-'] '0'..'9'+
    atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
    factor  :: atom [ expop factor ]*
    term    :: factor [ multop factor ]*
    expr    :: term [ addop term ]*
    """
    global bnf
    if not bnf:
        e = CaselessLiteral( "E" )
        # The optional fraction and exponent are non-capturing groups,
        # spelled "(?:...)".  Note that "(:?...)" would instead be a
        # capturing group that starts with an optional ':' and would let
        # text such as "1:.5" match as a number.
        fnumber = Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?")
        ident = Word(alphas, alphas+nums+"_$")

        plus  = Literal( "+" )
        minus = Literal( "-" )
        mult  = Literal( "*" )
        div   = Literal( "/" )
        lpar  = Literal( "(" ).suppress()
        rpar  = Literal( ")" ).suppress()
        addop  = plus | minus
        multop = mult | div
        expop = Literal( "^" )
        pi    = CaselessLiteral( "PI" )

        expr = Forward()
        # an atom is any number of leading '-' signs followed by a constant,
        # number, function call or identifier, or else a parenthesized
        # expression; pushUMinus runs on the atom as a whole
        atom = ((0,None)*minus + ( pi | e | fnumber | ident + lpar + expr + rpar | ident ).setParseAction( pushFirst ) |
                Group( lpar + expr + rpar )).setParseAction(pushUMinus)

        # by defining exponentiation as "atom [ ^ factor ]..." instead of
        # "atom [ ^ atom ]...", we get right-to-left exponents, instead of
        # left-to-right; that is, 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = Forward()
        factor << atom + ZeroOrMore( ( expop + factor ).setParseAction( pushFirst ) )

        term = factor + ZeroOrMore( ( multop + factor ).setParseAction( pushFirst ) )
        expr << term + ZeroOrMore( ( addop + term ).setParseAction( pushFirst ) )
        bnf = expr
    return bnf
# vim:fileencoding=utf-8
#
# greetingInKorean.py
#
# Demonstration of the parsing module, on the prototypical "Hello, World!" example
#
from pyparsingOD import Word, srange

# characters U+AC00 through U+D7A3, written in srange's \0x hex notation
koreanChars = srange(r"[\0xac00-\0xd7a3]")
# a word is a run of at least two of those characters
koreanWord = Word(koreanChars, min=2)

# grammar: <word> "," <word> "!"
greet = koreanWord + "," + koreanWord + "!"

# text to parse: "Hello, World!" in Korean
hello = '\uc548\ub155, \uc5ec\ub7ec\ubd84!'

# parse the text and show the resulting tokens
parsed = greet.parseString(hello)
print(parsed)
This means that just about anything can be an attribute, unlike in XML. BLOB A BLOB is unique to Stackish and allows you to record any content (even binary content) inside the structure. This is done by pre- sizing the data with the NUMBER similar to Dan Bernstein's netstrings setup. SPACE White space is basically ignored. This is interesting because since Stackish is serialized consistently this means you can use \n as the separation character and perform reasonable diffs on two structures. """ from pyparsingOD import Suppress,Word,nums,alphas,alphanums,Combine,oneOf,\ Optional,QuotedString,Forward,Group,ZeroOrMore,printables,srange MARK, UNMARK, AT, COLON, QUOTE = map(Suppress, "[]@:'") NUMBER = Word(nums) NUMBER.setParseAction(lambda t: int(t[0])) FLOAT = Combine(oneOf("+ -") + Word(nums) + "." + Optional(Word(nums))) FLOAT.setParseAction(lambda t: float(t[0])) STRING = QuotedString('"', multiline=True) WORD = Word(alphas, alphanums + "_:") ATTRIBUTE = Combine(AT + WORD) strBody = Forward() def setBodyLength(tokens): strBody << Word(srange(r'[\0x00-\0xffff]'), exact=int(tokens[0])) return ""
# excelExpr.py # # Copyright 2010, Paul McGuire # # A partial implementation of a parser of Excel formula expressions. # from pyparsingOD import (CaselessKeyword, Suppress, Word, alphas, alphanums, nums, Optional, Group, oneOf, Forward, Regex, operatorPrecedence, opAssoc, dblQuotedString, delimitedList, Combine, Literal, QuotedString) EQ,EXCL,LPAR,RPAR,COLON,COMMA = map(Suppress, '=!():,') EXCL, DOLLAR = map(Literal,"!$") sheetRef = Word(alphas, alphanums) | QuotedString("'",escQuote="''") colRef = Optional(DOLLAR) + Word(alphas,max=2) rowRef = Optional(DOLLAR) + Word(nums) cellRef = Combine(Group(Optional(sheetRef + EXCL)("sheet") + colRef("col") + rowRef("row"))) cellRange = (Group(cellRef("start") + COLON + cellRef("end"))("range") | cellRef | Word(alphas,alphanums)) expr = Forward() COMPARISON_OP = oneOf("< = > >= <= != <>") condExpr = expr + COMPARISON_OP + expr ifFunc = (CaselessKeyword("if") + LPAR + Group(condExpr)("condition") + COMMA + expr("if_true") +
foreign key (student_id) references students(student_id); alter table only student_registrations add constraint classes_link foreign key (class_id) references classes(class_id); """.upper() from pyparsingOD import Literal, CaselessLiteral, Word, delimitedList \ ,Optional, Combine, Group, alphas, nums, alphanums, Forward \ , oneOf, sglQuotedString, OneOrMore, ZeroOrMore, CharsNotIn \ , replaceWith skobki = "(" + ZeroOrMore(CharsNotIn(")")) + ")" field_def = OneOrMore(Word(alphas, alphanums + "_\"':-") | skobki) def field_act(s, loc, tok): return ("<" + tok[0] + "> " + " ".join(tok)).replace("\"", "\\\"") field_def.setParseAction(field_act) field_list_def = delimitedList(field_def) def field_list_act(toks): return " | ".join(toks)
def _pushFirst( str, loc, toks ): if debug_flag: print("pushing ", toks[0], "str is ", str) exprStack.append( toks[0] ) def _assignVar( str, loc, toks ): global targetvar targetvar = toks[0] #----------------------------------------------------------------------------- # The following statements define the grammar for the parser. point = Literal('.') e = CaselessLiteral('E') plusorminus = Literal('+') | Literal('-') number = Word(nums) integer = Combine( Optional(plusorminus) + number ) floatnumber = Combine( integer + Optional( point + Optional(number) ) + Optional( e + integer ) ) lbracket = Literal("[") rbracket = Literal("]") ident = Forward() ## The definition below treats array accesses as identifiers. This means your expressions ## can include references to array elements, rows and columns, e.g., a = b[i] + 5. ## Expressions within []'s are not presently supported, so a = b[i+1] will raise ## a ParseException. ident = Combine(Word(alphas + '-',alphanums + '_') + \ ZeroOrMore(lbracket + (Word(alphas + '-',alphanums + '_')|integer) + rbracket) \
print("tokens = ", tokens) print("tokens.columns =", tokens.columns) print("tokens.tables =", tokens.tables) print("tokens.where =", tokens.where) except ParseException as err: print(" " * err.loc + "^\n" + err.msg) print(err) print() # define SQL tokens selectStmt = Forward() selectToken = Keyword("select", caseless=True) fromToken = Keyword("from", caseless=True) ident = Word(alphas, alphanums + "_$").setName("identifier") columnName = Upcase(delimitedList(ident, ".", combine=True)) columnNameList = Group(delimitedList(columnName)) tableName = Upcase(delimitedList(ident, ".", combine=True)) tableNameList = Group(delimitedList(tableName)) whereExpression = Forward() and_ = Keyword("and", caseless=True) or_ = Keyword("or", caseless=True) in_ = Keyword("in", caseless=True) E = CaselessLiteral("E") binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True) arithSign = Word("+-", exact=1) realNum = Combine( Optional(arithSign) +
# vim:fileencoding=utf-8
#
# greetingInGreek.py
#
# Demonstration of the parsing module, on the prototypical "Hello, World!" example
#
from pyparsingOD import Word

# define grammar
# word characters are the code points U+0386 up to (not including) U+03CE
alphas = ''.join(chr(x) for x in range(0x386, 0x3ce))
greet = Word(alphas) + ',' + Word(alphas) + '!'

# input string
# On Python 3 a str literal is already Unicode text and has no .decode()
# method, so the literal is used as written.
hello = "Καλημέρα, κόσμε!"

# parse input string
print(greet.parseString( hello ))
# protobuf_parser.py
#
#  simple parser for parsing protobuf .proto files
#
#  Copyright 2010, Paul McGuire
#

from pyparsingOD import (Word, alphas, alphanums, Regex, Suppress, Forward,
    Group, oneOf, ZeroOrMore, Optional, delimitedList, Keyword,
    restOfLine, quotedString)

ident = Word(alphas + "_", alphanums + "_").setName("identifier")
integer = Regex(r"[+-]?\d+")

# punctuation is matched but left out of the parsed results
LBRACE, RBRACE, LBRACK, RBRACK, LPAR, RPAR, EQ, SEMI = map(
    Suppress, "{}[]()=;")

kwds = """message required optional repeated enum extensions extends extend
          to package service rpc returns true false option import"""
# Bind one module-level Keyword per word, named as the upper-cased word plus
# a trailing underscore (MESSAGE_, TRUE_, ...).  Assigning through globals()
# defines the same names as building and exec-ing an assignment statement,
# without evaluating generated source text.
for kw in kwds.split():
    globals()[kw.upper() + "_"] = Keyword(kw)

messageBody = Forward()

# '-' after MESSAGE_ is pyparsing's error-stop form of '+'
messageDefn = MESSAGE_ - ident("messageId") + LBRACE + messageBody(
    "body") + RBRACE

# a field type is one of the built-in scalar names or any identifier
typespec = oneOf("""double float int32 int64 uint32 uint64 sint32 sint64
                    fixed32 fixed64 sfixed32 sfixed64 bool string bytes"""
                 ) | ident
rvalue = integer | TRUE_ | FALSE_ | ident
testData = """ #define MAX_LOCS=100 #define USERNAME = "******" #define PASSWORD = "******" a = MAX_LOCS; CORBA::initORB("xyzzy", USERNAME, PASSWORD ); """ ################# print("Example of an extractor") print("----------------------") # simple grammar to match #define's ident = Word(alphas, alphanums+"_") macroDef = Literal("#define") + ident.setResultsName("name") + "=" + restOfLine.setResultsName("value") for t,s,e in macroDef.scanString( testData ): print(t.name,":", t.value) # or a quick way to make a dictionary of the names and values # (return only key and value tokens, and construct dict from key-value pairs) # - empty ahead of restOfLine advances past leading whitespace, does implicit lstrip during parsing macroDef = Suppress("#define") + ident + Suppress("=") + empty + restOfLine macros = dict(list(macroDef.searchString(testData))) print("macros =", macros) print() ################# print("Examples of a transformer")
#
# A simple example showing the use of the implied listAllMatches=True for
# results names with a trailing '*' character.
#
# This example performs work similar to itertools.groupby, but without
# having to sort the input first.
#
from pyparsingOD import Word, ZeroOrMore, nums

# each expression is a leading letter followed by digits
aExpr = Word("A", nums)
bExpr = Word("B", nums)
cExpr = Word("C", nums)

# the trailing '*' in each results name collects every match under that
# name instead of keeping only the last one
taggedItem = aExpr("A*") | bExpr("B*") | cExpr("C*")
grammar = ZeroOrMore(taggedItem)

results = grammar.parseString("A1 B1 A2 C1 B2 A3")
print(results.dump())
print(pe) else: if fn != None: print(fn( bnf.parseString( strg ) )) else: print() digits = "0123456789" # Version 1 element = Regex("A[cglmrstu]|B[aehikr]?|C[adeflmorsu]?|D[bsy]|" "E[rsu]|F[emr]?|G[ade]|H[efgos]?|I[nr]?|Kr?|L[airu]|" "M[dgnot]|N[abdeiop]?|Os?|P[abdmortu]?|R[abefghnu]|" "S[bcegimnr]?|T[abcehilm]|U(u[bhopqst])?|V|W|Xe|Yb?|Z[nr]") element = Word( alphas.upper(), alphas.lower(), max=2) elementRef = Group( element + Optional( Word( digits ), default="1" ) ) formula = OneOrMore( elementRef ) fn = lambda elemList : sum( [ atomicWeight[elem]*int(qty) for elem,qty in elemList ] ) test( formula, "H2O", fn ) test( formula, "C6H5OH", fn ) test( formula, "NaCl", fn ) print() # Version 2 - access parsed items by field name elementRef = Group( element("symbol") + Optional( Word( digits ), default="1" )("qty") ) formula = OneOrMore( elementRef ) fn = lambda elemList : sum( [ atomicWeight[elem.symbol]*int(elem.qty) for elem in elemList ] ) test( formula, "H2O", fn )
def setBodyLength(tokens):
    """Parse action that sizes ``strBody`` from a just-parsed length.

    ``tokens[0]`` is converted to an int and ``strBody`` is redefined as a
    ``Word`` of exactly that many characters drawn from the range
    ``\\0x00``-``\\0xffff``.  An empty string is returned as the action's
    result.
    """
    bodyLength = int(tokens[0])
    anyChar = srange(r'[\0x00-\0xffff]')
    strBody << Word(anyChar, exact=bodyLength)
    return ""
def eval(self): val1 = self.value[0].eval() for op, val in operatorOperands(self.value[1:]): fn = EvalComparisonOp.opMap[op] val2 = val.eval() if not fn(val1, val2): break val1 = val2 else: return True return False # define the parser integer = Word(nums) real = Combine(Word(nums) + "." + Word(nums)) variable = Word(alphas, exact=1) operand = real | integer | variable signop = oneOf('+ -') multop = oneOf('* /') plusop = oneOf('+ -') expop = Literal('**') # use parse actions to attach EvalXXX constructors to sub-expressions operand.setParseAction(EvalConstant) arith_expr = operatorPrecedence(operand, [ (signop, 1, opAssoc.RIGHT, EvalSignOp), (expop, 2, opAssoc.LEFT, EvalPowerOp), (multop, 2, opAssoc.LEFT, EvalMultOp),
| min | 7 | 43 | 7 | 15 | 82 | 98 | 1 | 37 | | max | 11 | 52 | 10 | 17 | 85 | 112 | 4 | 39 | | ave | 9 | 47 | 8 | 16 | 84 | 106 | 3 | 38 | | sdev | 1 | 3 | 1 | 1 | 1 | 3 | 1 | 1 | +-------+------+------+------+------+------+------+------+------+ """ # define grammar for datatable heading = ( Literal( "+-------+------+------+------+------+------+------+------+------+") + "| | A1 | B1 | C1 | D1 | A2 | B2 | C2 | D2 |" + "+=======+======+======+======+======+======+======+======+======+" ).suppress() vert = Literal("|").suppress() number = Word(nums) rowData = Group(vert + Word(alphas) + vert + delimitedList(number, "|") + vert) trailing = Literal( "+-------+------+------+------+------+------+------+------+------+" ).suppress() datatable = heading + Dict(ZeroOrMore(rowData)) + trailing # now parse data and print results data = datatable.parseString(testData) print(data) pprint.pprint(data.asList()) print("data keys=", list(data.keys())) print("data['min']=", data['min']) print("data.max", data.max)
# greeting.py
#
# Demonstration of the pyparsing module, on the prototypical "Hello, World!"
# example
#
# Copyright 2003, by Paul McGuire
#
from pyparsingOD import Word, alphas

# grammar: <word> "," <word> "!"
word = Word(alphas)
greet = word + "," + word + "!"

# text to parse
hello = "Hello, World!"

# parse the text and show the input next to the resulting tokens
parsed = greet.parseString(hello)
print(hello, "->", parsed)
#
#import imputil
import sys
import os
import types
import urllib.parse

DEBUG = False

from pyparsingOD import Word, Group, ZeroOrMore, alphas, \
    alphanums, ParserElement, ParseException, ParseSyntaxException, \
    Empty, LineEnd, OneOrMore, col, Keyword, pythonStyleComment, \
    StringEnd, traceParseAction


ident = Word(alphas+"_", alphanums+"_$")

# words that may not be used as identifiers
pythonKeywords = """and as assert break class continue def
    del elif else except exec finally for from global if import
    in is lambda None not or pass print raise return try while with
    yield True False"""
pythonKeywords = set(pythonKeywords.split())

def no_keywords_allowed(s,l,t):
    """Parse action for ``ident``: reject a word listed in ``pythonKeywords``.

    s, l and t are the input string, the match location and the matched
    tokens.  Raises ParseException at that location when ``t[0]`` is one
    of the listed words; otherwise returns None and the tokens are left
    unchanged.
    """
    wd = t[0]
    if wd in pythonKeywords:
        errmsg = "cannot use keyword '%s' " \
                 "as an identifier" % wd
        raise ParseException(s,l,errmsg)
ident.setParseAction(no_keywords_allowed)

# a transition is written  fromState -> toState
stateTransition = ident("fromState") + "->" + ident("toState")
evalop = any class BoolNot(object): def __init__(self,t): self.arg = t[0][1] def __bool__(self): v = bool(self.arg) return not v def __str__(self): return "~" + str(self.arg) __repr__ = __str__ __nonzero__ = __bool__ TRUE = Keyword("True") FALSE = Keyword("False") boolOperand = TRUE | FALSE | Word(alphas,max=1) boolOperand.setParseAction(BoolOperand) # define expression, based on expression operand and # list of operations in precedence order boolExpr = infixNotation( boolOperand, [ ("not", 1, opAssoc.RIGHT, BoolNot), ("and", 2, opAssoc.LEFT, BoolAnd), ("or", 2, opAssoc.LEFT, BoolOr), ]) if __name__ == "__main__": p = True q = False
from pyparsingOD import Literal, Word, Group, Dict, ZeroOrMore, alphas, nums, delimitedList import pprint testData = """ +-------+------+------+------+------+------+------+------+------+ | | A1 | B1 | C1 | D1 | A2 | B2 | C2 | D2 | +=======+======+======+======+======+======+======+======+======+ | min | 7 | 43 | 7 | 15 | 82 | 98 | 1 | 37 | | max | 11 | 52 | 10 | 17 | 85 | 112 | 4 | 39 | | ave | 9 | 47 | 8 | 16 | 84 | 106 | 3 | 38 | | sdev | 1 | 3 | 1 | 1 | 1 | 3 | 1 | 1 | +-------+------+------+------+------+------+------+------+------+ """ # define grammar for datatable underline = Word("-=") number = Word(nums).setParseAction(lambda t: int(t[0])) vert = Literal("|").suppress() rowDelim = ("+" + ZeroOrMore(underline + "+")).suppress() columnHeader = Group(vert + vert + delimitedList(Word(alphas + nums), "|") + vert) heading = rowDelim + columnHeader.setResultsName("columns") + rowDelim rowData = Group(vert + Word(alphas) + vert + delimitedList(number, "|") + vert) trailing = rowDelim datatable = heading + Dict(ZeroOrMore(rowData)) + trailing # now parse data and print results data = datatable.parseString(testData)
Created on 4 sept. 2010 @author: luca (Minor updates by Paul McGuire, June, 2012) ''' from pyparsingOD import Word, ZeroOrMore, printables, Suppress, OneOrMore, Group, \ LineEnd, Optional, White, originalTextFor, hexnums, nums, Combine, Literal, Keyword, \ cStyleComment, Regex, Forward, MatchFirst, And, srange, oneOf, alphas, alphanums, \ delimitedList # http://www.antlr.org/grammar/ANTLR/ANTLRv3.g # Tokens EOL = Suppress(LineEnd()) # $ singleTextString = originalTextFor(ZeroOrMore(~EOL + (White(" \t") | Word(printables)))).leaveWhitespace() XDIGIT = hexnums INT = Word(nums) ESC = Literal('\\') + (oneOf(list(r'nrtbf\">'+"'")) | ('u' + Word(hexnums, exact=4)) | Word(printables, exact=1)) LITERAL_CHAR = ESC | ~(Literal("'") | Literal('\\')) + Word(printables, exact=1) CHAR_LITERAL = Suppress("'") + LITERAL_CHAR + Suppress("'") STRING_LITERAL = Suppress("'") + Combine(OneOrMore(LITERAL_CHAR)) + Suppress("'") DOUBLE_QUOTE_STRING_LITERAL = '"' + ZeroOrMore(LITERAL_CHAR) + '"' DOUBLE_ANGLE_STRING_LITERAL = '<<' + ZeroOrMore(Word(printables, exact=1)) + '>>' TOKEN_REF = Word(alphas.upper(), alphanums+'_') RULE_REF = Word(alphas.lower(), alphanums+'_') ACTION_ESC = (Suppress("\\") + Suppress("'")) | Suppress('\\"') | Suppress('\\') + (~(Literal("'") | Literal('"')) + Word(printables, exact=1)) ACTION_CHAR_LITERAL = Suppress("'") + (ACTION_ESC | ~(Literal('\\') | Literal("'")) + Word(printables, exact=1)) + Suppress("'") ACTION_STRING_LITERAL = Suppress('"') + ZeroOrMore(ACTION_ESC | ~(Literal('\\') | Literal('"')) + Word(printables, exact=1)) + Suppress('"') SRC = Suppress('src') + ACTION_STRING_LITERAL("file") + INT("line") id = TOKEN_REF | RULE_REF
outOfRangeMessage = { (True, False) : "value %%s is greater than %s" % maxval, (False, True) : "value %%s is less than %s" % minval, (False, False) : "value %%s is not in the range (%s to %s)" % (minval,maxval), }[minval is None, maxval is None] # define the actual range checking parse action def rangeCheckParseAction(string, loc, tokens): parsedval = tokens[0] if not inRangeFn(parsedval): raise ParseException(string, loc, outOfRangeMessage % parsedval) return rangeCheckParseAction # define the expressions for a date of the form YYYY/MM/DD or YYYY/MM (assumes YYYY/MM/01) integer = Word(nums).setName("integer") integer.setParseAction(lambda t:int(t[0])) month = integer.copy().addParseAction(rangeCheck(1,12)) day = integer.copy().addParseAction(rangeCheck(1,31)) year = integer.copy().addParseAction(rangeCheck(2000, None)) SLASH = Suppress('/') dateExpr = year("year") + SLASH + month("month") + Optional(SLASH + day("day")) dateExpr.setName("date") # convert date fields to datetime (also validates dates as truly valid dates) dateExpr.setParseAction(lambda t: datetime(t.year, t.month, t.day or 1).date()) # add range checking on dates mindate = datetime(2002,1,1).date()
This means that just about anything can be an attribute, unlike in XML. BLOB A BLOB is unique to Stackish and allows you to record any content (even binary content) inside the structure. This is done by pre- sizing the data with the NUMBER similar to Dan Bernstein's netstrings setup. SPACE White space is basically ignored. This is interesting because since Stackish is serialized consistently this means you can use \n as the separation character and perform reasonable diffs on two structures. """ from pyparsingOD import Suppress,Word,nums,alphas,alphanums,Combine,oneOf,\ Optional,QuotedString,Forward,Group,ZeroOrMore,printables,srange MARK,UNMARK,AT,COLON,QUOTE = map(Suppress,"[]@:'") NUMBER = Word(nums) NUMBER.setParseAction(lambda t:int(t[0])) FLOAT = Combine(oneOf("+ -") + Word(nums) + "." + Optional(Word(nums))) FLOAT.setParseAction(lambda t:float(t[0])) STRING = QuotedString('"', multiline=True) WORD = Word(alphas,alphanums+"_:") ATTRIBUTE = Combine(AT + WORD) strBody = Forward() def setBodyLength(tokens): strBody << Word(srange(r'[\0x00-\0xffff]'), exact=int(tokens[0])) return "" BLOB = Combine(QUOTE + Word(nums).setParseAction(setBodyLength) + COLON + strBody + QUOTE) item = Forward()
def CORBA_IDL_BNF():
    """Return the pyparsing grammar for CORBA IDL text, building it once.

    The grammar is assembled on the first call and stored in the
    module-level ``bnf``; later calls return the stored object.  The
    top-level expression matches either one module definition or one or
    more module-level items (interface, exception, const, typedef or
    nested module).  ``//`` line comments and C-style comments are ignored.
    """
    global bnf

    if not bnf:

        # punctuation
        colon = Literal(":")
        lbrace = Literal("{")
        rbrace = Literal("}")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lparen = Literal("(")
        rparen = Literal(")")
        equals = Literal("=")
        comma = Literal(",")
        dot = Literal(".")
        slash = Literal("/")
        bslash = Literal("\\")
        star = Literal("*")
        semi = Literal(";")
        langle = Literal("<")
        rangle = Literal(">")

        # keywords
        any_ = Keyword("any")
        attribute_ = Keyword("attribute")
        boolean_ = Keyword("boolean")
        case_ = Keyword("case")
        char_ = Keyword("char")
        const_ = Keyword("const")
        context_ = Keyword("context")
        default_ = Keyword("default")
        double_ = Keyword("double")
        enum_ = Keyword("enum")
        exception_ = Keyword("exception")
        false_ = Keyword("FALSE")
        fixed_ = Keyword("fixed")
        float_ = Keyword("float")
        inout_ = Keyword("inout")
        interface_ = Keyword("interface")
        in_ = Keyword("in")
        long_ = Keyword("long")
        module_ = Keyword("module")
        object_ = Keyword("Object")
        octet_ = Keyword("octet")
        oneway_ = Keyword("oneway")
        out_ = Keyword("out")
        raises_ = Keyword("raises")
        readonly_ = Keyword("readonly")
        sequence_ = Keyword("sequence")
        short_ = Keyword("short")
        string_ = Keyword("string")
        struct_ = Keyword("struct")
        switch_ = Keyword("switch")
        true_ = Keyword("TRUE")
        typedef_ = Keyword("typedef")
        unsigned_ = Keyword("unsigned")
        union_ = Keyword("union")
        void_ = Keyword("void")
        wchar_ = Keyword("wchar")
        wstring_ = Keyword("wstring")

        identifier = Word( alphas, alphanums + "_" ).setName("identifier")

        # numeric literals are matched with regular expressions; the
        # "#~" lines keep the earlier Combine/Word-based definitions
        #~ real = Combine( Word(nums+"+-", nums) + dot + Optional( Word(nums) )
                        #~ + Optional( CaselessLiteral("E") + Word(nums+"+-",nums) ) )
        real = Regex(r"[+-]?\d+\.\d*([Ee][+-]?\d+)?").setName("real")
        #~ integer = ( Combine( CaselessLiteral("0x") + Word( nums+"abcdefABCDEF" ) ) |
                    #~ Word( nums+"+-", nums ) ).setName("int")
        integer = Regex(r"0x[0-9a-fA-F]+|[+-]?\d+").setName("int")

        # user-defined type names may be scoped with "::"
        udTypeName = delimitedList( identifier, "::", combine=True ).setName("udType")
        # have to use longest match for type, in case a user-defined type name starts with a keyword type, like "stringSeq" or "longArray"
        typeName = ( any_ ^ boolean_ ^ char_ ^ double_ ^ fixed_ ^
                    float_ ^ long_ ^ octet_ ^ short_ ^ string_ ^
                    wchar_ ^ wstring_ ^ udTypeName ).setName("type")
        # sequence< ... > may nest, hence the Forward
        sequenceDef = Forward().setName("seq")
        sequenceDef << Group( sequence_ + langle + ( sequenceDef | typeName ) + rangle )
        # a type is a sequence, or a type name with an optional [integer] suffix
        typeDef = sequenceDef | ( typeName + Optional( lbrack + integer + rbrack ) )
        typedefDef = Group( typedef_ + typeDef + identifier + semi ).setName("typedef")

        # modules may contain modules, so moduleDef is filled in further down
        moduleDef = Forward()
        constDef = Group( const_ + typeDef + identifier + equals + ( real | integer | quotedString ) + semi ) #| quotedString )
        exceptionItem = Group( typeDef + identifier + semi )
        exceptionDef = ( exception_ + identifier + lbrace + ZeroOrMore( exceptionItem ) + rbrace + semi )
        attributeDef = Optional( readonly_ ) + attribute_ + typeDef + identifier + semi
        # each parameter is a direction keyword, a type and a name
        paramlist = delimitedList( Group( ( inout_ | in_ | out_ ) + typeName + identifier ) ).setName( "paramlist" )
        # operation: return type (or void), name, parameters, optional raises(...) list
        operationDef = ( ( void_ ^ typeDef ) + identifier + lparen + Optional( paramlist ) + rparen + \
                        Optional( raises_ + lparen + Group( delimitedList( typeName ) ) + rparen ) + semi )
        interfaceItem = ( constDef | exceptionDef | attributeDef | operationDef )
        # interface: name, optional ": base, base, ..." list, then a braced body
        interfaceDef = Group( interface_ + identifier + Optional( colon + delimitedList( typeName ) ) + lbrace + \
                        ZeroOrMore( interfaceItem ) + rbrace + semi ).setName("opnDef")
        moduleItem = ( interfaceDef | exceptionDef | constDef | typedefDef | moduleDef )
        # '+' binds tighter than '<<', so the whole sequence is assigned to moduleDef
        moduleDef << module_ + identifier + lbrace + ZeroOrMore( moduleItem ) + rbrace + semi

        bnf = ( moduleDef | OneOrMore( moduleItem ) )

        # skip both comment forms anywhere in the input
        singleLineComment = "//" + restOfLine
        bnf.ignore( singleLineComment )
        bnf.ignore( cStyleComment )

    return bnf
# The aim of this parser is not to support database application, # but to create automagically a pgn annotated reading the log console file # of a lecture of ICC (Internet Chess Club), saved by Blitzin. # Of course you can modify the Abstract Syntax Tree to your purpose. # # Copyright 2004, by Alberto Santini http://www.albertosantini.it/chess/ # from pyparsingOD import alphanums, nums, quotedString from pyparsingOD import Combine, Forward, Group, Literal, oneOf, OneOrMore, Optional, Suppress, ZeroOrMore, White, Word from pyparsingOD import ParseException # # define pgn grammar # tag = Suppress("[") + Word(alphanums) + Combine(quotedString) + Suppress("]") comment = Suppress("{") + Word(alphanums + " ") + Suppress("}") dot = Literal(".") piece = oneOf("K Q B N R") file_coord = oneOf("a b c d e f g h") rank_coord = oneOf("1 2 3 4 5 6 7 8") capture = oneOf("x :") promote = Literal("=") castle_queenside = Literal("O-O-O") | Literal("0-0-0") | Literal("o-o-o") castle_kingside = Literal("O-O") | Literal("0-0") | Literal("o-o") move_number = Optional(comment) + Word(nums) + dot m1 = file_coord + rank_coord # pawn move e.g. d4 m2 = file_coord + capture + file_coord + rank_coord # pawn capture move e.g. dxe5 m3 = file_coord + "8" + promote + piece # pawn promotion e.g. e8=Q
def Verilog_BNF():
    """Build (on first call) and return the pyparsing grammar for Verilog.

    The grammar is constructed only when the module-level name
    ``verilogbnf`` is ``None``; the result is stored back into that global
    and returned on every later call, so the name must already exist at
    module level before this function is called.

    The returned expression is ``OneOrMore(module | udp) + StringEnd()``
    and is configured to ignore C++-style comments and the compiler
    directives listed in ``compilerDirective``.

    Returns:
        The cached top-level parser element.
    """
    global verilogbnf

    if verilogbnf is None:

        # compiler directives
        compilerDirective = Combine( "`" + \
            oneOf("define undef ifdef else endif default_nettype "
                  "include resetall timescale unconnected_drive "
                  "nounconnected_drive celldefine endcelldefine") + \
            restOfLine ).setName("compilerDirective")

        # primitives
        SEMI,COLON,LPAR,RPAR,LBRACE,RBRACE,LBRACK,RBRACK,DOT,COMMA,EQ = map(Literal,";:(){}[].,=")

        identLead = alphas+"$_"
        identBody = alphanums+"$_"
        # Every piece holding a backslash is a raw string so that "\." reaches
        # the regex engine unchanged and is not an invalid string escape.
        identifier1 = Regex( r"\.?["+identLead+"]["+identBody+r"]*(\.["+identLead+"]["+identBody+"]*)*"
                            ).setName("baseIdent")
        # Escaped identifier: a backslash followed by non-whitespace; the parse
        # action drops the leading backslash.
        identifier2 = Regex(r"\\\S+").setParseAction(lambda t:t[0][1:]).setName("escapedIdent")#.setDebug()
        identifier = identifier1 | identifier2
        # Inline self-check: comparing a parser element with a string tests
        # whether the element matches that string.
        assert(identifier2 == r'\abc')

        hexnums = nums + "abcdefABCDEF" + "_?"
        base = Regex("'[bBoOdDhH]").setName("base")
        basedNumber = Combine( Optional( Word(nums + "_") ) + base + Word(hexnums+"xXzZ"),
                               joinString=" ", adjacent=False ).setName("basedNumber")
        #~ number = ( basedNumber | Combine( Word( "+-"+spacedNums, spacedNums ) +
        #~ Optional( DOT + Optional( Word( spacedNums ) ) ) +
        #~ Optional( e + Word( "+-"+spacedNums, spacedNums ) ) ).setName("numeric") )
        number = ( basedNumber | \
                   Regex(r"[+-]?[0-9_]+(\.[0-9_]*)?([Ee][+-]?[0-9_]+)?") \
                  ).setName("numeric")
        #~ decnums = nums + "_"
        #~ octnums = "01234567" + "_"

        # expr is recursive, so it is declared first and defined further down.
        expr = Forward().setName("expr")
        concat = Group( LBRACE + delimitedList( expr ) + RBRACE )
        multiConcat = Group("{" + expr + concat + "}").setName("multiConcat")
        funcCall = Group(identifier + LPAR + Optional( delimitedList( expr ) ) + RPAR).setName("funcCall")

        subscrRef = Group(LBRACK + delimitedList( expr, COLON ) + RBRACK)
        subscrIdentifier = Group( identifier + Optional( subscrRef ) )
        #~ scalarConst = "0" | (( FollowedBy('1') + oneOf("1'b0 1'b1 1'bx 1'bX 1'B0 1'B1 1'Bx 1'BX 1") ))
        scalarConst = Regex("0|1('[Bb][01xX])?")
        mintypmaxExpr = Group( expr + COLON + expr + COLON + expr ).setName("mintypmax")
        primary = (
                  number |
                  (LPAR + mintypmaxExpr + RPAR ) |
                  ( LPAR + Group(expr) + RPAR ).setName("nestedExpr") |
                  multiConcat |
                  concat |
                  dblQuotedString |
                  funcCall |
                  subscrIdentifier
                  )

        unop = oneOf( "+ - ! ~ & ~& | ^| ^ ~^" ).setName("unop")
        binop = oneOf( "+ - * / % == != === !== && "
                       "|| < <= > >= & | ^ ^~ >> << ** <<< >>>" ).setName("binop")

        expr << (
                ( unop + expr ) | # must be first!
                ( primary + "?" + expr + COLON + expr ) |
                ( primary + Optional( binop + expr ) )
                )

        lvalue = subscrIdentifier | concat

        # keywords
        if_ = Keyword("if")
        else_ = Keyword("else")
        edge = Keyword("edge")
        posedge = Keyword("posedge")
        negedge = Keyword("negedge")
        specify = Keyword("specify")
        endspecify = Keyword("endspecify")
        fork = Keyword("fork")
        join = Keyword("join")
        begin = Keyword("begin")
        end = Keyword("end")
        default = Keyword("default")
        forever = Keyword("forever")
        repeat = Keyword("repeat")
        while_ = Keyword("while")
        for_ = Keyword("for")
        case = oneOf( "case casez casex" )
        endcase = Keyword("endcase")
        wait = Keyword("wait")
        disable = Keyword("disable")
        deassign = Keyword("deassign")
        force = Keyword("force")
        release = Keyword("release")
        assign = Keyword("assign")

        eventExpr = Forward()
        eventTerm = ( posedge + expr ) | ( negedge + expr ) | expr | ( LPAR + eventExpr + RPAR )
        eventExpr << (
            Group( delimitedList( eventTerm, Keyword("or") ) )
            )
        eventControl = Group( "@" + ( ( LPAR + eventExpr + RPAR ) | identifier | "*" ) ).setName("eventCtrl")

        delayArg = ( number |
                     Word(alphanums+"$_") | #identifier |
                     ( LPAR + Group( delimitedList( mintypmaxExpr | expr ) ) + RPAR )
                   ).setName("delayArg")#.setDebug()
        delay = Group( "#" + delayArg ).setName("delay")#.setDebug()
        delayOrEventControl = delay | eventControl

        assgnmt = Group( lvalue + EQ + Optional( delayOrEventControl ) + expr ).setName( "assgnmt" )
        nbAssgnmt = Group(( lvalue + "<=" + Optional( delay ) + expr ) |
                     ( lvalue + "<=" + Optional( eventControl ) + expr )).setName( "nbassgnmt" )

        # NOTE: this local deliberately keeps its original name even though it
        # shadows the builtin range() for the rest of this function.
        range = LBRACK + expr + COLON + expr + RBRACK

        paramAssgnmt = Group( identifier + EQ + expr ).setName("paramAssgnmt")
        parameterDecl = Group( "parameter" + Optional( range ) + delimitedList( paramAssgnmt ) + SEMI).setName("paramDecl")

        inputDecl = Group( "input" + Optional( range ) + delimitedList( identifier ) + SEMI )
        outputDecl = Group( "output" + Optional( range ) + delimitedList( identifier ) + SEMI )
        inoutDecl = Group( "inout" + Optional( range ) + delimitedList( identifier ) + SEMI )

        regIdentifier = Group( identifier + Optional( LBRACK + expr + COLON + expr + RBRACK ) )
        regDecl = Group( "reg" + Optional("signed") + Optional( range ) + delimitedList( regIdentifier ) + SEMI ).setName("regDecl")
        timeDecl = Group( "time" + delimitedList( regIdentifier ) + SEMI )
        integerDecl = Group( "integer" + delimitedList( regIdentifier ) + SEMI )

        strength0 = oneOf("supply0 strong0 pull0 weak0 highz0")
        strength1 = oneOf("supply1 strong1 pull1 weak1 highz1")
        driveStrength = Group( LPAR + ( ( strength0 + COMMA + strength1 ) |
                                        ( strength1 + COMMA + strength0 ) ) + RPAR ).setName("driveStrength")
        nettype = oneOf("wire tri tri1 supply0 wand triand tri0 supply1 wor trior trireg")
        expandRange = Optional( oneOf("scalared vectored") ) + range
        realDecl = Group( "real" + delimitedList( identifier ) + SEMI )

        eventDecl = Group( "event" + delimitedList( identifier ) + SEMI )

        blockDecl = (
            parameterDecl |
            regDecl |
            integerDecl |
            realDecl |
            timeDecl |
            eventDecl
            )

        # stmt is recursive, so it is declared first and defined just below.
        stmt = Forward().setName("stmt")#.setDebug()
        stmtOrNull = stmt | SEMI
        caseItem = ( delimitedList( expr ) + COLON + stmtOrNull ) | \
                   ( default + Optional(":") + stmtOrNull )
        stmt << Group(
            ( begin + Group( ZeroOrMore( stmt ) ) + end ).setName("begin-end") |
            ( if_ + Group(LPAR + expr + RPAR) + stmtOrNull + Optional( else_ + stmtOrNull ) ).setName("if") |
            ( delayOrEventControl + stmtOrNull ) |
            ( case + LPAR + expr + RPAR + OneOrMore( caseItem ) + endcase ) |
            ( forever + stmt ) |
            ( repeat + LPAR + expr + RPAR + stmt ) |
            ( while_ + LPAR + expr + RPAR + stmt ) |
            ( for_ + LPAR + assgnmt + SEMI + Group( expr ) + SEMI + assgnmt + RPAR + stmt ) |
            ( fork + ZeroOrMore( stmt ) + join ) |
            # A labelled parallel block is closed by "join", exactly like the
            # unlabelled form above ("end" only closes begin-blocks).
            ( fork + COLON + identifier + ZeroOrMore( blockDecl ) + ZeroOrMore( stmt ) + join ) |
            ( wait + LPAR + expr + RPAR + stmtOrNull ) |
            ( "->" + identifier + SEMI ) |
            ( disable + identifier + SEMI ) |
            ( assign + assgnmt + SEMI ) |
            ( deassign + lvalue + SEMI ) |
            ( force + assgnmt + SEMI ) |
            ( release + lvalue + SEMI ) |
            ( begin + COLON + identifier + ZeroOrMore( blockDecl ) + ZeroOrMore( stmt ) + end ).setName("begin:label-end") |
            # these *have* to go at the end of the list!!!
            ( assgnmt + SEMI ) |
            ( nbAssgnmt + SEMI ) |
            ( Combine( Optional("$") + identifier ) + Optional( LPAR + delimitedList(expr|empty) + RPAR ) + SEMI )
            ).setName("stmtBody")
        """
        x::=<blocking_assignment> ;
        x||= <non_blocking_assignment> ;
        x||= if ( <expression> ) <statement_or_null>
        x||= if ( <expression> ) <statement_or_null> else <statement_or_null>
        x||= case ( <expression> ) <case_item>+ endcase
        x||= casez ( <expression> ) <case_item>+ endcase
        x||= casex ( <expression> ) <case_item>+ endcase
        x||= forever <statement>
        x||= repeat ( <expression> ) <statement>
        x||= while ( <expression> ) <statement>
        x||= for ( <assignment> ; <expression> ; <assignment> ) <statement>
        x||= <delay_or_event_control> <statement_or_null>
        x||= wait ( <expression> ) <statement_or_null>
        x||= -> <name_of_event> ;
        x||= <seq_block>
        x||= <par_block>
        x||= <task_enable>
        x||= <system_task_enable>
        x||= disable <name_of_task> ;
        x||= disable <name_of_block> ;
        x||= assign <assignment> ;
        x||= deassign <lvalue> ;
        x||= force <assignment> ;
        x||= release <lvalue> ;
        """
        alwaysStmt = Group( "always" + Optional(eventControl) + stmt ).setName("alwaysStmt")
        initialStmt = Group( "initial" + stmt ).setName("initialStmt")

        chargeStrength = Group( LPAR + oneOf( "small medium large" ) + RPAR ).setName("chargeStrength")

        continuousAssign = Group(
            assign + Optional( driveStrength ) + Optional( delay ) + delimitedList( assgnmt ) + SEMI
            ).setName("continuousAssign")

        tfDecl = (
            parameterDecl |
            inputDecl |
            outputDecl |
            inoutDecl |
            regDecl |
            timeDecl |
            integerDecl |
            realDecl
            )

        functionDecl = Group(
            "function" + Optional( range | "integer" | "real" ) + identifier + SEMI +
            Group( OneOrMore( tfDecl ) ) +
            Group( ZeroOrMore( stmt ) ) +
            "endfunction"
            )

        inputOutput = oneOf("input output")
        netDecl1Arg = ( nettype +
            Optional( expandRange ) +
            Optional( delay ) +
            Group( delimitedList( ~inputOutput + identifier ) ) )
        netDecl2Arg = ( "trireg" +
            Optional( chargeStrength ) +
            Optional( expandRange ) +
            Optional( delay ) +
            Group( delimitedList( ~inputOutput + identifier ) ) )
        netDecl3Arg = ( nettype +
            Optional( driveStrength ) +
            Optional( expandRange ) +
            Optional( delay ) +
            Group( delimitedList( assgnmt ) ) )
        netDecl1 = Group(netDecl1Arg + SEMI).setName("netDecl1")
        netDecl2 = Group(netDecl2Arg + SEMI).setName("netDecl2")
        netDecl3 = Group(netDecl3Arg + SEMI).setName("netDecl3")

        gateType = oneOf("and nand or nor xor xnor buf bufif0 bufif1 "
                         "not notif0 notif1 pulldown pullup nmos rnmos "
                         "pmos rpmos cmos rcmos tran rtran tranif0 "
                         "rtranif0 tranif1 rtranif1" )
        gateInstance = Optional( Group( identifier + Optional( range ) ) ) + \
                        LPAR + Group( delimitedList( expr ) ) + RPAR
        gateDecl = Group( gateType +
            Optional( driveStrength ) +
            Optional( delay ) +
            delimitedList( gateInstance) +
            SEMI )

        udpInstance = Group( Group( identifier + Optional(range | subscrRef) ) +
            LPAR + Group( delimitedList( expr ) ) + RPAR )
        udpInstantiation = Group( identifier -
            Optional( driveStrength ) +
            Optional( delay ) +
            delimitedList( udpInstance ) +
            SEMI ).setName("udpInstantiation")

        parameterValueAssignment = Group( Literal("#") + LPAR + Group( delimitedList( expr ) ) + RPAR )
        namedPortConnection = Group( DOT + identifier + LPAR + expr + RPAR ).setName("namedPortConnection")#.setDebug()
        # Inline self-check that an escaped identifier works as a port name.
        assert(r'.\abc (abc )' == namedPortConnection)
        modulePortConnection = expr | empty
        #~ moduleInstance = Group( Group ( identifier + Optional(range) ) +
        #~ ( delimitedList( modulePortConnection ) |
        #~ delimitedList( namedPortConnection ) ) )
        inst_args = Group( LPAR + (delimitedList( namedPortConnection ) |
                    delimitedList( modulePortConnection )) + RPAR).setName("inst_args")
        moduleInstance = Group( Group ( identifier + Optional(range) ) + inst_args ).setName("moduleInstance")#.setDebug()

        moduleInstantiation = Group( identifier +
            Optional( parameterValueAssignment ) +
            delimitedList( moduleInstance ).setName("moduleInstanceList") +
            SEMI ).setName("moduleInstantiation")

        parameterOverride = Group( "defparam" + delimitedList( paramAssgnmt ) + SEMI )
        task = Group( "task" + identifier + SEMI +
            ZeroOrMore( tfDecl ) +
            stmtOrNull +
            "endtask" )

        specparamDecl = Group( "specparam" + delimitedList( paramAssgnmt ) + SEMI )

        pathDescr1 = Group( LPAR + subscrIdentifier + "=>" + subscrIdentifier + RPAR )
        pathDescr2 = Group( LPAR + Group( delimitedList( subscrIdentifier ) ) + "*>" +
                                  Group( delimitedList( subscrIdentifier ) ) + RPAR )
        pathDescr3 = Group( LPAR + Group( delimitedList( subscrIdentifier ) ) + "=>" +
                                  Group( delimitedList( subscrIdentifier ) ) + RPAR )
        pathDelayValue = Group( ( LPAR + Group( delimitedList( mintypmaxExpr | expr ) ) + RPAR ) |
                                 mintypmaxExpr |
                                 expr )
        pathDecl = Group( ( pathDescr1 | pathDescr2 | pathDescr3 ) + EQ + pathDelayValue + SEMI ).setName("pathDecl")

        portConditionExpr = Forward()
        portConditionTerm = Optional(unop) + subscrIdentifier
        # "+" binds tighter than "<<", so the whole sum is the definition.
        portConditionExpr << portConditionTerm + Optional( binop + portConditionExpr )
        polarityOp = oneOf("+ -")
        levelSensitivePathDecl1 = Group(
            if_ + Group(LPAR + portConditionExpr + RPAR) +
            subscrIdentifier + Optional( polarityOp ) + "=>" + subscrIdentifier + EQ +
            pathDelayValue +
            SEMI )
        levelSensitivePathDecl2 = Group(
            if_ + Group(LPAR + portConditionExpr + RPAR) +
            LPAR + Group( delimitedList( subscrIdentifier ) ) + Optional( polarityOp ) + "*>" +
                Group( delimitedList( subscrIdentifier ) ) + RPAR + EQ +
            pathDelayValue +
            SEMI )
        levelSensitivePathDecl = levelSensitivePathDecl1 | levelSensitivePathDecl2

        edgeIdentifier = posedge | negedge
        edgeSensitivePathDecl1 = Group(
            Optional( if_ + Group(LPAR + expr + RPAR) ) +
            LPAR + Optional( edgeIdentifier ) +
            subscrIdentifier + "=>" +
            LPAR + subscrIdentifier + Optional( polarityOp ) + COLON + expr + RPAR + RPAR +
            EQ +
            pathDelayValue +
            SEMI )
        edgeSensitivePathDecl2 = Group(
            Optional( if_ + Group(LPAR + expr + RPAR) ) +
            LPAR + Optional( edgeIdentifier ) +
            subscrIdentifier + "*>" +
            LPAR + delimitedList( subscrIdentifier ) + Optional( polarityOp ) + COLON + expr + RPAR + RPAR +
            EQ +
            pathDelayValue +
            SEMI )
        edgeSensitivePathDecl = edgeSensitivePathDecl1 | edgeSensitivePathDecl2

        edgeDescr = oneOf("01 10 0x x1 1x x0").setName("edgeDescr")

        # Uses the "edge" keyword defined above; the name is rebound to the
        # UDP table edge expression further down, after this point.
        timCheckEventControl = Group( posedge | negedge | (edge + LBRACK + delimitedList( edgeDescr ) + RBRACK ))
        timCheckCond = Forward()
        timCondBinop = oneOf("== === != !==")
        timCheckCondTerm = ( expr + timCondBinop + scalarConst ) | ( Optional("~") + expr )
        timCheckCond << ( ( LPAR + timCheckCond + RPAR ) | timCheckCondTerm )
        timCheckEvent = Group( Optional( timCheckEventControl ) +
                                subscrIdentifier +
                                Optional( "&&&" + timCheckCond ) )
        timCheckLimit = expr
        controlledTimingCheckEvent = Group( timCheckEventControl + subscrIdentifier +
                                            Optional( "&&&" + timCheckCond ) )
        notifyRegister = identifier

        systemTimingCheck1 = Group( "$setup" +
            LPAR + timCheckEvent + COMMA + timCheckEvent + COMMA + timCheckLimit +
            Optional( COMMA + notifyRegister ) + RPAR +
            SEMI )
        systemTimingCheck2 = Group( "$hold" +
            LPAR + timCheckEvent + COMMA + timCheckEvent + COMMA + timCheckLimit +
            Optional( COMMA + notifyRegister ) + RPAR +
            SEMI )
        systemTimingCheck3 = Group( "$period" +
            LPAR + controlledTimingCheckEvent + COMMA + timCheckLimit +
            Optional( COMMA + notifyRegister ) + RPAR +
            SEMI )
        systemTimingCheck4 = Group( "$width" +
            LPAR + controlledTimingCheckEvent + COMMA + timCheckLimit +
            Optional( COMMA + expr + COMMA + notifyRegister ) + RPAR +
            SEMI )
        systemTimingCheck5 = Group( "$skew" +
            LPAR + timCheckEvent + COMMA + timCheckEvent + COMMA + timCheckLimit +
            Optional( COMMA + notifyRegister ) + RPAR +
            SEMI )
        systemTimingCheck6 = Group( "$recovery" +
            LPAR + controlledTimingCheckEvent + COMMA + timCheckEvent + COMMA + timCheckLimit +
            Optional( COMMA + notifyRegister ) + RPAR +
            SEMI )
        systemTimingCheck7 = Group( "$setuphold" +
            LPAR + timCheckEvent + COMMA + timCheckEvent + COMMA + timCheckLimit + COMMA + timCheckLimit +
            Optional( COMMA + notifyRegister ) + RPAR +
            SEMI )
        # The lookahead skips all seven alternatives unless the next
        # character is "$".
        systemTimingCheck = (FollowedBy('$') + ( systemTimingCheck1 | systemTimingCheck2 | systemTimingCheck3 |
            systemTimingCheck4 | systemTimingCheck5 | systemTimingCheck6 | systemTimingCheck7 )).setName("systemTimingCheck")
        sdpd = if_ + Group(LPAR + expr + RPAR) + \
            ( pathDescr1 | pathDescr2 ) + EQ + pathDelayValue + SEMI

        specifyItem = ~Keyword("endspecify") +(
            specparamDecl |
            pathDecl |
            levelSensitivePathDecl |
            edgeSensitivePathDecl |
            systemTimingCheck |
            sdpd
            )
        """
        x::= <specparam_declaration>
        x||= <path_declaration>
        x||= <level_sensitive_path_declaration>
        x||= <edge_sensitive_path_declaration>
        x||= <system_timing_check>
        x||= <sdpd>
        """
        specifyBlock = Group( "specify" + ZeroOrMore( specifyItem ) + "endspecify" ).setName("specifyBlock")

        moduleItem = ~Keyword("endmodule") + (
            parameterDecl |
            inputDecl |
            outputDecl |
            inoutDecl |
            regDecl |
            netDecl3 |
            netDecl1 |
            netDecl2 |
            timeDecl |
            integerDecl |
            realDecl |
            eventDecl |
            gateDecl |
            parameterOverride |
            continuousAssign |
            specifyBlock |
            initialStmt |
            alwaysStmt |
            task |
            functionDecl |
            # these have to be at the end - they start with identifiers
            moduleInstantiation |
            udpInstantiation
            )
        """  All possible moduleItems, from Verilog grammar spec
        x::= <parameter_declaration>
        x||= <input_declaration>
        x||= <output_declaration>
        x||= <inout_declaration>
        ?||= <net_declaration>  (spec does not seem consistent for this item)
        x||= <reg_declaration>
        x||= <time_declaration>
        x||= <integer_declaration>
        x||= <real_declaration>
        x||= <event_declaration>
        x||= <gate_declaration>
        x||= <UDP_instantiation>
        x||= <module_instantiation>
        x||= <parameter_override>
        x||= <continuous_assign>
        x||= <specify_block>
        x||= <initial_statement>
        x||= <always_statement>
        x||= <task>
        x||= <function>
        """
        portRef = subscrIdentifier
        portExpr = portRef | Group( LBRACE + delimitedList( portRef ) + RBRACE )
        port = portExpr | Group( ( DOT + identifier + LPAR + portExpr + RPAR ) )

        moduleHdr = Group ( oneOf("module macromodule") + identifier +
                 Optional( LPAR + Group( Optional( delimitedList(
                                    Group(oneOf("input output") +
                                            (netDecl1Arg | netDecl2Arg | netDecl3Arg) ) |
                                    port ) ) ) +
                            RPAR ) + SEMI ).setName("moduleHdr")

        module = Group( moduleHdr +
                 Group( ZeroOrMore( moduleItem ) ) +
                 "endmodule" ).setName("module")#.setDebug()

        udpDecl = outputDecl | inputDecl | regDecl
        #~ udpInitVal = oneOf("1'b0 1'b1 1'bx 1'bX 1'B0 1'B1 1'Bx 1'BX 1 0 x X")
        udpInitVal = (Regex("1'[bB][01xX]") | Regex("[01xX]")).setName("udpInitVal")
        udpInitialStmt = Group( "initial" +
            identifier + EQ + udpInitVal + SEMI ).setName("udpInitialStmt")

        levelSymbol = oneOf("0 1 x X ? b B")
        levelInputList = Group( OneOrMore( levelSymbol ).setName("levelInpList") )
        outputSymbol = oneOf("0 1 x X")
        combEntry = Group( levelInputList + COLON + outputSymbol + SEMI )
        edgeSymbol = oneOf("r R f F p P n N *")
        # Rebinds "edge" (the keyword defined near the top) to the UDP table
        # edge expression; timCheckEventControl above already captured the
        # keyword object, so it is unaffected.
        edge = Group( LPAR + levelSymbol + levelSymbol + RPAR ) | \
               Group( edgeSymbol )
        edgeInputList = Group( ZeroOrMore( levelSymbol ) + edge + ZeroOrMore( levelSymbol ) )
        inputList = levelInputList | edgeInputList
        seqEntry = Group( inputList + COLON + levelSymbol + COLON + ( outputSymbol | "-" ) + SEMI ).setName("seqEntry")
        udpTableDefn = Group( "table" +
            OneOrMore( combEntry | seqEntry ) +
            "endtable" ).setName("table")

        """
        <UDP>
        ::= primitive <name_of_UDP> ( <name_of_variable> <,<name_of_variable>>* ) ;
                <UDP_declaration>+
                <UDP_initial_statement>?
                <table_definition>
                endprimitive
        """
        udp = Group( "primitive" + identifier +
            LPAR + Group( delimitedList( identifier ) ) + RPAR + SEMI +
            OneOrMore( udpDecl ) +
            Optional( udpInitialStmt ) +
            udpTableDefn +
            "endprimitive" )

        # A source file is one or more modules/UDPs and nothing else.
        verilogbnf = OneOrMore( module | udp ) + StringEnd()

        verilogbnf.ignore( cppStyleComment )
        verilogbnf.ignore( compilerDirective )

    return verilogbnf
return opn[op]( op1, op2 ) elif op == "PI": return math.pi # 3.1415926535 elif op == "E": return math.e # 2.718281828 elif op in fn: return fn[op]( evaluateStack( s ) ) elif op[0].isalpha(): if op in variables: return variables[op] raise Exception("invalid identifier '%s'" % op) else: return float( op ) arithExpr = BNF() ident = Word(alphas, alphanums).setName("identifier") assignment = ident("varname") + '=' + arithExpr pattern = assignment | arithExpr if __name__ == '__main__': # input_string input_string='' # Display instructions on how to quit the program print("Type in the string to be parsed or 'quit' to exit the program") input_string = input("> ") while input_string != 'quit': if input_string.lower() == 'debug': debug_flag=True input_string = input("> ")
# getNTPservers.py
#
# Demonstration of the parsing module, implementing a HTML page scanner,
# to extract a list of NTP time servers from the NIST web site.
#
# Copyright 2004, by Paul McGuire
#
from pyparsingOD import Word, Combine, Suppress, CharsNotIn, nums
import urllib.request, urllib.parse, urllib.error

# An IPv4 address is four dot-separated integers, joined into one token.
integer = Word(nums)
ipAddress = Combine(integer + "." + integer + "." + integer + "." + integer)

# Each server is two adjacent table cells: the address, then free text up to
# the next "<" (the location).
tdStart = Suppress("<td>")
tdEnd = Suppress("</td>")
timeServerPattern = tdStart + ipAddress.setResultsName("ipAddr") + tdEnd + \
    tdStart + CharsNotIn("<").setResultsName("loc") + tdEnd

# get list of time servers
nistTimeServerURL = "http://www.boulder.nist.gov/timefreq/service/time-servers.html"
# The context manager closes the connection even if reading fails.  The body
# arrives as bytes, so it is decoded to text before being scanned; the
# charset announced by the server is used when present, UTF-8 otherwise.
with urllib.request.urlopen(nistTimeServerURL) as serverListPage:
    pageCharset = serverListPage.headers.get_content_charset() or "utf-8"
    serverListHTML = serverListPage.read().decode(pageCharset, errors="replace")

# Map each server address to its location text, printing as we go.
addrs = {}
for srvr, startloc, endloc in timeServerPattern.scanString(serverListHTML):
    print(srvr.ipAddr, "-", srvr.loc)
    addrs[srvr.ipAddr] = srvr.loc
    # or do this:
    #~ addr,loc = srvr
    #~ print addr, "-", loc
SEMI = Literal(";").suppress() COLON = Literal(":").suppress() EVENT = Literal("VEVENT").suppress() CALENDAR = Literal("VCALENDAR").suppress() ALARM = Literal("VALARM").suppress() # TOKENS CALPROP = oneOf("VERSION PRODID METHOD") ALMPROP = oneOf("TRIGGER") EVTPROP = oneOf("X-MOZILLA-RECUR-DEFAULT-INTERVAL \ X-MOZILLA-RECUR-DEFAULT-UNITS \ UID DTSTAMP LAST-MODIFIED X RRULE EXDATE") propval = Word(valstr) typeval = Word(valstr) typename = oneOf("VALUE MEMBER FREQ UNTIL INTERVAL") proptype = Group(SEMI + typename + EQ + typeval).suppress() calprop = Group(CALPROP + ZeroOrMore(proptype) + COLON + propval) almprop = Group(ALMPROP + ZeroOrMore(proptype) + COLON + propval) evtprop = Group(EVTPROP + ZeroOrMore(proptype) + COLON + propval).suppress() \ | "CATEGORIES" + COLON + propval.setResultsName("categories") \ | "CLASS" + COLON + propval.setResultsName("class") \ | "DESCRIPTION" + COLON + propval.setResultsName("description") \ | "DTSTART" + proptype + COLON + propval.setResultsName("begin") \ | "DTEND" + proptype + COLON + propval.setResultsName("end") \ | "LOCATION" + COLON + propval.setResultsName("location") \ | "PRIORITY" + COLON + propval.setResultsName("priority") \
#
# cLibHeader.py
#
# A simple parser to extract API doc info from a C header file
#
# Copyright, 2012 - Paul McGuire
#
from pyparsingOD import Word, alphas, alphanums, Combine, oneOf, Optional, delimitedList, Group, Keyword

# Sample header text holding the two prototypes scanned below.
testdata = """ int func1(float *vec, int len, double arg1); int func2(float **arr, float *vec, int len, double arg1, double arg2); """

# Identifier: a letter followed by letters, digits or underscores.
ident = Word(alphas, alphanums + "_")

# Argument type: a base type plus optional "*" characters, combined into a
# single token even when whitespace separates them (adjacent=False).
base_type = oneOf("float double int char")
pointer_marks = Optional(Word("*"))
vartype = Combine(base_type + pointer_marks, adjacent=False)

# Each argument is grouped as a (type, name) pair; arguments are
# comma-separated.
argument = Group(vartype("type") + ident("name"))
arglist = delimitedList(argument)

# Only prototypes whose return type is the keyword "int" are recognised.
functionCall = (
    Keyword("int")
    + ident("name")
    + "("
    + arglist("args")
    + ")"
    + ";"
)

# Print every function name found, followed by one line per argument.
for match, _start, _end in functionCall.scanString(testdata):
    print(match.name)
    for arg in match.args:
        print(" - %(name)s (%(type)s)" % arg)
LPAREN = Literal("(").suppress() PERIOD = Literal(".").suppress() RANGLE = Literal(">").suppress() RBRACE = Literal("]").suppress() RPAREN = Literal(")").suppress() CATEGORIES = CaselessLiteral("categories").suppress() END = CaselessLiteral("end").suppress() FONT = CaselessLiteral("font").suppress() HINT = CaselessLiteral("hint").suppress() ITEM = CaselessLiteral("item").suppress() OBJECT = CaselessLiteral("object").suppress() attribute_value_pair = Forward() # this is recursed in item_list_entry simple_identifier = Word(alphas, alphanums + "_") identifier = Combine(simple_identifier + ZeroOrMore(Literal(".") + simple_identifier)) object_name = identifier object_type = identifier # Integer and floating point values are converted to Python longs and floats, respectively. int_value = Combine(Optional("-") + Word(nums)).setParseAction(lambda s, l, t: [int(t[0])]) float_value = Combine(Optional("-") + Optional(Word(nums)) + "." + Word(nums)).setParseAction(lambda s, l, t: [float(t[0])]) number_value = float_value | int_value # Base16 constants are left in string form, including the surrounding braces. base16_value = Combine( Literal("{") + OneOrMore(Word("0123456789ABCDEFabcdef")) + Literal("}"),