def inifile_BNF(): global inibnf if not inibnf: # punctuation lbrack = Literal("[").suppress() rbrack = Literal("]").suppress() equals = Literal("=").suppress() semi = Literal(";") comment = semi + Optional( restOfLine ) nonrbrack = "".join( [ c for c in printables if c != "]" ] ) + " \t" nonequals = "".join( [ c for c in printables if c != "=" ] ) + " \t" sectionDef = lbrack + Word( nonrbrack ) + rbrack keyDef = ~lbrack + Word( nonequals ) + equals + empty + restOfLine # strip any leading or trailing blanks from key def stripKey(tokens): tokens[0] = tokens[0].strip() keyDef.setParseAction(stripKey) # using Dict will allow retrieval of named data fields as attributes of the parsed results inibnf = Dict( ZeroOrMore( Group( sectionDef + Dict( ZeroOrMore( Group( keyDef ) ) ) ) ) ) inibnf.ignore( comment ) return inibnf
def getLogLineBNF(): global logLineBNF if logLineBNF is None: integer = Word(nums) ipAddress = delimitedList(integer, ".", combine=True) timeZoneOffset = Word("+-", nums) month = Word(string.uppercase, string.lowercase, exact=3) serverDateTime = Group( Suppress("[") + Combine(integer + "/" + month + "/" + integer + ":" + integer + ":" + integer + ":" + integer) + timeZoneOffset + Suppress("]")) logLineBNF = ( ipAddress.setResultsName("ipAddr") + Suppress("-") + ("-" | Word(alphas + nums + "@._")).setResultsName("auth") + serverDateTime.setResultsName("timestamp") + dblQuotedString.setResultsName("cmd").setParseAction(getCmdFields) + (integer | "-").setResultsName("statusCode") + (integer | "-").setResultsName("numBytesSent") + dblQuotedString.setResultsName("referrer").setParseAction( removeQuotes) + dblQuotedString.setResultsName( "clientSfw").setParseAction(removeQuotes)) return logLineBNF
def getLogLineBNF(): global logLineBNF if logLineBNF is None: integer = Word( nums ) ipAddress = delimitedList( integer, ".", combine=True ) timeZoneOffset = Word("+-",nums) month = Word(string.uppercase, string.lowercase, exact=3) serverDateTime = Group( Suppress("[") + Combine( integer + "/" + month + "/" + integer + ":" + integer + ":" + integer + ":" + integer ) + timeZoneOffset + Suppress("]") ) logLineBNF = ( ipAddress.setResultsName("ipAddr") + Suppress("-") + ("-" | Word( alphas+nums+"@._" )).setResultsName("auth") + serverDateTime.setResultsName("timestamp") + dblQuotedString.setResultsName("cmd").setParseAction(getCmdFields) + (integer | "-").setResultsName("statusCode") + (integer | "-").setResultsName("numBytesSent") + dblQuotedString.setResultsName("referrer").setParseAction(removeQuotes) + dblQuotedString.setResultsName("clientSfw").setParseAction(removeQuotes) ) return logLineBNF
def __antlrAlternativeConverter(pyparsingRules, antlrAlternative): elementList = [] for element in antlrAlternative.elements: rule = None if hasattr(element.atom, 'c1') and element.atom.c1 != '': regex = r'['+str(element.atom.c1[0])+'-'+str(element.atom.c2[0]+']') rule = Regex(regex)("anonymous_regex") elif hasattr(element, 'block') and element.block != '': rule = __antlrAlternativesConverter(pyparsingRules, element.block) else: ruleRef = element.atom assert ruleRef in pyparsingRules rule = pyparsingRules[element.atom](element.atom) if hasattr(element, 'op') and element.op != '': if element.op == '+': rule = Group(OneOrMore(rule))("anonymous_one_or_more") elif element.op == '*': rule = Group(ZeroOrMore(rule))("anonymous_zero_or_more") elif element.op == '?': rule = Optional(rule) else: raise Exception('rule operator not yet implemented : ' + element.op) rule = rule elementList.append(rule) if len(elementList) > 1: rule = Group(And(elementList))("anonymous_and") else: rule = elementList[0] assert rule != None return rule
def parser(self): """ This function returns a parser. The grammar should be like most full text search engines (Google, Tsearch, Lucene). Grammar: - a query consists of alphanumeric words, with an optional '*' wildcard at the end of a word - a sequence of words between quotes is a literal string - words can be used together by using operators ('and' or 'or') - words with operators can be grouped with parenthesis - a word or group of words can be preceded by a 'not' operator - the 'and' operator precedes an 'or' operator - if an operator is missing, use an 'and' operator """ operatorOr = Forward() operatorWord = Group(Combine(Word(alphanums) + Suppress('*'))).setResultsName('wordwildcard') | \ Group(Word(alphanums)).setResultsName('word') operatorQuotesContent = Forward() operatorQuotesContent << ( (operatorWord + operatorQuotesContent) | operatorWord) operatorQuotes = Group( Suppress('"') + operatorQuotesContent + Suppress('"')).setResultsName("quotes") | operatorWord operatorParenthesis = Group( (Suppress("(") + operatorOr + Suppress(")"))).setResultsName("parenthesis") | operatorQuotes operatorNot = Forward() operatorNot << ( Group(Suppress(Keyword("not", caseless=True)) + operatorNot).setResultsName("not") | operatorParenthesis) operatorAnd = Forward() operatorAnd << ( Group(operatorNot + Suppress(Keyword("and", caseless=True)) + operatorAnd).setResultsName("and") | Group(operatorNot + OneOrMore(~oneOf("and or") + operatorAnd) ).setResultsName("and") | operatorNot) operatorOr << ( Group(operatorAnd + Suppress(Keyword("or", caseless=True)) + operatorOr).setResultsName("or") | operatorAnd) return operatorOr.parseString
def BNF(): """ expop :: '^' multop :: '*' | '/' addop :: '+' | '-' integer :: ['+' | '-'] '0'..'9'+ atom :: PI | E | real | fn '(' expr ')' | '(' expr ')' factor :: atom [ expop factor ]* term :: factor [ multop factor ]* expr :: term [ addop term ]* """ global bnf if not bnf: point = Literal( "." ) e = CaselessLiteral( "E" ) #~ fnumber = Combine( Word( "+-"+nums, nums ) + #~ Optional( point + Optional( Word( nums ) ) ) + #~ Optional( e + Word( "+-"+nums, nums ) ) ) fnumber = Regex(r"[+-]?\d+(:?\.\d*)?(:?[eE][+-]?\d+)?") ident = Word(alphas, alphas+nums+"_$") plus = Literal( "+" ) minus = Literal( "-" ) mult = Literal( "*" ) div = Literal( "/" ) lpar = Literal( "(" ).suppress() rpar = Literal( ")" ).suppress() addop = plus | minus multop = mult | div expop = Literal( "^" ) pi = CaselessLiteral( "PI" ) expr = Forward() atom = ((0,None)*minus + ( pi | e | fnumber | ident + lpar + expr + rpar | ident ).setParseAction( pushFirst ) | Group( lpar + expr + rpar )).setParseAction(pushUMinus) # by defining exponentiation as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-righ # that is, 2^3^2 = 2^(3^2), not (2^3)^2. factor = Forward() factor << atom + ZeroOrMore( ( expop + factor ).setParseAction( pushFirst ) ) term = factor + ZeroOrMore( ( multop + factor ).setParseAction( pushFirst ) ) expr << term + ZeroOrMore( ( addop + term ).setParseAction( pushFirst ) ) bnf = expr return bnf
| | A1 | B1 | C1 | D1 | A2 | B2 | C2 | D2 | +=======+======+======+======+======+======+======+======+======+ | min | 7 | 43 | 7 | 15 | 82 | 98 | 1 | 37 | | max | 11 | 52 | 10 | 17 | 85 | 112 | 4 | 39 | | ave | 9 | 47 | 8 | 16 | 84 | 106 | 3 | 38 | | sdev | 1 | 3 | 1 | 1 | 1 | 3 | 1 | 1 | +-------+------+------+------+------+------+------+------+------+ """ # define grammar for datatable underline = Word("-=") number = Word(nums).setParseAction(lambda t: int(t[0])) vert = Literal("|").suppress() rowDelim = ("+" + ZeroOrMore(underline + "+")).suppress() columnHeader = Group(vert + vert + delimitedList(Word(alphas + nums), "|") + vert) heading = rowDelim + columnHeader.setResultsName("columns") + rowDelim rowData = Group(vert + Word(alphas) + vert + delimitedList(number, "|") + vert) trailing = rowDelim datatable = heading + Dict(ZeroOrMore(rowData)) + trailing # now parse data and print results data = datatable.parseString(testData) print(data) print(data.asXML("DATA")) pprint.pprint(data.asList()) print("data keys=", list(data.keys())) print("data['min']=", data['min']) print("sum(data['min']) =", sum(data['min']))
except ParseException as err: print(" "*err.loc + "^\n" + err.msg) print(err) print() # define SQL tokens selectStmt = Forward() selectToken = Keyword("select", caseless=True) fromToken = Keyword("from", caseless=True) ident = Word( alphas, alphanums + "_$" ).setName("identifier") columnName = Upcase( delimitedList( ident, ".", combine=True ) ) columnNameList = Group( delimitedList( columnName ) ) tableName = Upcase( delimitedList( ident, ".", combine=True ) ) tableNameList = Group( delimitedList( tableName ) ) whereExpression = Forward() and_ = Keyword("and", caseless=True) or_ = Keyword("or", caseless=True) in_ = Keyword("in", caseless=True) E = CaselessLiteral("E") binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True) arithSign = Word("+-",exact=1) realNum = Combine( Optional(arithSign) + ( Word( nums ) + "." + Optional( Word(nums) ) | ( "." + Word(nums) ) ) + Optional( E + Optional(arithSign) + Word(nums) ) ) intNum = Combine( Optional(arithSign) + Word( nums ) + Optional( E + Optional("+") + Word(nums) ) )
# whitespace to just spaces and tabs ParserElement.setDefaultWhitespaceChars(" \t") NL = LineEnd().suppress() integer = Word(nums) plan = '1..' + integer("ubound") OK,NOT_OK = map(Literal,['ok','not ok']) testStatus = (OK | NOT_OK) description = Regex("[^#\n]+") description.setParseAction(lambda t:t[0].lstrip('- ')) TODO,SKIP = map(CaselessLiteral,'TODO SKIP'.split()) directive = Group(Suppress('#') + (TODO + restOfLine | FollowedBy(SKIP) + restOfLine.copy().setParseAction(lambda t:['SKIP',t[0]]) )) commentLine = Suppress("#") + empty + restOfLine testLine = Group( Optional(OneOrMore(commentLine + NL))("comments") + testStatus("passed") + Optional(integer)("testNumber") + Optional(description)("description") + Optional(directive)("directive") ) bailLine = Group(Literal("Bail out!")("BAIL") + empty + Optional(restOfLine)("reason")) tapOutputParser = Optional(Group(plan)("plan") + NL) & \
print("tokens.tables =", tokens.tables) print("tokens.where =", tokens.where) except ParseException as err: print(" " * err.loc + "^\n" + err.msg) print(err) print() # define SQL tokens selectStmt = Forward() selectToken = Keyword("select", caseless=True) fromToken = Keyword("from", caseless=True) ident = Word(alphas, alphanums + "_$").setName("identifier") columnName = Upcase(delimitedList(ident, ".", combine=True)) columnNameList = Group(delimitedList(columnName)) tableName = Upcase(delimitedList(ident, ".", combine=True)) tableNameList = Group(delimitedList(tableName)) whereExpression = Forward() and_ = Keyword("and", caseless=True) or_ = Keyword("or", caseless=True) in_ = Keyword("in", caseless=True) E = CaselessLiteral("E") binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True) arithSign = Word("+-", exact=1) realNum = Combine( Optional(arithSign) + (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums))) + Optional(E + Optional(arithSign) + Word(nums)))
to_ = CaselessKeyword("TO") keyword = and_ | or_ | not_ expression = Forward() valid_word = Regex(r'([a-zA-Z0-9*_+.-]|\\[!(){}\[\]^"~*?\\:])+').setName("word") valid_word.setParseAction( lambda t : t[0].replace('\\\\',chr(127)).replace('\\','').replace(chr(127),'\\') ) string = QuotedString('"') required_modifier = Literal("+")("required") prohibit_modifier = Literal("-")("prohibit") integer = Regex(r"\d+").setParseAction(lambda t:int(t[0])) proximity_modifier = Group(TILDE + integer("proximity")) number = Regex(r'\d+(\.\d+)?').setParseAction(lambda t:float(t[0])) fuzzy_modifier = TILDE + Optional(number, default=0.5)("fuzzy") term = Forward() field_name = valid_word.copy().setName("fieldname") incl_range_search = Group(LBRACK + term("lower") + to_ + term("upper") + RBRACK) excl_range_search = Group(LBRACE + term("lower") + to_ + term("upper") + RBRACE) range_search = incl_range_search("incl_range") | excl_range_search("excl_range") boost = (CARAT + number("boost")) string_expr = Group(string + proximity_modifier) | string word_expr = Group(valid_word + fuzzy_modifier) | valid_word term << (Optional(field_name("field") + COLON) + (word_expr | string_expr | range_search | Group(LPAR + expression + RPAR)) + Optional(boost))
m8 = castle_queenside | castle_kingside # castling e.g. o-o check = oneOf("+ ++") mate = Literal("#") annotation = Word("!?", max=2) nag = " $" + Word(nums) decoration = check | mate | annotation | nag variant = Forward() half_move = Combine((m3 | m1 | m2 | m4 | m5 | m6 | m7 | m8) + Optional(decoration)) \ + Optional(comment) +Optional(variant) move = Suppress(move_number) + half_move + Optional(half_move) variant << "(" + OneOrMore(move) + ")" # grouping the plies (half-moves) for each move: useful to group annotations, variants... # suggested by Paul McGuire :) move = Group(Suppress(move_number) + half_move + Optional(half_move)) variant << Group("(" + OneOrMore(move) + ")") game_terminator = oneOf("1-0 0-1 1/2-1/2 *") pgnGrammar = Suppress(ZeroOrMore(tag)) + ZeroOrMore(move) + Optional( Suppress(game_terminator)) def parsePGN(pgn, bnf=pgnGrammar, fn=None): try: return bnf.parseString(pgn) except ParseException as err: print(err.line) print(" " * (err.column - 1) + "^") print(err)
# # Copyright 2010, Paul McGuire # # A partial implementation of a parser of Excel formula expressions. # from pyparsingOD import (CaselessKeyword, Suppress, Word, alphas, alphanums, nums, Optional, Group, oneOf, Forward, Regex, operatorPrecedence, opAssoc, dblQuotedString, delimitedList, Combine, Literal, QuotedString) EQ,EXCL,LPAR,RPAR,COLON,COMMA = map(Suppress, '=!():,') EXCL, DOLLAR = map(Literal,"!$") sheetRef = Word(alphas, alphanums) | QuotedString("'",escQuote="''") colRef = Optional(DOLLAR) + Word(alphas,max=2) rowRef = Optional(DOLLAR) + Word(nums) cellRef = Combine(Group(Optional(sheetRef + EXCL)("sheet") + colRef("col") + rowRef("row"))) cellRange = (Group(cellRef("start") + COLON + cellRef("end"))("range") | cellRef | Word(alphas,alphanums)) expr = Forward() COMPARISON_OP = oneOf("< = > >= <= != <>") condExpr = expr + COMPARISON_OP + expr ifFunc = (CaselessKeyword("if") + LPAR + Group(condExpr)("condition") + COMMA + expr("if_true") + COMMA + expr("if_false") + RPAR)
SCOPE = Suppress('scope') OPTIONS = Suppress('options') + Suppress('{') # + WS_LOOP + Suppress('{') TOKENS = Suppress('tokens') + Suppress('{') # + WS_LOOP + Suppress('{') FRAGMENT = 'fragment'; TREE_BEGIN = Suppress('^(') ROOT = Suppress('^') BANG = Suppress('!') RANGE = Suppress('..') REWRITE = Suppress('->') # General Parser Definitions # Grammar heading optionValue = id | STRING_LITERAL | CHAR_LITERAL | INT | Literal('*').setName("s") option = Group(id("id") + Suppress('=') + optionValue("value"))("option") optionsSpec = OPTIONS + Group(OneOrMore(option + Suppress(';')))("options") + Suppress('}') tokenSpec = Group(TOKEN_REF("token_ref") + (Suppress('=') + (STRING_LITERAL | CHAR_LITERAL)("lit")))("token") + Suppress(';') tokensSpec = TOKENS + Group(OneOrMore(tokenSpec))("tokens") + Suppress('}') attrScope = Suppress('scope') + id + ACTION grammarType = Keyword('lexer') + Keyword('parser') + Keyword('tree') actionScopeName = id | Keyword('lexer')("l") | Keyword('parser')("p") action = Suppress('@') + Optional(actionScopeName + Suppress('::')) + id + ACTION grammarHeading = Optional(ML_COMMENT("ML_COMMENT")) + Optional(grammarType) + Suppress('grammar') + id("grammarName") + Suppress(';') + Optional(optionsSpec) + Optional(tokensSpec) + ZeroOrMore(attrScope) + ZeroOrMore(action) modifier = Keyword('protected') | Keyword('public') | Keyword('private') | Keyword('fragment') ruleAction = Suppress('@') + id + ACTION throwsSpec = Suppress('throws') + delimitedList(id) ruleScopeSpec = (Suppress('scope') + ACTION) | (Suppress('scope') + delimitedList(id) + Suppress(';')) | (Suppress('scope') + ACTION + Suppress('scope') + delimitedList(id) + Suppress(';')) unary_op = oneOf("^ !")
kwds = """message required optional repeated enum extensions extends extend to package service rpc returns true false option import""" for kw in kwds.split(): exec("%s_ = Keyword('%s')" % (kw.upper(), kw)) messageBody = Forward() messageDefn = MESSAGE_ - ident("messageId") + LBRACE + messageBody( "body") + RBRACE typespec = oneOf("""double float int32 int64 uint32 uint64 sint32 sint64 fixed32 fixed64 sfixed32 sfixed64 bool string bytes""" ) | ident rvalue = integer | TRUE_ | FALSE_ | ident fieldDirective = LBRACK + Group(ident + EQ + rvalue) + RBRACK fieldDefn = ((REQUIRED_ | OPTIONAL_ | REPEATED_)("fieldQualifier") - typespec("typespec") + ident("ident") + EQ + integer("fieldint") + ZeroOrMore(fieldDirective) + SEMI) # enumDefn ::= 'enum' ident '{' { ident '=' integer ';' }* '}' enumDefn = ENUM_ - ident + LBRACE + ZeroOrMore( Group(ident + EQ + integer + SEMI)) + RBRACE # extensionsDefn ::= 'extensions' integer 'to' integer ';' extensionsDefn = EXTENSIONS_ - integer + TO_ + integer + SEMI # messageExtension ::= 'extend' ident '{' messageBody '}' messageExtension = EXTEND_ - ident + LBRACE + messageBody + RBRACE # messageBody ::= { fieldDefn | enumDefn | messageDefn | extensionsDefn | messageExtension }*
else: if fn != None: print(fn( bnf.parseString( strg ) )) else: print() digits = "0123456789" # Version 1 element = Regex("A[cglmrstu]|B[aehikr]?|C[adeflmorsu]?|D[bsy]|" "E[rsu]|F[emr]?|G[ade]|H[efgos]?|I[nr]?|Kr?|L[airu]|" "M[dgnot]|N[abdeiop]?|Os?|P[abdmortu]?|R[abefghnu]|" "S[bcegimnr]?|T[abcehilm]|U(u[bhopqst])?|V|W|Xe|Yb?|Z[nr]") element = Word( alphas.upper(), alphas.lower(), max=2) elementRef = Group( element + Optional( Word( digits ), default="1" ) ) formula = OneOrMore( elementRef ) fn = lambda elemList : sum( [ atomicWeight[elem]*int(qty) for elem,qty in elemList ] ) test( formula, "H2O", fn ) test( formula, "C6H5OH", fn ) test( formula, "NaCl", fn ) print() # Version 2 - access parsed items by field name elementRef = Group( element("symbol") + Optional( Word( digits ), default="1" )("qty") ) formula = OneOrMore( elementRef ) fn = lambda elemList : sum( [ atomicWeight[elem.symbol]*int(elem.qty) for elem in elemList ] ) test( formula, "H2O", fn ) test( formula, "C6H5OH", fn )
strBody + QUOTE) item = Forward() def assignUsing(s): def assignPA(tokens): if s in tokens: tokens[tokens[s]] = tokens[0] del tokens[s] return assignPA GROUP = (MARK + Group( ZeroOrMore((item + Optional(ATTRIBUTE)("attr")).setParseAction( assignUsing("attr")))) + (WORD("name") | UNMARK)).setParseAction( assignUsing("name")) item << (NUMBER | FLOAT | STRING | BLOB | GROUP) tests = """\ [ '10:1234567890' @name 25 @age +0.45 @percentage person:zed [ [ "hello" 1 child root [ "child" [ 200 '4:like' "I" "hello" things root [ [ "data" [ 2 1 ] @numbers child root [ [ 1 2 3 ] @test 4 5 6 root """.splitlines() for test in tests: if test: print(test)
Suppress, '{}()",@=#') def bracketed(expr): """ Return matcher for `expr` between curly brackets or parentheses """ return (LPAREN + expr + RPAREN) | (LCURLY + expr + RCURLY) # Define parser components for strings (the hard bit) chars_no_curly = Regex(r"[^{}]+") chars_no_curly.leaveWhitespace() chars_no_quotecurly = Regex(r'[^"{}]+') chars_no_quotecurly.leaveWhitespace() # Curly string is some stuff without curlies, or nested curly sequences curly_string = Forward() curly_item = Group(curly_string) | chars_no_curly curly_string << LCURLY + ZeroOrMore(curly_item) + RCURLY # quoted string is either just stuff within quotes, or stuff within quotes, within # which there is nested curliness quoted_item = Group(curly_string) | chars_no_quotecurly quoted_string = QUOTE + ZeroOrMore(quoted_item) + QUOTE # Numbers can just be numbers. Only integers though. number = Regex('[0-9]+') # Basis characters (by exclusion) for variable / field names. The following # list of characters is from the btparse documentation any_name = Regex('[^\s"#%\'(),={}]+') # btparse says, and the test bibs show by experiment, that macro and field names # cannot start with a digit. In fact entry type names cannot start with a digit
| | A1 | B1 | C1 | D1 | A2 | B2 | C2 | D2 | +=======+======+======+======+======+======+======+======+======+ | min | 7 | 43 | 7 | 15 | 82 | 98 | 1 | 37 | | max | 11 | 52 | 10 | 17 | 85 | 112 | 4 | 39 | | ave | 9 | 47 | 8 | 16 | 84 | 106 | 3 | 38 | | sdev | 1 | 3 | 1 | 1 | 1 | 3 | 1 | 1 | +-------+------+------+------+------+------+------+------+------+ """ # define grammar for datatable underline = Word("-=") number = Word(nums).setParseAction( lambda t : int(t[0]) ) vert = Literal("|").suppress() rowDelim = ("+" + ZeroOrMore( underline + "+" ) ).suppress() columnHeader = Group(vert + vert + delimitedList(Word(alphas + nums), "|") + vert) heading = rowDelim + columnHeader.setResultsName("columns") + rowDelim rowData = Group( vert + Word(alphas) + vert + delimitedList(number,"|") + vert ) trailing = rowDelim datatable = heading + Dict( ZeroOrMore(rowData) ) + trailing # now parse data and print results data = datatable.parseString(testData) print(data) print(data.asXML("DATA")) pprint.pprint(data.asList()) print("data keys=", list(data.keys())) print("data['min']=", data['min']) print("sum(data['min']) =", sum(data['min']))
def CORBA_IDL_BNF(): global bnf if not bnf: # punctuation colon = Literal(":") lbrace = Literal("{") rbrace = Literal("}") lbrack = Literal("[") rbrack = Literal("]") lparen = Literal("(") rparen = Literal(")") equals = Literal("=") comma = Literal(",") dot = Literal(".") slash = Literal("/") bslash = Literal("\\") star = Literal("*") semi = Literal(";") langle = Literal("<") rangle = Literal(">") # keywords any_ = Keyword("any") attribute_ = Keyword("attribute") boolean_ = Keyword("boolean") case_ = Keyword("case") char_ = Keyword("char") const_ = Keyword("const") context_ = Keyword("context") default_ = Keyword("default") double_ = Keyword("double") enum_ = Keyword("enum") exception_ = Keyword("exception") false_ = Keyword("FALSE") fixed_ = Keyword("fixed") float_ = Keyword("float") inout_ = Keyword("inout") interface_ = Keyword("interface") in_ = Keyword("in") long_ = Keyword("long") module_ = Keyword("module") object_ = Keyword("Object") octet_ = Keyword("octet") oneway_ = Keyword("oneway") out_ = Keyword("out") raises_ = Keyword("raises") readonly_ = Keyword("readonly") sequence_ = Keyword("sequence") short_ = Keyword("short") string_ = Keyword("string") struct_ = Keyword("struct") switch_ = Keyword("switch") true_ = Keyword("TRUE") typedef_ = Keyword("typedef") unsigned_ = Keyword("unsigned") union_ = Keyword("union") void_ = Keyword("void") wchar_ = Keyword("wchar") wstring_ = Keyword("wstring") identifier = Word( alphas, alphanums + "_" ).setName("identifier") #~ real = Combine( Word(nums+"+-", nums) + dot + Optional( Word(nums) ) #~ + Optional( CaselessLiteral("E") + Word(nums+"+-",nums) ) ) real = Regex(r"[+-]?\d+\.\d*([Ee][+-]?\d+)?").setName("real") #~ integer = ( Combine( CaselessLiteral("0x") + Word( nums+"abcdefABCDEF" ) ) | #~ Word( nums+"+-", nums ) ).setName("int") integer = Regex(r"0x[0-9a-fA-F]+|[+-]?\d+").setName("int") udTypeName = delimitedList( identifier, "::", combine=True ).setName("udType") # have to use longest match for type, in case a user-defined type name starts with a keyword type, like "stringSeq" or "longArray" typeName = ( any_ ^ boolean_ ^ char_ ^ double_ ^ fixed_ ^ float_ ^ long_ ^ octet_ ^ short_ ^ string_ ^ wchar_ ^ wstring_ ^ udTypeName ).setName("type") sequenceDef = Forward().setName("seq") sequenceDef << Group( sequence_ + langle + ( sequenceDef | typeName ) + rangle ) typeDef = sequenceDef | ( typeName + Optional( lbrack + integer + rbrack ) ) typedefDef = Group( typedef_ + typeDef + identifier + semi ).setName("typedef") moduleDef = Forward() constDef = Group( const_ + typeDef + identifier + equals + ( real | integer | quotedString ) + semi ) #| quotedString ) exceptionItem = Group( typeDef + identifier + semi ) exceptionDef = ( exception_ + identifier + lbrace + ZeroOrMore( exceptionItem ) + rbrace + semi ) attributeDef = Optional( readonly_ ) + attribute_ + typeDef + identifier + semi paramlist = delimitedList( Group( ( inout_ | in_ | out_ ) + typeName + identifier ) ).setName( "paramlist" ) operationDef = ( ( void_ ^ typeDef ) + identifier + lparen + Optional( paramlist ) + rparen + \ Optional( raises_ + lparen + Group( delimitedList( typeName ) ) + rparen ) + semi ) interfaceItem = ( constDef | exceptionDef | attributeDef | operationDef ) interfaceDef = Group( interface_ + identifier + Optional( colon + delimitedList( typeName ) ) + lbrace + \ ZeroOrMore( interfaceItem ) + rbrace + semi ).setName("opnDef") moduleItem = ( interfaceDef | exceptionDef | constDef | typedefDef | moduleDef ) moduleDef << module_ + identifier + lbrace + ZeroOrMore( moduleItem ) + rbrace + semi bnf = ( moduleDef | OneOrMore( moduleItem ) ) singleLineComment = "//" + restOfLine bnf.ignore( singleLineComment ) bnf.ignore( cStyleComment ) return bnf
def Verilog_BNF(): global verilogbnf if verilogbnf is None: # compiler directives compilerDirective = Combine( "`" + \ oneOf("define undef ifdef else endif default_nettype " "include resetall timescale unconnected_drive " "nounconnected_drive celldefine endcelldefine") + \ restOfLine ).setName("compilerDirective") # primitives SEMI,COLON,LPAR,RPAR,LBRACE,RBRACE,LBRACK,RBRACK,DOT,COMMA,EQ = map(Literal,";:(){}[].,=") identLead = alphas+"$_" identBody = alphanums+"$_" identifier1 = Regex( r"\.?["+identLead+"]["+identBody+"]*(\.["+identLead+"]["+identBody+"]*)*" ).setName("baseIdent") identifier2 = Regex(r"\\\S+").setParseAction(lambda t:t[0][1:]).setName("escapedIdent")#.setDebug() identifier = identifier1 | identifier2 assert(identifier2 == r'\abc') hexnums = nums + "abcdefABCDEF" + "_?" base = Regex("'[bBoOdDhH]").setName("base") basedNumber = Combine( Optional( Word(nums + "_") ) + base + Word(hexnums+"xXzZ"), joinString=" ", adjacent=False ).setName("basedNumber") #~ number = ( basedNumber | Combine( Word( "+-"+spacedNums, spacedNums ) + #~ Optional( DOT + Optional( Word( spacedNums ) ) ) + #~ Optional( e + Word( "+-"+spacedNums, spacedNums ) ) ).setName("numeric") ) number = ( basedNumber | \ Regex(r"[+-]?[0-9_]+(\.[0-9_]*)?([Ee][+-]?[0-9_]+)?") \ ).setName("numeric") #~ decnums = nums + "_" #~ octnums = "01234567" + "_" expr = Forward().setName("expr") concat = Group( LBRACE + delimitedList( expr ) + RBRACE ) multiConcat = Group("{" + expr + concat + "}").setName("multiConcat") funcCall = Group(identifier + LPAR + Optional( delimitedList( expr ) ) + RPAR).setName("funcCall") subscrRef = Group(LBRACK + delimitedList( expr, COLON ) + RBRACK) subscrIdentifier = Group( identifier + Optional( subscrRef ) ) #~ scalarConst = "0" | (( FollowedBy('1') + oneOf("1'b0 1'b1 1'bx 1'bX 1'B0 1'B1 1'Bx 1'BX 1") )) scalarConst = Regex("0|1('[Bb][01xX])?") mintypmaxExpr = Group( expr + COLON + expr + COLON + expr ).setName("mintypmax") primary = ( number | (LPAR + mintypmaxExpr + RPAR ) | ( LPAR + Group(expr) + RPAR ).setName("nestedExpr") | multiConcat | concat | dblQuotedString | funcCall | subscrIdentifier ) unop = oneOf( "+ - ! ~ & ~& | ^| ^ ~^" ).setName("unop") binop = oneOf( "+ - * / % == != === !== && " "|| < <= > >= & | ^ ^~ >> << ** <<< >>>" ).setName("binop") expr << ( ( unop + expr ) | # must be first! ( primary + "?" + expr + COLON + expr ) | ( primary + Optional( binop + expr ) ) ) lvalue = subscrIdentifier | concat # keywords if_ = Keyword("if") else_ = Keyword("else") edge = Keyword("edge") posedge = Keyword("posedge") negedge = Keyword("negedge") specify = Keyword("specify") endspecify = Keyword("endspecify") fork = Keyword("fork") join = Keyword("join") begin = Keyword("begin") end = Keyword("end") default = Keyword("default") forever = Keyword("forever") repeat = Keyword("repeat") while_ = Keyword("while") for_ = Keyword("for") case = oneOf( "case casez casex" ) endcase = Keyword("endcase") wait = Keyword("wait") disable = Keyword("disable") deassign = Keyword("deassign") force = Keyword("force") release = Keyword("release") assign = Keyword("assign") eventExpr = Forward() eventTerm = ( posedge + expr ) | ( negedge + expr ) | expr | ( LPAR + eventExpr + RPAR ) eventExpr << ( Group( delimitedList( eventTerm, Keyword("or") ) ) ) eventControl = Group( "@" + ( ( LPAR + eventExpr + RPAR ) | identifier | "*" ) ).setName("eventCtrl") delayArg = ( number | Word(alphanums+"$_") | #identifier | ( LPAR + Group( delimitedList( mintypmaxExpr | expr ) ) + RPAR ) ).setName("delayArg")#.setDebug() delay = Group( "#" + delayArg ).setName("delay")#.setDebug() delayOrEventControl = delay | eventControl assgnmt = Group( lvalue + EQ + Optional( delayOrEventControl ) + expr ).setName( "assgnmt" ) nbAssgnmt = Group(( lvalue + "<=" + Optional( delay ) + expr ) | ( lvalue + "<=" + Optional( eventControl ) + expr )).setName( "nbassgnmt" ) range = LBRACK + expr + COLON + expr + RBRACK paramAssgnmt = Group( identifier + EQ + expr ).setName("paramAssgnmt") parameterDecl = Group( "parameter" + Optional( range ) + delimitedList( paramAssgnmt ) + SEMI).setName("paramDecl") inputDecl = Group( "input" + Optional( range ) + delimitedList( identifier ) + SEMI ) outputDecl = Group( "output" + Optional( range ) + delimitedList( identifier ) + SEMI ) inoutDecl = Group( "inout" + Optional( range ) + delimitedList( identifier ) + SEMI ) regIdentifier = Group( identifier + Optional( LBRACK + expr + COLON + expr + RBRACK ) ) regDecl = Group( "reg" + Optional("signed") + Optional( range ) + delimitedList( regIdentifier ) + SEMI ).setName("regDecl") timeDecl = Group( "time" + delimitedList( regIdentifier ) + SEMI ) integerDecl = Group( "integer" + delimitedList( regIdentifier ) + SEMI ) strength0 = oneOf("supply0 strong0 pull0 weak0 highz0") strength1 = oneOf("supply1 strong1 pull1 weak1 highz1") driveStrength = Group( LPAR + ( ( strength0 + COMMA + strength1 ) | ( strength1 + COMMA + strength0 ) ) + RPAR ).setName("driveStrength") nettype = oneOf("wire tri tri1 supply0 wand triand tri0 supply1 wor trior trireg") expandRange = Optional( oneOf("scalared vectored") ) + range realDecl = Group( "real" + delimitedList( identifier ) + SEMI ) eventDecl = Group( "event" + delimitedList( identifier ) + SEMI ) blockDecl = ( parameterDecl | regDecl | integerDecl | realDecl | timeDecl | eventDecl ) stmt = Forward().setName("stmt")#.setDebug() stmtOrNull = stmt | SEMI caseItem = ( delimitedList( expr ) + COLON + stmtOrNull ) | \ ( default + Optional(":") + stmtOrNull ) stmt << Group( ( begin + Group( ZeroOrMore( stmt ) ) + end ).setName("begin-end") | ( if_ + Group(LPAR + expr + RPAR) + stmtOrNull + Optional( else_ + stmtOrNull ) ).setName("if") | ( delayOrEventControl + stmtOrNull ) | ( case + LPAR + expr + RPAR + OneOrMore( caseItem ) + endcase ) | ( forever + stmt ) | ( repeat + LPAR + expr + RPAR + stmt ) | ( while_ + LPAR + expr + RPAR + stmt ) | ( for_ + LPAR + assgnmt + SEMI + Group( expr ) + SEMI + assgnmt + RPAR + stmt ) | ( fork + ZeroOrMore( stmt ) + join ) | ( fork + COLON + identifier + ZeroOrMore( blockDecl ) + ZeroOrMore( stmt ) + end ) | ( wait + LPAR + expr + RPAR + stmtOrNull ) | ( "->" + identifier + SEMI ) | ( disable + identifier + SEMI ) | ( assign + assgnmt + SEMI ) | ( deassign + lvalue + SEMI ) | ( force + assgnmt + SEMI ) | ( release + lvalue + SEMI ) | ( begin + COLON + identifier + ZeroOrMore( blockDecl ) + ZeroOrMore( stmt ) + end ).setName("begin:label-end") | # these *have* to go at the end of the list!!! ( assgnmt + SEMI ) | ( nbAssgnmt + SEMI ) | ( Combine( Optional("$") + identifier ) + Optional( LPAR + delimitedList(expr|empty) + RPAR ) + SEMI ) ).setName("stmtBody") """ x::=<blocking_assignment> ; x||= <non_blocking_assignment> ; x||= if ( <expression> ) <statement_or_null> x||= if ( <expression> ) <statement_or_null> else <statement_or_null> x||= case ( <expression> ) <case_item>+ endcase x||= casez ( <expression> ) <case_item>+ endcase x||= casex ( <expression> ) <case_item>+ endcase x||= forever <statement> x||= repeat ( <expression> ) <statement> x||= while ( <expression> ) <statement> x||= for ( <assignment> ; <expression> ; <assignment> ) <statement> x||= <delay_or_event_control> <statement_or_null> x||= wait ( <expression> ) <statement_or_null> x||= -> <name_of_event> ; x||= <seq_block> x||= <par_block> x||= <task_enable> x||= <system_task_enable> x||= disable <name_of_task> ; x||= disable <name_of_block> ; x||= assign <assignment> ; x||= deassign <lvalue> ; x||= force <assignment> ; x||= release <lvalue> ; """ alwaysStmt = Group( "always" + Optional(eventControl) + stmt ).setName("alwaysStmt") initialStmt = Group( "initial" + stmt ).setName("initialStmt") chargeStrength = Group( LPAR + oneOf( "small medium large" ) + RPAR ).setName("chargeStrength") continuousAssign = Group( assign + Optional( driveStrength ) + Optional( delay ) + delimitedList( assgnmt ) + SEMI ).setName("continuousAssign") tfDecl = ( parameterDecl | inputDecl | outputDecl | inoutDecl | regDecl | timeDecl | integerDecl | realDecl ) functionDecl = Group( "function" + Optional( range | "integer" | "real" ) + identifier + SEMI + Group( OneOrMore( tfDecl ) ) + Group( ZeroOrMore( stmt ) ) + "endfunction" ) inputOutput = oneOf("input output") netDecl1Arg = ( nettype + Optional( expandRange ) + Optional( delay ) + Group( delimitedList( ~inputOutput + identifier ) ) ) netDecl2Arg = ( "trireg" + Optional( chargeStrength ) + Optional( expandRange ) + Optional( delay ) + Group( delimitedList( ~inputOutput + identifier ) ) ) netDecl3Arg = ( nettype + Optional( driveStrength ) + Optional( expandRange ) + Optional( delay ) + Group( delimitedList( assgnmt ) ) ) netDecl1 = Group(netDecl1Arg + SEMI).setName("netDecl1") netDecl2 = Group(netDecl2Arg + SEMI).setName("netDecl2") netDecl3 = Group(netDecl3Arg + SEMI).setName("netDecl3") gateType = oneOf("and nand or nor xor xnor buf bufif0 bufif1 " "not notif0 notif1 pulldown pullup nmos rnmos " "pmos rpmos cmos rcmos tran rtran tranif0 " "rtranif0 tranif1 rtranif1" ) gateInstance = Optional( Group( identifier + Optional( range ) ) ) + \ LPAR + Group( delimitedList( expr ) ) + RPAR gateDecl = Group( gateType + Optional( driveStrength ) + Optional( delay ) + delimitedList( gateInstance) + SEMI ) udpInstance = Group( Group( identifier + Optional(range | subscrRef) ) + LPAR + Group( delimitedList( expr ) ) + RPAR ) udpInstantiation = Group( identifier - Optional( driveStrength ) + Optional( delay ) + delimitedList( udpInstance ) + SEMI ).setName("udpInstantiation") parameterValueAssignment = Group( Literal("#") + LPAR + Group( delimitedList( expr ) ) + RPAR ) namedPortConnection = Group( DOT + identifier + LPAR + expr + RPAR ).setName("namedPortConnection")#.setDebug() assert(r'.\abc (abc )' == namedPortConnection) modulePortConnection = expr | empty #~ moduleInstance = Group( Group ( identifier + Optional(range) ) + #~ ( delimitedList( modulePortConnection ) | #~ delimitedList( namedPortConnection ) ) ) inst_args = Group( LPAR + (delimitedList( namedPortConnection ) | delimitedList( modulePortConnection )) + RPAR).setName("inst_args") moduleInstance = Group( Group ( identifier + Optional(range) ) + inst_args ).setName("moduleInstance")#.setDebug() moduleInstantiation = Group( identifier + Optional( parameterValueAssignment ) + delimitedList( moduleInstance ).setName("moduleInstanceList") + SEMI ).setName("moduleInstantiation") parameterOverride = Group( "defparam" + delimitedList( paramAssgnmt ) + SEMI ) task = Group( "task" + identifier + SEMI + ZeroOrMore( tfDecl ) + stmtOrNull + "endtask" ) specparamDecl = Group( "specparam" + delimitedList( paramAssgnmt ) + SEMI ) pathDescr1 = Group( LPAR + subscrIdentifier + "=>" + subscrIdentifier + RPAR ) pathDescr2 = Group( LPAR + Group( delimitedList( subscrIdentifier ) ) + "*>" + Group( delimitedList( subscrIdentifier ) ) + RPAR ) pathDescr3 = Group( LPAR + Group( delimitedList( subscrIdentifier ) ) + "=>" + Group( delimitedList( subscrIdentifier ) ) + RPAR ) pathDelayValue = Group( ( LPAR + Group( delimitedList( mintypmaxExpr | expr ) ) + RPAR ) | mintypmaxExpr | expr ) pathDecl = Group( ( pathDescr1 | pathDescr2 | pathDescr3 ) + EQ + pathDelayValue + SEMI ).setName("pathDecl") portConditionExpr = Forward() portConditionTerm = Optional(unop) + subscrIdentifier portConditionExpr << portConditionTerm + Optional( binop + portConditionExpr ) polarityOp = oneOf("+ -") levelSensitivePathDecl1 = Group( if_ + Group(LPAR + portConditionExpr + RPAR) + subscrIdentifier + Optional( polarityOp ) + "=>" + subscrIdentifier + EQ + pathDelayValue + SEMI ) levelSensitivePathDecl2 = Group( if_ + Group(LPAR + portConditionExpr + RPAR) + LPAR + Group( delimitedList( subscrIdentifier ) ) + Optional( polarityOp ) + "*>" + Group( delimitedList( subscrIdentifier ) ) + RPAR + EQ + pathDelayValue + SEMI ) levelSensitivePathDecl = levelSensitivePathDecl1 | levelSensitivePathDecl2 edgeIdentifier = posedge | negedge edgeSensitivePathDecl1 = Group( Optional( if_ + Group(LPAR + expr + RPAR) ) + LPAR + Optional( edgeIdentifier ) + subscrIdentifier + "=>" + LPAR + subscrIdentifier + Optional( polarityOp ) + COLON + expr + RPAR + RPAR + EQ + pathDelayValue + SEMI ) edgeSensitivePathDecl2 = Group( Optional( if_ + Group(LPAR + expr + RPAR) ) + LPAR + Optional( edgeIdentifier ) + subscrIdentifier + "*>" + LPAR + delimitedList( subscrIdentifier ) + Optional( polarityOp ) + COLON + expr + RPAR + RPAR + EQ + pathDelayValue + SEMI ) edgeSensitivePathDecl = edgeSensitivePathDecl1 | edgeSensitivePathDecl2 edgeDescr = oneOf("01 10 0x x1 1x x0").setName("edgeDescr") timCheckEventControl = Group( posedge | negedge | (edge + LBRACK + delimitedList( edgeDescr ) + RBRACK )) timCheckCond = Forward() timCondBinop = oneOf("== === != !==") timCheckCondTerm = ( expr + timCondBinop + scalarConst ) | ( Optional("~") + expr ) timCheckCond << ( ( LPAR + timCheckCond + RPAR ) | timCheckCondTerm ) timCheckEvent = Group( Optional( timCheckEventControl ) + subscrIdentifier + Optional( "&&&" + timCheckCond ) ) timCheckLimit = expr controlledTimingCheckEvent = Group( timCheckEventControl + subscrIdentifier + Optional( "&&&" + timCheckCond ) ) notifyRegister = identifier systemTimingCheck1 = Group( "$setup" + LPAR + timCheckEvent + COMMA + timCheckEvent + COMMA + timCheckLimit + Optional( COMMA + notifyRegister ) + RPAR + SEMI ) systemTimingCheck2 = Group( "$hold" + LPAR + timCheckEvent + COMMA + timCheckEvent + COMMA + timCheckLimit + Optional( COMMA + notifyRegister ) + RPAR + SEMI ) systemTimingCheck3 = Group( "$period" + LPAR + controlledTimingCheckEvent + COMMA + timCheckLimit + Optional( COMMA + notifyRegister ) + RPAR + SEMI ) systemTimingCheck4 = Group( "$width" + LPAR + controlledTimingCheckEvent + COMMA + timCheckLimit + Optional( COMMA + expr + COMMA + notifyRegister ) + RPAR + SEMI ) systemTimingCheck5 = Group( "$skew" + LPAR + timCheckEvent + COMMA + timCheckEvent + COMMA + timCheckLimit + Optional( COMMA + notifyRegister ) + RPAR + SEMI ) systemTimingCheck6 = Group( "$recovery" + LPAR + controlledTimingCheckEvent + COMMA + timCheckEvent + COMMA + timCheckLimit + Optional( COMMA + notifyRegister ) + RPAR + SEMI ) systemTimingCheck7 = Group( "$setuphold" + LPAR + timCheckEvent + COMMA + timCheckEvent + COMMA + timCheckLimit + COMMA + timCheckLimit + Optional( COMMA + notifyRegister ) + RPAR + SEMI ) systemTimingCheck = (FollowedBy('$') + ( systemTimingCheck1 | systemTimingCheck2 | systemTimingCheck3 | systemTimingCheck4 | systemTimingCheck5 | systemTimingCheck6 | systemTimingCheck7 )).setName("systemTimingCheck") sdpd = if_ + Group(LPAR + expr + RPAR) + \ ( pathDescr1 | pathDescr2 ) + EQ + pathDelayValue + SEMI specifyItem = ~Keyword("endspecify") +( specparamDecl | pathDecl | levelSensitivePathDecl | edgeSensitivePathDecl | systemTimingCheck | sdpd ) """ x::= <specparam_declaration> x||= <path_declaration> x||= <level_sensitive_path_declaration> x||= <edge_sensitive_path_declaration> x||= <system_timing_check> x||= <sdpd> """ specifyBlock = Group( "specify" + ZeroOrMore( specifyItem ) + "endspecify" ).setName("specifyBlock") moduleItem = ~Keyword("endmodule") + ( parameterDecl | inputDecl | outputDecl | inoutDecl | regDecl | netDecl3 | netDecl1 | netDecl2 | timeDecl | integerDecl | realDecl | eventDecl | gateDecl | parameterOverride | continuousAssign | specifyBlock | initialStmt | alwaysStmt | task | functionDecl | # these have to be at the end - they start with identifiers moduleInstantiation | udpInstantiation ) """ All possible moduleItems, from Verilog grammar spec x::= <parameter_declaration> x||= <input_declaration> x||= <output_declaration> x||= <inout_declaration> ?||= <net_declaration> (spec does not seem consistent for this item) x||= <reg_declaration> x||= <time_declaration> x||= <integer_declaration> x||= <real_declaration> x||= <event_declaration> x||= <gate_declaration> x||= <UDP_instantiation> x||= <module_instantiation> x||= <parameter_override> x||= <continuous_assign> x||= <specify_block> x||= <initial_statement> x||= <always_statement> x||= <task> x||= <function> """ portRef = subscrIdentifier portExpr = portRef | Group( LBRACE + delimitedList( portRef ) + RBRACE ) port = portExpr | Group( ( DOT + identifier + LPAR + portExpr + RPAR ) ) moduleHdr = Group ( oneOf("module macromodule") + identifier + Optional( LPAR + Group( Optional( delimitedList( Group(oneOf("input output") + (netDecl1Arg | netDecl2Arg | netDecl3Arg) ) | port ) ) ) + RPAR ) + SEMI ).setName("moduleHdr") module = Group( moduleHdr + Group( ZeroOrMore( moduleItem ) ) + "endmodule" ).setName("module")#.setDebug() udpDecl = outputDecl | inputDecl | regDecl #~ udpInitVal = oneOf("1'b0 1'b1 1'bx 1'bX 1'B0 1'B1 1'Bx 1'BX 1 0 x X") udpInitVal = (Regex("1'[bB][01xX]") | Regex("[01xX]")).setName("udpInitVal") udpInitialStmt = Group( "initial" + identifier + EQ + udpInitVal + SEMI ).setName("udpInitialStmt") levelSymbol = oneOf("0 1 x X ? b B") levelInputList = Group( OneOrMore( levelSymbol ).setName("levelInpList") ) outputSymbol = oneOf("0 1 x X") combEntry = Group( levelInputList + COLON + outputSymbol + SEMI ) edgeSymbol = oneOf("r R f F p P n N *") edge = Group( LPAR + levelSymbol + levelSymbol + RPAR ) | \ Group( edgeSymbol ) edgeInputList = Group( ZeroOrMore( levelSymbol ) + edge + ZeroOrMore( levelSymbol ) ) inputList = levelInputList | edgeInputList seqEntry = Group( inputList + COLON + levelSymbol + COLON + ( outputSymbol | "-" ) + SEMI ).setName("seqEntry") udpTableDefn = Group( "table" + OneOrMore( combEntry | seqEntry ) + "endtable" ).setName("table") """ <UDP> ::= primitive <name_of_UDP> ( <name_of_variable> <,<name_of_variable>>* ) ; <UDP_declaration>+ <UDP_initial_statement>? <table_definition> endprimitive """ udp = Group( "primitive" + identifier + LPAR + Group( delimitedList( identifier ) ) + RPAR + SEMI + OneOrMore( udpDecl ) + Optional( udpInitialStmt ) + udpTableDefn + "endprimitive" ) verilogbnf = OneOrMore( module | udp ) + StringEnd() verilogbnf.ignore( cppStyleComment ) verilogbnf.ignore( compilerDirective ) return verilogbnf
CALENDAR = Literal("VCALENDAR").suppress() ALARM = Literal("VALARM").suppress() # TOKENS CALPROP = oneOf("VERSION PRODID METHOD") ALMPROP = oneOf("TRIGGER") EVTPROP = oneOf("X-MOZILLA-RECUR-DEFAULT-INTERVAL \ X-MOZILLA-RECUR-DEFAULT-UNITS \ UID DTSTAMP LAST-MODIFIED X RRULE EXDATE") propval = Word(valstr) typeval = Word(valstr) typename = oneOf("VALUE MEMBER FREQ UNTIL INTERVAL") proptype = Group(SEMI + typename + EQ + typeval).suppress() calprop = Group(CALPROP + ZeroOrMore(proptype) + COLON + propval) almprop = Group(ALMPROP + ZeroOrMore(proptype) + COLON + propval) evtprop = Group(EVTPROP + ZeroOrMore(proptype) + COLON + propval).suppress() \ | "CATEGORIES" + COLON + propval.setResultsName("categories") \ | "CLASS" + COLON + propval.setResultsName("class") \ | "DESCRIPTION" + COLON + propval.setResultsName("description") \ | "DTSTART" + proptype + COLON + propval.setResultsName("begin") \ | "DTEND" + proptype + COLON + propval.setResultsName("end") \ | "LOCATION" + COLON + propval.setResultsName("location") \ | "PRIORITY" + COLON + propval.setResultsName("priority") \ | "STATUS" + COLON + propval.setResultsName("status") \ | "SUMMARY" + COLON + propval.setResultsName("summary") \ | "URL" + COLON + propval.setResultsName("url") \
| min | 7 | 43 | 7 | 15 | 82 | 98 | 1 | 37 | | max | 11 | 52 | 10 | 17 | 85 | 112 | 4 | 39 | | ave | 9 | 47 | 8 | 16 | 84 | 106 | 3 | 38 | | sdev | 1 | 3 | 1 | 1 | 1 | 3 | 1 | 1 | +-------+------+------+------+------+------+------+------+------+ """ # define grammar for datatable heading = ( Literal( "+-------+------+------+------+------+------+------+------+------+") + "| | A1 | B1 | C1 | D1 | A2 | B2 | C2 | D2 |" + "+=======+======+======+======+======+======+======+======+======+" ).suppress() vert = Literal("|").suppress() number = Word(nums) rowData = Group(vert + Word(alphas) + vert + delimitedList(number, "|") + vert) trailing = Literal( "+-------+------+------+------+------+------+------+------+------+" ).suppress() datatable = heading + Dict(ZeroOrMore(rowData)) + trailing # now parse data and print results data = datatable.parseString(testData) print(data) pprint.pprint(data.asList()) print("data keys=", list(data.keys())) print("data['min']=", data['min']) print("data.max", data.max)
# # cLibHeader.py # # A simple parser to extract API doc info from a C header file # # Copyright, 2012 - Paul McGuire # from pyparsingOD import Word, alphas, alphanums, Combine, oneOf, Optional, delimitedList, Group, Keyword testdata = """ int func1(float *vec, int len, double arg1); int func2(float **arr, float *vec, int len, double arg1, double arg2); """ ident = Word(alphas, alphanums + "_") vartype = Combine(oneOf("float double int char") + Optional(Word("*")), adjacent=False) arglist = delimitedList(Group(vartype("type") + ident("name"))) functionCall = Keyword("int") + ident("name") + "(" + arglist( "args") + ")" + ";" for fn, s, e in functionCall.scanString(testdata): print(fn.name) for a in fn.args: print(" - %(name)s (%(type)s)" % a)
# The parse action on this production converts repetitions of constants into a single string. pound_char = Combine( OneOrMore((Literal("#").suppress() + Word(nums)).setParseAction(lambda s, l, t: to_chr(int(t[0]))))) # This is the second part of the hack. It combines the various "unquoted" # partial strings into a single one. Then, the parse action puts # a single matched pair of quotes around it. delphi_string = Combine( OneOrMore(CONCAT | pound_char | unquoted_sglQuotedString), adjacent=False).setParseAction(lambda s, l, t: "'%s'" % t[0]) string_value = delphi_string | base16_value list_value = LBRACE + Optional( Group(delimitedList(identifier | number_value | string_value))) + RBRACE paren_list_value = LPAREN + ZeroOrMore(identifier | number_value | string_value) + RPAREN item_list_entry = ITEM + ZeroOrMore(attribute_value_pair) + END item_list = LANGLE + ZeroOrMore(item_list_entry) + RANGLE generic_value = identifier value = item_list | number_value | string_value | list_value | paren_list_value | generic_value category_attribute = CATEGORIES + PERIOD + oneOf( "strings itemsvisibles visibles", True) event_attribute = oneOf( "onactivate onclosequery onclose oncreate ondeactivate onhide onshow", True) font_attribute = FONT + PERIOD + oneOf("charset color height name style", True)