def define_document_grammar(self):
    """ Define document grammar rules """
    # NOTE(review): the BNF sketch below is kept verbatim; it appears to
    # contain two typos -- "numbers ::= 0-1" (the code matches 0-9 via
    # `nums`) and "agen" (presumably "age"). Confirm before editing it.
    """ Backus Naur Form (BNF) chars ::= a-zA-Z numbers ::= 0-1 word ::= chars|numbers+ number ::= numbers+ quote ::= '"' comma ::= ',' name ::= quote word+ quote class ::= number+ age ::= number+ sex ::= word survived ::= number entry ::= name class agen sex survived """
    # Punctuation used inside the quoted name field.
    quote = Literal('"')
    comma = Literal(',')
    # Quoted name: one or more alphabetic words; embedded commas and the
    # surrounding quotes are suppressed from the results.
    name = Suppress(quote) + OneOrMore(Word(alphas) | Suppress(comma)) + Suppress(quote)
    ship_class = Word(alphanums)
    age = Word(nums)
    sex = Word(alphanums)
    survived = Word(nums)
    # One record: name, class, age, sex, survived -- each field labelled
    # for named access on the parse results.
    entry = name.setResultsName('name') + ship_class.setResultsName('ship_class') \
        + age.setResultsName('age') + sex.setResultsName('sex') \
        + survived.setResultsName('survived')
    # store final expression
    self.final_expression = entry
def _create_parser(self):
    """Build and return the pyparsing grammar for the state-machine DSL.

    The returned expression parses a machine definition of the form
    ``name { states ...; transitions { src => dst, ...; } ... } ;`` with
    optional ``: parent [parallel]`` sub-states. C++-style comments are
    ignored.
    """
    # Case-insensitive keywords of the DSL.
    (TRANSITIONS, INITIAL_STATE, END_STATE, STATES, PARALLEL) = list(
        map(
            CaselessKeyword,
            """ transitions initial_state end_state states parallel""".split()))
    # Punctuation (suppressed from the results).
    semicolon = Suppress(Word(";"))
    op = Suppress(Word("{"))
    cl = Suppress(Word("}"))
    to = Suppress(CaselessLiteral("=>"))
    identifier = Word(alphas + "_", alphanums + "_")
    list_identifiers = delimitedList(identifier)
    # parse States: "states a, b, c;"
    stateslist = Group(Suppress(STATES) + list_identifiers
                       + semicolon).setResultsName('states')
    # parse Transitions: "src => d1, d2;" entries inside a braced block.
    transition = identifier.setResultsName(
        'src') + to + list_identifiers.setResultsName('dests') + semicolon
    transitions_list = Group(OneOrMore(
        Group(transition))).setResultsName("transitions")
    transitions = Suppress(
        TRANSITIONS) + op + transitions_list + cl + semicolon
    # parse initialstate and finalstate
    initialstate = Suppress(INITIAL_STATE) + identifier.setResultsName(
        'initialstate') + semicolon
    finalstate = Suppress(END_STATE) + identifier.setResultsName(
        'finalstate') + semicolon
    # parse machine body: any mix of the four clause kinds, in any order.
    contents = stateslist | initialstate | finalstate | transitions
    machine_content = op + ZeroOrMore(contents) + cl + semicolon
    # Sub-state: ": parent [parallel] { ... };" -- the 'parallel' flag is
    # converted to True by the parse action when present.
    parent = Suppress(Word(":")) + identifier.setResultsName('parent')
    substate = parent + Optional(
        PARALLEL.setResultsName('parallel').setParseAction(lambda t: True)
    ) + Group(machine_content).setResultsName("contents")
    machine = identifier.setResultsName("name") + Group(
        machine_content).setResultsName("contents") + Group(
        ZeroOrMore(Group(substate))).setResultsName("substates")
    SMDSL = Group(
        machine.ignore(cppStyleComment)).setResultsName("machine")
    return SMDSL
def expr(self) -> ParserElement:
    """Return the grammar for a ``{color:...}text{color}`` macro.

    The colour spec may be a ``#hex`` value, a bare colour word, or an
    ``rgba(r,g,b,a)`` call; the wrapped text is captured via ``SkipTo``.
    The returned element has ``self.action`` attached as its parse action.
    """
    INTENSITY = Word(nums)
    ALPHA = Word(nums + ".")
    # Separator between rgba components: a comma plus optional whitespace.
    SEP = "," + Optional(White())
    RGBA = (CaselessLiteral("rgba(") + INTENSITY.setResultsName("red") +
            SEP + INTENSITY.setResultsName("green") + SEP +
            INTENSITY.setResultsName("blue") + SEP + ALPHA + ")")
    # '^' (Or) picks the longest of: hex literal, colour name, rgba form.
    COLOR = Word("#", hexnums) ^ Word(alphas) ^ RGBA
    expr = Combine(
        "{color:" + COLOR.setResultsName("color") + "}" +
        SkipTo("{color}").setResultsName("text") + "{color}",
    )
    return expr.setParseAction(self.action)
def nexus_iter(infile): import pyparsing pyparsing.ParserElement.enablePackrat() from pyparsing import Word, Literal, QuotedString, CaselessKeyword, \ OneOrMore, Group, Optional, Suppress, Regex, Dict ## beginblock = Suppress(CaselessKeyword("begin") + ## CaselessKeyword("trees") + ";") ## endblock = Suppress((CaselessKeyword("end") | ## CaselessKeyword("endblock")) + ";") comment = Optional(Suppress("[&") + Regex(r'[^]]+') + Suppress("]")) ## translate = CaselessKeyword("translate").suppress() name = Word(string.letters+string.digits+"_.") | QuotedString("'") ## ttrec = Group(Word(string.digits).setResultsName("number") + ## name.setResultsName("name") + ## Optional(",").suppress()) ## ttable = Group(translate + OneOrMore(ttrec) + Suppress(";")) newick = Regex(r'[^;]+;') tree = (CaselessKeyword("tree").suppress() + Optional("*").suppress() + name.setResultsName("tree_name") + comment.setResultsName("tree_comment") + Suppress("=") + comment.setResultsName("root_comment") + newick.setResultsName("newick")) ## treesblock = Group(beginblock + ## Optional(ttable.setResultsName("ttable")) + ## Group(OneOrMore(tree)) + ## endblock) def not_begin(s): return s.strip().lower() != "begin trees;" def not_end(s): return s.strip().lower() not in ("end;", "endblock;") def parse_ttable(f): ttable = {} while True: s = f.next().strip() if not s: continue if s.lower() == ";": break if s[-1] == ",": s = s[:-1] k, v = s.split() ttable[k] = v if s[-1] == ";": break return ttable # read lines between "begin trees;" and "end;" f = itertools.takewhile(not_end, itertools.dropwhile(not_begin, infile)) s = f.next().strip().lower() if s != "begin trees;": print sys.stderr, "Expecting 'begin trees;', got %s" % s raise StopIteration ttable = {} while True: try: s = f.next().strip() except StopIteration: break if not s: continue if s.lower() == "translate": ttable = parse_ttable(f) print "ttable: %s" % len(ttable) elif s.split()[0].lower()=='tree': match = tree.parseString(s) yield 
nexus.Newick(match, ttable)
def parse_morse_letter(line):
    """Parse one line of the morse-code table into a parse tree.

    The first alphabetic word is captured under 'letter'; all following
    alphabetic words are collected under 'tokens'.
    """
    head = Word(alphas).setResultsName('letter')
    tail = OneOrMore(Word(alphas)).setResultsName('tokens')
    return (head + tail).parseString(line)
def parse_ampersand_comment(s):
    """Extract key=value pairs from an ampersand comment string.

    Values are single tokens, quoted strings, or '{min,max}' ranges.
    Returns a list of (key, value) tuples, with numeric values coerced
    to float where possible.  Python 2 code (``string.letters``, and
    ``map`` returning a list is relied upon).
    """
    import pyparsing
    pyparsing.ParserElement.enablePackrat()
    from pyparsing import Word, Literal, QuotedString, CaselessKeyword, \
        OneOrMore, Group, Optional, Suppress, Regex, Dict
    word = Word(string.letters + string.digits + "%_")
    key = word.setResultsName("key") + Suppress("=")
    # A value token: bare word/number, or a quoted string.
    single_value = (Word(string.letters + string.digits + "-.") |
                    QuotedString("'") | QuotedString('"'))
    # "{min,max}" range form.
    range_value = Group(
        Suppress("{") + single_value.setResultsName("min") + Suppress(",")
        + single_value.setResultsName("max") + Suppress("}"))
    pair = (key + (single_value | range_value).setResultsName("value"))
    g = OneOrMore(pair)
    d = []
    for x in g.searchString(s):
        v = x.value
        if type(v) == str:
            # Scalar value: try numeric coercion, fall back to the string.
            try:
                v = float(v)
            except ValueError:
                pass
        else:
            # Range value: coerce both endpoints when possible.
            try:
                v = map(float, v.asList())
            except ValueError:
                pass
        d.append((x.key, v))
    return d
def parse_connection_str(connstr):
    """Parse a node-connection string of the form
    ``head(outs) -> middle(ins|outs) -> ... -> tail(ins)``, with multiple
    chains separated by ';'.  Validates the result with
    check_numconnections() and returns the parse results.
    """
    ## Grammar for connection syntax
    valid = alphas + "0123456789" + "_.@"
    identifier = Word(valid)
    nodename = identifier.setResultsName('nodename')
    outputnames = delimitedList(identifier).setResultsName('outputnames')
    inputnames = delimitedList(identifier).setResultsName('inputnames')
    # middle nodes have both inputs and outputs
    middlenode = Group(
        nodename + Suppress('(') + inputnames
        + Optional("|" + outputnames) + Suppress(")")
    ).setResultsName('middlenode')
    # first node has only outputs
    headnode = (nodename + Suppress("(") + outputnames
                + Suppress(")")).setResultsName('headnode')
    # last node has only inputs
    tailnode = (nodename + Suppress("(") + inputnames
                + Suppress(")")).setResultsName('tailnode')
    # connect head -> [middle ->] tail; each middle must be followed by
    # another "->" so the final node is always the tail.
    middles = Group(
        ZeroOrMore(Suppress("->") + middlenode + FollowedBy("->"))
    ).setResultsName('middlenodes')
    connect = Group(
        headnode + middles + Suppress("->") + tailnode
    ).setResultsName('nodes')
    connectlist = Group(
        connect + ZeroOrMore(Suppress(";") + connect)
    ).setResultsName('connects')
    parsed = connectlist.parseString(connstr)
    check_numconnections(parsed)
    return parsed
def getLogLineBNF_DBpedia36():
    """Return the (lazily built, module-cached) grammar for one line of a
    DBpedia 3.6 access log in combined-log-like format, where the client
    address is a hash rather than a dotted IP.
    """
    global logLineBNF
    if logLineBNF is None:
        integer = Word( nums )
        # Dotted-quad form; unused by the final rule but kept for reference.
        ipAddress = delimitedList( integer, ".", combine=True )
        # Hashed client address: any run of digits/letters.
        hashipAddress = Word(nums+alphas)
        timeZoneOffset = Word("+-",nums)
        # Three-letter month: initial uppercase, rest lowercase.
        month = Word(string.uppercase, string.lowercase, exact=3)
        # "[dd/Mon/yyyy hh:mm:ss +zzzz]"
        serverDateTime = Group( Suppress("[") +
                                Combine( integer + "/" + month + "/" + integer +
                                         " " + integer + ":" + integer + ":" + integer ) +
                                timeZoneOffset +
                                Suppress("]") )
        logLineBNF = ( hashipAddress.setResultsName("ipAddr") +
                       Suppress("-") +
                       ("-" | Word( alphas+nums+"@._" )).setResultsName("auth") +
                       serverDateTime.setResultsName("timestamp") +
                       # Request line; getCmdFields2 splits it into fields.
                       dblQuotedString.setResultsName("cmd").setParseAction(getCmdFields2) +
                       ("-" is used for missing status/size fields.
                        integer | "-").setResultsName("statusCode") +
                       (integer | "-").setResultsName("numBytesSent") +
                       dblQuotedString.setResultsName("referrer").setParseAction(removeQuotes) +
                       dblQuotedString.setResultsName("clientSfw").setParseAction(removeQuotes) )
    return logLineBNF
def parse_payload(self, payload):
    """Scan *payload* for function-call expressions and return a list of
    ``{"action": name, "arguments": [...]}`` dicts, one per call found.

    Arguments may be identifiers, numbers, quoted strings, or '+'/'-'
    infix expressions over those, and calls may nest.
    """
    expr = Forward()
    LPAR, RPAR, SEMI = map(Suppress, "();")
    ident = Word(alphas + "_", alphanums + "_")
    # name( arg, arg, ... ) -- the argument list is grouped as one token.
    call = ident.setResultsName("name") + LPAR + Group(
        Optional(delimitedList(expr))) + RPAR
    int_lit = Regex(r"-?\d+")
    real_lit = Regex(r"-?\d+\.\d*")
    dquoted = QuotedString(quoteChar='"', escChar='\\', unquoteResults=False)
    squoted = QuotedString(quoteChar="'", escChar='\\', unquoteResults=False)
    # Order matters for MatchFirst: identifier, then real, then integer.
    atom = (ident | real_lit | int_lit | dquoted | squoted)
    expr << infixNotation(atom, [(oneOf('+ -'), 2, opAssoc.LEFT)])
    results = []
    for tokens, _, _ in call.scanString(payload):
        args = tokens[1]
        results.append({
            "action": tokens[0],
            "arguments": args.asList() if type(args) != str else args,
        })
    return results
def parse_sexp(data): '''parse sexp/S-expression format and return a python list''' # define punctuation literals LPAR, RPAR, LBRK, RBRK, LBRC, RBRC, VBAR = map(Suppress, "()[]{}|") decimal = Word("123456789", nums).setParseAction(lambda t: int(t[0])) bytes = Word(printables) raw = Group(decimal.setResultsName("len") + Suppress(":") + bytes).setParseAction(OtrPrivateKeys.verifyLen) token = Word(alphanums + "-./_:*+=") base64_ = Group(Optional(decimal, default=None).setResultsName("len") + VBAR + OneOrMore(Word( alphanums +"+/=" )).setParseAction(lambda t: b64decode("".join(t))) + VBAR).setParseAction(OtrPrivateKeys.verifyLen) hexadecimal = ("#" + OneOrMore(Word(hexnums)) + "#")\ .setParseAction(lambda t: int("".join(t[1:-1]),16)) qString = Group(Optional(decimal, default=None).setResultsName("len") + dblQuotedString.setParseAction(removeQuotes)).setParseAction(OtrPrivateKeys.verifyLen) simpleString = raw | token | base64_ | hexadecimal | qString display = LBRK + simpleString + RBRK string_ = Optional(display) + simpleString sexp = Forward() sexpList = Group(LPAR + ZeroOrMore(sexp) + RPAR) sexp << ( string_ | sexpList ) try: sexpr = sexp.parseString(data) return sexpr.asList()[0][1:] except ParseFatalException, pfe: print("Error:", pfe.msg) print(pfe.loc) print(pfe.markInputline())
def parse_connection_str(connstr):
    """Parse a node-connection string of the form
    ``head(outs) -> middle(ins|outs) -> ... -> tail(ins)``, with multiple
    chains separated by ';'.  Validates with check_numconnections() and
    returns the parse results.
    """
    ## Grammar for connection syntax
    digits="0123456789"
    othervalid="_.@"
    identifier= Word(alphas+digits+othervalid)
    nodename=identifier.setResultsName('nodename')
    outputnames = delimitedList( identifier ).setResultsName('outputnames')
    inputnames = delimitedList( identifier ).setResultsName('inputnames')
    # middle nodes have both inputs and outputs
    middlenode= Group( nodename + Suppress('(') + inputnames +
                       Optional( "|" + outputnames) + Suppress(")")
                       ).setResultsName('middlenode')
    # first node has only outputs
    headnode = (nodename + Suppress("(") + outputnames +
                Suppress(")")).setResultsName('headnode')
    # last node has only inputs
    tailnode = (nodename + Suppress("(") + inputnames +
                Suppress(")")).setResultsName('tailnode')
    # connect head -> [middle ->] tail; FollowedBy("->") ensures the last
    # node in the chain is parsed as the tail, not a middle node.
    connect= Group( headnode + Group(ZeroOrMore(Suppress("->") \
                    + middlenode + FollowedBy("->") )).setResultsName('middlenodes') \
                    + Suppress("->")+tailnode).setResultsName('nodes')
    connectlist = Group( connect + ZeroOrMore( Suppress(";")\
                    + connect )).setResultsName('connects')
    parsed=connectlist.parseString(connstr)
    check_numconnections(parsed)
    return parsed
def parse_ampersand_comment(s):
    """Extract key=value pairs from an ampersand comment string.

    Values may be bare tokens, quoted strings, or '{min,max}' ranges.
    Returns a list of (key, value) tuples with numeric coercion to float
    where possible.  Python 2 code (``string.letters``; list-returning
    ``map``).
    """
    word = Word(string.letters + string.digits + "%_")
    key = word.setResultsName("key") + Suppress("=")
    single_value = (Word(string.letters + string.digits + "-.") |
                    QuotedString("'") | QuotedString('"'))
    # "{min,max}" range form.
    range_value = Group(
        Suppress("{") + single_value.setResultsName("min") + Suppress(",")
        + single_value.setResultsName("max") + Suppress("}"))
    pair = (key + (single_value | range_value).setResultsName("value"))
    g = OneOrMore(pair)
    d = []
    for x in g.searchString(s):
        v = x.value
        if type(v) == str:
            # Scalar: try numeric coercion, else keep the string.
            try:
                v = float(v)
            except ValueError:
                pass
        else:
            # Range: coerce both endpoints when possible.
            try:
                v = map(float, v.asList())
            except ValueError:
                pass
        d.append((x.key, v))
    return d
def parse_ampersand_comment(s):
    """Extract key=value pairs from an ampersand comment string.

    Same grammar as the sibling implementations: bare tokens, quoted
    strings, or '{min,max}' ranges; numeric values coerced to float.
    Python 2 code.
    """
    import pyparsing
    pyparsing.ParserElement.enablePackrat()
    from pyparsing import Word, Literal, QuotedString, CaselessKeyword, \
        OneOrMore, Group, Optional, Suppress, Regex, Dict
    word = Word(string.letters+string.digits+"%_")
    key = word.setResultsName("key") + Suppress("=")
    single_value = (Word(string.letters+string.digits+"-.") |
                    QuotedString("'") | QuotedString('"'))
    # "{min,max}" range form.
    range_value = Group(Suppress("{") +
                        single_value.setResultsName("min") + Suppress(",") +
                        single_value.setResultsName("max") + Suppress("}"))
    pair = (key + (single_value | range_value).setResultsName("value"))
    g = OneOrMore(pair)
    d = []
    for x in g.searchString(s):
        v = x.value
        if type(v) == str:
            # Scalar: numeric coercion where possible.
            try:
                v = float(v)
            except ValueError:
                pass
        else:
            # Range: coerce both endpoints where possible.
            try:
                v = map(float, v.asList())
            except ValueError:
                pass
        d.append((x.key, v))
    return d
def build_grammer():
    """Build the workflow-statement grammar: an optional comma-separated
    list of function names, every match collected under 'func'.
    """
    name = Word(alphas + "_", alphanums + "_")
    call = name.setResultsName("func", listAllMatches=True)
    return Optional(delimitedList(call))
def setup(self):
    """Build the relative-time expression parser.

    Recognises three forms: "[modifier] <unit> ago", "[in the] last/past
    [modifier] <unit>", and special keywords (e.g. "yesterday").  Parse
    actions convert matches via self.parseMulti/parseAdd/parseAgo/
    parseRange and the module-level time_units/add_modifiers/special maps.
    """
    # some expressions that will be reused
    # Unit keywords, mapped to their time_units values on match.
    units = []
    for unit in time_units:
        units.append(Keyword(unit))
    units = get_match_first(units)
    units = units.setResultsName("unit")
    units.setParseAction(lambda s, l, tok: time_units[tok[0]])
    # Numeric multiplier, e.g. the "5" in "5 days ago".
    multiplier = Word(nums)
    multiplier = multiplier.setResultsName("multiply")
    multiplier.setParseAction(self.parseMulti)
    # Additive modifier keywords from add_modifiers.
    adder = []
    for add in add_modifiers:
        adder.append(CL(add))
    adder = get_match_first(adder)
    adder = adder.setResultsName("add")
    adder.setParseAction(self.parseAdd)
    modifier = (multiplier | adder)  # + FollowedBy(units)
    # ago
    #
    # e.g 5 days ago
    ago = Optional(modifier) + units + Suppress(Word("ago"))
    ago.setParseAction(self.parseAgo)
    # time range
    #
    # e.g in the last 10 days
    time_range = Suppress(Optional(
        CL("in the"))) + \
        Suppress(Word("last") | Word("past")) + \
        Optional(modifier) + \
        units
    time_range.setParseAction(self.parseRange)
    # special keyword handling
    #
    # e.g yesterday
    # only handles yesterday right now, maybe need to be modified to do
    # more
    special_expr = []
    for expr in special:
        special_expr.append(
            Keyword(expr).setParseAction(
                lambda s, l, tok: special[tok[0]]))
    special_expr = get_match_first(special_expr)
    special_expr = special_expr.setResultsName("unit")
    special_expr.setParseAction(self.parseAgo)
    parser = (special_expr | ago | time_range)
    return parser
def setup(self):
    """Build the relative-time expression parser (duplicate of the
    sibling implementation).

    Recognises "[modifier] <unit> ago", "[in the] last/past [modifier]
    <unit>", and special keywords such as "yesterday"; parse actions
    delegate to self.parseMulti/parseAdd/parseAgo/parseRange.
    """
    # some expressions that will be reused
    units = []
    for unit in time_units:
        units.append(Keyword(unit))
    units = get_match_first(units)
    units = units.setResultsName("unit")
    # Map the matched keyword to its time_units value.
    units.setParseAction(lambda s, l, tok: time_units[tok[0]])
    multiplier = Word(nums)
    multiplier = multiplier.setResultsName("multiply")
    multiplier.setParseAction(self.parseMulti)
    adder = []
    for add in add_modifiers:
        adder.append(CL(add))
    adder = get_match_first(adder)
    adder = adder.setResultsName("add")
    adder.setParseAction(self.parseAdd)
    modifier = (multiplier | adder)  # + FollowedBy(units)
    # ago
    #
    # e.g 5 days ago
    ago = Optional(modifier) + units + Suppress(Word("ago"))
    ago.setParseAction(self.parseAgo)
    # time range
    #
    # e.g in the last 10 days
    time_range = Suppress(Optional(
        CL("in the"))) + \
        Suppress(Word("last") | Word("past")) + \
        Optional(modifier) + \
        units
    time_range.setParseAction(self.parseRange)
    # special keyword handling
    #
    # e.g yesterday
    # only handles yesterday right now, maybe need to be modified to do
    # more
    special_expr = []
    for expr in special:
        special_expr.append(
            Keyword(expr).setParseAction(
                lambda s, l, tok: special[tok[0]]))
    special_expr = get_match_first(special_expr)
    special_expr = special_expr.setResultsName("unit")
    special_expr.setParseAction(self.parseAgo)
    parser = (special_expr | ago | time_range)
    return parser
def parseTypes(path):
    """Parse event-type lines from *path* (via lineGen) and return a dict
    mapping uid -> event tuple.

    MsgEvent lines yield (type, src, dst, msg); NetworkPartition lines
    yield (type, p1, p2).

    FIX: the grammar was rebuilt from scratch on every loop iteration;
    it is line-independent, so it is now built once up front.  The dead
    ``wordList = Forward()`` (immediately rebound) was removed, and the
    loop-local ``msg`` result was renamed so it no longer clobbers the
    grammar's ``msg`` expression.
    """
    number = Word(nums)
    word = Word(alphanums + "-_")
    wordList = word + ZeroOrMore(',' + word)
    # NetworkPartition(Set(a,b,...), Set(c,d,...))
    par = (Literal('NetworkPartition').setResultsName('type') +
           '(' + Literal('Set') + '(' +
           wordList.setResultsName('p1') + ')' + ',' +
           Literal('Set') + '(' +
           wordList.setResultsName('p2') + ')' +
           ')')
    # Message payload: a word optionally followed by a nested (...) body.
    subType = (word + Optional(nestedExpr('(', ')'))).setResultsName('msg')
    # MsgEvent(src, dst, payload)
    msg = (Literal('MsgEvent').setResultsName('type') +
           '(' + word.setResultsName('src') + ',' +
           word.setResultsName('dst') + ',' +
           subType + ')')
    # "<n> Unique(<event>, <uid>)"
    event = (Word(nums) +
             Literal('Unique') + "(" + (msg | par) + ',' +
             number.setResultsName('uid') + ')')

    msgs = set()
    types = {}
    for line in lineGen(path):
        result = event.parseString(line)
        key = result.uid
        if result.type == 'MsgEvent':
            payload = list2tuple(result.msg.asList())
            value = (result.type, result.src, result.dst, payload)
            msgs.add(payload)
        elif result.type == 'NetworkPartition':
            value = (result.type, result.p1, result.p2)
        types[key] = value
    return types
def fromString(inputText):
    """Parse IDSL text into a pyparsing tree.

    Strips /* ... */ blocks, then parses imports followed by a single
    module containing struct/dictionary/sequence/enum/exception/interface
    definitions.  C++-style comments are ignored.
    """
    # Remove block comments before parsing.
    text = nestedExpr("/*", "*/").suppress().transformString(inputText)
    # Suppressed punctuation.
    semicolon = Suppress(Word(";"))
    quote = Suppress(Word("\""))
    op = Suppress(Word("{"))
    cl = Suppress(Word("}"))
    opp = Suppress(Word("("))
    clp = Suppress(Word(")"))
    lt = Suppress(Word("<"))
    gt = Suppress(Word(">"))
    eq = Suppress(Word("="))
    identifier = Word(alphas+"_",alphanums+"_")
    # Type names may be namespace-qualified with ':'.
    typeIdentifier = Word(alphas+"_",alphanums+"_:")
    # Struct field: "type name [= default];"
    structIdentifer = Group(typeIdentifier.setResultsName('type') + identifier.setResultsName('identifier') + Optional(eq) + Optional(CharsNotIn(";").setResultsName('defaultValue')) + semicolon)
    structIdentifers = Group(OneOrMore(structIdentifer))
    ## Imports
    idslImport = Suppress(Word("import")) + quote + CharsNotIn("\";").setResultsName('path') + quote + semicolon
    idslImports = ZeroOrMore(idslImport)
    # Top-level type definitions.
    structDef = Word("struct").setResultsName('type') + identifier.setResultsName('name') + op + structIdentifers.setResultsName("structIdentifiers") + cl + semicolon
    dictionaryDef = Word("dictionary").setResultsName('type') + lt + CharsNotIn("<>").setResultsName('content') + gt + identifier.setResultsName('name') + semicolon
    sequenceDef = Word("sequence").setResultsName('type') + lt + typeIdentifier.setResultsName('typeSequence') + gt + identifier.setResultsName('name') + semicolon
    enumDef = Word("enum").setResultsName('type') + identifier.setResultsName('name') + op + CharsNotIn("{}").setResultsName('content') + cl + semicolon
    exceptionDef = Word("exception").setResultsName('type') + identifier.setResultsName('name') + op + CharsNotIn("{}").setResultsName('content') + cl + semicolon
    # Method pieces: throws clause, decorators, return type, parameters.
    raiseDef = Suppress(Word("throws")) + typeIdentifier + ZeroOrMore( Literal(',') + typeIdentifier )
    decoratorDef = Literal('idempotent') | Literal('out')
    retValDef = typeIdentifier.setResultsName('ret')
    firstParam = Group( Optional(decoratorDef.setResultsName('decorator')) + typeIdentifier.setResultsName('type') + identifier.setResultsName('name'))
    nextParam = Suppress(Word(',')) + firstParam
    params = firstParam + ZeroOrMore(nextParam)
    remoteMethodDef = Group(Optional(decoratorDef.setResultsName('decorator')) + retValDef.setResultsName('ret') + typeIdentifier.setResultsName('name') + opp + Optional( params).setResultsName('params') + clp + Optional(raiseDef.setResultsName('raise')) + semicolon )
    interfaceDef = Word('interface').setResultsName('type') + typeIdentifier.setResultsName('name') + op + Group(ZeroOrMore(remoteMethodDef)).setResultsName('methods') + cl + semicolon
    # A module is any number of the above definitions inside braces.
    moduleContent = Group(structDef | enumDef | exceptionDef | dictionaryDef | sequenceDef | interfaceDef)
    module = Suppress(Word("module")) + identifier.setResultsName("name") + op + ZeroOrMore(moduleContent).setResultsName("contents") + cl + semicolon
    IDSL = idslImports.setResultsName("imports") + module.setResultsName("module")
    IDSL.ignore( cppStyleComment )
    tree = IDSL.parseString(text)
    return tree
def parseTypes(path):
    """Parse event-type lines from *path* (via lineGen) and return a dict
    mapping uid -> event tuple (duplicate of the sibling implementation).

    FIX: the grammar was rebuilt inside the per-line loop even though it
    is line-independent; it is now constructed once.  The dead
    ``wordList = Forward()`` assignment was removed, and the loop-local
    ``msg`` was renamed to avoid clobbering the grammar expression.
    """
    number = Word(nums)
    word = Word(alphanums + "-_")
    wordList = word + ZeroOrMore(',' + word)
    # NetworkPartition(Set(...), Set(...))
    par = (Literal('NetworkPartition').setResultsName('type') +
           '(' + Literal('Set') + '(' +
           wordList.setResultsName('p1') + ')' + ',' +
           Literal('Set') + '(' +
           wordList.setResultsName('p2') + ')' +
           ')')
    subType = (word + Optional(nestedExpr('(', ')'))).setResultsName('msg')
    # MsgEvent(src, dst, payload)
    msg = (Literal('MsgEvent').setResultsName('type') +
           '(' + word.setResultsName('src') + ',' +
           word.setResultsName('dst') + ',' +
           subType + ')')
    event = (Word(nums) +
             Literal('Unique') + "(" + (msg | par) + ',' +
             number.setResultsName('uid') + ')')

    msgs = set()
    types = {}
    for line in lineGen(path):
        result = event.parseString(line)
        key = result.uid
        if result.type == 'MsgEvent':
            payload = list2tuple(result.msg.asList())
            value = (result.type, result.src, result.dst, payload)
            msgs.add(payload)
        elif result.type == 'NetworkPartition':
            value = (result.type, result.p1, result.p2)
        types[key] = value
    return types
def get_expression(self, method_list):
    """Build the grammar for a dice expression such as
    ``3d6+2>4 explode -1`` : dice count, 'd', sides (digits or the
    non-integer die symbols from self.non_int_die), optional arithmetic
    and comparison modifiers, zero or more method clauses from
    *method_list*, and an optional pool modifier.
    """
    digit_chars = "0123456789"
    side_chars = digit_chars + "".join(self.non_int_die)
    num = Word(digit_chars)
    side_num = Word(side_chars)
    arith = oneOf('+ -')
    compare = oneOf('< <= > >= = !=')
    # One method clause: name [comparator] [value]; every match is
    # accumulated under 'methods' (listAllMatches=True).
    method_clause = Group(
        oneOf(method_list).setResultsName('method_name')
        + Optional(compare.setResultsName("method_operator"))
        + Optional(num.setResultsName("method_value"))
    ).setResultsName('methods', True)
    return (num.setResultsName("number_of_dice")
            + Literal("d")
            + side_num.setResultsName("sides")
            + Optional(arith.setResultsName("dice_modifier"))
            + Optional(num.setResultsName("dice_boost"))
            + Optional(compare.setResultsName("success_evaluator"))
            + Optional(num.setResultsName("success_threshhold"))
            + ZeroOrMore(method_clause)
            + Optional(arith.setResultsName("pool_modifier"))
            + Optional(num.setResultsName("pool_boost")))
def fromString(inputText, verbose=False):
    """Parse CDSL component text and return CDSLParsing.component(tree).

    Strips /* ... */ blocks, then parses imports followed by one
    component whose body holds communications, language, gui and options
    clauses (in any order, via '&').  Python 2 code (print statement).
    """
    if verbose: print 'Verbose:', verbose
    # Remove block comments before parsing.
    text = nestedExpr("/*", "*/").suppress().transformString(inputText)
    # Suppressed punctuation.
    semicolon = Suppress(Word(";"))
    quote = Suppress(Word("\""))
    op = Suppress(Word("{"))
    cl = Suppress(Word("}"))
    opp = Suppress(Word("("))
    clp = Suppress(Word(")"))
    identifier = Word( alphas+"_", alphanums+"_" )
    # Identifier with an optional middleware tag: "name(ice)" / "name(ros)".
    commIdentifier = Group(identifier.setResultsName('identifier') + Optional(opp + (CaselessLiteral("ice")|CaselessLiteral("ros")).setResultsName("type") + clp))
    # Imports
    idslImport = Suppress(CaselessLiteral("import")) + quote + CharsNotIn("\";").setResultsName('path') + quote + semicolon
    idslImports = ZeroOrMore(idslImport)
    # Communications
    implementsList = Group(CaselessLiteral('implements') + identifier + ZeroOrMore(Suppress(Word(',')) + identifier) + semicolon)
    requiresList = Group(CaselessLiteral('requires') + identifier + ZeroOrMore(Suppress(Word(',')) + identifier) + semicolon)
    subscribesList = Group(CaselessLiteral('subscribesTo') + commIdentifier + ZeroOrMore(Suppress(Word(',')) + commIdentifier) + semicolon)
    publishesList = Group(CaselessLiteral('publishes') + identifier + ZeroOrMore(Suppress(Word(',')) + identifier) + semicolon)
    communicationList = implementsList | requiresList | subscribesList | publishesList
    communications = Group( Suppress(CaselessLiteral("communications")) + op + ZeroOrMore(communicationList) + cl + semicolon)
    # Language
    language = Suppress(CaselessLiteral("language")) + (CaselessLiteral("cpp")|CaselessLiteral("python")) + semicolon
    # GUI
    gui = Group(Optional(Suppress(CaselessLiteral("gui")) + CaselessLiteral("Qt") + opp + identifier + clp + semicolon ))
    # additional options
    options = Group(Optional(Suppress(CaselessLiteral("options")) + identifier + ZeroOrMore(Suppress(Word(',')) + identifier) + semicolon))
    # '&' (Each) lets the four clauses appear in any order.
    componentContents = communications.setResultsName('communications') & language.setResultsName('language') & gui.setResultsName('gui') & options.setResultsName('options')
    component = Suppress(CaselessLiteral("component")) + identifier.setResultsName("name") + op + componentContents.setResultsName("properties") + cl + semicolon
    CDSL = idslImports.setResultsName("imports") + component.setResultsName("component")
    CDSL.ignore( cppStyleComment )
    tree = CDSL.parseString(text)
    return CDSLParsing.component(tree)
def fromString(inputText, verbose=False):
    """Parse CDSL component text and return CDSLParsing.component(tree)
    (duplicate of the sibling implementation).

    Grammar: imports, then one component with communications/language/
    gui/options clauses combinable in any order.  Python 2 code.
    """
    if verbose: print 'Verbose:', verbose
    # Remove /* ... */ blocks before parsing.
    text = nestedExpr("/*", "*/").suppress().transformString(inputText)
    semicolon = Suppress(Word(";"))
    quote = Suppress(Word("\""))
    op = Suppress(Word("{"))
    cl = Suppress(Word("}"))
    opp = Suppress(Word("("))
    clp = Suppress(Word(")"))
    identifier = Word( alphas+"_", alphanums+"_" )
    # Identifier with optional "(ice)" / "(ros)" middleware tag.
    commIdentifier = Group(identifier.setResultsName('identifier') + Optional(opp + (CaselessLiteral("ice")|CaselessLiteral("ros")).setResultsName("type") + clp))
    # Imports
    idslImport = Suppress(CaselessLiteral("import")) + quote + CharsNotIn("\";").setResultsName('path') + quote + semicolon
    idslImports = ZeroOrMore(idslImport)
    # Communications
    implementsList = Group(CaselessLiteral('implements') + identifier + ZeroOrMore(Suppress(Word(',')) + identifier) + semicolon)
    requiresList = Group(CaselessLiteral('requires') + identifier + ZeroOrMore(Suppress(Word(',')) + identifier) + semicolon)
    subscribesList = Group(CaselessLiteral('subscribesTo') + commIdentifier + ZeroOrMore(Suppress(Word(',')) + commIdentifier) + semicolon)
    publishesList = Group(CaselessLiteral('publishes') + identifier + ZeroOrMore(Suppress(Word(',')) + identifier) + semicolon)
    communicationList = implementsList | requiresList | subscribesList | publishesList
    communications = Group( Suppress(CaselessLiteral("communications")) + op + ZeroOrMore(communicationList) + cl + semicolon)
    # Language
    language = Suppress(CaselessLiteral("language")) + (CaselessLiteral("cpp")|CaselessLiteral("python")) + semicolon
    # GUI
    gui = Group(Optional(Suppress(CaselessLiteral("gui")) + CaselessLiteral("Qt") + opp + identifier + clp + semicolon ))
    # additional options
    options = Group(Optional(Suppress(CaselessLiteral("options")) + identifier + ZeroOrMore(Suppress(Word(',')) + identifier) + semicolon))
    # '&' (Each) allows the clauses in any order.
    componentContents = communications.setResultsName('communications') & language.setResultsName('language') & gui.setResultsName('gui') & options.setResultsName('options')
    component = Suppress(CaselessLiteral("component")) + identifier.setResultsName("name") + op + componentContents.setResultsName("properties") + cl + semicolon
    CDSL = idslImports.setResultsName("imports") + component.setResultsName("component")
    CDSL.ignore( cppStyleComment )
    tree = CDSL.parseString(text)
    return CDSLParsing.component(tree)
def makeNewickParser():
    """Build a parser for Newick tree strings and return the grammar's
    bound ``parseString`` method.

    Each subtree is a Group holding either a nested ("subtree") list or a
    leaf "name", plus optional "data" (anything up to ',', ')' or ';').

    FIX: removed unused/misleading locals -- notably a variable named
    ``semicolon`` that was actually ``Literal(":")``, plus the unused
    ``colon``, ``point``, ``e``, ``fnumber``, ``dist`` and ``bootstrap``
    definitions; none were referenced by the grammar.
    """
    # pyparsing
    from pyparsing import Combine, Optional, Literal, CaselessLiteral, \
        Word, alphanums, \
        nums, oneOf, Group, Dict, Forward, \
        ParseResults, CharsNotIn, ZeroOrMore

    # literals
    lparen = Literal("(").suppress()
    rparen = Literal(")").suppress()
    comma = Literal(",").suppress()

    # terminal rules
    name = Word(alphanums + "_" + "-" + "." + "+")

    # recursive rules
    subtree = Forward()
    subtreelist = Forward()

    subtree << \
        Group(
            (
                (lparen + subtreelist + rparen).setResultsName("subtree") |
                name.setResultsName("name")
            ) +
            Optional(
                CharsNotIn(",);").setResultsName("data")
            )
        )

    subtreelist << subtree + Optional(comma + subtreelist)

    # top level rule: a subtree followed by the terminating ';'.
    tree = subtree + Word(";").suppress()

    return tree.parseString
def makeNewickParser():
    """Build a parser for Newick tree strings (duplicate of the sibling
    implementation); returns the grammar's bound ``parseString``.

    FIX: dropped the unused locals, including a ``semicolon`` that was
    really ``Literal(":")`` and the unreferenced ``colon``/``point``/
    ``e``/``fnumber``/``dist``/``bootstrap`` definitions.
    """
    # pyparsing
    from pyparsing import Combine, Optional, Literal, CaselessLiteral, \
        Word, alphanums, \
        nums, oneOf, Group, Dict, Forward, \
        ParseResults, CharsNotIn, ZeroOrMore

    # literals
    lparen = Literal("(").suppress()
    rparen = Literal(")").suppress()
    comma = Literal(",").suppress()

    # terminal rules
    name = Word(alphanums + "_" + "-" + "." + "+")

    # recursive rules
    subtree = Forward()
    subtreelist = Forward()

    subtree << \
        Group(
            (
                (lparen + subtreelist + rparen).setResultsName("subtree") |
                name.setResultsName("name")
            ) +
            Optional(
                CharsNotIn(",);").setResultsName("data")
            )
        )

    subtreelist << subtree + Optional(comma + subtreelist)

    # top level rule
    tree = subtree + Word(";").suppress()

    return tree.parseString
def urlsplit(url, scheme='', allow_fragments=1):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    # NOTE(review): allow_fragments participates in the cache key but is
    # otherwise unused by this implementation -- confirm intent.
    global _urlBNF
    key = url, scheme, allow_fragments
    cached = _parse_cache.get(key, None)
    if cached:
        return cached
    if len(_parse_cache) >= MAX_CACHE_SIZE:  # avoid runaway growth
        clear_cache()
    if (_urlBNF is None):
        # Build the grammar once and reuse it across calls.
        scheme_chars = alphanums + "+-."
        urlscheme = Word( scheme_chars )
        netloc_chars = "".join( [ c for c in printables if c not in "/." ] )
        netloc = Combine(delimitedList( Word( netloc_chars ), ".", combine=True ))
        path_chars = "".join( [ c for c in printables if c not in "?" ] )
        path = Word( path_chars )
        query_chars = "".join( [ c for c in printables if c not in "#" ] )
        query = Word( query_chars )
        fragment = Word( printables+" " )
        _urlBNF = Combine(Optional(urlscheme.setResultsName("scheme") + ":" ) +
                          Optional(Literal("//").suppress() + netloc, default="").setResultsName("netloc") +
                          Optional(path.setResultsName("path"), default="") +
                          Optional(Literal("?").suppress() + query, default="").setResultsName("query") +
                          Optional(Literal("#").suppress() + fragment, default="").setResultsName("fragment") )
    tokens = _urlBNF.parseString( url )
    # FIX: the result variable previously shadowed the builtin `tuple`.
    result = ((tokens.scheme or scheme), tokens.netloc[0], tokens.path,
              tokens.query[0], tokens.fragment[0])
    _parse_cache[key] = result
    return result
def urlsplit(url, scheme='', allow_fragments=1):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    # NOTE(review): allow_fragments only affects the cache key here.
    global _urlBNF
    key = url, scheme, allow_fragments
    cached = _parse_cache.get(key, None)
    if cached:
        return cached
    if len(_parse_cache) >= MAX_CACHE_SIZE:  # avoid runaway growth
        clear_cache()
    if (_urlBNF is None):
        # Lazily build the grammar on first use.
        scheme_chars = alphanums + "+-."
        urlscheme = Word( scheme_chars )
        netloc_chars = "".join( [ c for c in printables if c not in "/." ] )
        netloc = Combine(delimitedList( Word( netloc_chars ), ".", combine=True ))
        path_chars = "".join( [ c for c in printables if c not in "?" ] )
        path = Word( path_chars )
        query_chars = "".join( [ c for c in printables if c not in "#" ] )
        query = Word( query_chars )
        fragment = Word( printables+" " )
        _urlBNF = Combine(Optional(urlscheme.setResultsName("scheme") + ":" ) +
                          Optional(Literal("//").suppress() + netloc, default="").setResultsName("netloc") +
                          Optional(path.setResultsName("path"), default="") +
                          Optional(Literal("?").suppress() + query, default="").setResultsName("query") +
                          Optional(Literal("#").suppress() + fragment, default="").setResultsName("fragment") )
    tokens = _urlBNF.parseString( url )
    # FIX: renamed the local that shadowed the builtin `tuple`.
    result = ((tokens.scheme or scheme), tokens.netloc[0], tokens.path,
              tokens.query[0], tokens.fragment[0])
    _parse_cache[key] = result
    return result
def receiver_input_rule():
    """Build the grammar for a receiver rule: optional "always", then
    'and'-joined conditions of the form "<input> is|was <op> <value>",
    followed by the actions clause.

    NOTE(review): `word`, `operators`, `_and`, `when`, `then` and
    `actions` are taken from module scope -- presumably grammar elements
    defined alongside this function; verify before refactoring.
    """
    # Dotted path, e.g. "sensor.temperature".
    path = Combine(ZeroOrMore(word + ".") + word)
    input = path.setResultsName("input")
    operator = oneOf(operators.keys()).setResultsName("operator")
    value = path.setResultsName("value")
    comparison = operator + value
    # NOTE(review): Word("is") matches any run of 'i'/'s' characters, not
    # the literal keyword -- likely intended as Keyword("is"); confirm.
    is_or_was = Word("is") | Word("was")
    condition = Group(input + is_or_was.setResultsName("temporal") + comparison)
    # One or more conditions joined by the module-level `_and` element.
    res = ZeroOrMore(condition + _and) + condition
    conditions = Group(res).setResultsName("conditions")
    return Optional("always").setResultsName("always_fire_rule") + when + conditions + then + actions
def fromString(inputText, verbose=False):
    """Parse IDSL source text and return the processed module tree.

    :param inputText: raw IDSL text.
    :param verbose: when True, emit a diagnostic line.
    """
    if verbose:
        # Single-string form prints identically on Python 2 and 3
        # (original used a py2-only print statement).
        print('Verbose: %s' % verbose)
    # Strip /* ... */ comments before building the grammar.
    text = nestedExpr("/*", "*/").suppress().transformString(inputText)

    semicolon = Suppress(Word(";"))
    quote = Suppress(Word("\""))
    op = Suppress(Word("{"))
    cl = Suppress(Word("}"))
    opp = Suppress(Word("("))
    clp = Suppress(Word(")"))
    lt = Suppress(Word("<"))
    gt = Suppress(Word(">"))
    identifier = Word(alphas + "_", alphanums + "_")
    typeIdentifier = Word(alphas + "_", alphanums + "_:")

    ## Imports
    # NOTE(review): Word("import") matches any run of those letters, not
    # the exact keyword -- Keyword may have been intended; kept as-is.
    idslImport = (Suppress(Word("import")) + quote
                  + CharsNotIn("\";").setResultsName('path') + quote + semicolon)
    idslImports = ZeroOrMore(idslImport)

    dictionaryDef = (Word("dictionary") + lt + CharsNotIn("<>;") + gt
                     + identifier.setResultsName('name') + semicolon)
    sequenceDef = (Word("sequence") + lt + CharsNotIn("<>;") + gt
                   + identifier.setResultsName('name') + semicolon)
    enumDef = (Word("enum") + identifier.setResultsName('name') + op
               + CharsNotIn("{}") + cl + semicolon)
    structDef = (Word("struct") + identifier.setResultsName('name') + op
                 + CharsNotIn("{}") + cl + semicolon)
    exceptionDef = (Word("exception") + identifier.setResultsName('name') + op
                    + CharsNotIn("{}") + cl + semicolon)

    # Method signatures: [decorator] ret name(params) [throws ...] ;
    raiseDef = (Suppress(Word("throws")) + typeIdentifier
                + ZeroOrMore(Literal(',') + typeIdentifier))
    decoratorDef = Literal('idempotent') | Literal('out')
    retValDef = typeIdentifier.setResultsName('ret')
    firstParam = Group(Optional(decoratorDef.setResultsName('decorator'))
                       + typeIdentifier.setResultsName('type')
                       + identifier.setResultsName('name'))
    nextParam = Suppress(Word(',')) + firstParam
    params = firstParam + ZeroOrMore(nextParam)
    remoteMethodDef = Group(Optional(decoratorDef) + retValDef
                            + typeIdentifier.setResultsName('name') + opp
                            + Optional(params).setResultsName('params') + clp
                            + Optional(raiseDef) + semicolon)
    interfaceDef = (Word("interface") + typeIdentifier.setResultsName('name')
                    + op + Group(ZeroOrMore(remoteMethodDef)) + cl + semicolon)

    moduleContent = Group(structDef | enumDef | exceptionDef | dictionaryDef
                          | sequenceDef | interfaceDef)
    module = (Suppress(Word("module")) + identifier.setResultsName("name") + op
              + ZeroOrMore(moduleContent).setResultsName("contents")
              + cl + semicolon)

    IDSL = idslImports.setResultsName("imports") + module.setResultsName("module")
    IDSL.ignore(cppStyleComment)
    tree = IDSL.parseString(text)
    return IDSLParsing.module(tree)
def receiver_input_rule():
    """Build the grammar for a receiver input rule.

    A rule is one or more ``<input> is|was <operator> <value>`` conditions
    joined by ``_and``, optionally prefixed with ``always``.
    """
    path = Combine(ZeroOrMore(word + ".") + word)
    # Renamed from ``input`` to avoid shadowing the builtin; the results
    # name exposed to callers is unchanged.
    input_path = path.setResultsName("input")
    operator = oneOf(operators.keys()).setResultsName("operator")
    value = path.setResultsName("value")
    comparison = operator + value
    # NOTE(review): Word("is") matches any run of 'i'/'s' characters;
    # Keyword("is") | Keyword("was") may have been intended -- confirm.
    is_or_was = Word("is") | Word("was")
    condition = Group(input_path + is_or_was.setResultsName("temporal") + comparison)
    res = ZeroOrMore(condition + _and) + condition
    conditions = Group(res).setResultsName("conditions")
    return (Optional("always").setResultsName("always_fire_rule")
            + when + conditions + then + actions)
def parse_treesblock(infile):
    """Yield a Newick object for each tree statement in a NEXUS trees block.

    Consumes an optional ``translate`` table first, then parses each
    ``tree <name> = <newick>;`` line.

    :param infile: iterator of lines (e.g. an open file).
    """
    from pyparsing import Optional, Word, Regex, CaselessKeyword, Suppress
    from pyparsing import QuotedString
    # Optional "[&...]" metadata comment.
    comment = Optional(Suppress("[&") + Regex(r'[^]]+') + Suppress("]"))
    name = Word(alphanums + "_") | QuotedString("'")
    newick = Regex(r'[^;]+;')
    tree = (CaselessKeyword("tree").suppress() + Optional("*").suppress() +
            name.setResultsName("tree_name") +
            comment.setResultsName("tree_comment") +
            Suppress("=") +
            comment.setResultsName("root_comment") +
            newick.setResultsName("newick"))

    def parse_ttable(f):
        # Read "key value" translate pairs until a terminating ';'.
        ttable = {}
        while True:
            # next(f) works on both py2 and py3 (f.next() was py2-only).
            s = next(f).strip()
            if s.lower() == ";":
                break
            if s[-1] in ",;":
                s = s[:-1]
            k, v = s.split()
            ttable[k] = v
            # NOTE(review): s was already truncated above, so this check
            # can never be true -- it may have been meant to test the
            # original line's terminator; kept to preserve behavior.
            if s[-1] == ";":
                break
        return ttable

    ttable = {}
    while True:
        try:
            s = next(infile).strip()
        except StopIteration:
            break
        if s.lower() == "translate":
            ttable = parse_ttable(infile)
        else:
            match = tree.parseString(s)
            yield Newick(match, ttable)
def parse_treesblock(infile):
    """Yield a Newick object for each tree statement in a NEXUS trees block.

    Consumes an optional ``translate`` table first, then parses each
    ``tree <name> = <newick>;`` line.

    :param infile: iterator of lines (e.g. an open file).
    """
    from pyparsing import Optional, Word, Regex, CaselessKeyword, Suppress
    from pyparsing import QuotedString
    # Optional "[&...]" metadata comment.
    comment = Optional(Suppress("[&") + Regex(r'[^]]+') + Suppress("]"))
    name = Word(alphanums + "_") | QuotedString("'")
    newick = Regex(r'[^;]+;')
    tree = (CaselessKeyword("tree").suppress() + Optional("*").suppress() +
            name.setResultsName("tree_name") +
            comment.setResultsName("tree_comment") +
            Suppress("=") +
            comment.setResultsName("root_comment") +
            newick.setResultsName("newick"))

    def parse_ttable(f):
        # Read "key value" translate pairs until a terminating ';'.
        ttable = {}
        while True:
            # next(f) works on both py2 and py3 (f.next() was py2-only).
            s = next(f).strip()
            if s.lower() == ";":
                break
            if s[-1] in ",;":
                s = s[:-1]
            k, v = s.split()
            ttable[k] = v
            # NOTE(review): s was already truncated above, so this check
            # can never be true; kept to preserve behavior.
            if s[-1] == ";":
                break
        return ttable

    ttable = {}
    while True:
        try:
            s = next(infile).strip()
        except StopIteration:
            break
        if s.lower() == "translate":
            ttable = parse_ttable(infile)
        else:
            match = tree.parseString(s)
            yield Newick(match, ttable)
def _get_handbrake_title_pattern(self):
    """Grammar for one HandBrake scan "title" section: the title number,
    its duration (hh:mm:ss), and the list of subtitle tracks."""
    digits = Word("0123456789")
    hhmmss = Combine(digits + ":" + digits + ":" + digits)

    title_marker = Literal("+ title").suppress()
    duration_marker = Literal("+ duration:").suppress()
    subtitle_marker = Literal("+ subtitle tracks:")

    # "(iso639-2:xxx)" language tag inside a subtitle line.
    lang_code = Literal('(iso639-2:').suppress() + Word(alphas)
    one_track = (Literal("+").suppress()
                 + Group(digits + SkipTo(lang_code).suppress() + lang_code)
                 + restOfLine.suppress())

    return (title_marker
            + digits.setResultsName("title")
            + SkipTo(duration_marker).suppress()
            + duration_marker
            + hhmmss.setResultsName("duration")
            + SkipTo(subtitle_marker).suppress()
            + subtitle_marker.suppress()
            + Group(ZeroOrMore(one_track)).setResultsName("subtitles"))
def __init__(self, network):
    """Build the pyparsing grammars for set-definition queries.

    :param network: the network object the parsed queries run against.
    """
    self.network = network
    # "a.b_c"-style attribute token; named copy carries the results name.
    attribute_unnamed = Word(alphanums + '_' + ".")
    attribute = attribute_unnamed.setResultsName("attribute")
    self.attribute = attribute
    self.nodeQuery = attribute.setResultsName("nodeQuery").setFailAction(parse_fail_action)
    self.children = Literal("children").setResultsName("children")
    self.relation = self.children.setResultsName("relation").setFailAction(parse_fail_action)
    # "{a, b, c}" -> list of attributes; bare "{}" -> an empty set.
    set_values = (Suppress("{")
                  + delimitedList(attribute, delim=',').setResultsName("set_values")
                  + Suppress("}"))
    empty_set = Literal("{}").setResultsName("set_values").setParseAction(lambda x: set())
    # "(<node query>) children {<values>}"
    self.set_definition = ("(" + self.nodeQuery + ")" + self.relation
                           + (empty_set | set_values))
    self.rpkiSetsLine = self.set_definition.setResultsName("set_definition")
    self.path = {}
    # (Removed: dead local ``integer_string`` and commented-out
    # ``user_defined_sets`` -- neither was used anywhere in this method.)
def parse_sexp(data):
    """Parse sexp/S-expression format and return a python list.

    On a fatal parse error, prints diagnostics and returns None.
    """
    # define punctuation literals
    LPAR, RPAR, LBRK, RBRK, LBRC, RBRC, VBAR = map(Suppress, "()[]{}|")
    decimal = Word("123456789", nums).setParseAction(lambda t: int(t[0]))
    # Renamed from ``bytes`` -- the original shadowed the builtin.
    raw_bytes = Word(printables)
    raw = Group(decimal.setResultsName("len") + Suppress(":")
                + raw_bytes).setParseAction(OtrPrivateKeys.verifyLen)
    token = Word(alphanums + "-./_:*+=")
    # "|base64data|" with an optional leading length.
    base64_ = Group(
        Optional(decimal, default=None).setResultsName("len") + VBAR
        + OneOrMore(Word(alphanums + "+/=")).setParseAction(
            lambda t: b64decode("".join(t)))
        + VBAR).setParseAction(OtrPrivateKeys.verifyLen)
    hexadecimal = ("#" + OneOrMore(Word(hexnums)) + "#").setParseAction(
        lambda t: int("".join(t[1:-1]), 16))
    qString = Group(
        Optional(decimal, default=None).setResultsName("len")
        + dblQuotedString.setParseAction(removeQuotes)).setParseAction(
            OtrPrivateKeys.verifyLen)
    simpleString = raw | token | base64_ | hexadecimal | qString
    display = LBRK + simpleString + RBRK
    string_ = Optional(display) + simpleString
    sexp = Forward()
    sexpList = Group(LPAR + ZeroOrMore(sexp) + RPAR)
    sexp << (string_ | sexpList)
    try:
        sexpr = sexp.parseString(data)
        return sexpr.asList()[0][1:]
    # "except E as e" replaces the py2-only "except E, e" syntax.
    except ParseFatalException as pfe:
        print("Error:", pfe.msg)
        print(pfe.loc)
        print(pfe.markInputline())
def _parse_query(self, query):
    """Parse a ``FROM ... SELECT ... WHERE ...`` query string into
    ``self.parsed_query`` (raises on any trailing unparsed input)."""
    comparison_op = oneOf("= > >= < <= ~ ! !~")
    logical_joiner = oneOf('AND OR')

    from_clause = (Suppress(Literal('FROM'))
                   + Word(printables)).setResultsName('FROM')
    select_clause = (Suppress(Literal('SELECT'))
                     + Word(printables)).setResultsName('SELECT')
    quoted_value = QuotedString('"', escQuote="'",
                                escChar='\\').setResultsName('VAL')
    condition = (Word(printables) + comparison_op
                 + quoted_value).setResultsName('COND')

    lparen = Literal('(')
    rparen = Literal(')')
    where_clause = (Suppress(Literal('WHERE'))
                    + ZeroOrMore(lparen)
                    + OneOrMore(Group(condition) + ZeroOrMore(logical_joiner))
                    + ZeroOrMore(rparen)).setResultsName('WHERE')

    grammar = from_clause + select_clause + where_clause
    self.parsed_query = grammar.parseString(query, parseAll=True)
def parse_pabl(self, raw_pabl):
    """Parse PABL source into a parse tree of @item blocks, @permissions
    blocks and field lists (indentation-sensitive grammar driven by the
    check_*_indent/unindent parse-action callbacks on self)."""
    INDENT = lineEnd.suppress() + empty + empty.copy().setParseAction(
        self.check_sub_indent)
    UNDENT = FollowedBy(empty).setParseAction(self.check_unindent)
    # NOTE(review): setParseAction *replaces* the action attached on the
    # previous line, so check_unindent is discarded here -- addParseAction
    # may have been intended; confirm before changing.
    UNDENT.setParseAction(self.unindent)
    terminator = Literal(';').suppress()
    comment = Literal('#') + restOfLine
    item_name = Word(alphas, alphanums + '_')
    # Dotted path, optionally aliased: "a.b.c as name".
    variable = Word(alphas, alphanums + '_.')
    variable_as = (variable + 'as' + item_name)

    stmt = Forward()
    # A suite is a run of statements at the same indentation level.
    suite = Group(
        OneOrMore(empty + stmt.setParseAction(self.check_peer_indent)))
    suite.ignore(comment)

    item_start = Literal('@item').suppress()
    item_end = Literal(':').suppress()
    permission_start = Literal('@permissions')

    item_decl = (item_start + item_name.setResultsName('item') + item_end)
    item_defn = Group(item_decl + INDENT + suite + UNDENT)

    permission_decl = (permission_start + Group(
        delimitedList(item_name).setResultsName('permissions')) + item_end)
    permission_defn = Group(permission_decl + INDENT + suite + UNDENT)

    # "a.b as x, c.d, ...;"
    fieldList = delimitedList(
        Group(variable_as) | variable
    ).setResultsName('fields') + terminator

    stmt << (item_defn | fieldList | Group(permission_defn))

    parseTree = suite.parseString(raw_pabl)
    return parseTree
def parse_ampersand_comment(s):
    """Extract key=value pairs from an ampersand comment string.

    Values are scalars, quoted strings, or ``{min,max}`` ranges; numeric
    values are converted to float where possible.

    :returns: list of (key, value) tuples.
    """
    # string.ascii_letters: string.letters was removed in Python 3.
    word = Word(string.ascii_letters + string.digits + "%_")
    key = word.setResultsName("key") + Suppress("=")
    single_value = (Word(string.ascii_letters + string.digits + "-.") |
                    QuotedString("'") | QuotedString('"'))
    range_value = Group(Suppress("{") + single_value.setResultsName("min") +
                        Suppress(",") + single_value.setResultsName("max") +
                        Suppress("}"))
    pair = (key + (single_value | range_value).setResultsName("value"))
    g = OneOrMore(pair)
    d = []
    for x in g.searchString(s):
        v = x.value
        if isinstance(v, str):  # isinstance, not type(v) == str
            try:
                v = float(v)
            except ValueError:
                pass
        else:
            try:
                # Eager list comprehension: a lazy map() on Python 3 would
                # defer the ValueError past this except block.
                v = [float(i) for i in v.asList()]
            except ValueError:
                pass
        d.append((x.key, v))
    return d
tok_sql_literal_from + \ tok_sql_table_list.setResultsName("table_list") + \ Optional(tok_sql_literal_where + \ tok_sql_where_clause.\ setResultsName("where_clause")) + \ Optional(tok_sql_literal_group + \ Suppress(tok_sql_literal_by) + \ tok_sql_cols.setResultsName("groupby_clause") + \ Optional(tok_sql_literal_having + tok_sql_kvp_list.\ setResultsName("having_clause")))) sql_insert = (tok_sql_literal_insert.setResultsName("op") + \ Optional(tok_sql_literal_ignore) + \ tok_sql_literal_into + \ tok_sql_identifier.setResultsName("table") + \ Optional(tok_sql_open_paren + \ tok_sql_cols.setResultsName("cols") + \ tok_sql_close_paren ) + \ Optional(tok_sql_literal_values) + \ Optional(tok_sql_open_paren) + \ tok_sql_vals.setResultsName("vals") + \ Optional(tok_sql_close_paren) + \ Optional(tok_sql_literal_on + \ tok_sql_literal_duplicate + \ tok_sql_literal_key + \ tok_sql_literal_update + \ tok_sql_kvp_list.setResultsName("dup_list") ) + \ Optional(tok_sql_literal_semicol))
def __init__(self, query):
    """Compile *query* into a boolean search tree and build the log-line
    grammar used by the evaluate_* dispatch methods."""
    # Dispatch table: parse-tree node name -> evaluator method.
    self._methods = {
        'and': self.evaluate_and,
        'or': self.evaluate_or,
        'not': self.evaluate_not,
        'parenthesis': self.evaluate_parenthesis,
        'quotes': self.evaluate_quotes,
        'word': self.evaluate_word,
    }

    self.line = ''
    self.query = query.lower() if query else ''

    if self.query:
        # TODO: Cleanup
        operator_or = Forward()
        operator_word = Group(Word(alphanums)).setResultsName('word')

        # One or more words inside quotes (right-recursive chain).
        operator_quotes_content = Forward()
        operator_quotes_content << (
            (operator_word + operator_quotes_content) | operator_word
        )

        operator_quotes = Group(
            Suppress('"') + operator_quotes_content + Suppress('"')
        ).setResultsName('quotes') | operator_word

        operator_parenthesis = Group(
            (Suppress('(') + operator_or + Suppress(")"))
        ).setResultsName('parenthesis') | operator_quotes

        # NOTE(review): the negation keyword here is 'no', not 'not' --
        # confirm that is intended.
        operator_not = Forward()
        operator_not << (Group(
            Suppress(Keyword('no', caseless=True)) + operator_not
        ).setResultsName('not') | operator_parenthesis)

        # Explicit "and", implicit adjacency (no keyword), or fall-through.
        operator_and = Forward()
        operator_and << (Group(
            operator_not + Suppress(Keyword('and', caseless=True)) + operator_and
        ).setResultsName('and') | Group(
            operator_not + OneOrMore(~oneOf('and or') + operator_and)
        ).setResultsName('and') | operator_not)

        operator_or << (Group(
            operator_and + Suppress(Keyword('or', caseless=True)) + operator_or
        ).setResultsName('or') | operator_and)

        self._query_parser = operator_or.parseString(self.query)[0]
    else:
        self._query_parser = False

    # Timestamp "dd-dd-dd dd:dd"; each component zero-padded to 2 digits.
    time_cmpnt = Word(nums).setParseAction(lambda t: t[0].zfill(2))
    date = Combine((time_cmpnt + '-' + time_cmpnt + '-' + time_cmpnt)
                   + ' ' + time_cmpnt + ':' + time_cmpnt)
    word = Word(printables)
    # Log line: timestamp, level, plugin, task (wide-whitespace column or
    # whitespace-separated word), then the rest as the message.
    self._log_parser = (
        date.setResultsName('timestamp') +
        word.setResultsName('log_level') +
        word.setResultsName('plugin') +
        (
            White(min=16).setParseAction(
                lambda s, l, t: [t[0].strip()]).setResultsName('task')
            |
            (White(min=1).suppress() & word.setResultsName('task'))
        ) +
        restOfLine.setResultsName('message')
    )
# ====================> Basis File <========================== comment = Literal("#") + restOfLine parser_atom_label = ( Word(srange("[A-Z]"), max=1) + Optional(Word(srange("[a-z]"), max=1)) ) parser_basis_name = Word(alphanums + "-") + Suppress(restOfLine) parser_format = OneOrMore(natural + NotAny(FollowedBy(point))) parser_key = ( parser_atom_label.setResultsName("atom") + parser_basis_name.setResultsName("basisName") + Suppress(Literal("1")) ) parser_basis_data = OneOrMore(floatNumber) parser_basis = ( parser_key + parser_format.setResultsName("format") + parser_basis_data.setResultsName("coeffs") ) top_parser_basis = ( OneOrMore(Suppress(comment)) + OneOrMore( Group(parser_basis + Suppress(Optional(OneOrMore(comment))))) )
def __init__(self, query):
    """Compile *query* into a boolean search tree and build the log-line
    grammar used by the evaluate_* dispatch methods."""
    # Dispatch table: parse-tree node name -> evaluator method.
    self._methods = {
        'and': self.evaluate_and,
        'or': self.evaluate_or,
        'not': self.evaluate_not,
        'parenthesis': self.evaluate_parenthesis,
        'quotes': self.evaluate_quotes,
        'word': self.evaluate_word,
    }

    self.line = ''
    self.query = query.lower() if query else ''

    if self.query:
        # TODO: Cleanup
        operator_or = Forward()
        operator_word = Group(Word(alphanums)).setResultsName('word')

        # One or more words inside quotes (right-recursive chain).
        operator_quotes_content = Forward()
        operator_quotes_content << (
            (operator_word + operator_quotes_content) | operator_word)

        operator_quotes = Group(
            Suppress('"') + operator_quotes_content
            + Suppress('"')).setResultsName('quotes') | operator_word

        operator_parenthesis = Group(
            (Suppress('(') + operator_or + Suppress(")")
             )).setResultsName('parenthesis') | operator_quotes

        # NOTE(review): the negation keyword here is 'no', not 'not' --
        # confirm that is intended.
        operator_not = Forward()
        operator_not << (
            Group(Suppress(Keyword('no', caseless=True))
                  + operator_not).setResultsName('not')
            | operator_parenthesis)

        # Explicit "and", implicit adjacency (no keyword), or fall-through.
        operator_and = Forward()
        operator_and << (
            Group(operator_not + Suppress(Keyword('and', caseless=True))
                  + operator_and).setResultsName('and')
            | Group(operator_not + OneOrMore(~oneOf('and or') + operator_and)
                    ).setResultsName('and')
            | operator_not)

        operator_or << (
            Group(operator_and + Suppress(Keyword('or', caseless=True))
                  + operator_or).setResultsName('or') | operator_and)

        self._query_parser = operator_or.parseString(self.query)[0]
    else:
        self._query_parser = False

    # Timestamp "dd-dd-dd dd:dd"; each component zero-padded to 2 digits.
    time_cmpnt = Word(nums).setParseAction(lambda t: t[0].zfill(2))
    date = Combine((time_cmpnt + '-' + time_cmpnt + '-' + time_cmpnt)
                   + ' ' + time_cmpnt + ':' + time_cmpnt)
    word = Word(printables)
    # Log line: timestamp, level, plugin, task (wide-whitespace column or
    # whitespace-separated word), then the rest as the message.
    self._log_parser = (
        date.setResultsName('timestamp') + word.setResultsName('log_level') +
        word.setResultsName('plugin') +
        (White(min=16).setParseAction(
            lambda s, l, t: [t[0].strip()]).setResultsName('task')
         | (White(min=1).suppress() & word.setResultsName('task'))) +
        restOfLine.setResultsName('message'))
import sys
from io import StringIO
from pyparsing import Word, printables, Optional, \
    alphas, alphanums, ZeroOrMore, restOfLine, Combine, \
    Literal, Group, removeQuotes, CharsNotIn

import twill3.commands as commands

from . import namespaces
from .errors import TwillAssertionError, TwillNameError

# pyparsing stuff

# basically, a valid Python identifier:
command = Word(alphas + "_", alphanums + "_")
command = command.setResultsName('command')
command.setName("command")

# arguments to it.

# we need to reimplement all this junk from pyparsing because pcre's
# idea of escapable characters contains a lot more than the C-like
# thing pyparsing implements
_bslash = "\\"
_sglQuote = Literal("'")
_dblQuote = Literal('"')
_escapables = printables
# Exactly a backslash followed by one escapable character.
_escapedChar = Word(_bslash, _escapables, exact=2)
# Double-quoted string allowing backslash escapes and doubled quotes ("").
dblQuotedString = Combine(
    _dblQuote + ZeroOrMore(CharsNotIn('\\"\n\r') | _escapedChar | '""') +
    _dblQuote).streamline().setName("string enclosed in double quotes")
fishing results out later, much better thean indexing a list, which might change size if we change things later. Dictionary is not sensitive to that. We also use () for long lines not \ because apparently it's better. """ """ checkout - standard format grammar definition OUT product version pool# user host "isv_def" count cur_use cur_resuse \ server_handle share_handle process_id "project" "requested product" \ "requested version" mm/dd hh:mm:ss example: OUT imarisbase 6.0 9 heisenberg_lab my-workstation-72 "" 1 1 0 26e 26e 410 "" "" "" 06/16 10:57:52 """ rlmRlogCheckoutEntry_std = ( Literal("OUT").setResultsName("checkedOut") + product.setResultsName("product") + version.setResultsName("version") + poolNum + user.setResultsName("user") + host.setResultsName("host") + isvDef.setResultsName("isDef") + count.setResultsName("count") + curUse + curReuse + serverHandle.setResultsName("serverHandle") + shareHandle + processId + project + requestedProduct + requestedVersion + date.setResultsName("date") + timeHHMMSS.setResultsName("time")) """ checkout - small format grammar definition OUT product version user host "isv_def" count server_handle share_handle hh:mm example: OUT imarisbase 7.4 serrmeli my-workstation-72 "" 1 1281 7c1 14:22 """ rlmRlogCheckoutEntry_sml = (Literal("OUT").setResultsName("checkedOut") + product.setResultsName("product") + version.setResultsName("version") + user.setResultsName("user") + host.setResultsName("host") + isvDef.setResultsName("isDef") +
# Boolean literals.
true = Literal("True")
false = Literal("False")

atom = Forward()
# Expression grammar with conventional operator precedence; each level
# builds AST nodes via _make_unary/_make_binary.
infix = infixNotation(atom, [
    ('not', 1, opAssoc.RIGHT, _make_unary),
    (oneOf('* /'), 2, opAssoc.LEFT, _make_binary),
    (oneOf('+ -'), 2, opAssoc.LEFT, _make_binary),
    (oneOf('> gt >= ge < lt <= le != ne == eq'), 2, opAssoc.LEFT, _make_binary),
    ('and', 2, opAssoc.LEFT, _make_binary),
    ('or', 2, opAssoc.LEFT, _make_binary),
    ('in', 2, opAssoc.LEFT, _make_binary),
])
# Comma-separated elements (each optional, allowing empty slots).
dellist = delimitedList(Optional(atom))
# "[a, b, c]" list literal; brackets discarded.
listing = lbr.suppress() + dellist + rbr.suppress()
# "name(arg, ...)" function call.
function = identifier.setResultsName('name') + lpar.suppress() + Group(
    Optional(delimitedList(atom))).setResultsName("args") + rpar.suppress()
atom << (listing | number | string | variable | true | false | none | function)

_false = Const(False)
_true = Const(True)
# Parse actions convert raw tokens into AST node objects.
number.setParseAction(lambda t: Const(_number(t[0])))
variable.setParseAction(lambda t: Variable(t[0].strip("$")))
string.setParseAction(lambda t: Const(_str(t[0])))
# NOTE(review): "none" evaluates to Const(False), same as "false" --
# confirm this equivalence is intended.
none.setParseAction(lambda t: _false)
false.setParseAction(lambda t: _false)
true.setParseAction(lambda t: _true)
dellist.setParseAction(lambda s, l, t: List(t[:]))
function.setParseAction(_make_func)
atom.setParseAction(lambda s, l, t: t[0])
from pyparsing import Combine, LineEnd, Literal, Optional, Suppress, Word, alphanums

# Characters allowed in one URN path component.
urn_word = Word(alphanums + "_$?=%.&,")

# urn:great_expectations:metrics:<run_id>:<suite>:<metric>[:<kwargs>]
ge_metrics_urn = Combine(
    Suppress(Literal("urn:great_expectations:"))
    + Literal("metrics").setResultsName("urn_type")
    + Suppress(":")
    + urn_word.setResultsName("run_id")
    + Suppress(":")
    + urn_word.setResultsName("expectation_suite_name")
    + Suppress(":")
    + urn_word.setResultsName("metric_name")
    + Optional(Suppress(":") + urn_word.setResultsName("metric_kwargs"))
    + Suppress(LineEnd())
)

# urn:great_expectations:validations:<suite>:<metric>[:<kwargs>]
ge_validations_urn = Combine(
    Suppress(Literal("urn:great_expectations:"))
    + Literal("validations").setResultsName("urn_type")
    + Suppress(":")
    + urn_word.setResultsName("expectation_suite_name")
    + Suppress(":")
    + urn_word.setResultsName("metric_name")
    + Optional(Suppress(":") + urn_word.setResultsName("metric_kwargs"))
    + Suppress(LineEnd())
)

# urn:great_expectations:stores:<store_name>:... (expression continues below)
ge_stores_urn = Combine(
    Suppress(Literal("urn:great_expectations:"))
    + Literal("stores").setResultsName("urn_type")
    + Suppress(":")
    + urn_word.setResultsName("store_name")
    + Suppress(":")
def __init__(self, network):
    """Build the pyparsing grammars for node/edge queries, BGP policy
    clauses and the user-library definition language over *network*."""
    self.network = network
    self.g_business_relationship = nx.DiGraph()
    self.user_defined_sets = {}
    self.user_library_calls = []
    self.user_defined_functions = {}
    # Grammars
    #TODO: tidy this up
    attribute_unnamed = Word(alphanums+'_'+".")
    attribute = attribute_unnamed.setResultsName("attribute")
    self.attribute = attribute
    # Comparison operator tokens; each carries its own results name.
    lt = Literal("<").setResultsName("<")
    le = Literal("<=").setResultsName("<=")
    eq = Literal("=").setResultsName("=")
    ne = Literal("!=").setResultsName("!=")
    ge = Literal(">=").setResultsName(">=")
    gt = Literal(">").setResultsName(">")
    wildcard = Literal("*").setResultsName("wildcard")
    self.wildcard = wildcard
    self.prefix_lists = {}
    self.tags_to_allocate = set()
    self.allocated_tags = {}
    # Operator token -> callable used when evaluating parsed queries.
    self._opn = {
        '<': operator.lt,
        '<=': operator.le,
        '=': operator.eq,
        '!=': operator.ne,
        '>=': operator.ge,
        '>': operator.gt,
        '&': set.intersection,
        '|': set.union,
    }
    # map alphanum chars to alphanum equivalents for use in tags
    self._opn_to_tag = {
        '<': "lt",
        '<=': "le",
        '=': "eq",
        '!=': "ne",
        '>=': "ge",
        '>': "gt",
        '&': "and",
        '|': "or",
    }
    # Both are of comparison to access in same manner when evaluating
    comparison = (lt | le | eq | ne | ge | gt).setResultsName("comparison")
    stringComparison = (eq | ne).setResultsName("comparison")
    #
    #quoted string is already present
    float_string = Word(nums).setResultsName("value").setParseAction(lambda t: float(t[0]))
    integer_string = Word(nums).setResultsName("value").setParseAction(lambda t: int(t[0]))
    #TODO: use numString, and make integer if fiull stop
    #TODO: allow parentheses? - should be ok as pass to the python parser
    # Dotted-quad IP address.
    ipField = Word(nums, max=3)
    ipAddress = Combine(ipField + "." + ipField + "." + ipField + "."
                        + ipField).setResultsName("ipAddress")
    boolean_and = Literal("&").setResultsName("&")
    boolean_or = Literal("|").setResultsName("|")
    boolean = (boolean_and | boolean_or).setResultsName("boolean")
    self._boolean = boolean # need to use in checking
    #TODO fix this matching 2a.ab when that should match a string
    numericQuery = Group(attribute + comparison + float_string).setResultsName(
        "numericQuery")
    stringValues = (attribute_unnamed
                    | quotedString.setParseAction(removeQuotes)
                    ).setResultsName("value")
    stringQuery = Group(attribute + stringComparison + stringValues).setResultsName(
        "stringQuery")
    wildcardQuery = wildcard.setResultsName("wildcardQuery")
    singleQuery = numericQuery | stringQuery | wildcardQuery
    singleQuery.setFailAction(parse_fail_action)
    # Node query: one or more single queries joined by &/|.
    self.nodeQuery = singleQuery + ZeroOrMore(boolean + singleQuery)
    # Edge direction markers.
    self.u_egress = Literal("egress->").setResultsName("u_egress")
    self.v_ingress = Literal("->ingress").setResultsName("v_ingress")
    self.u_ingress = Literal("ingress<-").setResultsName("u_ingress")
    self.v_egress = Literal("<-egress").setResultsName("v_egress")
    edgeType = (self.u_egress | self.u_ingress | self.v_egress
                | self.v_ingress).setResultsName("edgeType").setFailAction(parse_fail_action)
    # "(<node query>) <edge type> (<node query>)"
    self.edgeQuery = ("(" + self.nodeQuery.setResultsName("query_a") + ")"
                      + edgeType
                      + "(" + self.nodeQuery.setResultsName("query_b")
                      + ")").setFailAction(parse_fail_action)
    #start of BGP queries
    originQuery = (Literal("Origin").setResultsName("attribute") +
                   #this is a workaround for the match, comparison, value 3-tuple in processing
                   Literal("(").setResultsName("comparison") +
                   Group(self.nodeQuery).setResultsName("value")
                   + Suppress(")")).setResultsName("originQuery")
    transitQuery = (Literal("Transit").setResultsName("attribute") +
                    #this is a workaround for the match, comparison, value 3-tuple in processing
                    Literal("(").setResultsName("comparison") +
                    Group(self.nodeQuery).setResultsName("value")
                    + Suppress(")")).setResultsName("transitQuery")
    prefixList = Literal("prefix_list")
    matchPl = (prefixList.setResultsName("attribute")
               + comparison
               + attribute.setResultsName("value"))
    matchTag = (Literal("tag").setResultsName("attribute")
                + comparison
                + attribute.setResultsName("value"))
    #tags contain -> tag = aaa
    inTags = (
        Literal("tags").setResultsName("attribute").setParseAction(lambda x: "tag")
        + Literal("contain").setResultsName("comparison").setParseAction(lambda x: "=")
        + attribute_unnamed.setResultsName("value")
    )
    bgpMatchQuery = Group(matchPl | matchTag | inTags | originQuery | transitQuery
                          ).setResultsName("bgpMatchQuery").setFailAction(parse_fail_action)
    self.bgpMatchQuery = bgpMatchQuery
    # BGP actions; each produces (attribute, value) pairs for processing.
    setLP = (Literal("setLP").setResultsName("attribute")
             + integer_string.setResultsName("value")).setResultsName("setLP")
    setMED = (Literal("setMED").setResultsName("attribute")
              + integer_string.setResultsName("value")).setResultsName("setMED")
    addTag = (Literal("addTag").setResultsName("attribute")
              + attribute.setResultsName("value")).setResultsName("addTag")
    removeTag = (Literal("removeTag").setResultsName("attribute")
                 + attribute.setResultsName("value")).setResultsName("removeTag")
    #TODO: need to set blank value
    reject = Literal("reject")
    #TODO: remove once move quagga output inside module
    self.reject = reject
    rejectAction = (reject.setResultsName("attribute")
                    + Literal("route").setResultsName("value")).setResultsName("reject")
    setNextHop = (Literal("setNextHop").setResultsName("attribute")
                  + ipAddress.setResultsName("value")).setResultsName("setNextHop")
    setOriginAttribute = (Literal("setOriginAttribute").setResultsName("attribute")
                          + (oneOf("IGP BGP None").setResultsName("value"))).setResultsName("setOriginAttribute")
    bgpAction = Group(addTag | setLP | setMED | removeTag | setNextHop
                      | setOriginAttribute | rejectAction).setResultsName("bgpAction")
    # The Clauses
    ifClause = Group(Suppress("if") + bgpMatchQuery
                     + ZeroOrMore(Suppress(boolean_and)
                                  + bgpMatchQuery)).setResultsName("if_clause")
    actionClause = bgpAction + ZeroOrMore(Suppress(boolean_and) + bgpAction)
    thenClause = Group(Suppress("then")
                       + actionClause).setResultsName("then_clause")
    ifThenClause = Group(Suppress("(") + ifClause + thenClause
                         + Suppress(")")).setResultsName("ifThenClause")
    elseActionClause = Group(Suppress("(") + actionClause
                             + Suppress(")")).setResultsName("else_clause")
    # Support actions without a condition (ie no "if")
    unconditionalAction = Group(Suppress("(")
                                + Group(actionClause).setResultsName("unconditionalActionClause")
                                + Suppress(")")).setResultsName("bgpSessionQuery")
    # Query may contain itself (nested)
    bgpSessionQuery = Forward()
    bgpSessionQuery << (
        ifThenClause
        + Optional(Suppress("else") + (elseActionClause | bgpSessionQuery))
    ).setResultsName("bgpSessionQuery")
    bgpSessionQuery = bgpSessionQuery | unconditionalAction
    self.bgpSessionQuery = bgpSessionQuery
    self.bgpApplicationQuery = self.edgeQuery + Suppress(":") + self.bgpSessionQuery
    # Library stuff
    set_values = Suppress("{") + delimitedList(
        attribute, delim=',').setResultsName("set_values") + Suppress("}")
    #Set to empty set, rather than empty list as empty list is processed differently somewhere in parser
    empty_set = Literal("{}").setResultsName("set_values").setParseAction(lambda x: set())
    self.set_definition = (attribute.setResultsName("set_name")
                           + Suppress("=") + (empty_set | set_values))
    library_params = attribute | Group(set_values) | empty_set
    library_function = (attribute.setResultsName("def_name") + Suppress("(")
                        + delimitedList(library_params,
                                        delim=',').setResultsName("def_params")
                        + Suppress(")"))
    library_function.setFailAction(parse_fail_action)
    self.library_def = Suppress("define") + library_function
    self.library_call = Suppress("apply") + library_function
    self.library_def.setFailAction(parse_fail_action)
    self.library_edge_query = (self.attribute.setResultsName("query_a")
                               + edgeType
                               + self.attribute.setResultsName("query_b"))
    self.library_edge_query.setFailAction(parse_fail_action)
    library_edge_definition = self.library_edge_query + Suppress(":") + self.bgpSessionQuery
    library_global_definition = ("global tags = {"
                                 + delimitedList(attribute,
                                                 delim=',').setResultsName("tags")
                                 + "}")
    self.library_entry = (library_global_definition.setResultsName("global_tags")
                          | library_edge_definition.setResultsName("library_edge"))
    self.library_entry.setFailAction(parse_fail_action)
    # A policy line is an application query, a library call, or a set definition.
    self.bgpPolicyLine = (
        self.bgpApplicationQuery.setResultsName("bgpApplicationQuery")
        | self.library_call.setResultsName("library_call")
        | self.set_definition.setResultsName("set_definition")
    )
else: return Group((Suppress(funOrbNumber(n)) + funCoefficients(n)).setResultsName("lastCoeffs")) # ====================> Basis File <========================== comment = Literal("#") + restOfLine parseAtomLabel = Word(srange("[A-Z]"), max=1) + Optional( Word(srange("[a-z]"), max=1)) parserBasisName = Word(alphanums + "-") + Suppress(restOfLine) parserFormat = OneOrMore(natural + NotAny(FollowedBy(point))) parserKey = (parseAtomLabel.setResultsName("atom") + parserBasisName.setResultsName("basisName") + Suppress(Literal("1"))) parserBasisData = OneOrMore(floatNumber) parserBasis = (parserKey + parserFormat.setResultsName("format") + parserBasisData.setResultsName("coeffs")) topParseBasis = OneOrMore(Suppress(comment)) + OneOrMore( Group(parserBasis + Suppress(Optional(OneOrMore(comment))))) # ===============================<>==================================== # Parsing From File
"""XYZ file format readers.""" from pyparsing import (Group, LineEnd, OneOrMore, Regex, Suppress, Word, alphas, nums, restOfLine) __all__ = ["parser_xyz"] natural = Word(nums) parse_float = Regex(r'(\-)?\d+(\.)(\d*)?([eE][\-\+]\d+)?') header = natural + LineEnd() + restOfLine label = Word(alphas, max=2) xyz = parse_float * 3 parse_atom = label.setResultsName("label") + xyz.setResultsName("xyz") parser_xyz = Suppress(header) + OneOrMore(Group(parse_atom))
    Word,
    alphanums,
    delimitedList,
    lineStart,
)

from .sections import GDSection, GDSectionHeader
from .values import value

# "key" may contain slashes (resource-path style); "variable" may not.
key = Word(alphanums + "_/").setName("key")
var = Word(alphanums + "_").setName("variable")
# name="value" attribute inside a section header.
attribute = Group(var + Suppress("=") + value)

# [node name="Node2D"]
section_header = ((Suppress(lineStart) + Suppress("[") +
                   var.setResultsName("section_type") +
                   Optional(delimitedList(attribute, Empty())) +
                   Suppress("]")).setName("section_header").setParseAction(
                       GDSectionHeader.from_parser))

# texture = ExtResource( 1 )
section_entry = Group(Suppress(lineStart) + key + Suppress("=") +
                      value).setName("section_entry")
section_contents = delimitedList(section_entry,
                                 Empty()).setName("section_contents")

# [node name="Sprite" type="Sprite"]
# texture = ExtResource( 1 )
section = ((section_header +
            Optional(section_contents)).setName("section").setParseAction(
                GDSection.from_parser))
from pyparsing import Group
from pyparsing import Literal
from pyparsing import Optional
from pyparsing import Or
from pyparsing import Suppress
from pyparsing import Word
from pyparsing import ZeroOrMore
from pyparsing import alphas
from pyparsing import nums

## Alias of a name
# Can start with alphabet and can have numbers and underscore
alias = Word(alphas, alphas + nums + "_")
# "(alias)" -- the parentheses are discarded.
alias_enclosed = Suppress(Literal("(")) \
    + alias.setResultsName("alias") \
    + Suppress(Literal(")"))

## Name of port, interface or instance
# Can start with alphabet and can have numbers and underscore
name = Word(alphas, alphas + nums + "_")
name_with_alias = name.setResultsName("port") \
    + Optional(alias_enclosed)

## Index of a port or signal
# Can be a number and enclosed in a square brace
index = Word(nums)
index_enclosed = Suppress(Literal("[")) \
    + index.setResultsName("index") \
    + Suppress(Literal("]"))

## Slice of a port or signal in the form "a:b"
#define MAX_LOCS=100 #define USERNAME = "******" #define PASSWORD = "******" a = MAX_LOCS; CORBA::initORB("xyzzy", USERNAME, PASSWORD ); """ ################# print("Example of an extractor") print("----------------------") # simple grammar to match #define's ident = Word(alphas, alphanums+"_") macroDef = Literal("#define") + ident.setResultsName("name") + "=" + restOfLine.setResultsName("value") for t,s,e in macroDef.scanString( testData ): print(t.name,":", t.value) # or a quick way to make a dictionary of the names and values # (return only key and value tokens, and construct dict from key-value pairs) # - empty ahead of restOfLine advances past leading whitespace, does implicit lstrip during parsing macroDef = Suppress("#define") + ident + Suppress("=") + empty + restOfLine macros = dict(list(macroDef.searchString(testData))) print("macros =", macros) print() ################# print("Examples of a transformer") print("----------------------")
# Grammar for an Acclaim-style skeleton file.
# NOTE(review): `end`, `floatValue`, `intValue`, `channels`, `floatVector`,
# `rotationOrder` and `begin` are defined outside the visible region.

# Bone name: a run of word characters that is not the `end` keyword;
# whitespace restricted to spaces so the name stops at end-of-line.
bonename = Combine(~end + Word(alphanums+"_-")).setWhitespaceChars(' ')

# ":version 1.10" -- only this exact version string is accepted.
version = Keyword(":version") + Literal("1.10")

# ":name <skeleton-name>"
skeletonName = Keyword(":name") + bonename.setResultsName('name')

# One unit definition: a unit name followed by a numeric or word value.
unitDefinition = Group(Word(alphas) + (floatValue | intValue | Word(alphas)))

# ":units" block; Dict keys the collected definitions by unit name.
unitSection = Keyword(":units") + \
    Dict(ZeroOrMore(unitDefinition)).setResultsName('units')

# ":documentation" free text, up to the next ":"-prefixed keyword.
documentationSection = Keyword(':documentation') + \
    SkipTo(":").setResultsName('documentation')

# ":root" block; '&' (Each) lets the sub-entries appear in any order.
rootSection = Group(Keyword(":root")
    & (Keyword("order") + channels.setResultsName('channels'))
    & (Keyword("position") + floatVector.setResultsName('position'))
    & (Keyword("axis") + rotationOrder.setResultsName("axisRotationOrder"))
    & (Keyword("orientation") + floatVector.setResultsName("axis"))
).setResultsName('root')

# One bone definition.
# NOTE(review): this expression is cut off at the end of the visible region.
bone = Group(
    begin +
    Keyword("id") + intValue +
    Keyword("name") + bonename.setResultsName("name") +
    Keyword("direction") + floatVector.setResultsName("direction") +
    Keyword("length") + floatValue.setResultsName("length") +
    Keyword("axis") + floatVector.setResultsName("axis") +
# + Word(domain_charset_alphanums, exact=1)
domain_label.setName('<level2-domain-label>')

# NOGO: Do not consider merging subdomain_* with domain_generic_*
# For subdomains, we can use underscore, practically anywhere within its domain label
# Domain Registars mostly do not allow name registration having any underscore
# End-user may however deploy underscore anywhere outside of 2nd and top level domain name

# Subdomain label: leading/trailing char must be [A-Za-z0-9_]; up to 61
# interior chars may also include '-' (63 chars max total, per RFC 1035).
# FIX: raw string -- the original non-raw literal relied on Python passing
# the unknown escape '\-' through unchanged, which raises SyntaxWarning /
# DeprecationWarning on modern interpreters.  The pattern is unchanged.
subdomain_label_regex = r'[A-Za-z0-9_]{1,1}(([A-Za-z0-9_\-]{0,61}){0,1}[A-Za-z0-9_]{1,1}){0,1}'

# We do not do IDN/PunyCode syntax enforcement here, that is outside the scope of this parser
subdomain_label = Regex(subdomain_label_regex)
subdomain_label.setName('<subdomain_label>')

# Generic Domain label, used for ANY level of its domain name
domain_generic_label = Word(domain_charset_alphanums_dash_underscore, min=1, max=63)
domain_generic_label.setName('<domain_generic_label>')
# FIX: setResultsName() returns a *copy* (unlike setName(), which mutates in
# place), so the original bare call was a silent no-op -- the result must be
# assigned back for the 'domain_name' results name to take effect.
domain_generic_label = domain_generic_label.setResultsName('domain_name')

# domain_fqdn is very, very strict. Use sparingly; probably want to use domain_generic_fqdn
# Original pyparsing draft for domain_fqdn was this:
# domain_fqdn = Combine(
#     Optional(
#         ZeroOrMore(
#             subdomain_label
#             + Literal('.')
#         )
#         + domain_label
#         + Literal('.')
#     )
#     + tld_label
# )
# Problem with above domain_fqdn is that PyParsing cannot do lookahead in time, so
def main(**kwargs):
    """Track Twitter users: fetch their profiles and store them in a database.

    NOTE(review): this is Python 2 source (print statements, `except E, e`
    syntax).  Several spans below were redacted to '******' by whatever
    exported this file, leaving the function syntactically broken; the
    original control flow around them cannot be recovered from this view.
    """
    print 'Main arguments: ', kwargs
    verbosity = kwargs.get('verbosity')

    # Create log file
    log = errorReport_restricted.getErrorReport()
    log.startLog()

    # Grammar for parsing retweet
    # Matches "RT @<name>:" and captures the retweeted user's handle.
    retweeted = Word( alphanums + "_" + "-" )
    grammar = Keyword("RT") + "@" + retweeted.setResultsName("name") + ":"

    # Connect to database and create tables
    dbAccess = databaseAccess_restricted.getDatabaseAccess()
    dbargs = dict([ (k,kwargs[k]) for k in ['host','db','user','passwd'] ])
    dbAccess.makeConnection(**dbargs)
    dbAccess.createTables()

    # Rate limiter wrapping the Twitter API.
    rlapi = rateLimiter_restricted.getRateLimiter(**kwargs)

    # List of fields in the profile table
    profilefields = ['uid', 'name', 'screen_name', 'location', 'protected',
                     'utc_offset', 'time_zone', 'statuses_count', 'followers_count',
                     'friends_count', 'favourites_count', 'geo_enabled', 'lang', 'created_at']

    # List of users we want to track
    users = ['gabioptavares']

    for user in users:
        # NOTE(review): redacted span preserved byte-for-byte from the dump;
        # it originally logged the started user and fetched the profile
        # inside a try/except around the Twitter API call -- TODO recover
        # from the unredacted source.
        log.writeMessage('Started user: '******'Twitter API exception: ', e
        log.writeMessage('Twitter API exception: ' + e.message)
        continue

        # Insert profile into profile table
        try:
            dbAccess.profileToSQL(profile, profilefields, **kwargs)
        except databaseAccess_restricted.ConnectionClosed, e:
            # Connection dropped mid-insert: log it and reconnect.
            log.writeError()
            print e.message
            log.writeMessage(e.message)
            dbAccess.makeConnection(**dbargs)
            print 'Connection reestablished.'
            log.writeMessage('Connection reestablished.')
# Terminals
eq = Literal("=").suppress()
begin = Literal("{").suppress()
stop = Literal("}").suppress()
quote = Literal('"').suppress()

# Identifier: a letter followed by letters/digits/underscores.
name = Word(alphas, alphas + nums + "_")
quotedName = quote + name + quote
yesno = Literal("yes") | Literal("no")

# Numeric terminals, converted to Python numbers by parse actions.
flt = Word(nums + ".-").setParseAction(lambda s, l, t: float(t[0]))
integer = Word(nums + "-").setParseAction(lambda s, l, t: int(t[0]))
intList = begin + OneOrMore(integer) + stop
floatList = begin + OneOrMore(flt) + stop

# "key = value" lines of a trade-node block.  Lines whose value matters are
# captured with setResultsName; the rest are matched and suppressed.
definitionsLine = Literal("definitions").suppress() + eq + quotedName.setResultsName("quotedName")
currentLine = Literal("current").suppress() + eq + flt.setResultsName("currentValue")
localValueLine = Literal("local_value").suppress() + eq + flt.setResultsName("localValue")
outgoingLine = Literal("outgoing").suppress() + eq + flt.setResultsName("outgoing")
valueAddedOutgoingLine = (Literal("value_added_outgoing") + eq + flt).suppress()
retentionLine = (Literal("retention") + eq + flt).suppress()
steerPowerLine = (Literal("steer_power") + eq + flt).suppress()
totalLine = (Literal("total").suppress() + eq + flt).suppress()  # total trade power
# FIX: the original used Word("p_pow") ^ Word("province_power"), but
# Word(chars) matches ANY run drawn from that character set (e.g. "wop_p"),
# not the exact token.  Literal matches the keyword itself, consistent with
# every other line in this grammar; '^' (longest-match Or) is kept.
provincePower = Literal("p_pow") ^ Literal("province_power")
provincePowerLine = (provincePower + eq + flt).suppress()
maxLine = (Literal("max") + eq + flt).suppress()
collectorPowerLine = (Literal("collector_power") + eq + flt).suppress()
pullPowerLine = (Literal("pull_power") + eq + flt).suppress()
retainPowerLine = (Literal("retain_power") + eq + flt).suppress()
highestPowerLine = (Literal("highest_power") + eq + flt).suppress()
pirateHuntLine = (Literal("pirate_hunt") + eq + flt).suppress()
# listAllMatches=True: every "value = x" occurrence is collected into the
# "incomingValue" results list rather than overwriting the previous one.
valueLine = Literal("value").suppress() + eq + flt.setResultsName("incomingValue", True)
from pyparsing import (Word, Combine, Optional, alphas, alphanums,
                       oneOf, delimitedList, Group)

# Generates Pyrex/Cython "cdef extern" declarations from the OpenGL header.
# NOTE(review): Python 2 source (print statements).
header = "/usr/include/GL/gl.h"
testdata = open(header).read()

# Functions excluded from the generated bindings.
functions_skip = [
    "glCreateDebugObjectMESA",
    "glBlendEquationSeparateATI",
]

# C identifier.
ident = Word(alphas, alphanums + "_")

# A (possibly const, possibly pointer) C type, e.g. "const GLfloat *";
# adjacent=False lets Combine join tokens separated by whitespace.
vartype = (Optional("const") +
           Combine(ident + Optional(Word("*")), adjacent = False))

# Comma-separated "type name" argument list.
arglist = delimitedList(
    Group(vartype.setResultsName("type") + \
          ident.setResultsName("name"))
)

# "name ( args ) ;" -- a C prototype; "void" marks an empty argument list.
functionCall = ident.setResultsName("name") + \
    "(" + (arglist.setResultsName("args") | "void") + ")" + ";"

# "typedef [unsigned|signed] base alias ;"
typedef = "typedef" + Optional("unsigned") + Optional("signed") + \
    ident + ident + ";"

known_types = []

print """cdef extern from "%s":""" % (header)

# Emit a ctypedef for every typedef found in the header; fn[1:-1] drops the
# "typedef" keyword and trailing ";"; fn[-2] is the alias being defined.
for fn, s, e in typedef.scanString(testdata):
    print " ctypedef", " ".join(fn[1:-1])
    known_types.append(fn[-2])
print

py_functions = []
# NOTE(review): this loop is cut off at the end of the visible region.
for fn, s, e in functionCall.scanString(testdata):
    skip = False
    if fn.name in functions_skip:
# NOTE(review): this chunk opens mid-way through a parse-action function
# whose `def` line is outside the visible region; `tokens` is its parameter.
        del tokens[3]
    else:
        tokens["theoretical"] = False
    to_nom_value_and_std_dev(tokens)

ufloat_theor.setParseAction(to_nom_val_and_std_dev_theor)

# notes ::= note_value ("," note_value)*
# The collected note values are joined into one space-separated string.
notes = delimitedList(note_value).setParseAction(lambda t: " ".join(t))

# mass_number ::= decimal
mass_number = decimal

# atomic_weight ::= ufloat | ( "[" float "," float "]" ) | ( "[" mass_number "]" )
# Three alternatives: a value with uncertainty, an interval "[lwr, upr]",
# or a bracketed mass number "[n]".
atomic_weight = ufloat | \
    ( LBRACK + float_.setResultsName("lwr_bnd") + Suppress(",") +
      float_.setResultsName("upr_bnd") + RBRACK ) | \
    ( LBRACK + mass_number.setResultsName("stable_mass_number") + RBRACK )

def set_atomic_weight_type(tokens):
    """Tag the parsed atomic weight with which alternative matched.

    Distinguishes by which results names were populated: nominal_value ->
    VAL_SD, lwr_bnd -> INTERVAL, otherwise STABLE_MASS_NUM.
    """
    if tokens.nominal_value != '':
        tokens['type'] = VAL_SD
    elif tokens.lwr_bnd != '':
        tokens['type'] = INTERVAL
    else:
        tokens['type'] = STABLE_MASS_NUM
    return tokens

atomic_weight.setParseAction( set_atomic_weight_type)
# ORDER BY item: a column with an optional ASC/DESC qualifier.
orderBySpec = Group(columnName + Optional(orderseq))
# Right-recursive list of order-by specs; listAllMatches collects every one.
# NOTE(review): the ZeroOrMore("," + orderByExpression) tail combined with
# the recursion looks redundant (either alone would express a comma list);
# preserved as-is since changing it would alter the parse tree.
orderByExpression << (orderBySpec.setResultsName("orderby_spec", listAllMatches=True) + ZeroOrMore("," + orderByExpression))
orderByClause = (orderbyToken + orderByExpression).setResultsName("orderby")

# LIMIT [offset,] count
limitClause = (limitToken + Group(Optional(intNum + ",") + intNum)).setResultsName("limit")

trueOrFalse = trueToken | falseToken
# Facet ordering keyword: by hit count or by value.
facetOrderBy = hitsToken | valueToken
# column:(bool, int, int, orderBy) -- one facet specification.
facetSpec = Group(columnName + ":" + "(" + trueOrFalse + "," + intNum + "," + intNum + "," + facetOrderBy + ")")
browseByClause = (browseByToken + "(" + delimitedList(facetSpec).setResultsName("facet_specs") + ")")

# GROUP BY column [TOP n]
groupByClause = (groupByToken + columnName.setResultsName("groupby") +
                 Optional(topToken + intNum.setResultsName("max_per_group")))

# SELECT cols FROM index [WHERE expr] then any mix of ORDER BY / LIMIT /
# BROWSE BY / GROUP BY clauses, with an optional trailing semicolon.
# NOTE(review): selectStmt and the tokens it references are defined outside
# the visible region (Forward-declared, hence "<<").
selectStmt << (selectToken +
               ('*' | columnNameList).setResultsName("columns") +
               fromToken +
               ident.setResultsName("index") +
               Optional((whereToken + whereExpression.setResultsName("where"))) +
               ZeroOrMore(orderByClause | limitClause | browseByClause | groupByClause ) +
               Optional(";")
               )
# NOTE(review): the lines below are the tail of a validation/builder function
# whose signature is outside the visible region; `space` is its parameter.
    dims = [dim.name for dim in list(space.dimensions)]
    # The key must itself be one of the declared dimensions.
    if space.key not in dims:
        raise ValueError("Space key must be one of its dimensions.")
    for subspace in space.subspaces:
        for dim in set(subspace.dimensions):
            if dim not in dims:
                # NOTE(review): `name` is not defined in this scope, so this
                # raise would itself fail with a NameError; the message was
                # presumably meant to interpolate `dim` -- TODO confirm/fix.
                raise ValueError("Subspace dimension {0} must be one of its dimensions.".format(repr(name)))
    # Prepend an implicit key subspace built from the space's key regions.
    keysubspace = hdtypes.Subspace(dimensions=[space.key], nosearch=[],
                                   regions=list(space.keyregions))
    subspaces = [keysubspace] + list(space.subspaces)
    return hdtypes.Space(space.name, space.dimensions, subspaces)

# Grammar for "space" definition files.
identifier = Word(string.ascii_letters + string.digits + '_')
# Decimal integer literal, converted to int by a parse action.
integer = Word(string.digits).setParseAction(lambda t: int(t[0]))
# Hexadecimal literal "0x...", converted to int (prefix stripped).
hexnum = Combine(Literal("0x") + Word(string.hexdigits)).setParseAction(lambda t: int(t[0][2:], 16))
# dimension: name [ "(" string|uint64 ")" ]; the type defaults to "string".
dimension = identifier.setResultsName("name") + \
            Optional(Suppress(Literal("(")) +
                     (Literal("string") | Literal("uint64")) +
                     Suppress(Literal(")")), default="string").setResultsName("type")
dimension.setParseAction(parse_dimension)
# "auto <int> <int>"
autoregion = Literal("auto") + integer + integer
# "region <int> <hex> <int>"
staticregion = Literal("region") + integer + hexnum + integer
# Any number of static regions, optionally followed by one auto region.
region = ZeroOrMore(Group(staticregion)) + Optional(Group(autoregion))
region.setParseAction(parse_regions)
# subspace: dimension list, optional "nosearch" list (default empty), regions.
subspace = Literal("subspace").suppress() + \
    Group(delimitedList(identifier)) + \
    Optional(Suppress(Literal("nosearch")) + Group(delimitedList(identifier)), default=[]) + \
    Group(region)
subspace.setParseAction(parse_subspace)
# NOTE(review): this definition is cut off at the end of the visible region.
space = Literal("space").suppress() + identifier.setResultsName("name") + \
| make_interval("millisecond", "ms") | make_interval("minute", "m") | make_interval("second", "s") | make_interval("microsecond", "us") ) intervals = OneOrMore(interval) interval_fxn = Group( function("interval", quoted(intervals), caseless=True, optparen=True) ).setResultsName("interval") ts_expression = Forward() ts_expression <<= ( Group(ts_functions + oneOf("+ -") + interval_fxn).setResultsName("ts_expression") | ts_functions | Group(function("ms", Group(ts_expression), caseless=True)).setResultsName( "ts_function" ) ) value <<= Group(ts_expression | primitive | set_ | _emptyset | list_ | dict_).setName( "value" ) var_val = value | var.setResultsName("field") # Wrap these in a group so they can be used independently primitive = Group(primitive).setName("primitive") set_ = Group(set_ | _emptyset).setName("set") types = oneOf("s ss n ns b bs bool null l m", caseless=True).setParseAction( upcaseTokens ) filename = quotedString | Regex(r"[0-9A-Za-z/_\-\.]+")