def parse_commanders(x):
    """Parse a commanders string into ``{combatant: [commander, ...]}``.

    The input is a ``;``-separated list of forces.  Each force is a list of
    commanders (``<rank> <name>`` or the literal ``None``) separated by
    commas, optionally closed with ``[,] and <commander>``, followed by the
    combatant code (``US``, ``CS`` or ``I``) in square brackets.

    Every commander is returned as the dict of its named results (``rank``
    and ``fullname``); ``None`` placeholders are left out.
    """
    # Punctuation and connectives never appear in the output.
    lbrak, rbrak, comma = [pp.Literal(ch).suppress() for ch in "[],"]
    and_word = pp.Literal("and").suppress()
    closing_and = pp.Optional(comma) + and_word

    # Building blocks of a commander entry.
    rank = pp.Or([pp.Literal(title) for title in RANKS_LOOKUP.keys()])("rank")
    plain_name = pp.Word(pp.srange("[A-Z]"), pp.alphas + pp.alphas8bit, min=2)
    initials = pp.Regex(r"([A-Z]\.)+")
    suffix = pp.Literal("Jr.")
    no_commander = (pp.Literal("None"))("none")
    combatant = pp.oneOf(' '.join(('US', 'CS', 'I')))("combatant")

    def join_name_parts(s, l, t):
        # Collapse all name tokens into one space-separated string.
        return ' '.join(t)

    name_part = initials | pp.quotedString | plain_name
    full_name = (
        pp.OneOrMore(name_part) + pp.Optional(pp.Literal(",") + suffix)
    )("fullname")
    full_name.addParseAction(join_name_parts)

    commander = pp.Group(rank + full_name | no_commander)
    commanders = pp.Group(
        pp.delimitedList(commander, ",") + pp.Optional(closing_and + commander)
    )("commanders")
    force = pp.Group(commanders + lbrak + combatant + rbrak)
    parsed_forces = pp.delimitedList(force, ";").parseString(x)

    # Post-process: drop the "None" placeholders the grammar lets through.
    by_combatant = {}
    for force_tokens in parsed_forces:
        side = force_tokens['combatant']
        kept = []
        for entry in force_tokens['commanders']:
            fields = entry.asDict()
            if "none" in fields:
                continue
            kept.append(fields)
        by_combatant[side] = kept
    return by_combatant
def parse_connection_str(connstr):
    """Parse a connection string and return the pyparsing results.

    Accepted syntax (``;`` separates independent connections)::

        head(outputs) -> [middle(inputs[|outputs]) -> ...] tail(inputs)

    Identifiers consist of letters, digits and the characters ``_.@``.
    The parsed result is handed to ``check_numconnections`` before it is
    returned.
    """
    ident = Word(alphas + "0123456789" + "_.@")
    node_name = ident.setResultsName('nodename')
    outputs = delimitedList(ident).setResultsName('outputnames')
    inputs = delimitedList(ident).setResultsName('inputnames')

    # Head node: outputs only.
    head = (
        node_name + Suppress("(") + outputs + Suppress(")")
    ).setResultsName('headnode')

    # Middle node: inputs, optionally followed by "|" and outputs.
    middle = Group(
        node_name
        + Suppress('(')
        + inputs
        + Optional("|" + outputs)
        + Suppress(")")
    ).setResultsName('middlenode')

    # Tail node: inputs only.
    tail = (
        node_name + Suppress("(") + inputs + Suppress(")")
    ).setResultsName('tailnode')

    # A middle node only counts when another "->" follows it; otherwise
    # the node after the arrow has to be parsed as the tail.
    middle_chain = Group(
        ZeroOrMore(Suppress("->") + middle + FollowedBy("->"))
    ).setResultsName('middlenodes')

    connection = Group(
        head + middle_chain + Suppress("->") + tail
    ).setResultsName('nodes')
    all_connections = Group(
        connection + ZeroOrMore(Suppress(";") + connection)
    ).setResultsName('connects')

    result = all_connections.parseString(connstr)
    check_numconnections(result)
    return result
def __init__(self):
    """Build the grammar for a minimal ``select ... from ...`` statement.

    The finished grammar is stored on ``self.select_stmt``.
    """
    name = Word(alphas, alphanums + '_')
    table_alias = name.copy()
    list_sep = Suppress(',')

    # Select clause: a bare "*" or a comma-separated column list.  A column
    # is "[alias.]name[ as name]", fused into a single token by Combine.
    qualifier = Optional(table_alias + '.')
    rename = Optional(' as ' + name)
    column = Combine(qualifier + name + rename).setResultsName('column_name')
    column_list = Group(delimitedList(column, list_sep))
    select_clause = (Literal('*') | column_list).setResultsName('select_clause')

    # From clause: comma-separated tables, each optionally followed by an
    # alias.
    table = name + Optional(table_alias)
    from_clause = delimitedList(table, list_sep).setResultsName('from_clause')

    select_kw = Keyword('select', caseless=1)
    from_kw = Keyword('from', caseless=1)
    self.select_stmt = select_kw + select_clause + from_kw + from_clause
def func_tokens(dictionary, parse_action):
    """Build the parser elements for ``$name(args)`` function tokens.

    Args:
        dictionary: mapping looked up by ``$<digits>`` tokens (the digits are
            converted to ``int`` and used as the key; a missing key yields
            an empty string).  When it is falsy, ``$<digits>`` tokens are
            not part of the argument-list grammar.
        parse_action: parse action attached to the function token.

    Returns:
        A ``(func_tok, arglist, rx_tok)`` tuple of parser elements.
    """
    ident = Word(alphas + '_', alphanums + '_')
    dollar_name = Combine('$' + ident.copy()('funcname'))
    func_tok = dollar_name + originalTextFor(nestedExpr())('args')
    func_tok.leaveWhitespace()
    func_tok.setParseAction(parse_action)
    func_tok.enablePackrat()

    def lookup_number(tokens):
        return dictionary.get(int(tokens.num), u'')

    rx_tok = Combine(Literal('$').suppress() + Word(nums)('num'))
    rx_tok.setParseAction(lookup_number)

    def strip_text(s, l, tok):
        return tok[0].strip()

    text_tok = CharsNotIn(u',').setParseAction(strip_text)
    quote_tok = QuotedString('"')

    # "$<digits>" arguments are only recognised when there is something to
    # look them up in.
    if dictionary:
        argument = quote_tok | rx_tok | text_tok
    else:
        argument = quote_tok | text_tok
    return func_tok, Optional(delimitedList(argument)), rx_tok
def parse_select_columns(string):
    """Parse a select query and return the columns

    Args:
        string(str): Input string to be parsed. ``None``, an empty string
            and a whitespace-only string all yield an empty list.

    Returns:
        result(list of str): List of columns, each stripped of surrounding
            whitespace
    """
    # Nothing to parse: treat None / "" / blank input alike instead of
    # failing inside the parser.
    if not string or not string.strip():
        return []

    # Drop a leading WITH clause.  Leading whitespace is ignored so that
    # queries written as indented or triple-quoted text are still detected.
    if string.lstrip().upper().startswith("WITH"):
        suppressor = _with + delimitedList(_db_name + _as + subquery)
        string = suppressor.suppress().transformString(string)

    # Suppress each FROM keyword together with the rest of its line
    suppressor = MatchFirst(_from) + restOfLine
    string = suppressor.suppress().transformString(string)

    parser = _select + delimitedList(field_parser).setResultsName("columns")
    output = parser.parseString(string).columns.asList()

    # Strip extra whitespace from the string
    return [column.strip() for column in output]
def _create_update_expression():
    """ Create the grammar for an update expression

    The result matches one or more ``SET`` / ``ADD`` / ``DELETE`` /
    ``REMOVE`` clauses and carries the results name ``update``; each
    clause type has its own results name (``set_expr``, ``add_expr``,
    ``delete_expr``, ``remove_expr``).
    """
    # Local import keeps this block self-contained; pyparsing is already a
    # dependency of this module.
    from pyparsing import Keyword

    # Function names must be matched as whole keywords.  Word('...') treats
    # its argument as a *set of characters*, so it would also accept any
    # other word spelled with those letters (e.g. "test" or "plain").
    ine = (Keyword('if_not_exists') + Suppress('(') + var +
           Suppress(',') + var_val + Suppress(')'))
    list_append = (Keyword('list_append') + Suppress('(') + var_val +
                   Suppress(',') + var_val + Suppress(')'))
    fxn = Group(ine | list_append).setResultsName('set_function')

    # value has to come before var to prevent parsing TRUE/FALSE as variables
    path = (value | fxn | var)
    set_val = ((path + oneOf('+ -') + path) | path)
    set_cmd = Group(var + Suppress('=') + set_val)
    set_expr = (Suppress(upkey('set')) +
                delimitedList(set_cmd)).setResultsName('set_expr')
    add_expr = (Suppress(upkey('add')) +
                delimitedList(Group(var + value)))\
        .setResultsName('add_expr')
    delete_expr = (Suppress(upkey('delete')) +
                   delimitedList(Group(var + value)))\
        .setResultsName('delete_expr')
    remove_expr = (Suppress(upkey('remove')) +
                   delimitedList(var)).setResultsName('remove_expr')
    return OneOrMore(set_expr | add_expr | delete_expr | remove_expr)\
        .setResultsName('update')
def parse_file(file_name):
    """Parse the file ``file_name`` and return the pyparsing results.

    The grammar accepts one or more top-level items, each being an
    ``enum class``, a ``class``/``struct`` definition (optionally preceded
    by a ``template<...>`` header) or a ``namespace`` block.  ``//`` and
    C-style comments are ignored.  The whole file must match
    (``parseAll=True``).
    """
    # --- punctuation ---------------------------------------------------
    lbrace = pp.Literal('{').suppress()
    rbrace = pp.Literal('}').suppress()
    semi = pp.Literal(";").suppress()
    colon = pp.Literal(":")
    langle = pp.Literal("<")
    rangle = pp.Literal(">")
    equals = pp.Literal("=")
    comma = pp.Literal(",")
    lparen = pp.Literal("(")
    rparen = pp.Literal(")")
    lbrack = pp.Literal("[")
    rbrack = pp.Literal("]")
    minus = pp.Literal("-")

    # --- keywords ------------------------------------------------------
    class_kw = pp.Keyword('class')
    struct_kw = pp.Keyword('struct')
    template_kw = pp.Keyword('template')
    enum_kw = pp.Keyword('enum')
    namespace_kw = pp.Keyword("namespace")
    final = pp.Keyword('final')("final")
    stub = pp.Keyword('stub')("stub")

    # --- names and types -----------------------------------------------
    number = pp.Word(pp.nums)
    identifier = pp.Word(pp.alphas + "_", pp.alphanums + "_")
    qualified_name = pp.Word(pp.alphanums + "_" + ":")
    base_type = qualified_name

    # A type is either a plain (possibly qualified) name or a template
    # instantiation whose arguments are themselves types.
    type_expr = pp.Forward()
    template_type = pp.Group(
        base_type("template_name")
        + langle.suppress()
        + pp.Group(pp.delimitedList(type_expr))
        + rangle.suppress()
    )
    type_expr << (template_type | base_type)

    # --- enums ---------------------------------------------------------
    enum_class = pp.Group(enum_kw + class_kw)
    enum_init = equals.suppress() + pp.Optional(minus) + number
    enum_value = pp.Group(identifier + pp.Optional(enum_init))
    enum_values = pp.Group(
        lbrace + pp.delimitedList(enum_value) + pp.Optional(comma) + rbrace
    )
    enum_def = pp.Group(
        enum_class("type")
        + identifier("name")
        + colon.suppress()
        + identifier("underline_type")
        + enum_values("enum_values")
        + pp.Optional(semi).suppress()
    )

    # --- class members -------------------------------------------------
    member_name = pp.Combine(
        pp.Group(identifier + pp.Optional(lparen + rparen))
    )
    attrib = pp.Group(
        lbrack.suppress()
        + lbrack.suppress()
        + pp.SkipTo(']')
        + rbrack.suppress()
        + rbrack.suppress()
    )
    opt_attribute = pp.Optional(attrib)("attribute")
    default_value = equals.suppress() + pp.SkipTo(';')
    class_member = pp.Group(
        type_expr("type")
        + member_name("name")
        + opt_attribute
        + pp.Optional(default_value)("default")
        + semi.suppress()
    )("member")

    # --- classes, templates, namespaces --------------------------------
    template_param = pp.Group(identifier("type") + identifier("name"))
    template_def = pp.Group(
        template_kw
        + langle
        + pp.Group(pp.delimitedList(template_param))("params")
        + rangle
    )

    content = pp.Forward()
    class_content = pp.Forward()

    class_def = pp.Group(
        pp.Optional(template_def)("template")
        + (class_kw | struct_kw)("type")
        + qualified_name("name")
        + pp.Optional(final)
        + pp.Optional(stub)
        + opt_attribute
        + lbrace
        + pp.Group(pp.ZeroOrMore(class_content))("members")
        + rbrace
        + pp.Optional(semi)
    )
    namespace_def = pp.Group(
        namespace_kw("type")
        + identifier("name")
        + lbrace
        + pp.Group(pp.OneOrMore(content))("content")
        + rbrace
    )

    content << (enum_def | class_def | namespace_def)
    class_content << (enum_def | class_def | class_member)

    # Give the main rules readable names for parser error messages.
    for label, element in (
        ("enum", enum_def),
        ("class_def", class_def),
        ("class_member", class_member),
        ("content", content),
        ("namespace", namespace_def),
        ("template_def", template_def),
    ):
        element.setName(label)

    top_level = pp.OneOrMore(content)
    top_level.ignore("//" + pp.restOfLine)
    top_level.ignore(pp.cStyleComment)
    return top_level.parseFile(file_name, parseAll=True)
def _tdb_grammar(): #pylint: disable=R0914
    """
    Return the pyparsing grammar that recognises a single TDB command.

    The grammar is an ordered alternation of the supported commands
    (ELEMENT, TYPE_DEFINITION, FUNCTION, DEFINE_SYSTEM_DEFAULT,
    DEFAULT_COMMAND, PHASE, CONSTITUENT, PARAMETER) with ``Empty()`` as the
    final fallback.
    """
    def _as_int(toks):
        return [int(toks[0])]

    def _as_float(toks):
        return [float(toks[0])]

    int_number = Word(nums).setParseAction(_as_int)
    # Floats are matched with a regex, the approach pyparsing recommends.
    float_number = Regex(
        r'[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?'
    ).setParseAction(_as_float)

    # Names of phases, functions, ...
    symbol_name = Word(alphanums + '_', min=1)
    # Names of species, e.g. CO2, AL, FE3+
    species_name = Word(alphanums + '+-', min=1)
    # Sublattices are ":"-separated; species inside one are ","-separated.
    sublattice = Group(delimitedList(species_name, ','))
    constituent_array = Group(delimitedList(sublattice, ':'))

    param_types = (
        CaselessKeyword('G')
        | CaselessKeyword('L')
        | CaselessKeyword('TC')
        | CaselessKeyword('BMAGN')
    )

    # Piecewise expression: a leading number followed by one or more
    # "<text up to ';'> ; <number> <Y|N>" segments.  The expression text is
    # captured raw; _make_piecewise_ast receives the tokens.
    segment = (
        SkipTo(';')
        + Suppress(';')
        + float_number
        + Suppress(Word('YNyn', exact=1))
    )
    func_expr = float_number + OneOrMore(segment)
    func_expr.setParseAction(_make_piecewise_ast)

    cmd_element = CaselessKeyword('ELEMENT') + Word(alphas + '/-', min=1, max=2)

    cmd_typedef = (
        CaselessKeyword('TYPE_DEFINITION')
        + Suppress(White())
        + CharsNotIn(' !', exact=1)
        + SkipTo(LineEnd())
    )

    cmd_function = CaselessKeyword('FUNCTION') + symbol_name + func_expr

    cmd_defsysdef = CaselessKeyword('DEFINE_SYSTEM_DEFAULT')

    cmd_defcmd = CaselessKeyword('DEFAULT_COMMAND')

    cmd_phase = (
        CaselessKeyword('PHASE')
        + symbol_name
        + Suppress(White())
        + CharsNotIn(' !', min=1)
        + Suppress(White())
        + Suppress(int_number)
        + Group(OneOrMore(float_number))
        + LineEnd()
    )

    cmd_constituent = (
        CaselessKeyword('CONSTITUENT')
        + symbol_name
        + Suppress(White())
        + Suppress(':')
        + constituent_array
        + Suppress(':')
        + LineEnd()
    )

    cmd_parameter = (
        CaselessKeyword('PARAMETER')
        + param_types
        + Suppress('(')
        + symbol_name
        + Suppress(',')
        + constituent_array
        + Suppress(';')
        + int_number
        + Suppress(')')
        + func_expr
    )

    # First matching alternative wins; Empty() matches anything else.
    return (
        cmd_element
        | cmd_typedef
        | cmd_function
        | cmd_defsysdef
        | cmd_defcmd
        | cmd_phase
        | cmd_constituent
        | cmd_parameter
        | Empty()
    )
def generate_grammar():
    """Assemble the top-level grammar from its component grammars.

    The result is an ``Or`` (``^``) of four alternatives, each under its
    own results name:

    * ``vim_cmd``     -- the grammar from ``generate_vim_cmd()``
    * ``range``       -- a range on its own
    * ``cmd``         -- a ``;``-separated list of visual/sub commands
    * ``complex_cmd`` -- a range followed by such a command list
    """
    range_expr = generate_range()
    command = generate_fullvisual() ^ generate_sub_cmd()
    vim_cmd = generate_vim_cmd()

    vim_only = p.Group(vim_cmd)('vim_cmd')
    range_only = p.Group(range_expr)('range')
    commands_only = p.Group(
        p.delimitedList(p.Group(command), delim=";")
    )('cmd')
    range_then_commands = p.Group(
        p.Group(range_expr)('range')
        + p.delimitedList(p.Group(command), delim=";")('cmd')
    )('complex_cmd')

    return vim_only ^ range_only ^ commands_only ^ range_then_commands
def build_foreign_key_parser():
    """Return a parser for ``[new_attributes] -> ref_table [(ref_attrs)]``.

    Results names: ``new_attributes`` (optional comma-separated lowercase
    attribute names before the arrow), ``ref_table`` and ``ref_attrs``
    (optional parenthesised attribute list after the table).
    """
    def attribute():
        # Lowercase letter first, then lowercase letters, digits or "_".
        return pp.Word(pp.srange('[a-z]'), pp.srange('[a-z0-9_]'))

    open_paren = pp.Literal('(').suppress()
    close_paren = pp.Literal(')').suppress()

    new_attributes = pp.Optional(
        pp.delimitedList(attribute())
    ).setResultsName('new_attributes')
    arrow = pp.Literal('->').suppress()
    ref_table = pp.Word(
        pp.alphas, pp.alphanums + '._'
    ).setResultsName('ref_table')
    ref_attrs = pp.Optional(
        open_paren + pp.delimitedList(attribute()) + close_paren
    ).setResultsName('ref_attrs')

    return new_attributes + arrow + ref_table + ref_attrs
def parse(self, request):
    """Parse the ``filter`` value of the request's REST context.

    Grammar: conditions of the form ``<identifiers> <operator> <term>``
    combined with the NOT / AND / OR logical operators.  Identifiers are
    ``__``-separated names; a term is a quoted string, a number, ``null``,
    ``true``/``false`` or a list of terms in ``[]``, ``()`` or ``{}``.

    Returns:
        ``None`` when the context holds no (or an empty) filter value,
        otherwise whatever ``self._parse_to_conditions`` returns for the
        parsed expression.

    Raises:
        FilterParserError: if the filter text does not match the grammar.
    """
    filter_text = request._rest_context.get('filter')
    if not filter_text:
        return None

    # Start offsets of the conditions, recorded while parsing.
    condition_positions = []

    operator = pp.Regex('|'.join(self.ALLOWED_OPERATORS))
    # Non-capturing groups for the fraction and exponent.  The previous
    # pattern spelled them "(:?", i.e. an optional literal colon, which
    # made inputs such as "1:.5" parse as a number.
    number = pp.Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?")

    AND = pp.Literal(LOGICAL_OPERATORS.AND)
    OR = pp.Literal(LOGICAL_OPERATORS.OR)
    NOT = pp.Literal(LOGICAL_OPERATORS.NOT)

    identifier = pp.Regex(r"[a-zA-Z]+[a-zA-Z0-9]*(_[a-zA-Z0-9]+)*")
    identifiers = pp.Group(
        pp.delimitedList(identifier, delim="__", combine=False)
    )

    comparison_term = pp.Forward()

    def bracketed_list(opener, closer):
        # One list flavour: comma-separated terms between the two brackets.
        return pp.Group(
            pp.Suppress(opener)
            + pp.delimitedList(comparison_term, delim=",", combine=False)
            + pp.Suppress(closer)
        )

    list_term = (
        bracketed_list('[', ']')
        | bracketed_list('(', ')')
        | bracketed_list('{', '}')
    )
    string = (
        pp.QuotedString("'", escChar='\\', unquoteResults=True)
        | pp.QuotedString('"', escChar='\\', unquoteResults=True)
    )
    # NOTE(review): per the pyparsing docs a parse action that returns None
    # leaves the tokens unchanged, so this presumably still yields the
    # string 'null' -- confirm that _parse_to_conditions expects that.
    null = pp.Literal('null').setParseAction(lambda s, l, t: None)
    boolean = pp.Regex('|'.join(('true', 'false'))).setParseAction(
        lambda s, l, t: t[0] == 'true'
    )
    comparison_term << (string | number | list_term | null | boolean)

    condition = pp.Group(
        identifiers + operator + comparison_term
    ).setResultsName('condition')
    condition.setParseAction(
        lambda s, loc, tocs: condition_positions.append(loc)
    )

    expr = pp.operatorPrecedence(
        condition,
        [
            (NOT, 1, pp.opAssoc.RIGHT,),
            (AND, 2, pp.opAssoc.LEFT,),
            (OR, 2, pp.opAssoc.LEFT,),
        ]
    )

    try:
        # Parse first: the positions list is only filled during parsing.
        parsed = expr.parseString(filter_text, parseAll=True).asList()[0]
        return self._parse_to_conditions(
            parsed, list(condition_positions), condition, filter_text
        )
    except pp.ParseException:
        # NOTE(review): the raw filter text is formatted into a string that
        # is then marked safe -- confirm it is escaped before being rendered
        # as HTML.
        raise FilterParserError(
            mark_safe(ugettext('Invalid filter value "{}"').format(filter_text))
        )
def parse(str):
    """Parse a simple SQL ``select`` statement and return the tokens.

    Supported syntax: ``select <*|columns> from <tables> [where <expr>]``,
    where the where-expression combines conditions (binary comparison,
    ``in (...)`` with values or a sub-select, parenthesised expression)
    with ``and`` / ``or``.  ``--`` comments are ignored.

    Args:
        str: the SQL text.  (The name shadows the builtin inside this
            function; it is kept because it is part of the signature.)

    Returns:
        The pyparsing results (results names ``columns``, ``tables`` and
        ``where``).  If the text cannot be parsed, the error is printed and
        an empty string is returned.
    """
    tokens = ''

    # define SQL tokens
    selectStmt = Forward()
    selectToken = Keyword("select", caseless=True)
    fromToken = Keyword("from", caseless=True)

    ident = Word(alphas, alphanums + "_$").setName("identifier")
    columnName = Upcase(delimitedList(ident, ".", combine=True))
    columnNameList = Group(delimitedList(columnName))
    tableName = Upcase(delimitedList(ident, ".", combine=True))
    tableNameList = Group(delimitedList(tableName))

    whereExpression = Forward()
    and_ = Keyword("and", caseless=True)
    or_ = Keyword("or", caseless=True)
    in_ = Keyword("in", caseless=True)

    E = CaselessLiteral("E")
    binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
    arithSign = Word("+-", exact=1)
    realNum = Combine(
        Optional(arithSign)
        + (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums)))
        + Optional(E + Optional(arithSign) + Word(nums))
    )
    intNum = Combine(
        Optional(arithSign)
        + Word(nums)
        + Optional(E + Optional("+") + Word(nums))
    )

    # need to add support for alg expressions
    columnRval = realNum | intNum | quotedString | columnName
    whereCondition = Group(
        (columnName + binop + columnRval)
        | (columnName + in_ + "(" + delimitedList(columnRval) + ")")
        | (columnName + in_ + "(" + selectStmt + ")")
        | ("(" + whereExpression + ")")
    )
    whereExpression << whereCondition + ZeroOrMore(
        (and_ | or_) + whereExpression
    )

    # define the grammar
    selectStmt << (
        selectToken
        + ('*' | columnNameList).setResultsName("columns")
        + fromToken
        + tableNameList.setResultsName("tables")
        + Optional(
            Group(CaselessLiteral("where") + whereExpression), ""
        ).setResultsName("where")
    )
    simpleSQL = selectStmt

    # define Oracle comment format, and ignore them
    oracleSqlComment = "--" + restOfLine
    simpleSQL.ignore(oracleSqlComment)

    try:
        tokens = simpleSQL.parseString(str)
    except ParseException as err:
        # Point at the failing column, then show the full error.
        # (Single-argument print() behaves the same on Python 2 and 3.)
        print(" " * err.loc + "^\n" + err.msg)
        print(err)

    # The result used to be computed and then discarded.
    return tokens
def CORBA_IDL_BNF():
    """Return the CORBA IDL grammar, building it on first use.

    The grammar is cached in the module-level ``bnf`` variable; later calls
    return the cached object.  It accepts a module definition or one or
    more module items (interfaces, exceptions, constants, typedefs, nested
    modules) and ignores ``//`` and C-style comments.
    """
    global bnf
    if bnf:
        return bnf

    # punctuation
    (colon, lbrace, rbrace, lbrack, rbrack, lparen, rparen,
     equals, comma, dot, slash, bslash, star, semi, langle,
     rangle) = [Literal(ch) for ch in r":{}[]()=,./\*;<>"]

    # keywords
    keyword_names = """any attribute boolean case char const context
        default double enum exception FALSE fixed float inout interface in
        long module Object octet oneway out raises readonly sequence short
        string struct switch TRUE typedef unsigned union void wchar
        wstring""".split()
    (any_, attribute_, boolean_, case_, char_, const_, context_,
     default_, double_, enum_, exception_, FALSE_, fixed_, float_, inout_,
     interface_, in_, long_, module_, Object_, octet_, oneway_, out_,
     raises_, readonly_, sequence_, short_, string_, struct_, switch_,
     TRUE_, typedef_, unsigned_, union_, void_, wchar_,
     wstring_) = [Keyword(word) for word in keyword_names]

    # basic tokens
    identifier = Word(alphas, alphanums + "_").setName("identifier")
    real = Regex(r"[+-]?\d+\.\d*([Ee][+-]?\d+)?").setName("real")
    integer = Regex(r"0x[0-9a-fA-F]+|[+-]?\d+").setName("int")

    # types
    udTypeName = delimitedList(identifier, "::", combine=True).setName("udType")
    builtin_type = (
        any_ | boolean_ | char_ | double_ | fixed_ | float_ | long_
        | octet_ | short_ | string_ | wchar_ | wstring_
    )
    typeName = (builtin_type | udTypeName).setName("type")

    sequenceDef = Forward().setName("seq")
    sequenceDef << Group(
        sequence_ + langle + (sequenceDef | typeName) + rangle
    )
    array_suffix = Optional(lbrack + integer + rbrack)
    typeDef = sequenceDef | (typeName + array_suffix)

    # definitions
    typedefDef = Group(
        typedef_ + typeDef + identifier + semi
    ).setName("typedef")
    moduleDef = Forward()
    constant_value = real | integer | quotedString
    constDef = Group(
        const_ + typeDef + identifier + equals + constant_value + semi
    )
    exceptionItem = Group(typeDef + identifier + semi)
    exceptionDef = (
        exception_ + identifier + lbrace
        + ZeroOrMore(exceptionItem) + rbrace + semi
    )
    attributeDef = (
        Optional(readonly_) + attribute_ + typeDef + identifier + semi
    )

    # operations
    parameter = Group((inout_ | in_ | out_) + typeName + identifier)
    paramlist = delimitedList(parameter).setName("paramlist")
    raises_clause = Optional(
        raises_ + lparen + Group(delimitedList(typeName)) + rparen
    )
    operationDef = (
        (void_ ^ typeDef) + identifier
        + lparen + Optional(paramlist) + rparen
        + raises_clause + semi
    )

    # interfaces and modules
    interfaceItem = constDef | exceptionDef | attributeDef | operationDef
    base_interfaces = Optional(colon + delimitedList(typeName))
    interfaceDef = Group(
        interface_ + identifier + base_interfaces
        + lbrace + ZeroOrMore(interfaceItem) + rbrace + semi
    ).setName("opnDef")
    moduleItem = (
        interfaceDef | exceptionDef | constDef | typedefDef | moduleDef
    )
    moduleDef << (
        module_ + identifier + lbrace + ZeroOrMore(moduleItem) + rbrace + semi
    )

    bnf = moduleDef | OneOrMore(moduleItem)

    bnf.ignore("//" + restOfLine)
    bnf.ignore(cStyleComment)
    return bnf
def syntax():
    """Return a grammar for a subset of Python literal expressions.

    Covers strings, ``u``-prefixed unicode strings, integers, longs,
    floats, tuples, lists, sets, dictionaries and ``identifier(...)``
    instances.  Each literal's parse action converts the matched tokens to
    the corresponding Python value.

    https://docs.python.org/2/reference/expressions.html
    """
    identifier = Word(alphas+'_', alphanums+'_')

    string_ = (
        QuotedString('"') | QuotedString("'")
    ).setParseAction(composition(str, ''.join))

    # FIXME unicode string (how is this handled in python 3?) important to be
    # able to distinguish between unicode and not
    unicode_string = (
        Literal('u').suppress() + string_
    ).setParseAction(composition(unicode, u''.join))

    integer = (
        Optional(oneOf(['+', '-']), default='+') + Word(nums)
    ).setParseAction(composition(int, ''.join))

    long_ = (
        integer + Literal('L').suppress()
    ).setParseAction(composition(long, ''.join, partial(map, str)))

    float_ = (
        Optional(integer, default=0)
        + Literal('.')
        + Optional(Word(nums), default='0')
    ).setParseAction(composition(float, ''.join, partial(map, str)))

    # TODO complex
    number = float_ | long_ | integer

    atom = unicode_string | string_ | number

    expression = Forward()

    # FIXME name of this?
    # The closing brace must be consumed here, exactly as set_, list_ and
    # tuple_ consume their closers; without it every dictionary literal
    # left a stray "}" in the input and broke any enclosing expression.
    dictionary = Group(
        Literal('{').suppress() +
        delimitedList(
            expression + Literal(':').suppress() + expression
        ) +
        Literal('}').suppress()
    ).setParseAction(list)

    set_ = Group(
        Literal('{').suppress() +
        delimitedList(
            expression
        ) +
        Literal('}').suppress()
    ).setParseAction(list)

    list_ = Group(
        Literal('[').suppress() +
        delimitedList(
            expression
        ) +
        Literal(']').suppress()
    ).setParseAction(list)

    tuple_ = Group(
        Literal('(').suppress() +
        delimitedList(
            expression
        ) +
        Literal(')').suppress()
    ).setParseAction(tuple)

    instance = identifier + tuple_

    # dictionary is tried before set_: both start with "{", and the set
    # rule is the fallback once no ":" follows the first element.
    expression << (atom | tuple_ | instance | dictionary | set_ | list_)

    return expression
def create_insert():
    """ Build the parser for ``INSERT INTO <table> (<attrs>) VALUES (...), ...``

    Results names: ``action`` (the insert keyword), ``attrs`` (the
    attribute list) and ``data`` (one group of values per parenthesised
    row).
    """
    insert_kw = upkey('insert').setResultsName('action')
    attr_list = Group(delimitedList(var)).setResultsName('attrs')
    values_kw = upkey('values')

    # Each row is a parenthesised, comma-separated list of values.
    row = Group(Suppress('(') + delimitedList(value) + Suppress(')'))
    rows = Group(delimitedList(row)).setResultsName('data')

    attr_clause = Suppress('(') + attr_list + Suppress(')')
    return insert_kw + into + table + attr_clause + values_kw + rows
def build_foreign_key_parser():
    """Return a parser for ``[(new_attrs)] -> [[options]] ref_table [(ref_attrs)]``.

    Results names: ``new_attrs``, ``ref_table`` and ``ref_attrs``.  The
    optional bracketed option list carries no results name.
    """
    open_paren = pp.Literal('(').suppress()
    close_paren = pp.Literal(')').suppress()
    open_bracket = pp.Literal('[').suppress()
    close_bracket = pp.Literal(']').suppress()

    def attribute_list():
        # Parenthesised, comma-separated lowercase attribute names.
        attribute = pp.Word(pp.srange('[a-z]'), pp.srange('[a-z0-9_]'))
        return open_paren + pp.delimitedList(attribute) + close_paren

    new_attrs = pp.Optional(attribute_list()).setResultsName('new_attrs')
    arrow = pp.Literal('->').suppress()
    option = pp.Word(pp.srange('[a-zA-Z]'))
    options = pp.Optional(
        open_bracket + pp.delimitedList(option) + close_bracket
    )
    ref_table = pp.Word(
        pp.alphas, pp.alphanums + '._'
    ).setResultsName('ref_table')
    ref_attrs = pp.Optional(attribute_list()).setResultsName('ref_attrs')

    return new_attrs + arrow + options + ref_table + ref_attrs
def build_insert_grammar():
    """Return the grammar for ``insert into <table> (<columns>) values (<values>)``.

    Results names: ``table_name``, ``columns`` and ``values_list``.  The
    parentheses are kept as literal tokens in the result.
    """
    insert_into_kw = Keyword("insert into", caseless=True)
    values_kw = Keyword("values", caseless=True)

    # The column list may be empty; values are words of letters, digits
    # and spaces.
    column_list = Optional(Group(delimitedList(identifier_token, ",")))
    value_list = Group(delimitedList(Word(alphanums + " "), ","))

    target = (
        insert_into_kw
        + table_name_token.setResultsName("table_name")
        + Literal("(")
        + column_list.setResultsName("columns")
        + Literal(")")
    )
    payload = (
        values_kw
        + Literal("(")
        + value_list.setResultsName("values_list")
        + Literal(")")
    )

    insert_grammar = Forward()
    insert_grammar << target + payload
    return insert_grammar
def buildMapSyntax(self):
    '''Subset of grammar for DMM files.

    "aai" = (/obj/structure/sign/securearea{desc = "A warning sign which reads 'HIGH VOLTAGE'"; icon_state = "shock"; name = "HIGH VOLTAGE"; pixel_y = -32},/turf/space,/area)

    Returns the parser element for one tile definition: a quoted tile id,
    ``=``, and a parenthesised comma-separated list of atoms, where each
    atom is an absolute path optionally followed by a ``{...}`` block of
    ``;``-separated ``name = value`` properties.
    '''
    dreamList = pyp.Forward()

    # Literals
    singlelineString = pyp.QuotedString('"', '\\').setResultsName('string').setParseAction(self.makeListString)
    fileRef = pyp.QuotedString("'", '\\').setResultsName('fileRef').setParseAction(self.makeFileRef)
    multilineString = pyp.QuotedString(quoteChar='{"', endQuoteChar='"}', multiline=True).setResultsName('string').setParseAction(self.makeListString)
    number = pyp.Regex(r'\-?\d+(\.\d*)?([eE]\d+)?').setResultsName('number').setParseAction(self.makeListNumber)

    # Paths
    # A relative path is one or more identifiers separated by SLASH.  The
    # rule used to reference ``relpath`` inside its own first assignment,
    # which raised UnboundLocalError on every call; a Forward gives the
    # recursion something to refer to, and the longer alternative comes
    # first so that a whole multi-segment path is consumed.
    relpath = pyp.Forward()
    relpath << (pyp.ident + SLASH + relpath | pyp.ident)
    abspath = SLASH + relpath
    # NOTE(review): ``path`` and ``pathslash`` are not used by the returned
    # grammar; kept as in the original.
    path = (abspath | relpath).setParseAction(self.handlePath)
    pathslash = path + SLASH

    # Other symbols
    listStart = pyp.Suppress('list(')
    openParen = pyp.Suppress("(")
    closeParen = pyp.Suppress(')')

    # Grammar
    listConstant = singlelineString | fileRef | multilineString | number | dreamList | abspath
    listElement = listConstant | (listConstant + '=' + listConstant)
    listElement = pyp.operatorPrecedence(listElement, [
        ("=", 2, pyp.opAssoc.LEFT,),
    ])
    listContents = pyp.delimitedList(listElement)
    dreamList << pyp.Group(listStart + listContents + closeParen)
    dreamList.setParseAction(self.makeList)

    # DMM Atom definition
    atomDefProperty = pyp.ident + "=" + listConstant
    atomDefProperty = pyp.operatorPrecedence(atomDefProperty, [
        ("=", 2, pyp.opAssoc.LEFT,),
    ])
    # Properties are ``name = value`` pairs.  The list used to be built
    # from ``listElement``, whose alternatives cannot match a bare property
    # name such as ``desc``, so the docstring example could not parse.
    atomDefPropertyListContents = pyp.delimitedList(atomDefProperty, delim=';')
    atomDefProperties = pyp.Suppress("{") + atomDefPropertyListContents + pyp.Suppress("}")
    # Longest alternative first: with ``abspath | abspath + props`` the
    # bare path always won and the property block was never consumed.
    atomDef = abspath + atomDefProperties | abspath

    # DMM Tile Definition
    tileDefListContents = pyp.delimitedList(atomDef)
    tileDefAtomList = openParen + tileDefListContents + closeParen
    tileDef = singlelineString + '=' + tileDefAtomList
    tileDef.setParseAction(self.makeTileDef)
    return tileDef
def grammar():
    """Return the root parser for the compact XML-like element notation.

    An element is ``prefix:name``, optionally followed by ``{attr=val,...}``
    and a double-quoted text, e.g. ``w:jc{val=right}``.  Children follow a
    ``/`` and are either a single node or a parenthesised, comma-separated
    list of nodes.  The root rule must consume the whole string.
    """
    # Attribute list: {name=value, name=value, ...}
    attr_name = Word(alphas + ':')
    attr_val = Word(alphanums + ' -.%')
    attr_def = Group(attr_name + Suppress('=') + attr_val)
    attr_list = Suppress('{') + delimitedList(attr_def) + Suppress('}')

    # Tag name: namespace prefix and local name joined into one token.
    tagname = Combine(Word(alphas) + Literal(':') + Word(alphas))

    # Note: this installs removeQuotes on the shared dblQuotedString
    # object itself, not on a copy.
    text = dblQuotedString.setParseAction(removeQuotes)

    element = (
        tagname('tagname')
        + Group(Optional(attr_list))('attr_list')
        + Optional(text, default='')('text')
    )
    element.setParseAction(Element.from_token)

    child_node_list = Forward()

    def children():
        # Optional "/<children>" tail, grouped under its results name.
        return Group(
            Optional(Suppress('/') + child_node_list)
        )('child_node_list')

    node = Group(element('element') + children())
    node.setParseAction(connect_node_children)

    child_node_list << (
        Suppress('(') + delimitedList(node) + Suppress(')') | node
    )

    root_node = element('element') + children() + stringEnd
    root_node.setParseAction(connect_root_node_children)
    return root_node
def define_operand(self):
    """
    Build and return the syntax definition for an operand.

    An operand is a function call, a variable, a number, a string or a
    set; sets and function arguments are themselves made of operands, so
    the definition is recursive.

    **This method shouldn't be overridden**. Instead, override the syntax
    definitions for variables, strings and/or numbers.

    If you want to customize the sets, check :meth:`T_SET_START`,
    :meth:`T_SET_END` and :meth:`T_ELEMENT_SEPARATOR`.

    """
    identifier = self.define_identifier()
    operand = Forward()
    token = self._grammar.get_token

    # Sets: start token, optional separated operands, end token.
    set_open = Suppress(token("set_start"))
    set_close = Suppress(token("set_end"))
    set_items = delimitedList(operand, delim=token("element_separator"))
    set_ = Group(set_open + Optional(set_items) + set_close)
    set_.setParseAction(self.make_set)
    set_.setName("set")

    # Variables: a copy of the identifier with its own parse action.
    variable = identifier.copy()
    variable.setName("variable")
    variable.addParseAction(self.make_variable)

    # Functions: name, then a possibly empty argument list between the
    # argument delimiters.
    function_name = identifier.setResultsName("function_name")
    function_name.setName("function_name")
    call_open = Suppress(token("arguments_start"))
    call_close = Suppress(token("arguments_end"))
    argument_list = Group(
        delimitedList(operand, delim=token("arguments_separator"))
    )
    arguments = Optional(argument_list, default=()).setResultsName("arguments")
    arguments.setParseAction(lambda tokens: tokens[0])

    function = function_name + call_open + arguments + call_close
    function.setName("function")
    function.setParseAction(self.make_function)

    # Functions are tried before variables: both begin with an identifier.
    number = self.define_number()
    string = self.define_string()
    operand << (function | variable | number | string | set_)

    return operand
def __init__(self):
    """Complete the SQL grammar using the shared elements in ``tokens``.

    Fills in the ``tokens.whereExpression``, ``tokens.joinExpression``,
    ``tokens.selectStmt`` and ``tokens.insertStmt`` forwards and stores the
    combined statement parser on ``self.simpleSQL`` (select or insert,
    with ``--`` comments ignored).
    """
    self.realNum = Combine(
        Optional(tokens.arithSign)
        + (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums)))
        + Optional(tokens.E + Optional(tokens.arithSign) + Word(nums))
    )
    self.intNum = Combine(
        Optional(tokens.arithSign)
        + Word(nums)
        + Optional(tokens.E + Optional("+") + Word(nums))
    )

    # need to add support for alg expressions
    self.columnRval = (
        self.realNum | self.intNum | quotedString | tokens.columnName
    )

    self.whereCondition = Group(
        (tokens.columnName + tokens.binop + self.columnRval)
        | (tokens.columnName + tokens.in_ + tokens.LPAREN
           + delimitedList(self.columnRval) + tokens.RPAREN)
        | (tokens.columnName + tokens.in_ + tokens.LPAREN
           + tokens.selectStmt + tokens.RPAREN)
        | (tokens.LPAREN + tokens.whereExpression + tokens.RPAREN)
    )
    # Each further condition must be introduced by exactly one AND/OR.
    # The previous form, ZeroOrMore(and_ | or_) + whereExpression, made the
    # connective optional and repeatable, so "a = 1 b = 2" and
    # "a = 1 and or b = 2" were both accepted.
    tokens.whereExpression << (
        self.whereCondition
        + ZeroOrMore((tokens.and_ | tokens.or_) + tokens.whereExpression)
    )

    self.joinCondition = Group(
        (tokens.tableName + tokens.on_ + tokens.whereExpression)
    )
    tokens.joinExpression << (self.joinCondition)

    # define the grammar
    tokens.selectStmt << (
        tokens.selectToken.setResultsName("command")
        + ('*' | tokens.columnNameList).setResultsName("columns")
        + tokens.fromToken
        + tokens.tableNameList.setResultsName("tables")
        + Optional(
            Group(CaselessLiteral("join") + tokens.joinExpression), ""
        ).setResultsName("join")
        + Optional(
            Group(Suppress(CaselessLiteral("where")) + tokens.whereExpression),
            ""
        ).setResultsName("where")
    )

    # A plain "," delimiter: leading whitespace is skipped before every
    # token anyway, so "1,2", "1, 2" and "1 , 2" all parse.  The previous
    # delimiter ', ' required a space after each comma and rejected "1,2".
    tokens.insertStmt << (
        tokens.insertToken.setResultsName("command")
        + tokens.intoToken.setResultsName("middle")
        + tokens.columnNameList.setResultsName("tables")
        + tokens.valuesToken.setResultsName("val")
        + tokens.LPAREN
        + Group(delimitedList(self.columnRval, delim=",")).setResultsName("insValues")
        + tokens.RPAREN
    )

    self.simpleSQL = tokens.selectStmt | tokens.insertStmt

    # define Oracle comment format, and ignore them
    self.oracleSqlComment = "--" + restOfLine
    self.simpleSQL.ignore(self.oracleSqlComment)
def build_foreign_key_parser_old():
    """Return the old-style foreign key parser.

    Superseded by the expression-based syntax (see issue #436) and slated
    for deprecation in a future release.

    Syntax: ``[(new_attrs)] -> [[options]] ref_table [(ref_attrs)]`` with
    results names ``new_attrs``, ``options``, ``ref_table`` and
    ``ref_attrs``.
    """
    open_paren = pp.Literal('(').suppress()
    close_paren = pp.Literal(')').suppress()
    open_bracket = pp.Literal('[').suppress()
    close_bracket = pp.Literal(']').suppress()
    arrow = pp.Literal('->').suppress()

    def attribute_list():
        # Parenthesised, comma-separated lowercase attribute names.
        attribute = pp.Word(pp.srange('[a-z]'), pp.srange('[a-z0-9_]'))
        return open_paren + pp.delimitedList(attribute) + close_paren

    new_attrs = pp.Optional(attribute_list()).setResultsName('new_attrs')
    option = pp.Word(pp.srange('[a-zA-Z]'))
    options = pp.Optional(
        open_bracket + pp.delimitedList(option) + close_bracket
    ).setResultsName('options')
    ref_table = pp.Word(
        pp.alphas, pp.alphanums + '._'
    ).setResultsName('ref_table')
    ref_attrs = pp.Optional(attribute_list()).setResultsName('ref_attrs')

    return new_attrs + arrow + options + ref_table + ref_attrs
def validate_sshRSAAuthKey(val, mode='update'):
    """Validate an ``ssh-rsa`` key line and return its parsed tokens.

    Args:
        val: the key line to validate.
        mode: ``'update'`` accepts an optional ``allowed_hosts=h1,h2``
            prefix, optional authorized_keys options, ``ssh-rsa``, the key
            material and an optional trailing comment.  Any other value
            selects the strict delete form: ``ssh-rsa <key>`` only.

    Returns:
        The pyparsing results.  In update mode the layout is
        ``[allowed_hosts, options, 'ssh-rsa', key, comment]`` (absent
        optional parts are ``[]`` / ``''``); in delete mode it is
        ``['ssh-rsa', key]``.

    Raises:
        ValidationError: if the line does not match the grammar, names a
            host that is not a known ``Host``, or the key is rejected by
            ``validate_sshRSAAuthKey_key``.
    """
    hostname = pyparsing.Word(pyparsing.alphanums+'-.')
    flag = (
        # reject flags: cert-authority
        pyparsing.CaselessKeyword('no-agent-forwarding') |
        pyparsing.CaselessKeyword('no-port-forwarding') |
        pyparsing.CaselessKeyword('no-pty') |
        pyparsing.CaselessKeyword('no-user-rc') |
        pyparsing.CaselessKeyword('no-X11-forwarding')
    )
    key = (
        # reject keys: principals, tunnel
        pyparsing.CaselessKeyword('command') |
        pyparsing.CaselessKeyword('environment') |
        pyparsing.CaselessKeyword('from') |
        pyparsing.CaselessKeyword('permitopen')
    )
    keyval = key + pyparsing.Literal('=') + pyparsing.QuotedString('"', unquoteResults=False)
    options = pyparsing.delimitedList(flag | keyval, combine=True)
    allowed_hosts = (
        pyparsing.Suppress(pyparsing.Keyword('allowed_hosts')) +
        pyparsing.Suppress(pyparsing.Literal('=')) +
        pyparsing.Group(pyparsing.delimitedList(hostname))
    )
    update = (
        pyparsing.LineStart() +
        pyparsing.Optional(allowed_hosts, default=[]) +
        pyparsing.Optional(options, default=[]) +
        pyparsing.Keyword('ssh-rsa') +
        pyparsing.Regex('[a-zA-Z0-9=/+]+') +
        pyparsing.Optional(pyparsing.Regex('.*'), default='') +
        pyparsing.LineEnd()
    )
    delete = (
        pyparsing.LineStart() +
        pyparsing.Keyword('ssh-rsa') +
        pyparsing.Regex('[a-zA-Z0-9=/+]+') +
        pyparsing.LineEnd()
    )

    # Compare by value: ``mode is 'update'`` tested object identity and only
    # worked when the caller's string happened to be the interned literal.
    is_update = mode == 'update'
    try:
        validator = update if is_update else delete
        tokens = validator.parseString(val)
        if is_update:
            # Only the update form carries an allowed_hosts slot.
            hosts = tokens[0]
            if hosts and not set(hosts).issubset(set([x.hostname for x in Host.objects.all()])):
                raise ValidationError('unknown host in allowed_hosts')
            key_index = 3
        else:
            # The delete form is just ['ssh-rsa', key].  Reusing the update
            # indices treated 'ssh-rsa' as the host list and read past the
            # end for the key, so delete mode always failed.
            key_index = 1
        validate_sshRSAAuthKey_key(tokens[key_index])
        return tokens
    except ValidationError:
        # Already the right type: re-raise unchanged.
        raise
    except Exception as err:
        # Anything else (parse failures included) becomes a ValidationError.
        raise ValidationError(err)
def __init__(self):
    """Build the pyparsing grammar for a SELECT statement.

    The finished grammar is stored in ``self.select_stmt``; the intermediate
    elements are kept as instance attributes as well.  Several elements have
    parse actions attached in place while the grammar is being assembled, so
    the order of the statements below matters.
    """
    self.select_stmt = Forward().setName("select statement")
    self.itemName = MatchFirst(Keyword("itemName()")).setParseAction(self.ItemName)
    self.count = MatchFirst(Keyword("count(*)")).setParseAction(self.Count)
    # An identifier is either a bare word that is not a keyword, or any
    # back-quoted string.
    self.identifier = ((~keyword + Word(alphas, alphanums+"_")) | QuotedString("`"))
    # The copies below are taken before self.Identifier is attached to
    # self.identifier further down.
    self.column_name = (self.itemName | self.identifier.copy())
    self.table_name = self.identifier.copy()
    self.function_name = self.identifier.copy()
    # expression
    self.expr = Forward().setName("expression")
    self.integer = Regex(r"[+-]?\d+")
    self.string_literal = QuotedString("'")
    # literal_value is the same object as string_literal, so the parse action
    # set below applies to both names.
    self.literal_value = self.string_literal
    self.expr_term = (
        self.itemName |
        self.function_name + LPAR + Optional(delimitedList(self.expr)) + RPAR |
        self.literal_value.setParseAction(self.Literal) |
        NULL.setParseAction(self.Null) |
        self.identifier.setParseAction(self.Identifier) |
        (EVERY + LPAR + self.identifier.setParseAction(self.Identifier) + RPAR).setParseAction(self.EveryIdentifier) |
        (LPAR + Optional(delimitedList(self.literal_value.setParseAction(self.Literal))) + RPAR).setParseAction(self.ValueList)
    )
    # NOTE(review): pyparsing treats earlier entries of this list as binding
    # tighter, so OR is listed as binding tighter than AND here -- confirm
    # that this ordering is intended.
    self.expr << (operatorPrecedence(self.expr_term, [
        (NOT, UNARY, opAssoc.RIGHT, self.BoolNot),
        (oneOf('< <= > >='), BINARY, opAssoc.LEFT, self.BinaryComparisonOperator),
        (oneOf('= == != <>') | Group(IS + NOT) | IS | IN | LIKE, BINARY, opAssoc.LEFT, self.BinaryComparisonOperator),
        ((BETWEEN,AND), TERNARY, opAssoc.LEFT, self.BetweenXAndY),
        (OR, BINARY, opAssoc.LEFT, self.BoolOr),
        (AND, BINARY, opAssoc.LEFT, self.BoolAnd),
        (INTERSECTION, BINARY, opAssoc.LEFT, self.Intersection),
    ])).setParseAction(self.dont_allow_non_comparing_terms)
    self.ordering_term = (self.itemName | self.identifier) + Optional(ASC | DESC)
    self.single_source = self.table_name("table")
    # Result columns: "*", count(*), or a comma-separated column list.
    self.result_column = Group("*" | self.count | delimitedList(self.column_name))("columns")
    self.select_core = (SELECT + self.result_column + FROM + self.single_source + Optional(WHERE + self.expr("where_expr")))
    # Full statement: core select plus optional ORDER BY and LIMIT clauses.
    self.select_stmt << (self.select_core + Optional(ORDER + BY + Group(delimitedList(self.ordering_term))).setParseAction(self.OrderByTerms)("order_by_terms") + Optional(LIMIT + self.integer)("limit_terms"))
def get_column_defs(table_defs, args):
    """Parse supplied args to find table/column specifiers, and find those
    columns within the table definitions (table_defs).

    Syntax handled by this function::

      <table>:<col1>,...    # col1,... in specified table
      <table>:              # every selectable column of the table
      <col1>,..             # col1,... looked up in the tables of table_defs

    (Earlier documentation also listed ``@<filename>``; nothing in this
    function handles that form.)

    :param table_defs: mapping of table name to a definition whose
        ``'columns'`` entry is a list of dicts with at least a ``'name'`` key.
    :param args: iterable of specifier strings in the syntax above.
    :return: list of ``{'table': ..., 'name': ...}`` dicts, in argument order.
    :raises ParseException: if a specifier does not match the syntax.
    :raises InvalidTableOrColumn: if a table or column cannot be found.
    """
    def get_columns_in_table(table_name, col_names):
        """Find specified col_names in table_name.  If table_name is not
        provided then look in all tables within table_defs.  Return list of
        dicts (table, name) for each column."""
        columns = []
        if table_name:
            if table_name not in table_defs:
                raise InvalidTableOrColumn("No table named %s" % table_name)

            table_columns = table_defs[table_name]['columns']
            # 'time' and 'quality' are never selectable, nor is any column
            # explicitly flagged with is_output=False.
            skip_cols = ['time', 'quality']
            skip_cols += [x['name'] for x in table_columns
                          if not x.get('is_output', True)]
            table_col_names = [x['name'] for x in table_columns
                               if x['name'] not in skip_cols]

            # No explicit columns means "all selectable columns".
            if not col_names:
                col_names = table_col_names

            for col_name in col_names:
                if col_name not in table_col_names:
                    raise InvalidTableOrColumn(
                        "No column %s in %s table" % (col_name, table_name))
                columns.append({'table': table_name, 'name': col_name})
        else:
            for col_name in col_names:
                # Take the column from the first table that has it.
                for table_name in table_defs:
                    try:
                        column = get_columns_in_table(table_name, [col_name])
                        break
                    except InvalidTableOrColumn:
                        pass
                else:
                    raise InvalidTableOrColumn(
                        'Column %s not found in any table' % col_name)
                columns += column
        return columns

    columns = []
    table_name = Word(alphanums + '_-').setResultsName("table_name") + ':'
    col_name = Word(alphanums + '_-')
    opt_table_and_cols = Optional(table_name) + \
        delimitedList(col_name).setResultsName("col_names")
    arg_parse = lineStart + (opt_table_and_cols | table_name) + lineEnd

    for arg in args:
        try:
            results = arg_parse.parseString(arg)
        except ParseException:
            raise ParseException("Bad column specifier syntax in %s" % arg)
        columns += get_columns_in_table(results.table_name, results.col_names)

    # The collected definitions were previously built and then discarded.
    return columns
def getLogLineBNF():
    """Return the pyparsing grammar for one access-log line.

    The grammar is built on first use and cached in the module-level
    ``logLineBNF`` variable; later calls return the cached object.

    Results names: ``ipAddr``, ``auth``, ``timestamp``, ``cmd``,
    ``statusCode``, ``numBytesSent``, ``referrer`` and ``clientSfw``.
    """
    global logLineBNF

    if logLineBNF is None:
        integer = Word(nums)
        ipAddress = delimitedList(integer, ".", combine=True)

        timeZoneOffset = Word("+-", nums)
        # Three-letter month abbreviation, e.g. "Jan".  The ascii_* constants
        # exist on Python 2 and 3 and, unlike string.uppercase/lowercase,
        # do not depend on the locale.
        month = Word(string.ascii_uppercase, string.ascii_lowercase, exact=3)
        serverDateTime = Group(Suppress("[") +
                               Combine(integer + "/" + month + "/" + integer +
                                       ":" + integer + ":" + integer + ":" + integer) +
                               timeZoneOffset +
                               Suppress("]"))

        logLineBNF = (ipAddress.setResultsName("ipAddr") +
                      Suppress("-") +
                      ("-" | Word(alphas + nums + "@._")).setResultsName("auth") +
                      serverDateTime.setResultsName("timestamp") +
                      dblQuotedString.setResultsName("cmd").setParseAction(getCmdFields) +
                      (integer | "-").setResultsName("statusCode") +
                      (integer | "-").setResultsName("numBytesSent") +
                      dblQuotedString.setResultsName("referrer").setParseAction(removeQuotes) +
                      dblQuotedString.setResultsName("clientSfw").setParseAction(removeQuotes))
    return logLineBNF
def _attribute_clause():
    """Return the parser for an attribute clause.

    The clause is ``_ATTRIBUTE``, then ``_DOUBLE_OPEN_PARENTHESIS``, a
    comma-delimited list of ``_attribute()`` elements, and finally
    ``_DOUBLE_CLOSE_PARENTHESIS``.
    """
    opening = _ATTRIBUTE + _DOUBLE_OPEN_PARENTHESIS
    attributes = pyparsing.delimitedList(_attribute())
    return opening + attributes + _DOUBLE_CLOSE_PARENTHESIS
def set_utility_function(self, formula, typ='from_formula'):
    """Create a utility function from a formula and store it on the agent.

    The result is stored in ``self._utility_function`` as a dict with the
    keys ``'type'``, ``'formula'``, ``'code'`` (the compiled formula) and
    ``'input'`` (the names appearing on the right-hand side of the formula).
    Nothing is returned.

    Args:

        formula:
            equation or set of equations (separated by ``;``) that describe
            the utility function, e.g. ``'utility = ball + paint'``.
            Exponentiation is ``**``, not ``^``.
        typ:
            stored unchanged under the ``'type'`` key.

    Example::

        self.set_utility_function('utility = ball + paint')
    """
    parse_single_input = pp.Suppress(pp.Word(pp.alphas + "_", pp.alphanums + "_")) + pp.Suppress('=') \
        + pp.OneOrMore(pp.Suppress(pp.Optional(pp.Word(pp.nums + '*/+-().[]{} ')))
                       + pp.Word(pp.alphas + "_", pp.alphanums + "_"))
    parse_input = pp.delimitedList(parse_single_input, ';')

    self._utility_function = {}
    self._utility_function['type'] = typ
    self._utility_function['formula'] = formula
    # The built-in compile() replaces compiler.compile(); the ``compiler``
    # package was deprecated in Python 2.6 and removed in Python 3.
    self._utility_function['code'] = compile(formula, '<string>', 'exec')
    self._utility_function['input'] = list(parse_input.parseString(formula))
def range_builder(text):
    """Return a sorted list of numbers from a string range of the form 1-3,4,5

    Each comma-separated item is either a single number or an inclusive
    ``start-end`` range.  Duplicates are removed.

    :param text: the range specification.
    :return: sorted list of distinct ints, or ``[]`` when *text* cannot be
        parsed (including when it is not a string).
    """
    from pyparsing import Word, Group, Suppress, delimitedList, nums, \
        ParseException, ParseResults

    rng = Group(Word(nums) + Suppress('-') + Word(nums))
    range_list = delimitedList(rng | Word(nums))

    try:
        tokens = range_list.parseString(text)
    except (AttributeError, ParseException):
        # AttributeError covers non-string input such as None
        return []

    values = set()
    for item in tokens:
        if isinstance(item, ParseResults):
            # get here if the token is a range
            start = int(item[0])
            end = int(item[1]) + 1  # make the upper bound inclusive
            check(start < end, 'start must be less than end')
            values.update(range(start, end))
        else:
            # get here if the token is an integer
            values.add(int(item))
    # Sort so the result does not depend on set iteration order.
    return sorted(values)
def prepareParser(self):
    """Build and return the pyparsing grammar for one access-log line.

    Timestamp, request command and user agent are post-processed by
    ``self.timestampParser``, ``self.httpCmdParser`` and
    ``self.userAgentParser``.  Results names: ``ipAddr``, ``auth``, ``cmd``,
    ``statusCode``, ``numBytesSent``, ``redirectURL`` and ``userAgent``.
    """
    integer = Word(nums)
    # Despite its name this also accepts host names (letters and "-_&#").
    ipAddress = delimitedList(Word(alphas + nums + "-_&#"), ".", combine=True)
    timeZoneOffset = Word("+-", nums)
    # Three-letter month abbreviation.  The ascii_* constants exist on
    # Python 2 and 3 and, unlike string.uppercase/lowercase, do not depend
    # on the locale.
    month = Word(string.ascii_uppercase, string.ascii_lowercase, exact=3)
    serverDateTime = Group(Suppress("[") +
                           Combine(integer + "/" + month + "/" + integer +
                                   ":" + integer + ":" + integer + ":" + integer) +
                           timeZoneOffset +
                           Suppress("]"))

    logLine = (ipAddress.setResultsName("ipAddr") +
               Suppress("-") +
               ("-" | Word(alphas + nums + "@._")).setResultsName("auth") +
               (serverDateTime | "-").setParseAction(self.timestampParser) +
               dblQuotedString.setResultsName("cmd").setParseAction(self.httpCmdParser) +
               (integer | "-").setResultsName("statusCode") +
               (integer | "-").setResultsName("numBytesSent") +
               (dblQuotedString | "-").setResultsName("redirectURL") +
               (dblQuotedString | "-").setResultsName("userAgent").setParseAction(self.userAgentParser))
    return logLine
(P.Optional(kw("unsigned")) + kw("int")) | \ (P.Optional(kw("unsigned")) + kw("hyper")) | \ kw("float") | kw("double") | kw("quadruple") | kw("bool") | \ identifier declaration = \ kw("void") | \ kw("opaque") + identifier + lit("[") + value + lit("]") | \ kw("opaque") + identifier + lit("<") + P.Optional(value) + lit(">") | \ kw("string") + identifier + lit("<") + P.Optional(value) + lit(">") | \ g(type_specifier) + identifier + lit("[") + value + lit("]") | \ g(type_specifier) + identifier + lit("<") + P.Optional(value) + lit(">") | \ g(type_specifier) + identifier | \ g(type_specifier) + lit('*') + identifier enum_body = s("{") + g(P.delimitedList(g(identifier + s('=') + constant))) + s("}") struct_body = s("{") + P.OneOrMore(g(declaration + s(";"))) + s("}") case_spec = g(s(kw("case")) - value + s(":") + g(declaration) + s(";")) default_spec = g(kw('default') - s(':') + g(declaration) + s(';')) union_body = \ s(kw("switch")) - s("(") + g(declaration) + s(")") + s("{") + \ g(P.OneOrMore(case_spec) + P.Optional(default_spec)) + \ s("}") constant_def = kw("const") - identifier + s("=") + constant + s(";") type_def = \ kw("typedef") - g(declaration) + s(";") | \
def init_grammar(self):
    """Build the pyparsing grammar for the configuration file.

    Any changes to the grammar of the config file should be done here.
    The assembled parser is stored in ``self.config_parser``.
    """
    # Punctuation we need to match but do not want in the results
    semi = Suppress(";")
    equals = Suppress("=")

    def simple_option(name):
        # "<name> = <word>;" reported to self.set_simple_option
        option = Keyword(name) + equals + Word(alphas + nums + '-_') + semi
        option.setParseAction(self.set_simple_option)
        return option

    # ---- top section ----
    file_name = Word(alphas + nums + '-_.')

    alignment = Keyword('alignment') + equals + file_name + semi
    alignment.setParseAction(self.set_alignment)

    user_tree = Keyword('user_tree_topology') + equals + file_name + semi
    user_tree.setParseAction(self.set_user_tree)

    branchlengths = simple_option('branchlengths')

    model_name = Word(alphas + nums + '+' + ' ' + '_')
    models = 'models' + equals + delimitedList(model_name) + semi
    models.setParseAction(self.set_models)

    model_selection = simple_option("model_selection")

    top_section = (alignment + Optional(user_tree) + branchlengths +
                   models + model_selection)

    # ---- data blocks ----
    position = Word(nums)
    block_name = Word(alphas + '_-' + nums)

    # start[-end][\step]
    block_range = (position("start") +
                   Optional(Suppress("-") + position("end")) +
                   Optional(Suppress("\\") + position("step")))
    block_range.setParseAction(self.define_range)

    block_ranges = Group(OneOrMore(Group(block_range)))

    user_subset = (Optional("charset") + block_name("name") +
                   equals + block_ranges("parts") + semi)
    user_subset.setParseAction(self.define_user_subset)

    user_subsets = OneOrMore(Group(user_subset))
    user_subsets.setParseAction(self.check_blocks)
    block_section = Suppress("[data_blocks]") + user_subsets

    # ---- schemes ----
    scheme_name = Word(alphas + '_-' + nums)

    # Work on a copy, because it gets a different parse action
    subset_ref = block_name.copy()
    subset_ref.setParseAction(self.check_block_exists)

    subset = Group(Suppress("(") +
                   delimitedList(subset_ref("name")) +
                   Suppress(")"))
    subset.setParseAction(self.define_subset_grouping)

    scheme = Group(OneOrMore(subset))
    scheme_entry = scheme_name("name") + equals + scheme("scheme") + semi
    scheme_entry.setParseAction(self.define_scheme)

    scheme_entries = OneOrMore(Group(scheme_entry))
    search_option = simple_option("search")
    scheme_section = (Suppress("[schemes]") + search_option +
                      Optional(scheme_entries))

    # Every section is defined; string them together into the full parser.
    self.config_parser = (
        top_section + block_section + scheme_section + stringEnd)
from pyparsing import Literal, CaselessLiteral, Word, delimitedList \
    ,Optional, Combine, Group, alphas, nums, alphanums, Forward \
    , oneOf, sglQuotedString, OneOrMore, ZeroOrMore, CharsNotIn \
    , replaceWith

# A parenthesized chunk, e.g. the "(10)" of "varchar(10)" ("skobki" = brackets).
skobki = "(" + ZeroOrMore(CharsNotIn(")")) + ")"

# One column definition: a run of words and parenthesized chunks.
field_def = OneOrMore(Word(alphas, alphanums + "_\"':-") | skobki)


def field_act(s, loc, tok):
    """Render one field as ``<name> tok tok ...`` with double quotes escaped."""
    port_and_text = "<" + tok[0] + "> " + " ".join(tok)
    return port_and_text.replace("\"", "\\\"")


field_def.setParseAction(field_act)

field_list_def = delimitedList(field_def)


def field_list_act(toks):
    """Join the rendered fields with `` | ``."""
    separator = " | "
    return separator.join(toks)


field_list_def.setParseAction(field_list_act)

create_table_def = (
    Literal("CREATE") + "TABLE"
    + Word(alphas, alphanums + "_").setResultsName("tablename")
    + "(" + field_list_def.setResultsName("columns") + ")" + ";"
)


def create_table_act(toks):
    """Render a parsed CREATE TABLE statement as a record-shaped node."""
    node_template = """"%(tablename)s" [\n\t label="<%(tablename)s> %(tablename)s | %(columns)s"\n\t shape="record"\n];"""
    return node_template % toks
def check_type_strictly(input, type_):
    """Return ``input`` unchanged if it is an instance of ``type_``.

    :raises TypeError: if ``input`` is not an instance of ``type_``.
    """
    if isinstance(input, type_):
        return input
    else:
        raise TypeError("Expected input to be of type {}, was {}.".format(
            type_, type(input)))


# Grammar fragments for argument lists.  ``_enclosed`` is recursive: it is a
# comma-separated list of numbers, quoted strings and nested
# parenthesized/bracketed groups, or a single nested group.
_enclosed = pp.Forward()
_nestedParens = pp.nestedExpr(opener="(", closer=")", content=_enclosed)
_nestedBrackets = pp.nestedExpr(opener="[", closer="]", content=_enclosed)
_enclosed << (pp.delimitedList(pp.pyparsing_common.number | pp.QuotedString('"') | pp.QuotedString("'") | _nestedParens | _nestedBrackets) | _nestedParens | _nestedBrackets)
# A keyword name followed by "=" (the "=" is dropped from the results).
_key = pp.Word(pp.alphas) + pp.Suppress("=")
# A keyword value, followed by a comma or by whatever precedes the next ")".
_dict_value = _enclosed + (pp.Suppress(",") | pp.Suppress(pp.SkipTo(")")))
# Optional positional values, reported under the results name "args".
_args = pp.Optional(pp.delimitedList(_enclosed)).setResultsName("args") + (
    pp.Suppress(",") | pp.Suppress(pp.SkipTo(_key)) | pp.Suppress(pp.SkipTo(")")))
# Optional key=value pairs, reported under the results name "kwargs".
_kwargs = pp.Optional(pp.dictOf(_key, _dict_value)).setResultsName("kwargs")
_parameters = pp.Suppress(
class SQLCompiler():
    """Compile predicate expressions into SQL.

    The nested ``Element`` subclasses are attached to the grammar as parse
    actions, so parsing yields a tree of elements.  Each element writes its
    SQL through the context object handed to ``visit``.  The grammar itself
    is built once, in the class body, and exposed as ``parser``.
    """

    class Element():
        """Base class of every node produced by the grammar."""
        pass

    class DomainLevel(Element):
        """A ``d<N>`` token; emitted as column ``d<N>``."""

        def __init__(self, toks):
            self.level = int(toks[0][1:])

        def __repr__(self):
            return '<d:%s>' % self.level

        def visit(self, ctx):
            if self.level not in range(0, 10):
                raise ValueError('domain level not in range 0-9')
            ctx.used_columns.add('d%s' % self.level)
            ctx.emit('d%d' % self.level)

    class DomainLevelLength(Element):
        """An ``l<N>`` token; emitted as ``LENGTH(d<N>)``."""

        def __init__(self, toks):
            self.level = int(toks[0][1:])

        def __repr__(self):
            return '<l:%s>' % self.level

        def visit(self, ctx):
            if self.level not in range(0, 10):
                raise ValueError('domain level not in range 0-9')
            ctx.used_columns.add('d%s' % self.level)
            ctx.emit('LENGTH(d%d)' % self.level)

    class Identifier(Element):
        """A bare name, resolved through ``ctx.lookup`` when visited."""

        def __init__(self, toks):
            self.id = toks[0]

        def __repr__(self):
            return '<id:%s>' % self.id

        def visit(self, ctx):
            ctx.emit(ctx.lookup(self))

    class Integer(Element):
        """An integer literal."""

        def __init__(self, toks):
            self.value = int(toks[0])

        def __repr__(self):
            return '<i:%s>' % self.value

        def visit(self, ctx):
            ctx.emit(repr(self.value))

    class Float(Element):
        """A floating point literal."""

        def __init__(self, toks):
            self.value = float(toks[0])

        def __repr__(self):
            return '<f:%s>' % self.value

        def visit(self, ctx):
            ctx.emit(repr(self.value))

    class String(Element):
        """A single-quoted string literal, emitted exactly as matched."""

        def __init__(self, s, l, toks):
            self.value = toks[0]

        def __repr__(self):
            return '<s:%r>' % self.value

        def visit(self, ctx):
            ctx.emit(self.value)

    class Boolean(Element):
        """The literal ``true`` or ``false``."""

        def __init__(self, toks):
            self.value = toks[0]

        def visit(self, ctx):
            ctx.emit(self.value)

    class Interval(Element):
        """A time interval such as ``2h``, ``30m`` or ``1h 30m``."""

        def __init__(self, toks):
            if len(toks) == 2:
                # <number> <unit>
                t = dict(h=0, m=0)
                t[toks[1]] = toks[0].value
            elif len(toks) == 4:
                # <number> h <number> m.  The keys must be 'h' and 'm'
                # because they are read back below; this branch previously
                # stored them as 'hours'/'minutes' and raised KeyError.
                t = dict(h=toks[0].value, m=toks[2].value)
            # Normalise through timedelta so that e.g. 90m becomes 1h 30m.
            t = timedelta(hours=t['h'], minutes=t['m'])
            self.value = dict(h=int(t.total_seconds()) // 3600,
                              m=(int(t.total_seconds()) // 60) % 60)

        def __repr__(self):
            return '%s' % self.value

        def visit(self, ctx):
            ctx.emit("INTERVAL '{h} hour {m} minute'".format(**self.value))

    class FunctionCall(Element):
        """``name(arg, ...)``, rendered from the context's function template."""

        def __init__(self, toks):
            self.func_name = toks[0]
            # The grammar makes the parameter list optional, so "f()" yields
            # only the name; indexing toks[1] unconditionally raised
            # IndexError.
            self.params = list(toks[1]) if len(toks) > 1 else []

        def __repr__(self):
            return '%s(%s)' % (self.func_name.id, self.params)

        def visit(self, ctx):
            template = ctx.get_function_template(self.func_name)
            for t in template:
                if isinstance(t, int):
                    # int entries are indices into the parameter list
                    self.params[t].visit(ctx)
                else:
                    ctx.emit(t)

    class NumRange(Element):
        """``a,...,b``: the inclusive integer range from a to b."""

        def __init__(self, toks):
            self.range_from = toks[0]
            self.range_to = toks[-1]

        def __repr__(self):
            return '<r:%s,%s>' % (self.range_from, self.range_to)

        def visit(self, ctx):
            pass  # Only for 'in' statements

        def __iter__(self):
            return iter(range(self.range_from.value, self.range_to.value + 1))

        def __len__(self):
            return self.range_to.value - self.range_from.value + 1

    class StringList(Element):
        """Two or more comma-separated string literals."""

        def __init__(self, toks):
            self.items = list(item.value for item in toks)

        def __repr__(self):
            return '<sl:%s>' % (self.items, )

        def visit(self, ctx):
            pass  # Only for 'in' statements

        def __iter__(self):
            return iter(self.items)

        def __len__(self):
            return len(self.items)

    class NumberList(Element):
        """Two or more comma-separated numbers."""

        def __init__(self, toks):
            self.items = list(item.value for item in toks)

        def __repr__(self):
            return '<nl:%s>' % (self.items, )

        def visit(self, ctx):
            pass  # Only for 'in' statements

        def __iter__(self):
            return iter(self.items)

        def __len__(self):
            return len(self.items)

    class InExpr(Element):
        """``<left> in <enumeration>``, emitted as ``(<left> IN (...))``."""

        def __init__(self, toks):
            self.left = toks[0]
            self.right = toks[1]

        def __repr__(self):
            return '%s in %s' % (self.left, self.right)

        def visit(self, ctx):
            ctx.emit('(')
            self.left.visit(ctx)
            ctx.emit(' IN (')
            for i, item in enumerate(self.right):
                ctx.emit(str(item))
                if i != len(self.right) - 1:
                    ctx.emit(',')
            ctx.emit('))')

    class CountExpr(Element):
        """``[group, ...(:interval)|predicate]``: a windowed COUNT."""

        def __init__(self, toks):
            self.group = toks[0]
            self.pred = toks[-1]
            self.time_interval = None
            if len(toks) == 3:
                self.time_interval = toks[1]

        def __repr__(self):
            return '#%s|%s#' % (self.group, self.pred)

        def visit(self, ctx):
            def recursiveDictify(o):
                # Reduce the element tree to plain lists/dicts so it can be
                # handed to ctx.generate_name as a description of this count.
                if isinstance(o, (list, tuple)):
                    return [recursiveDictify(e) for e in o]
                elif isinstance(o, dict):
                    return {k: recursiveDictify(e) for k, e in o.items()}
                elif isinstance(o, SQLCompiler.Element):
                    return recursiveDictify(o.__dict__)
                else:
                    return o

            counter, recycle = ctx.generate_name(
                ('count', recursiveDictify(list(self.group)),
                 recursiveDictify(self.pred),
                 recursiveDictify(self.time_interval)))
            ctx.emit(counter)
            if not recycle:
                # The name is new: emit the definition of the counter.
                ctx.down()
                ctx.push_mode('select')
                ctx.new_selected()
                ctx.emit('COUNT(')
                self.pred.visit(ctx)
                ctx.emit(' OR NULL) OVER(PARTITION BY ')
                for i, g in enumerate(self.group):
                    g.visit(ctx)
                    if i != len(self.group) - 1:
                        ctx.emit(',')
                t = self.time_interval
                if t:
                    ctx.emit(' ORDER BY timestamp RANGE BETWEEN ')
                    self.time_interval.visit(ctx)
                    ctx.emit(' PRECEDING AND ')
                    self.time_interval.visit(ctx)
                    ctx.emit(' FOLLOWING')
                ctx.emit(') AS ')
                ctx.emit(counter)
                ctx.up()
                ctx.pop_mode()

    class ForExpr(Element):
        """``|x in <enumeration>: expr|``: how many items satisfy expr."""

        def __init__(self, toks):
            self.iteratee = toks[0]
            self.iterator = toks[1]
            self.expr = toks[2]

        def __repr__(self):
            return '|%s in %s: %s|' % (self.iteratee, self.iterator, self.expr)

        def visit(self, ctx):
            ctx.emit('(')
            for i, item in enumerate(self.iterator):
                ctx.define(self.iteratee, str(item))
                ctx.emit('(CASE WHEN (')
                self.expr.visit(ctx)
                ctx.emit(') THEN 1 ELSE 0 END)')
                if i != len(self.iterator) - 1:
                    ctx.emit('+')
                ctx.undefine(self.iteratee)
            ctx.emit(')')

    class BinaryOp(Element):
        """A binary operation ``left op right``."""

        def __init__(self, toks):
            # For chained left-associative operators pyparsing passes one
            # flat group [a, op, b, op, c, ...].  Fold everything but the
            # last operator into nested left-hand nodes; previously anything
            # after the third token was silently dropped.
            parts = list(toks[0])
            left = parts[0]
            for idx in range(1, len(parts) - 2, 2):
                left = type(self)([[left, parts[idx], parts[idx + 1]]])
            self.op = parts[-2]
            self.left = left
            self.right = parts[-1]

        def __repr__(self):
            return '%s %s %s' % (self.left, self.op, self.right)

        def visit(self, ctx):
            self.left.visit(ctx)
            ctx.emit(' ' + self.op.upper() + ' ')
            self.right.visit(ctx)

    class UnaryOp(Element):
        """A prefix operation ``op right``."""

        def __init__(self, toks):
            self.op = toks[0][0]
            self.right = toks[0][1]

        def __repr__(self):
            return '%s %s' % (self.op, self.right)

        def visit(self, ctx):
            ctx.emit(self.op + ' ')
            self.right.visit(ctx)

    class PredicateSet(Element):
        """``{expr, ...}``: each expression starts a new predicate."""

        def __init__(self, toks):
            self.preds = toks

        def __repr__(self):
            return '{%s}' % (self.preds, )

        def visit(self, ctx):
            for pred in self.preds:
                ctx.new_predicate()
                pred.visit(ctx)

    class PredicateList(Element):
        """Comma-separated predicate sets: each set starts a new layer."""

        def __init__(self, toks):
            self.preds = toks

        def __repr__(self):
            return '%s' % self.preds

        def visit(self, ctx):
            for pred in self.preds:
                ctx.new_layer()
                pred.visit(ctx)

    # ------------------------------------------------------------------
    # Grammar
    # ------------------------------------------------------------------
    expression = Forward()

    domain_level = Regex('d[0-9]+')
    domain_level.setParseAction(DomainLevel)
    domain_level_length = Regex('l[0-9]+')
    domain_level_length.setParseAction(DomainLevelLength)
    identifier = Word(alphas, alphanums)
    identifier.setParseAction(Identifier)
    num_integer = Regex(r'-?(0|[1-9][0-9]*)')
    num_integer.setParseAction(Integer)
    num_float = Regex(r'-?(0|[1-9][0-9]*)\.[0-9]*')
    num_float.setParseAction(Float)
    number = num_float | num_integer
    # shorthand for a literal that is matched but dropped from the results
    l = lambda s: Literal(s).suppress()
    string = sglQuotedString
    string.addParseAction(String)
    true_val = Literal('true')
    true_val.setParseAction(Boolean)
    false_val = Literal('false')
    false_val.setParseAction(Boolean)

    param_list = Group(delimitedList(expression))
    function_call = identifier + l('(') + Optional(param_list) + l(')')
    function_call.setParseAction(FunctionCall)

    num_range = number + ',' + '...' + ',' + number
    num_range.setParseAction(NumRange)
    string_list = string + OneOrMore(l(',') + string)
    string_list.setParseAction(StringList)
    number_list = number + OneOrMore(l(',') + number)
    number_list.setParseAction(NumberList)
    enumeration = num_range | string_list | number_list

    h = number + Literal('h')
    m = number + Literal('m')
    time_interval = ((h + m) | h | m)
    time_interval.setParseAction(Interval)

    count_expr = l('[') + Group(delimitedList(identifier)) + Optional(
        l(':') + time_interval) + l('|') + expression + l(']')
    count_expr.setParseAction(CountExpr)

    for_expr = l('|') + identifier + l('in') + enumeration + l(
        ':') + expression + l('|')
    for_expr.setParseAction(ForExpr)

    in_expr = (domain_level | domain_level_length |
               identifier) + l('in') + enumeration
    in_expr.setParseAction(InExpr)

    value = (for_expr | function_call | count_expr | in_expr | domain_level |
             domain_level_length | true_val | false_val | identifier |
             time_interval | string | number)

    signop = oneOf('-')
    multop = oneOf('* /')
    plusop = oneOf('+ -')
    relop = oneOf('= != > >= < <=')
    notop = oneOf('not')
    andop = oneOf('and')
    orop = oneOf('or')

    # Operators are listed from tightest to loosest binding.
    expression <<= operatorPrecedence(value, [(signop, 1, opAssoc.RIGHT, UnaryOp),
                                              (multop, 2, opAssoc.LEFT, BinaryOp),
                                              (plusop, 2, opAssoc.LEFT, BinaryOp),
                                              (relop, 2, opAssoc.LEFT, BinaryOp),
                                              (notop, 1, opAssoc.RIGHT, UnaryOp),
                                              (andop, 2, opAssoc.LEFT, BinaryOp),
                                              (orop, 2, opAssoc.LEFT, BinaryOp)])

    predicate_set = l('{') + delimitedList(expression) + l('}')
    predicate_set.setParseAction(PredicateSet)

    predicate_list = delimitedList(predicate_set)
    predicate_list.setParseAction(PredicateList)

    parser = predicate_list

    def __init__(self, table_name, identifier_map, function_map,
                 with_group_by=False):
        """Store the table name, the lookup maps and the GROUP BY flag."""
        self.table_name = table_name
        self.identifier_map = identifier_map
        self.function_map = function_map
        self.with_group_by = with_group_by

    def compileSQL(self, s):
        """Parse the predicate source *s* and return the SQL built from it.

        The whole string must match the grammar (``parseAll=True``).
        """
        ctx = ParseContext(self.identifier_map, self.function_map)
        parsed = self.parser.parseString(s, parseAll=True)
        parsed[0].visit(ctx)
        return BuilderSQL(
            ctx.layers, ctx.used_columns, self.table_name,
            self.identifier_map).build_sql(with_group_by=self.with_group_by)

    @classmethod
    def test(cls):
        """Ad-hoc smoke test: parse the sample rule sets and print the SQL."""
        tests = ('d0', 'l0', 'domain', "'com'",
                 "match(domain, '^[a-z]{4,12}\\.com$')", "100.10",
                 "ex in 1,2,3", "10+11", "-id", "10-10", "10*10", "9/11",
                 "2.2", "10+f(10*1)", "[dst,d1|l1 = 5]",
                 "|suf in 'com','biz','net': suf|", "|i in 5,...,12: i|")

        # NOTE: ``tests and []`` always evaluates to [], so this loop over
        # the single-expression samples is switched off.
        for t in tests and []:
            print('-' * 80)
            print('looking at', t)
            try:
                parsed = cls.expression.parseString(t, parseAll=True)
            except Exception:
                print('got only', cls.expression.parseString(t),
                      file=sys.stderr)
            else:
                ctx = ParseContext()
                parsed[0].visit(ctx)

        dgas = [('Bedep', r"""
                {
                  match(domain, '^[a-z]{11,16}\.com$'),
                  timestamp >= t0 - 2h,
                  timestamp <= t0
                },
                {
                  [dst:1h|match(d1,'[0-9]')] / [dst:1h|true] >= 0.2,
                  [dst:60m|true] >= 18
                }
                """), ('ConfickerAB', r"""
                {match(domain, '^[a-z]{5,12}\.(biz|com|info|net|org)$')},
                {
                  [dst|true] >= 25,
                  |i in 5,...,12: [dst|l1=i]>=1| >= 5,
                  |suffix in 'com','biz','info','net','org': [dst|d0=suffix]>=1| >= 4,
                  [dst|l1=5 and d0 in 'com','info','net','org'] >= 1,
                  [dst|l1=12 and d0 in 'com','info','net','org'] = 0
                }
                """), ('Elephant', r"""
                {match(domain, '^[a-f0-9]{8}\.(com|info|net)$')},
                {
                  [dst|
                    |suffix in 'com','info','net': [dst,d1|d0=suffix]>=1| >= 2
                  ] >= 16
                }
                """)]

        identifier_map = {
            't0': "(TIMESTAMP '2015-08-03 00:00:00')",
            'domain': 'request',
            'client': 'dst'
        }
        function_map = {
            'match': ['(REGEXP_INSTR(', 0, ',', 1, ')>0)'],
            'count': ['REGEXP_COUNT(', 0, ',', 1, ')']
        }

        for dga_name, s in dgas:
            print('-' * 80)
            print('looking at', dga_name)
            print('with:')
            print(s)
            print('--')
            try:
                parsed = cls.parser.parseString(s, parseAll=True)
            except Exception:
                # show how far the parser got, then re-raise
                print('got only', cls.parser.parseString(s), file=sys.stderr)
                raise
            else:
                print(parsed)
                ctx = ParseContext(identifier_map, function_map)
                parsed[0].visit(ctx)
                print(
                    BuilderSQL(ctx.layers,
                               ctx.used_columns).build_sql(with_group_by=True))
def _parse(text: Text):
    """Parse a dump of ``CREATE TABLE`` statements.

    The text is a ``;``-separated sequence of ``/* ... */`` comments and
    ``CREATE TABLE`` statements and must match completely
    (``parseAll=True``).

    :param text: the SQL text to parse.
    :return: the tokens collected under the ``table`` results name, i.e. one
        entry produced by the ``Table`` parse action per statement.
    """
    comment = Suppress('/*' + Regex(r'([^*]|[*][^/])*') + '*/')

    identifier = (Suppress('`') + Regex(r'[^`]+') +
                  Suppress('`')).setParseAction(lambda toks: toks[0])

    # The escape alternative must be tried first and the plain alternative
    # must exclude the backslash.  With the previous pattern ``([^']|\\.)*``
    # the backslash was consumed by ``[^']``, so an escaped quote ended the
    # string early.
    string = (Suppress("'") + Regex(r"(\\.|[^'\\])*") +
              Suppress("'")).setParseAction(lambda toks: toks[0])

    reference_option = (CaselessKeyword('RESTRICT') |
                        CaselessKeyword('CASCADE') |
                        CaselessKeyword('SET NULL') |
                        CaselessKeyword('NO ACTION') |
                        CaselessKeyword('SET DEFAULT'))

    reference_definition = (
        Suppress(CaselessKeyword('REFERENCES')) +
        identifier('reference_tbl_name') +
        '(' + delimitedList(identifier)('tbl_column') + ')' +
        ZeroOrMore((Suppress(CaselessKeyword('ON DELETE')) +
                    reference_option('on_delete')) |
                   (Suppress(CaselessKeyword('ON UPDATE')) +
                    reference_option('on_update'))))

    # Either a (primary/fulltext/unique/plain) key over a column list, or a
    # named CONSTRAINT that is a foreign key or a CHECK.
    constraint_definition = (
        (((CaselessKeyword('PRIMARY KEY')('type')) |
          ((CaselessKeyword('FULLTEXT KEY') |
            CaselessKeyword('UNIQUE KEY') |
            CaselessKeyword('KEY'))('type') + identifier('index_name'))) +
         '(' + delimitedList(identifier('key_part*')) + ')') |
        (Suppress(CaselessKeyword('CONSTRAINT')) + identifier('symbol') +
         ((CaselessKeyword('FOREIGN KEY')('type') +
           '(' + delimitedList(identifier('key_part*')) + ')' +
           reference_definition) |
          (CaselessKeyword('CHECK')('type') + Regex('[^,\n]+'))))
    ).setParseAction(Constraint)

    column_type = (Word(alphanums) + Optional('(' + Regex('[^)]+') + ')') +
                   Optional(Suppress(CaselessKeyword('UNSIGNED'))))

    column_definition = (
        identifier('col_name') + column_type('col_type') + ZeroOrMore(
            (CaselessKeyword('NULL') |
             CaselessKeyword('NOT NULL'))('nullability') |
            (CaselessKeyword('AUTO_INCREMENT'))('auto_increment') |
            (Suppress(CaselessKeyword('COMMENT')) + string('comment')) |
            (Suppress(CaselessKeyword('DEFAULT')) +
             (Word(alphanums + '_') |
              string).setParseAction(lambda toks: toks[0])('default')) |
            (Suppress(CaselessKeyword('ON DELETE')) +
             (Word(alphanums + '_') | reference_option)('on_delete')) |
            (Suppress(CaselessKeyword('ON UPDATE')) +
             (Word(alphanums + '_') | reference_option)('on_update')))
    ).setParseAction(Column)

    create_definition = column_definition('column*') | constraint_definition(
        'constraint*')

    # Everything between the closing ")" and the ";" (table options) is
    # matched and discarded.
    create_table_statement = (
        Suppress(CaselessKeyword('CREATE') + CaselessKeyword('TABLE')) +
        identifier('tbl_name') +
        Suppress('(') + delimitedList(create_definition) + Suppress(')') +
        Suppress(Regex('[^;]*'))).setParseAction(Table)

    parser = delimitedList(comment | create_table_statement('table*'),
                           delim=';') + Suppress(Optional(';'))

    return parser.parseString(text, parseAll=True)['table']
# # Copyright (c) 2003,2016, Paul McGuire # from pyparsing import Word, delimitedList, Optional, \ Group, alphas, alphanums, Forward, oneOf, quotedString, \ infixNotation, opAssoc, \ ZeroOrMore, restOfLine, CaselessKeyword, pyparsing_common as ppc # define SQL tokens selectStmt = Forward() SELECT, FROM, WHERE, AND, OR, IN, IS, NOT, NULL = map(CaselessKeyword, "select from where and or in is not null".split()) NOT_NULL = NOT + NULL ident = Word( alphas, alphanums + "_$" ).setName("identifier") columnName = delimitedList(ident, ".", combine=True).setName("column name") columnName.addParseAction(ppc.upcaseTokens) columnNameList = Group( delimitedList(columnName)) tableName = delimitedList(ident, ".", combine=True).setName("table name") tableName.addParseAction(ppc.upcaseTokens) tableNameList = Group(delimitedList(tableName)) binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True) realNum = ppc.real() intNum = ppc.signed_integer() columnRval = realNum | intNum | quotedString | columnName # need to add support for alg expressions whereCondition = Group( ( columnName + binop + columnRval ) | ( columnName + IN + Group("(" + delimitedList( columnRval ) + ")" )) | ( columnName + IN + Group("(" + selectStmt + ")" )) |
def __init__(self):
    """Build the pyparsing grammar for a simple SELECT statement.

    The finished grammar is stored in ``self.simpleSQL``.  ``self.selectPart``
    and ``self.fromPart`` expose the SELECT and FROM pieces,
    ``self.identExpr`` and ``self.functionExpr`` are kept for debugging, and
    ``self.tableAction`` / ``self.whereExpAction`` are the (initially empty)
    lists handed to ``self.actionWrapper`` for table names and flat where
    conditions.
    """
    # define SQL tokens
    selectStmt = Forward()
    selectToken = Keyword("select", caseless=True)
    fromToken = Keyword("from", caseless=True)
    asToken = Keyword("as", caseless=True)
    whereToken = Keyword("where", caseless=True)
    semicolon = Literal(";")

    ident = Word(alphas, alphanums + "_$").setName("identifier")
    # Dotted names such as "o1.ra" are combined into a single token.
    columnName = delimitedList(ident, ".", combine=True)
    #columnName.setParseAction(upcaseTokens)
    columnNameList = Group(columnName + ZeroOrMore("," + columnName))
    # selectableList = Forward()
    # Forward declaration; the definition follows once columnRval exists.
    columnRvalList = Forward()
    functionExpr = ident + Optional("." + ident) + Literal(
        '(') + columnRvalList + Literal(')')
    alias = Forward()
    identExpr = functionExpr | ident
    self.identExpr = identExpr  # Debug
    self.functionExpr = functionExpr  # Debug
    # Rebinds the name; the Forward created above is not used.
    alias = ident.copy()

    selectableName = identExpr | columnName
    selectableList = Group(selectableName + ZeroOrMore("," + selectableName))
    columnRef = columnName
    functionSpec = functionExpr
    valueExprPrimary = functionSpec | columnRef
    numPrimary = valueExprPrimary  ## | numericValFunc
    factor = Optional(Literal("+") | Literal("-")) + numPrimary
    muldiv = oneOf("* /")
    term = Forward()
    term << factor + Optional(muldiv + factor)
    numericExpr = Forward()
    addsub = oneOf("+ -")
    numericExpr << term + Optional(addsub + numericExpr)

    arithop = oneOf("+ - * /")
    columnNumericExpr = Forward()
    cTerm = valueExprPrimary
    # Only referenced by the commented-out test code further down.
    testme = valueExprPrimary + arithop + valueExprPrimary
    columnNumericExpr << cTerm + Optional(arithop + columnNumericExpr)
    colNumExpList = Group(columnNumericExpr +
                          ZeroOrMore("," + columnNumericExpr))

    valueExpr = numericExpr  ## | stringExpr | dateExpr | intervalExpr
    derivedColumn = valueExpr + Optional(asToken + alias)
    selectSubList = derivedColumn + ZeroOrMore("," + derivedColumn)

    tableName = delimitedList(ident, ".", combine=True)
    # don't upcase table names anymore
    # tableName.setParseAction(upcaseTokens)
    self.tableAction = []
    tableName.addParseAction(self.actionWrapper(self.tableAction))
    # NOTE(review): the value returned by setResultsName is discarded here
    # and for tableAlias below; pyparsing returns a named copy, so these two
    # calls look like no-ops -- confirm.
    tableName.setResultsName("table")
    tableAlias = tableName + asToken + ident.setResultsName("aliasName")
    tableAlias.setResultsName("alias")
    genericTableName = tableAlias | tableName
    genericTableName = genericTableName.setResultsName("tablename")
    tableNameList = Group(genericTableName +
                          ZeroOrMore("," + genericTableName))

    whereExpression = Forward()
    and_ = Keyword("and", caseless=True)
    or_ = Keyword("or", caseless=True)
    in_ = Keyword("in", caseless=True)
    between_ = Keyword("between", caseless=True)

    E = CaselessLiteral("E")
    binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
    arithSign = Word("+-", exact=1)
    realNum = Combine(
        Optional(arithSign) +
        (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums))) +
        Optional(E + Optional(arithSign) + Word(nums)))
    intNum = Combine(
        Optional(arithSign) + Word(nums) +
        Optional(E + Optional("+") + Word(nums)))

    # need to add support for alg expressions
    columnRval = realNum | intNum | quotedString | columnNumericExpr  # | numericExpr
    columnRvalList << Group(columnRval + ZeroOrMore("," + columnRval))

    self.whereExpAction = []

    namedRv = columnRval.setResultsName("column")
    # One flat condition: comparison, IN (list), IN (subselect) or BETWEEN.
    whereConditionFlat = Group((functionSpec + binop + columnRval) |
                               (namedRv + binop + columnRval) |
                               (namedRv + in_ + "(" + columnRval +
                                ZeroOrMore("," + namedRv) + ")") |
                               (namedRv + in_ + "(" + selectStmt + ")") |
                               (namedRv + between_ + namedRv + and_ + namedRv))
    whereConditionFlat.addParseAction(
        self.actionWrapper(self.whereExpAction))
    whereCondition = Group(whereConditionFlat |
                           ("(" + whereExpression + ")"))

    # Test code to try to make an expression parse.
    # print whereConditionFlat.parseString("ABS(o1.ra - o2.ra) < 0.00083 / COS(RADIANS(o2.decl))")
    # goodFunction = ident + Literal('(') + columnNumericExpr + Literal(')')
    # print "ADFDSFDSF",testme.parseString("o1.ra - o2.ra", parseAll=True)
    # print "ADSFDSFAD", goodFunction.parseString("ABS(o1.ra - o2.ra)")

    #whereExpression << whereCondition.setResultsName("wherecond")
    #+ ZeroOrMore( ( and_ | or_ ) + whereExpression )

    # scAnd and scOr are defined but not attached to anything in this method.
    def scAnd(tok):
        print "scAnd", tok
        if "TRUE" == tok[0][0]:
            tok = tok[2]
        elif "TRUE" == tok[2][0]:
            tok = tok[0]
        return tok

    def scOr(tok):
        print "scOr", tok
        if ("TRUE" == tok[0][0]) or ("TRUE" == tok[2][0]):
            tok = [["TRUE"]]
        return tok

    def scWhere(tok):
        # Drop each "TRUE AND" pair from the flat token list.
        newtok = []
        i = 0
        while i < len(tok):
            if str(tok[i]) in ["TRUE", str(["TRUE"])
                               ] and (i + 1) < len(tok):
                if str(tok[i + 1]).upper() == "AND":
                    i += 2
                    continue
                # NOTE(review): ``tok[i + i]`` looks like a typo for
                # ``tok[i + 1]``.  As written it inspects tok[0] when i == 0
                # and can index past the end for larger i.  Changing it would
                # let this function return an empty list, which collapseWhere
                # below does not handle -- fix both together.
                elif str(tok[i + i]).upper() == "OR":
                    break
            newtok.append(tok[i])
            i += 1
        return newtok

    def collapseWhere(tok):
        # Remove the whole WHERE clause when its first condition is TRUE.
        #collapse.append(tok[0][1])
        if ["TRUE"] == tok.asList()[0][1]:
            tok = []
        return tok

    andExpr = and_ + whereExpression
    orExpr = or_ + whereExpression
    whereExpression << whereCondition + ZeroOrMore(andExpr | orExpr)
    whereExpression.addParseAction(scWhere)

    self.selectPart = selectToken + (
        '*' | selectSubList).setResultsName("columns")
    whereClause = Group(whereToken +
                        whereExpression).setResultsName("where")
    whereClause.addParseAction(collapseWhere)
    self.fromPart = fromToken + tableNameList.setResultsName("tables")

    # define the grammar
    selectStmt << (self.selectPart + fromToken +
                   tableNameList.setResultsName("tables") + whereClause)

    self.simpleSQL = selectStmt + semicolon

    # define Oracle comment format, and ignore them
    oracleSqlComment = "--" + restOfLine
    self.simpleSQL.ignore(oracleSqlComment)
def parse(self, request):
    """
    Parse the ``filter`` entry of the request's REST context.

    A grammar is assembled on every call: conditions of the form
    ``identifier[__identifier...] <operator> <value>`` that may be combined
    with the logical NOT / AND / OR operators.  The parsed tree is handed
    to ``self._parse_to_conditions`` together with the locations that the
    condition parse action recorded, the ``condition`` expression and the
    raw filter string.

    :param request: object exposing a ``_rest_context`` mapping.
    :return: ``None`` when no (or an empty) filter was supplied, otherwise
        the result of ``self._parse_to_conditions``.
    :raises FilterParserError: when the filter string does not match the
        grammar.
    """
    filter_value = request._rest_context.get('filter')
    if not filter_value:
        return None

    # Filled by the parse action attached to ``condition`` below.
    condition_positions = []

    operator = pp.Regex('|'.join(self.ALLOWED_OPERATORS))
    # The optional fraction and exponent are non-capturing groups "(?:...)".
    # The earlier spelling "(:?...)" made each group start with an optional
    # literal colon, so text such as "1:.5" was accepted as a number.
    number = pp.Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?")

    AND = pp.Literal(LOGICAL_OPERATORS.AND)
    OR = pp.Literal(LOGICAL_OPERATORS.OR)
    NOT = pp.Literal(LOGICAL_OPERATORS.NOT)

    identifier = pp.Regex(r"[a-zA-Z]+[a-zA-Z0-9]*(_[a-zA-Z0-9]+)*")
    # A path of identifiers joined by a double underscore.
    identifiers = pp.Group(
        pp.delimitedList(identifier, delim="__", combine=False))

    # Forward declaration: lists may contain terms, including other lists.
    comparison_term = pp.Forward()

    # A comma separated list may be wrapped in [], () or {}.
    list_term = (
        pp.Group(
            pp.Suppress('[')
            + pp.delimitedList(comparison_term, delim=",", combine=False)
            + pp.Suppress(']'))
        | pp.Group(
            pp.Suppress('(')
            + pp.delimitedList(comparison_term, delim=",", combine=False)
            + pp.Suppress(')'))
        | pp.Group(
            pp.Suppress('{')
            + pp.delimitedList(comparison_term, delim=",", combine=False)
            + pp.Suppress('}')))

    string = (pp.QuotedString("'", escChar='\\', unquoteResults=True)
              | pp.QuotedString('"', escChar='\\', unquoteResults=True))
    null = pp.Literal('null').setParseAction(lambda s, l, t: None)
    boolean = pp.Regex('|'.join(
        ('true', 'false'))).setParseAction(lambda s, l, t: t[0] == 'true')

    comparison_term << (string | number | list_term | null | boolean)

    condition = pp.Group(identifiers + operator
                         + comparison_term).setResultsName('condition')
    # Record the location reported for every matched condition.
    condition.setParseAction(
        lambda s, loc, tocs: condition_positions.append(loc))

    # Operators are listed from tightest to loosest binding.
    expr = pp.operatorPrecedence(condition, [
        (NOT, 1, pp.opAssoc.RIGHT),
        (AND, 2, pp.opAssoc.LEFT),
        (OR, 2, pp.opAssoc.LEFT),
    ])

    try:
        return self._parse_to_conditions(
            expr.parseString(filter_value, parseAll=True).asList()[0],
            list(condition_positions), condition, filter_value)
    except pp.ParseException:
        raise FilterParserError(
            mark_safe(
                ugettext('Invalid filter value "{}"').format(filter_value)))
def __init__(self):
    """
    Build the BNF grammar that is stored on ``self.bnf``.

    The grammar is layered as atom -> factor -> term -> expression; matched
    pieces are reported through the ``self.push_first`` and
    ``self.push_unary_operator`` parse actions.
    """
    # No formula is attached yet.
    self.formula = None

    # The grammar refers to itself (function arguments, parenthesised
    # sub-expressions), so it starts out as a forward declaration.
    self.bnf = Forward()

    def pushed_tail(op, operand):
        # Zero or more "<op> <operand>" pairs, each handed to push_first.
        return ZeroOrMore((op + operand).setParseAction(self.push_first))

    # Parentheses are matched but dropped from the results.
    open_paren = Literal(const.LPAR).suppress()
    close_paren = Literal(const.RPAR).suppress()

    # Named constants: Euler number, Pi, null and the two booleans.
    euler = Keyword(const.EULER)
    pi_const = Keyword(const.PI)
    null_kw = Keyword(const.NULL)
    true_kw = Keyword(const.TRUE)
    false_kw = Keyword(const.FALSE)

    # Binary operator families and the chain of optional unary prefixes.
    add_ops = oneOf(const.ADDOP)
    mult_ops = oneOf(const.MULTOP)
    pow_ops = oneOf(const.POWOP)
    unary_prefix = reduce(operator.add,
                          [Optional(sym) for sym in const.UNOP])

    # Numeric literals, as described by the pattern in const.NUMBER.
    numeric = Regex(const.NUMBER)

    # Variable names: alphanumeric words joined by the name separator and
    # combined into a single token.
    var_name = delimitedList(Word(alphanums),
                             delim=const.VARIABLE_NAME_SEPARATOR,
                             combine=True)

    # Function call: a name followed by a parenthesised expression.
    call = Word(alphanums) + open_paren + self.bnf + close_paren

    # The constants are tried before numbers and plain variable names so
    # that they are recognised as constants.
    operand = (call | pi_const | euler | null_kw | true_kw | false_kw
               | numeric | var_name)
    operand.setParseAction(self.push_first)

    # A parenthesised sub-expression; its own tokens are suppressed.
    grouped = open_paren + self.bnf.suppress() + close_paren

    atom = (unary_prefix + operand) | grouped
    atom.setParseAction(self.push_unary_operator)

    # Exponentiation recurses on ``factor`` rather than ``atom`` so that it
    # associates right-to-left: 2^3^2 = 2^(3^2), not (2^3)^2.
    factor = Forward()
    factor << (atom + pushed_tail(pow_ops, factor))

    term = factor + pushed_tail(mult_ops, factor)
    self.bnf << (term + pushed_tail(add_ops, term))
def _tdb_grammar(): #pylint: disable=R0914
    """
    Convenience function for getting the pyparsing grammar of a TDB file.

    Returns
    -------
    A pyparsing expression that matches any one of the TDB commands defined
    below; the alternatives are tried in the order they are listed at the
    end of this function.
    """
    int_number = Word(nums).setParseAction(lambda t: [int(t[0])])
    # symbol name, e.g., phase name, function name
    # (the reference phase name of an element uses the very same expression)
    symbol_name = Word(alphanums + '_-:()/', min=1)
    ref_phase_name = symbol_name
    # species name, e.g., CO2, AL, FE3+
    species_name = Word(alphanums + '+-*/_.', min=1) + Optional(Suppress('%'))
    # constituent arrays are colon-delimited
    # each subarray can be comma- or space-delimited
    constituent_array = Group(
        delimitedList(Group(OneOrMore(Optional(Suppress(',')) + species_name)), ':'))
    param_types = MatchFirst(
        [TCCommand(param_type) for param_type in TDB_PARAM_TYPES])
    # Let sympy do heavy arithmetic / algebra parsing for us
    # a convenience function will handle the piecewise details
    func_expr = (float_number | ZeroOrMore(',').setParseAction(lambda t: 0.01)) + OneOrMore(SkipTo(';') \
        + Suppress(';') + ZeroOrMore(Suppress(',')) + Optional(float_number) + \
        Suppress(Word('YNyn', exact=1) | White()))
    # The FUNCTION and PARAMETER commands share this one expression object,
    # so the piecewise conversion only needs to be attached once.
    func_expr.setParseAction(_make_piecewise_ast)
    # ELEMENT
    cmd_element = TCCommand('ELEMENT') + Word(alphas+'/-', min=1, max=2) + ref_phase_name + \
        float_number + float_number + float_number + LineEnd()
    # SPECIES
    cmd_species = TCCommand(
        'SPECIES') + species_name + chemical_formula + LineEnd()
    # TYPE_DEFINITION
    cmd_typedef = TCCommand('TYPE_DEFINITION') + \
        Suppress(White()) + CharsNotIn(' !', exact=1) + SkipTo(LineEnd())
    # FUNCTION
    cmd_function = TCCommand('FUNCTION') + symbol_name + func_expr
    # ASSESSED_SYSTEMS
    cmd_ass_sys = TCCommand('ASSESSED_SYSTEMS') + SkipTo(LineEnd())
    # DEFINE_SYSTEM_DEFAULT
    cmd_defsysdef = TCCommand('DEFINE_SYSTEM_DEFAULT') + SkipTo(LineEnd())
    # DEFAULT_COMMAND
    cmd_defcmd = TCCommand('DEFAULT_COMMAND') + SkipTo(LineEnd())
    # DATABASE_INFO
    cmd_database_info = TCCommand('DATABASE_INFO') + SkipTo(LineEnd())
    # VERSION_DATE
    cmd_version_date = TCCommand('VERSION_DATE') + SkipTo(LineEnd())
    # REFERENCE_FILE
    cmd_reference_file = TCCommand('REFERENCE_FILE') + SkipTo(LineEnd())
    # ADD_REFERENCES
    cmd_add_ref = TCCommand('ADD_REFERENCES') + SkipTo(LineEnd())
    # LIST_OF_REFERENCES
    cmd_lor = TCCommand('LIST_OF_REFERENCES') + SkipTo(LineEnd())
    # TEMPERATURE_LIMITS
    cmd_templim = TCCommand('TEMPERATURE_LIMITS') + SkipTo(LineEnd())
    # PHASE
    cmd_phase = TCCommand('PHASE') + symbol_name + \
        Suppress(White()) + CharsNotIn(' !', min=1) + Suppress(White()) + \
        Suppress(int_number) + Group(OneOrMore(float_number)) + LineEnd()
    # CONSTITUENT
    cmd_constituent = TCCommand('CONSTITUENT') + symbol_name + \
        Suppress(White()) + Suppress(':') + constituent_array + \
        Suppress(':') + LineEnd()
    # PARAMETER
    cmd_parameter = TCCommand('PARAMETER') + param_types + \
        Suppress('(') + symbol_name + \
        Optional(Suppress('&') + Word(alphas+'/-', min=1, max=2), default=None) + \
        Suppress(',') + constituent_array + \
        Optional(Suppress(';') + int_number, default=0) + \
        Suppress(')') + func_expr
    # Now combine the grammar together
    all_commands = cmd_element | \
        cmd_species | \
        cmd_typedef | \
        cmd_function | \
        cmd_ass_sys | \
        cmd_defsysdef | \
        cmd_defcmd | \
        cmd_database_info | \
        cmd_version_date | \
        cmd_reference_file | \
        cmd_add_ref | \
        cmd_lor | \
        cmd_templim | \
        cmd_phase | \
        cmd_constituent | \
        cmd_parameter
    return all_commands
print("tokens.where =", tokens.where) except pyparsing.ParseException as err: print(" " * err.loc + "^\n" + err.msg) print(err) print() # define SQL tokens selectStmt = pyparsing.Forward() selectToken = pyparsing.Keyword("select", caseless=True) fromToken = pyparsing.Keyword("from", caseless=True) whereToken = pyparsing.Keyword("where", caseless=True) ident = pyparsing.Word(pyparsing.alphas + "_", pyparsing.alphanums + "_$.").setName("identifier") columnName = pyparsing.delimitedList(ident, ".", combine=True) columnNameList = pyparsing.Group(pyparsing.delimitedList(columnName)) tableName = pyparsing.delimitedList(ident, ".", combine=True) tableNameList = pyparsing.Group(pyparsing.delimitedList(tableName)) whereExpression = pyparsing.Forward() and_ = pyparsing.Keyword("and", caseless=True) or_ = pyparsing.Keyword("or", caseless=True) in_ = pyparsing.Keyword("in", caseless=True) E = pyparsing.CaselessLiteral("E") binop = pyparsing.oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True) arithSign = pyparsing.Word("+-", exact=1) realNum = pyparsing.Combine( pyparsing.Optional(arithSign) + (pyparsing.Word(pyparsing.nums) + "." +
def _build_tgrep_parser(set_parse_actions=True):
    '''
    Assemble the pyparsing grammar used to tokenize and interpret tgrep
    search strings.

    When `set_parse_actions` is true the module's tgrep / macro action
    callbacks are attached to the grammar elements; otherwise the bare
    grammar is returned.  In both cases the returned expression ignores
    ``#`` comments that run to the end of the line.
    '''
    pp = pyparsing

    # --- terminals -------------------------------------------------------
    operator_tok = pp.Optional('!') + pp.Regex('[$%,.<>][%,.<>0-9-\':]*')
    quoted = pp.QuotedString(quoteChar='"', escChar='\\',
                             unquoteResults=False)
    slash_regex = pp.QuotedString(quoteChar='/', escChar='\\',
                                  unquoteResults=False)
    quoted_icase = pp.Regex('i@\\"(?:[^"\\n\\r\\\\]|(?:\\\\.))*\\"')
    slash_regex_icase = pp.Regex('i@\\/(?:[^/\\n\\r\\\\]|(?:\\\\.))*\\/')
    bare_literal = pp.Regex('[^][ \r\t\n;:.,&|<>()$!@%\'^=]+')

    # --- forward declarations for the recursive parts --------------------
    expr = pp.Forward()
    relations = pp.Forward()

    parens = pp.Literal('(') + expr + ')'

    # N(...) tree position: an optional first index followed by a comma and
    # an optional comma separated list that may end with a trailing comma.
    more_indices = pp.Optional(
        pp.delimitedList(pp.Word(pp.nums), delim=',') + pp.Optional(','))
    index_list = pp.Optional(pp.Word(pp.nums) + ',' + more_indices)
    tree_pos = pp.Literal('N(') + index_list + ')'

    # --- node labels and macros ------------------------------------------
    label = pp.Regex('[A-Za-z0-9]+')
    label_use = pp.Combine('=' + label)
    # see _tgrep_segmented_pattern_action
    label_use_pred = label_use.copy()

    macro_name = pp.Regex('[^];:.,&|<>()[$!@%\'^=\r\t\n ]+')
    macro_name.setWhitespaceChars('')
    macro_use = pp.Combine('@' + macro_name)

    # --- nodes -----------------------------------------------------------
    node_expr = (label_use_pred
                 | macro_use
                 | tree_pos
                 | quoted_icase
                 | slash_regex_icase
                 | quoted
                 | slash_regex
                 | '*'
                 | bare_literal)

    # A node expression may be bound to a label with "=label"; neither the
    # "=" nor the label skips any leading whitespace.
    tight_equals = pp.Literal('=').setWhitespaceChars('')
    tight_label = label.copy().setWhitespaceChars('')
    node_expr_bound = (node_expr + tight_equals + tight_label) | node_expr

    node_alternatives = (pp.Optional("'")
                         + node_expr_bound
                         + pp.ZeroOrMore("|" + node_expr))
    node = parens | node_alternatives

    # --- relations -------------------------------------------------------
    brackets = pp.Optional('!') + '[' + relations + ']'
    relation = brackets | (operator_tok + node)

    conjunction = pp.Forward()
    conjunction << (relation
                    + pp.ZeroOrMore(pp.Optional('&') + conjunction))
    relations << (conjunction + pp.ZeroOrMore("|" + relations))

    # --- full expressions ------------------------------------------------
    expr << (node + pp.Optional(relations))
    labeled_expr = label_use + pp.Optional(relations)
    segmented_expr = expr + pp.ZeroOrMore(':' + labeled_expr)

    macro_defn = (pp.Literal('@')
                  + pp.White().suppress()
                  + macro_name
                  + segmented_expr)

    leading_macros = pp.Optional(
        macro_defn + pp.ZeroOrMore(';' + macro_defn) + ';')
    further_items = pp.ZeroOrMore(';' + (macro_defn | segmented_expr))
    trailing_semicolons = pp.ZeroOrMore(';').suppress()
    all_exprs = (leading_macros
                 + segmented_expr
                 + further_items
                 + trailing_semicolons)

    if set_parse_actions:
        bindings = (
            (label_use, _tgrep_node_label_use_action),
            (label_use_pred, _tgrep_node_label_pred_use_action),
            (macro_use, _tgrep_macro_use_action),
            (node, _tgrep_node_action),
            (node_expr_bound, _tgrep_bind_node_label_action),
            (parens, _tgrep_parens_action),
            (tree_pos, _tgrep_nltk_tree_pos_action),
            (relation, _tgrep_relation_action),
            (conjunction, _tgrep_conjunction_action),
            (relations, _tgrep_rel_disjunction_action),
            (macro_defn, _macro_defn_action),
            # the whole expression is also the conjunction of two
            # predicates: the first node predicate, and the remaining
            # relation predicates
            (expr, _tgrep_conjunction_action),
            (labeled_expr, _tgrep_segmented_pattern_action),
            (segmented_expr,
             functools.partial(_tgrep_conjunction_action, join_char=':')),
            (all_exprs, _tgrep_exprs_action),
        )
        for element, action in bindings:
            element.setParseAction(action)

    return all_exprs.ignore('#' + pp.restOfLine)
true_ = Keyword("TRUE") typedef_ = Keyword("typedef") unsigned_ = Keyword("unsigned") union_ = Keyword("union") var_ = Keyword("var") void_ = Keyword("void") wchar_ = Keyword("wchar") wstring_ = Keyword("wstring") identifier = Word(alphas, alphanums + "_").setName("identifier") real = Regex(r"[+-]?\d+\.\d*([Ee][+-]?\d+)?").setName("real") integer = Regex(r"0x[0-9a-fA-F]+|[+-]?\d+").setName("Int") udTypeName = delimitedList(identifier, "::", combine=True).setName("udType") # Use longest match for type, in case a user-defined type name starts with # a keyword type, like "stringSeq" or "longArray" typeName = (bool_ ^ byte_ ^ float_ ^ int_ ^ long_ ^ ascii_ ^ text_ ^ data_ ^ time_ ^ var_ ^ udTypeName).setName("type") listDef = Forward().setName("seq") dictDef = Forward().setName("dict") futureDef = Forward().setName("future") proxyDef = Forward().setName("proxy") typeDef = (listDef | dictDef | futureDef | proxyDef | typeName) listDef << Group(list_ + langle + typeDef + rangle)
Group(Optional(sheetRef + EXCL)("sheet") + colRef("col") + rowRef("row"))) cellRange = (Group(cellRef("start") + COLON + cellRef("end"))("range") | cellRef | Word(alphas, alphanums)) expr = Forward() COMPARISON_OP = oneOf("< = > >= <= != <>") condExpr = expr + COMPARISON_OP + expr ifFunc = (CaselessKeyword("if") - LPAR + Group(condExpr)("condition") + COMMA + Group(expr)("if_true") + COMMA + Group(expr)("if_false") + RPAR) statFunc = lambda name: Group( CaselessKeyword(name) + Group(LPAR + delimitedList(expr) + RPAR)) sumFunc = statFunc("sum") minFunc = statFunc("min") maxFunc = statFunc("max") aveFunc = statFunc("ave") funcCall = ifFunc | sumFunc | minFunc | maxFunc | aveFunc multOp = oneOf("* /") addOp = oneOf("+ -") numericLiteral = ppc.number operand = numericLiteral | funcCall | cellRange | cellRef arithExpr = infixNotation(operand, [ (multOp, 2, opAssoc.LEFT), (addOp, 2, opAssoc.LEFT), ])
def parser_bnf():
    """Build and return the grammar for podcast configuration files."""

    def to_int(s, l, t):
        return int(t[0])

    def to_millis(s, l, t):
        # Keep at most three digits and right-pad with zeros ("5" -> 500).
        return int(t[0][:3].ljust(3, "0"))

    def prepend_zero_timestamp(s, l, t):
        # A duration file without a timestamp gets a zero timestamp first.
        return [timestamp.parseString("00:00:00.000"), t]

    def canonical(spellings, value):
        # Any of the given spellings is replaced by a single fixed value.
        return oneOf(spellings).setParseAction(replaceWith(value))

    # Punctuation that is matched but dropped from the results.
    at, colon, left_bracket, period, right_bracket = (
        Literal(ch).suppress() for ch in "@:[.]")
    # The caret is kept: it is stored under the "filename" results name.
    caret = Literal("^")

    # zero_index ::= [0-9]+
    zero_index = Word(nums).setParseAction(to_int)

    # filename ::= one alphanumeric character, optionally followed by
    #              alphanumerics, "-", "_", "/", "." or spaces
    leading_char = Word(alphanums, exact=1)
    trailing_chars = Word(alphanums + "-_/. ")
    filename = Combine(leading_char + Optional(trailing_chars))

    # millisecs ::= [0-9]+   (the "." in front is matched by `period`)
    millisecs = Word(nums).setParseAction(to_millis).setResultsName("ms")

    # hours, minutes, seconds ::= zero_index
    hours = zero_index.setResultsName("hh")
    minutes = zero_index.setResultsName("mm")
    seconds = zero_index.setResultsName("ss")

    hours_minutes = (hours + colon + minutes + colon) | (minutes + colon)
    secs_millisecs = ((seconds + Optional(period + millisecs))
                      | (period + millisecs))

    # timestamp ::= [[hours ":"] minutes ":"] seconds ["." millisecs]
    timestamp = Optional(hours_minutes) + secs_millisecs

    # duration_file ::= "@", filename
    # Two separate expressions are built on purpose: the parse action for
    # the "lonely" case (a duration file with no timestamp in front) must
    # not be attached to every use of duration_file.
    duration_file = at + filename.setResultsName("filename")
    lonely_duration_file = at + filename.setResultsName("filename")

    # timespecs ::= timestamp [duration_file | {timestamp}]
    lonely_duration_file.setParseAction(prepend_zero_timestamp)
    timestamp_with_file = Group(timestamp) + duration_file
    timestamp.setParseAction(default_timestamp_fields)
    timespecs = Or([
        lonely_duration_file,
        timestamp_with_file,
        OneOrMore(Group(timestamp)),
    ])

    # last_frame ::= "-1" | "last"
    last_frame = canonical(["-1", "last"], -1)

    # frame_number ::= ":" (zero_index | last_frame)
    frame_number = colon - (zero_index | last_frame).setResultsName("num")

    # stream_number ::= ":" zero_index
    stream_number = colon - zero_index.setResultsName("num")

    # input_file ::= ":" [filename]
    input_file = colon - Optional(filename).setResultsName("filename")

    # previous_segment ::= ":" "^"
    previous_segment = colon - caret.setResultsName("filename")

    # frame_input_file ::= input_file | previous_segment
    frame_input_file = Or([input_file, previous_segment])

    # av_trailer ::= input_file [stream_number]
    av_trailer = input_file + Optional(stream_number)

    # frame_type ::= "frame" | "f"
    frame_type = canonical(["f", "frame"], "frame")

    # frame_input ::= frame_type [frame_input_file [frame_number]]
    frame_source = Optional(frame_input_file + Optional(frame_number))
    frame_input = frame_type.setResultsName("type") + frame_source

    # video_type ::= "video" | "v"
    video_type = canonical(["v", "video"], "video")

    # audio_type ::= "audio" | "a"
    audio_type = canonical(["a", "audio"], "audio")

    # av_input ::= (audio_type | video_type) [av_trailer]
    av_kind = (audio_type | video_type).setResultsName("type")
    av_input = av_kind + Optional(av_trailer)

    # inputspec ::= "[" (av_input | frame_input) "]"
    inputs = delimitedList(av_input | frame_input, delim=":")
    inputs.setParseAction(default_input_fields)
    inputspec = left_bracket + inputs - right_bracket

    # segmentspec ::= inputspec [timespecs]
    times = Group(Optional(timespecs)).setResultsName("times")
    segmentspec = Group(inputspec + times)

    # config ::= {segmentspec}
    config = ZeroOrMore(segmentspec)
    config.ignore(pythonStyleComment)
    return config
type_keyword = pyparsing.Or([ pyparsing.Keyword(k) for k in 'void char short int long float double signed unsigned'.split() ]) type_decl = pyparsing.Group( pyparsing.Group( pyparsing.Optional(_const) + (pyparsing.OneOrMore(type_keyword) ^ identifier)) + pyparsing.Optional(pointer)) intent = _lbrk + pyparsing.Keyword('output') + _rbrk arg_decl = pyparsing.Group(type_decl + identifier + pyparsing.Optional(intent)) error_check = _lbrk + identifier + _rbrk function_decl = pyparsing.Group(type_decl + pyparsing.Optional(error_check) + identifier) function_prototype = (function_decl + _lpar + pyparsing.Group( pyparsing.Optional(pyparsing.delimitedList(arg_decl, ','))) + _rpar + pyparsing.Optional(_semi)) base_types = { 'bool': 'ctypes.c_bool', 'char': 'ctypes.c_char', 'char *': 'ctypes.c_char_p', 'double': 'ctypes.c_double', 'float': 'ctypes.c_float', 'int': 'ctypes.c_int', 'int16': 'ctypes.c_int16', 'int32': 'ctypes.c_int32', 'int64': 'ctypes.c_int64', 'int8': 'ctypes.c_int8', 'long': 'ctypes.c_long', 'long int': 'ctypes.c_long',