def parse(input_string):
    """Parse the text of a form definition and return the resulting AST.

    The pyparsing grammar is built inside this function on every call and
    then applied to ``input_string``. Parse actions turn the matched tokens
    into nodes of the project ``ast`` module.

    Reserved words (``form``, ``if``, ``else``, ``true``, ``false`` and the
    data type names) are only excluded from identifiers when they appear as
    whole words, so identifiers such as ``iffy`` or ``trueValue`` are valid.

    :param input_string: source text of the form to parse.
    :returns: the ``form`` named result of the parse, i.e. the object that
        the ``ast.Form(*tokens)`` parse action produced.
    :raises: whatever pyparsing's ``parseString`` raises when the text does
        not match the grammar.
    """
    # Imported here so the fix does not rely on the module-level pyparsing
    # import already listing Keyword.
    from pyparsing import Keyword

    def flatten_binary_operators(position, source, flattened_tokens):
        """Fold ``[lhs, op, rhs, op, rhs, ...]`` left to right into one node.

        Each ``op`` token is an AST class; it is called as
        ``op(position, source, lhs, rhs)`` and its result becomes the
        left-hand side of the next operator.
        """
        node = flattened_tokens[0]
        # Operators sit at the odd indexes; stop before a trailing token
        # that has no right-hand operand.
        for index in range(1, len(flattened_tokens) - 1, 2):
            type_call = flattened_tokens[index]
            node = type_call(position, source, node,
                             flattened_tokens[index + 1])
        return node

    def flatten_unary_operators(position, source, flattened_tokens):
        """Build the node for a prefix operator from ``[op, operand]``."""
        type_call = flattened_tokens[0]
        return type_call(position, source, flattened_tokens[1])

    def unary_action(source, position, flattened_tokens):
        # infixNotation hands over one grouped token list; unpack it.
        return flatten_unary_operators(position, source, *flattened_tokens)

    def binary_action(source, position, flattened_tokens):
        # infixNotation hands over one grouped token list; unpack it.
        return flatten_binary_operators(position, source, *flattened_tokens)

    def operator(symbol, node_type):
        """Match ``symbol`` and replace it with the AST class ``node_type``."""
        return Literal(symbol).setParseAction(lambda _: node_type)

    # Packrat
    ParserElement.enablePackrat()

    # Structural tokens; suppressed so they never reach the AST.
    lit_form = Suppress("form")
    lit_if = Suppress("if")
    lit_else = Suppress("else")
    lit_l_curly = Suppress("{")
    lit_r_curly = Suppress("}")
    lit_l_paren = Suppress("(")
    lit_r_paren = Suppress(")")
    lit_colon = Suppress(":")
    lit_assign_op = Suppress("=")

    # Operators: each literal is replaced by the AST class it stands for.
    lit_op_multiplication = operator("*", ast.Multiplication)
    lit_op_division = operator("/", ast.Division)
    lit_op_subtract = operator("-", ast.Subtraction)
    lit_op_addition = operator("+", ast.Addition)
    lit_op_positive = operator("+", ast.Positive)
    lit_op_negative = operator("-", ast.Negative)
    lit_op_not = operator("!", ast.Negation)
    lit_op_lower_exclusive = operator("<", ast.LowerExclusive)
    lit_op_lower_inclusive = operator("<=", ast.LowerInclusive)
    lit_op_greater_inclusive = operator(">=", ast.GreaterInclusive)
    lit_op_greater_exclusive = operator(">", ast.GreaterExclusive)
    lit_op_equality = operator("==", ast.Equality)
    lit_op_inequality = operator("!=", ast.Inequality)
    lit_op_and = operator("&&", ast.And)
    lit_op_or = operator("||", ast.Or)

    # Data type names used in field declarations.
    type_money = Literal("money").setParseAction(
        lambda source, position, _: ast.Money(position, source))
    type_integer = Literal("integer").setParseAction(
        lambda source, position, _: ast.Integer(position, source))
    type_boolean = Literal("boolean").setParseAction(
        lambda source, position, _: ast.Boolean(position, source))
    type_string = Literal("string").setParseAction(
        lambda source, position, _: ast.String(position, source))
    data_types = (type_money | type_integer | type_boolean | type_string)

    # Literal values. Booleans are keywords so that an identifier which
    # merely starts with "true"/"false" is not split into a boolean plus
    # a trailing identifier.
    true = Keyword("true").setParseAction(
        lambda source, position, _: ast.Boolean(position, source, True))
    false = Keyword("false").setParseAction(
        lambda source, position, _: ast.Boolean(position, source, False))
    boolean = (true | false)

    integer = Word(nums).setParseAction(
        lambda source, position, parsed_tokens:
        ast.Integer(position, source, int(parsed_tokens[0])))
    money = Combine(Word(nums) + Literal(".") + Word(nums)).setParseAction(
        lambda source, position, parsed_tokens:
        ast.Money(position, source, float(parsed_tokens[0])))
    # money first: otherwise integer would consume the digits before ".".
    number = (money | integer)

    string = QuotedString("'", unquoteResults=True).setParseAction(
        lambda source, position, parsed_tokens:
        ast.String(position, source, str(parsed_tokens[0])))

    # Whole-word reserved names that may not be used as identifiers.
    # Keyword (unlike Literal) does not match a prefix of a longer word.
    # Numbers are not listed: an identifier must start with a letter, so a
    # number can never be mistaken for one.
    reserved_words = (
        Keyword("form") | Keyword("if") | Keyword("else")
        | boolean
        | Keyword("money") | Keyword("integer")
        | Keyword("boolean") | Keyword("string")
    )

    name = ~reserved_words + Word(alphas, alphanums + '_').setResultsName(
        'identifier').setParseAction(
        lambda source, position, parsed_tokens:
        ast.Identifier(position, source, parsed_tokens[0]))

    operand_arith = (number | boolean | name | string)

    # Precedence levels, tightest binding first.
    operand_list_arith = [
        (lit_op_positive | lit_op_negative | lit_op_not,
         1, opAssoc.RIGHT, unary_action),
        (lit_op_multiplication | lit_op_division,
         2, opAssoc.LEFT, binary_action),
        (lit_op_addition | lit_op_subtract,
         2, opAssoc.LEFT, binary_action),
    ]

    operand_list_bool = [
        # Two-character comparisons are listed before "<" so that "<=" is
        # not read as "<" followed by "=".
        (lit_op_lower_inclusive | lit_op_greater_inclusive |
         lit_op_greater_exclusive | lit_op_lower_exclusive,
         2, opAssoc.LEFT, binary_action),
        (lit_op_equality | lit_op_inequality,
         2, opAssoc.LEFT, binary_action),
        (lit_op_and, 2, opAssoc.LEFT, binary_action),
        (lit_op_or, 2, opAssoc.LEFT, binary_action),
    ]

    literal_precedence = infixNotation(
        operand_arith,
        (operand_list_arith + operand_list_bool)
    )

    expression = OneOrMore(
        literal_precedence |
        (lit_l_paren + literal_precedence + lit_r_paren)
    )

    # Fields: "title" identifier : type, optionally followed by = expression.
    field = Forward()
    field_assignment = Forward()

    field_statement = (
        QuotedString('"', unquoteResults=True).setResultsName("title") +
        name.setResultsName("identifier") +
        lit_colon +
        data_types.setResultsName("data_type")
    )

    field <<= field_statement
    field.setParseAction(
        lambda source, position, parsed_tokens:
        ast.Field(position, source, *parsed_tokens))

    field_assignment <<= field_statement + lit_assign_op + expression
    field_assignment.setParseAction(
        lambda source, position, parsed_tokens:
        ast.Assignment(position, source, *parsed_tokens))

    # Assignment first: a plain field is a prefix of an assignment.
    field_order = field_assignment | field

    # Conditionals and statement bodies (mutually recursive via Forward).
    conditional_if = Forward()
    conditional_if_else = Forward()
    statement = Forward()
    body = Forward()

    if_statement = lit_if + lit_l_paren + expression + lit_r_paren + body

    conditional_if <<= if_statement
    conditional_if.setParseAction(ast.If)

    conditional_if_else <<= (
        if_statement +
        Optional(lit_else + body).setResultsName('else_statement')
    )
    conditional_if_else.setParseAction(ast.IfElse)

    # NOTE(review): the else part above is optional and this alternative is
    # tried first, so it also matches an "if" without "else"; the
    # conditional_if alternative is never selected. Left unchanged because
    # callers may rely on always receiving ast.IfElse.
    conditional = conditional_if_else | conditional_if

    statement <<= (field_order | conditional)

    body <<= lit_l_curly + OneOrMore(statement) + lit_r_curly
    # Wrap the statements of one body into a single nested list token.
    body.addParseAction(lambda parsed_tokens: [parsed_tokens.asList()])

    form = (lit_form + name + body)\
        .addParseAction(lambda parsed_tokens: ast.Form(*parsed_tokens))\
        .setResultsName('form')\
        .parseWithTabs()

    return form.parseString(input_string).form
GTE = Literal('>=')
LT = Literal('<')
LTE = Literal('<=')

# One comparison operator; the matched symbol is replaced by the name of the
# corresponding Mongo query operator.
mongo_op = GTE | GT | LTE | LT
mongo_op.setParseAction(
    lambda t: {
        '>=': '$gte',
        '>': '$gt',
        '<=': '$lte',
        '<': '$lt',
    }[t[0]]
)

# A range with a single bound, e.g. an operator followed by one value.
one_sided_range = Group(
    mongo_op('op') + valid_word('bound')
)('onesidedrange')

term = _range | one_sided_range | regex | wildcard | phrase | single_term

# A clause is an optional "field:" prefix followed by either a term or a
# parenthesised sub-query; the field falls back to a placeholder name.
clause << (
    Optional(field_name + COLON, default='__default_field__')('field')
    + (term('term') | Group(LPAR + query_expr + RPAR)('subquery'))
)
clause.addParseAction(SearchTerm)

# Operator precedence, tightest binding first. The OR operator is optional,
# so two adjacent clauses are combined by SearchOr as well.
query_expr << infixNotation(clause, [
    (required_modifier | prohibit_modifier,
     1, opAssoc.RIGHT, SearchModifier),
    ((not_ | '!').setParseAction(lambda: 'NOT'),
     1, opAssoc.RIGHT, SearchNot),
    ((and_ | '&&').setParseAction(lambda: 'AND'),
     2, opAssoc.LEFT, SearchAnd),
    (Optional(or_ | '||').setParseAction(lambda: 'OR'),
     2, opAssoc.LEFT, SearchOr),
])


class QueryParser:
    # Field name and operator used when a query does not specify them.
    DEFAULT_FIELD = 'text'
    DEFAULT_OPERATOR = '$regex'
).addParseAction(to_case_call) selectStmt = Forward() compound = ( ( Keyword("not", caseless=True)("op").setDebugActions(*debug) + expr("params") ).addParseAction(to_json_call) | ( Keyword("distinct", caseless=True)("op").setDebugActions(*debug) + expr("params") ).addParseAction(to_json_call) | Keyword("null", caseless=True).setName("null").setDebugActions(*debug) | case | ( Literal("(").setDebugActions(*debug).suppress() + selectStmt.addParseAction(subquery_call) + Literal(")").suppress() ) | ( Literal("(").setDebugActions(*debug).suppress() + Group(delimitedList(expr)) + Literal(")").suppress() ) | realNum.setName("float").setDebugActions(*debug) | intNum.setName("int").setDebugActions(*debug) | (Literal("-")("op").setDebugActions(*debug) + expr("params")).addParseAction( to_json_call ) | sqlString.setName("string").setDebugActions(*debug) | ( Word(alphas)("op").setName("function name").setDebugActions(*debug)
(RBRACK('inclusive') | RBRACE('esclusive'))) _range = (lower_range + TO + upper_range)('range') GT = Literal('>') GTE = Literal('>=') LT = Literal('<') LTE = Literal('<=') one_sided_range = Group((GTE | GT | LTE | LT)('op') + valid_word('bound'))('onesidedrange') term = (_range | one_sided_range | regex | wildcard | phrase | single_term) clause << (Optional(field_name + COLON, default='__default_field__')('field') + (term('term') | Group(LPAR + query_expr + RPAR)('subquery'))) clause.addParseAction(SearchTerm) query_expr << infixNotation(clause, [ (required_modifier | prohibit_modifier, 1, opAssoc.RIGHT, SearchModifier), (NOT.setParseAction(lambda: 'NOT'), 1, opAssoc.RIGHT, SearchNot), (AND.setParseAction(lambda: 'AND'), 2, opAssoc.LEFT, SearchAnd), (Optional(OR).setParseAction(lambda: 'OR'), 2, opAssoc.LEFT, SearchOr), ]) class QueryParser: DEFAULT_FIELD = 'text' def parse(self, query, default_field=None): default_field = default_field or QueryParser.DEFAULT_FIELD
def __init__(self):
    """Build the pyparsing grammar for a simple SELECT statement.

    The grammar covers ``select <columns> from <tables> where <conditions>;``
    with ``--`` comments ignored.

    Attributes set on the instance:

    * ``identExpr`` / ``functionExpr`` -- sub-grammars exposed for debugging.
    * ``tableAction`` / ``whereExpAction`` -- lists passed to
      ``self.actionWrapper`` to build the parse actions attached to table
      names and flat where conditions (``actionWrapper`` is defined
      elsewhere; presumably it dispatches to the callables in the list --
      confirm).
    * ``selectPart`` / ``fromPart`` -- the select and from sub-grammars.
    * ``simpleSQL`` -- the complete statement grammar.
    """
    # define SQL tokens
    selectStmt = Forward()
    selectToken = Keyword("select", caseless=True)
    fromToken = Keyword("from", caseless=True)
    asToken = Keyword("as", caseless=True)
    whereToken = Keyword("where", caseless=True)
    semicolon = Literal(";")

    ident = Word(alphas, alphanums + "_$").setName("identifier")
    columnName = delimitedList(ident, ".", combine=True)

    # Function calls: name[.name](<rvalue list>); the argument list is
    # defined further down, hence the Forward.
    columnRvalList = Forward()
    functionExpr = (ident + Optional("." + ident) + Literal('(')
                    + columnRvalList + Literal(')'))
    identExpr = functionExpr | ident
    self.identExpr = identExpr  # Debug
    self.functionExpr = functionExpr  # Debug
    alias = ident.copy()

    columnRef = columnName
    functionSpec = functionExpr
    valueExprPrimary = functionSpec | columnRef
    numPrimary = valueExprPrimary  ## | numericValFunc

    # Arithmetic over columns and function calls in the select list.
    factor = Optional(Literal("+") | Literal("-")) + numPrimary
    muldiv = oneOf("* /")
    term = Forward()
    term << factor + Optional(muldiv + factor)
    numericExpr = Forward()
    addsub = oneOf("+ -")
    numericExpr << term + Optional(addsub + numericExpr)

    # Arithmetic allowed on the value side of a where condition.
    arithop = oneOf("+ - * /")
    columnNumericExpr = Forward()
    cTerm = valueExprPrimary
    columnNumericExpr << cTerm + Optional(arithop + columnNumericExpr)

    valueExpr = numericExpr  ## | stringExpr | dateExpr | intervalExpr
    derivedColumn = valueExpr + Optional(asToken + alias)
    selectSubList = derivedColumn + ZeroOrMore("," + derivedColumn)

    # Table names are not upcased.
    tableName = delimitedList(ident, ".", combine=True)
    self.tableAction = []
    tableName.addParseAction(self.actionWrapper(self.tableAction))
    # NOTE(review): the return value is discarded; in current pyparsing
    # setResultsName returns a copy, so this call likely has no effect.
    tableName.setResultsName("table")
    tableAlias = tableName + asToken + ident.setResultsName("aliasName")
    # NOTE(review): same as above -- result discarded.
    tableAlias.setResultsName("alias")
    genericTableName = tableAlias | tableName
    genericTableName = genericTableName.setResultsName("tablename")
    tableNameList = Group(
        genericTableName + ZeroOrMore("," + genericTableName))

    whereExpression = Forward()
    and_ = Keyword("and", caseless=True)
    or_ = Keyword("or", caseless=True)
    in_ = Keyword("in", caseless=True)
    between_ = Keyword("between", caseless=True)

    E = CaselessLiteral("E")
    binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
    arithSign = Word("+-", exact=1)

    realNum = Combine(
        Optional(arithSign)
        + (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums)))
        + Optional(E + Optional(arithSign) + Word(nums)))

    intNum = Combine(
        Optional(arithSign) + Word(nums)
        + Optional(E + Optional("+") + Word(nums)))

    # need to add support for alg expressions
    columnRval = realNum | intNum | quotedString | columnNumericExpr
    columnRvalList << Group(columnRval + ZeroOrMore("," + columnRval))

    self.whereExpAction = []

    namedRv = columnRval.setResultsName("column")
    whereConditionFlat = Group(
        (functionSpec + binop + columnRval) |
        (namedRv + binop + columnRval) |
        (namedRv + in_ + "(" + columnRval + ZeroOrMore("," + namedRv) + ")") |
        (namedRv + in_ + "(" + selectStmt + ")") |
        (namedRv + between_ + namedRv + and_ + namedRv))

    whereConditionFlat.addParseAction(
        self.actionWrapper(self.whereExpAction))
    whereCondition = Group(whereConditionFlat | ("(" + whereExpression + ")"))

    def scWhere(tok):
        """Drop literal TRUE terms from a parsed where expression.

        ``TRUE AND x`` keeps only ``x``; on ``TRUE OR ...`` copying stops and
        the TRUE term plus everything after it is dropped.
        """
        newtok = []
        i = 0
        while i < len(tok):
            is_true = str(tok[i]) in ["TRUE", str(["TRUE"])]
            if is_true and (i + 1) < len(tok):
                # Look at the connective that follows the TRUE term.
                connective = str(tok[i + 1]).upper()
                if connective == "AND":
                    i += 2
                    continue
                elif connective == "OR":
                    break
            newtok.append(tok[i])
            i += 1
        return newtok

    def collapseWhere(tok):
        """Empty the where clause when its only condition is TRUE."""
        if ["TRUE"] == tok.asList()[0][1]:
            tok = []
        return tok

    andExpr = and_ + whereExpression
    orExpr = or_ + whereExpression
    whereExpression << whereCondition + ZeroOrMore(andExpr | orExpr)
    whereExpression.addParseAction(scWhere)

    self.selectPart = selectToken + (
        '*' | selectSubList).setResultsName("columns")
    whereClause = Group(whereToken + whereExpression).setResultsName("where")
    whereClause.addParseAction(collapseWhere)
    self.fromPart = fromToken + tableNameList.setResultsName("tables")

    # define the grammar
    selectStmt << (self.selectPart +
                   fromToken +
                   tableNameList.setResultsName("tables") +
                   whereClause)

    self.simpleSQL = selectStmt + semicolon

    # define Oracle comment format, and ignore them
    oracleSqlComment = "--" + restOfLine
    self.simpleSQL.ignore(oracleSqlComment)
def __init__(self):
    """Build the pyparsing grammar for a simple SELECT statement.

    The grammar covers ``select <columns> from <tables> where <conditions>;``
    with ``--`` comments ignored.

    Attributes set on the instance:

    * ``identExpr`` / ``functionExpr`` -- sub-grammars exposed for debugging.
    * ``tableAction`` / ``whereExpAction`` -- lists passed to
      ``self.actionWrapper`` to build the parse actions attached to table
      names and flat where conditions (``actionWrapper`` is defined
      elsewhere; presumably it dispatches to the callables in the list --
      confirm).
    * ``selectPart`` / ``fromPart`` -- the select and from sub-grammars.
    * ``simpleSQL`` -- the complete statement grammar.
    """
    # define SQL tokens
    selectStmt = Forward()
    selectToken = Keyword("select", caseless=True)
    fromToken = Keyword("from", caseless=True)
    asToken = Keyword("as", caseless=True)
    whereToken = Keyword("where", caseless=True)
    semicolon = Literal(";")

    ident = Word(alphas, alphanums + "_$").setName("identifier")
    columnName = delimitedList(ident, ".", combine=True)

    # Function calls: name[.name](<rvalue list>); the argument list is
    # defined further down, hence the Forward.
    columnRvalList = Forward()
    functionExpr = (ident + Optional("." + ident) + Literal('(')
                    + columnRvalList + Literal(')'))
    identExpr = functionExpr | ident
    self.identExpr = identExpr  # Debug
    self.functionExpr = functionExpr  # Debug
    alias = ident.copy()

    columnRef = columnName
    functionSpec = functionExpr
    valueExprPrimary = functionSpec | columnRef
    numPrimary = valueExprPrimary  ## | numericValFunc

    # Arithmetic over columns and function calls in the select list.
    factor = Optional(Literal("+") | Literal("-")) + numPrimary
    muldiv = oneOf("* /")
    term = Forward()
    term << factor + Optional(muldiv + factor)
    numericExpr = Forward()
    addsub = oneOf("+ -")
    numericExpr << term + Optional(addsub + numericExpr)

    # Arithmetic allowed on the value side of a where condition.
    arithop = oneOf("+ - * /")
    columnNumericExpr = Forward()
    cTerm = valueExprPrimary
    columnNumericExpr << cTerm + Optional(arithop + columnNumericExpr)

    valueExpr = numericExpr  ## | stringExpr | dateExpr | intervalExpr
    derivedColumn = valueExpr + Optional(asToken + alias)
    selectSubList = derivedColumn + ZeroOrMore("," + derivedColumn)

    # Table names are not upcased.
    tableName = delimitedList(ident, ".", combine=True)
    self.tableAction = []
    tableName.addParseAction(self.actionWrapper(self.tableAction))
    # NOTE(review): the return value is discarded; in current pyparsing
    # setResultsName returns a copy, so this call likely has no effect.
    tableName.setResultsName("table")
    tableAlias = tableName + asToken + ident.setResultsName("aliasName")
    # NOTE(review): same as above -- result discarded.
    tableAlias.setResultsName("alias")
    genericTableName = tableAlias | tableName
    genericTableName = genericTableName.setResultsName("tablename")
    tableNameList = Group(
        genericTableName + ZeroOrMore("," + genericTableName))

    whereExpression = Forward()
    and_ = Keyword("and", caseless=True)
    or_ = Keyword("or", caseless=True)
    in_ = Keyword("in", caseless=True)
    between_ = Keyword("between", caseless=True)

    E = CaselessLiteral("E")
    binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
    arithSign = Word("+-", exact=1)

    realNum = Combine(
        Optional(arithSign)
        + (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums)))
        + Optional(E + Optional(arithSign) + Word(nums)))

    intNum = Combine(
        Optional(arithSign) + Word(nums)
        + Optional(E + Optional("+") + Word(nums)))

    # need to add support for alg expressions
    columnRval = realNum | intNum | quotedString | columnNumericExpr
    columnRvalList << Group(columnRval + ZeroOrMore("," + columnRval))

    self.whereExpAction = []

    namedRv = columnRval.setResultsName("column")
    whereConditionFlat = Group(
        (functionSpec + binop + columnRval) |
        (namedRv + binop + columnRval) |
        (namedRv + in_ + "(" + columnRval + ZeroOrMore("," + namedRv) + ")") |
        (namedRv + in_ + "(" + selectStmt + ")") |
        (namedRv + between_ + namedRv + and_ + namedRv))

    whereConditionFlat.addParseAction(
        self.actionWrapper(self.whereExpAction))
    whereCondition = Group(whereConditionFlat | ("(" + whereExpression + ")"))

    def scWhere(tok):
        """Drop literal TRUE terms from a parsed where expression.

        ``TRUE AND x`` keeps only ``x``; on ``TRUE OR ...`` copying stops and
        the TRUE term plus everything after it is dropped.
        """
        newtok = []
        i = 0
        while i < len(tok):
            is_true = str(tok[i]) in ["TRUE", str(["TRUE"])]
            if is_true and (i + 1) < len(tok):
                # Look at the connective that follows the TRUE term.
                connective = str(tok[i + 1]).upper()
                if connective == "AND":
                    i += 2
                    continue
                elif connective == "OR":
                    break
            newtok.append(tok[i])
            i += 1
        return newtok

    def collapseWhere(tok):
        """Empty the where clause when its only condition is TRUE."""
        if ["TRUE"] == tok.asList()[0][1]:
            tok = []
        return tok

    andExpr = and_ + whereExpression
    orExpr = or_ + whereExpression
    whereExpression << whereCondition + ZeroOrMore(andExpr | orExpr)
    whereExpression.addParseAction(scWhere)

    self.selectPart = selectToken + (
        '*' | selectSubList).setResultsName("columns")
    whereClause = Group(whereToken + whereExpression).setResultsName("where")
    whereClause.addParseAction(collapseWhere)
    self.fromPart = fromToken + tableNameList.setResultsName("tables")

    # define the grammar
    selectStmt << (self.selectPart +
                   fromToken +
                   tableNameList.setResultsName("tables") +
                   whereClause)

    self.simpleSQL = selectStmt + semicolon

    # define Oracle comment format, and ignore them
    oracleSqlComment = "--" + restOfLine
    self.simpleSQL.ignore(oracleSqlComment)