Esempio n. 1
0
def parse(input_string):
    def flatten_binary_operators(position, source, flattened_tokens):
        while len(flattened_tokens) >= 3:
            lhs, type_call, rhs = flattened_tokens[:3]
            flattened_tokens = [type_call(position, source, lhs, rhs)] + flattened_tokens[3:]
        return flattened_tokens[0]

    def flatten_unary_operators(position, source, flattened_tokens):
        type_call = flattened_tokens[0]
        return type_call(position, source, flattened_tokens[1])

    # Packrat
    ParserElement.enablePackrat()

    lit_form = Suppress("form")
    lit_if = Suppress("if")
    lit_else = Suppress("else")

    lit_l_curly = Suppress("{")
    lit_r_curly = Suppress("}")
    lit_l_paren = Suppress("(")
    lit_r_paren = Suppress(")")

    lit_colon = Suppress(":")
    lit_assign_op = Suppress("=")

    lit_op_multiplication = Literal("*").setParseAction(lambda _: ast.Multiplication)
    lit_op_division = Literal("/").setParseAction(lambda _: ast.Division)
    lit_op_subtract = Literal("-").setParseAction(lambda _: ast.Subtraction)
    lit_op_addition = Literal("+").setParseAction(lambda _: ast.Addition)
    lit_op_positive = Literal("+").setParseAction(lambda _: ast.Positive)
    lit_op_negative = Literal("-").setParseAction(lambda _: ast.Negative)

    lit_op_not = Literal("!").setParseAction(lambda _: ast.Negation)
    lit_op_lower_exclusive = Literal("<").setParseAction(lambda _: ast.LowerExclusive)
    lit_op_lower_inclusive = Literal("<=").setParseAction(lambda _: ast.LowerInclusive)
    lit_op_greater_inclusive = Literal(">=").setParseAction(lambda _: ast.GreaterInclusive)
    lit_op_greater_exclusive = Literal(">").setParseAction(lambda _: ast.GreaterExclusive)
    lit_op_equality = Literal("==").setParseAction(lambda _: ast.Equality)
    lit_op_inequality = Literal("!=").setParseAction(lambda _: ast.Inequality)
    lit_op_and = Literal("&&").setParseAction(lambda _: ast.And)
    lit_op_or = Literal("||").setParseAction(lambda _: ast.Or)

    type_money = Literal("money").setParseAction(
        lambda source, position, _: ast.Money(position, source))
    type_integer = Literal("integer").setParseAction(
        lambda source, position, _: ast.Integer(position, source))
    type_boolean = Literal("boolean").setParseAction(
        lambda source, position, _: ast.Boolean(position, source))

    type_string = Literal("string").setParseAction(
        lambda source, position, _: ast.String(position, source))

    data_types = (type_money | type_integer | type_boolean | type_string)

    true = Literal("true").setParseAction(
        lambda source, position, _: ast.Boolean(position, source, True))
    false = Literal("false").setParseAction(
        lambda source, position, _: ast.Boolean(position, source, False))
    boolean = (true | false)

    integer = Word(nums).setParseAction(
        lambda source, position, parsed_tokens: ast.Integer(position, source, int(parsed_tokens[0])))
    money = Combine(Word(nums) + Literal(".") + Word(nums)).setParseAction(
        lambda source, position, parsed_tokens: ast.Money(position, source, float(parsed_tokens[0])))
    number = (money | integer)

    string = QuotedString("'", unquoteResults=True)\
        .setParseAction(
            lambda source, position, parsed_tokens:
                ast.String(position, source, str(parsed_tokens[0])))

    reserved_words = (lit_form | lit_if | lit_else | boolean | number | data_types)

    name = ~reserved_words + Word(alphas, alphanums + '_').setResultsName(
        'identifier').setParseAction(
        lambda source, position, parsed_tokens: ast.Identifier(position, source, parsed_tokens[0]))

    operand_arith = (number | boolean | name | string)

    operand_list_arith = [
        (lit_op_positive | lit_op_negative | lit_op_not,
         1, opAssoc.RIGHT,
         lambda source, position, flattened_tokens: flatten_unary_operators(position, source, *flattened_tokens)),
        (lit_op_multiplication | lit_op_division,
         2, opAssoc.LEFT,
         lambda source, position, flattened_tokens: flatten_binary_operators(position, source, *flattened_tokens)),
        (lit_op_addition | lit_op_subtract,
         2, opAssoc.LEFT,
         lambda source, position, flattened_tokens: flatten_binary_operators(position, source, *flattened_tokens)),
    ]

    operand_list_bool = [
        (lit_op_lower_inclusive | lit_op_greater_inclusive | lit_op_greater_exclusive | lit_op_lower_exclusive,
         2, opAssoc.LEFT,
         lambda source, position, flattened_tokens: flatten_binary_operators(position, source, *flattened_tokens)),
        (lit_op_equality | lit_op_inequality,
         2, opAssoc.LEFT,
         lambda source, position, flattened_tokens: flatten_binary_operators(position, source, *flattened_tokens)),
        (lit_op_and,
         2, opAssoc.LEFT,
         lambda source, position, flattened_tokens: flatten_binary_operators(position, source, *flattened_tokens)),
        (lit_op_or,
         2, opAssoc.LEFT,
         lambda source, position, flattened_tokens: flatten_binary_operators(position, source, *flattened_tokens)),
    ]

    literal_precedence = infixNotation(
        operand_arith,
        (operand_list_arith + operand_list_bool)
    )

    expression = \
        OneOrMore(
            literal_precedence |
            (lit_l_paren + literal_precedence + lit_r_paren)
        )

    field = Forward()
    field_assignment = Forward()

    field_statement = (
        QuotedString('"', unquoteResults=True).setResultsName("title") +
        name.setResultsName("identifier") + lit_colon + data_types.setResultsName("data_type")
    )

    field <<= field_statement
    field.setParseAction(lambda source, position, parsed_tokens: ast.Field(position, source, *parsed_tokens))

    field_assignment <<= field_statement + lit_assign_op + expression
    field_assignment.setParseAction(
        lambda source, position, parsed_tokens: ast.Assignment(position, source, *parsed_tokens))

    field_order = field_assignment | field

    conditional_if = Forward()
    conditional_if_else = Forward()
    statement = Forward()
    body = Forward()

    if_statement = lit_if + lit_l_paren + expression + lit_r_paren + body
    conditional_if <<= if_statement
    conditional_if.setParseAction(ast.If)

    conditional_if_else <<= (
        if_statement +
        Optional(lit_else + body).setResultsName('else_statement')
    )
    conditional_if_else.setParseAction(ast.IfElse)

    conditional = conditional_if_else | conditional_if

    statement <<= (field_order | conditional)

    body <<= lit_l_curly + OneOrMore(statement) + lit_r_curly
    body.addParseAction(lambda parsed_tokens: [parsed_tokens.asList()])
    body.setResultsName('statement_list')

    form = (lit_form + name + body)\
        .addParseAction(lambda parsed_tokens: ast.Form(*parsed_tokens))\
        .setResultsName('form')\
        .parseWithTabs()
    return form.parseString(input_string).form
Esempio n. 2
0
GTE = Literal('>=')
LT = Literal('<')
LTE = Literal('<=')

mongo_op = (GTE | GT | LTE | LT)
mongo_op.setParseAction(
    lambda t: t[0].replace('>=', '$gte').replace('>', '$gt').replace('<=', '$lte').replace('<', '$lt')
)
one_sided_range = Group(mongo_op('op') + valid_word('bound'))('onesidedrange')

term = (_range | one_sided_range | regex | wildcard | phrase | single_term)

clause << (Optional(field_name + COLON, default='__default_field__')('field') +
           (term('term') | Group(LPAR + query_expr + RPAR)('subquery')))

clause.addParseAction(SearchTerm)

query_expr << infixNotation(clause,
                            [
                                (required_modifier | prohibit_modifier, 1, opAssoc.RIGHT, SearchModifier),
                                ((not_ | '!').setParseAction(lambda: 'NOT'), 1, opAssoc.RIGHT, SearchNot),
                                ((and_ | '&&').setParseAction(lambda: 'AND'), 2, opAssoc.LEFT, SearchAnd),
                                (Optional(or_ | '||').setParseAction(lambda: 'OR'), 2, opAssoc.LEFT, SearchOr),
                            ])


class QueryParser:

    DEFAULT_FIELD = 'text'
    DEFAULT_OPERATOR = '$regex'
Esempio n. 3
0
).addParseAction(to_case_call)

selectStmt = Forward()
compound = (
    (
        Keyword("not", caseless=True)("op").setDebugActions(*debug) + expr("params")
    ).addParseAction(to_json_call)
    | (
        Keyword("distinct", caseless=True)("op").setDebugActions(*debug)
        + expr("params")
    ).addParseAction(to_json_call)
    | Keyword("null", caseless=True).setName("null").setDebugActions(*debug)
    | case
    | (
        Literal("(").setDebugActions(*debug).suppress()
        + selectStmt.addParseAction(subquery_call)
        + Literal(")").suppress()
    )
    | (
        Literal("(").setDebugActions(*debug).suppress()
        + Group(delimitedList(expr))
        + Literal(")").suppress()
    )
    | realNum.setName("float").setDebugActions(*debug)
    | intNum.setName("int").setDebugActions(*debug)
    | (Literal("-")("op").setDebugActions(*debug) + expr("params")).addParseAction(
        to_json_call
    )
    | sqlString.setName("string").setDebugActions(*debug)
    | (
        Word(alphas)("op").setName("function name").setDebugActions(*debug)
Esempio n. 4
0
                    (RBRACK('inclusive') | RBRACE('esclusive')))
_range = (lower_range + TO + upper_range)('range')

GT = Literal('>')
GTE = Literal('>=')
LT = Literal('<')
LTE = Literal('<=')
one_sided_range = Group((GTE | GT | LTE | LT)('op') +
                        valid_word('bound'))('onesidedrange')

term = (_range | one_sided_range | regex | wildcard | phrase | single_term)

clause << (Optional(field_name + COLON, default='__default_field__')('field') +
           (term('term') | Group(LPAR + query_expr + RPAR)('subquery')))

clause.addParseAction(SearchTerm)

query_expr << infixNotation(clause, [
    (required_modifier | prohibit_modifier, 1, opAssoc.RIGHT, SearchModifier),
    (NOT.setParseAction(lambda: 'NOT'), 1, opAssoc.RIGHT, SearchNot),
    (AND.setParseAction(lambda: 'AND'), 2, opAssoc.LEFT, SearchAnd),
    (Optional(OR).setParseAction(lambda: 'OR'), 2, opAssoc.LEFT, SearchOr),
])


class QueryParser:

    DEFAULT_FIELD = 'text'

    def parse(self, query, default_field=None):
        default_field = default_field or QueryParser.DEFAULT_FIELD
Esempio n. 5
0
    def __init__(self):
        # define SQL tokens
        selectStmt = Forward()
        selectToken = Keyword("select", caseless=True)
        fromToken   = Keyword("from", caseless=True)
        asToken   = Keyword("as", caseless=True)
        whereToken = Keyword("where", caseless=True)
        semicolon = Literal(";")

        ident = Word( alphas, alphanums + "_$" ).setName("identifier")

        columnName     = delimitedList( ident, ".", combine=True )
        #columnName.setParseAction(upcaseTokens)
        columnNameList = Group( columnName + ZeroOrMore("," + columnName))
#        selectableList = Forward()
        columnRvalList = Forward()
        functionExpr = ident + Optional("."+ident) + Literal('(') + columnRvalList + Literal(')')
        alias = Forward()
        identExpr  = functionExpr | ident
        self.identExpr = identExpr # Debug
        self.functionExpr = functionExpr # Debug
        alias = ident.copy()

        selectableName = identExpr | columnName

        selectableList = Group( selectableName + ZeroOrMore(","+selectableName))
        columnRef = columnName
        functionSpec = functionExpr
        valueExprPrimary = functionSpec | columnRef
        numPrimary = valueExprPrimary ## | numericValFunc
        factor = Optional(Literal("+") | Literal("-")) + numPrimary
        muldiv = oneOf("* /")
        term = Forward()
        term << factor + Optional(muldiv + factor)
        numericExpr = Forward()
        addsub = oneOf("+ -")
        numericExpr << term + Optional(addsub + numericExpr)

        arithop = oneOf("+ - * /")
        columnNumericExpr = Forward()
        cTerm = valueExprPrimary
        testme = valueExprPrimary + arithop + valueExprPrimary
        columnNumericExpr << cTerm + Optional(arithop + columnNumericExpr)
        colNumExpList = Group( columnNumericExpr + ZeroOrMore(","+columnNumericExpr)) 
        

        valueExpr = numericExpr ## | stringExpr | dateExpr | intervalExpr
        derivedColumn = valueExpr + Optional(asToken + alias)
        selectSubList = derivedColumn + ZeroOrMore("," + derivedColumn)


        tableName      = delimitedList( ident, ".", combine=True )
        # don't upcase table names anymore
        # tableName.setParseAction(upcaseTokens) 
        self.tableAction = []
        tableName.addParseAction(self.actionWrapper(self.tableAction))
        tableName.setResultsName("table")
        tableAlias = tableName + asToken + ident.setResultsName("aliasName")
        tableAlias.setResultsName("alias")
        genericTableName = tableAlias | tableName
        genericTableName = genericTableName.setResultsName("tablename")
        tableNameList  = Group( genericTableName 
                                + ZeroOrMore("," + genericTableName))

        whereExpression = Forward()
        and_ = Keyword("and", caseless=True)
        or_ = Keyword("or", caseless=True)
        in_ = Keyword("in", caseless=True)
        between_ = Keyword("between", caseless=True)
    
        E = CaselessLiteral("E")
        binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
        arithSign = Word("+-",exact=1)
        realNum = Combine( Optional(arithSign) + ( Word( nums ) + "." 
                                                   + Optional( Word(nums) )  |
                                                   ( "." + Word(nums) ) ) + 
                           Optional( E + Optional(arithSign) + Word(nums) ) )
        intNum = Combine( Optional(arithSign) + Word( nums ) + 
                          Optional( E + Optional("+") + Word(nums) ) )

        # need to add support for alg expressions
        columnRval = realNum | intNum | quotedString | columnNumericExpr# | numericExpr 
        columnRvalList << Group( columnRval + ZeroOrMore("," + columnRval))

        self.whereExpAction = []

        namedRv = columnRval.setResultsName("column")
        whereConditionFlat = Group(
            ( functionSpec + binop + columnRval) |
            ( namedRv + binop + columnRval ) |
            ( namedRv + in_ + "(" + columnRval + ZeroOrMore(","+namedRv) + ")" ) |
            ( namedRv + in_ + "(" + selectStmt + ")" ) |
            ( namedRv + between_ + namedRv + and_ + namedRv ) )

        whereConditionFlat.addParseAction(self.actionWrapper(self.whereExpAction))
        whereCondition = Group(whereConditionFlat 
                               | ( "(" + whereExpression + ")" ))

        # Test code to try to make an expression parse.
        # print whereConditionFlat.parseString("ABS(o1.ra - o2.ra) < 0.00083 / COS(RADIANS(o2.decl))")
        # goodFunction = ident + Literal('(') + columnNumericExpr  + Literal(')')
        # print "ADFDSFDSF",testme.parseString("o1.ra - o2.ra", parseAll=True)
        # print "ADSFDSFAD", goodFunction.parseString("ABS(o1.ra - o2.ra)")


        #whereExpression << whereCondition.setResultsName("wherecond")
        #+ ZeroOrMore( ( and_ | or_ ) + whereExpression ) 
        def scAnd(tok):
            print "scAnd", tok
            if "TRUE" == tok[0][0]:
                tok = tok[2] 
            elif "TRUE" == tok[2][0]:
                tok = tok[0]
                
            return tok
        def scOr(tok):
            print "scOr", tok
            if ("TRUE" == tok[0][0]) or ("TRUE" == tok[2][0]):
                tok = [["TRUE"]]
            return tok
        def scWhere(tok):
            newtok = []
            i = 0
            while i < len(tok):
                if str(tok[i]) in ["TRUE",str(["TRUE"])] and (i+1) < len(tok):
                    if str(tok[i+1]).upper() == "AND":
                        i += 2
                        continue
                    elif str(tok[i+i]).upper() == "OR":
                        break
                newtok.append(tok[i])
                i += 1
            return newtok
        
        def collapseWhere(tok):
            #collapse.append(tok[0][1])
            if ["TRUE"] == tok.asList()[0][1]:
                tok = [] 
            return tok
        andExpr = and_ + whereExpression
        orExpr = or_ + whereExpression
        whereExpression << whereCondition + ZeroOrMore(
            andExpr | orExpr)
        whereExpression.addParseAction(scWhere)

        self.selectPart = selectToken + ( '*' | selectSubList ).setResultsName( "columns" )
        whereClause = Group(whereToken + 
                                     whereExpression).setResultsName("where") 
        whereClause.addParseAction(collapseWhere)
        self.fromPart = fromToken + tableNameList.setResultsName("tables")

        # define the grammar
        selectStmt      << ( self.selectPart +                         
                             fromToken + 
                             tableNameList.setResultsName( "tables" ) +
                             whereClause)
        
        self.simpleSQL = selectStmt + semicolon

        # define Oracle comment format, and ignore them
        oracleSqlComment = "--" + restOfLine
        self.simpleSQL.ignore( oracleSqlComment )
Esempio n. 6
0
    def __init__(self):
        # define SQL tokens
        selectStmt = Forward()
        selectToken = Keyword("select", caseless=True)
        fromToken = Keyword("from", caseless=True)
        asToken = Keyword("as", caseless=True)
        whereToken = Keyword("where", caseless=True)
        semicolon = Literal(";")

        ident = Word(alphas, alphanums + "_$").setName("identifier")

        columnName = delimitedList(ident, ".", combine=True)
        #columnName.setParseAction(upcaseTokens)
        columnNameList = Group(columnName + ZeroOrMore("," + columnName))
        #        selectableList = Forward()
        columnRvalList = Forward()
        functionExpr = ident + Optional("." + ident) + Literal(
            '(') + columnRvalList + Literal(')')
        alias = Forward()
        identExpr = functionExpr | ident
        self.identExpr = identExpr  # Debug
        self.functionExpr = functionExpr  # Debug
        alias = ident.copy()

        selectableName = identExpr | columnName

        selectableList = Group(selectableName +
                               ZeroOrMore("," + selectableName))
        columnRef = columnName
        functionSpec = functionExpr
        valueExprPrimary = functionSpec | columnRef
        numPrimary = valueExprPrimary  ## | numericValFunc
        factor = Optional(Literal("+") | Literal("-")) + numPrimary
        muldiv = oneOf("* /")
        term = Forward()
        term << factor + Optional(muldiv + factor)
        numericExpr = Forward()
        addsub = oneOf("+ -")
        numericExpr << term + Optional(addsub + numericExpr)

        arithop = oneOf("+ - * /")
        columnNumericExpr = Forward()
        cTerm = valueExprPrimary
        testme = valueExprPrimary + arithop + valueExprPrimary
        columnNumericExpr << cTerm + Optional(arithop + columnNumericExpr)
        colNumExpList = Group(columnNumericExpr +
                              ZeroOrMore("," + columnNumericExpr))

        valueExpr = numericExpr  ## | stringExpr | dateExpr | intervalExpr
        derivedColumn = valueExpr + Optional(asToken + alias)
        selectSubList = derivedColumn + ZeroOrMore("," + derivedColumn)

        tableName = delimitedList(ident, ".", combine=True)
        # don't upcase table names anymore
        # tableName.setParseAction(upcaseTokens)
        self.tableAction = []
        tableName.addParseAction(self.actionWrapper(self.tableAction))
        tableName.setResultsName("table")
        tableAlias = tableName + asToken + ident.setResultsName("aliasName")
        tableAlias.setResultsName("alias")
        genericTableName = tableAlias | tableName
        genericTableName = genericTableName.setResultsName("tablename")
        tableNameList = Group(genericTableName +
                              ZeroOrMore("," + genericTableName))

        whereExpression = Forward()
        and_ = Keyword("and", caseless=True)
        or_ = Keyword("or", caseless=True)
        in_ = Keyword("in", caseless=True)
        between_ = Keyword("between", caseless=True)

        E = CaselessLiteral("E")
        binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
        arithSign = Word("+-", exact=1)
        realNum = Combine(
            Optional(arithSign) +
            (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums))) +
            Optional(E + Optional(arithSign) + Word(nums)))
        intNum = Combine(
            Optional(arithSign) + Word(nums) +
            Optional(E + Optional("+") + Word(nums)))

        # need to add support for alg expressions
        columnRval = realNum | intNum | quotedString | columnNumericExpr  # | numericExpr
        columnRvalList << Group(columnRval + ZeroOrMore("," + columnRval))

        self.whereExpAction = []

        namedRv = columnRval.setResultsName("column")
        whereConditionFlat = Group((functionSpec + binop + columnRval)
                                   | (namedRv + binop + columnRval)
                                   | (namedRv + in_ + "(" + columnRval +
                                      ZeroOrMore("," + namedRv) + ")")
                                   | (namedRv + in_ + "(" + selectStmt + ")")
                                   | (namedRv + between_ + namedRv + and_ +
                                      namedRv))

        whereConditionFlat.addParseAction(
            self.actionWrapper(self.whereExpAction))
        whereCondition = Group(whereConditionFlat
                               | ("(" + whereExpression + ")"))

        # Test code to try to make an expression parse.
        # print whereConditionFlat.parseString("ABS(o1.ra - o2.ra) < 0.00083 / COS(RADIANS(o2.decl))")
        # goodFunction = ident + Literal('(') + columnNumericExpr  + Literal(')')
        # print "ADFDSFDSF",testme.parseString("o1.ra - o2.ra", parseAll=True)
        # print "ADSFDSFAD", goodFunction.parseString("ABS(o1.ra - o2.ra)")

        #whereExpression << whereCondition.setResultsName("wherecond")
        #+ ZeroOrMore( ( and_ | or_ ) + whereExpression )
        def scAnd(tok):
            print "scAnd", tok
            if "TRUE" == tok[0][0]:
                tok = tok[2]
            elif "TRUE" == tok[2][0]:
                tok = tok[0]

            return tok

        def scOr(tok):
            print "scOr", tok
            if ("TRUE" == tok[0][0]) or ("TRUE" == tok[2][0]):
                tok = [["TRUE"]]
            return tok

        def scWhere(tok):
            newtok = []
            i = 0
            while i < len(tok):
                if str(tok[i]) in ["TRUE", str(["TRUE"])
                                   ] and (i + 1) < len(tok):
                    if str(tok[i + 1]).upper() == "AND":
                        i += 2
                        continue
                    elif str(tok[i + i]).upper() == "OR":
                        break
                newtok.append(tok[i])
                i += 1
            return newtok

        def collapseWhere(tok):
            #collapse.append(tok[0][1])
            if ["TRUE"] == tok.asList()[0][1]:
                tok = []
            return tok

        andExpr = and_ + whereExpression
        orExpr = or_ + whereExpression
        whereExpression << whereCondition + ZeroOrMore(andExpr | orExpr)
        whereExpression.addParseAction(scWhere)

        self.selectPart = selectToken + (
            '*' | selectSubList).setResultsName("columns")
        whereClause = Group(whereToken +
                            whereExpression).setResultsName("where")
        whereClause.addParseAction(collapseWhere)
        self.fromPart = fromToken + tableNameList.setResultsName("tables")

        # define the grammar
        selectStmt << (self.selectPart + fromToken +
                       tableNameList.setResultsName("tables") + whereClause)

        self.simpleSQL = selectStmt + semicolon

        # define Oracle comment format, and ignore them
        oracleSqlComment = "--" + restOfLine
        self.simpleSQL.ignore(oracleSqlComment)