def test_multiple_indices_access():
    """An index with several comma-separated items followed by a bare alias."""
    tokens = to_tokens("array[index(0), 'foo'] alias")
    parsed = ExpressionParser.parse(tokens)

    indexed = Index(
        Column("array"),
        [
            FunctionCall("index", Integer(0)),
            String("foo", quotes="'"),
        ],
    )
    assert parsed == Alias(indexed, with_as=False, alias="alias")
Exemple #2
0
    def parse(
        tokens,
        is_right_hand=False,
        can_be_type=False,
        can_alias=True,
        until_one_of=None,
        first_token=None,
        is_chained_columns=False,
    ) -> Tuple[Expression, Any]:
        """Parse a single expression from ``tokens``.

        Args:
            tokens: Iterator of tokens; it is consumed with ``next()``.
            is_right_hand: When true, return as soon as the operand (with any
                OVER clause, chained columns, arithmetic and index suffixes)
                has been read, without parsing conditions, boolean
                predicates, EXCEPT or aliases.
            can_be_type: When true, a token found in ``Type.VALUES`` is
                parsed as a ``Type``.
            can_alias: When false, a trailing token is never turned into an
                ``Alias``.
            until_one_of: Tokens that must not be interpreted as an alias;
                compared against the lowercased trailing token.
            first_token: Token already read by the caller, used as the first
                token instead of reading one from ``tokens``.
            is_chained_columns: Set when parsing the right side of a ``.``;
                disables arithmetic parsing and returns early like
                ``is_right_hand``.

        Returns:
            A ``(expression, next_token)`` tuple, where ``next_token`` is the
            token read after the expression, or ``None`` when ``tokens`` is
            exhausted.

        Raises:
            ParsingError: On a malformed OVER, BETWEEN or EXCEPT clause.
            AssertionError: When an expected keyword or bracket is missing in
                the CAST, ARRAY_AGG, CASE, array-literal or FILTER branches.
            StopIteration: When ``tokens`` ends where a token is mandatory.
        """
        until_one_of = until_one_of or []

        main_token = first_token or next(tokens)
        next_token = None

        # Expressions that can be recognised from the first token alone.
        if main_token in String.QUOTES:
            expression = StringParser.parse(tokens, main_token)
        elif main_token.isdigit():
            expression = Integer(main_token)
        elif main_token.replace(".", "").isdigit():
            expression = Float(main_token)
        elif lower(main_token) in Boolean.BOOLEAN_VALUES:
            expression = Boolean(main_token)
        elif lower(main_token) in Null.VALUES:
            expression = Null()
        elif lower(main_token) == Negation.PREDICATE:
            rest_expression, next_token = ExpressionParser.parse(
                tokens,
                is_right_hand=True,
                until_one_of=until_one_of,
            )
            expression = Negation(rest_expression)
        elif main_token == "(":
            argument_tokens = get_tokens_until_closing_parenthesis(tokens)
            arguments = ExpressionListParser.parse(iter(argument_tokens))
            expression = Parenthesis(*arguments)
        elif main_token == "[":
            argument_tokens, next_token = get_tokens_until_one_of(
                tokens, stop_words=["]"])
            assert next_token == "]", next_token
            arguments = ExpressionListParser.parse(iter(argument_tokens))
            expression = Array(*arguments)
            next_token = next(tokens, None)
        elif lower(main_token) == "case":
            argument_tokens, next_token = get_tokens_until_one_of(
                tokens, ["end"])
            assert lower(next_token) == "end"
            next_token = next(tokens, None)
            expression = CaseParser.parse(iter(argument_tokens))
        elif lower(main_token) == "select":
            # No stop words: the sub-select takes every remaining token.
            argument_tokens, next_token = get_tokens_until_one_of(tokens, [])
            next_token = next(tokens, None)
            expression = SelectStatementParser.parse(iter(argument_tokens))
        else:
            expression = None

        if next_token is None:
            next_token = next(tokens, None)

        # Expressions that need the next_token to be read
        if expression is None:
            if next_token is not None and next_token == "(":
                # ``name(`` is a function call; a few functions have a
                # dedicated argument syntax.
                if lower(main_token) == "cast":
                    column_tokens, next_token = get_tokens_until_one_of(
                        tokens, stop_words=["as"])
                    column, _ = ExpressionParser.parse(
                        iter(column_tokens),
                        is_right_hand=True,
                        until_one_of=until_one_of,
                    )
                    assert lower(next_token) == "as", next_token
                    next_token = next(tokens)
                    cast_type = Type(next_token)
                    expression = CastFunctionCall(column, cast_type)
                    next_token = next(tokens)
                    assert lower(next_token) == ")", next_token
                elif lower(main_token) == "array_agg":
                    next_token = next(tokens)
                    if lower(next_token) == "distinct":
                        distinct = True
                        first_token = None
                    else:
                        distinct = False
                        # Not DISTINCT: the token belongs to the column.
                        first_token = next_token

                    # The stop words must match the keywords handled below;
                    # "respect" (not "respects") is the one checked for
                    # RESPECT NULLS.
                    column_tokens, next_token = get_tokens_until_one_of(
                        tokens,
                        stop_words=[
                            ")", "ignore", "respect", "order", "limit"
                        ],
                        first_token=first_token,
                    )
                    column, _ = ExpressionParser.parse(
                        iter(column_tokens), until_one_of=until_one_of)

                    ignore_nulls = respect_nulls = False
                    if lower(next_token) == "ignore":
                        next_token = next(tokens)
                        assert lower(next_token) == "nulls"
                        ignore_nulls = True
                        next_token = next(tokens)
                    elif lower(next_token) == "respect":
                        next_token = next(tokens)
                        assert lower(next_token) == "nulls"
                        respect_nulls = True
                        next_token = next(tokens)

                    if lower(next_token) == "order":
                        next_token = next(tokens)
                        assert lower(next_token) == "by"
                        expression_tokens, next_token = get_tokens_until_one_of(
                            tokens, ["limit", ")"])
                        order_bys = OrderByParser.parse(
                            iter(expression_tokens))
                    else:
                        order_bys = None

                    limit = None
                    if lower(next_token) == "limit":
                        next_token = next(tokens)
                        limit = int(next_token)
                        next_token = next(tokens)

                    assert lower(next_token) == ")", next_token
                    expression = ArrayAggFunctionCall(
                        column=column,
                        distinct=distinct,
                        ignore_nulls=ignore_nulls,
                        respect_nulls=respect_nulls,
                        order_bys=order_bys,
                        limit=limit,
                    )
                elif lower(main_token) == "count":
                    next_token = next(tokens)
                    if lower(next_token) == "distinct":
                        distinct = True
                        first_token = None
                    else:
                        distinct = False
                        first_token = next_token

                    argument_tokens = get_tokens_until_closing_parenthesis(
                        tokens, first_token=first_token)
                    arguments = ExpressionListParser.parse(
                        iter(argument_tokens))
                    expression = CountFunctionCall(*arguments,
                                                   distinct=distinct)
                else:
                    argument_tokens = get_tokens_until_closing_parenthesis(
                        tokens)
                    arguments_can_be_type = can_be_type or any(
                        lower(t) == "timestamp_trunc" for t in argument_tokens)
                    arguments = ExpressionListParser.parse(
                        iter(argument_tokens),
                        can_be_type=arguments_can_be_type)
                    expression = FunctionCall(main_token, *arguments)

                # Optional ``FILTER (WHERE ...)`` after any function call.
                next_token = next(tokens, None)
                if next_token and lower(next_token) == "filter":
                    next_next_token = next(tokens)
                    assert next_next_token == "(", next_next_token
                    argument_tokens = get_tokens_until_closing_parenthesis(
                        tokens)
                    assert lower(
                        argument_tokens[0]) == "where", argument_tokens
                    filter_condition, next_token = ExpressionParser.parse(
                        iter(argument_tokens[1:]),
                        can_alias=False,
                    )

                    expression = FilteredFunctionCall(expression,
                                                      filter_condition)
                    next_token = next(tokens, None)

            elif (next_token is not None
                  and lower(main_token) in DatePartExtraction.PARTS
                  and lower(next_token) == "from"):
                rest_expression, next_token = ExpressionParser.parse(
                    tokens, until_one_of=until_one_of)
                expression = DatePartExtraction(main_token, rest_expression)
            elif lower(main_token) in Type.VALUES and can_be_type:
                expression = Type(main_token)
            elif next_token is not None and next_token == "[":
                argument_tokens, next_token = get_tokens_until_one_of(
                    tokens, stop_words=["]"])
                arguments = ExpressionListParser.parse(iter(argument_tokens))
                expression = Index(
                    Column(main_token),
                    arguments)  # left item will not always be a column
                next_token = next(tokens, None)
            elif next_token is not None and main_token == "-" and next_token.isdigit(
            ):
                # A leading "-" followed by digits is a negative integer.
                expression = Integer(-int(next_token))
                next_token = next(tokens, None)
            elif (next_token is not None and main_token == "-"
                  and next_token.replace(".", "").isdigit()):
                expression = Float(-float(next_token))
                next_token = next(tokens, None)
            elif (lower(main_token) in String.PREFIXES
                  and next_token is not None
                  and lower(next_token) in String.QUOTES):
                expression = StringParser.parse(tokens,
                                                start_quote=next_token,
                                                prefix=main_token)
            else:
                expression = Column(main_token)

        # Analytic clause: ``<expression> OVER ( [PARTITION BY ...]
        # [ORDER BY ...] [ROWS|RANGE ...] )``.
        if lower(next_token) == "over":
            opening_parenthesis = next(tokens, None)
            if opening_parenthesis != "(":
                raise ParsingError("expected '('")

            argument_tokens = iter(
                get_tokens_until_closing_parenthesis(tokens))
            argument_next_token = next(argument_tokens, None)
            if lower(argument_next_token) == "partition":
                argument_next_token = next(argument_tokens, None)
                if not argument_next_token or lower(
                        argument_next_token) != "by":
                    raise ParsingError("Missing BY after PARTITION")
                expression_tokens, argument_next_token = get_tokens_until_one_of(
                    argument_tokens, ["order", "rows", "range"])
                partition_by = ExpressionListParser.parse(
                    iter(expression_tokens))
            else:
                partition_by = None

            if lower(argument_next_token) == "order":
                argument_next_token = next(argument_tokens, None)
                if not argument_next_token or lower(
                        argument_next_token) != "by":
                    raise ParsingError("Missing BY after ORDER")
                expression_tokens, argument_next_token = get_tokens_until_one_of(
                    argument_tokens, ["rows", "range"])
                order_by = OrderByParser.parse(iter(expression_tokens))
            else:
                order_by = None

            if lower(argument_next_token) in ("rows", "range"):
                rows_range = argument_next_token
                expression_tokens, _ = get_tokens_until_one_of(
                    argument_tokens, [])
                frame_clause: Optional[WindowFrameClause] = WindowFrameClause(
                    rows_range, " ".join(expression_tokens))
            else:
                frame_clause = None

            expression = AnalyticsClause(
                expression,
                partition_by=partition_by,
                order_by=order_by,
                frame_clause=frame_clause,
            )
            next_token = next(tokens, None)

        # ``a.b.c``: each "." parses the right side as a chained column.
        while next_token == ".":
            right_hand, next_token = ExpressionParser.parse(
                tokens, until_one_of=until_one_of, is_chained_columns=True)
            expression = ChainedColumns(expression, right_hand)

        # Arithmetic is skipped while inside a chained column so that the
        # outer call applies the operator to the whole chain.
        if next_token and next_token in ("+", "-", "*",
                                         "/") and not is_chained_columns:
            left_hand = expression
            symbol = next_token
            right_hand, next_token = ExpressionParser.parse(
                tokens,
                is_right_hand=True,
                until_one_of=until_one_of,
            )
            expression = ArithmaticOperator(symbol, left_hand, right_hand)

        # Index access on an arbitrary expression, possibly repeated.
        while next_token == "[":
            argument_tokens, next_token = get_tokens_until_one_of(
                tokens, stop_words=["]"])
            arguments = ExpressionListParser.parse(iter(argument_tokens))
            expression = Index(expression, arguments)
            next_token = next(tokens, None)

        if is_right_hand or is_chained_columns:
            return expression, next_token

        if lower(next_token) in Condition.PREDICATES:
            first_token = None
            symbol = next_token
            if lower(next_token) == "is":
                next_next_token = next(tokens)
                if lower(next_next_token) == "not":
                    symbol = "is not"
                else:
                    # Not part of the predicate: hand it to the right side.
                    first_token = next_next_token
            elif lower(next_token) == "not":
                next_next_token = next(tokens)
                if lower(next_next_token) == "in":
                    symbol = "not in"
                else:
                    first_token = next_next_token

            right_hand, next_token = ExpressionParser.parse(
                tokens,
                is_right_hand=True,
                until_one_of=until_one_of,
                first_token=first_token,
            )
            expression = Condition(expression, symbol, right_hand)
        elif lower(next_token) == "between":
            symbol = next_token
            right_hand_left, next_token = ExpressionParser.parse(
                tokens, is_right_hand=True, until_one_of=until_one_of)
            if lower(next_token) != "and":
                raise ParsingError("expected AND")
            right_hand_right, next_token = ExpressionParser.parse(
                tokens, is_right_hand=True, until_one_of=until_one_of)
            right_hand = BooleanCondition(
                "and",
                right_hand_left,
                right_hand_right,
            )
            expression = Condition(expression, symbol, right_hand)
        elif next_token in BitwiseOperation.OPERATORS:
            operator = next_token
            right_hand, next_token = ExpressionParser.parse(
                tokens, is_right_hand=True, until_one_of=until_one_of)
            expression = BitwiseOperation(expression, operator, right_hand)

        if lower(next_token) in BooleanCondition.PREDICATES:
            left_hand = expression
            symbol = next_token
            right_hand, next_token = ExpressionParser.parse(
                tokens, until_one_of=until_one_of)
            # The recursive call may have wrapped only the right side in an
            # alias; re-wrap so the alias covers the whole boolean condition.
            right_alias = None
            if isinstance(right_hand, Alias):
                right_alias = right_hand
                right_hand = right_hand.expression
            expression = BooleanCondition(symbol, left_hand, right_hand)
            if right_alias is not None:
                right_alias.expression = expression
                expression = right_alias

        if lower(next_token) == "except":
            opening_parenthesis = next(tokens, None)
            if opening_parenthesis != "(":
                raise ParsingError("expected '('")
            argument_tokens = get_tokens_until_closing_parenthesis(tokens)
            arguments = ExpressionListParser.parse(iter(argument_tokens))
            expression = ExceptClause(expression, arguments)
            next_token = next(tokens, None)

        # Any remaining token that is not a terminator is treated as an
        # alias, either introduced by AS or given bare.
        if (next_token is not None and next_token != ")"
                and not (next_token in String.QUOTES
                         and isinstance(expression, String))
                and next_token != ";" and lower(next_token) not in until_one_of
                and can_alias):
            if lower(next_token) == "as":
                with_as = True
                alias, _ = ExpressionParser.parse(tokens,
                                                  is_right_hand=True,
                                                  until_one_of=until_one_of)
            else:
                with_as = False
                alias = next_token
            if alias in String.QUOTES:
                alias = StringParser.parse(tokens, alias)
            return Alias(expression, alias, with_as), next(tokens, None)
        return expression, next_token
Exemple #3
0
    def parse(tokens, is_right_hand=False):
        """Parse a single expression from the ``tokens`` iterator.

        Args:
            tokens: Iterator of tokens; it is consumed with ``next()``.
            is_right_hand: When true, stop after the operand (with any OVER
                clause and arithmetic suffix) and skip conditions, boolean
                predicates, EXCEPT and aliases.

        Returns:
            When ``is_right_hand`` is true, an ``(expression, next_token)``
            tuple. Otherwise the expression alone, wrapped in an ``Alias``
            when a trailing alias token was found.

        Raises:
            ParsingError: On a malformed OVER, BETWEEN or EXCEPT clause.
            AssertionError: When a CASE expression has no closing ``end``.
            StopIteration: When ``tokens`` ends where a token is mandatory.
        """
        main_token = next(tokens)
        next_token = None

        # Expressions that can be recognised from the first token alone.
        if main_token in String.QUOTES:
            expression = StringParser.parse(tokens, main_token)
        elif main_token.isdigit():
            expression = Integer(main_token)
        elif main_token in Boolean.BOOLEAN_VALUES:
            expression = Boolean(main_token)
        elif main_token in Null.VALUES:
            expression = Null()
        elif main_token in Type.VALUES:
            expression = Type(main_token)
        elif main_token == "(":
            argument_tokens = get_tokens_until_closing_parenthesis(tokens)
            arguments = ExpressionListParser.parse(iter(argument_tokens))
            expression = Parenthesis(*arguments)
        elif main_token == "case":
            argument_tokens, next_token = get_tokens_until_one_of(
                tokens, ["end"])
            assert next_token == "end"
            next_token = next(tokens, None)
            expression = CaseParser.parse(iter(argument_tokens))
        elif main_token == "select":
            # No stop words: the sub-select takes every remaining token.
            argument_tokens, next_token = get_tokens_until_one_of(tokens, [])
            next_token = next(tokens, None)
            expression = SelectStatementParser.parse(iter(argument_tokens))
        else:
            expression = None

        if next_token is None:
            next_token = next(tokens, None)

        # Expressions that need the next_token to be read
        if expression is None:
            if next_token is not None and next_token == "(":
                # ``name(...)`` is a function call.
                argument_tokens = get_tokens_until_closing_parenthesis(tokens)
                arguments = ExpressionListParser.parse(iter(argument_tokens))
                expression = FunctionCall(main_token, *arguments)
                next_token = next(tokens, None)
            elif next_token is not None and next_token == "[":
                argument_tokens, next_token = get_tokens_until_one_of(
                    tokens, stop_words=["]"])
                arguments = ExpressionListParser.parse(iter(argument_tokens))
                expression = Index(
                    Column(main_token),
                    arguments)  # left item will not always be a column
                next_token = next(tokens, None)
            elif next_token is not None and main_token == "-" and next_token.isdigit(
            ):
                # A leading "-" followed by digits is a negative integer.
                expression = Integer(-int(next_token))
                next_token = next(tokens, None)
            elif (main_token in String.PREFIXES and next_token is not None
                  and next_token in String.QUOTES):
                expression = StringParser.parse(tokens,
                                                start_quote=next_token,
                                                prefix=main_token)
            else:
                expression = Column(main_token)

        # Analytic clause: ``<expression> over ( [partition by ...]
        # [order by ...] [rows|range ...] )``.
        if next_token == "over":
            opening_parenthesis = next(tokens, None)
            if opening_parenthesis != "(":
                raise ParsingError("expected '('")

            argument_tokens = iter(
                get_tokens_until_closing_parenthesis(tokens))
            argument_next_token = next(argument_tokens, None)
            if argument_next_token == "partition":
                argument_next_token = next(argument_tokens, None)
                if not argument_next_token or argument_next_token != "by":
                    raise ParsingError("Missing BY after PARTITION")
                expression_tokens, argument_next_token = get_tokens_until_one_of(
                    argument_tokens, ["order", "rows", "range"])
                partition_by = ExpressionListParser.parse(
                    iter(expression_tokens))
            else:
                partition_by = None

            if argument_next_token == "order":
                argument_next_token = next(argument_tokens, None)
                if not argument_next_token or argument_next_token != "by":
                    raise ParsingError("Missing BY after ORDER")
                expression_tokens, argument_next_token = get_tokens_until_one_of(
                    argument_tokens, ["rows", "range"])
                order_by = OrderByParser.parse(iter(expression_tokens))
            else:
                order_by = None

            if argument_next_token in ("rows", "range"):
                rows_range = argument_next_token
                # The rest of the OVER arguments is kept as raw text.
                expression_tokens, _ = get_tokens_until_one_of(
                    argument_tokens, [])
                frame_clause = WindowFrameClause(rows_range,
                                                 " ".join(expression_tokens))
            else:
                frame_clause = None

            expression = AnalyticsClause(
                expression,
                partition_by=partition_by,
                order_by=order_by,
                frame_clause=frame_clause,
            )
            next_token = next(tokens, None)

        if next_token and next_token in ("+", "-", "*", "/"):
            left_hand = expression
            symbol = next_token
            right_hand, next_token = ExpressionParser.parse(tokens,
                                                            is_right_hand=True)
            expression = ArithmaticOperator(symbol, left_hand, right_hand)

        # Right-hand operands stop here and also report the following token.
        if is_right_hand:
            return expression, next_token

        if next_token in Condition.PREDICATES:
            symbol = next_token
            if next_token == "is":
                next_next_token = next(tokens)
                if next_next_token == "not":
                    symbol = "is not"
                else:
                    # The token after "is" belongs to the right-hand side:
                    # rebuild the token stream with it put back in front.
                    tokens, _ = get_tokens_until_one_of(
                        tokens, [], first_token=next_next_token)
                    tokens = iter(tokens)

            right_hand, next_token = ExpressionParser.parse(tokens,
                                                            is_right_hand=True)
            expression = Condition(expression, symbol, right_hand)
        elif next_token == "between":
            symbol = next_token
            right_hand_left, next_token = ExpressionParser.parse(
                tokens, is_right_hand=True)
            if next_token != "and":
                raise ParsingError("expected AND")
            right_hand_right, next_token = ExpressionParser.parse(
                tokens, is_right_hand=True)
            right_hand = BooleanCondition(
                "and",
                right_hand_left,
                right_hand_right,
            )
            expression = Condition(expression, symbol, right_hand)

        if next_token in BooleanCondition.PREDICATES:
            left_hand = expression
            symbol = next_token
            # Called without is_right_hand, so this returns a bare expression
            # (not a tuple) and the following token is read separately.
            right_hand = ExpressionParser.parse(tokens)
            expression = BooleanCondition(symbol, left_hand, right_hand)
            next_token = next(tokens, None)

        if next_token == "except":
            opening_parenthesis = next(tokens, None)
            if opening_parenthesis != "(":
                raise ParsingError("expected '('")
            argument_tokens = get_tokens_until_closing_parenthesis(tokens)
            arguments = ExpressionListParser.parse(iter(argument_tokens))
            expression = ExceptClause(expression, arguments)
            next_token = next(tokens, None)

        # Any remaining token that is not a closing parenthesis, a quote
        # character or ";" is treated as an alias (with or without "as").
        if (next_token is not None and next_token != ")" and next_token != "'"
                and next_token != '"' and next_token != "`"
                and next_token != ";"):
            if next_token == "as":
                with_as = True
                alias, _ = ExpressionParser.parse(tokens, is_right_hand=True)
            else:
                with_as = False
                alias = next_token
            return Alias(expression, alias, with_as)
        return expression
def test_index_function_access():
    """An index whose single item is a function call."""
    tokens = to_tokens("array[index(0)]")
    parsed = ExpressionParser.parse(tokens)

    index_items = [FunctionCall("index", Integer(0))]
    assert parsed == Index(Column("array"), index_items)
def test_index_access_alias():
    """An integer index access followed by a bare alias."""
    tokens = to_tokens("array[0] alias")
    parsed = ExpressionParser.parse(tokens)

    indexed = Index(Column("array"), [Integer(0)])
    assert parsed == Alias(indexed, with_as=False, alias="alias")
def test_index_access():
    """A plain integer index access on a column."""
    tokens = to_tokens("array[0]")
    parsed = ExpressionParser.parse(tokens)

    assert parsed == Index(Column("array"), [Integer(0)])