예제 #1
0
def test_group_by_without_from():
    """Parse ``SELECT 1 GROUP BY 2`` and compare against the expected tree."""
    parsed = SQLStatementParser.parse(to_tokens("SELECT 1 GROUP BY 2"))
    assert parsed == SelectStatement(
        expressions=[Integer(1)],
        group_by_clause=GroupByClause(Integer(2)),
        semi_colon=False,
    )
예제 #2
0
def test_boolean_condition():
    """``transform`` of an AND over two equality conditions gives SQL text."""
    first = Condition(Column("col"), "=", Integer(1))
    second = Condition(Column("col2"), "=", Integer(4))
    tree = BooleanCondition("and", first, second)
    rendered = transform(tree)
    assert rendered == "col = 1 AND col2 = 4"
예제 #3
0
def test_boolean_where_clause():
    """Parse ``col = 3 and field = 5`` as a WHERE body with an AND node."""
    parsed = WhereClauseParser.parse(to_tokens("col = 3 and field = 5"))
    col_is_3 = Condition(Column("col"), "=", Integer(3))
    field_is_5 = Condition(Column("field"), "=", Integer(5))
    assert parsed == WhereClause(BooleanCondition("and", col_is_3, field_is_5))
예제 #4
0
def test_chained_field():
    """Parse a dotted path whose segments carry ``offset(0)`` indices."""
    source = "table.field[offset(0)].subfield[offset(0)]"
    parsed, _ = ExpressionParser.parse(to_tokens(source))
    field_item = Index(Column("field"), [FunctionCall("offset", Integer(0))])
    subfield_item = Index(
        Column("subfield"), [FunctionCall("offset", Integer(0))])
    assert parsed == ChainedColumns(
        Column("table"),
        ChainedColumns(field_item, subfield_item),
    )
예제 #5
0
def test_limit_parentheses():
    """Parse a LIMIT whose value is wrapped in three pairs of parentheses."""
    parsed = SQLStatementParser.parse(to_tokens("SELECT 1 LIMIT (((3)))"))
    wrapped_three = Parenthesis(Parenthesis(Parenthesis(Integer(3))))
    limit = LimitClause(limit_all=False, expression=wrapped_three)
    assert parsed == SelectStatement(
        expressions=[Integer(1)],
        limit_clause=limit,
        semi_colon=False,
    )
예제 #6
0
def test_between_where_clause():
    """Parse ``col between 3 and 5``; the bounds are held in an AND node."""
    parsed = WhereClauseParser.parse(to_tokens("col between 3 and 5"))
    bounds = BooleanCondition("and", Integer(3), Integer(5))
    assert parsed == WhereClause(Condition(Column("col"), "between", bounds))
예제 #7
0
def test_consecutive_parenthesis():
    """Parse a parenthesised AND whose left operand starts with a parenthesis."""
    parsed = ExpressionParser.parse(to_tokens("((col+1) = 3 AND col2=4)"))
    sum_is_3 = Condition(
        Parenthesis(Addition(Column("col"), Integer(1))),
        "=",
        Integer(3),
    )
    col2_is_4 = Condition(Column("col2"), "=", Integer(4))
    assert parsed == Parenthesis(BooleanCondition("and", sum_is_3, col2_is_4))
예제 #8
0
def test_multiple_args_boolean_condition():
    """Parse three AND-ed conditions; the expected tree nests to the right."""
    source = "(col = 1 and col2=4 and col3=4)"
    parsed = WhereClauseParser.parse(to_tokens(source))
    tail = BooleanCondition(
        "and",
        Condition(Column("col2"), "=", Integer(4)),
        Condition(Column("col3"), "=", Integer(4)),
    )
    chain = BooleanCondition(
        "and",
        Condition(Column("col"), "=", Integer(1)),
        tail,
    )
    assert parsed == WhereClause(Parenthesis(chain))
예제 #9
0
def test_parenthesis_boolean_where_clause():
    """Parse two parenthesised boolean groups joined by OR."""
    source = "(col = 3 and field = 5) or (f2 or f3)"
    parsed = WhereClauseParser.parse(to_tokens(source))
    and_group = Parenthesis(
        BooleanCondition(
            "and",
            Condition(Column("col"), "=", Integer(3)),
            Condition(Column("field"), "=", Integer(5)),
        ))
    or_group = Parenthesis(BooleanCondition("or", Column("f2"), Column("f3")))
    assert parsed == WhereClause(BooleanCondition("or", and_group, or_group))
예제 #10
0
def test_where_clause_keeps_generator_intact():
    """The WHERE parser stops at ``group`` and leaves later tokens unread."""
    stream = to_tokens("col = 3 group by col")
    clause, lookahead = WhereClauseParser.parse(stream)
    assert clause == WhereClause(Condition(Column("col"), "=", Integer(3)))
    assert lookahead == "group"
    # Whatever the parser did not consume must still be available.
    remaining = list(stream)
    assert remaining == ["by", "col"]
예제 #11
0
def test_subquery():
    """Parse a SELECT whose FROM is a parenthesised aggregate sub-select."""
    source = ("SELECT col"
              " from (select count(*) col"
              " from table group by x) WHERE col > 10 ORDER BY col DESC;")
    parsed = SQLStatementParser.parse(to_tokens(source))
    inner_select = SelectStatement(
        expressions=[
            Alias(CountFunctionCall(Column("*")), "col", with_as=False)
        ],
        from_statement=Table(Column("table")),
        group_by_clause=GroupByClause(Column("x")),
        semi_colon=False,
    )
    assert parsed == SelectStatement(
        expressions=[Column("col")],
        from_statement=Parenthesis(inner_select),
        where_clause=WhereClause(Condition(Column("col"), ">", Integer(10))),
        order_by_clause=OrderByClause(
            OrderByItem(Column("col"), has_desc=True)),
        semi_colon=True,
    )
예제 #12
0
def test_index_access_right_hand():
    """Parse an indexed access that sits on the right of a comparison."""
    parsed = ExpressionParser.parse(to_tokens("field = array[0]"))
    indexed = Index(Column("array"), [Integer(0)])
    assert parsed == Condition(Column("field"), "=", indexed)
예제 #13
0
def test_order_by_clause():
    """Parse an ORDER BY listing a column and a positional integer."""
    source = "SELECT col FROM t ORDER BY col, 2"
    parsed = SQLStatementParser.parse(to_tokens(source))
    ordering = OrderByClause(
        OrderByItem(Column("col")),
        OrderByItem(Integer(2)),
    )
    assert parsed == SelectStatement(
        expressions=[Column("col")],
        from_statement=Table("t"),
        order_by_clause=ordering,
        semi_colon=False,
    )
예제 #14
0
def test_multiple_indices_access():
    """Parse an index with two comma-separated items followed by an alias."""
    source = "array[index(0), 'foo'] alias"
    parsed, _ = ExpressionParser.parse(to_tokens(source))
    items = [FunctionCall("index", Integer(0)), String("foo", quotes="'")]
    indexed = Index(Column("array"), items)
    assert parsed == Alias(indexed, with_as=False, alias="alias")
예제 #15
0
def test_boolean_condition():
    """Parse an aliased AND of an ``IS NOT NULL`` check and a comparison."""
    source = "f1 IS NOT NULL AND f2 > 0 fnew"
    parsed, _ = ExpressionParser.parse(to_tokens(source))
    not_null = Condition(Column("f1"), "is not", Null())
    positive = Condition(Column("f2"), ">", Integer(0))
    combined = BooleanCondition("AND", not_null, positive)
    assert parsed == Alias(combined, with_as=False, alias="fnew")
예제 #16
0
def test_boolean_condition_as_expression():
    """Parse a bare boolean condition through the expression parser."""
    parsed, _ = ExpressionParser.parse(
        to_tokens("field is not null and col > 0"))
    not_null = Condition(Column("field"), "is not", Null())
    positive = Condition(Column("col"), ">", Integer(0))
    assert parsed == BooleanCondition("and", not_null, positive)
예제 #17
0
def test_select_boolean_condition_expression():
    """Parse a SELECT whose only projected expression is a boolean condition."""
    parsed = SQLStatementParser.parse(
        to_tokens("select field is not null and col > 0 from t;"))
    not_null = Condition(Column("field"), "is not", Null())
    positive = Condition(Column("col"), ">", Integer(0))
    assert parsed == SelectStatement(
        expressions=[BooleanCondition("and", not_null, positive)],
        from_statement=Table(Column("t")),
    )
예제 #18
0
def test_chained_columns_with_arithmetic_operator():
    """Parse an aliased IF() whose condition adds two dotted columns."""
    source = "IF((a.field + b.field) = 200, 'true', 'false') fa"
    parsed, _ = ExpressionParser.parse(to_tokens(source))
    total = ArithmaticOperator(
        "+",
        ChainedColumns(Column("a"), Column("field")),
        ChainedColumns(Column("b"), Column("field")),
    )
    check = Condition(Parenthesis(total), "=", Integer(200))
    call = FunctionCall(
        "IF",
        check,
        String("true", quotes="'"),
        String("false", quotes="'"),
    )
    assert parsed == Alias(call, with_as=False, alias="fa")
예제 #19
0
def test_index_function_access():
    """Parse an index whose single item is a function call."""
    parsed = ExpressionParser.parse(to_tokens("array[index(0)]"))
    position = FunctionCall("index", Integer(0))
    assert parsed == Index(Column("array"), [position])
예제 #20
0
def test_index_access_alias():
    """Parse an indexed access followed by an alias without ``AS``."""
    parsed = ExpressionParser.parse(to_tokens("array[0] alias"))
    indexed = Index(Column("array"), [Integer(0)])
    assert parsed == Alias(indexed, with_as=False, alias="alias")
예제 #21
0
def test_index_access():
    """Parse a plain integer index on a column."""
    parsed = ExpressionParser.parse(to_tokens("array[0]"))
    assert parsed == Index(Column("array"), [Integer(0)])
예제 #22
0
def test_parenthesis_multiple_elements():
    """Parse a parenthesised, comma-separated list of mixed expressions."""
    parsed = ExpressionParser.parse(to_tokens("(field,other_field,3,'test')"))
    assert parsed == Parenthesis(
        Column("field"),
        Column("other_field"),
        Integer(3),
        String("test", quotes="'"),
    )
예제 #23
0
def test_select_all():
    """Parse ``SELECT ALL 1`` and expect the ``select_all`` flag to be set."""
    parsed = SQLStatementParser.parse(to_tokens("SELECT ALL 1"))
    assert parsed == SelectStatement(
        select_all=True,
        expressions=[Integer(1)],
        semi_colon=False,
    )
예제 #24
0
    def parse(tokens, is_right_hand=False):
        """Parse one expression from the front of a token iterator.

        The first token decides the kind of primary expression (string,
        integer, boolean, null, type, parenthesis, CASE, sub-select). If none
        of those match, the following token is inspected to distinguish a
        function call, an index access, a negative integer, a prefixed string
        or a plain column. Optional suffixes are then handled in this order:
        ``over (...)``, an arithmetic operator, a comparison predicate or
        ``between``, a boolean predicate, ``except (...)`` and finally an
        alias.

        Args:
            tokens: Iterator of token strings; it is consumed with ``next``.
            is_right_hand: When true, parsing stops after the optional
                arithmetic suffix and the lookahead token is handed back to
                the caller.

        Returns:
            ``(expression, next_token)`` when ``is_right_hand`` is true.
            Otherwise a single object: an ``Alias`` wrapping the expression
            if a trailing token is left that is not ``)``, a quote character
            or ``;``, else the bare expression.
            NOTE(review): the return shape therefore depends on
            ``is_right_hand``; callers must know which form they asked for.

        Raises:
            ParsingError: ``over`` or ``except`` is not followed by ``(``,
                ``partition``/``order`` is not followed by ``by``, or
                ``between`` has no ``and``.
            StopIteration: ``tokens`` is already exhausted on entry (the first
                ``next`` call has no default).
            AssertionError: a ``case`` expression is not terminated by
                ``end``.
        """
        main_token = next(tokens)
        # Lookahead token; stays None until some branch has actually read it.
        next_token = None

        # Expressions that can be recognised from the first token alone.
        if main_token in String.QUOTES:
            expression = StringParser.parse(tokens, main_token)
        elif main_token.isdigit():
            expression = Integer(main_token)
        elif main_token in Boolean.BOOLEAN_VALUES:
            expression = Boolean(main_token)
        elif main_token in Null.VALUES:
            expression = Null()
        elif main_token in Type.VALUES:
            expression = Type(main_token)
        elif main_token == "(":
            argument_tokens = get_tokens_until_closing_parenthesis(tokens)
            arguments = ExpressionListParser.parse(iter(argument_tokens))
            expression = Parenthesis(*arguments)
        elif main_token == "case":
            argument_tokens, next_token = get_tokens_until_one_of(
                tokens, ["end"])
            # NOTE(review): assert is stripped under ``python -O``; a missing
            # END would then go unnoticed here.
            assert next_token == "end"
            next_token = next(tokens, None)
            expression = CaseParser.parse(iter(argument_tokens))
        elif main_token == "select":
            # With no stop words given, the helper's returned lookahead is
            # discarded: next_token is overwritten on the following line.
            argument_tokens, next_token = get_tokens_until_one_of(tokens, [])
            next_token = next(tokens, None)
            expression = SelectStatementParser.parse(iter(argument_tokens))
        else:
            expression = None

        # Read the lookahead unless a branch above already produced one.
        if next_token is None:
            next_token = next(tokens, None)

        # Expressions that need the next_token to be read
        if expression is None:
            if next_token is not None and next_token == "(":
                # name ( args ) -> function call named after main_token
                argument_tokens = get_tokens_until_closing_parenthesis(tokens)
                arguments = ExpressionListParser.parse(iter(argument_tokens))
                expression = FunctionCall(main_token, *arguments)
                next_token = next(tokens, None)
            elif next_token is not None and next_token == "[":
                # name [ items ] -> index access
                argument_tokens, next_token = get_tokens_until_one_of(
                    tokens, stop_words=["]"])
                arguments = ExpressionListParser.parse(iter(argument_tokens))
                expression = Index(
                    Column(main_token),
                    arguments)  # left item will not always be a column
                next_token = next(tokens, None)
            elif next_token is not None and main_token == "-" and next_token.isdigit(
            ):
                # "-" followed by digits -> negative integer literal
                expression = Integer(-int(next_token))
                next_token = next(tokens, None)
            elif (main_token in String.PREFIXES and next_token is not None
                  and next_token in String.QUOTES):
                # Prefixed string. next_token is not advanced in this branch,
                # so it still holds the opening quote afterwards; the alias
                # check at the end of this method skips quote tokens.
                expression = StringParser.parse(tokens,
                                                start_quote=next_token,
                                                prefix=main_token)
            else:
                expression = Column(main_token)

        # Analytic suffix: expr OVER ( [PARTITION BY ...] [ORDER BY ...]
        # [ROWS|RANGE ...] )
        if next_token == "over":
            opening_parenthesis = next(tokens, None)
            if opening_parenthesis != "(":
                raise ParsingError("expected '('")

            # The sub-clauses are parsed from the tokens inside the
            # parentheses only, via a separate iterator.
            argument_tokens = iter(
                get_tokens_until_closing_parenthesis(tokens))
            argument_next_token = next(argument_tokens, None)
            if argument_next_token == "partition":
                argument_next_token = next(argument_tokens, None)
                if not argument_next_token or argument_next_token != "by":
                    raise ParsingError("Missing BY after PARTITION")
                expression_tokens, argument_next_token = get_tokens_until_one_of(
                    argument_tokens, ["order", "rows", "range"])
                partition_by = ExpressionListParser.parse(
                    iter(expression_tokens))
            else:
                partition_by = None

            if argument_next_token == "order":
                argument_next_token = next(argument_tokens, None)
                if not argument_next_token or argument_next_token != "by":
                    raise ParsingError("Missing BY after ORDER")
                expression_tokens, argument_next_token = get_tokens_until_one_of(
                    argument_tokens, ["rows", "range"])
                order_by = OrderByParser.parse(iter(expression_tokens))
            else:
                order_by = None

            if argument_next_token in ("rows", "range"):
                rows_range = argument_next_token
                expression_tokens, _ = get_tokens_until_one_of(
                    argument_tokens, [])
                # The frame specification is not parsed further; its tokens
                # are stored as one space-joined string.
                frame_clause = WindowFrameClause(rows_range,
                                                 " ".join(expression_tokens))
            else:
                frame_clause = None

            expression = AnalyticsClause(
                expression,
                partition_by=partition_by,
                order_by=order_by,
                frame_clause=frame_clause,
            )
            next_token = next(tokens, None)

        # Arithmetic suffix: the right operand is parsed recursively in
        # right-hand mode, which also returns the new lookahead.
        if next_token and next_token in ("+", "-", "*", "/"):
            left_hand = expression
            symbol = next_token
            right_hand, next_token = ExpressionParser.parse(tokens,
                                                            is_right_hand=True)
            expression = ArithmaticOperator(symbol, left_hand, right_hand)

        # Right-hand operands stop here so the caller can handle predicates,
        # boolean operators and aliases itself.
        if is_right_hand:
            return expression, next_token

        if next_token in Condition.PREDICATES:
            symbol = next_token
            if next_token == "is":
                # No default on next(): "is" as the last token raises
                # StopIteration.
                next_next_token = next(tokens)
                if next_next_token == "not":
                    symbol = "is not"
                else:
                    # The token after "is" was already consumed, so the
                    # iterator is rebuilt with that token passed as
                    # first_token before parsing the right operand.
                    # NOTE(review): presumably first_token is re-emitted at
                    # the front of the result; verify against the helper.
                    tokens, _ = get_tokens_until_one_of(
                        tokens, [], first_token=next_next_token)
                    tokens = iter(tokens)

            right_hand, next_token = ExpressionParser.parse(tokens,
                                                            is_right_hand=True)
            expression = Condition(expression, symbol, right_hand)
        elif next_token == "between":
            # expr BETWEEN low AND high: both bounds are stored in a
            # BooleanCondition("and", low, high) used as the right-hand side.
            symbol = next_token
            right_hand_left, next_token = ExpressionParser.parse(
                tokens, is_right_hand=True)
            if next_token != "and":
                raise ParsingError("expected AND")
            right_hand_right, next_token = ExpressionParser.parse(
                tokens, is_right_hand=True)
            right_hand = BooleanCondition(
                "and",
                right_hand_left,
                right_hand_right,
            )
            expression = Condition(expression, symbol, right_hand)

        if next_token in BooleanCondition.PREDICATES:
            left_hand = expression
            symbol = next_token
            # Non-right-hand recursion: everything remaining (further boolean
            # operators, or an alias) is parsed as the right operand, so
            # chained conditions nest to the right.
            right_hand = ExpressionParser.parse(tokens)
            expression = BooleanCondition(symbol, left_hand, right_hand)
            next_token = next(tokens, None)

        # expr EXCEPT ( columns )
        if next_token == "except":
            opening_parenthesis = next(tokens, None)
            if opening_parenthesis != "(":
                raise ParsingError("expected '('")
            argument_tokens = get_tokens_until_closing_parenthesis(tokens)
            arguments = ExpressionListParser.parse(iter(argument_tokens))
            expression = ExceptClause(expression, arguments)
            next_token = next(tokens, None)

        # Any token still pending, other than a closing parenthesis, a quote
        # character or a semicolon, is treated as an alias (with or without
        # AS).
        if (next_token is not None and next_token != ")" and next_token != "'"
                and next_token != '"' and next_token != "`"
                and next_token != ";"):
            if next_token == "as":
                with_as = True
                # With AS the alias is a parsed expression object ...
                alias, _ = ExpressionParser.parse(tokens, is_right_hand=True)
            else:
                with_as = False
                # ... without AS it is the raw token string.
                alias = next_token
            return Alias(expression, alias, with_as)
        return expression
예제 #25
0
def test_where_different_predicate():
    """Parse a WHERE body that uses the ``<>`` predicate."""
    parsed = WhereClauseParser.parse(to_tokens("col <> 3"))
    comparison = Condition(Column("col"), "<>", Integer(3))
    assert parsed == WhereClause(comparison)
예제 #26
0
def test_select_distinct():
    """Parse ``SELECT DISTINCT 1`` and expect ``select_distinct`` to be set."""
    parsed = SQLStatementParser.parse(to_tokens("SELECT DISTINCT 1"))
    assert parsed == SelectStatement(
        select_distinct=True,
        expressions=[Integer(1)],
        semi_colon=False,
    )
예제 #27
0
def test_nested_joins():
    """Parse a FULL OUTER JOIN of two aliased sub-selects that each join
    further nested sub-selects, and compare against the expected tree.

    The two sides of the outer join share most of their structure, so the
    repeated parts of the expected tree are produced by local builders that
    differ only in the date bounds passed in.
    """
    sql = """
SELECT COALESCE(sq_1.col, sq_2.col) f0_

FROM (SELECT ANY_VALUE(col) col,
LAST_VALUE(ANY_VALUE(col2)) OVER (PARTITION BY ANY_VALUE(col) ORDER BY SUM(clicks) ASC, SUM(metric) ASC RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) last,
hash
FROM (SELECT *
FROM `events`
WHERE _TABLE_SUFFIX BETWEEN '20200410' AND '20200510')
JOIN
(SELECT * EXCEPT (hash)
FROM (SELECT *,
ROW_NUMBER() OVER (PARTITION BY hash) AS rn
FROM `test-table`
WHERE _TABLE_SUFFIX BETWEEN '20200401' AND '20200501')
WHERE rn = 1)
USING (hash)
GROUP BY hash) sq_1

FULL OUTER JOIN

(SELECT ANY_VALUE(col) col,
hash
FROM (SELECT *
FROM `events`
WHERE _TABLE_SUFFIX BETWEEN '20200310' AND '20200410')
JOIN
(SELECT * EXCEPT (hash),
FROM (SELECT *,
ROW_NUMBER() OVER (PARTITION BY hash) AS rn
FROM `test-table`
WHERE _TABLE_SUFFIX BETWEEN '20200301' AND '20200401')
WHERE rn = 1)
USING (hash)
GROUP BY hash) sq_2

ON sq_1.hash = sq_2.hash
WHERE sq_1.last = 1
GROUP BY f0_
"""  # noqa

    def suffix_between(low, high):
        # WHERE _TABLE_SUFFIX BETWEEN '<low>' AND '<high>'
        return WhereClause(
            Condition(
                Column("_table_suffix"),
                "between",
                BooleanCondition(
                    "and",
                    String(low, quotes="'"),
                    String(high, quotes="'"),
                ),
            ))

    def events_select(low, high):
        # (SELECT * FROM `events` WHERE <suffix range>)
        return Parenthesis(
            SelectStatement(
                expressions=[Column("*")],
                from_statement=Table(String("events", quotes="`")),
                where_clause=suffix_between(low, high),
                semi_colon=False,
            ))

    def numbered_rows_select(low, high):
        # (SELECT *, ROW_NUMBER() OVER (PARTITION BY hash) AS rn
        #  FROM `test-table` WHERE <suffix range>)
        row_number = AnalyticsClause(
            FunctionCall("row_number"),
            partition_by=[Column("hash")],
            order_by=None,
            frame_clause=None,
        )
        return Parenthesis(
            SelectStatement(
                expressions=[
                    Column("*"),
                    Alias(row_number, Column("rn"), with_as=True),
                ],
                from_statement=Table(String("test-table", quotes="`")),
                where_clause=suffix_between(low, high),
                semi_colon=False,
            ))

    def first_row_select(low, high):
        # (SELECT * EXCEPT (hash) FROM <numbered rows> WHERE rn = 1)
        return Parenthesis(
            SelectStatement(
                expressions=[ExceptClause(Column("*"), [Column("hash")])],
                from_statement=numbered_rows_select(low, high),
                where_clause=WhereClause(
                    Condition(Column("rn"), "=", Integer(1))),
                semi_colon=False,
            ))

    def hash_join(events_range, table_range):
        # <events select> JOIN <first-row select> USING (hash)
        return Join(
            join_type="JOIN",
            left_from=events_select(*events_range),
            right_from=first_row_select(*table_range),
            on=None,
            using=UsingClause(Parenthesis(Column("hash"))),
        )

    def any_value_col():
        # ANY_VALUE(col) col
        return Alias(
            FunctionCall("any_value", Column("col")),
            "col",
            with_as=False,
        )

    def aliased_subquery(expressions, join, alias):
        # (SELECT <expressions> FROM <join> GROUP BY hash) <alias>
        return Alias(
            Parenthesis(
                SelectStatement(
                    expressions=expressions,
                    from_statement=join,
                    group_by_clause=GroupByClause(Column("hash")),
                    semi_colon=False,
                )),
            alias,
            with_as=False,
        )

    last_value_window = Alias(
        AnalyticsClause(
            FunctionCall(
                "last_value",
                FunctionCall("any_value", Column("col2")),
            ),
            partition_by=[FunctionCall("any_value", Column("col"))],
            order_by=OrderByClause(
                OrderByItem(
                    FunctionCall("sum", Column("clicks")),
                    has_asc=True,
                ),
                OrderByItem(
                    FunctionCall("sum", Column("metric")),
                    has_asc=True,
                ),
            ),
            frame_clause=WindowFrameClause(
                "range",
                "between unbounded preceding and unbounded following",  # noqa
            ),
        ),
        "last",
        with_as=False,
    )

    sq_1 = aliased_subquery(
        [any_value_col(), last_value_window, Column("hash")],
        hash_join(("20200410", "20200510"), ("20200401", "20200501")),
        "sq_1",
    )
    sq_2 = aliased_subquery(
        [any_value_col(), Column("hash")],
        hash_join(("20200310", "20200410"), ("20200301", "20200401")),
        "sq_2",
    )

    parsed = SQLStatementParser.parse(to_tokens(sql))
    wanted = SelectStatement(
        expressions=[
            Alias(
                FunctionCall("coalesce", Column("sq_1.col"),
                             Column("sq_2.col")),
                "f0_",
                with_as=False,
            )
        ],
        from_statement=Join(
            "FULL OUTER JOIN",
            left_from=sq_1,
            right_from=sq_2,
            on=OnClause(
                Condition(Column("sq_1.hash"), "=", Column("sq_2.hash"))),
            using=None,
        ),
        where_clause=WhereClause(
            Condition(Column("sq_1.last"), "=", Integer(1))),
        group_by_clause=GroupByClause(Column("f0_")),
        semi_colon=False,
    )
    assert parsed == wanted
예제 #28
0
def test_where_clause():
    """A single equality comparison parses into a WhereClause holding one Condition."""
    parsed = WhereClauseParser.parse(to_tokens("col = 3"))
    condition = Condition(Column("col"), "=", Integer(3))
    assert parsed == WhereClause(condition)
예제 #29
0
    def parse(
        tokens,
        is_right_hand=False,
        can_be_type=False,
        can_alias=True,
        until_one_of=None,
        first_token=None,
        is_chained_columns=False,
    ) -> Tuple[Expression, Any]:
        """Parse one expression from ``tokens`` and return it with a lookahead.

        The main token (``first_token`` when given, otherwise the next item of
        ``tokens``) selects the base expression: a string, number, boolean,
        null, negation, parenthesised list, array, CASE, sub-select, function
        call, date-part extraction, type, index, prefixed string or plain
        column. Trailing constructs are then wrapped around it in this order:
        OVER (...), "." chaining, arithmetic, "[...]" indexing, comparison /
        BETWEEN / bitwise operators, boolean predicates, EXCEPT (...) and
        finally an alias.

        Args:
            tokens: Iterator of token strings; it is consumed while parsing.
            is_right_hand: When true, return right after the arithmetic and
                indexing steps, without looking for conditions or aliases.
            can_be_type: Allow a main token found in ``Type.VALUES`` to become
                a ``Type``; also forwarded to generic function arguments.
            can_alias: Allow a trailing token to be consumed as an alias.
            until_one_of: Lower-case tokens that must not be taken as an
                alias; forwarded to nested parses. ``None`` means no such
                tokens.
            first_token: Token to use as the main token instead of reading
                one from ``tokens``.
            is_chained_columns: Set when parsing the right side of ".";
                disables arithmetic handling and returns early like
                ``is_right_hand``.

        Returns:
            A ``(expression, next_token)`` tuple, where ``next_token`` is the
            token read after the expression, or ``None`` when ``tokens`` is
            exhausted.

        Raises:
            ParsingError: On a missing "(" after OVER/EXCEPT, a missing BY
                after PARTITION/ORDER, or a missing AND in BETWEEN.
            AssertionError: On malformed CAST, ARRAY_AGG, FILTER, CASE or
                array syntax (checked with ``assert``).
        """
        until_one_of = until_one_of or []

        main_token = first_token or next(tokens)
        next_token = None

        # Expressions that can be recognised from the main token alone.
        if main_token in String.QUOTES:
            expression = StringParser.parse(tokens, main_token)
        elif main_token.isdigit():
            expression = Integer(main_token)
        elif main_token.replace(".", "").isdigit():
            expression = Float(main_token)
        elif lower(main_token) in Boolean.BOOLEAN_VALUES:
            expression = Boolean(main_token)
        elif lower(main_token) in Null.VALUES:
            expression = Null()
        elif lower(main_token) == Negation.PREDICATE:
            rest_expression, next_token = ExpressionParser.parse(
                tokens,
                is_right_hand=True,
                until_one_of=until_one_of,
            )
            expression = Negation(rest_expression)
        elif main_token == "(":
            argument_tokens = get_tokens_until_closing_parenthesis(tokens)
            arguments = ExpressionListParser.parse(iter(argument_tokens))
            expression = Parenthesis(*arguments)
        elif main_token == "[":
            argument_tokens, next_token = get_tokens_until_one_of(
                tokens, stop_words=["]"])
            assert next_token == "]", next_token
            arguments = ExpressionListParser.parse(iter(argument_tokens))
            expression = Array(*arguments)
            next_token = next(tokens, None)
        elif lower(main_token) == "case":
            argument_tokens, next_token = get_tokens_until_one_of(
                tokens, ["end"])
            assert lower(next_token) == "end"
            next_token = next(tokens, None)
            expression = CaseParser.parse(iter(argument_tokens))
        elif lower(main_token) == "select":
            argument_tokens, next_token = get_tokens_until_one_of(tokens, [])
            next_token = next(tokens, None)
            expression = SelectStatementParser.parse(iter(argument_tokens))
        else:
            expression = None

        # Branches above that did not already read a lookahead get one here.
        if next_token is None:
            next_token = next(tokens, None)

        # Expressions that need the next_token to be read
        if expression is None:
            if next_token is not None and next_token == "(":
                # main_token is a function name; a few functions have
                # dedicated argument syntax.
                if lower(main_token) == "cast":
                    column_tokens, next_token = get_tokens_until_one_of(
                        tokens, stop_words=["as"])
                    column, _ = ExpressionParser.parse(
                        iter(column_tokens),
                        is_right_hand=True,
                        until_one_of=until_one_of,
                    )
                    assert lower(next_token) == "as", next_token
                    next_token = next(tokens)
                    cast_type = Type(next_token)
                    expression = CastFunctionCall(column, cast_type)
                    next_token = next(tokens)
                    assert lower(next_token) == ")", next_token
                elif lower(main_token) == "array_agg":
                    next_token = next(tokens)
                    if lower(next_token) == "distinct":
                        distinct = True
                        first_token = None
                    else:
                        # Not DISTINCT: the token already read belongs to the
                        # column expression, so hand it back to the scanner.
                        distinct = False
                        first_token = next_token

                    # The stop word must be "respect" (as in RESPECT NULLS) so
                    # that it matches the check below; with "respects" the
                    # scan never stopped there and the RESPECT NULLS branch
                    # could not be reached.
                    column_tokens, next_token = get_tokens_until_one_of(
                        tokens,
                        stop_words=[
                            ")", "ignore", "respect", "order", "limit"
                        ],
                        first_token=first_token,
                    )
                    column, _ = ExpressionParser.parse(
                        iter(column_tokens), until_one_of=until_one_of)

                    ignore_nulls = respect_nulls = False
                    if lower(next_token) == "ignore":
                        next_token = next(tokens)
                        assert lower(next_token) == "nulls"
                        ignore_nulls = True
                        next_token = next(tokens)
                    elif lower(next_token) == "respect":
                        next_token = next(tokens)
                        assert lower(next_token) == "nulls"
                        respect_nulls = True
                        next_token = next(tokens)

                    if lower(next_token) == "order":
                        next_token = next(tokens)
                        assert lower(next_token) == "by"
                        expression_tokens, next_token = get_tokens_until_one_of(
                            tokens, ["limit", ")"])
                        order_bys = OrderByParser.parse(
                            iter(expression_tokens))
                    else:
                        order_bys = None

                    limit = None
                    if lower(next_token) == "limit":
                        next_token = next(tokens)
                        limit = int(next_token)
                        next_token = next(tokens)

                    assert lower(next_token) == ")", next_token
                    expression = ArrayAggFunctionCall(
                        column=column,
                        distinct=distinct,
                        ignore_nulls=ignore_nulls,
                        respect_nulls=respect_nulls,
                        order_bys=order_bys,
                        limit=limit,
                    )
                elif lower(main_token) == "count":
                    next_token = next(tokens)
                    if lower(next_token) == "distinct":
                        distinct = True
                        first_token = None
                    else:
                        distinct = False
                        first_token = next_token

                    argument_tokens = get_tokens_until_closing_parenthesis(
                        tokens, first_token=first_token)
                    arguments = ExpressionListParser.parse(
                        iter(argument_tokens))
                    expression = CountFunctionCall(*arguments,
                                                   distinct=distinct)
                else:
                    argument_tokens = get_tokens_until_closing_parenthesis(
                        tokens)
                    # Arguments may be parsed as types when the caller allows
                    # it or when a timestamp_trunc token appears among them.
                    arguments_can_be_type = can_be_type or any(
                        lower(t) == "timestamp_trunc" for t in argument_tokens)
                    arguments = ExpressionListParser.parse(
                        iter(argument_tokens),
                        can_be_type=arguments_can_be_type)
                    expression = FunctionCall(main_token, *arguments)

                # Any function call may be followed by FILTER (WHERE ...).
                next_token = next(tokens, None)
                if next_token and lower(next_token) == "filter":
                    next_next_token = next(tokens)
                    assert next_next_token == "(", next_next_token
                    argument_tokens = get_tokens_until_closing_parenthesis(
                        tokens)
                    assert lower(
                        argument_tokens[0]) == "where", argument_tokens
                    filter_condition, next_token = ExpressionParser.parse(
                        iter(argument_tokens[1:]),
                        can_alias=False,
                    )

                    expression = FilteredFunctionCall(expression,
                                                      filter_condition)
                    next_token = next(tokens, None)

            elif (next_token is not None
                  and lower(main_token) in DatePartExtraction.PARTS
                  and lower(next_token) == "from"):
                rest_expression, next_token = ExpressionParser.parse(
                    tokens, until_one_of=until_one_of)
                expression = DatePartExtraction(main_token, rest_expression)
            elif lower(main_token) in Type.VALUES and can_be_type:
                expression = Type(main_token)
            elif next_token is not None and next_token == "[":
                argument_tokens, next_token = get_tokens_until_one_of(
                    tokens, stop_words=["]"])
                arguments = ExpressionListParser.parse(iter(argument_tokens))
                expression = Index(
                    Column(main_token),
                    arguments)  # left item will not always be a column
                next_token = next(tokens, None)
            elif next_token is not None and main_token == "-" and next_token.isdigit(
            ):
                # A "-" followed by a number is folded into a negative literal.
                expression = Integer(-int(next_token))
                next_token = next(tokens, None)
            elif (next_token is not None and main_token == "-"
                  and next_token.replace(".", "").isdigit()):
                expression = Float(-float(next_token))
                next_token = next(tokens, None)
            elif (lower(main_token) in String.PREFIXES
                  and next_token is not None
                  and lower(next_token) in String.QUOTES):
                expression = StringParser.parse(tokens,
                                                start_quote=next_token,
                                                prefix=main_token)
            else:
                expression = Column(main_token)

        # Analytic clause: OVER ( [PARTITION BY ...] [ORDER BY ...] [frame] ).
        if lower(next_token) == "over":
            opening_parenthesis = next(tokens, None)
            if opening_parenthesis != "(":
                raise ParsingError("expected '('")

            argument_tokens = iter(
                get_tokens_until_closing_parenthesis(tokens))
            argument_next_token = next(argument_tokens, None)
            if lower(argument_next_token) == "partition":
                argument_next_token = next(argument_tokens, None)
                if not argument_next_token or lower(
                        argument_next_token) != "by":
                    raise ParsingError("Missing BY after PARTITION")
                expression_tokens, argument_next_token = get_tokens_until_one_of(
                    argument_tokens, ["order", "rows", "range"])
                partition_by = ExpressionListParser.parse(
                    iter(expression_tokens))
            else:
                partition_by = None

            if lower(argument_next_token) == "order":
                argument_next_token = next(argument_tokens, None)
                if not argument_next_token or lower(
                        argument_next_token) != "by":
                    raise ParsingError("Missing BY after ORDER")
                expression_tokens, argument_next_token = get_tokens_until_one_of(
                    argument_tokens, ["rows", "range"])
                order_by = OrderByParser.parse(iter(expression_tokens))
            else:
                order_by = None

            if lower(argument_next_token) in ("rows", "range"):
                rows_range = argument_next_token
                # The frame specification is kept as raw text, not parsed.
                expression_tokens, _ = get_tokens_until_one_of(
                    argument_tokens, [])
                frame_clause: Optional[WindowFrameClause] = WindowFrameClause(
                    rows_range, " ".join(expression_tokens))
            else:
                frame_clause = None

            expression = AnalyticsClause(
                expression,
                partition_by=partition_by,
                order_by=order_by,
                frame_clause=frame_clause,
            )
            next_token = next(tokens, None)

        # Dotted access: each "." chains the expression parsed to its right.
        while next_token == ".":
            right_hand, next_token = ExpressionParser.parse(
                tokens, until_one_of=until_one_of, is_chained_columns=True)
            expression = ChainedColumns(expression, right_hand)

        # Arithmetic is skipped while parsing the right side of a "." so the
        # operator is applied by the outer call to the whole chain.
        if next_token and next_token in ("+", "-", "*",
                                         "/") and not is_chained_columns:
            left_hand = expression
            symbol = next_token
            right_hand, next_token = ExpressionParser.parse(
                tokens,
                is_right_hand=True,
                until_one_of=until_one_of,
            )
            expression = ArithmaticOperator(symbol, left_hand, right_hand)

        while next_token == "[":
            argument_tokens, next_token = get_tokens_until_one_of(
                tokens, stop_words=["]"])
            arguments = ExpressionListParser.parse(iter(argument_tokens))
            expression = Index(expression, arguments)
            next_token = next(tokens, None)

        # Right-hand operands and chained columns stop here; conditions and
        # aliases are handled by the caller that owns the left-hand side.
        if is_right_hand or is_chained_columns:
            return expression, next_token

        if lower(next_token) in Condition.PREDICATES:
            first_token = None
            symbol = next_token
            # "is not" and "not in" are two-token predicates; when the second
            # token is something else it is passed on as the first token of
            # the right-hand expression.
            if lower(next_token) == "is":
                next_next_token = next(tokens)
                if lower(next_next_token) == "not":
                    symbol = "is not"
                else:
                    first_token = next_next_token
            elif lower(next_token) == "not":
                next_next_token = next(tokens)
                if lower(next_next_token) == "in":
                    symbol = "not in"
                else:
                    first_token = next_next_token

            right_hand, next_token = ExpressionParser.parse(
                tokens,
                is_right_hand=True,
                until_one_of=until_one_of,
                first_token=first_token,
            )
            expression = Condition(expression, symbol, right_hand)
        elif lower(next_token) == "between":
            symbol = next_token
            right_hand_left, next_token = ExpressionParser.parse(
                tokens, is_right_hand=True, until_one_of=until_one_of)
            if lower(next_token) != "and":
                raise ParsingError("expected AND")
            right_hand_right, next_token = ExpressionParser.parse(
                tokens, is_right_hand=True, until_one_of=until_one_of)
            # The two BETWEEN bounds are represented as an "and" condition.
            right_hand = BooleanCondition(
                "and",
                right_hand_left,
                right_hand_right,
            )
            expression = Condition(expression, symbol, right_hand)
        elif next_token in BitwiseOperation.OPERATORS:
            operator = next_token
            right_hand, next_token = ExpressionParser.parse(
                tokens, is_right_hand=True, until_one_of=until_one_of)
            expression = BitwiseOperation(expression, operator, right_hand)

        if lower(next_token) in BooleanCondition.PREDICATES:
            left_hand = expression
            symbol = next_token
            right_hand, next_token = ExpressionParser.parse(
                tokens, until_one_of=until_one_of)
            # If the nested parse attached an alias to the right operand,
            # move that alias so it wraps the whole boolean condition.
            right_alias = None
            if isinstance(right_hand, Alias):
                right_alias = right_hand
                right_hand = right_hand.expression
            expression = BooleanCondition(symbol, left_hand, right_hand)
            if right_alias is not None:
                right_alias.expression = expression
                expression = right_alias

        if lower(next_token) == "except":
            opening_parenthesis = next(tokens, None)
            if opening_parenthesis != "(":
                raise ParsingError("expected '('")
            argument_tokens = get_tokens_until_closing_parenthesis(tokens)
            arguments = ExpressionListParser.parse(iter(argument_tokens))
            expression = ExceptClause(expression, arguments)
            next_token = next(tokens, None)

        # Any remaining token that is not a terminator, a quote following a
        # string, or a caller-supplied stop word is taken as an alias.
        if (next_token is not None and next_token != ")"
                and not (next_token in String.QUOTES
                         and isinstance(expression, String))
                and next_token != ";" and lower(next_token) not in until_one_of
                and can_alias):
            if lower(next_token) == "as":
                with_as = True
                alias, _ = ExpressionParser.parse(tokens,
                                                  is_right_hand=True,
                                                  until_one_of=until_one_of)
            else:
                with_as = False
                alias = next_token
            if alias in String.QUOTES:
                alias = StringParser.parse(tokens, alias)
            return Alias(expression, alias, with_as), next(tokens, None)
        return expression, next_token
예제 #30
0
def test_parenthesis_expression_where_clause():
    """A parenthesised sum on the left of "=" parses as Parenthesis(Addition(...))."""
    parsed = WhereClauseParser.parse(to_tokens("(col + 1) = col2"))
    left_side = Parenthesis(Addition(Column("col"), Integer(1)))
    condition = Condition(left_side, "=", Column("col2"))
    assert parsed == WhereClause(condition)