Example #1
0
def test_chained_field():
    actual, _ = ExpressionParser.parse(
        to_tokens("table.field[offset(0)].subfield[offset(0)]"))
    expected = ChainedColumns(
        Column("table"),
        ChainedColumns(
            Index(Column("field"), [FunctionCall("offset", Integer(0))]),
            Index(Column("subfield"), [FunctionCall("offset", Integer(0))]),
        ),
    )
    assert actual == expected
Example #2
0
def test_nested_date_functions():
    actual, _ = ExpressionParser.parse(
        to_tokens("DATE(TIMESTAMP_TRUNC(CAST(date AS TIMESTAMP), MONTH))"))
    expected = FunctionCall(
        "DATE",
        FunctionCall(
            "TIMESTAMP_TRUNC",
            CastFunctionCall(
                Column("date"),
                Type("TIMESTAMP"),
            ),
            Type("MONTH"),
        ),
    )
    assert actual == expected
def test_nested_date_functions():
    actual = ExpressionParser.parse(
        to_tokens("DATE(TIMESTAMP_TRUNC(CAST(a.date AS TIMESTAMP), MONTH))"))
    expected = FunctionCall(
        "date",
        FunctionCall(
            "timestamp_trunc",
            FunctionCall(
                "cast",
                Alias(Column("a.date"), alias=Type("timestamp"),
                      with_as=True)),
            Type("month"),
        ),
    )
    assert actual == expected
Example #4
0
def test_unnest_with_offset_as_alias():
    sql = "unnest(table) with offset as o"
    actual = FromStatementParser.parse(to_tokens(sql))
    expected = Unnest(
        FunctionCall("unnest", Column("table")),
        with_offset=True,
        with_offset_as=True,
        offset_alias="o",
    )
    assert actual == expected
Example #5
0
def test_unnest():
    sql = "unnest(table)"
    actual = FromStatementParser.parse(to_tokens(sql))
    expected = Unnest(
        FunctionCall("unnest", Column("table")),
        with_offset=False,
        with_offset_as=False,
        offset_alias=None,
    )
    assert actual == expected
Example #6
0
def test_multiple_indices_access():
    actual, _ = ExpressionParser.parse(to_tokens("array[index(0), 'foo'] alias"))
    expected = Alias(
        Index(
            Column("array"),
            [FunctionCall("index", Integer(0)), String("foo", quotes="'")],
        ),
        with_as=False,
        alias="alias",
    )
    assert actual == expected
Example #7
0
    def parse(tokens):
        next_token = next(tokens)
        assert next_token == "("
        argument_tokens = get_tokens_until_closing_parenthesis(tokens)
        arguments = ExpressionListParser.parse(iter(argument_tokens))
        expression = FunctionCall("unnest", *arguments)

        next_token = next(tokens, None)
        if (next_token is not None and lower(next_token) not in Join.VALUES
                and lower(next_token) not in CombinedQueries.SET_OPERATORS
                and lower(next_token) != "with"):
            if lower(next_token) == "as":
                with_as = True
                alias = next(tokens)
            else:
                with_as = False
                alias = next_token
            if alias in String.QUOTES:
                alias = StringParser.parse(tokens, alias)
            expression = Alias(expression, alias, with_as)
            next_token = next(tokens, None)

        with_offset = False
        with_offset_as = False
        offset_alias = None

        if lower(next_token) == "with":
            next_token = next(tokens)
            assert lower(next_token) == "offset", next_token
            with_offset = True

            next_token = next(tokens, None)
            if (next_token is not None and lower(next_token) not in Join.VALUES
                    and lower(next_token)
                    not in CombinedQueries.SET_OPERATORS):
                if lower(next_token) == "as":
                    with_offset_as = True
                    offset_alias = next(tokens)
                else:
                    offset_alias = next_token
                next_token = next(tokens, None)

        return (
            Unnest(
                expression,
                with_offset=with_offset,
                with_offset_as=with_offset_as,
                offset_alias=offset_alias,
            ),
            next_token,
        )
Example #8
0
def test_chained_columns_with_arithmetic_operator():
    actual, _ = ExpressionParser.parse(
        to_tokens("IF((a.field + b.field) = 200, 'true', 'false') fa"))
    expected = Alias(
        FunctionCall(
            "IF", *[
                Condition(
                    Parenthesis(
                        ArithmaticOperator(
                            "+",
                            ChainedColumns(Column("a"), Column("field")),
                            ChainedColumns(Column("b"), Column("field")),
                        )),
                    "=",
                    Integer(200),
                ),
                String("true", quotes="'"),
                String("false", quotes="'"),
            ]),
        with_as=False,
        alias="fa",
    )
    assert actual == expected
def test_subquery():
    actual = SQLStatementParser.parse(
        to_tokens("SELECT col"
                  " from (select count(*) col"
                  " from table group by x) WHERE col > 10 ORDER BY col DESC;"))
    expected = SelectStatement(
        expressions=[Column("col")],
        from_statement=Parenthesis(
            SelectStatement(
                expressions=[
                    Alias(FunctionCall("count", Column("*")),
                          "col",
                          with_as=False)
                ],
                from_statement=Table("table"),
                group_by_clause=GroupByClause(Column("x")),
                semi_colon=False,
            )),
        where_clause=WhereClause(Condition(Column("col"), ">", Integer(10))),
        order_by_clause=OrderByClause(OrderByItem(Column("col"),
                                                  has_desc=True)),
        semi_colon=True,
    )
    assert actual == expected
Example #10
0
def test_nested_functions():
    actual = ExpressionParser.parse(to_tokens("test(foo(col))"))
    expected = FunctionCall("test", FunctionCall("foo", Column("col")))
    assert actual == expected
Example #11
0
def test_simple_function_multiple_params():
    actual = ExpressionParser.parse(to_tokens("test(col, 'Test')"))
    expected = FunctionCall("test", Column("col"), String("Test", quotes="'"))
    assert actual == expected
Example #12
0
def test_nested_joins():
    sql = """
SELECT COALESCE(sq_1.col, sq_2.col) f0_

FROM (SELECT ANY_VALUE(col) col,
LAST_VALUE(ANY_VALUE(col2)) OVER (PARTITION BY ANY_VALUE(col) ORDER BY SUM(clicks) ASC, SUM(metric) ASC RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) last,
hash
FROM (SELECT *
FROM `events`
WHERE _TABLE_SUFFIX BETWEEN '20200410' AND '20200510')
JOIN
(SELECT * EXCEPT (hash)
FROM (SELECT *,
ROW_NUMBER() OVER (PARTITION BY hash) AS rn
FROM `test-table`
WHERE _TABLE_SUFFIX BETWEEN '20200401' AND '20200501')
WHERE rn = 1)
USING (hash)
GROUP BY hash) sq_1

FULL OUTER JOIN

(SELECT ANY_VALUE(col) col,
hash
FROM (SELECT *
FROM `events`
WHERE _TABLE_SUFFIX BETWEEN '20200310' AND '20200410')
JOIN
(SELECT * EXCEPT (hash),
FROM (SELECT *,
ROW_NUMBER() OVER (PARTITION BY hash) AS rn
FROM `test-table`
WHERE _TABLE_SUFFIX BETWEEN '20200301' AND '20200401')
WHERE rn = 1)
USING (hash)
GROUP BY hash) sq_2

ON sq_1.hash = sq_2.hash
WHERE sq_1.last = 1
GROUP BY f0_
"""  # noqa
    actual = SQLStatementParser.parse(to_tokens(sql))
    expected = SelectStatement(
        expressions=[
            Alias(
                FunctionCall("coalesce", Column("sq_1.col"),
                             Column("sq_2.col")),
                "f0_",
                with_as=False,
            )
        ],
        from_statement=Join(
            "FULL OUTER JOIN",
            left_from=Alias(
                Parenthesis(
                    SelectStatement(
                        expressions=[
                            Alias(
                                FunctionCall("any_value", Column("col")),
                                "col",
                                with_as=False,
                            ),
                            Alias(
                                AnalyticsClause(
                                    FunctionCall(
                                        "last_value",
                                        FunctionCall("any_value",
                                                     Column("col2")),
                                    ),
                                    partition_by=[
                                        FunctionCall("any_value",
                                                     Column("col"))
                                    ],
                                    order_by=OrderByClause(
                                        OrderByItem(
                                            FunctionCall(
                                                "sum", Column("clicks")),
                                            has_asc=True,
                                        ),
                                        OrderByItem(
                                            FunctionCall(
                                                "sum", Column("metric")),
                                            has_asc=True,
                                        ),
                                    ),
                                    frame_clause=WindowFrameClause(
                                        "range",
                                        "between unbounded preceding and unbounded following",  # noqa
                                    ),
                                ),
                                "last",
                                with_as=False,
                            ),
                            Column("hash"),
                        ],
                        from_statement=Join(
                            join_type="JOIN",
                            left_from=Parenthesis(
                                SelectStatement(
                                    expressions=[Column("*")],
                                    from_statement=Table(
                                        String("events", quotes="`")),
                                    where_clause=WhereClause(
                                        Condition(
                                            Column("_table_suffix"),
                                            "between",
                                            BooleanCondition(
                                                "and",
                                                String("20200410", quotes="'"),
                                                String("20200510", quotes="'"),
                                            ),
                                        )),
                                    semi_colon=False,
                                )),
                            right_from=Parenthesis(
                                SelectStatement(
                                    expressions=[
                                        ExceptClause(Column("*"),
                                                     [Column("hash")])
                                    ],
                                    from_statement=Parenthesis(
                                        SelectStatement(
                                            expressions=[
                                                Column("*"),
                                                Alias(
                                                    AnalyticsClause(
                                                        FunctionCall(
                                                            "row_number"),
                                                        partition_by=[
                                                            Column("hash")
                                                        ],
                                                        order_by=None,
                                                        frame_clause=None,
                                                    ),
                                                    Column("rn"),
                                                    with_as=True,
                                                ),
                                            ],
                                            from_statement=Table(
                                                String("test-table",
                                                       quotes="`")),
                                            where_clause=WhereClause(
                                                Condition(
                                                    Column("_table_suffix"),
                                                    "between",
                                                    BooleanCondition(
                                                        "and",
                                                        String("20200401",
                                                               quotes="'"),
                                                        String("20200501",
                                                               quotes="'"),
                                                    ),
                                                )),
                                            semi_colon=False,
                                        )),
                                    where_clause=WhereClause(
                                        Condition(Column("rn"), "=",
                                                  Integer(1))),
                                    semi_colon=False,
                                )),
                            on=None,
                            using=UsingClause(Parenthesis(Column("hash"))),
                        ),
                        group_by_clause=GroupByClause(Column("hash")),
                        semi_colon=False,
                    )),
                "sq_1",
                with_as=False,
            ),
            right_from=Alias(
                Parenthesis(
                    SelectStatement(
                        expressions=[
                            Alias(
                                FunctionCall("any_value", Column("col")),
                                "col",
                                with_as=False,
                            ),
                            Column("hash"),
                        ],
                        from_statement=Join(
                            join_type="JOIN",
                            left_from=Parenthesis(
                                SelectStatement(
                                    expressions=[Column("*")],
                                    from_statement=Table(
                                        String("events", quotes="`")),
                                    where_clause=WhereClause(
                                        Condition(
                                            Column("_table_suffix"),
                                            "between",
                                            BooleanCondition(
                                                "and",
                                                String("20200310", quotes="'"),
                                                String("20200410", quotes="'"),
                                            ),
                                        )),
                                    semi_colon=False,
                                )),
                            right_from=Parenthesis(
                                SelectStatement(
                                    expressions=[
                                        ExceptClause(Column("*"),
                                                     [Column("hash")])
                                    ],
                                    from_statement=Parenthesis(
                                        SelectStatement(
                                            expressions=[
                                                Column("*"),
                                                Alias(
                                                    AnalyticsClause(
                                                        FunctionCall(
                                                            "row_number"),
                                                        partition_by=[
                                                            Column("hash")
                                                        ],
                                                        order_by=None,
                                                        frame_clause=None,
                                                    ),
                                                    Column("rn"),
                                                    with_as=True,
                                                ),
                                            ],
                                            from_statement=Table(
                                                String("test-table",
                                                       quotes="`")),
                                            where_clause=WhereClause(
                                                Condition(
                                                    Column("_table_suffix"),
                                                    "between",
                                                    BooleanCondition(
                                                        "and",
                                                        String("20200301",
                                                               quotes="'"),
                                                        String("20200401",
                                                               quotes="'"),
                                                    ),
                                                )),
                                            semi_colon=False,
                                        ), ),
                                    where_clause=WhereClause(
                                        Condition(Column("rn"), "=",
                                                  Integer(1))),
                                    semi_colon=False,
                                )),
                            on=None,
                            using=UsingClause(Parenthesis(Column("hash"))),
                        ),
                        group_by_clause=GroupByClause(Column("hash")),
                        semi_colon=False,
                    )),
                "sq_2",
                with_as=False,
            ),
            on=OnClause(
                Condition(Column("sq_1.hash"), "=", Column("sq_2.hash"))),
            using=None,
        ),
        where_clause=WhereClause(
            Condition(Column("sq_1.last"), "=", Integer(1))),
        group_by_clause=GroupByClause(Column("f0_")),
        semi_colon=False,
    )
    assert actual == expected
Example #13
0
def test_index_function_access():
    actual = ExpressionParser.parse(to_tokens("array[index(0)]"))
    expected = Index(Column("array"), [FunctionCall("index", Integer(0))])
    assert actual == expected
Example #14
0
def test_simple_function_parsing_no_args():
    actual = ExpressionParser.parse(to_tokens("test()"))
    expected = FunctionCall("test")
    assert actual == expected
Example #15
0
    def parse(tokens):
        next_token = next(tokens)
        if next_token == "(":
            argument_tokens = get_tokens_until_closing_parenthesis(tokens)
            try:
                argument = SQLStatementParser.parse(iter(argument_tokens))
            except ParsingError:
                argument = FromStatementParser.parse(iter(argument_tokens))
            expression = Parenthesis(argument)
        elif next_token == "'" or next_token == '"' or next_token == "`":
            expression = Table(StringParser.parse(tokens, next_token))
        else:
            if next_token == "unnest":
                next_next_token = next(tokens)
                assert next_next_token == "("
                argument_tokens = get_tokens_until_closing_parenthesis(tokens)
                arguments = ExpressionListParser.parse(iter(argument_tokens))
                expression = FunctionCall(next_token, *arguments)
            else:
                expression = Table(next_token)

        next_token = next(tokens, None)
        if (next_token is not None and next_token not in Join.VALUES
                and next_token not in CombinedQueries.SET_OPERATORS):
            if next_token == "as":
                with_as = True
                alias = next(tokens)
            else:
                with_as = False
                alias = next_token
            expression = Alias(expression, alias, with_as)
            next_token = next(tokens, None)

        while next_token in CombinedQueries.SET_OPERATORS:
            left_expr = expression
            expression_tokens, next_token = get_tokens_until_not_in(
                tokens, CombinedQueries.SET_OPERATORS, first_token=next_token)
            set_operator = SetOperatorTypeParser.parse(iter(expression_tokens))
            expression_tokens, next_token = get_tokens_until_one_of(
                tokens,
                CombinedQueries.SET_OPERATORS,
                first_token=next_token,
            )
            if expression_tokens[0] == "select":
                right_expr = SelectStatementParser.parse(
                    iter(expression_tokens[1:]))
            else:
                right_expr = FromStatementParser.parse(iter(expression_tokens))
            expression = CombinedQueries(set_operator, left_expr, right_expr)

            if (next_token is not None
                    and next_token not in CombinedQueries.SET_OPERATORS):
                if next_token == "as":
                    with_as = True
                    alias = next(tokens)
                else:
                    with_as = False
                    alias = next_token
                expression = Alias(expression, alias, with_as)
                next_token = next(tokens, None)

        while next_token in Join.VALUES:
            left_expr = expression
            expression_tokens, next_token = get_tokens_until_not_in(
                tokens, Join.VALUES, first_token=next_token)
            join_type = JoinTypeParser.parse(iter(expression_tokens))
            expression_tokens, next_token = get_tokens_until_one_of(
                tokens, ("on", "using"), first_token=next_token)
            right_expr = FromStatementParser.parse(iter(expression_tokens))
            on = None
            using = None
            on_or_using = next_token
            expression_tokens, next_token = get_tokens_until_one_of(
                tokens,
                Join.VALUES,
            )
            if on_or_using == "on":
                expression = ExpressionParser.parse(iter(expression_tokens))
                on = OnClause(expression)
            elif on_or_using == "using":
                expressions = ExpressionParser.parse(iter(expression_tokens))
                using = UsingClause(expressions)
            else:
                raise ParsingError("Missing ON or USING for join")

            expression = Join(join_type,
                              left_expr,
                              right_expr,
                              on=on,
                              using=using)

            if next_token is not None and next_token not in Join.VALUES:
                if next_token == "as":
                    with_as = True
                    alias = next(tokens)
                else:
                    with_as = False
                    alias = next_token
                expression = Alias(expression, alias, with_as)
                next_token = next(tokens, None)

        return expression
Example #16
0
def test_simple_function_parsing():
    actual = ExpressionParser.parse(to_tokens("test(col)"))
    expected = FunctionCall("test", Column("col"))
    assert actual == expected
Example #17
0
def test_function_with_single_comma_string_param():
    actual, _ = ExpressionParser.parse(to_tokens("test(',')"))
    expected = FunctionCall("test", String(",", quotes="'"))
    assert actual == expected
Example #18
0
    def parse(tokens, is_right_hand=False):
        main_token = next(tokens)
        next_token = None

        if main_token in String.QUOTES:
            expression = StringParser.parse(tokens, main_token)
        elif main_token.isdigit():
            expression = Integer(main_token)
        elif main_token in Boolean.BOOLEAN_VALUES:
            expression = Boolean(main_token)
        elif main_token in Null.VALUES:
            expression = Null()
        elif main_token in Type.VALUES:
            expression = Type(main_token)
        elif main_token == "(":
            argument_tokens = get_tokens_until_closing_parenthesis(tokens)
            arguments = ExpressionListParser.parse(iter(argument_tokens))
            expression = Parenthesis(*arguments)
        elif main_token == "case":
            argument_tokens, next_token = get_tokens_until_one_of(
                tokens, ["end"])
            assert next_token == "end"
            next_token = next(tokens, None)
            expression = CaseParser.parse(iter(argument_tokens))
        elif main_token == "select":
            argument_tokens, next_token = get_tokens_until_one_of(tokens, [])
            next_token = next(tokens, None)
            expression = SelectStatementParser.parse(iter(argument_tokens))
        else:
            expression = None

        if next_token is None:
            next_token = next(tokens, None)

        # Expressions that need the next_token to be read
        if expression is None:
            if next_token is not None and next_token == "(":
                argument_tokens = get_tokens_until_closing_parenthesis(tokens)
                arguments = ExpressionListParser.parse(iter(argument_tokens))
                expression = FunctionCall(main_token, *arguments)
                next_token = next(tokens, None)
            elif next_token is not None and next_token == "[":
                argument_tokens, next_token = get_tokens_until_one_of(
                    tokens, stop_words=["]"])
                arguments = ExpressionListParser.parse(iter(argument_tokens))
                expression = Index(
                    Column(main_token),
                    arguments)  # left item will not always be a column
                next_token = next(tokens, None)
            elif next_token is not None and main_token == "-" and next_token.isdigit(
            ):
                expression = Integer(-int(next_token))
                next_token = next(tokens, None)
            elif (main_token in String.PREFIXES and next_token is not None
                  and next_token in String.QUOTES):
                expression = StringParser.parse(tokens,
                                                start_quote=next_token,
                                                prefix=main_token)
            else:
                expression = Column(main_token)

        if next_token == "over":
            opening_parenthesis = next(tokens, None)
            if opening_parenthesis != "(":
                raise ParsingError("expected '('")

            argument_tokens = iter(
                get_tokens_until_closing_parenthesis(tokens))
            argument_next_token = next(argument_tokens, None)
            if argument_next_token == "partition":
                argument_next_token = next(argument_tokens, None)
                if not argument_next_token or argument_next_token != "by":
                    raise ParsingError("Missing BY after PARTITION")
                expression_tokens, argument_next_token = get_tokens_until_one_of(
                    argument_tokens, ["order", "rows", "range"])
                partition_by = ExpressionListParser.parse(
                    iter(expression_tokens))
            else:
                partition_by = None

            if argument_next_token == "order":
                argument_next_token = next(argument_tokens, None)
                if not argument_next_token or argument_next_token != "by":
                    raise ParsingError("Missing BY after ORDER")
                expression_tokens, argument_next_token = get_tokens_until_one_of(
                    argument_tokens, ["rows", "range"])
                order_by = OrderByParser.parse(iter(expression_tokens))
            else:
                order_by = None

            if argument_next_token in ("rows", "range"):
                rows_range = argument_next_token
                expression_tokens, _ = get_tokens_until_one_of(
                    argument_tokens, [])
                frame_clause = WindowFrameClause(rows_range,
                                                 " ".join(expression_tokens))
            else:
                frame_clause = None

            expression = AnalyticsClause(
                expression,
                partition_by=partition_by,
                order_by=order_by,
                frame_clause=frame_clause,
            )
            next_token = next(tokens, None)

        if next_token and next_token in ("+", "-", "*", "/"):
            left_hand = expression
            symbol = next_token
            right_hand, next_token = ExpressionParser.parse(tokens,
                                                            is_right_hand=True)
            expression = ArithmaticOperator(symbol, left_hand, right_hand)

        if is_right_hand:
            return expression, next_token

        if next_token in Condition.PREDICATES:
            symbol = next_token
            if next_token == "is":
                next_next_token = next(tokens)
                if next_next_token == "not":
                    symbol = "is not"
                else:
                    tokens, _ = get_tokens_until_one_of(
                        tokens, [], first_token=next_next_token)
                    tokens = iter(tokens)

            right_hand, next_token = ExpressionParser.parse(tokens,
                                                            is_right_hand=True)
            expression = Condition(expression, symbol, right_hand)
        elif next_token == "between":
            symbol = next_token
            right_hand_left, next_token = ExpressionParser.parse(
                tokens, is_right_hand=True)
            if next_token != "and":
                raise ParsingError("expected AND")
            right_hand_right, next_token = ExpressionParser.parse(
                tokens, is_right_hand=True)
            right_hand = BooleanCondition(
                "and",
                right_hand_left,
                right_hand_right,
            )
            expression = Condition(expression, symbol, right_hand)

        if next_token in BooleanCondition.PREDICATES:
            left_hand = expression
            symbol = next_token
            right_hand = ExpressionParser.parse(tokens)
            expression = BooleanCondition(symbol, left_hand, right_hand)
            next_token = next(tokens, None)

        if next_token == "except":
            opening_parenthesis = next(tokens, None)
            if opening_parenthesis != "(":
                raise ParsingError("expected '('")
            argument_tokens = get_tokens_until_closing_parenthesis(tokens)
            arguments = ExpressionListParser.parse(iter(argument_tokens))
            expression = ExceptClause(expression, arguments)
            next_token = next(tokens, None)

        if (next_token is not None and next_token != ")" and next_token != "'"
                and next_token != '"' and next_token != "`"
                and next_token != ";"):
            if next_token == "as":
                with_as = True
                alias, _ = ExpressionParser.parse(tokens, is_right_hand=True)
            else:
                with_as = False
                alias = next_token
            return Alias(expression, alias, with_as)
        return expression
Example #19
0
def test_parse_date_function1():
    actual = ExpressionParser.parse(to_tokens("DATE('2020-01-01')"))
    expected = FunctionCall("date", String("2020-01-01", quotes="'"))
    assert actual == expected
Example #20
0
    def parse(
        tokens,
        is_right_hand=False,
        can_be_type=False,
        can_alias=True,
        until_one_of=None,
        first_token=None,
        is_chained_columns=False,
    ) -> Tuple[Expression, Any]:
        until_one_of = until_one_of or []

        main_token = first_token or next(tokens)
        next_token = None

        if main_token in String.QUOTES:
            expression = StringParser.parse(tokens, main_token)
        elif main_token.isdigit():
            expression = Integer(main_token)
        elif main_token.replace(".", "").isdigit():
            expression = Float(main_token)
        elif lower(main_token) in Boolean.BOOLEAN_VALUES:
            expression = Boolean(main_token)
        elif lower(main_token) in Null.VALUES:
            expression = Null()
        elif lower(main_token) == Negation.PREDICATE:
            rest_expression, next_token = ExpressionParser.parse(
                tokens,
                is_right_hand=True,
                until_one_of=until_one_of,
            )
            expression = Negation(rest_expression)
        elif main_token == "(":
            argument_tokens = get_tokens_until_closing_parenthesis(tokens)
            arguments = ExpressionListParser.parse(iter(argument_tokens))
            expression = Parenthesis(*arguments)
        elif main_token == "[":
            argument_tokens, next_token = get_tokens_until_one_of(
                tokens, stop_words=["]"])
            assert next_token == "]", next_token
            arguments = ExpressionListParser.parse(iter(argument_tokens))
            expression = Array(*arguments)
            next_token = next(tokens, None)
        elif lower(main_token) == "case":
            argument_tokens, next_token = get_tokens_until_one_of(
                tokens, ["end"])
            assert lower(next_token) == "end"
            next_token = next(tokens, None)
            expression = CaseParser.parse(iter(argument_tokens))
        elif lower(main_token) == "select":
            argument_tokens, next_token = get_tokens_until_one_of(tokens, [])
            next_token = next(tokens, None)
            expression = SelectStatementParser.parse(iter(argument_tokens))
        else:
            expression = None

        if next_token is None:
            next_token = next(tokens, None)

        # Expressions that need the next_token to be read
        if expression is None:
            if next_token is not None and next_token == "(":
                if lower(main_token) == "cast":
                    column_tokens, next_token = get_tokens_until_one_of(
                        tokens, stop_words=["as"])
                    column, _ = ExpressionParser.parse(
                        iter(column_tokens),
                        is_right_hand=True,
                        until_one_of=until_one_of,
                    )
                    assert lower(next_token) == "as", next_token
                    next_token = next(tokens)
                    cast_type = Type(next_token)
                    expression = CastFunctionCall(column, cast_type)
                    next_token = next(tokens)
                    assert lower(next_token) == ")", next_token
                elif lower(main_token) == "array_agg":
                    next_token = next(tokens)
                    if lower(next_token) == "distinct":
                        distinct = True
                        first_token = None
                    else:
                        distinct = False
                        first_token = next_token

                    column_tokens, next_token = get_tokens_until_one_of(
                        tokens,
                        stop_words=[
                            ")", "ignore", "respects", "order", "limit"
                        ],
                        first_token=first_token,
                    )
                    column, _ = ExpressionParser.parse(
                        iter(column_tokens), until_one_of=until_one_of)

                    ignore_nulls = respect_nulls = False
                    if lower(next_token) == "ignore":
                        next_token = next(tokens)
                        assert lower(next_token) == "nulls"
                        ignore_nulls = True
                        next_token = next(tokens)
                    elif lower(next_token) == "respect":
                        next_token = next(tokens)
                        assert lower(next_token) == "nulls"
                        respect_nulls = True
                        next_token = next(tokens)

                    if lower(next_token) == "order":
                        next_token = next(tokens)
                        assert lower(next_token) == "by"
                        expression_tokens, next_token = get_tokens_until_one_of(
                            tokens, ["limit", ")"])
                        order_bys = OrderByParser.parse(
                            iter(expression_tokens))
                    else:
                        order_bys = None

                    limit = None
                    if lower(next_token) == "limit":
                        next_token = next(tokens)
                        limit = int(next_token)
                        next_token = next(tokens)

                    assert lower(next_token) == ")", next_token
                    expression = ArrayAggFunctionCall(
                        column=column,
                        distinct=distinct,
                        ignore_nulls=ignore_nulls,
                        respect_nulls=respect_nulls,
                        order_bys=order_bys,
                        limit=limit,
                    )
                elif lower(main_token) == "count":
                    next_token = next(tokens)
                    if lower(next_token) == "distinct":
                        distinct = True
                        first_token = None
                    else:
                        distinct = False
                        first_token = next_token

                    argument_tokens = get_tokens_until_closing_parenthesis(
                        tokens, first_token=first_token)
                    arguments = ExpressionListParser.parse(
                        iter(argument_tokens))
                    expression = CountFunctionCall(*arguments,
                                                   distinct=distinct)
                else:
                    argument_tokens = get_tokens_until_closing_parenthesis(
                        tokens)
                    arguments_can_be_type = can_be_type or any(
                        lower(t) == "timestamp_trunc" for t in argument_tokens)
                    arguments = ExpressionListParser.parse(
                        iter(argument_tokens),
                        can_be_type=arguments_can_be_type)
                    expression = FunctionCall(main_token, *arguments)

                next_token = next(tokens, None)
                if next_token and lower(next_token) == "filter":
                    next_next_token = next(tokens)
                    assert next_next_token == "(", next_next_token
                    argument_tokens = get_tokens_until_closing_parenthesis(
                        tokens)
                    assert lower(
                        argument_tokens[0]) == "where", argument_tokens
                    filter_condition, next_token = ExpressionParser.parse(
                        iter(argument_tokens[1:]),
                        can_alias=False,
                    )

                    expression = FilteredFunctionCall(expression,
                                                      filter_condition)
                    next_token = next(tokens, None)

            elif (next_token is not None
                  and lower(main_token) in DatePartExtraction.PARTS
                  and lower(next_token) == "from"):
                rest_expression, next_token = ExpressionParser.parse(
                    tokens, until_one_of=until_one_of)
                expression = DatePartExtraction(main_token, rest_expression)
            elif lower(main_token) in Type.VALUES and can_be_type:
                expression = Type(main_token)
            elif next_token is not None and next_token == "[":
                argument_tokens, next_token = get_tokens_until_one_of(
                    tokens, stop_words=["]"])
                arguments = ExpressionListParser.parse(iter(argument_tokens))
                expression = Index(
                    Column(main_token),
                    arguments)  # left item will not always be a column
                next_token = next(tokens, None)
            elif next_token is not None and main_token == "-" and next_token.isdigit(
            ):
                expression = Integer(-int(next_token))
                next_token = next(tokens, None)
            elif (next_token is not None and main_token == "-"
                  and next_token.replace(".", "").isdigit()):
                expression = Float(-float(next_token))
                next_token = next(tokens, None)
            elif (lower(main_token) in String.PREFIXES
                  and next_token is not None
                  and lower(next_token) in String.QUOTES):
                expression = StringParser.parse(tokens,
                                                start_quote=next_token,
                                                prefix=main_token)
            else:
                expression = Column(main_token)

        if lower(next_token) == "over":
            opening_parenthesis = next(tokens, None)
            if opening_parenthesis != "(":
                raise ParsingError("expected '('")

            argument_tokens = iter(
                get_tokens_until_closing_parenthesis(tokens))
            argument_next_token = next(argument_tokens, None)
            if lower(argument_next_token) == "partition":
                argument_next_token = next(argument_tokens, None)
                if not argument_next_token or lower(
                        argument_next_token) != "by":
                    raise ParsingError("Missing BY after PARTITION")
                expression_tokens, argument_next_token = get_tokens_until_one_of(
                    argument_tokens, ["order", "rows", "range"])
                partition_by = ExpressionListParser.parse(
                    iter(expression_tokens))
            else:
                partition_by = None

            if lower(argument_next_token) == "order":
                argument_next_token = next(argument_tokens, None)
                if not argument_next_token or lower(
                        argument_next_token) != "by":
                    raise ParsingError("Missing BY after ORDER")
                expression_tokens, argument_next_token = get_tokens_until_one_of(
                    argument_tokens, ["rows", "range"])
                order_by = OrderByParser.parse(iter(expression_tokens))
            else:
                order_by = None

            if lower(argument_next_token) in ("rows", "range"):
                rows_range = argument_next_token
                expression_tokens, _ = get_tokens_until_one_of(
                    argument_tokens, [])
                frame_clause: Optional[WindowFrameClause] = WindowFrameClause(
                    rows_range, " ".join(expression_tokens))
            else:
                frame_clause = None

            expression = AnalyticsClause(
                expression,
                partition_by=partition_by,
                order_by=order_by,
                frame_clause=frame_clause,
            )
            next_token = next(tokens, None)

        while next_token == ".":
            right_hand, next_token = ExpressionParser.parse(
                tokens, until_one_of=until_one_of, is_chained_columns=True)
            expression = ChainedColumns(expression, right_hand)

        if next_token and next_token in ("+", "-", "*",
                                         "/") and not is_chained_columns:
            left_hand = expression
            symbol = next_token
            right_hand, next_token = ExpressionParser.parse(
                tokens,
                is_right_hand=True,
                until_one_of=until_one_of,
            )
            expression = ArithmaticOperator(symbol, left_hand, right_hand)

        while next_token == "[":
            argument_tokens, next_token = get_tokens_until_one_of(
                tokens, stop_words=["]"])
            arguments = ExpressionListParser.parse(iter(argument_tokens))
            expression = Index(expression, arguments)
            next_token = next(tokens, None)

        if is_right_hand or is_chained_columns:
            return expression, next_token

        if lower(next_token) in Condition.PREDICATES:
            first_token = None
            symbol = next_token
            if lower(next_token) == "is":
                next_next_token = next(tokens)
                if lower(next_next_token) == "not":
                    symbol = "is not"
                else:
                    first_token = next_next_token
            elif lower(next_token) == "not":
                next_next_token = next(tokens)
                if lower(next_next_token) == "in":
                    symbol = "not in"
                else:
                    first_token = next_next_token

            right_hand, next_token = ExpressionParser.parse(
                tokens,
                is_right_hand=True,
                until_one_of=until_one_of,
                first_token=first_token,
            )
            expression = Condition(expression, symbol, right_hand)
        elif lower(next_token) == "between":
            symbol = next_token
            right_hand_left, next_token = ExpressionParser.parse(
                tokens, is_right_hand=True, until_one_of=until_one_of)
            if lower(next_token) != "and":
                raise ParsingError("expected AND")
            right_hand_right, next_token = ExpressionParser.parse(
                tokens, is_right_hand=True, until_one_of=until_one_of)
            right_hand = BooleanCondition(
                "and",
                right_hand_left,
                right_hand_right,
            )
            expression = Condition(expression, symbol, right_hand)
        elif next_token in BitwiseOperation.OPERATORS:
            operator = next_token
            right_hand, next_token = ExpressionParser.parse(
                tokens, is_right_hand=True, until_one_of=until_one_of)
            expression = BitwiseOperation(expression, operator, right_hand)

        if lower(next_token) in BooleanCondition.PREDICATES:
            left_hand = expression
            symbol = next_token
            right_hand, next_token = ExpressionParser.parse(
                tokens, until_one_of=until_one_of)
            right_alias = None
            if isinstance(right_hand, Alias):
                right_alias = right_hand
                right_hand = right_hand.expression
            expression = BooleanCondition(symbol, left_hand, right_hand)
            if right_alias is not None:
                right_alias.expression = expression
                expression = right_alias

        if lower(next_token) == "except":
            opening_parenthesis = next(tokens, None)
            if opening_parenthesis != "(":
                raise ParsingError("expected '('")
            argument_tokens = get_tokens_until_closing_parenthesis(tokens)
            arguments = ExpressionListParser.parse(iter(argument_tokens))
            expression = ExceptClause(expression, arguments)
            next_token = next(tokens, None)

        if (next_token is not None and next_token != ")"
                and not (next_token in String.QUOTES
                         and isinstance(expression, String))
                and next_token != ";" and lower(next_token) not in until_one_of
                and can_alias):
            if lower(next_token) == "as":
                with_as = True
                alias, _ = ExpressionParser.parse(tokens,
                                                  is_right_hand=True,
                                                  until_one_of=until_one_of)
            else:
                with_as = False
                alias = next_token
            if alias in String.QUOTES:
                alias = StringParser.parse(tokens, alias)
            return Alias(expression, alias, with_as), next(tokens, None)
        return expression, next_token
Example #21
0
def test_parse_date_function():
    actual = ExpressionParser.parse(to_tokens("col >= DATE('2020-01-01')"))
    expected = Condition(
        Column("col"), ">=",
        FunctionCall("date", String("2020-01-01", quotes="'")))
    assert actual == expected