def test_subquery(): actual = SQLStatementParser.parse( to_tokens( "SELECT col" " from (select count(*) col" " from table group by x) WHERE col > 10 ORDER BY col DESC;" ) ) expected = SelectStatement( expressions=[Column("col")], from_statement=Parenthesis( SelectStatement( expressions=[ Alias(CountFunctionCall(Column("*")), "col", with_as=False) ], from_statement=Table(Column("table")), group_by_clause=GroupByClause(Column("x")), semi_colon=False, ) ), where_clause=WhereClause(Condition(Column("col"), ">", Integer(10))), order_by_clause=OrderByClause(OrderByItem(Column("col"), has_desc=True)), semi_colon=True, ) assert actual == expected
def test_group_by_without_from(): actual = SQLStatementParser.parse(to_tokens("SELECT 1 GROUP BY 2")) expected = SelectStatement( expressions=[Integer(1)], group_by_clause=GroupByClause(Integer(2)), semi_colon=False, ) assert actual == expected
def test_select_distinct_on(): actual = SQLStatementParser.parse(to_tokens("SELECT DISTINCT ON (col) col")) expected = SelectStatement( select_distinct=True, select_distinct_on=[Column("col")], expressions=[Column("col")], semi_colon=False, ) assert actual == expected
def test_from_subquery(): actual = FromStatementParser.parse( to_tokens("(select field from table_stmt)")) expected = Parenthesis( SelectStatement( expressions=[Column("field")], from_statement=Table("table_stmt"), semi_colon=False, )) assert actual == expected
def test_limit_parentheses(): actual = SQLStatementParser.parse(to_tokens("SELECT 1 LIMIT (((3)))")) expected = SelectStatement( expressions=[Integer(1)], limit_clause=LimitClause( limit_all=False, expression=Parenthesis(Parenthesis(Parenthesis(Integer(3)))), ), semi_colon=False, ) assert actual == expected
def test_order_by_clause(): actual = SQLStatementParser.parse( to_tokens("SELECT col FROM t ORDER BY col, 2")) expected = SelectStatement( expressions=[Column("col")], from_statement=Table("t"), order_by_clause=OrderByClause(OrderByItem(Column("col")), OrderByItem(Integer(2))), semi_colon=False, ) assert actual == expected
def test_select_boolean_condition_expression(): sql = "select field is not null and col > 0 from t;" actual = SQLStatementParser.parse(to_tokens(sql)) expected = SelectStatement( expressions=[ BooleanCondition( "and", Condition( Column("field"), "is not", Null(), ), Condition( Column("col"), ">", Integer(0), ), ) ], from_statement=Table(Column("t")), ) assert actual == expected
def parse(cls, tokens): first_expression_token = None next_token = next(tokens) select_all = select_distinct = False select_distinct_on = None if lower(next_token) == "all": select_all = True elif lower(next_token) == "distinct": select_distinct = True next_token = next(tokens) if lower(next_token) == "on": next(tokens) # Consume parenthesis distinct_on_tokens = get_tokens_until_closing_parenthesis( tokens) select_distinct_on = ExpressionListParser.parse( iter(distinct_on_tokens)) else: first_expression_token = next_token else: first_expression_token = next_token expression_tokens, next_token = get_tokens_until_one_of( tokens, cls.keywords, first_token=first_expression_token) expressions = ExpressionListParser.parse(iter(expression_tokens)) if lower(next_token) == "from": expression_tokens, next_token = get_tokens_until_one_of( tokens, ["where", "group", "having", "order", "limit", "offset", ";"], keep=[("with", "offset")], ) from_statement = FromStatementParser.parse(iter(expression_tokens)) else: from_statement = None if lower(next_token) == "where": where_clause, next_token = WhereClauseParser.parse(tokens) else: where_clause = None if lower(next_token) == "group": next_token = next(tokens, None) group_each_by = False if lower(next_token) == "each": group_each_by = True next_token = next(tokens, None) if not lower(next_token) == "by": raise ParsingError("Missing BY after GROUP") expression_tokens, next_token = get_tokens_until_one_of( tokens, ["having", "order", "limit", "offset", ";"]) group_by_clause = GroupByParser.parse(iter(expression_tokens)) group_by_clause.group_each_by = group_each_by else: group_by_clause = None if lower(next_token) == "having": having_clause, next_token = HavingClauseParser.parse(tokens) else: having_clause = None if lower(next_token) == "order": next_token = next(tokens, None) if not lower(next_token) == "by": raise ParsingError("Missing BY after ORDER") expression_tokens, next_token = get_tokens_until_one_of( tokens, ["limit", "offset", ";"]) order_by_clause = OrderByParser.parse(iter(expression_tokens)) else: order_by_clause = None if lower(next_token) == "limit": expression_tokens, next_token = get_tokens_until_one_of( tokens, ["offset", ";"]) limit_clause = LimitClauseParser.parse(iter(expression_tokens)) else: limit_clause = None if lower(next_token) == "offset": expression_tokens, next_token = get_tokens_until_one_of( tokens, [";"]) offset_clause = OffsetClauseParser.parse(iter(expression_tokens)) else: offset_clause = None semi_colon = bool(next_token and next_token == ";") return SelectStatement( select_all=select_all, select_distinct=select_distinct, select_distinct_on=select_distinct_on, expressions=expressions, from_statement=from_statement, where_clause=where_clause, group_by_clause=group_by_clause, having_clause=having_clause, order_by_clause=order_by_clause, limit_clause=limit_clause, offset_clause=offset_clause, semi_colon=semi_colon, )
def test_nested_joins(): sql = """ SELECT COALESCE(sq_1.col, sq_2.col) f0_ FROM (SELECT ANY_VALUE(col) col, LAST_VALUE(ANY_VALUE(col2)) OVER (PARTITION BY ANY_VALUE(col) ORDER BY SUM(clicks) ASC, SUM(metric) ASC RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) last, hash FROM (SELECT * FROM `events` WHERE _TABLE_SUFFIX BETWEEN '20200410' AND '20200510') JOIN (SELECT * EXCEPT (hash) FROM (SELECT *, ROW_NUMBER() OVER (PARTITION BY hash) AS rn FROM `test-table` WHERE _TABLE_SUFFIX BETWEEN '20200401' AND '20200501') WHERE rn = 1) USING (hash) GROUP BY hash) sq_1 FULL OUTER JOIN (SELECT ANY_VALUE(col) col, hash FROM (SELECT * FROM `events` WHERE _TABLE_SUFFIX BETWEEN '20200310' AND '20200410') JOIN (SELECT * EXCEPT (hash), FROM (SELECT *, ROW_NUMBER() OVER (PARTITION BY hash) AS rn FROM `test-table` WHERE _TABLE_SUFFIX BETWEEN '20200301' AND '20200401') WHERE rn = 1) USING (hash) GROUP BY hash) sq_2 ON sq_1.hash = sq_2.hash WHERE sq_1.last = 1 GROUP BY f0_ """ # noqa actual = SQLStatementParser.parse(to_tokens(sql)) expected = SelectStatement( expressions=[ Alias( FunctionCall("coalesce", Column("sq_1.col"), Column("sq_2.col")), "f0_", with_as=False, ) ], from_statement=Join( "FULL OUTER JOIN", left_from=Alias( Parenthesis( SelectStatement( expressions=[ Alias( FunctionCall("any_value", Column("col")), "col", with_as=False, ), Alias( AnalyticsClause( FunctionCall( "last_value", FunctionCall("any_value", Column("col2")), ), partition_by=[ FunctionCall("any_value", Column("col")) ], order_by=OrderByClause( OrderByItem( FunctionCall( "sum", Column("clicks")), has_asc=True, ), OrderByItem( FunctionCall( "sum", Column("metric")), has_asc=True, ), ), frame_clause=WindowFrameClause( "range", "between unbounded preceding and unbounded following", # noqa ), ), "last", with_as=False, ), Column("hash"), ], from_statement=Join( join_type="JOIN", left_from=Parenthesis( SelectStatement( expressions=[Column("*")], from_statement=Table( String("events", quotes="`")), where_clause=WhereClause( Condition( Column("_table_suffix"), "between", BooleanCondition( "and", String("20200410", quotes="'"), String("20200510", quotes="'"), ), )), semi_colon=False, )), right_from=Parenthesis( SelectStatement( expressions=[ ExceptClause(Column("*"), [Column("hash")]) ], from_statement=Parenthesis( SelectStatement( expressions=[ Column("*"), Alias( AnalyticsClause( FunctionCall( "row_number"), partition_by=[ Column("hash") ], order_by=None, frame_clause=None, ), Column("rn"), with_as=True, ), ], from_statement=Table( String("test-table", quotes="`")), where_clause=WhereClause( Condition( Column("_table_suffix"), "between", BooleanCondition( "and", String("20200401", quotes="'"), String("20200501", quotes="'"), ), )), semi_colon=False, )), where_clause=WhereClause( Condition(Column("rn"), "=", Integer(1))), semi_colon=False, )), on=None, using=UsingClause(Parenthesis(Column("hash"))), ), group_by_clause=GroupByClause(Column("hash")), semi_colon=False, )), "sq_1", with_as=False, ), right_from=Alias( Parenthesis( SelectStatement( expressions=[ Alias( FunctionCall("any_value", Column("col")), "col", with_as=False, ), Column("hash"), ], from_statement=Join( join_type="JOIN", left_from=Parenthesis( SelectStatement( expressions=[Column("*")], from_statement=Table( String("events", quotes="`")), where_clause=WhereClause( Condition( Column("_table_suffix"), "between", BooleanCondition( "and", String("20200310", quotes="'"), String("20200410", quotes="'"), ), )), semi_colon=False, )), right_from=Parenthesis( SelectStatement( expressions=[ ExceptClause(Column("*"), [Column("hash")]) ], from_statement=Parenthesis( SelectStatement( expressions=[ Column("*"), Alias( AnalyticsClause( FunctionCall( "row_number"), partition_by=[ Column("hash") ], order_by=None, frame_clause=None, ), Column("rn"), with_as=True, ), ], from_statement=Table( String("test-table", quotes="`")), where_clause=WhereClause( Condition( Column("_table_suffix"), "between", BooleanCondition( "and", String("20200301", quotes="'"), String("20200401", quotes="'"), ), )), semi_colon=False, ), ), where_clause=WhereClause( Condition(Column("rn"), "=", Integer(1))), semi_colon=False, )), on=None, using=UsingClause(Parenthesis(Column("hash"))), ), group_by_clause=GroupByClause(Column("hash")), semi_colon=False, )), "sq_2", with_as=False, ), on=OnClause( Condition(Column("sq_1.hash"), "=", Column("sq_2.hash"))), using=None, ), where_clause=WhereClause( Condition(Column("sq_1.last"), "=", Integer(1))), group_by_clause=GroupByClause(Column("f0_")), semi_colon=False, ) assert actual == expected
def test_select_distinct(): actual = SQLStatementParser.parse(to_tokens("SELECT DISTINCT 1")) expected = SelectStatement(select_distinct=True, expressions=[Integer(1)], semi_colon=False) assert actual == expected
def test_select_all(): actual = SQLStatementParser.parse(to_tokens("SELECT ALL 1")) expected = SelectStatement(select_all=True, expressions=[Integer(1)], semi_colon=False) assert actual == expected