def parse(tokens): next_token = next(tokens) if next_token == "(": argument_tokens = get_tokens_until_closing_parenthesis(tokens) try: argument = SQLStatementParser.parse(iter(argument_tokens)) except ParsingError: argument = FromStatementParser.parse(iter(argument_tokens)) expression = Parenthesis(argument) elif next_token == "'" or next_token == '"' or next_token == "`": expression = Table(StringParser.parse(tokens, next_token)) else: if next_token == "unnest": next_next_token = next(tokens) assert next_next_token == "(" argument_tokens = get_tokens_until_closing_parenthesis(tokens) arguments = ExpressionListParser.parse(iter(argument_tokens)) expression = FunctionCall(next_token, *arguments) else: expression = Table(next_token) next_token = next(tokens, None) if (next_token is not None and next_token not in Join.VALUES and next_token not in CombinedQueries.SET_OPERATORS): if next_token == "as": with_as = True alias = next(tokens) else: with_as = False alias = next_token expression = Alias(expression, alias, with_as) next_token = next(tokens, None) while next_token in CombinedQueries.SET_OPERATORS: left_expr = expression expression_tokens, next_token = get_tokens_until_not_in( tokens, CombinedQueries.SET_OPERATORS, first_token=next_token) set_operator = SetOperatorTypeParser.parse(iter(expression_tokens)) expression_tokens, next_token = get_tokens_until_one_of( tokens, CombinedQueries.SET_OPERATORS, first_token=next_token, ) if expression_tokens[0] == "select": right_expr = SelectStatementParser.parse( iter(expression_tokens[1:])) else: right_expr = FromStatementParser.parse(iter(expression_tokens)) expression = CombinedQueries(set_operator, left_expr, right_expr) if (next_token is not None and next_token not in CombinedQueries.SET_OPERATORS): if next_token == "as": with_as = True alias = next(tokens) else: with_as = False alias = next_token expression = Alias(expression, alias, with_as) next_token = next(tokens, None) while next_token in Join.VALUES: left_expr = expression expression_tokens, next_token = get_tokens_until_not_in( tokens, Join.VALUES, first_token=next_token) join_type = JoinTypeParser.parse(iter(expression_tokens)) expression_tokens, next_token = get_tokens_until_one_of( tokens, ("on", "using"), first_token=next_token) right_expr = FromStatementParser.parse(iter(expression_tokens)) on = None using = None on_or_using = next_token expression_tokens, next_token = get_tokens_until_one_of( tokens, Join.VALUES, ) if on_or_using == "on": expression = ExpressionParser.parse(iter(expression_tokens)) on = OnClause(expression) elif on_or_using == "using": expressions = ExpressionParser.parse(iter(expression_tokens)) using = UsingClause(expressions) else: raise ParsingError("Missing ON or USING for join") expression = Join(join_type, left_expr, right_expr, on=on, using=using) if next_token is not None and next_token not in Join.VALUES: if next_token == "as": with_as = True alias = next(tokens) else: with_as = False alias = next_token expression = Alias(expression, alias, with_as) next_token = next(tokens, None) return expression
def parse(tokens): next_token = next(tokens) if next_token == "(": argument_tokens = get_tokens_until_closing_parenthesis(tokens) try: argument = SQLStatementParser.parse(iter(argument_tokens)) except ParsingError: argument = FromStatementParser.parse(iter(argument_tokens)) expression = Parenthesis(argument) next_token = next(tokens, None) elif next_token in String.QUOTES: table_name = StringParser.parse(tokens, next_token) next_token = next(tokens, None) while next_token == ".": right_hand, next_token = ExpressionParser.parse( tokens, is_chained_columns=True, ) table_name = ChainedColumns(table_name, right_hand) expression = Table(table_name) elif next_token == "[": argument_tokens, next_token = get_tokens_until_one_of( tokens, stop_words=["]"]) table, _ = ExpressionParser.parse(iter(argument_tokens)) expression = Table(table, in_square_brackets=True) assert next_token == "]", next_token next_token = next(tokens, None) else: if lower(next_token) == "unnest": expression, next_token = UnnestParser.parse(tokens) else: argument_tokens, next_token = get_tokens_until_one_of( tokens, stop_words=Join.VALUES + CombinedQueries.SET_OPERATORS, first_token=next_token, ) expression = Table( ExpressionParser.parse(iter(argument_tokens))[0]) if (next_token is not None and lower(next_token) not in Join.VALUES and lower(next_token) not in CombinedQueries.SET_OPERATORS): if lower(next_token) == "as": with_as = True alias = next(tokens) else: with_as = False alias = next_token if alias in String.QUOTES: alias = StringParser.parse(tokens, alias) expression = Alias(expression, alias, with_as) next_token = next(tokens, None) while lower(next_token) in CombinedQueries.SET_OPERATORS: left_expr = expression expression_tokens, next_token = get_tokens_until_not_in( tokens, CombinedQueries.SET_OPERATORS, first_token=next_token) set_operator = SetOperatorTypeParser.parse(iter(expression_tokens)) expression_tokens, next_token = get_tokens_until_one_of( tokens, CombinedQueries.SET_OPERATORS, first_token=next_token, ) if lower(expression_tokens[0]) == "select": right_expr = SelectStatementParser.parse( iter(expression_tokens[1:])) else: right_expr = FromStatementParser.parse(iter(expression_tokens)) expression = CombinedQueries(set_operator, left_expr, right_expr) if (next_token is not None and lower(next_token) not in CombinedQueries.SET_OPERATORS): if lower(next_token) == "as": with_as = True alias = next(tokens) else: with_as = False alias = next_token if alias in String.QUOTES: alias = StringParser.parse(tokens, alias) expression = Alias(expression, alias, with_as) next_token = next(tokens, None) while lower(next_token) in Join.VALUES: left_expr = expression expression_tokens, next_token = get_tokens_until_not_in( tokens, Join.VALUES, first_token=next_token) join_type = JoinTypeParser.parse(iter(expression_tokens)) if join_type in ("CROSS JOIN", ","): expression_tokens, next_token = [next_token ] + list(tokens), None else: expression_tokens, next_token = get_tokens_until_one_of( tokens, ("on", "using"), first_token=next_token) right_expr = FromStatementParser.parse(iter(expression_tokens)) on = None using = None on_or_using = next_token expression_tokens, next_token = get_tokens_until_one_of( tokens, Join.VALUES, ) if lower(on_or_using) == "on": expression, _ = ExpressionParser.parse(iter(expression_tokens)) on = OnClause(expression) elif lower(on_or_using) == "using": expressions, _ = ExpressionParser.parse( iter(expression_tokens)) using = UsingClause(expressions) expression = Join(join_type, left_expr, right_expr, on=on, using=using) if next_token is not None and lower(next_token) not in Join.VALUES: if lower(next_token) == "as": with_as = True alias = next(tokens) else: with_as = False alias = next_token if alias in String.QUOTES: alias = StringParser.parse(tokens, alias) expression = Alias(expression, alias, with_as) next_token = next(tokens, None) return expression
def test_nested_joins(): sql = """ SELECT COALESCE(sq_1.col, sq_2.col) f0_ FROM (SELECT ANY_VALUE(col) col, LAST_VALUE(ANY_VALUE(col2)) OVER (PARTITION BY ANY_VALUE(col) ORDER BY SUM(clicks) ASC, SUM(metric) ASC RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) last, hash FROM (SELECT * FROM `events` WHERE _TABLE_SUFFIX BETWEEN '20200410' AND '20200510') JOIN (SELECT * EXCEPT (hash) FROM (SELECT *, ROW_NUMBER() OVER (PARTITION BY hash) AS rn FROM `test-table` WHERE _TABLE_SUFFIX BETWEEN '20200401' AND '20200501') WHERE rn = 1) USING (hash) GROUP BY hash) sq_1 FULL OUTER JOIN (SELECT ANY_VALUE(col) col, hash FROM (SELECT * FROM `events` WHERE _TABLE_SUFFIX BETWEEN '20200310' AND '20200410') JOIN (SELECT * EXCEPT (hash), FROM (SELECT *, ROW_NUMBER() OVER (PARTITION BY hash) AS rn FROM `test-table` WHERE _TABLE_SUFFIX BETWEEN '20200301' AND '20200401') WHERE rn = 1) USING (hash) GROUP BY hash) sq_2 ON sq_1.hash = sq_2.hash WHERE sq_1.last = 1 GROUP BY f0_ """ # noqa actual = SQLStatementParser.parse(to_tokens(sql)) expected = SelectStatement( expressions=[ Alias( FunctionCall("coalesce", Column("sq_1.col"), Column("sq_2.col")), "f0_", with_as=False, ) ], from_statement=Join( "FULL OUTER JOIN", left_from=Alias( Parenthesis( SelectStatement( expressions=[ Alias( FunctionCall("any_value", Column("col")), "col", with_as=False, ), Alias( AnalyticsClause( FunctionCall( "last_value", FunctionCall("any_value", Column("col2")), ), partition_by=[ FunctionCall("any_value", Column("col")) ], order_by=OrderByClause( OrderByItem( FunctionCall( "sum", Column("clicks")), has_asc=True, ), OrderByItem( FunctionCall( "sum", Column("metric")), has_asc=True, ), ), frame_clause=WindowFrameClause( "range", "between unbounded preceding and unbounded following", # noqa ), ), "last", with_as=False, ), Column("hash"), ], from_statement=Join( join_type="JOIN", left_from=Parenthesis( SelectStatement( expressions=[Column("*")], from_statement=Table( String("events", quotes="`")), where_clause=WhereClause( Condition( Column("_table_suffix"), "between", BooleanCondition( "and", String("20200410", quotes="'"), String("20200510", quotes="'"), ), )), semi_colon=False, )), right_from=Parenthesis( SelectStatement( expressions=[ ExceptClause(Column("*"), [Column("hash")]) ], from_statement=Parenthesis( SelectStatement( expressions=[ Column("*"), Alias( AnalyticsClause( FunctionCall( "row_number"), partition_by=[ Column("hash") ], order_by=None, frame_clause=None, ), Column("rn"), with_as=True, ), ], from_statement=Table( String("test-table", quotes="`")), where_clause=WhereClause( Condition( Column("_table_suffix"), "between", BooleanCondition( "and", String("20200401", quotes="'"), String("20200501", quotes="'"), ), )), semi_colon=False, )), where_clause=WhereClause( Condition(Column("rn"), "=", Integer(1))), semi_colon=False, )), on=None, using=UsingClause(Parenthesis(Column("hash"))), ), group_by_clause=GroupByClause(Column("hash")), semi_colon=False, )), "sq_1", with_as=False, ), right_from=Alias( Parenthesis( SelectStatement( expressions=[ Alias( FunctionCall("any_value", Column("col")), "col", with_as=False, ), Column("hash"), ], from_statement=Join( join_type="JOIN", left_from=Parenthesis( SelectStatement( expressions=[Column("*")], from_statement=Table( String("events", quotes="`")), where_clause=WhereClause( Condition( Column("_table_suffix"), "between", BooleanCondition( "and", String("20200310", quotes="'"), String("20200410", quotes="'"), ), )), semi_colon=False, )), right_from=Parenthesis( SelectStatement( expressions=[ ExceptClause(Column("*"), [Column("hash")]) ], from_statement=Parenthesis( SelectStatement( expressions=[ Column("*"), Alias( AnalyticsClause( FunctionCall( "row_number"), partition_by=[ Column("hash") ], order_by=None, frame_clause=None, ), Column("rn"), with_as=True, ), ], from_statement=Table( String("test-table", quotes="`")), where_clause=WhereClause( Condition( Column("_table_suffix"), "between", BooleanCondition( "and", String("20200301", quotes="'"), String("20200401", quotes="'"), ), )), semi_colon=False, ), ), where_clause=WhereClause( Condition(Column("rn"), "=", Integer(1))), semi_colon=False, )), on=None, using=UsingClause(Parenthesis(Column("hash"))), ), group_by_clause=GroupByClause(Column("hash")), semi_colon=False, )), "sq_2", with_as=False, ), on=OnClause( Condition(Column("sq_1.hash"), "=", Column("sq_2.hash"))), using=None, ), where_clause=WhereClause( Condition(Column("sq_1.last"), "=", Integer(1))), group_by_clause=GroupByClause(Column("f0_")), semi_colon=False, ) assert actual == expected