def test_type_condition_optimizer() -> None:
    """
    Runs TypeConditionOptimizer over an "errors" query whose condition is
    ``type != 'transaction' AND col1 = 'val1'`` and verifies that the
    ``type`` comparison is replaced by the literal ``1``, both in the
    resulting AST and in the formatted Clickhouse expression.
    """
    col1_equals_val1 = binary_condition(
        ConditionFunctions.EQ, Column(None, None, "col1"), Literal(None, "val1")
    )
    type_is_not_transaction = binary_condition(
        ConditionFunctions.NEQ,
        Column(None, None, "type"),
        Literal(None, "transaction"),
    )

    query = Query(
        Table("errors", ColumnSet([])),
        condition=binary_condition(
            BooleanFunctions.AND, type_is_not_transaction, col1_equals_val1
        ),
    )
    expected_query = Query(
        Table("errors", ColumnSet([])),
        condition=binary_condition(
            BooleanFunctions.AND, Literal(None, 1), col1_equals_val1
        ),
    )

    # The processor rewrites `query` in place.
    TypeConditionOptimizer().process_query(query, HTTPQuerySettings())

    processed_condition = query.get_condition()
    assert expected_query.get_condition() == processed_condition
    assert processed_condition is not None

    formatted = processed_condition.accept(ClickhouseExpressionFormatter())
    assert formatted == "1 AND equals(col1, 'val1')"
def test_uuid_array_column_processor(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    """
    Applies FixedStringArrayColumnProcessor (columns "column1"/"column2",
    second argument 32) to a "transactions" query and verifies that:

    - the selected column ``column2`` is left as a plain column,
    - the condition ``unprocessed`` becomes ``expected``,
    - the processed condition formats to ``formatted_value``.

    NOTE(review): the test name says "uuid_array" while the processor under
    test is FixedStringArrayColumnProcessor -- confirm the name is
    intentional and unique within its module.
    """

    def build_query(condition: Expression) -> Query:
        # Fresh Table / selected-column objects for every query so the
        # processed and the expected query share no mutable state.
        return Query(
            Table("transactions", ColumnSet([])),
            selected_columns=[
                SelectedExpression("column2", Column(None, None, "column2"))
            ],
            condition=condition,
        )

    query = build_query(unprocessed)
    expected_query = build_query(expected)

    processor = FixedStringArrayColumnProcessor({"column1", "column2"}, 32)
    processor.process_query(query, HTTPQuerySettings())

    assert query.get_selected_columns() == [
        SelectedExpression("column2", Column(None, None, "column2"))
    ]

    processed_condition = query.get_condition()
    assert expected_query.get_condition() == processed_condition
    assert processed_condition is not None

    formatted = processed_condition.accept(ClickhouseExpressionFormatter())
    assert formatted == formatted_value
def test_uuid_array_column_processor(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    """
    Applies SliceOfMapOptimizer to a "transactions" query whose condition is
    ``unprocessed`` and verifies that the condition becomes ``expected`` and
    formats to ``formatted_value``.

    NOTE(review): the test name says "uuid_array" while the processor under
    test is SliceOfMapOptimizer -- confirm the name is intentional and
    unique within its module.
    """
    source_table = Table("transactions", ColumnSet([]))
    query = Query(
        source_table,
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=unprocessed,
    )

    expected_table = Table("transactions", ColumnSet([]))
    expected_query = Query(
        expected_table,
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=expected,
    )

    optimizer = SliceOfMapOptimizer()
    optimizer.process_query(query, HTTPRequestSettings())

    processed_condition = query.get_condition()
    assert expected_query.get_condition() == processed_condition
    assert processed_condition is not None

    formatted = processed_condition.accept(ClickhouseExpressionFormatter())
    assert formatted == formatted_value
def test_uuid_array_column_processor(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    """
    Applies UUIDArrayColumnProcessor (columns "column1"/"column2") to a
    "transactions" query and verifies that:

    - the selected ``column2`` is wrapped as
      ``arrayMap(x -> replaceAll(toString(x), '-', ''), column2)``,
    - the condition ``unprocessed`` becomes ``expected``,
    - the processed condition formats to ``formatted_value``.
    """
    query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=unprocessed,
    )
    expected_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=expected,
    )

    processor = UUIDArrayColumnProcessor({"column1", "column2"})
    processor.process_query(query, HTTPRequestSettings())

    # Body of the lambda applied to every array element:
    # replaceAll(toString(x), '-', '')
    strip_dashes = FunctionCall(
        None,
        "replaceAll",
        (
            FunctionCall(None, "toString", (Argument(None, "x"),)),
            Literal(None, "-"),
            Literal(None, ""),
        ),
    )
    expected_selected = SelectedExpression(
        "column2",
        FunctionCall(
            None,
            "arrayMap",
            (
                Lambda(None, ("x",), strip_dashes),
                Column(None, None, "column2"),
            ),
        ),
    )
    assert query.get_selected_columns() == [expected_selected]

    processed_condition = query.get_condition()
    assert expected_query.get_condition() == processed_condition
    assert processed_condition is not None

    formatted = processed_condition.accept(ClickhouseExpressionFormatter())
    assert formatted == formatted_value
def test_hexint_column_processor(unprocessed: Expression, formatted_value: str) -> None:
    """
    Applies HexIntColumnProcessor (column "column1") to a "transactions"
    query and verifies that the selected ``column1`` is wrapped as
    ``lower(hex(column1))`` and that the processed condition formats to
    ``formatted_value``.
    """
    query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column1", Column(None, None, "column1"))
        ],
        condition=unprocessed,
    )

    processor = HexIntColumnProcessor({"column1"})
    processor.process_query(query, HTTPQuerySettings())

    hex_of_column = FunctionCall(None, "hex", (Column(None, None, "column1"),))
    lowered_hex = FunctionCall(None, "lower", (hex_of_column,))
    assert query.get_selected_columns() == [
        SelectedExpression("column1", lowered_hex)
    ]

    processed_condition = query.get_condition()
    assert processed_condition is not None

    formatted = processed_condition.accept(ClickhouseExpressionFormatter())
    assert formatted == formatted_value
def test_mand_conditions(table: str, mand_conditions: List[FunctionCall]) -> None:
    """
    Verifies that MandatoryConditionApplier produces the same condition as
    manually AND-ing the table's mandatory conditions onto the query.

    The query starts with the condition ``d = '1' AND c = '3'``; a deep copy
    taken before processing gets ``combine_and_conditions(mand_conditions)``
    added by hand and serves as the expectation.
    """
    equals = OPERATOR_TO_FUNCTION["="]
    d_equals_1 = binary_condition(equals, Column("d", None, "d"), Literal(None, "1"))
    c_equals_3 = binary_condition(equals, Column("c", None, "c"), Literal(None, "3"))

    source = Table(
        table,
        ColumnSet([]),
        final=False,
        sampling_rate=None,
        mandatory_conditions=mand_conditions,
    )
    query = Query(
        source,
        None,
        None,
        binary_condition(BooleanFunctions.AND, d_equals_1, c_equals_3),
    )

    # Snapshot the query before the processor mutates it.
    reference_query = copy.deepcopy(query)

    MandatoryConditionApplier().process_query(
        query, HTTPRequestSettings(consistent=True)
    )

    reference_query.add_condition_to_ast(combine_and_conditions(mand_conditions))

    assert query.get_condition_from_ast() == reference_query.get_condition_from_ast()
def test_query_data_source() -> None:
    """
    Checks the column set a Query exposes through ``get_columns()``.

    Selected expressions with a name are listed under that name; the third
    expression, selected with name ``None``, is expected to be listed as
    ``_invalid_alias_2``.
    """
    col1 = SelectedExpression(
        "col1", Column(alias="col1", table_name=None, column_name="col1")
    )
    some_func = SelectedExpression(
        "some_func",
        FunctionCall(
            "some_func",
            "f",
            (Column(alias="col1", table_name=None, column_name="col1"),),
        ),
    )
    unnamed_col2 = SelectedExpression(
        None, Column(alias="col2", table_name=None, column_name="col2")
    )

    query = Query(
        Table("my_table", ColumnSet([])),
        selected_columns=[col1, some_func, unnamed_col2],
    )

    expected_columns = ColumnSet(
        [("col1", Any()), ("some_func", Any()), ("_invalid_alias_2", Any())]
    )
    assert query.get_columns() == expected_columns
def test_events_column_format_expressions() -> None:
    """
    Applies GroupIdColumnProcessor to an "events" query selecting
    ``culprit``, ``group_id`` and ``message`` and verifies that only
    ``group_id`` is rewritten, to ``nullIf(group_id, 0)`` under its original
    alias. The second and third selected expressions are also checked in
    their formatted Clickhouse form.
    """
    query = Query(
        Table("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression("dr_claw", Column("dr_claw", None, "culprit")),
            SelectedExpression("the_group_id", Column("the_group_id", None, "group_id")),
            SelectedExpression("the_message", Column("the_message", None, "message")),
        ],
    )

    null_if_zero = FunctionCall(
        "the_group_id",
        "nullIf",
        (
            Column(None, None, "group_id"),
            Literal(None, 0),
        ),
    )
    expected_query = Query(
        Table("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression("dr_claw", Column("dr_claw", None, "culprit")),
            SelectedExpression("the_group_id", null_if_zero),
            SelectedExpression(
                "the_message",
                Column("the_message", None, "message"),
            ),
        ],
    )

    GroupIdColumnProcessor().process_query(query, HTTPRequestSettings())
    assert expected_query.get_selected_columns() == query.get_selected_columns()

    expected_formatted = (
        "(nullIf(group_id, 0) AS the_group_id)",
        "(message AS the_message)",
    )
    # The first selected column (culprit) is skipped when checking formatting.
    columns_after_first = query.get_selected_columns()[1:]
    for position, selected in enumerate(columns_after_first):
        rendered = selected.expression.accept(ClickhouseExpressionFormatter())
        assert expected_formatted[position] == rendered
def test_event_id_column_format_expressions() -> None:
    """
    Applies EventIdColumnProcessor to an "events" query selecting
    ``duration`` and ``event_id`` and verifies that ``event_id`` is rewritten
    to ``replaceAll(toString(event_id), '-', '')`` under its original alias,
    both in the AST and in the formatted Clickhouse expression.
    """
    query = Query(
        Table("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration", Column("transaction.duration", None, "duration")
            ),
            SelectedExpression(
                "the_event_id", Column("the_event_id", None, "event_id")
            ),
        ],
    )

    event_id_as_string = FunctionCall(
        None,
        "toString",
        (Column(None, None, "event_id"),),
    )
    dashless_event_id = FunctionCall(
        "the_event_id",
        "replaceAll",
        (
            event_id_as_string,
            Literal(None, "-"),
            Literal(None, ""),
        ),
    )
    expected_query = Query(
        Table("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration", Column("transaction.duration", None, "duration")
            ),
            SelectedExpression("the_event_id", dashless_event_id),
        ],
    )

    EventIdColumnProcessor().process_query(query, HTTPRequestSettings())

    assert (
        expected_query.get_selected_columns_from_ast()
        == query.get_selected_columns_from_ast()
    )

    event_id_selection = query.get_selected_columns_from_ast()[1]
    rendered = event_id_selection.expression.accept(ClickhouseExpressionFormatter())
    assert rendered == "(replaceAll(toString(event_id), '-', '') AS the_event_id)"
def build_plan(table_name: str, storage_set: StorageSetKey) -> ClickhouseQueryPlan:
    """
    Builds a ClickhouseQueryPlan for a bare query on ``table_name``.

    The plan uses a SimpleQueryPlanExecutionStrategy on the cluster returned
    by ``get_cluster(storage_set)``; every processor list is empty.
    """
    bare_query = Query(Table(table_name, ColumnSet([])))
    execution_strategy = SimpleQueryPlanExecutionStrategy(
        get_cluster(storage_set),
        db_query_processors=[],
    )
    return ClickhouseQueryPlan(
        bare_query,
        execution_strategy,
        storage_set,
        plan_query_processors=[],
        db_query_processors=[],
    )
def test_invalid_uuid(unprocessed: Expression) -> None:
    """
    Verifies that UUIDColumnProcessor (columns "column1"/"column2") raises
    ColumnTypeError when run on a "transactions" query whose condition is
    ``unprocessed``.
    """
    selected = [SelectedExpression("column2", Column(None, None, "column2"))]
    query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=selected,
        condition=unprocessed,
    )

    # Construction stays inside the block so that an error raised by either
    # the constructor or process_query satisfies the expectation.
    with pytest.raises(ColumnTypeError):
        processor = UUIDColumnProcessor({"column1", "column2"})
        processor.process_query(query, HTTPRequestSettings())
def test_iterate_over_query() -> None:
    """
    Builds a query with the new AST and verifies the sequence of expressions
    yielded by ``get_all_expressions()``.

    The expected sequence lists, clause by clause, the nested expressions
    followed by the expression containing them, in the order: selected
    columns, condition, group by, order by, prewhere.
    """
    column1 = Column(None, "t1", "c1")
    column2 = Column(None, "t1", "c2")
    function_1 = FunctionCall("alias", "f1", (column1, column2))
    function_2 = FunctionCall("alias", "f2", (column2,))

    condition = binary_condition(ConditionFunctions.EQ, column1, Literal(None, "1"))
    prewhere = binary_condition(ConditionFunctions.EQ, column2, Literal(None, "2"))
    ordering = OrderBy(OrderByDirection.ASC, function_2)

    query = Query(
        Table("my_table", ColumnSet([])),
        selected_columns=[SelectedExpression("alias", function_1)],
        array_join=None,
        condition=condition,
        groupby=[function_1],
        prewhere=prewhere,
        having=None,
        order_by=[ordering],
    )

    from_selected_columns = [column1, column2, function_1]
    from_condition = [column1, Literal(None, "1"), condition]
    from_groupby = [column1, column2, function_1]
    from_order_by = [column2, function_2]
    from_prewhere = [column2, Literal(None, "2"), prewhere]

    expected_expressions = (
        from_selected_columns
        + from_condition
        + from_groupby
        + from_order_by
        + from_prewhere
    )

    assert list(query.get_all_expressions()) == expected_expressions
def test_format_clickhouse_specific_query() -> None:
    """
    Formats a query that uses several Clickhouse specific fields (FINAL,
    SAMPLE, ARRAY JOIN, WITH TOTALS, LIMIT BY) and checks the generated SQL.
    """
    source = Table("my_table", ColumnSet([]), final=True, sampling_rate=0.1)

    where_clause = binary_condition(
        "eq",
        lhs=Column(None, None, "column1"),
        rhs=Literal(None, "blabla"),
    )
    having_clause = binary_condition(
        "eq",
        lhs=Column(None, None, "column1"),
        rhs=Literal(None, 123),
    )

    query = Query(
        source,
        selected_columns=[
            SelectedExpression("column1", Column(None, None, "column1")),
            SelectedExpression("column2", Column(None, "table1", "column2")),
        ],
        condition=where_clause,
        groupby=[
            Column(None, None, "column1"),
            Column(None, "table1", "column2"),
        ],
        having=having_clause,
        order_by=[OrderBy(OrderByDirection.ASC, Column(None, None, "column1"))],
        array_join=Column(None, None, "column1"),
        totals=True,
        limitby=LimitBy(10, Column(None, None, "environment")),
    )
    query.set_offset(50)
    query.set_limit(100)

    clickhouse_query = format_query(query, HTTPRequestSettings())

    # One entry per SQL clause; each piece carries its own trailing space.
    expected_sql = "".join(
        [
            "SELECT column1, table1.column2 ",
            "FROM my_table FINAL SAMPLE 0.1 ",
            "ARRAY JOIN column1 ",
            "WHERE eq(column1, 'blabla') ",
            "GROUP BY column1, table1.column2 WITH TOTALS ",
            "HAVING eq(column1, 123) ",
            "ORDER BY column1 ASC ",
            "LIMIT 10 BY environment ",
            "LIMIT 100 OFFSET 50",
        ]
    )

    assert clickhouse_query.get_sql() == expected_sql
def test_query_parameters() -> None:
    """
    Verifies that the scalar parameters passed to the Query constructor
    (limitby, limit, offset, totals, granularity) and the table name of the
    from clause are returned unchanged by the corresponding accessors.
    """
    source = Table("my_table", ColumnSet([]))
    query = Query(
        source,
        limitby=(100, "environment"),
        limit=100,
        offset=50,
        totals=True,
        granularity=60,
    )

    assert query.get_limitby() == (100, "environment")
    assert query.get_limit() == 100
    assert query.get_offset() == 50
    assert query.has_totals() is True
    assert query.get_granularity() == 60

    from_clause = query.get_from_clause()
    assert from_clause.table_name == "my_table"
def test_prewhere(
    query_body: MutableMapping[str, Any],
    keys: Sequence[str],
    new_ast_condition: Optional[Expression],
    new_prewhere_ast_condition: Optional[Expression],
) -> None:
    """
    Parses ``query_body`` against the "events" dataset, runs
    PrewhereProcessor and verifies the resulting WHERE and PREWHERE
    conditions.

    :param query_body: raw query body handed to ``parse_query``.
    :param keys: fourth argument of the TableSource set as data source
        (presumably the prewhere candidate columns -- confirm against
        TableSource).
    :param new_ast_condition: expected value of
        ``get_condition_from_ast()`` after processing.
    :param new_prewhere_ast_condition: expected value of
        ``get_prewhere_ast()`` after processing.
    """
    # MAX_PREWHERE_CONDITIONS is a process-wide setting: remember the current
    # value and restore it afterwards so the override does not leak into
    # tests that run later in the same session.
    previous_max_prewhere = settings.MAX_PREWHERE_CONDITIONS
    settings.MAX_PREWHERE_CONDITIONS = 2
    try:
        events = get_dataset("events")
        query = parse_query(query_body, events)
        query.set_data_source(TableSource("my_table", ColumnSet([]), None, keys))

        request_settings = HTTPRequestSettings()
        processor = PrewhereProcessor()
        # NOTE(review): the processor receives Query(query) while the
        # assertions read `query`; this relies on the wrapper sharing state
        # with the wrapped query -- confirm.
        processor.process_query(Query(query), request_settings)

        assert query.get_condition_from_ast() == new_ast_condition
        assert query.get_prewhere_ast() == new_prewhere_ast_condition
    finally:
        settings.MAX_PREWHERE_CONDITIONS = previous_max_prewhere
def test_query_experiments() -> None:
    """
    Exercises the experiments API of Query:

    - ``set_experiments`` replaces the whole experiments mapping,
    - ``get_experiment_value`` returns the stored value, or ``None`` for an
      experiment that was never set,
    - ``add_experiment`` adds a single entry.
    """
    query = Query(
        Table("my_table", ColumnSet([])),
        limitby=LimitBy(
            100,
            [Column(alias=None, table_name="my_table", column_name="environment")],
        ),
        limit=100,
        offset=50,
        granularity=60,
    )

    query.set_experiments({"optimization1": True})
    assert query.get_experiments() == {"optimization1": True}
    # Identity check: `== True` would also accept 1 or any other value that
    # merely compares equal to True.
    assert query.get_experiment_value("optimization1") is True
    assert query.get_experiment_value("optimization2") is None

    query.add_experiment("optimization2", "group1")
    assert query.get_experiment_value("optimization2") == "group1"

    # A second set_experiments call discards the previous entries.
    query.set_experiments({"optimization3": 0.5})
    assert query.get_experiments() == {"optimization3": 0.5}
def test_replace_expression() -> None:
    """
    Builds a query with the new AST and uses ``transform_expressions`` to
    swap every call to ``f1(...)`` for ``tag('f1')`` (keeping the alias).

    The transformed query is then compared clause by clause, and through
    ``get_all_expressions()``, against a query written directly with the
    replacement.
    """
    column1 = Column(None, "t1", "c1")
    column2 = Column(None, "t1", "c2")
    function_1 = FunctionCall("alias", "f1", (column1, column2))
    function_2 = FunctionCall("alias", "f2", (column2,))

    condition = binary_condition(ConditionFunctions.EQ, function_1, Literal(None, "1"))
    prewhere = binary_condition(ConditionFunctions.EQ, function_1, Literal(None, "2"))
    orderby = OrderBy(OrderByDirection.ASC, function_2)

    query = Query(
        Table("my_table", ColumnSet([])),
        selected_columns=[SelectedExpression("alias", function_1)],
        array_join=None,
        condition=condition,
        groupby=[function_1],
        having=None,
        prewhere=prewhere,
        order_by=[orderby],
    )

    def swap_f1_for_tag(exp: Expression) -> Expression:
        # Anything that is not a call to f1 is passed through untouched.
        if not isinstance(exp, FunctionCall) or exp.function_name != "f1":
            return exp
        return FunctionCall(exp.alias, "tag", (Literal(None, "f1"),))

    query.transform_expressions(swap_f1_for_tag)

    def tagged() -> FunctionCall:
        # A fresh instance of the expected replacement for every use.
        return FunctionCall("alias", "tag", (Literal(None, "f1"),))

    expected_query = Query(
        Table("my_table", ColumnSet([])),
        selected_columns=[SelectedExpression("alias", tagged())],
        array_join=None,
        condition=binary_condition(
            ConditionFunctions.EQ,
            tagged(),
            Literal(None, "1"),
        ),
        groupby=[tagged()],
        prewhere=binary_condition(
            ConditionFunctions.EQ,
            tagged(),
            Literal(None, "2"),
        ),
        having=None,
        order_by=[orderby],
    )

    assert query.get_selected_columns() == expected_query.get_selected_columns()
    assert query.get_condition() == expected_query.get_condition()
    assert query.get_groupby() == expected_query.get_groupby()
    assert query.get_having() == expected_query.get_having()
    assert query.get_orderby() == expected_query.get_orderby()

    actual_expressions = list(query.get_all_expressions())
    assert actual_expressions == list(expected_query.get_all_expressions())
from snuba.query.processors.conditions_enforcer import ( MandatoryConditionEnforcer, OrgIdEnforcer, ProjectIdEnforcer, ) from snuba.request.request_settings import HTTPRequestSettings from snuba.state import set_config test_data = [ pytest.param( Query( Table("errors", ColumnSet([])), selected_columns=[], condition=binary_condition( BooleanFunctions.AND, in_condition(Column(None, None, "project_id"), [Literal(None, 123)]), binary_condition( "equals", Column(None, None, "org_id"), Literal(None, 1) ), ), ), True, id="Valid query. Both mandatory columns are there", ), pytest.param( Query( Table("errors", ColumnSet([])), selected_columns=[], condition=binary_condition( BooleanFunctions.AND, binary_condition(
schema=schema2, ) merged_columns = ColumnSet([ ("timestamp", DateTime()), ("mismatched1", String(Modifiers(nullable=True))), ("mismatched2", String(Modifiers(nullable=True))), ]) test_data = [ pytest.param( Query( Table("discover", merged_columns), selected_columns=[ SelectedExpression( name="_snuba_count_unique_sdk_version", expression=FunctionCall( None, "uniq", (Column(None, None, "mismatched1"), )), ) ], ), Query( Table("discover", merged_columns), selected_columns=[ SelectedExpression( name="_snuba_count_unique_sdk_version", expression=FunctionCall( None, "uniq", (FunctionCall( None, "cast",
import pytest from snuba.clickhouse.columns import ColumnSet from snuba.clickhouse.processors import QueryProcessor from snuba.clickhouse.query import Query from snuba.query.data_source.simple import Table from snuba.query.processors.table_rate_limit import TableRateLimit from snuba.query.query_settings import HTTPQuerySettings from snuba.state import set_config from snuba.state.rate_limit import TABLE_RATE_LIMIT_NAME, RateLimitParameters test_data = [ pytest.param( TableRateLimit(), Query(Table("errors_local", ColumnSet([])), selected_columns=[], condition=None), "table_concurrent_limit_transactions_local", RateLimitParameters( rate_limit_name=TABLE_RATE_LIMIT_NAME, bucket="errors_local", per_second_limit=5000, concurrent_limit=1000, ), id="Set rate limiter on another table", ), pytest.param( TableRateLimit(), Query(Table("errors_local", ColumnSet([])), selected_columns=[], condition=None),
pytest.param( Query( Table("my_table", ColumnSet([])), selected_columns=[ SelectedExpression("column1", Column(None, None, "column1")), SelectedExpression("column2", Column(None, "table1", "column2")), SelectedExpression("column3", Column("al", None, "column3")), ], condition=binary_condition( "eq", lhs=Column("al", None, "column3"), rhs=Literal(None, "blabla"), ), groupby=[ Column(None, None, "column1"), Column(None, "table1", "column2"), Column("al", None, "column3"), Column(None, None, "column4"), ], having=binary_condition( "eq", lhs=Column(None, None, "column1"), rhs=Literal(None, 123), ), order_by=[ OrderBy(OrderByDirection.ASC, Column(None, None, "column1")), OrderBy(OrderByDirection.DESC, Column(None, "table1", "column2")), ], ), [