def test_uuid_array_column_processor(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    """FixedStringArrayColumnProcessor must rewrite the condition while
    leaving the selected columns untouched, and the rewritten condition
    must format to the expected ClickHouse SQL string."""
    unprocessed_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=unprocessed,
    )
    expected_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=expected,
    )

    # Set literal instead of set([...]) — same behavior, more idiomatic.
    FixedStringArrayColumnProcessor({"column1", "column2"}, 32).process_query(
        unprocessed_query, HTTPQuerySettings()
    )

    # The processor must not touch the selected columns.
    assert unprocessed_query.get_selected_columns() == [
        SelectedExpression("column2", Column(None, None, "column2"))
    ]

    assert expected_query.get_condition() == unprocessed_query.get_condition()
    condition = unprocessed_query.get_condition()
    assert condition is not None
    ret = condition.accept(ClickhouseExpressionFormatter())
    assert ret == formatted_value
def test_type_condition_optimizer() -> None:
    """TypeConditionOptimizer should replace the `type != 'transaction'`
    clause with the literal 1, leaving the other condition intact."""
    other_condition = binary_condition(
        ConditionFunctions.EQ, Column(None, None, "col1"), Literal(None, "val1")
    )
    unprocessed_query = Query(
        Table("errors", ColumnSet([])),
        condition=binary_condition(
            BooleanFunctions.AND,
            binary_condition(
                ConditionFunctions.NEQ,
                Column(None, None, "type"),
                Literal(None, "transaction"),
            ),
            other_condition,
        ),
    )
    expected_query = Query(
        Table("errors", ColumnSet([])),
        condition=binary_condition(
            BooleanFunctions.AND, Literal(None, 1), other_condition
        ),
    )

    TypeConditionOptimizer().process_query(unprocessed_query, HTTPQuerySettings())

    assert expected_query.get_condition() == unprocessed_query.get_condition()
    condition = unprocessed_query.get_condition()
    assert condition is not None
    ret = condition.accept(ClickhouseExpressionFormatter())
    assert ret == "1 AND equals(col1, 'val1')"
def test_uuid_array_column_processor(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    """SliceOfMapOptimizer must rewrite the condition into the expected
    form, which must format to the expected ClickHouse SQL string."""
    unprocessed_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=unprocessed,
    )
    expected_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=expected,
    )

    SliceOfMapOptimizer().process_query(unprocessed_query, HTTPRequestSettings())

    assert expected_query.get_condition() == unprocessed_query.get_condition()
    condition = unprocessed_query.get_condition()
    assert condition is not None
    ret = condition.accept(ClickhouseExpressionFormatter())
    assert ret == formatted_value
def test_uuid_array_column_processor(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    """UUIDArrayColumnProcessor should wrap selected UUID-array columns in
    an arrayMap that strips the dashes from each element, and rewrite the
    condition to the expected form."""
    unprocessed_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=unprocessed,
    )
    expected_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=expected,
    )

    # Set literal instead of set([...]) — same behavior, more idiomatic.
    UUIDArrayColumnProcessor({"column1", "column2"}).process_query(
        unprocessed_query, HTTPRequestSettings()
    )

    assert unprocessed_query.get_selected_columns() == [
        SelectedExpression(
            "column2",
            FunctionCall(
                None,
                "arrayMap",
                (
                    Lambda(
                        None,
                        ("x",),
                        FunctionCall(
                            None,
                            "replaceAll",
                            (
                                FunctionCall(None, "toString", (Argument(None, "x"),)),
                                Literal(None, "-"),
                                Literal(None, ""),
                            ),
                        ),
                    ),
                    Column(None, None, "column2"),
                ),
            ),
        )
    ]

    assert expected_query.get_condition() == unprocessed_query.get_condition()
    condition = unprocessed_query.get_condition()
    assert condition is not None
    ret = condition.accept(ClickhouseExpressionFormatter())
    assert ret == formatted_value
def test_hexint_column_processor(unprocessed: Expression, formatted_value: str) -> None:
    """HexIntColumnProcessor should wrap the selected column in
    lower(hex(...)) and rewrite the condition accordingly."""
    unprocessed_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column1", Column(None, None, "column1"))
        ],
        condition=unprocessed,
    )

    # Set literal instead of set([...]) — same behavior, more idiomatic.
    HexIntColumnProcessor({"column1"}).process_query(
        unprocessed_query, HTTPQuerySettings()
    )

    assert unprocessed_query.get_selected_columns() == [
        SelectedExpression(
            "column1",
            FunctionCall(
                None,
                "lower",
                (FunctionCall(None, "hex", (Column(None, None, "column1"),)),),
            ),
        )
    ]

    condition = unprocessed_query.get_condition()
    assert condition is not None
    ret = condition.accept(ClickhouseExpressionFormatter())
    assert ret == formatted_value
def test_mand_conditions(table: str, mand_conditions: List[FunctionCall]) -> None:
    """MandatoryConditionApplier must AND the table's mandatory conditions
    onto the query's existing condition."""
    query = Query(
        Table(
            table,
            ColumnSet([]),
            final=False,
            sampling_rate=None,
            mandatory_conditions=mand_conditions,
        ),
        None,
        None,
        binary_condition(
            BooleanFunctions.AND,
            binary_condition(
                OPERATOR_TO_FUNCTION["="],
                Column("d", None, "d"),
                Literal(None, "1"),
            ),
            binary_condition(
                OPERATOR_TO_FUNCTION["="],
                Column("c", None, "c"),
                Literal(None, "3"),
            ),
        ),
    )

    # Keep a pristine copy so the expected condition can be built from it.
    reference_query = copy.deepcopy(query)

    MandatoryConditionApplier().process_query(
        query, HTTPRequestSettings(consistent=True)
    )

    reference_query.add_condition_to_ast(combine_and_conditions(mand_conditions))
    assert query.get_condition_from_ast() == reference_query.get_condition_from_ast()
def test_query_data_source() -> None:
    """Use a Query as a data source and check the derived column set.

    The third column has no alias, so it gets a generated
    `_invalid_alias_2` name.
    """
    query = Query(
        Table("my_table", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "col1", Column(alias="col1", table_name=None, column_name="col1")
            ),
            SelectedExpression(
                "some_func",
                FunctionCall(
                    "some_func",
                    "f",
                    (Column(alias="col1", table_name=None, column_name="col1"),),
                ),
            ),
            SelectedExpression(
                None, Column(alias="col2", table_name=None, column_name="col2")
            ),
        ],
    )

    expected_columns = ColumnSet(
        [("col1", Any()), ("some_func", Any()), ("_invalid_alias_2", Any())]
    )
    assert query.get_columns() == expected_columns
def clickhouse_groups_node(
    selected_columns: Sequence[SelectedExpression],
    condition: Optional[Expression] = None,
) -> IndividualNode[Table]:
    """Build the groupedmessage ("gr") side of a join for tests."""
    groups_table = Table("groupedmessage_local", GROUPS_SCHEMA)
    return build_clickhouse_node("gr", groups_table, selected_columns, condition)
def query_with_timestamp() -> ClickhouseQuery:
    """Query on project 2 restricted to the fixed range 2021-01-01..01-02."""
    time_range = build_time_range(datetime(2021, 1, 1), datetime(2021, 1, 2))
    return ClickhouseQuery(
        Table("my_table", ColumnSet([])),
        condition=build_and(build_in("project_id", [2]), time_range),
    )
def test_events_column_format_expressions() -> None:
    """GroupIdColumnProcessor should rewrite group_id to nullIf(group_id, 0)
    and leave every other column alone."""
    unprocessed = Query(
        Table("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression("dr_claw", Column("dr_claw", None, "culprit")),
            SelectedExpression(
                "the_group_id", Column("the_group_id", None, "group_id")
            ),
            SelectedExpression("the_message", Column("the_message", None, "message")),
        ],
    )
    expected_query = Query(
        Table("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression("dr_claw", Column("dr_claw", None, "culprit")),
            SelectedExpression(
                "the_group_id",
                FunctionCall(
                    "the_group_id",
                    "nullIf",
                    (Column(None, None, "group_id"), Literal(None, 0)),
                ),
            ),
            SelectedExpression("the_message", Column("the_message", None, "message")),
        ],
    )

    GroupIdColumnProcessor().process_query(unprocessed, HTTPRequestSettings())
    assert expected_query.get_selected_columns() == unprocessed.get_selected_columns()

    # The first column is unchanged; check how the other two format.
    expected_strings = (
        "(nullIf(group_id, 0) AS the_group_id)",
        "(message AS the_message)",
    )
    for expected_string, column in zip(
        expected_strings, unprocessed.get_selected_columns()[1:]
    ):
        formatted = column.expression.accept(ClickhouseExpressionFormatter())
        assert expected_string == formatted
def test_event_id_column_format_expressions() -> None:
    """EventIdColumnProcessor should rewrite event_id to a dashless string
    via replaceAll(toString(event_id), '-', '')."""
    unprocessed = Query(
        Table("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration", Column("transaction.duration", None, "duration")
            ),
            SelectedExpression(
                "the_event_id", Column("the_event_id", None, "event_id")
            ),
        ],
    )
    expected = Query(
        Table("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration", Column("transaction.duration", None, "duration")
            ),
            SelectedExpression(
                "the_event_id",
                FunctionCall(
                    "the_event_id",
                    "replaceAll",
                    (
                        FunctionCall(None, "toString", (Column(None, None, "event_id"),)),
                        Literal(None, "-"),
                        Literal(None, ""),
                    ),
                ),
            ),
        ],
    )

    EventIdColumnProcessor().process_query(unprocessed, HTTPRequestSettings())

    assert (
        expected.get_selected_columns_from_ast()
        == unprocessed.get_selected_columns_from_ast()
    )

    rewritten = unprocessed.get_selected_columns_from_ast()[1].expression
    formatted = rewritten.accept(ClickhouseExpressionFormatter())
    assert formatted == "(replaceAll(toString(event_id), '-', '') AS the_event_id)"
def query_with_future_timestamp() -> ClickhouseQuery:
    """Query on project 2 with a time range entirely in the future."""
    future_range = build_time_range(
        datetime.now() + timedelta(days=1),
        datetime.now() + timedelta(days=2),
    )
    return ClickhouseQuery(
        Table("my_table", ColumnSet([])),
        condition=build_and(build_in("project_id", [2]), future_range),
    )
def clickhouse_assignees_node(
    selected_columns: Sequence[SelectedExpression],
    condition: Optional[Expression] = None,
) -> IndividualNode[Table]:
    """Build the groupassignee ("as") side of a join for tests."""
    assignee_table = Table("groupassignee_local", GROUPS_ASSIGNEE)
    return build_clickhouse_node("as", assignee_table, selected_columns, condition)
def get_query_data_source(
    relational_source: RelationalSource,
    final: bool,
    sampling_rate: Optional[float],
) -> Table:
    """Convert a logical TableSource into a physical Table data source.

    Raises AssertionError if the source is not a TableSource.
    """
    assert isinstance(relational_source, TableSource)
    return Table(
        table_name=relational_source.get_table_name(),
        schema=relational_source.get_columns(),
        final=final,
        sampling_rate=sampling_rate,
        mandatory_conditions=relational_source.get_mandatory_conditions(),
    )
def build_plan(table_name: str, storage_set: StorageSetKey) -> ClickhouseQueryPlan:
    """Minimal ClickhouseQueryPlan over an empty-schema table with no
    query processors, targeting the given storage set."""
    execution_strategy = SimpleQueryPlanExecutionStrategy(
        get_cluster(storage_set),
        db_query_processors=[],
    )
    return ClickhouseQueryPlan(
        Query(Table(table_name, ColumnSet([]))),
        execution_strategy,
        storage_set,
        plan_query_processors=[],
        db_query_processors=[],
    )
def clickhouse_events_node(
    selected_columns: Sequence[SelectedExpression],
    condition: Optional[Expression] = None,
    groupby: Optional[Sequence[Expression]] = None,
) -> IndividualNode[Table]:
    """Build the errors ("ev") side of a join for tests."""
    errors_table = Table("sentry_errors", EVENTS_SCHEMA)
    return build_clickhouse_node(
        "ev", errors_table, selected_columns, condition, groupby
    )
def test_format_clickhouse_specific_query() -> None:
    """Format a query exercising the ClickHouse-specific clauses (FINAL,
    SAMPLE, ARRAY JOIN, WITH TOTALS, LIMIT BY) and check the structured
    SQL output."""
    query = ClickhouseQuery(
        Table("my_table", ColumnSet([]), final=True, sampling_rate=0.1),
        selected_columns=[
            SelectedExpression("column1", Column(None, None, "column1")),
            SelectedExpression("column2", Column(None, "table1", "column2")),
        ],
        condition=binary_condition(
            "eq",
            lhs=Column(None, None, "column1"),
            rhs=Literal(None, "blabla"),
        ),
        groupby=[
            Column(None, None, "column1"),
            Column(None, "table1", "column2"),
        ],
        having=binary_condition(
            "eq",
            lhs=Column(None, None, "column1"),
            rhs=Literal(None, 123),
        ),
        order_by=[OrderBy(OrderByDirection.ASC, Column(None, None, "column1"))],
        array_join=Column(None, None, "column1"),
        totals=True,
        limitby=LimitBy(10, Column(None, None, "environment")),
    )
    query.set_offset(50)
    query.set_limit(100)

    clickhouse_query = format_query(query, HTTPRequestSettings())

    assert clickhouse_query.structured() == [
        "SELECT column1, table1.column2",
        ["FROM", "my_table FINAL SAMPLE 0.1"],
        "ARRAY JOIN column1",
        "WHERE eq(column1, 'blabla')",
        "GROUP BY column1, table1.column2 WITH TOTALS",
        "HAVING eq(column1, 123)",
        "ORDER BY column1 ASC",
        "LIMIT 10 BY environment",
        "LIMIT 100 OFFSET 50",
    ]
def test_invalid_uuid(unprocessed: Expression) -> None:
    """UUIDColumnProcessor must reject conditions carrying malformed UUID
    literals by raising ColumnTypeError."""
    unprocessed_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=unprocessed,
    )

    with pytest.raises(ColumnTypeError):
        # Set literal instead of set([...]) — same behavior, more idiomatic.
        UUIDColumnProcessor({"column1", "column2"}).process_query(
            unprocessed_query, HTTPRequestSettings()
        )
def test_iterate_over_query() -> None:
    """Build a query and verify get_all_expressions walks every clause:
    selected columns, condition, groupby, order by, prewhere."""
    column1 = Column(None, "t1", "c1")
    column2 = Column(None, "t1", "c2")
    function_1 = FunctionCall("alias", "f1", (column1, column2))
    function_2 = FunctionCall("alias", "f2", (column2,))

    condition = binary_condition(ConditionFunctions.EQ, column1, Literal(None, "1"))
    prewhere = binary_condition(ConditionFunctions.EQ, column2, Literal(None, "2"))
    orderby = OrderBy(OrderByDirection.ASC, function_2)

    query = Query(
        Table("my_table", ColumnSet([])),
        selected_columns=[SelectedExpression("alias", function_1)],
        array_join=None,
        condition=condition,
        groupby=[function_1],
        prewhere=prewhere,
        having=None,
        order_by=[orderby],
    )

    expected_expressions = [
        # selected columns
        column1,
        column2,
        function_1,
        # condition
        column1,
        Literal(None, "1"),
        condition,
        # groupby
        column1,
        column2,
        function_1,
        # order by
        column2,
        function_2,
        # prewhere
        column2,
        Literal(None, "2"),
        prewhere,
    ]

    assert list(query.get_all_expressions()) == expected_expressions
def build_query(
    selected_columns: Optional[Sequence[Expression]] = None,
    condition: Optional[Expression] = None,
    having: Optional[Expression] = None,
) -> ClickhouseQuery:
    """Query over an empty 'test' table; each selected expression is
    named after its own alias."""
    selection = [
        SelectedExpression(name=expr.alias, expression=expr)
        for expr in (selected_columns or [])
    ]
    return ClickhouseQuery(
        Table("test", ColumnSet([])),
        selected_columns=selection,
        condition=condition,
        having=having,
    )
def test_prewhere(
    query_body: MutableMapping[str, Any],
    keys: Sequence[str],
    omit_if_final_keys: Sequence[str],
    new_ast_condition: Optional[Expression],
    new_prewhere_ast_condition: Optional[Expression],
    final: bool,
) -> None:
    """Run PrewhereProcessor and check the resulting WHERE and PREWHERE
    each contain the expected top-level AND conditions."""
    settings.MAX_PREWHERE_CONDITIONS = 2
    events = get_dataset("events")

    # HACK until we migrate these tests to SnQL
    query_body["selected_columns"] = ["project_id"]
    query_body["conditions"] += [
        ["timestamp", ">=", "2021-01-01T00:00:00"],
        ["timestamp", "<", "2021-01-02T00:00:00"],
        ["project_id", "=", 1],
    ]

    snql_query = json_to_snql(query_body, "events")
    query, _ = parse_snql_query(str(snql_query), events)
    query = identity_translate(query)
    query.set_from_clause(Table("my_table", all_columns, final=final))

    PrewhereProcessor(keys, omit_if_final=omit_if_final_keys).process_query(
        query, HTTPQuerySettings()
    )

    # HACK until we migrate these tests to SnQL
    def contains_all(top_level: Expression, expected: Expression) -> bool:
        # Every expected top-level AND clause must appear among the actual ones.
        actual_conds = get_first_level_and_conditions(top_level)
        return all(
            cond in actual_conds
            for cond in get_first_level_and_conditions(expected)
        )

    if new_ast_condition:
        condition = query.get_condition()
        assert condition is not None
        assert contains_all(condition, new_ast_condition)

    if new_prewhere_ast_condition:
        prewhere = query.get_prewhere_ast()
        assert prewhere is not None
        assert contains_all(prewhere, new_prewhere_ast_condition)
def test_query_parameters() -> None:
    """Scalar query parameters must round-trip through their accessors."""
    query = Query(
        Table("my_table", ColumnSet([])),
        limitby=(100, "environment"),
        limit=100,
        offset=50,
        totals=True,
        granularity=60,
    )

    assert query.get_limitby() == (100, "environment")
    assert query.get_limit() == 100
    assert query.get_offset() == 50
    assert query.has_totals() is True
    assert query.get_granularity() == 60
    assert query.get_from_clause().table_name == "my_table"
def test_set_limit_on_split_query() -> None:
    """ColumnSplitQueryStrategy first runs a minimal ID query; the
    follow-up query's limit must equal the number of rows the first
    query returned.

    (Adds the `-> None` return annotation for consistency with the other
    tests in this file.)
    """
    storage = get_dataset("events").get_default_entity().get_all_storages()[0]
    query = ClickhouseQuery(
        Table("events", storage.get_schema().get_columns()),
        selected_columns=[
            SelectedExpression(col.name, Column(None, None, col.name))
            for col in storage.get_schema().get_columns()
        ],
        limit=420,
    )

    query_run_count = 0

    def do_query(query: ClickhouseQuery, query_settings: QuerySettings) -> QueryResult:
        nonlocal query_run_count
        query_run_count += 1
        if query_run_count == 1:
            # First (ID-only) query: return two rows.
            return QueryResult(
                result={
                    "data": [
                        {
                            "event_id": "a",
                            "project_id": "1",
                            "timestamp": " 2019-10-01 22:33:42",
                        },
                        {
                            "event_id": "a",
                            "project_id": "1",
                            "timestamp": " 2019-10-01 22:44:42",
                        },
                    ]
                },
                extra={},
            )
        else:
            # Second query: limit must be shrunk to the row count above.
            assert query.get_limit() == 2
            return QueryResult({}, {})

    ColumnSplitQueryStrategy(
        id_column="event_id",
        project_column="project_id",
        timestamp_column="timestamp",
    ).execute(query, HTTPQuerySettings(), do_query)

    assert query_run_count == 2
def test_prewhere(
    query_body: MutableMapping[str, Any],
    keys: Sequence[str],
    new_ast_condition: Optional[Expression],
    new_prewhere_ast_condition: Optional[Expression],
) -> None:
    """PrewhereProcessor should move the candidate conditions into
    PREWHERE, leaving the expected WHERE condition behind."""
    settings.MAX_PREWHERE_CONDITIONS = 2
    events = get_dataset("events")

    query = identity_translate(parse_query(query_body, events))
    query.set_from_clause(Table("my_table", ColumnSet([]), prewhere_candidates=keys))

    PrewhereProcessor().process_query(query, HTTPRequestSettings())

    assert query.get_condition_from_ast() == new_ast_condition
    assert query.get_prewhere_ast() == new_prewhere_ast_condition
def test_query_experiments() -> None:
    """Experiments can be set wholesale, added individually, and replaced;
    lookups of unknown experiments return None."""
    query = Query(
        Table("my_table", ColumnSet([])),
        limitby=LimitBy(
            100,
            [Column(alias=None, table_name="my_table", column_name="environment")],
        ),
        limit=100,
        offset=50,
        granularity=60,
    )

    query.set_experiments({"optimization1": True})
    assert query.get_experiments() == {"optimization1": True}
    # `is` for singleton comparison rather than `== True` (PEP 8 / E712).
    assert query.get_experiment_value("optimization1") is True
    assert query.get_experiment_value("optimization2") is None

    query.add_experiment("optimization2", "group1")
    assert query.get_experiment_value("optimization2") == "group1"

    # set_experiments replaces the whole mapping, dropping earlier entries.
    query.set_experiments({"optimization3": 0.5})
    assert query.get_experiments() == {"optimization3": 0.5}
def query() -> ClickhouseQuery:
    """Minimal query filtering on project_id = 2 with no time range."""
    return ClickhouseQuery(
        Table("my_table", ColumnSet([])),
        condition=build_in("project_id", [2]),
    )
from snuba.query.processors.mandatory_condition_applier import MandatoryConditionApplier
from snuba.reader import Reader
from snuba.request.request_settings import HTTPRequestSettings, RequestSettings
from snuba.web import QueryResult

# Entity and physical-table fixtures shared by the tests below.
events_ent = Entity(EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model())
events_storage = get_entity(EntityKey.EVENTS).get_writable_storage()
assert events_storage is not None
events_table_name = events_storage.get_table_writer().get_schema().get_table_name()
events_table = Table(
    events_table_name,
    events_storage.get_schema().get_columns(),
    final=False,
    sampling_rate=None,
    mandatory_conditions=events_storage.get_schema()
    .get_data_source()
    .get_mandatory_conditions(),
)

groups_ent = Entity(
    EntityKey.GROUPEDMESSAGES, get_entity(EntityKey.GROUPEDMESSAGES).get_data_model()
)
groups_storage = get_storage(StorageKey.GROUPEDMESSAGES)
groups_table = Table(
    groups_storage.get_schema().get_table_name(),
    groups_storage.get_schema().get_columns(),
    final=False,
    sampling_rate=None,
    mandatory_conditions=groups_storage.get_schema()
    .get_data_source()
    .get_mandatory_conditions(),
)
in_condition, ) from snuba.query.data_source.simple import Table from snuba.query.expressions import Column, Literal from snuba.query.processors.conditions_enforcer import ( MandatoryConditionEnforcer, OrgIdEnforcer, ProjectIdEnforcer, ) from snuba.request.request_settings import HTTPRequestSettings from snuba.state import set_config test_data = [ pytest.param( Query( Table("errors", ColumnSet([])), selected_columns=[], condition=binary_condition( BooleanFunctions.AND, in_condition(Column(None, None, "project_id"), [Literal(None, 123)]), binary_condition( "equals", Column(None, None, "org_id"), Literal(None, 1) ), ), ), True, id="Valid query. Both mandatory columns are there", ), pytest.param( Query( Table("errors", ColumnSet([])),
ERRORS_SCHEMA = ColumnSet([ ("event_id", UUID()), ("project_id", UInt(32)), ("message", String()), ("group_id", UInt(32)), ]) GROUPS_SCHEMA = ColumnSet([ ("id", UInt(32)), ("project_id", UInt(32)), ("group_id", UInt(32)), ("message", String()), ]) GROUPS_ASSIGNEE = ColumnSet([("id", UInt(32)), ("user", String())]) node_err = IndividualNode(alias="err", data_source=Table("errors_local", ERRORS_SCHEMA)) node_group = IndividualNode(alias="groups", data_source=Table("groupedmessage_local", GROUPS_SCHEMA)) node_assignee = IndividualNode(alias="assignee", data_source=Table("groupassignee_local", GROUPS_ASSIGNEE)) test_cases = [ pytest.param( Query( Table("my_table", ColumnSet([])), selected_columns=[ SelectedExpression("column1", Column(None, None, "column1")), SelectedExpression("column2", Column(None, "table1", "column2")),
FunctionCall( "alias2", "f1", (Column(None, None, "column2"), Column(None, None, "column3")), ), ), SelectedExpression( name=None, expression=SubscriptableReference( None, Column(None, None, "tags"), Literal(None, "myTag") ), ), ], ), ClickhouseQuery( from_clause=Table("my_table", ColumnSet([])), selected_columns=[ SelectedExpression("alias", Column("alias", "table", "column")), SelectedExpression( "alias2", FunctionCall( "alias2", "f1", (Column(None, None, "column2"), Column(None, None, "column3"),), ), ), SelectedExpression( name=None, expression=SubscriptableReference( None, Column(None, None, "tags"), Literal(None, "myTag") ),