def test_tuple_unaliaser(input_query, expected_query):
    """With the rollout flag on, TupleUnaliaser must rewrite input_query
    into expected_query (mutating the query in place)."""
    set_config("tuple_unaliaser_rollout", 1)
    query_settings = HTTPQuerySettings()
    TupleUnaliaser().process_query(input_query, query_settings)
    assert input_query == expected_query
def test_transform_column_names() -> None:
    """
    Runs a simple query containing selected expressions names that do not
    match the aliases of the expressions themselves. It verifies that the
    names of the columns in the result correspond to the SelectedExpression
    names and not to the expression aliases (which are supposed to be
    internal).
    """
    events_storage = get_entity(EntityKey.EVENTS).get_writable_storage()
    assert events_storage is not None
    event_id = uuid.uuid4().hex
    event_date = datetime.utcnow()
    # Write a single event so the query below has exactly one row to return.
    write_unprocessed_events(
        events_storage,
        [
            InsertEvent(
                {
                    "event_id": event_id,
                    "group_id": 10,
                    "primary_hash": uuid.uuid4().hex,
                    "project_id": 1,
                    "message": "a message",
                    "platform": "python",
                    "datetime": event_date.strftime(settings.PAYLOAD_DATETIME_FORMAT),
                    "data": {"received": time.time()},
                    "organization_id": 1,
                    "retention_days": settings.DEFAULT_RETENTION_DAYS,
                }
            )
        ],
    )

    query = Query(
        Entity(EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model()),
        selected_columns=[
            # The selected expression names are those provided by the
            # user in the query and those the user expect in the response.
            # Aliases will be internal names to prevent shadowing.
            SelectedExpression("event_id", Column("_snuba_event_id", None, "event_id")),
            SelectedExpression(
                "message",
                FunctionCall(
                    "_snuba_message",
                    "ifNull",
                    (Column(None, None, "message"), Literal(None, "")),
                ),
            ),
        ],
    )

    query_settings = HTTPQuerySettings(referrer="asd")
    dataset = get_dataset("events")
    timer = Timer("test")
    result = parse_and_run_query(
        dataset,
        Request(
            id="asd",
            original_body={},
            query=query,
            snql_anonymized="",
            query_settings=query_settings,
            attribution_info=AttributionInfo(
                get_app_id("blah"), "blah", None, None, None
            ),
        ),
        timer,
    )

    data = result.result["data"]
    # The result rows must be keyed by the SelectedExpression names
    # ("event_id", "message"), not by the internal "_snuba_*" aliases.
    assert data == [{"event_id": event_id, "message": "a message"}]
    meta = result.result["meta"]
    assert meta == [
        MetaColumn(name="event_id", type="String"),
        MetaColumn(name="message", type="String"),
    ]
def test_format_expressions(pre_format: Query, expected_query: Query) -> None:
    """Run BasicFunctionsProcessor on a copy of pre_format and check the
    selected columns, groupby and condition against expected_query."""
    processed = deepcopy(pre_format)
    BasicFunctionsProcessor().process_query(processed, HTTPQuerySettings())

    assert processed.get_selected_columns() == expected_query.get_selected_columns()
    assert processed.get_groupby() == expected_query.get_groupby()
    assert processed.get_condition() == expected_query.get_condition()
def test_valid_uniq_queries(input_query: ClickhouseQuery) -> None:
    """Valid uniq usage must pass through the processor without raising and
    without modifying the query."""
    set_config("throw_on_uniq_select_and_having", True)
    snapshot = deepcopy(input_query)
    UniqInSelectAndHavingProcessor().process_query(input_query, HTTPQuerySettings())
    # The processor only validates; the query must be left unmodified.
    assert snapshot == input_query
def test_apdex_format_expressions() -> None:
    """
    apdex(column1, 300) must be expanded into the explicit countIf-based
    arithmetic, keeping the original "perf" alias on the outermost call,
    and the expanded expression must format to the expected ClickHouse SQL.
    """
    unprocessed = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "column2")),
            SelectedExpression(
                "perf",
                FunctionCall(
                    "perf", "apdex", (Column(None, None, "column1"), Literal(None, 300))
                ),
            ),
        ],
    )
    # Expected expansion:
    #   (countIf(column1 <= 300) + countIf(300 < column1 <= 300 * 4) / 2) / count()
    expected = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "column2")),
            SelectedExpression(
                "perf",
                divide(
                    plus(
                        # Satisfied requests: column1 <= threshold.
                        FunctionCall(
                            None,
                            "countIf",
                            (
                                binary_condition(
                                    ConditionFunctions.LTE,
                                    Column(None, None, "column1"),
                                    Literal(None, 300),
                                ),
                            ),
                        ),
                        # Tolerated requests (threshold < column1 <= 4*threshold)
                        # count half.
                        divide(
                            FunctionCall(
                                None,
                                "countIf",
                                (
                                    binary_condition(
                                        BooleanFunctions.AND,
                                        binary_condition(
                                            ConditionFunctions.GT,
                                            Column(None, None, "column1"),
                                            Literal(None, 300),
                                        ),
                                        binary_condition(
                                            ConditionFunctions.LTE,
                                            Column(None, None, "column1"),
                                            multiply(
                                                Literal(None, 300), Literal(None, 4)
                                            ),
                                        ),
                                    ),
                                ),
                            ),
                            Literal(None, 2),
                        ),
                    ),
                    FunctionCall(
                        None,
                        "count",
                        (),
                    ),
                    # The user-visible alias stays on the outermost expression.
                    "perf",
                ),
            ),
        ],
    )

    apdex_processor().process_query(unprocessed, HTTPQuerySettings())
    assert expected.get_selected_columns() == unprocessed.get_selected_columns()

    # The formatted ClickHouse expression must match exactly.
    ret = unprocessed.get_selected_columns()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert ret == (
        "(divide(plus(countIf(lessOrEquals(column1, 300)), "
        "divide(countIf(greater(column1, 300) AND "
        "lessOrEquals(column1, multiply(300, 4))), 2)), count()) AS perf)"
    )
def test_broken_query() -> None:
    """A query whose tag subscript is not a valid key literal must make
    TagsTypeTransformer raise InvalidExpressionException."""
    broken = build_query(Literal(None, "asdasd"))
    transformer = TagsTypeTransformer()
    with pytest.raises(InvalidExpressionException):
        transformer.process_query(broken, HTTPQuerySettings())
def test_invalid_uniq_queries(input_query: ClickhouseQuery) -> None:
    """With the kill switch enabled, mismatched uniq aggregations between
    SELECT and HAVING must raise MismatchedAggregationException."""
    set_config("throw_on_uniq_select_and_having", True)
    processor = UniqInSelectAndHavingProcessor()
    with pytest.raises(MismatchedAggregationException):
        processor.process_query(input_query, HTTPQuerySettings())
def test_handled_processor() -> None:
    """
    isHandled() must be rewritten into an arrayAll over the
    exception_stacks.mechanism_handled column: an entry counts as handled
    when the flag is NULL or non-zero. The rewritten expression must also
    format to the expected ClickHouse SQL.
    """
    entity = QueryEntity(EntityKey.EVENTS, ColumnSet([]))
    unprocessed = Query(
        entity,
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "id")),
            SelectedExpression(
                "result",
                FunctionCall(
                    "result",
                    "isHandled",
                    tuple(),
                ),
            ),
        ],
    )
    expected = Query(
        entity,
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "id")),
            SelectedExpression(
                "result",
                FunctionCall(
                    "result",
                    "arrayAll",
                    (
                        # x -> isNull(x) OR assumeNotNull(x) != 0
                        Lambda(
                            None,
                            ("x",),
                            binary_condition(
                                BooleanFunctions.OR,
                                FunctionCall(None, "isNull", (Argument(None, "x"),)),
                                binary_condition(
                                    ConditionFunctions.NEQ,
                                    FunctionCall(
                                        None, "assumeNotNull", (Argument(None, "x"),)
                                    ),
                                    Literal(None, 0),
                                ),
                            ),
                        ),
                        Column(None, None, "exception_stacks.mechanism_handled"),
                    ),
                ),
            ),
        ],
    )

    processor = handled_functions.HandledFunctionsProcessor(
        "exception_stacks.mechanism_handled"
    )
    processor.process_query(unprocessed, HTTPQuerySettings())

    assert expected.get_selected_columns() == unprocessed.get_selected_columns()
    ret = unprocessed.get_selected_columns()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert ret == (
        "(arrayAll((x -> (isNull(x) OR notEquals(assumeNotNull(x), 0))), exception_stacks.mechanism_handled) AS result)"
    )
def test_events_promoted_boolean_context() -> None:
    """
    When a boolean context is promoted to a dedicated column
    (device_charging), MappingColumnPromoter first replaces the
    arrayElement lookup with the promoted column cast to string, and
    EventsPromotedBooleanContextsProcessor then normalizes the value to
    the strings "True"/"False".
    """
    columns = ColumnSet(
        [
            ("device_charging", UInt(8, Modifier(nullable=True))),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ]
    )
    query = ClickhouseQuery(
        Table("events", columns),
        selected_columns=[
            SelectedExpression(
                "contexts[device.charging]",
                # Un-promoted form: arrayElement lookup on the nested
                # contexts key/value arrays.
                FunctionCall(
                    "contexts[device.charging]",
                    "arrayElement",
                    (
                        Column(None, None, "contexts.value"),
                        FunctionCall(
                            None,
                            "indexOf",
                            (
                                Column(None, None, "contexts.key"),
                                Literal(None, "device.charging"),
                            ),
                        ),
                    ),
                ),
            )
        ],
    )
    expected = ClickhouseQuery(
        Table("events", columns),
        selected_columns=[
            SelectedExpression(
                "contexts[device.charging]",
                # Promoted form: toString(device_charging) IN ('1', 'True')
                # mapped onto the canonical 'True'/'False' strings.
                FunctionCall(
                    "contexts[device.charging]",
                    "if",
                    (
                        binary_condition(
                            ConditionFunctions.IN,
                            FunctionCall(
                                None,
                                "toString",
                                (Column(None, None, "device_charging"),),
                            ),
                            literals_tuple(
                                None, [Literal(None, "1"), Literal(None, "True")]
                            ),
                        ),
                        Literal(None, "True"),
                        Literal(None, "False"),
                    ),
                ),
            )
        ],
    )
    settings = HTTPQuerySettings()
    # Promotion must run before the boolean-contexts processor.
    MappingColumnPromoter(
        {"contexts": {"device.charging": "device_charging"}}, cast_to_string=True
    ).process_query(query, settings)
    EventsPromotedBooleanContextsProcessor().process_query(query, settings)
    assert query.get_selected_columns() == expected.get_selected_columns()
def test() -> None:
    """
    Runs a query through a PipelineDelegator with two pipelines (errors as
    primary, errors_ro as secondary) and verifies that both runners execute,
    that each execution received its own copy of the query settings, and that
    the callback got the primary and secondary results.
    """
    # Condition variable used to wait for the callback, which fires once
    # the secondary pipeline has finished.
    cv = threading.Condition()
    query_result = QueryResult({}, {"stats": {}, "sql": "", "experiments": {}})

    def callback_func(
        primary: Optional[Tuple[str, QueryResult]],
        other: List[Tuple[str, QueryResult]],
    ) -> None:
        # Wake up the main thread waiting on cv below.
        with cv:
            cv.notify()

    mock_callback = Mock(side_effect=callback_func)

    query_body = {
        "query": """
        MATCH (events)
        SELECT type, project_id
        WHERE project_id = 1
        AND timestamp >= toDateTime('2020-01-01 12:00:00')
        AND timestamp < toDateTime('2020-01-02 12:00:00')
        """,
        "dataset": "events",
    }

    events = get_dataset("events")
    query, _ = parse_snql_query(query_body["query"], events)

    errors_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.ERRORS)
        ),
    )
    errors_ro_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.ERRORS_RO)
        ),
    )
    delegator = PipelineDelegator(
        query_pipeline_builders={
            "errors": errors_pipeline,
            "errors_ro": errors_ro_pipeline,
        },
        # Always select "errors" as primary and "errors_ro" as secondary.
        selector_func=lambda query, referrer: ("errors", ["errors_ro"]),
        split_rate_limiter=True,
        ignore_secondary_exceptions=True,
        callback_func=mock_callback,
    )

    runner_call_count = 0
    runner_settings: MutableSequence[QuerySettings] = []

    def query_runner(
        query: Union[Query, CompositeQuery[Table]],
        settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        # Record every invocation and the settings object it received.
        nonlocal runner_call_count
        nonlocal runner_settings
        runner_call_count += 1
        runner_settings.append(settings)
        return query_result

    set_config("pipeline_split_rate_limiter", 1)

    with cv:
        query_settings = HTTPQuerySettings(referrer="ref")
        delegator.build_execution_pipeline(
            Request(
                id="asd",
                original_body=query_body,
                query=query,
                snql_anonymized="",
                query_settings=query_settings,
                attribution_info=AttributionInfo(
                    get_app_id("ref"), "ref", None, None, None
                ),
            ),
            query_runner,
        ).execute()
        # Wait (bounded) for the secondary pipeline's callback to fire.
        cv.wait(timeout=5)

    assert runner_call_count == 2
    assert len(runner_settings) == 2
    settings, settings_ro = runner_settings
    # Validate that settings have been duplicated
    assert id(settings) != id(settings_ro)

    assert mock_callback.call_args == call(
        query,
        query_settings,
        "ref",
        Result("errors", query_result, ANY),
        [Result("errors_ro", query_result, ANY)],
    )
def test_simple() -> None:
    """
    Builds a SnubaQueryMetadata for a simple query, converts it to its dict
    form and verifies that the querylog stream processor turns it into the
    expected InsertBatch row.
    """
    request_body = {
        "selected_columns": ["event_id"],
        "orderby": "event_id",
        "sample": 0.1,
        "limit": 100,
        "offset": 50,
        "project": 1,
    }

    query = Query(
        Entity(EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model())
    )

    request = Request(
        id=uuid.UUID("a" * 32).hex,
        original_body=request_body,
        query=query,
        snql_anonymized="",
        query_settings=HTTPQuerySettings(referrer="search"),
        attribution_info=AttributionInfo(
            get_app_id("default"), "search", None, None, None
        ),
    )

    # Use a fake clock and advance it 10ms so the timer reports a
    # deterministic duration (asserted as "duration_ms": 10 below).
    time = TestingClock()

    timer = Timer("test", clock=time)
    time.sleep(0.01)

    message = SnubaQueryMetadata(
        request=request,
        start_timestamp=datetime.utcnow() - timedelta(days=3),
        end_timestamp=datetime.utcnow(),
        dataset="events",
        timer=timer,
        query_list=[
            ClickhouseQueryMetadata(
                sql="select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100",
                sql_anonymized="select event_id from sentry_dist sample 0.1 prewhere project_id in ($I) limit 50, 100",
                start_timestamp=datetime.utcnow() - timedelta(days=3),
                end_timestamp=datetime.utcnow(),
                stats={"sample": 10, "error_code": 386},
                status=QueryStatus.SUCCESS,
                profile=ClickhouseQueryProfile(
                    time_range=10,
                    table="events",
                    all_columns={"timestamp", "tags"},
                    multi_level_condition=False,
                    where_profile=FilterProfile(
                        columns={"timestamp"},
                        mapping_cols={"tags"},
                    ),
                    groupby_cols=set(),
                    array_join_cols=set(),
                ),
                trace_id="b" * 32,
            )
        ],
        projects={2},
        snql_anonymized=request.snql_anonymized,
        entity=EntityKey.EVENTS.value,
    ).to_dict()

    processor = (
        get_writable_storage(StorageKey.QUERYLOG)
        .get_table_writer()
        .get_stream_loader()
        .get_processor()
    )

    # The processor must flatten the metadata into the querylog row format,
    # including per-clickhouse-query parallel arrays.
    assert processor.process_message(
        message, KafkaMessageMetadata(0, 0, datetime.now())
    ) == InsertBatch(
        [
            {
                "request_id": str(uuid.UUID("a" * 32)),
                "request_body": '{"limit": 100, "offset": 50, "orderby": "event_id", "project": 1, "sample": 0.1, "selected_columns": ["event_id"]}',
                "referrer": "search",
                "dataset": "events",
                "projects": [2],
                "organization": None,
                "timestamp": timer.for_json()["timestamp"],
                "duration_ms": 10,
                "status": "success",
                "clickhouse_queries.sql": [
                    "select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100"
                ],
                "clickhouse_queries.status": ["success"],
                "clickhouse_queries.trace_id": [str(uuid.UUID("b" * 32))],
                "clickhouse_queries.duration_ms": [0],
                "clickhouse_queries.stats": ['{"error_code": 386, "sample": 10}'],
                "clickhouse_queries.final": [0],
                "clickhouse_queries.cache_hit": [0],
                "clickhouse_queries.sample": [10.0],
                "clickhouse_queries.max_threads": [0],
                "clickhouse_queries.num_days": [10],
                "clickhouse_queries.clickhouse_table": [""],
                "clickhouse_queries.query_id": [""],
                "clickhouse_queries.is_duplicate": [0],
                "clickhouse_queries.consistent": [0],
                "clickhouse_queries.all_columns": [["tags", "timestamp"]],
                "clickhouse_queries.or_conditions": [False],
                "clickhouse_queries.where_columns": [["timestamp"]],
                "clickhouse_queries.where_mapping_columns": [["tags"]],
                "clickhouse_queries.groupby_columns": [[]],
                "clickhouse_queries.array_join_columns": [[]],
            }
        ],
        None,
    )
def test_tags_expander() -> None:
    """
    TagsExpanderProcessor must replace every tags_key / tags_value reference
    (in SELECT, WHERE and HAVING alike) with an arrayJoin over tags.key /
    tags.value, preserving the "_snuba_*" aliases.
    """
    query_body = """
    MATCH (events)
    SELECT count(platform) AS platforms,
        testF(platform, tags_value) AS top_platforms,
        f1(tags_key, column2) AS f1_alias,
        f2() AS f2_alias
    WHERE tags_key = 'tags_key'
    AND project_id = 1
    AND timestamp >= toDateTime('2020-01-01 12:00:00')
    AND timestamp < toDateTime('2020-01-02 12:00:00')
    HAVING tags_value IN tuple('tag')
    """

    events = get_dataset("events")
    query, _ = parse_snql_query(query_body, events)

    processor = TagsExpanderProcessor()
    query_settings = HTTPQuerySettings()
    processor.process_query(query, query_settings)

    assert query.get_selected_columns() == [
        SelectedExpression(
            "platforms",
            FunctionCall(
                "_snuba_platforms",
                "count",
                (Column("_snuba_platform", None, "platform"),),
            ),
        ),
        SelectedExpression(
            "top_platforms",
            FunctionCall(
                "_snuba_top_platforms",
                "testF",
                (
                    Column("_snuba_platform", None, "platform"),
                    # tags_value expanded to arrayJoin(tags.value).
                    FunctionCall(
                        "_snuba_tags_value",
                        "arrayJoin",
                        (Column(None, None, "tags.value"),),
                    ),
                ),
            ),
        ),
        SelectedExpression(
            "f1_alias",
            FunctionCall(
                "_snuba_f1_alias",
                "f1",
                (
                    # tags_key expanded to arrayJoin(tags.key).
                    FunctionCall(
                        "_snuba_tags_key",
                        "arrayJoin",
                        (Column(None, None, "tags.key"),),
                    ),
                    Column("_snuba_column2", None, "column2"),
                ),
            ),
        ),
        SelectedExpression("f2_alias", FunctionCall("_snuba_f2_alias", "f2", tuple())),
    ]

    # The WHERE clause condition on tags_key is expanded the same way.
    condition = query.get_condition()
    assert condition is not None
    conds = get_first_level_and_conditions(condition)
    assert conds[0] == binary_condition(
        OPERATOR_TO_FUNCTION["="],
        FunctionCall("_snuba_tags_key", "arrayJoin", (Column(None, None, "tags.key"),)),
        Literal(None, "tags_key"),
    )

    # And so is the HAVING clause condition on tags_value.
    assert query.get_having() == in_condition(
        FunctionCall(
            "_snuba_tags_value", "arrayJoin", (Column(None, None, "tags.value"),)
        ),
        [Literal(None, "tag")],
    )
def test_composite_planner(
    logical_query: CompositeQuery[Entity],
    composite_plan: CompositeQueryPlan,
    processed_query: CompositeQuery[Table],
) -> None:
    """
    Builds the best plan for a composite logical query and verifies it
    against the expected plan (query structure plus processor types), then
    runs the full CompositeExecutionPipeline and checks, inside the runner,
    the physical query that would be executed.
    """

    def assert_subquery_processors_equality(
        query: SubqueryProcessors, expected: SubqueryProcessors
    ) -> None:
        # Compare processors by type only: instances differ between the
        # built plan and the expected fixture.
        assert [type(x) for x in query.plan_processors] == [
            type(x) for x in expected.plan_processors
        ]
        assert [type(x) for x in query.db_processors] == [
            type(x) for x in expected.db_processors
        ]

    plan = CompositeQueryPlanner(
        deepcopy(logical_query), HTTPQuerySettings()
    ).build_best_plan()
    report = plan.query.equals(composite_plan.query)
    assert report[0], f"Mismatch: {report[1]}"

    # We cannot simply check the equality between the plans because
    # we need to verify processors are of the same type, they can
    # be different instances, thus making the simple equality fail.
    query_processors = plan.root_processors is not None
    expected_processors = composite_plan.root_processors is not None
    assert query_processors == expected_processors

    if plan.root_processors is not None and composite_plan.root_processors is not None:
        assert_subquery_processors_equality(
            plan.root_processors,
            composite_plan.root_processors,
        )

    # Same presence + per-alias type check for the aliased subquery
    # processors (joins).
    query_alias_processors = plan.aliased_processors is not None
    expected_alias_processors = composite_plan.aliased_processors is not None
    assert query_alias_processors == expected_alias_processors

    if (
        plan.aliased_processors is not None
        and composite_plan.aliased_processors is not None
    ):
        assert len(plan.aliased_processors) == len(composite_plan.aliased_processors)
        for k in plan.aliased_processors:
            assert_subquery_processors_equality(
                plan.aliased_processors[k],
                composite_plan.aliased_processors[k],
            )

    def runner(
        query: Union[ClickhouseQuery, CompositeQuery[Table]],
        query_settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        # The fully processed physical query must match the fixture.
        report = query.equals(processed_query)
        assert report[0], f"Mismatch: {report[1]}"
        return QueryResult(
            {"data": []},
            {"stats": {}, "sql": "", "experiments": {}},
        )

    CompositeExecutionPipeline(logical_query, HTTPQuerySettings(), runner).execute()
def test_sessions_processing() -> None:
    """
    Runs a sessions query through the entity pipeline and verifies, inside
    the runner, that the logical columns were translated into the *IfMerge /
    quantilesIfMerge aggregate-merge expressions of the storage.
    """
    query_body = {
        "query": """
        MATCH (sessions)
        SELECT duration_quantiles, sessions, users
        WHERE org_id = 1
        AND project_id = 1
        AND started >= toDateTime('2020-01-01T12:00:00')
        AND started < toDateTime('2020-01-02T12:00:00')
        """,
        "dataset": "sessions",
    }

    sessions = get_dataset("sessions")
    query, snql_anonymized = parse_snql_query(query_body["query"], sessions)

    request = Request(
        id="a",
        original_body=query_body,
        query=query,
        snql_anonymized=snql_anonymized,
        query_settings=HTTPQuerySettings(referrer=""),
        attribution_info=AttributionInfo(get_app_id("default"), "", None, None, None),
    )

    def query_runner(
        query: Query, settings: QuerySettings, reader: Reader
    ) -> QueryResult:
        quantiles = tuple(
            Literal(None, quant) for quant in [0.5, 0.75, 0.9, 0.95, 0.99, 1]
        )
        assert query.get_selected_columns() == [
            SelectedExpression(
                "duration_quantiles",
                CurriedFunctionCall(
                    "_snuba_duration_quantiles",
                    FunctionCall(
                        None,
                        "quantilesIfMerge",
                        quantiles,
                    ),
                    (Column(None, None, "duration_quantiles"),),
                ),
            ),
            SelectedExpression(
                "sessions",
                # sessions = countIfMerge(sessions) + sumIfMerge(sessions_preaggr)
                FunctionCall(
                    "_snuba_sessions",
                    "plus",
                    (
                        FunctionCall(
                            None, "countIfMerge", (Column(None, None, "sessions"),)
                        ),
                        FunctionCall(
                            None,
                            "sumIfMerge",
                            (Column(None, None, "sessions_preaggr"),),
                        ),
                    ),
                ),
            ),
            SelectedExpression(
                "users",
                FunctionCall(
                    "_snuba_users", "uniqIfMerge", (Column(None, None, "users"),)
                ),
            ),
        ]
        return QueryResult({}, {})

    sessions.get_default_entity().get_query_pipeline_builder().build_execution_pipeline(
        request, query_runner
    ).execute()
def test_with_turbo(query: ClickhouseQuery) -> None:
    """In turbo mode the enforcer adds only the plain project_id IN
    condition, with no FINAL or exclusion handling."""
    enforcer = PostReplacementConsistencyEnforcer("project_id", None)
    enforcer.process_query(query, HTTPQuerySettings(turbo=True))
    assert query.get_condition() == build_in("project_id", [2])
def test_events_boolean_context() -> None:
    """
    EventsBooleanContextsProcessor must wrap a raw boolean context lookup
    (arrayElement on contexts) so the result is normalized to the strings
    "True"/"False" (raw values '1' or 'True' map to "True").
    """
    columns = ColumnSet(
        [("contexts", Nested([("key", String()), ("value", String())]))]
    )
    query = ClickhouseQuery(
        Table("errors", columns),
        selected_columns=[
            SelectedExpression(
                "contexts[device.charging]",
                # Raw lookup: contexts.value[indexOf(contexts.key, ...)].
                FunctionCall(
                    "contexts[device.charging]",
                    "arrayElement",
                    (
                        Column(None, None, "contexts.value"),
                        FunctionCall(
                            None,
                            "indexOf",
                            (
                                Column(None, None, "contexts.key"),
                                Literal(None, "device.charging"),
                            ),
                        ),
                    ),
                ),
            )
        ],
    )
    expected = ClickhouseQuery(
        Table("errors", columns),
        selected_columns=[
            SelectedExpression(
                "contexts[device.charging]",
                # Normalized: if(raw IN ('1', 'True'), 'True', 'False'),
                # keeping the same arrayElement lookup inside the condition.
                FunctionCall(
                    "contexts[device.charging]",
                    "if",
                    (
                        binary_condition(
                            ConditionFunctions.IN,
                            FunctionCall(
                                None,
                                "arrayElement",
                                (
                                    Column(None, None, "contexts.value"),
                                    FunctionCall(
                                        None,
                                        "indexOf",
                                        (
                                            Column(None, None, "contexts.key"),
                                            Literal(None, "device.charging"),
                                        ),
                                    ),
                                ),
                            ),
                            literals_tuple(
                                None, [Literal(None, "1"), Literal(None, "True")]
                            ),
                        ),
                        Literal(None, "True"),
                        Literal(None, "False"),
                    ),
                ),
            )
        ],
    )
    settings = HTTPQuerySettings()
    EventsBooleanContextsProcessor().process_query(query, settings)
    assert query.get_selected_columns() == expected.get_selected_columns()
def test_empty_tag_condition(query: Query, expected: Expression) -> None:
    """EmptyTagConditionProcessor must rewrite the query condition into the
    expected expression."""
    EmptyTagConditionProcessor().process_query(query, HTTPQuerySettings())
    assert query.get_condition() == expected
def test_metrics_processing(
    entity_name: str,
    column_name: str,
    entity_key: EntityKey,
    translated_value: Expression,
) -> None:
    """
    Runs a metrics query through the entity pipeline and verifies, inside the
    runner, that the selected value column is translated to translated_value
    and that tags[10] becomes an arrayElement lookup on tags.key/tags.value.
    """
    settings.ENABLE_DEV_FEATURES = True
    settings.DISABLED_DATASETS = set()

    # Reload the factories so the settings changes above take effect.
    importlib.reload(factory)
    importlib.reload(storage_factory)
    importlib.reload(cluster)

    query_body = {
        "query": (
            f"MATCH ({entity_name}) "
            f"SELECT {column_name} BY org_id, project_id, tags[10] "
            "WHERE "
            "timestamp >= toDateTime('2021-05-17 19:42:01') AND "
            "timestamp < toDateTime('2021-05-17 23:42:01') AND "
            "org_id = 1 AND "
            "project_id = 1"
        ),
    }

    metrics_dataset = get_dataset("metrics")
    query, snql_anonymized = parse_snql_query(query_body["query"], metrics_dataset)

    request = Request(
        id="",
        original_body=query_body,
        query=query,
        snql_anonymized="",
        query_settings=HTTPQuerySettings(referrer=""),
        attribution_info=AttributionInfo(get_app_id("blah"), "blah", None, None, None),
    )

    def query_runner(
        query: Union[Query, CompositeQuery[Table]],
        settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        assert query.get_selected_columns() == [
            SelectedExpression(
                "org_id",
                Column("_snuba_org_id", None, "org_id"),
            ),
            SelectedExpression(
                "project_id",
                Column("_snuba_project_id", None, "project_id"),
            ),
            SelectedExpression(
                "tags[10]",
                # tags[10] -> tags.value[indexOf(tags.key, 10)]
                FunctionCall(
                    "_snuba_tags[10]",
                    "arrayElement",
                    (
                        Column(None, None, "tags.value"),
                        FunctionCall(
                            None,
                            "indexOf",
                            (Column(None, None, "tags.key"), Literal(None, 10)),
                        ),
                    ),
                ),
            ),
            SelectedExpression(
                column_name,
                translated_value,
            ),
        ]
        return QueryResult(
            result={"meta": [], "data": [], "totals": {}},
            extra={"stats": {}, "sql": "", "experiments": {}},
        )

    entity = get_entity(entity_key)
    entity.get_query_pipeline_builder().build_execution_pipeline(
        request, query_runner
    ).execute()