def test_select_storage(
    query_body: MutableMapping[str, Any], is_subscription: bool, expected_table: str
) -> None:
    """Build a sessions request (HTTP or subscription settings, depending on
    the parametrization) and verify that the execution pipeline routes the
    query to the expected storage table."""
    dataset = get_dataset("sessions")
    snql_query = json_to_snql(query_body, "sessions")
    query, anonymized = parse_snql_query(str(snql_query), dataset)
    parsed_body = json.loads(snql_query.snuba())

    # Subscriptions carry their own settings class; everything else goes
    # through the regular HTTP settings.
    settings_cls = SubscriptionQuerySettings if is_subscription else HTTPQuerySettings

    request = Request(
        id="a",
        original_body=parsed_body,
        query=query,
        snql_anonymized=anonymized,
        query_settings=settings_cls(referrer=""),
        attribution_info=AttributionInfo(get_app_id("default"), "blah", None, None, None),
    )

    def assert_table_runner(
        query: Query, settings: QuerySettings, reader: Reader
    ) -> QueryResult:
        # The actual assertion of this test: the storage table picked by the
        # pipeline must match the parametrized expectation.
        assert query.get_from_clause().table_name == expected_table
        return QueryResult({}, {})

    builder = dataset.get_default_entity().get_query_pipeline_builder()
    builder.build_execution_pipeline(request, assert_table_runner).execute()
def test_events_processing() -> None:
    """Subscript expressions on the events entity are translated to their
    storage representation: a promoted tag becomes a direct column reference,
    while a non-promoted context becomes an arrayElement/indexOf lookup."""
    query_body = {
        "query": """
        MATCH (events)
        SELECT tags[transaction], contexts[browser.name]
        WHERE project_id = 1
        AND timestamp >= toDateTime('2020-01-01 12:00:00')
        AND timestamp < toDateTime('2020-01-02 12:00:00')
        """,
        "dataset": "events",
    }

    events_dataset = get_dataset("events")
    events_entity = events_dataset.get_default_entity()

    query, snql_anonymized = parse_snql_query(query_body["query"], events_dataset)

    request = Request(
        id="",
        original_body=query_body,
        query=query,
        snql_anonymized=snql_anonymized,
        query_settings=HTTPQuerySettings(referrer=""),
        attribution_info=AttributionInfo(get_app_id("blah"), "blah", None, None, None),
    )

    def query_runner(
        query: Query, settings: QuerySettings, reader: Reader
    ) -> QueryResult:
        assert query.get_selected_columns() == [
            # tags[transaction] is promoted: it maps straight to the
            # transaction_name column rather than an array lookup.
            SelectedExpression(
                "tags[transaction]",
                Column("_snuba_tags[transaction]", None, "transaction_name"),
            ),
            # contexts[browser.name] is resolved through the key/value arrays:
            # arrayElement(contexts.value, indexOf(contexts.key, 'browser.name')).
            SelectedExpression(
                "contexts[browser.name]",
                FunctionCall(
                    "_snuba_contexts[browser.name]",
                    "arrayElement",
                    (
                        Column(None, None, "contexts.value"),
                        FunctionCall(
                            None,
                            "indexOf",
                            (
                                Column(None, None, "contexts.key"),
                                Literal(None, "browser.name"),
                            ),
                        ),
                    ),
                ),
            ),
        ]
        return QueryResult({}, {})

    events_entity.get_query_pipeline_builder().build_execution_pipeline(
        request, query_runner
    ).execute()
def build_request(
    body: MutableMapping[str, Any],
    parser: Parser,
    settings_class: Union[Type[HTTPQuerySettings], Type[SubscriptionQuerySettings]],
    schema: RequestSchema,
    dataset: Dataset,
    timer: Timer,
    referrer: str,
    custom_processing: Optional[CustomProcessors] = None,
) -> Request:
    """Validate a raw query body against the schema, parse it into a query
    AST and assemble the resulting Request object.

    Records parse/validation failures on the timer metrics and attaches the
    raw and parsed query to the active Sentry span for debugging.

    Raises:
        InvalidJsonRequestException / InvalidQueryException: the body or
            query was invalid (recorded as an invalid request).
        Exception: any other failure while building (recorded as an error).
    """
    with sentry_sdk.start_span(description="build_request", op="validate") as span:
        try:
            request_parts = schema.validate(body)
            if settings_class == HTTPQuerySettings:
                query_settings: MutableMapping[str, bool | str] = {
                    **request_parts.query_settings,
                    "consistent": _consistent_override(
                        request_parts.query_settings.get("consistent", False), referrer
                    ),
                }
                # TODO: referrer probably doesn't need to be passed in, it
                # should be from the body
                query_settings["referrer"] = referrer
                settings_obj: Union[
                    HTTPQuerySettings, SubscriptionQuerySettings
                ] = settings_class(
                    **query_settings,
                )
            elif settings_class == SubscriptionQuerySettings:
                settings_obj = settings_class(
                    consistent=_consistent_override(True, referrer),
                )
            else:
                # Fail loudly on an unexpected settings class instead of
                # hitting an UnboundLocalError on settings_obj further down.
                raise ValueError(f"Unsupported settings class: {settings_class}")

            query, snql_anonymized = parser(
                request_parts, settings_obj, dataset, custom_processing
            )

            # Tag the span with the project/org when the query targets
            # exactly one of each, to make traces searchable.
            project_ids = get_object_ids_in_query_ast(query, "project_id")
            if project_ids is not None and len(project_ids) == 1:
                sentry_sdk.set_tag("snuba_project_id", project_ids.pop())

            org_ids = get_object_ids_in_query_ast(query, "org_id")
            if org_ids is not None and len(org_ids) == 1:
                sentry_sdk.set_tag("snuba_org_id", org_ids.pop())

            attribution_info = dict(request_parts.attribution_info)
            # TODO: clean this up
            attribution_info["app_id"] = get_app_id(
                request_parts.attribution_info["app_id"]
            )
            attribution_info["referrer"] = referrer

            request_id = uuid.uuid4().hex
            request = Request(
                id=request_id,
                # TODO: Replace this with the actual query raw body.
                # this can have an impact on subscriptions so we need
                # to be careful with the change.
                original_body=body,
                query=query,
                attribution_info=AttributionInfo(**attribution_info),
                query_settings=settings_obj,
                snql_anonymized=snql_anonymized,
            )
        except (InvalidJsonRequestException, InvalidQueryException):
            record_invalid_request(timer, referrer)
            raise
        except Exception:
            record_error_building_request(timer, referrer)
            raise

        # Wrap the raw body once; it is attached to both the span and the
        # breadcrumb below.
        raw_query_lines = textwrap.wrap(
            repr(request.original_body), 100, break_long_words=False
        )
        span.set_data(
            "snuba_query_parsed",
            repr(query).split("\n"),
        )
        span.set_data("snuba_query_raw", raw_query_lines)
        sentry_sdk.add_breadcrumb(
            category="query_info",
            level="info",
            message="snuba_query_raw",
            data={"query": raw_query_lines},
        )

        timer.mark("validate_schema")
        return request
def test_transform_column_names() -> None:
    """
    Runs a simple query containing selected expression names that do not
    match the aliases of the expressions themselves. It verifies that the
    names of the columns in the result correspond to the SelectedExpression
    names and not to the expression aliases (which are supposed to be
    internal).
    """
    events_storage = get_entity(EntityKey.EVENTS).get_writable_storage()
    assert events_storage is not None
    event_id = uuid.uuid4().hex

    event_date = datetime.utcnow()
    # Insert one event so the query below has a row to return.
    write_unprocessed_events(
        events_storage,
        [
            InsertEvent(
                {
                    "event_id": event_id,
                    "group_id": 10,
                    "primary_hash": uuid.uuid4().hex,
                    "project_id": 1,
                    "message": "a message",
                    "platform": "python",
                    "datetime": event_date.strftime(settings.PAYLOAD_DATETIME_FORMAT),
                    "data": {"received": time.time()},
                    "organization_id": 1,
                    "retention_days": settings.DEFAULT_RETENTION_DAYS,
                }
            )
        ],
    )

    query = Query(
        Entity(EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model()),
        selected_columns=[
            # The selected expression names are those provided by the
            # user in the query and those the user expect in the response.
            # Aliases will be internal names to prevent shadowing.
            SelectedExpression("event_id", Column("_snuba_event_id", None, "event_id")),
            SelectedExpression(
                "message",
                FunctionCall(
                    "_snuba_message",
                    "ifNull",
                    (Column(None, None, "message"), Literal(None, "")),
                ),
            ),
        ],
    )
    query_settings = HTTPQuerySettings(referrer="asd")

    dataset = get_dataset("events")
    timer = Timer("test")

    result = parse_and_run_query(
        dataset,
        Request(
            id="asd",
            original_body={},
            query=query,
            snql_anonymized="",
            query_settings=query_settings,
            attribution_info=AttributionInfo(
                get_app_id("blah"), "blah", None, None, None
            ),
        ),
        timer,
    )

    data = result.result["data"]
    # Rows (and meta) are keyed by the user-facing SelectedExpression names,
    # not by the internal _snuba_* aliases.
    assert data == [{"event_id": event_id, "message": "a message"}]
    meta = result.result["meta"]

    assert meta == [
        MetaColumn(name="event_id", type="String"),
        MetaColumn(name="message", type="String"),
    ]
def test_simple() -> None:
    """A complete SnubaQueryMetadata record for a single Clickhouse query is
    processed by the querylog stream processor into the expected InsertBatch
    row (one flattened dict, with per-query fields as parallel arrays)."""
    request_body = {
        "selected_columns": ["event_id"],
        "orderby": "event_id",
        "sample": 0.1,
        "limit": 100,
        "offset": 50,
        "project": 1,
    }

    query = Query(
        Entity(EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model())
    )

    request = Request(
        id=uuid.UUID("a" * 32).hex,
        original_body=request_body,
        query=query,
        snql_anonymized="",
        query_settings=HTTPQuerySettings(referrer="search"),
        attribution_info=AttributionInfo(
            get_app_id("default"), "search", None, None, None
        ),
    )

    # Fake clock makes the measured duration deterministic (10ms below).
    time = TestingClock()
    timer = Timer("test", clock=time)
    time.sleep(0.01)

    message = SnubaQueryMetadata(
        request=request,
        start_timestamp=datetime.utcnow() - timedelta(days=3),
        end_timestamp=datetime.utcnow(),
        dataset="events",
        timer=timer,
        query_list=[
            ClickhouseQueryMetadata(
                sql="select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100",
                sql_anonymized="select event_id from sentry_dist sample 0.1 prewhere project_id in ($I) limit 50, 100",
                start_timestamp=datetime.utcnow() - timedelta(days=3),
                end_timestamp=datetime.utcnow(),
                stats={"sample": 10, "error_code": 386},
                status=QueryStatus.SUCCESS,
                profile=ClickhouseQueryProfile(
                    time_range=10,
                    table="events",
                    all_columns={"timestamp", "tags"},
                    multi_level_condition=False,
                    where_profile=FilterProfile(
                        columns={"timestamp"},
                        mapping_cols={"tags"},
                    ),
                    groupby_cols=set(),
                    array_join_cols=set(),
                ),
                trace_id="b" * 32,
            )
        ],
        projects={2},
        snql_anonymized=request.snql_anonymized,
        entity=EntityKey.EVENTS.value,
    ).to_dict()

    processor = (
        get_writable_storage(StorageKey.QUERYLOG)
        .get_table_writer()
        .get_stream_loader()
        .get_processor()
    )

    assert processor.process_message(
        message, KafkaMessageMetadata(0, 0, datetime.now())
    ) == InsertBatch(
        [
            {
                "request_id": str(uuid.UUID("a" * 32)),
                # Request body is re-serialized as JSON with sorted keys.
                "request_body": '{"limit": 100, "offset": 50, "orderby": "event_id", "project": 1, "sample": 0.1, "selected_columns": ["event_id"]}',
                "referrer": "search",
                "dataset": "events",
                "projects": [2],
                "organization": None,
                "timestamp": timer.for_json()["timestamp"],
                "duration_ms": 10,
                "status": "success",
                # Per-Clickhouse-query fields: one array entry per query
                # in query_list (a single query here).
                "clickhouse_queries.sql": [
                    "select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100"
                ],
                "clickhouse_queries.status": ["success"],
                "clickhouse_queries.trace_id": [str(uuid.UUID("b" * 32))],
                "clickhouse_queries.duration_ms": [0],
                "clickhouse_queries.stats": ['{"error_code": 386, "sample": 10}'],
                "clickhouse_queries.final": [0],
                "clickhouse_queries.cache_hit": [0],
                "clickhouse_queries.sample": [10.0],
                "clickhouse_queries.max_threads": [0],
                "clickhouse_queries.num_days": [10],
                "clickhouse_queries.clickhouse_table": [""],
                "clickhouse_queries.query_id": [""],
                "clickhouse_queries.is_duplicate": [0],
                "clickhouse_queries.consistent": [0],
                "clickhouse_queries.all_columns": [["tags", "timestamp"]],
                "clickhouse_queries.or_conditions": [False],
                "clickhouse_queries.where_columns": [["timestamp"]],
                "clickhouse_queries.where_mapping_columns": [["tags"]],
                "clickhouse_queries.groupby_columns": [[]],
                "clickhouse_queries.array_join_columns": [[]],
            }
        ],
        None,
    )
def test() -> None:
    """The PipelineDelegator runs the query on both the primary (errors) and
    secondary (errors_ro) pipelines, gives each run its own copy of the query
    settings, and reports both results to the callback."""
    cv = threading.Condition()
    query_result = QueryResult({}, {"stats": {}, "sql": "", "experiments": {}})

    def callback_func(
        primary: Optional[Tuple[str, QueryResult]],
        other: List[Tuple[str, QueryResult]],
    ) -> None:
        # Wake the main thread once both runs and the callback completed.
        with cv:
            cv.notify()

    mock_callback = Mock(side_effect=callback_func)

    query_body = {
        "query": """
        MATCH (events)
        SELECT type, project_id
        WHERE project_id = 1
        AND timestamp >= toDateTime('2020-01-01 12:00:00')
        AND timestamp < toDateTime('2020-01-02 12:00:00')
        """,
        "dataset": "events",
    }

    events = get_dataset("events")
    query, _ = parse_snql_query(query_body["query"], events)

    errors_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.ERRORS)
        ),
    )
    errors_ro_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.ERRORS_RO)
        ),
    )

    delegator = PipelineDelegator(
        query_pipeline_builders={
            "errors": errors_pipeline,
            "errors_ro": errors_ro_pipeline,
        },
        # errors is primary, errors_ro runs as secondary.
        selector_func=lambda query, referrer: ("errors", ["errors_ro"]),
        split_rate_limiter=True,
        ignore_secondary_exceptions=True,
        callback_func=mock_callback,
    )

    runner_call_count = 0
    runner_settings: MutableSequence[QuerySettings] = []

    def query_runner(
        query: Union[Query, CompositeQuery[Table]],
        settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        nonlocal runner_call_count
        nonlocal runner_settings

        runner_call_count += 1
        runner_settings.append(settings)
        return query_result

    set_config("pipeline_split_rate_limiter", 1)

    with cv:
        query_settings = HTTPQuerySettings(referrer="ref")
        delegator.build_execution_pipeline(
            Request(
                id="asd",
                original_body=query_body,
                query=query,
                snql_anonymized="",
                query_settings=query_settings,
                attribution_info=AttributionInfo(
                    get_app_id("ref"), "ref", None, None, None
                ),
            ),
            query_runner,
        ).execute()
        # Wait (bounded) for the secondary run / callback to signal.
        cv.wait(timeout=5)

    assert runner_call_count == 2
    assert len(runner_settings) == 2

    settings, settings_ro = runner_settings
    # Validate that settings have been duplicated: each pipeline must get
    # its own object, not a shared reference.
    assert id(settings) != id(settings_ro)

    assert mock_callback.call_args == call(
        query,
        query_settings,
        "ref",
        Result("errors", query_result, ANY),
        [Result("errors_ro", query_result, ANY)],
    )
def test_sessions_processing() -> None:
    """Aggregate columns of the sessions entity are expanded into the
    corresponding *IfMerge aggregate-function calls over the underlying
    storage columns."""
    query_body = {
        "query": """
        MATCH (sessions)
        SELECT duration_quantiles, sessions, users
        WHERE org_id = 1
        AND project_id = 1
        AND started >= toDateTime('2020-01-01T12:00:00')
        AND started < toDateTime('2020-01-02T12:00:00')
        """,
        "dataset": "sessions",
    }

    sessions = get_dataset("sessions")
    query, snql_anonymized = parse_snql_query(query_body["query"], sessions)

    request = Request(
        id="a",
        original_body=query_body,
        query=query,
        snql_anonymized=snql_anonymized,
        query_settings=HTTPQuerySettings(referrer=""),
        attribution_info=AttributionInfo(get_app_id("default"), "", None, None, None),
    )

    def query_runner(
        query: Query, settings: QuerySettings, reader: Reader
    ) -> QueryResult:
        quantiles = tuple(
            Literal(None, quant) for quant in [0.5, 0.75, 0.9, 0.95, 0.99, 1]
        )
        assert query.get_selected_columns() == [
            # duration_quantiles -> quantilesIfMerge(0.5, ...)(duration_quantiles)
            SelectedExpression(
                "duration_quantiles",
                CurriedFunctionCall(
                    "_snuba_duration_quantiles",
                    FunctionCall(
                        None,
                        "quantilesIfMerge",
                        quantiles,
                    ),
                    (Column(None, None, "duration_quantiles"),),
                ),
            ),
            # sessions -> countIfMerge(sessions) + sumIfMerge(sessions_preaggr)
            SelectedExpression(
                "sessions",
                FunctionCall(
                    "_snuba_sessions",
                    "plus",
                    (
                        FunctionCall(
                            None, "countIfMerge", (Column(None, None, "sessions"),)
                        ),
                        FunctionCall(
                            None,
                            "sumIfMerge",
                            (Column(None, None, "sessions_preaggr"),),
                        ),
                    ),
                ),
            ),
            # users -> uniqIfMerge(users)
            SelectedExpression(
                "users",
                FunctionCall(
                    "_snuba_users", "uniqIfMerge", (Column(None, None, "users"),)
                ),
            ),
        ]
        return QueryResult({}, {})

    sessions.get_default_entity().get_query_pipeline_builder().build_execution_pipeline(
        request, query_runner
    ).execute()
def test_metrics_processing(
    entity_name: str,
    column_name: str,
    entity_key: EntityKey,
    translated_value: Expression,
) -> None:
    """Parametrized check that a metrics entity query grouped by
    org_id/project_id/tags[10] translates the grouping columns and the
    selected value column into the expected expressions."""
    settings.ENABLE_DEV_FEATURES = True
    settings.DISABLED_DATASETS = set()

    # Reload the factories/cluster modules so the settings toggles above
    # take effect for this test.
    importlib.reload(factory)
    importlib.reload(storage_factory)
    importlib.reload(cluster)

    query_body = {
        "query": (
            f"MATCH ({entity_name}) "
            f"SELECT {column_name} BY org_id, project_id, tags[10] "
            "WHERE "
            "timestamp >= toDateTime('2021-05-17 19:42:01') AND "
            "timestamp < toDateTime('2021-05-17 23:42:01') AND "
            "org_id = 1 AND "
            "project_id = 1"
        ),
    }

    metrics_dataset = get_dataset("metrics")
    query, snql_anonymized = parse_snql_query(query_body["query"], metrics_dataset)

    request = Request(
        id="",
        original_body=query_body,
        query=query,
        snql_anonymized="",
        query_settings=HTTPQuerySettings(referrer=""),
        attribution_info=AttributionInfo(get_app_id("blah"), "blah", None, None, None),
    )

    def query_runner(
        query: Union[Query, CompositeQuery[Table]],
        settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        assert query.get_selected_columns() == [
            SelectedExpression(
                "org_id",
                Column("_snuba_org_id", None, "org_id"),
            ),
            SelectedExpression(
                "project_id",
                Column("_snuba_project_id", None, "project_id"),
            ),
            # tags[10] is resolved through the tags key/value arrays:
            # arrayElement(tags.value, indexOf(tags.key, 10)).
            SelectedExpression(
                "tags[10]",
                FunctionCall(
                    "_snuba_tags[10]",
                    "arrayElement",
                    (
                        Column(None, None, "tags.value"),
                        FunctionCall(
                            None,
                            "indexOf",
                            (Column(None, None, "tags.key"), Literal(None, 10)),
                        ),
                    ),
                ),
            ),
            # Expected translation of the value column is supplied by the
            # test parametrization.
            SelectedExpression(
                column_name,
                translated_value,
            ),
        ]
        return QueryResult(
            result={"meta": [], "data": [], "totals": {}},
            extra={"stats": {}, "sql": "", "experiments": {}},
        )

    entity = get_entity(entity_key)
    entity.get_query_pipeline_builder().build_execution_pipeline(
        request, query_runner
    ).execute()