def dataset_query(dataset: Dataset, body, timer: Timer) -> Response:
    assert http_request.method == "POST"

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build_with_extensions(
            dataset.get_extensions(), HTTPRequestSettings
        )

    request = build_request(body, schema, timer, dataset, http_request.referrer)

    try:
        result = parse_and_run_query(dataset, request, timer)
    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": "rate limit exceeded",
            }
        elif isinstance(cause, ClickhouseError):
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
def run_query(dataset: Dataset, request: Request, timer: Timer) -> WebQueryResult:
    try:
        result = parse_and_run_query(dataset, request, timer)
        payload = {**result.result, "timing": timer.for_json()}

        if settings.STATS_IN_RESPONSE or request.settings.get_debug():
            payload.update(result.extra)

        return WebQueryResult(payload, 200)
    except RawQueryException as e:
        return WebQueryResult(
            {
                "error": {"type": e.err_type, "message": e.message, **e.meta},
                "sql": e.sql,
                "stats": e.stats,
                "timing": timer.for_json(),
            },
            429 if e.err_type == "rate-limited" else 500,
        )
def run_query(dataset: Dataset, request: Request, timer: Timer) -> QueryResult:
    try:
        return QueryResult(
            {
                **parse_and_run_query(dataset, request, timer),
                "timing": timer.for_json(),
            },
            200,
        )
    except RawQueryException as e:
        return QueryResult(
            {
                "error": {"type": e.err_type, "message": e.message, **e.meta},
                "sql": e.sql,
                "stats": e.stats,
                "timing": timer.for_json(),
            },
            429 if e.err_type == "rate-limited" else 500,
        )
def dataset_query(
    dataset: Dataset, body: MutableMapping[str, Any], timer: Timer, language: Language
) -> Response:
    assert http_request.method == "POST"
    referrer = http_request.referrer or "<unknown>"  # mypy

    if language == Language.SNQL:
        metrics.increment("snql.query.incoming", tags={"referrer": referrer})
        parser: Callable[
            [RequestParts, RequestSettings, Dataset],
            Union[Query, CompositeQuery[Entity]],
        ] = partial(parse_snql_query, [])
    else:
        parser = parse_legacy_query

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build_with_extensions(
            dataset.get_default_entity().get_extensions(),
            HTTPRequestSettings,
            language,
        )

    request = build_request(
        body, parser, HTTPRequestSettings, schema, dataset, timer, referrer
    )

    try:
        result = parse_and_run_query(dataset, request, timer)

        # Some metrics to track the adoption of SnQL
        query_type = "simple"
        if language == Language.SNQL:
            if isinstance(request.query, CompositeQuery):
                if isinstance(request.query.get_from_clause(), JoinClause):
                    query_type = "join"
                else:
                    query_type = "subquery"

            metrics.increment(
                "snql.query.success", tags={"referrer": referrer, "type": query_type}
            )
    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": "rate limit exceeded",
            }
        elif isinstance(cause, ClickhouseError):
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        if language == Language.SNQL:
            metrics.increment(
                "snql.query.failed",
                tags={"referrer": referrer, "status": f"{status}"},
            )

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
def test_simple():
    request_body = {
        "selected_columns": ["event_id"],
        "orderby": "event_id",
        "sample": 0.1,
        "limit": 100,
        "offset": 50,
        "project": 1,
    }

    query = Query(
        request_body,
        get_storage("events").get_schemas().get_read_schema().get_data_source(),
    )

    request = Request(
        uuid.UUID("a" * 32).hex, query, HTTPRequestSettings(), {}, "search"
    )

    # Advance the fake clock by 10ms so the logged query duration is deterministic.
    time = TestingClock()
    timer = Timer("test", clock=time)
    time.sleep(0.01)

    message = SnubaQueryMetadata(
        request=request,
        dataset=get_dataset("events"),
        timer=timer,
        query_list=[
            ClickhouseQueryMetadata(
                sql="select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100",
                stats={"sample": 10},
                status="success",
                trace_id="b" * 32,
            )
        ],
    ).to_dict()

    processor = (
        enforce_table_writer(get_dataset("querylog"))
        .get_stream_loader()
        .get_processor()
    )

    # The processor should flatten the metadata into a single querylog insert row.
    assert processor.process_message(message) == ProcessedMessage(
        ProcessorAction.INSERT,
        [
            {
                "request_id": str(uuid.UUID("a" * 32)),
                "request_body": '{"limit": 100, "offset": 50, "orderby": "event_id", "project": 1, "sample": 0.1, "selected_columns": ["event_id"]}',
                "referrer": "search",
                "dataset": get_dataset("events"),
                "projects": [1],
                "organization": None,
                "timestamp": timer.for_json()["timestamp"],
                "duration_ms": 10,
                "status": "success",
                "clickhouse_queries.sql": [
                    "select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100"
                ],
                "clickhouse_queries.status": ["success"],
                "clickhouse_queries.trace_id": [str(uuid.UUID("b" * 32))],
                "clickhouse_queries.duration_ms": [0],
                "clickhouse_queries.stats": ['{"sample": 10}'],
                "clickhouse_queries.final": [0],
                "clickhouse_queries.cache_hit": [0],
                "clickhouse_queries.sample": [10.0],
                "clickhouse_queries.max_threads": [0],
                "clickhouse_queries.num_days": [0],
                "clickhouse_queries.clickhouse_table": [""],
                "clickhouse_queries.query_id": [""],
                "clickhouse_queries.is_duplicate": [0],
                "clickhouse_queries.consistent": [0],
            }
        ],
    )
def dataset_query(
    dataset: Dataset, body: MutableMapping[str, Any], timer: Timer
) -> Response:
    assert http_request.method == "POST"
    referrer = http_request.referrer or "<unknown>"  # mypy

    # Try to detect if new requests are being sent to the api
    # after the shutdown command has been issued, and if so
    # how long after. I don't want to do a disk check for
    # every query, so randomly sample until the shutdown file
    # is detected, and then log everything
    if IS_SHUTTING_DOWN or random.random() < 0.05:
        if IS_SHUTTING_DOWN or check_down_file_exists():
            tags = {"dataset": get_dataset_name(dataset)}
            metrics.increment("post.shutdown.query", tags=tags)
            diff = time.time() - (shutdown_time() or 0.0)  # this should never be None
            metrics.timing("post.shutdown.query.delay", diff, tags=tags)

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build(HTTPQuerySettings)

    request = build_request(
        body, parse_snql_query, HTTPQuerySettings, schema, dataset, timer, referrer
    )

    try:
        result = parse_and_run_query(dataset, request, timer)
    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": str(cause),
            }
            logger.warning(
                str(cause),
                exc_info=True,
            )
        elif isinstance(cause, ClickhouseError):
            status = get_http_status_for_clickhouse_error(cause)
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, QueryTooLongException):
            status = 400
            details = {"type": "query-too-long", "message": str(cause)}
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.query_settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
def test_simple() -> None:
    request_body = {
        "selected_columns": ["event_id"],
        "orderby": "event_id",
        "sample": 0.1,
        "limit": 100,
        "offset": 50,
        "project": 1,
    }

    query = Query(
        Entity(EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model())
    )

    request = Request(
        id=uuid.UUID("a" * 32).hex,
        original_body=request_body,
        query=query,
        snql_anonymized="",
        query_settings=HTTPQuerySettings(referrer="search"),
        attribution_info=AttributionInfo(
            get_app_id("default"), "search", None, None, None
        ),
    )

    # Advance the fake clock by 10ms so the logged query duration is deterministic.
    time = TestingClock()
    timer = Timer("test", clock=time)
    time.sleep(0.01)

    message = SnubaQueryMetadata(
        request=request,
        start_timestamp=datetime.utcnow() - timedelta(days=3),
        end_timestamp=datetime.utcnow(),
        dataset="events",
        timer=timer,
        query_list=[
            ClickhouseQueryMetadata(
                sql="select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100",
                sql_anonymized="select event_id from sentry_dist sample 0.1 prewhere project_id in ($I) limit 50, 100",
                start_timestamp=datetime.utcnow() - timedelta(days=3),
                end_timestamp=datetime.utcnow(),
                stats={"sample": 10, "error_code": 386},
                status=QueryStatus.SUCCESS,
                profile=ClickhouseQueryProfile(
                    time_range=10,
                    table="events",
                    all_columns={"timestamp", "tags"},
                    multi_level_condition=False,
                    where_profile=FilterProfile(
                        columns={"timestamp"},
                        mapping_cols={"tags"},
                    ),
                    groupby_cols=set(),
                    array_join_cols=set(),
                ),
                trace_id="b" * 32,
            )
        ],
        projects={2},
        snql_anonymized=request.snql_anonymized,
        entity=EntityKey.EVENTS.value,
    ).to_dict()

    processor = (
        get_writable_storage(StorageKey.QUERYLOG)
        .get_table_writer()
        .get_stream_loader()
        .get_processor()
    )

    # The processor should flatten the metadata into a single querylog row in an InsertBatch.
    assert processor.process_message(
        message, KafkaMessageMetadata(0, 0, datetime.now())
    ) == InsertBatch(
        [
            {
                "request_id": str(uuid.UUID("a" * 32)),
                "request_body": '{"limit": 100, "offset": 50, "orderby": "event_id", "project": 1, "sample": 0.1, "selected_columns": ["event_id"]}',
                "referrer": "search",
                "dataset": "events",
                "projects": [2],
                "organization": None,
                "timestamp": timer.for_json()["timestamp"],
                "duration_ms": 10,
                "status": "success",
                "clickhouse_queries.sql": [
                    "select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100"
                ],
                "clickhouse_queries.status": ["success"],
                "clickhouse_queries.trace_id": [str(uuid.UUID("b" * 32))],
                "clickhouse_queries.duration_ms": [0],
                "clickhouse_queries.stats": ['{"error_code": 386, "sample": 10}'],
                "clickhouse_queries.final": [0],
                "clickhouse_queries.cache_hit": [0],
                "clickhouse_queries.sample": [10.0],
                "clickhouse_queries.max_threads": [0],
                "clickhouse_queries.num_days": [10],
                "clickhouse_queries.clickhouse_table": [""],
                "clickhouse_queries.query_id": [""],
                "clickhouse_queries.is_duplicate": [0],
                "clickhouse_queries.consistent": [0],
                "clickhouse_queries.all_columns": [["tags", "timestamp"]],
                "clickhouse_queries.or_conditions": [False],
                "clickhouse_queries.where_columns": [["timestamp"]],
                "clickhouse_queries.where_mapping_columns": [["tags"]],
                "clickhouse_queries.groupby_columns": [[]],
                "clickhouse_queries.array_join_columns": [[]],
            }
        ],
        None,
    )
def test_simple() -> None:
    request_body = {
        "selected_columns": ["event_id"],
        "orderby": "event_id",
        "sample": 0.1,
        "limit": 100,
        "offset": 50,
        "project": 1,
    }

    query = Query(get_storage(StorageKey.EVENTS).get_schema().get_data_source())

    request = Request(
        uuid.UUID("a" * 32).hex,
        request_body,
        query,
        HTTPRequestSettings(),
        "search",
    )

    time = TestingClock()
    timer = Timer("test", clock=time)
    time.sleep(0.01)

    message = SnubaQueryMetadata(
        request=request,
        dataset="events",
        timer=timer,
        query_list=[
            ClickhouseQueryMetadata(
                sql="select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100",
                stats={"sample": 10},
                status=QueryStatus.SUCCESS,
                profile=ClickhouseQueryProfile(
                    time_range=10,
                    table="events",
                    all_columns={"timestamp", "tags"},
                    multi_level_condition=False,
                    where_profile=FilterProfile(
                        columns={"timestamp"},
                        mapping_cols={"tags"},
                    ),
                    groupby_cols=set(),
                    array_join_cols=set(),
                ),
                trace_id="b" * 32,
            )
        ],
    ).to_dict()

    processor = (
        get_writable_storage(StorageKey.QUERYLOG)
        .get_table_writer()
        .get_stream_loader()
        .get_processor()
    )

    assert processor.process_message(
        message, KafkaMessageMetadata(0, 0, datetime.now())
    ) == InsertBatch(
        [
            {
                "request_id": str(uuid.UUID("a" * 32)),
                "request_body": '{"limit": 100, "offset": 50, "orderby": "event_id", "project": 1, "sample": 0.1, "selected_columns": ["event_id"]}',
                "referrer": "search",
                "dataset": "events",
                "projects": [1],
                "organization": None,
                "timestamp": timer.for_json()["timestamp"],
                "duration_ms": 10,
                "status": "success",
                "clickhouse_queries.sql": [
                    "select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100"
                ],
                "clickhouse_queries.status": ["success"],
                "clickhouse_queries.trace_id": [str(uuid.UUID("b" * 32))],
                "clickhouse_queries.duration_ms": [0],
                "clickhouse_queries.stats": ['{"sample": 10}'],
                "clickhouse_queries.final": [0],
                "clickhouse_queries.cache_hit": [0],
                "clickhouse_queries.sample": [10.0],
                "clickhouse_queries.max_threads": [0],
                "clickhouse_queries.num_days": [10],
                "clickhouse_queries.clickhouse_table": [""],
                "clickhouse_queries.query_id": [""],
                "clickhouse_queries.is_duplicate": [0],
                "clickhouse_queries.consistent": [0],
                "clickhouse_queries.all_columns": [["tags", "timestamp"]],
                "clickhouse_queries.or_conditions": [False],
                "clickhouse_queries.where_columns": [["timestamp"]],
                "clickhouse_queries.where_mapping_columns": [["tags"]],
                "clickhouse_queries.groupby_columns": [[]],
                "clickhouse_queries.array_join_columns": [[]],
            }
        ],
    )