def build_request(
    self,
    dataset: Dataset,
    timestamp: datetime,
    offset: Optional[int],
    timer: Timer,
    metrics: Optional[MetricsBackend] = None,
    referrer: str = SUBSCRIPTION_REFERRER,
) -> Request:
    schema = RequestSchema.build(SubscriptionQuerySettings)

    request = build_request(
        {"query": self.query},
        parse_snql_query,
        SubscriptionQuerySettings,
        schema,
        dataset,
        timer,
        referrer,
        [
            self.entity_subscription.validate_query,
            partial(self.add_conditions, timestamp, offset),
        ],
    )
    return request

def build_request(
    self, dataset: Dataset, timestamp: datetime, offset: Optional[int], timer: Timer
) -> Request:
    """
    Returns a Request that can be used to run a query via `parse_and_run_query`.

    :param dataset: The Dataset to build the request for
    :param timestamp: Date that the query should run up until
    :param offset: Maximum offset we should query for
    """
    schema = RequestSchema.build_with_extensions(
        dataset.get_extensions(),
        SubscriptionRequestSettings,
    )
    extra_conditions: Sequence[Condition] = []
    if offset is not None:
        extra_conditions = [[["ifnull", ["offset", 0]], "<=", offset]]
    return build_request(
        {
            "project": self.project_id,
            "conditions": [*self.conditions, *extra_conditions],
            "aggregations": self.aggregations,
            "from_date": (timestamp - self.time_window).isoformat(),
            "to_date": timestamp.isoformat(),
        },
        schema,
        timer,
        dataset,
        SUBSCRIPTION_REFERRER,
    )

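# For illustration only (hypothetical values, not from the codebase): with
# offset=1000 and a one-hour window, the legacy body built above looks roughly
# like the following. Note how the ifnull clause treats a NULL offset as 0 so
# unoffsetted rows are never filtered out.
example_legacy_body = {
    "project": 1,
    "conditions": [
        ["type", "=", "error"],
        [["ifnull", ["offset", 0]], "<=", 1000],
    ],
    "aggregations": [["count()", "", "count"]],
    "from_date": "2021-01-01T00:00:00",
    "to_date": "2021-01-01T01:00:00",
}
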
def build_request(
    self,
    dataset: Dataset,
    timestamp: datetime,
    offset: Optional[int],
    timer: Timer,
    metrics: Optional[MetricsBackend] = None,
) -> Request:
    schema = RequestSchema.build_with_extensions(
        {},
        SubscriptionRequestSettings,
        Language.SNQL,
    )

    request = build_request(
        {"query": self.query},
        partial(
            parse_snql_query,
            [
                self.validate_subscription,
                partial(self.add_conditions, timestamp, offset),
            ],
        ),
        SubscriptionRequestSettings,
        schema,
        dataset,
        timer,
        SUBSCRIPTION_REFERRER,
    )
    return request

def dataset_query(
    dataset: Dataset, body: MutableMapping[str, Any], timer: Timer
) -> Response:
    assert http_request.method == "POST"

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build_with_extensions(
            dataset.get_extensions(), HTTPRequestSettings
        )

    request = build_request(body, schema, timer, dataset, http_request.referrer)

    try:
        result = parse_and_run_query(dataset, request, timer)
    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": "rate limit exceeded",
            }
        elif isinstance(cause, ClickhouseError):
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})

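# Sketch of the error response the except branch above produces, with
# illustrative values (any additional top-level keys come from whatever the
# chained QueryException carried in exception.extra):
#
# HTTP 429
# {
#     "error": {"type": "rate-limited", "message": "rate limit exceeded"},
#     "timing": { ... },   # timer.for_json()
#     ...                  # keys merged in from exception.extra
# }
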
def test_build_request(
    body: MutableMapping[str, Any], language: Language, condition: Expression
) -> None:
    dataset = get_dataset("events")
    entity = dataset.get_default_entity()
    schema = RequestSchema.build_with_extensions(
        entity.get_extensions(),
        HTTPRequestSettings,
        language,
    )

    request = build_request(
        body,
        parse_legacy_query
        if language == Language.LEGACY
        else partial(parse_snql_query, []),
        HTTPRequestSettings,
        schema,
        dataset,
        Timer("test"),
        "my_request",
    )

    expected_query = Query(
        from_clause=Entity(EntityKey.EVENTS, entity.get_data_model()),
        selected_columns=[
            SelectedExpression(
                name="time",
                expression=Column(
                    alias="_snuba_time", table_name=None, column_name="time"
                ),
            ),
            SelectedExpression(
                "count", FunctionCall("_snuba_count", "count", tuple())
            ),
        ],
        condition=condition,
        groupby=[Column("_snuba_time", None, "time")],
        limit=1000,
        granularity=60,
    )

    assert request.referrer == "my_request"
    assert dict(request.body) == body
    status, differences = request.query.equals(expected_query)
    assert status, f"Query mismatch: {differences}"

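# Illustrative input for this parametrized test (hypothetical, not the
# module's actual parametrize list): a SnQL body that should parse to the
# expected_query above might look like
#
# {
#     "query": "MATCH (events) "
#              "SELECT count() AS count BY time "
#              "WHERE timestamp >= toDateTime('2021-01-01T00:00:00') "
#              "AND timestamp < toDateTime('2021-01-02T00:00:00') "
#              "AND project_id = 1 "
#              "LIMIT 1000 GRANULARITY 60"
# }
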
def dataset_query(
    dataset: Dataset, body: MutableMapping[str, Any], timer: Timer, language: Language
) -> Response:
    assert http_request.method == "POST"
    referrer = http_request.referrer or "<unknown>"  # mypy

    if language == Language.SNQL:
        metrics.increment("snql.query.incoming", tags={"referrer": referrer})
        parser: Callable[
            [RequestParts, RequestSettings, Dataset],
            Union[Query, CompositeQuery[Entity]],
        ] = partial(parse_snql_query, [])
    else:
        parser = parse_legacy_query

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build_with_extensions(
            dataset.get_default_entity().get_extensions(), HTTPRequestSettings, language
        )

    request = build_request(
        body, parser, HTTPRequestSettings, schema, dataset, timer, referrer
    )

    try:
        result = parse_and_run_query(dataset, request, timer)

        # Some metrics to track the adoption of SnQL
        query_type = "simple"
        if language == Language.SNQL:
            if isinstance(request.query, CompositeQuery):
                if isinstance(request.query.get_from_clause(), JoinClause):
                    query_type = "join"
                else:
                    query_type = "subquery"

            metrics.increment(
                "snql.query.success", tags={"referrer": referrer, "type": query_type}
            )
    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": "rate limit exceeded",
            }
        elif isinstance(cause, ClickhouseError):
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        if language == Language.SNQL:
            metrics.increment(
                "snql.query.failed",
                tags={"referrer": referrer, "status": f"{status}"},
            )

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})

def dataset_query(
    dataset: Dataset, body: MutableMapping[str, Any], timer: Timer
) -> Response:
    assert http_request.method == "POST"
    referrer = http_request.referrer or "<unknown>"  # mypy

    # Try to detect if new requests are being sent to the API after the
    # shutdown command has been issued, and if so, how long after. I don't
    # want to do a disk check for every query, so randomly sample until the
    # shutdown file is detected, and then log everything.
    if IS_SHUTTING_DOWN or random.random() < 0.05:
        if IS_SHUTTING_DOWN or check_down_file_exists():
            tags = {"dataset": get_dataset_name(dataset)}
            metrics.increment("post.shutdown.query", tags=tags)
            diff = time.time() - (shutdown_time() or 0.0)  # this should never be None
            metrics.timing("post.shutdown.query.delay", diff, tags=tags)

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build(HTTPQuerySettings)

    request = build_request(
        body, parse_snql_query, HTTPQuerySettings, schema, dataset, timer, referrer
    )

    try:
        result = parse_and_run_query(dataset, request, timer)
    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": str(cause),
            }
            logger.warning(
                str(cause),
                exc_info=True,
            )
        elif isinstance(cause, ClickhouseError):
            status = get_http_status_for_clickhouse_error(cause)
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, QueryTooLongException):
            status = 400
            details = {"type": "query-too-long", "message": str(cause)}
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.query_settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})

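# A minimal sketch of the down-file helpers assumed above; the path and the
# mtime-based bookkeeping are illustrative, not Snuba's actual implementation:
import os
from typing import Optional

DOWN_FILE_PATH = "/tmp/snuba.down"  # hypothetical location of the down file

def check_down_file_exists() -> bool:
    return os.path.exists(DOWN_FILE_PATH)

def shutdown_time() -> Optional[float]:
    # Treat the down file's mtime as the moment shutdown was requested.
    try:
        return os.stat(DOWN_FILE_PATH).st_mtime
    except OSError:
        return None
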
def test_nullable_field_casting(entity: Entity, expected_table_name: str) -> None:
    dataset_name = "discover"
    query_str = """MATCH (discover)
        SELECT uniq(sdk_version)
        WHERE timestamp >= toDateTime('2021-07-25T15:02:10')
        AND timestamp < toDateTime('2021-07-26T15:02:10')
        AND project_id IN tuple(5492900)
        """

    # ----- create the request object as if it came in through our API -----
    query_body = {
        "query": query_str,
        "debug": True,
        "dataset": dataset_name,
        "turbo": False,
        "consistent": False,
    }
    dataset = get_dataset(dataset_name)
    schema = RequestSchema.build(HTTPQuerySettings)
    request = build_request(
        query_body,
        parse_snql_query,
        HTTPQuerySettings,
        schema,
        dataset,
        Timer(name="bloop"),
        "some_referrer",
    )
    # -----------------------------------------------------------------------

    def query_verifier(
        query: Union[Query, CompositeQuery[Table]],
        settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        # The only reason this extends StringifyVisitor is because it has all
        # the other visit methods implemented.
        class NullCastingVerifier(StringifyVisitor):
            def __init__(self) -> None:
                self.sdk_version_cast_to_null = False
                super().__init__()

            def visit_function_call(self, exp: FunctionCall) -> str:
                if (
                    exp.function_name == "cast"
                    and exp.alias == "_snuba_sdk_version"
                    and exp.parameters
                    == (
                        Column(None, None, "sdk_version"),
                        Literal(None, "Nullable(String)"),
                    )
                ):
                    self.sdk_version_cast_to_null = True
                return super().visit_function_call(exp)

        for select_expr in query.get_selected_columns():
            verifier = NullCastingVerifier()
            select_expr.expression.accept(verifier)
            assert verifier.sdk_version_cast_to_null

        return QueryResult(
            result={"meta": [], "data": [], "totals": {}},
            extra={"stats": {}, "sql": "", "experiments": {}},
        )

    entity.get_query_pipeline_builder().build_execution_pipeline(
        request, query_verifier
    ).execute()

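# Spelled out, the promoted expression the verifier looks for is (sketch of
# the ClickHouse-side expression, not exact SQL):
#
#   cast(sdk_version, 'Nullable(String)') AS _snuba_sdk_version
#
# i.e. the entity exposes sdk_version as Nullable(String) even where the
# underlying storage column is non-nullable, presumably so aggregates like
# uniq() see consistent NULL semantics across storages.
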
def test_span_id_promotion(entity: Entity, expected_table_name: str) -> None:
    """In order to save space in the contexts column and provide faster query
    performance, we promote span_id to a proper column and don't store it in
    the actual contexts object in the DB. The client, however, still queries
    by `contexts[trace.span_id]` and expects a hex string rather than the
    64-bit uint we store.

    This test makes sure that our query pipeline does the proper column
    promotion and conversion.
    """
    dataset_name = "discover"

    # The client queries by contexts[trace.span_id] even though that's not how we store it
    query_str = f"""MATCH (discover)
        SELECT contexts[trace.span_id]
        WHERE timestamp >= toDateTime('2021-07-25T15:02:10')
        AND timestamp < toDateTime('2021-07-26T15:02:10')
        AND contexts[trace.span_id] = '{span_id_hex}'
        AND project_id IN tuple(5492900)
        """

    # ----- create the request object as if it came in through our API -----
    query_body = {
        "query": query_str,
        "debug": True,
        "dataset": dataset_name,
        "turbo": False,
        "consistent": False,
    }
    dataset = get_dataset(dataset_name)
    schema = RequestSchema.build(HTTPQuerySettings)
    request = build_request(
        query_body,
        parse_snql_query,
        HTTPQuerySettings,
        schema,
        dataset,
        Timer(name="bloop"),
        "some_referrer",
    )
    # -----------------------------------------------------------------------

    def query_verifier(
        query: Union[Query, CompositeQuery[Table]],
        settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        assert isinstance(query, Query)

        # In local and CI there's a table name difference
        # (errors_local vs errors_dist and discover_local vs discover_dist),
        # so we check using `in` instead of `==`.
        assert expected_table_name in query.get_from_clause().table_name

        assert query.get_selected_columns() == [
            SelectedExpression(
                name="contexts[trace.span_id]",
                # the select converts the span_id into a lowercase hex string
                expression=FunctionCall(
                    "_snuba_contexts[trace.span_id]",
                    "lower",
                    (FunctionCall(None, "hex", (Column(None, None, "span_id"),)),),
                ),
            )
        ]

        class SpanIdVerifier(NoopVisitor):
            def __init__(self) -> None:
                self.found_span_condition = False
                super().__init__()

            def visit_function_call(self, exp: FunctionCall) -> None:
                if exp.function_name == "equals" and exp.parameters[0] == Column(
                    None, None, "span_id"
                ):
                    self.found_span_condition = True
                    # and here we can see that the hex string the client
                    # queried us with has been converted to the correct uint64
                    assert exp.parameters[1] == Literal(None, span_id_as_uint64)
                return super().visit_function_call(exp)

        verifier = SpanIdVerifier()
        condition = query.get_condition()
        assert condition is not None
        condition.accept(verifier)
        assert verifier.found_span_condition

        return QueryResult(
            result={"meta": [], "data": [], "totals": {}},
            extra={"stats": {}, "sql": "", "experiments": {}},
        )

    entity.get_query_pipeline_builder().build_execution_pipeline(
        request, query_verifier
    ).execute()

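# For context, the hex <-> uint64 round trip the promotion relies on; the
# values here are illustrative, not the module-level span_id_hex /
# span_id_as_uint64 fixtures used by the test:
example_hex = "91d72fcd12879c15"
example_uint64 = int(example_hex, 16)  # what the condition is rewritten to compare
assert f"{example_uint64:016x}" == example_hex  # what lower(hex(...)) yields back
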
def test_tags_hashmap_optimization() -> None:
    entity = get_entity(EntityKey.DISCOVER)
    dataset_name = "discover"
    query_str = """MATCH (discover)
        SELECT count() AS count
        WHERE timestamp >= toDateTime('2021-07-12T19:45:01')
        AND timestamp < toDateTime('2021-08-11T19:45:01')
        AND project_id IN tuple(300688)
        AND ifNull(tags[duration_group], '') != ''
        AND ifNull(tags[duration_group], '') = '<10s'
        LIMIT 50
        """

    # ----- create the request object as if it came in through our API -----
    query_body = {
        "query": query_str,
        "debug": True,
        "dataset": dataset_name,
        "turbo": False,
        "consistent": False,
    }
    dataset = get_dataset(dataset_name)
    schema = RequestSchema.build(HTTPQuerySettings)
    request = build_request(
        query_body,
        parse_snql_query,
        HTTPQuerySettings,
        schema,
        dataset,
        Timer(name="bloop"),
        "some_referrer",
    )
    # -----------------------------------------------------------------------

    def query_verifier(
        query: Union[Query, CompositeQuery[Table]],
        settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        class ConditionVisitor(NoopVisitor):
            def __init__(self) -> None:
                self.found_hashmap_condition = False
                super().__init__()

            def visit_function_call(self, exp: FunctionCall) -> None:
                # the tags[...] accesses should have been rewritten away
                assert exp.function_name != "arrayElement"
                if (
                    exp.function_name == "has"
                    and isinstance(exp.parameters[0], Column)
                    and exp.parameters[0].column_name == "_tags_hash_map"
                ):
                    self.found_hashmap_condition = True
                return super().visit_function_call(exp)

        visitor = ConditionVisitor()
        condition = query.get_condition()
        assert condition is not None
        condition.accept(visitor)
        assert visitor.found_hashmap_condition

        return QueryResult(
            result={"meta": [], "data": [], "totals": {}},
            extra={"stats": {}, "sql": "", "experiments": {}},
        )

    entity.get_query_pipeline_builder().build_execution_pipeline(
        request, query_verifier
    ).execute()

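# A sketch of the rewrite this test expects, assuming the hash map stores
# cityHash64("key=value") entries (escaping details elided):
#
#   ifNull(tags[duration_group], '') = '<10s'
#     -->  has(_tags_hash_map, cityHash64('duration_group=<10s'))
#
# which can be served by an index on _tags_hash_map rather than scanning the
# tags key/value arrays via arrayElement/indexOf for every row.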