def compare_conditions(
    self,
    subscription: SubscriptionData,
    exception: Optional[Type[Exception]],
    aggregate: str,
    value: Union[int, float],
) -> None:
    timer = Timer("test")
    if exception is not None:
        with pytest.raises(exception):
            request = subscription.build_request(
                self.dataset,
                datetime.utcnow(),
                100,
                timer,
            )
            parse_and_run_query(self.dataset, request, timer)
        return

    request = subscription.build_request(
        self.dataset,
        datetime.utcnow(),
        100,
        timer,
    )
    result = parse_and_run_query(self.dataset, request, timer)
    assert result.result["data"][0][aggregate] == value
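# A hedged sketch (not from the source) of how ``compare_conditions`` above
# might be driven from a parametrized pytest case. The specific conditions,
# aggregations, and expected value are hypothetical; the ``SubscriptionData``
# fields mirror the ones used in the other snippets in this section.
@pytest.mark.parametrize(
    "subscription, exception, aggregate, value",
    [
        pytest.param(
            SubscriptionData(
                project_id=1,
                conditions=[["platform", "IN", ["a"]]],  # hypothetical condition
                aggregations=[["count()", "", "count"]],
                time_window=timedelta(minutes=500),
                resolution=timedelta(minutes=1),
            ),
            None,  # no exception expected for a valid query
            "count",
            10,  # hypothetical expected aggregate value
            id="simple_count",
        ),
    ],
)
def test_compare_conditions(self, subscription, exception, aggregate, value) -> None:
    self.compare_conditions(subscription, exception, aggregate, value)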
def create(self, data: SubscriptionData, timer: Timer) -> SubscriptionIdentifier:
    # We want to test the query out here to make sure it's valid and can run
    request = data.build_request(self.dataset, datetime.utcnow(), None, timer)
    parse_and_run_query(self.dataset, request, timer)

    identifier = SubscriptionIdentifier(
        self.__partitioner.build_partition_id(data),
        uuid1(),
    )
    RedisSubscriptionDataStore(redis_client, self.dataset, identifier.partition).create(
        identifier.uuid,
        data,
    )
    return identifier
def __execute(self, task: ScheduledTask[Subscription], tick: Tick) -> Tuple[Request, Result]:
    # Measure the time elapsed between this task being scheduled and it
    # beginning to execute.
    self.__metrics.timing(
        "executor.latency", (time.time() - task.timestamp.timestamp()) * 1000
    )

    # XXX: The ``query`` name is taken from the web views so that all query
    # performance metrics are reported to the same spot, regardless of
    # execution environment.
    timer = Timer("query")

    request = task.task.data.build_request(
        self.__dataset,
        task.timestamp,
        tick.offsets.upper,
        timer,
    )

    with self.__concurrent_gauge:
        # XXX: The ``extra`` is discarded from ``QueryResult`` since it is
        # not particularly useful in this context and duplicates data that
        # is already being published to the query log.
        # XXX: The ``request`` instance is copied when passed to
        # ``parse_and_run_query`` since it can/will be mutated during
        # processing.
        return (
            request,
            parse_and_run_query(self.__dataset, copy.deepcopy(request), timer).result,
        )
def __execute_query(
    self, task: ScheduledSubscriptionTask, tick_upper_offset: int
) -> Tuple[Request, Result]:
    # Measure the time elapsed between the task's scheduled time and it
    # beginning to execute.
    self.__metrics.timing(
        "executor.latency", (time.time() - task.timestamp.timestamp()) * 1000
    )

    timer = Timer("query")

    with self.__concurrent_gauge:
        request = task.task.subscription.data.build_request(
            self.__dataset,
            task.timestamp,
            tick_upper_offset,
            timer,
            self.__metrics,
            "subscriptions_executor",
        )

        result = parse_and_run_query(
            self.__dataset,
            request,
            timer,
            robust=True,
            concurrent_queries_gauge=self.__concurrent_clickhouse_gauge,
        ).result
        return (request, result)
def test_conditions(
    self, subscription: SubscriptionData, exception: Optional[Type[Exception]]
) -> None:
    timer = Timer("test")
    if exception is not None:
        with pytest.raises(exception):
            request = subscription.build_request(
                self.dataset,
                datetime.utcnow(),
                100,
                timer,
            )
            parse_and_run_query(self.dataset, request, timer)
        return

    request = subscription.build_request(
        self.dataset,
        datetime.utcnow(),
        100,
        timer,
    )
    result = parse_and_run_query(self.dataset, request, timer)
    assert result.result["data"][0]["count"] == 10
def dataset_query(dataset: Dataset, body: MutableMapping[str, Any], timer: Timer) -> Response:
    assert http_request.method == "POST"

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build_with_extensions(
            dataset.get_extensions(), HTTPRequestSettings
        )

    request = build_request(body, schema, timer, dataset, http_request.referrer)

    try:
        result = parse_and_run_query(dataset, request, timer)
    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": "rate limit exceeded",
            }
        elif isinstance(cause, ClickhouseError):
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
def test_conditions(self) -> None:
    subscription = SubscriptionData(
        project_id=self.project_id,
        conditions=[["platform", "IN", ["a"]]],
        aggregations=[["count()", "", "count"]],
        time_window=timedelta(minutes=500),
        resolution=timedelta(minutes=1),
    )
    timer = Timer("test")
    request = subscription.build_request(
        self.dataset,
        datetime.utcnow(),
        100,
        timer,
    )
    result = parse_and_run_query(self.dataset, request, timer)
    assert result.result["data"][0]["count"] == 10
def run_non_consistent() -> Result:
    # Rebuild the request with ``consistent=False`` settings, deep-copying the
    # body and query since they can be mutated during processing.
    request_copy = Request(
        id=request.id,
        body=copy.deepcopy(request.body),
        query=copy.deepcopy(request.query),
        settings=SubscriptionRequestSettings(consistent=False),
        referrer=request.referrer,
    )

    return parse_and_run_query(
        self.__dataset,
        request_copy,
        timer,
        robust=True,
        concurrent_queries_gauge=self.__concurrent_clickhouse_gauge
        if not is_consistent_query
        else None,
    ).result
def run_query(dataset: Dataset, request: Request, timer: Timer) -> WebQueryResult:
    try:
        result = parse_and_run_query(dataset, request, timer)
        payload = {**result.result, "timing": timer.for_json()}

        if settings.STATS_IN_RESPONSE or request.settings.get_debug():
            payload.update(result.extra)

        return WebQueryResult(payload, 200)
    except RawQueryException as e:
        return WebQueryResult(
            {
                "error": {"type": e.err_type, "message": e.message, **e.meta},
                "sql": e.sql,
                "stats": e.stats,
                "timing": timer.for_json(),
            },
            429 if e.err_type == "rate-limited" else 500,
        )
def run_query(dataset: Dataset, request: Request, timer: Timer) -> QueryResult:
    try:
        return QueryResult(
            {
                **parse_and_run_query(dataset, request, timer),
                "timing": timer.for_json(),
            },
            200,
        )
    except RawQueryException as e:
        return QueryResult(
            {
                "error": {"type": e.err_type, "message": e.message, **e.meta},
                "sql": e.sql,
                "stats": e.stats,
                "timing": timer.for_json(),
            },
            429 if e.err_type == "rate-limited" else 500,
        )
def dataset_query(
    dataset: Dataset, body: MutableMapping[str, Any], timer: Timer, language: Language
) -> Response:
    assert http_request.method == "POST"
    referrer = http_request.referrer or "<unknown>"  # mypy

    if language == Language.SNQL:
        metrics.increment("snql.query.incoming", tags={"referrer": referrer})
        parser: Callable[
            [RequestParts, RequestSettings, Dataset],
            Union[Query, CompositeQuery[Entity]],
        ] = partial(parse_snql_query, [])
    else:
        parser = parse_legacy_query

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build_with_extensions(
            dataset.get_default_entity().get_extensions(), HTTPRequestSettings, language
        )

    request = build_request(
        body, parser, HTTPRequestSettings, schema, dataset, timer, referrer
    )

    try:
        result = parse_and_run_query(dataset, request, timer)

        # Some metrics to track the adoption of SnQL
        query_type = "simple"
        if language == Language.SNQL:
            if isinstance(request.query, CompositeQuery):
                if isinstance(request.query.get_from_clause(), JoinClause):
                    query_type = "join"
                else:
                    query_type = "subquery"

            metrics.increment(
                "snql.query.success", tags={"referrer": referrer, "type": query_type}
            )
    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": "rate limit exceeded",
            }
        elif isinstance(cause, ClickhouseError):
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        if language == Language.SNQL:
            metrics.increment(
                "snql.query.failed",
                tags={"referrer": referrer, "status": f"{status}"},
            )

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
def _test_request(self, data: SubscriptionData, timer: Timer) -> None:
    # Run the subscription query to make sure it is valid and can execute.
    request = data.build_request(self.dataset, datetime.utcnow(), None, timer)
    parse_and_run_query(self.dataset, request, timer)
def test_transform_column_names() -> None:
    """
    Runs a simple query containing selected expression names that do not
    match the aliases of the expressions themselves. It verifies that the
    names of the columns in the result correspond to the SelectedExpression
    names and not to the expression aliases (which are supposed to be
    internal).
    """
    events_storage = get_entity(EntityKey.EVENTS).get_writable_storage()
    event_id = uuid.uuid4().hex

    event_date = datetime.utcnow()
    write_unprocessed_events(
        events_storage,
        [
            InsertEvent(
                {
                    "event_id": event_id,
                    "group_id": 10,
                    "primary_hash": uuid.uuid4().hex,
                    "project_id": 1,
                    "message": "a message",
                    "platform": "python",
                    "datetime": event_date.strftime(settings.PAYLOAD_DATETIME_FORMAT),
                    "data": {"received": time.time()},
                    "organization_id": 1,
                    "retention_days": settings.DEFAULT_RETENTION_DAYS,
                }
            )
        ],
    )

    query = Query(
        Entity(EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model()),
        selected_columns=[
            # The selected expression names are those provided by the
            # user in the query and those the user expects in the response.
            # Aliases will be internal names to prevent shadowing.
            SelectedExpression("event_id", Column("_snuba_event_id", None, "event_id")),
            SelectedExpression(
                "message",
                FunctionCall(
                    "_snuba_message",
                    "ifNull",
                    (Column(None, None, "message"), Literal(None, "")),
                ),
            ),
        ],
    )

    query_settings = HTTPRequestSettings()
    apply_query_extensions(
        query,
        {
            "timeseries": {
                "from_date": (event_date - timedelta(minutes=5)).strftime(
                    settings.PAYLOAD_DATETIME_FORMAT
                ),
                "to_date": (event_date + timedelta(minutes=1)).strftime(
                    settings.PAYLOAD_DATETIME_FORMAT
                ),
                "granularity": 3600,
            },
            "project": {"project": [1]},
        },
        query_settings,
    )

    dataset = get_dataset("events")
    timer = Timer("test")

    result = parse_and_run_query(
        dataset,
        Request(
            id="asd",
            body={},
            query=query,
            settings=query_settings,
            referrer="asd",
        ),
        timer,
    )
    data = result.result["data"]
    assert data == [{"event_id": event_id, "message": "a message"}]

    meta = result.result["meta"]
    assert meta == [
        MetaColumn(name="event_id", type="String"),
        MetaColumn(name="message", type="String"),
    ]
def dataset_query(
    dataset: Dataset, body: MutableMapping[str, Any], timer: Timer
) -> Response:
    assert http_request.method == "POST"
    referrer = http_request.referrer or "<unknown>"  # mypy

    # Try to detect if new requests are being sent to the api
    # after the shutdown command has been issued, and if so
    # how long after. I don't want to do a disk check for
    # every query, so randomly sample until the shutdown file
    # is detected, and then log everything
    if IS_SHUTTING_DOWN or random.random() < 0.05:
        if IS_SHUTTING_DOWN or check_down_file_exists():
            tags = {"dataset": get_dataset_name(dataset)}
            metrics.increment("post.shutdown.query", tags=tags)
            diff = time.time() - (shutdown_time() or 0.0)  # this should never be None
            metrics.timing("post.shutdown.query.delay", diff, tags=tags)

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build(HTTPQuerySettings)

    request = build_request(
        body, parse_snql_query, HTTPQuerySettings, schema, dataset, timer, referrer
    )

    try:
        result = parse_and_run_query(dataset, request, timer)
    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": str(cause),
            }
            logger.warning(
                str(cause),
                exc_info=True,
            )
        elif isinstance(cause, ClickhouseError):
            status = get_http_status_for_clickhouse_error(cause)
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, QueryTooLongException):
            status = 400
            details = {"type": "query-too-long", "message": str(cause)}
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.query_settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
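# A minimal end-to-end sketch (not from the source) of the calling pattern the
# snippets above share: build a ``Timer``, build a ``Request`` from a body, run
# it with ``parse_and_run_query``, and read rows from ``result.result``. It
# assumes an "events" dataset and the SnQL request-building pipeline used in
# the last ``dataset_query`` version; the referrer and body are hypothetical.
def run_example(body: MutableMapping[str, Any]) -> Mapping[str, Any]:
    dataset = get_dataset("events")
    timer = Timer("example")
    schema = RequestSchema.build(HTTPQuerySettings)
    request = build_request(
        body, parse_snql_query, HTTPQuerySettings, schema, dataset, timer, "example"
    )
    result = parse_and_run_query(dataset, request, timer)
    # The rows live under result.result["data"]; timing can be merged in from
    # the timer, as the web views above do.
    return {**result.result, "timing": timer.for_json()}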