def parse_and_run_query(dataset: Dataset, request: Request, timer: Timer) -> RawQueryResult:
    """
    Runs a Snuba Query, then records the metadata about each split query that
    was run.

    :param dataset: the dataset the query targets (used for naming/tagging).
    :param request: the parsed Snuba request to execute.
    :param timer: timer accumulating per-phase timings; attached to metadata.
    :return: the raw result of the query pipeline.
    :raises RawQueryException: re-raised after recording query metadata.
    """
    # Snapshot the request before the pipeline can mutate it, so the recorded
    # metadata reflects what the client actually sent.
    request_copy = copy.deepcopy(request)
    dataset_name = get_dataset_name(dataset)
    query_metadata = SnubaQueryMetadata(
        request=request_copy,
        dataset=dataset_name,
        timer=timer,
        query_list=[],
    )

    # Tag the active Sentry span (if any) for per-dataset/per-referrer tracing.
    with sentry_sdk.configure_scope() as scope:
        if scope.span:
            scope.span.set_tag("dataset", dataset_name)
            scope.span.set_tag("referrer", http_request.referrer)

    try:
        result = _run_query_pipeline(
            dataset=dataset, request=request, timer=timer, query_metadata=query_metadata
        )
        record_query(request_copy, timer, query_metadata)
    except RawQueryException:
        # Record the (failed) query before propagating. Bare `raise` keeps the
        # original traceback without adding a duplicate frame.
        record_query(request_copy, timer, query_metadata)
        raise

    return result
def parse_and_run_query(
    dataset: Dataset,
    request: Request,
    timer: Timer,
    robust: bool = False,
    concurrent_queries_gauge: Optional[Gauge] = None,
) -> QueryResult:
    """
    Execute a Snuba query through the query pipeline and record metadata for
    every split query that ran.

    Recording is skipped for dry runs on success; on failure the metadata is
    recorded (with the error's extra payload) before the exception propagates.
    """
    metadata = SnubaQueryMetadata(
        request=request,
        dataset=get_dataset_name(dataset),
        timer=timer,
        query_list=[],
    )

    try:
        result = _run_query_pipeline(
            dataset=dataset,
            request=request,
            timer=timer,
            query_metadata=metadata,
            robust=robust,
            concurrent_queries_gauge=concurrent_queries_gauge,
        )
        if not request.settings.get_dry_run():
            record_query(request, timer, metadata, result.extra)
    except QueryException as error:
        record_query(request, timer, metadata, error.extra)
        raise error

    return result
def _trace_transaction(dataset: Dataset) -> None:
    """
    Tag the current Sentry span with the dataset and HTTP referrer, and rename
    the active transaction to include both, so traces can be grouped per
    dataset/referrer combination.
    """
    with sentry_sdk.configure_scope() as sentry_scope:
        span = sentry_scope.span
        if span:
            span.set_tag("dataset", get_dataset_name(dataset))
            span.set_tag("referrer", http_request.referrer)
        if sentry_scope.transaction:
            sentry_scope.transaction = f"{sentry_scope.transaction.name}__{get_dataset_name(dataset)}__{http_request.referrer}"
def parse_and_run_query(
    dataset: Dataset,
    request: Request,
    timer: Timer,
    robust: bool = False,
    concurrent_queries_gauge: Optional[Gauge] = None,
) -> QueryResult:
    """
    Runs a Snuba Query, then records the metadata about each split query that
    was run.

    :param dataset: dataset the query targets; used for naming and (for
        discover-style queries) entity selection context.
    :param request: the parsed Snuba request.
    :param timer: accumulates per-phase timings, attached to the metadata.
    :param robust: passed through to the query pipeline.
    :param concurrent_queries_gauge: optional gauge tracking concurrent queries.
    :raises QueryException: re-raised after recording metadata for the failure.
    """
    # Extract the entity and its time range (when the query is a simple
    # LogicalQuery against an entity with a required time column) so the
    # recorded metadata can carry them.
    start, end = None, None
    entity_name = "unknown"
    if isinstance(request.query, LogicalQuery):
        entity_key = request.query.get_from_clause().key
        entity = get_entity(entity_key)
        entity_name = entity_key.value
        if entity.required_time_column is not None:
            start, end = get_time_range(request.query, entity.required_time_column)

    query_metadata = SnubaQueryMetadata(
        request=request,
        start_timestamp=start,
        end_timestamp=end,
        dataset=get_dataset_name(dataset),
        entity=entity_name,
        timer=timer,
        query_list=[],
        projects=ProjectsFinder().visit(request.query),
        snql_anonymized=request.snql_anonymized,
    )

    try:
        result = _run_query_pipeline(
            dataset=dataset,
            request=request,
            timer=timer,
            query_metadata=query_metadata,
            robust=robust,
            concurrent_queries_gauge=concurrent_queries_gauge,
        )
        _set_query_final(request, result.extra)
        # Dry runs are not recorded; real queries are.
        if not request.query_settings.get_dry_run():
            record_query(request, timer, query_metadata, result.extra)
    except QueryException as error:
        _set_query_final(request, error.extra)
        record_query(request, timer, query_metadata, error.extra)
        # Bare raise preserves the original traceback without an extra frame.
        raise

    return result
def selector(query: Union[CompositeQuery[QueryEntity], LogicalQuery]) -> None:
    """
    Mutates *query* in place: for queries on the "discover" dataset whose
    entity was left as the generic DISCOVER placeholder, pick a concrete
    entity and rewrite the FROM clause (and timestamp condition aliases)
    accordingly.

    NOTE(review): `dataset` is a free variable captured from the enclosing
    scope — this function is a closure, presumably passed as a callback.
    """
    # If you are doing a JOIN, then you have to specify the entity
    if isinstance(query, CompositeQuery):
        return
    if get_dataset_name(dataset) == "discover":
        query_entity = query.get_from_clause()
        # The legacy -> snql parser will mark queries with no entity specified as the "discover" entity
        # so only do this selection in that case. If someone wants the "discover" entity specifically
        # then their query will have to only use fields from that entity.
        if query_entity.key == EntityKey.DISCOVER:
            selected_entity_key = dataset.select_entity(query)
            selected_entity = get_entity(selected_entity_key)
            query_entity = QueryEntity(
                selected_entity_key, selected_entity.get_data_model()
            )
            query.set_from_clause(query_entity)

            # XXX: This exists only to ensure that the generated SQL matches legacy queries.
            def replace_time_condition_aliases(exp: Expression) -> Expression:
                # Rewrite `_snuba_timestamp` column aliases to the selected
                # entity's required time column, so the emitted SQL matches
                # what the legacy query path produced.
                if (
                    isinstance(exp, FunctionCall)
                    and len(exp.parameters) == 2
                    and isinstance(exp.parameters[0], Column)
                    and exp.parameters[0].alias == "_snuba_timestamp"
                ):
                    return FunctionCall(
                        exp.alias,
                        exp.function_name,
                        (
                            Column(
                                f"_snuba_{selected_entity.required_time_column}",
                                exp.parameters[0].table_name,
                                exp.parameters[0].column_name,
                            ),
                            exp.parameters[1],
                        ),
                    )
                return exp

            condition = query.get_condition()
            if condition is not None:
                query.set_ast_condition(
                    condition.transform(replace_time_condition_aliases)
                )
def __init__(
    self,
    dataset: Dataset,
    executor: ThreadPoolExecutor,
    schedulers: Mapping[int, Scheduler[Subscription]],
    producer: Producer[SubscriptionTaskResult],
    topic: Topic,
    metrics: MetricsBackend,
) -> None:
    """
    Wire up a subscription executor for one dataset.

    :param dataset: dataset whose subscriptions are executed.
    :param executor: thread pool that runs the subscription queries.
    :param schedulers: scheduler per partition index (keyed by int).
    :param producer: producer that results are published through.
    :param topic: topic the results are published to.
    :param metrics: backend used for the concurrency gauges below.
    """
    self.__dataset = dataset
    # Cached so the name doesn't have to be re-derived on every use.
    self.__dataset_name = get_dataset_name(self.__dataset)
    self.__executor = executor
    self.__schedulers = schedulers
    self.__producer = producer
    self.__topic = topic
    self.__metrics = metrics
    # Thread-safe gauges: one for overall in-flight executions, one for
    # in-flight ClickHouse queries specifically.
    self.__concurrent_gauge: Gauge = ThreadSafeGauge(
        self.__metrics, "executor.concurrent")
    self.__concurrent_clickhouse_gauge: Gauge = ThreadSafeGauge(
        self.__metrics, "executor.concurrent.clickhouse")
def parse_and_run_query(dataset: Dataset, request: Request, timer: Timer) -> QueryResult:
    """
    Execute a Snuba query through the pipeline, recording metadata for each
    split query that ran — on success and on failure alike.
    """
    # Snapshot the incoming request so recorded metadata is not affected by
    # any mutation the pipeline performs.
    snapshot = copy.deepcopy(request)
    metadata = SnubaQueryMetadata(
        request=snapshot,
        dataset=get_dataset_name(dataset),
        timer=timer,
        query_list=[],
    )

    try:
        result = _run_query_pipeline(
            request=request, timer=timer, query_metadata=metadata
        )
    except QueryException as error:
        record_query(snapshot, timer, metadata)
        raise error

    record_query(snapshot, timer, metadata)
    return result
def test_get_dataset_name() -> None:
    """Round-trip: resolving a dataset by name then asking its name returns the original string."""
    expected = "events"
    dataset = get_dataset(expected)
    assert get_dataset_name(dataset) == expected
def dataset_query(
    dataset: Dataset, body: MutableMapping[str, Any], timer: Timer
) -> Response:
    """
    HTTP handler: validate the POSTed query body, run it against *dataset*,
    and translate success or failure into a JSON HTTP response.

    Maps the cause of a QueryException to an HTTP status: rate limiting -> 429,
    ClickHouse errors -> a derived status, over-long queries -> 400, anything
    else -> 500.
    """
    assert http_request.method == "POST"
    referrer = http_request.referrer or "<unknown>"  # mypy

    # Try to detect if new requests are being sent to the api
    # after the shutdown command has been issued, and if so
    # how long after. I don't want to do a disk check for
    # every query, so randomly sample until the shutdown file
    # is detected, and then log everything
    if IS_SHUTTING_DOWN or random.random() < 0.05:
        if IS_SHUTTING_DOWN or check_down_file_exists():
            tags = {"dataset": get_dataset_name(dataset)}
            metrics.increment("post.shutdown.query", tags=tags)
            diff = time.time() - (shutdown_time() or 0.0)  # this should never be None
            metrics.timing("post.shutdown.query.delay", diff, tags=tags)

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build(HTTPQuerySettings)

    request = build_request(
        body, parse_snql_query, HTTPQuerySettings, schema, dataset, timer, referrer
    )

    try:
        result = parse_and_run_query(dataset, request, timer)
    except QueryException as exception:
        # Default to 500 unless the chained cause identifies a more specific
        # failure mode below.
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": str(cause),
            }
            logger.warning(
                str(cause),
                exc_info=True,
            )
        elif isinstance(cause, ClickhouseError):
            status = get_http_status_for_clickhouse_error(cause)
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, QueryTooLongException):
            status = 400
            details = {"type": "query-too-long", "message": str(cause)}
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    # Include internal stats in the payload when globally enabled or when the
    # caller asked for debug output.
    if settings.STATS_IN_RESPONSE or request.query_settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
def to_url(self, value: Dataset) -> str:
    """Serialize a Dataset into its name for use as a URL component."""
    dataset_name = get_dataset_name(value)
    return dataset_name
def test(self):
    """Round-trip: the name used to look up a dataset is the name reported back."""
    expected = "events"
    assert get_dataset_name(get_dataset(expected)) == expected