def _run_query_pipeline(
    dataset: Dataset,
    request: Request,
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
) -> RawQueryResult:
    """
    Runs the query processing and execution pipeline for a Snuba Query.
    This means it takes a Dataset and a Request and returns the results
    of the query.

    This process includes:
    - Applying dataset specific syntax extensions (QueryExtension).
    - Applying dataset query processors on the abstract Snuba query.
    - Using the dataset provided StorageQueryPlanBuilder to build a
      StorageQueryPlan. This step transforms the Snuba Query into the
      Storage Query (that is contextual to the storage(s)). From this
      point on nothing should depend on the dataset.
    - Executing the storage specific query processors.
    - Providing the newly built Query and a QueryRunner to the
      QueryExecutionStrategy to actually run the DB Query.
    """
    # TODO: this will work perfectly with datasets that are not time series. Remove it.
    from_date, to_date = TimeSeriesExtensionProcessor.get_time_limit(
        request.extensions["timeseries"]
    )

    if (
        request.query.get_sample() is not None and request.query.get_sample() != 1.0
    ) and not request.settings.get_turbo():
        metrics.increment("sample_without_turbo", tags={"referrer": request.referrer})

    extensions = dataset.get_extensions()
    for name, extension in extensions.items():
        extension.get_processor().process_query(
            request.query, request.extensions[name], request.settings
        )

    # TODO: Fit this in a query processor. All query transformations should be
    # driven by datasets/storages and never hardcoded.
    if request.settings.get_turbo():
        request.query.set_final(False)

    for processor in dataset.get_query_processors():
        processor.process_query(request.query, request.settings)

    storage_query_plan = dataset.get_query_plan_builder().build_plan(request)

    # TODO: This below should be a storage specific query processor.
    relational_source = request.query.get_data_source()
    request.query.add_conditions(relational_source.get_mandatory_conditions())

    for processor in storage_query_plan.query_processors:
        processor.process_query(request.query, request.settings)

    query_runner = partial(
        _format_storage_query_and_run,
        dataset,
        timer,
        query_metadata,
        from_date,
        to_date,
    )

    return storage_query_plan.execution_strategy.execute(request, query_runner)
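The dataset and storage loops above assume each processor exposes a single process_query hook that mutates the query in place. The sketch below illustrates that contract with hypothetical names (HypotheticalQueryProcessor and TurboFinalProcessor are not Snuba classes); as the TODO notes, the hardcoded turbo/FINAL branch could migrate into such a processor.

from abc import ABC, abstractmethod
from typing import Any


class HypotheticalQueryProcessor(ABC):
    # Assumed stand-in for the interface consumed by
    # dataset.get_query_processors(): one in-place mutation hook.
    @abstractmethod
    def process_query(self, query: Any, request_settings: Any) -> None:
        raise NotImplementedError


class TurboFinalProcessor(HypotheticalQueryProcessor):
    # Hypothetical processor mirroring the inline turbo branch above:
    # turbo mode trades consistency for speed, so FINAL is switched off.
    def process_query(self, query: Any, request_settings: Any) -> None:
        if request_settings.get_turbo():
            query.set_final(False)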
def _run_query_pipeline(
    dataset: Dataset,
    request: Request,
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
) -> QueryResult:
    """
    Runs the query processing and execution pipeline for a Snuba Query.
    This means it takes a Dataset and a Request and returns the results
    of the query.

    This process includes:
    - Applying dataset specific syntax extensions (QueryExtension).
    - Applying dataset query processors on the abstract Snuba query.
    - Using the dataset provided ClickhouseQueryPlanBuilder to build a
      ClickhouseQueryPlan. This step transforms the Snuba Query into the
      Storage Query (that is contextual to the storage(s)). From this
      point on nothing should depend on the dataset.
    - Executing the plan specific query processors.
    - Providing the newly built Query, the processors to be run for each
      DB query, and a QueryRunner to the QueryExecutionStrategy to
      actually run the DB Query.
    """
    # TODO: this will work perfectly with datasets that are not time series. Remove it.
    from_date, to_date = TimeSeriesExtensionProcessor.get_time_limit(
        request.extensions["timeseries"]
    )

    if (
        request.query.get_sample() is not None and request.query.get_sample() != 1.0
    ) and not request.settings.get_turbo():
        metrics.increment("sample_without_turbo", tags={"referrer": request.referrer})

    extensions = dataset.get_extensions()
    for name, extension in extensions.items():
        with sentry_sdk.start_span(
            description=type(extension.get_processor()).__name__, op="extension"
        ):
            extension.get_processor().process_query(
                request.query, request.extensions[name], request.settings
            )

    # TODO: Fit this in a query processor. All query transformations should be
    # driven by datasets/storages and never hardcoded.
    if request.settings.get_turbo():
        request.query.set_final(False)

    for processor in dataset.get_query_processors():
        with sentry_sdk.start_span(
            description=type(processor).__name__, op="processor"
        ):
            processor.process_query(request.query, request.settings)

    query_plan = dataset.get_query_plan_builder().build_plan(request)

    # From this point on, the logical query should not be used anymore by anyone.
    # The Clickhouse Query is the one to be used to run the rest of the query
    # pipeline.

    # TODO: Break the Query Plan execution out of this method. With the division
    # between plan specific processors and DB query specific processors and with
    # the soon to come ClickhouseCluster, there is more coupling between the
    # components of the query plan.

    for clickhouse_processor in query_plan.plan_processors:
        with sentry_sdk.start_span(
            description=type(clickhouse_processor).__name__, op="processor"
        ):
            clickhouse_processor.process_query(query_plan.query, request.settings)

    query_runner = partial(
        _format_storage_query_and_run,
        timer,
        query_metadata,
        from_date,
        to_date,
        request.referrer,
    )

    return query_plan.execution_strategy.execute(
        query_plan.query, request.settings, query_runner
    )
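Because the strategy receives the runner as a plain callable, their interaction can be shown in isolation. Everything below is a hypothetical sketch: SingleQueryStrategy and _fake_format_and_run are not Snuba code, and the (query, settings) runner signature is an assumption. It only illustrates how functools.partial freezes the per-request context so the strategy can invoke the runner for each DB query it decides to issue.

from functools import partial
from typing import Any, Callable

# Assumed runner shape: (query, settings) -> result.
QueryRunner = Callable[[Any, Any], Any]


class SingleQueryStrategy:
    # The simplest conceivable execution strategy: run the query exactly
    # once. Real strategies may split, sample, or retry before delegating.
    def execute(self, query: Any, settings: Any, runner: QueryRunner) -> Any:
        return runner(query, settings)


def _fake_format_and_run(timer: Any, referrer: str, query: Any, settings: Any) -> str:
    # Stand-in for _format_storage_query_and_run: the leading arguments are
    # frozen by partial(); the trailing (query, settings) arrive per DB query.
    return f"ran {query!r} for referrer {referrer}"


runner = partial(_fake_format_and_run, "timer-placeholder", "my_referrer")
print(SingleQueryStrategy().execute("SELECT count() FROM events", "settings", runner))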