def do_query(query: ClickhouseQuery, query_settings: QuerySettings) -> QueryResult:
    nonlocal query_run_count
    query_run_count += 1
    if query_run_count == 1:
        return QueryResult(
            result={
                "data": [
                    {
                        "event_id": "a",
                        "project_id": "1",
                        "timestamp": "2019-10-01 22:33:42",
                    },
                    {
                        "event_id": "a",
                        "project_id": "1",
                        "timestamp": "2019-10-01 22:44:42",
                    },
                ]
            },
            extra={},
        )
    else:
        assert query.get_limit() == 2
        return QueryResult({}, {})
def do_query(
    query: ClickhouseQuery,
    request_settings: RequestSettings,
    reader: Reader,
) -> QueryResult:
    assert query == query
    return QueryResult({}, {})
def query_runner(query: Query, settings: QuerySettings, reader: Reader) -> QueryResult:
    assert query.get_selected_columns() == [
        SelectedExpression(
            "tags[transaction]",
            Column("_snuba_tags[transaction]", None, "transaction_name"),
        ),
        SelectedExpression(
            "contexts[browser.name]",
            FunctionCall(
                "_snuba_contexts[browser.name]",
                "arrayElement",
                (
                    Column(None, None, "contexts.value"),
                    FunctionCall(
                        None,
                        "indexOf",
                        (
                            Column(None, None, "contexts.key"),
                            Literal(None, "browser.name"),
                        ),
                    ),
                ),
            ),
        ),
    ]
    return QueryResult({}, {})
def test_split_metadata() -> None:
    result = QueryResult(
        result={
            "meta": [
                {"name": "field1", "type": "String"},
                {"name": "field2", "type": "Datetime('Universal')"},
                {"name": "field3", "type": "Float64"},
                {"name": "field4", "type": "Enum"},
            ],
            "data": [],
            "totals": {},
            "profile": None,
            "trace_output": "asd",
        },
        extra={"stats": {}, "sql": "select something", "experiments": {}},
    )
    split_schema = split_metadata(result)
    assert split_schema.complete is False
    assert split_schema.key_cols == ("field1", "field2")
    assert split_schema.value_cols == ("field3",)

    row = split_row(
        {
            "field1": "asd",
            "field2": datetime(2022, 1, 1, 0, 0, 0),
            "field3": 0.01,
            "field4": "asd",
        },
        split_schema,
    )
    assert row == SplitRow(key=("asd", datetime(2022, 1, 1, 0, 0, 0)), values=(0.01,))
def query_runner(query: Query, settings: RequestSettings, reader: Reader) -> QueryResult:
    if events_storage.get_storage_key() == StorageKey.EVENTS:
        transaction_col_name = "transaction"
    else:
        transaction_col_name = "transaction_name"
    assert query.get_selected_columns_from_ast() == [
        SelectedExpression(
            "tags[transaction]",
            Column("_snuba_tags[transaction]", None, transaction_col_name),
        ),
        SelectedExpression(
            "contexts[browser.name]",
            FunctionCall(
                "_snuba_contexts[browser.name]",
                "arrayElement",
                (
                    Column(None, None, "contexts.value"),
                    FunctionCall(
                        None,
                        "indexOf",
                        (
                            Column(None, None, "contexts.key"),
                            Literal(None, "browser.name"),
                        ),
                    ),
                ),
            ),
        ),
    ]
    return QueryResult({}, {})
def test() -> None:
    cv = threading.Condition()
    query_result = QueryResult({}, {"stats": {}, "sql": ""})
    mock_query_runner = Mock(return_value=query_result)

    def callback_func(args: List[Tuple[str, QueryResult]]) -> None:
        with cv:
            cv.notify()

    mock_callback = Mock(side_effect=callback_func)

    query_body = {
        "selected_columns": ["type", "project_id"],
    }

    events = get_dataset("events")
    query = parse_query(query_body, events)

    events_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.EVENTS)
        ),
    )
    errors_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.ERRORS)
        ),
    )

    delegator = PipelineDelegator(
        query_pipeline_builders={"events": events_pipeline, "errors": errors_pipeline},
        selector_func=lambda query, referrer: ("events", ["errors"]),
        callback_func=mock_callback,
    )

    with cv:
        request_settings = HTTPRequestSettings()
        delegator.build_execution_pipeline(
            Request(
                "",
                query_body,
                query,
                request_settings,
                "ref",
            ),
            mock_query_runner,
        ).execute()
        cv.wait(timeout=5)

    assert mock_query_runner.call_count == 2

    assert mock_callback.call_args == call(
        query,
        request_settings,
        "ref",
        [
            Result("events", query_result, ANY),
            Result("errors", query_result, ANY),
        ],
    )
def query_runner(
    query: Query, settings: RequestSettings, reader: Reader[SqlQuery]
) -> QueryResult:
    assert query.get_selected_columns_from_ast() == [
        SelectedExpression(
            "duration_quantiles",
            CurriedFunctionCall(
                "duration_quantiles",
                FunctionCall(
                    None,
                    "quantilesIfMerge",
                    (Literal(None, 0.5), Literal(None, 0.9)),
                ),
                (Column(None, None, "duration_quantiles"),),
            ),
        ),
        SelectedExpression(
            "sessions",
            FunctionCall("sessions", "countIfMerge", (Column(None, None, "sessions"),)),
        ),
        SelectedExpression(
            "users",
            FunctionCall("users", "uniqIfMerge", (Column(None, None, "users"),)),
        ),
    ]
    return QueryResult({}, {})
def do_query(
    query: ClickhouseQuery,
    request_settings: RequestSettings,
) -> QueryResult:
    from_date_ast, to_date_ast = get_time_range(query, "timestamp")
    assert from_date_ast is not None and isinstance(from_date_ast, datetime)
    assert to_date_ast is not None and isinstance(to_date_ast, datetime)

    conditions = query.get_conditions() or []
    from_date_str = next(
        (
            condition[2]
            for condition in conditions
            if condition[0] == "timestamp" and condition[1] == ">="
        ),
        None,
    )
    to_date_str = next(
        (
            condition[2]
            for condition in conditions
            if condition[0] == "timestamp" and condition[1] == "<"
        ),
        None,
    )
    assert from_date_str == from_date_ast.isoformat()
    assert to_date_str == to_date_ast.isoformat()

    found_timestamps.append((from_date_ast.isoformat(), to_date_ast.isoformat()))

    return QueryResult({"data": []}, {})
def query_runner(query: Query, settings: RequestSettings, reader: Reader) -> QueryResult:
    assert query.get_selected_columns() == [
        SelectedExpression(
            "org_id",
            Column("_snuba_org_id", None, "org_id"),
        ),
        SelectedExpression(
            "project_id",
            Column("_snuba_project_id", None, "project_id"),
        ),
        SelectedExpression(
            "tags[10]",
            FunctionCall(
                "_snuba_tags[10]",
                "arrayElement",
                (
                    Column(None, None, "tags.value"),
                    FunctionCall(
                        None,
                        "indexOf",
                        (Column(None, None, "tags.key"), Literal(None, 10)),
                    ),
                ),
            ),
        ),
        SelectedExpression(
            column_name,
            translated_value,
        ),
    ]
    return QueryResult({}, {})
def do_query(
    query: ClickhouseQuery,
    request_settings: RequestSettings,
    reader: Reader[SqlQuery],
) -> QueryResult:
    selected_col_names = [
        c.expression.column_name
        for c in query.get_selected_columns_from_ast() or []
        if isinstance(c.expression, Column)
    ]
    if selected_col_names == list(first_query_data[0].keys()):
        return QueryResult({"data": first_query_data}, {})
    elif selected_col_names == list(second_query_data[0].keys()):
        return QueryResult({"data": second_query_data}, {})
    else:
        raise ValueError(f"Unexpected selected columns: {selected_col_names}")
def query_verifier(
    query: Union[Query, CompositeQuery[Table]],
    settings: QuerySettings,
    reader: Reader,
) -> QueryResult:
    assert isinstance(query, Query)

    # In local and CI there is a table name difference
    # (errors_local vs errors_dist, discover_local vs discover_dist),
    # so we check using `in` instead of `==`.
    assert expected_table_name in query.get_from_clause().table_name

    assert query.get_selected_columns() == [
        SelectedExpression(
            name="contexts[trace.span_id]",
            # the select converts the span_id into a lowercase hex string
            expression=FunctionCall(
                "_snuba_contexts[trace.span_id]",
                "lower",
                (FunctionCall(None, "hex", (Column(None, None, "span_id"),)),),
            ),
        )
    ]

    class SpanIdVerifier(NoopVisitor):
        def __init__(self) -> None:
            self.found_span_condition = False
            super().__init__()

        def visit_function_call(self, exp: FunctionCall) -> None:
            if exp.function_name == "equals" and exp.parameters[0] == Column(
                None, None, "span_id"
            ):
                self.found_span_condition = True
                # and here we can see that the hex string the client queried us with
                # has been converted to the correct uint64
                assert exp.parameters[1] == Literal(None, span_id_as_uint64)
            return super().visit_function_call(exp)

    verifier = SpanIdVerifier()
    condition = query.get_condition()
    assert condition is not None
    condition.accept(verifier)

    assert verifier.found_span_condition

    return QueryResult(
        result={"meta": [], "data": [], "totals": {}},
        extra={"stats": {}, "sql": "", "experiments": {}},
    )
def do_query(
    query: ClickhouseQuery,
    request_settings: RequestSettings,
) -> QueryResult:
    from_date_ast, to_date_ast = get_time_range(query, "timestamp")
    assert from_date_ast is not None and isinstance(from_date_ast, datetime)
    assert to_date_ast is not None and isinstance(to_date_ast, datetime)
    found_timestamps.append((from_date_ast.isoformat(), to_date_ast.isoformat()))

    return QueryResult({"data": []}, {})
def runner(
    query: Union[ClickhouseQuery, CompositeQuery[Table]],
    request_settings: RequestSettings,
    reader: Reader,
) -> QueryResult:
    report = query.equals(processed_query)
    assert report[0], f"Mismatch: {report[1]}"
    return QueryResult(
        {"data": []},
        {},
    )
def do_query(
    query: ClickhouseQuery, request_settings: Optional[RequestSettings] = None
) -> QueryResult:
    return QueryResult(
        {
            "data": [
                {
                    id_column: "asd123",
                    project_column: 123,
                    timestamp_column: "2019-10-01 22:33:42",
                }
            ]
        },
        {},
    )
def runner(
    query: Union[ClickhouseQuery, CompositeQuery[Table]],
    query_settings: QuerySettings,
    reader: Reader,
) -> QueryResult:
    report = query.equals(processed_query)
    assert report[0], f"Mismatch: {report[1]}"
    return QueryResult(
        {"data": []},
        {"stats": {}, "sql": "", "experiments": {}},
    )
def query_runner(
    query: Union[Query, CompositeQuery[Table]],
    settings: QuerySettings,
    reader: Reader,
) -> QueryResult:
    assert query.get_selected_columns() == [
        SelectedExpression(
            "org_id",
            Column("_snuba_org_id", None, "org_id"),
        ),
        SelectedExpression(
            "project_id",
            Column("_snuba_project_id", None, "project_id"),
        ),
        SelectedExpression(
            "tags[10]",
            FunctionCall(
                "_snuba_tags[10]",
                "arrayElement",
                (
                    Column(None, None, "tags.value"),
                    FunctionCall(
                        None,
                        "indexOf",
                        (Column(None, None, "tags.key"), Literal(None, 10)),
                    ),
                ),
            ),
        ),
        SelectedExpression(
            column_name,
            translated_value,
        ),
    ]
    return QueryResult(
        result={"meta": [], "data": [], "totals": {}},
        extra={"stats": {}, "sql": "", "experiments": {}},
    )
def _dry_run_query_runner(
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    request_settings: RequestSettings,
    reader: Reader,
) -> QueryResult:
    with sentry_sdk.start_span(description="dryrun_create_query", op="db") as span:
        formatted_query = format_query(clickhouse_query, request_settings)
        span.set_data("query", formatted_query.structured())

    return QueryResult(
        {"data": [], "meta": []},
        {"stats": {}, "sql": formatted_query.get_sql()},
    )
def query_runner(query: Query, settings: RequestSettings, reader: Reader) -> QueryResult:
    quantiles = tuple(
        Literal(None, quant) for quant in [0.5, 0.75, 0.9, 0.95, 0.99, 1]
    )
    assert query.get_selected_columns() == [
        SelectedExpression(
            "duration_quantiles",
            CurriedFunctionCall(
                "_snuba_duration_quantiles",
                FunctionCall(
                    None,
                    "quantilesIfMerge",
                    quantiles,
                ),
                (Column(None, None, "duration_quantiles"),),
            ),
        ),
        SelectedExpression(
            "sessions",
            FunctionCall(
                "_snuba_sessions",
                "plus",
                (
                    FunctionCall(
                        None, "countIfMerge", (Column(None, None, "sessions"),)
                    ),
                    FunctionCall(
                        None,
                        "sumIfMerge",
                        (Column(None, None, "sessions_preaggr"),),
                    ),
                ),
            ),
        ),
        SelectedExpression(
            "users",
            FunctionCall(
                "_snuba_users", "uniqIfMerge", (Column(None, None, "users"),)
            ),
        ),
    ]
    return QueryResult({}, {})
def query_verifier(
    query: Union[Query, CompositeQuery[Table]],
    settings: QuerySettings,
    reader: Reader,
) -> QueryResult:
    # The only reason this extends StringifyVisitor is because it has all the
    # other visit methods implemented.
    class NullCastingVerifier(StringifyVisitor):
        def __init__(self) -> None:
            self.sdk_version_cast_to_null = False
            super().__init__()

        def visit_function_call(self, exp: FunctionCall) -> str:
            if (
                exp.function_name == "cast"
                and exp.alias == "_snuba_sdk_version"
                and exp.parameters
                == (
                    Column(None, None, "sdk_version"),
                    Literal(None, "Nullable(String)"),
                )
            ):
                self.sdk_version_cast_to_null = True
            return super().visit_function_call(exp)

    for select_expr in query.get_selected_columns():
        verifier = NullCastingVerifier()
        select_expr.expression.accept(verifier)
        assert verifier.sdk_version_cast_to_null

    return QueryResult(
        result={"meta": [], "data": [], "totals": {}},
        extra={"stats": {}, "sql": "", "experiments": {}},
    )
def raw_query(
    # TODO: Passing the whole clickhouse query here is needed as long
    # as the execute method depends on it. Otherwise we can make this
    # file rely either entirely on clickhouse query or entirely on
    # the formatter.
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    query_settings: QuerySettings,
    formatted_query: FormattedQuery,
    reader: Reader,
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
    stats: MutableMapping[str, Any],
    trace_id: Optional[str] = None,
    robust: bool = False,
) -> QueryResult:
    """
    Submits a raw SQL query to the DB and does some post-processing on it to
    fix some of the formatting issues in the result JSON.
    This function is not supposed to depend on anything higher level than the
    clickhouse query. If this function ends up depending on the dataset,
    something is wrong.
    """
    all_confs = state.get_all_configs()
    clickhouse_query_settings: MutableMapping[str, Any] = {
        k.split("/", 1)[1]: v
        for k, v in all_confs.items()
        if k.startswith("query_settings/")
    }

    timer.mark("get_configs")

    sql = formatted_query.get_sql()

    update_with_status = partial(
        update_query_metadata_and_stats,
        clickhouse_query,
        sql,
        timer,
        stats,
        query_metadata,
        clickhouse_query_settings,
        trace_id,
    )

    execute_query_strategy = (
        execute_query_with_readthrough_caching
        if state.get_config("use_readthrough_query_cache", 1)
        else execute_query_with_caching
    )

    try:
        result = execute_query_strategy(
            clickhouse_query,
            query_settings,
            formatted_query,
            reader,
            timer,
            stats,
            clickhouse_query_settings,
            robust=robust,
        )
    except Exception as cause:
        if isinstance(cause, RateLimitExceeded):
            stats = update_with_status(QueryStatus.RATE_LIMITED)
        else:
            error_code = None
            with configure_scope() as scope:
                if isinstance(cause, ClickhouseError):
                    error_code = cause.code
                    scope.fingerprint = ["{{default}}", str(cause.code)]
                    if scope.span:
                        if cause.code == errors.ErrorCodes.TOO_SLOW:
                            sentry_sdk.set_tag("timeout", "predicted")
                        elif cause.code == errors.ErrorCodes.TIMEOUT_EXCEEDED:
                            sentry_sdk.set_tag("timeout", "query_timeout")
                        elif cause.code in (
                            errors.ErrorCodes.SOCKET_TIMEOUT,
                            errors.ErrorCodes.NETWORK_ERROR,
                        ):
                            sentry_sdk.set_tag("timeout", "network")
                elif isinstance(
                    cause,
                    (TimeoutError, ExecutionTimeoutError, TigerExecutionTimeoutError),
                ):
                    if scope.span:
                        sentry_sdk.set_tag("timeout", "cache_timeout")

                logger.exception("Error running query: %s\n%s", sql, cause)
                stats = update_with_status(QueryStatus.ERROR, error_code=error_code)
        raise QueryException(
            {
                "stats": stats,
                "sql": sql,
                "experiments": clickhouse_query.get_experiments(),
            }
        ) from cause
    else:
        stats = update_with_status(QueryStatus.SUCCESS, result["profile"])
        return QueryResult(
            result,
            {
                "stats": stats,
                "sql": sql,
                "experiments": clickhouse_query.get_experiments(),
            },
        )
def raw_query(
    # TODO: Passing the whole clickhouse query here is needed as long
    # as the execute method depends on it. Otherwise we can make this
    # file rely either entirely on clickhouse query or entirely on
    # the formatter.
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    request_settings: RequestSettings,
    formatted_query: FormattedQuery,
    reader: Reader,
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
    stats: MutableMapping[str, Any],
    trace_id: Optional[str] = None,
) -> QueryResult:
    """
    Submits a raw SQL query to the DB and does some post-processing on it to
    fix some of the formatting issues in the result JSON.
    This function is not supposed to depend on anything higher level than the
    clickhouse query. If this function ends up depending on the dataset,
    something is wrong.
    """
    all_confs = state.get_all_configs()
    query_settings: MutableMapping[str, Any] = {
        k.split("/", 1)[1]: v
        for k, v in all_confs.items()
        if k.startswith("query_settings/")
    }

    timer.mark("get_configs")

    sql = formatted_query.get_sql()

    update_with_status = partial(
        update_query_metadata_and_stats,
        clickhouse_query,
        sql,
        timer,
        stats,
        query_metadata,
        query_settings,
        trace_id,
    )

    execute_query_strategy = (
        execute_query_with_readthrough_caching
        if state.get_config("use_readthrough_query_cache", 1)
        else execute_query_with_caching
    )

    try:
        result = execute_query_strategy(
            clickhouse_query,
            request_settings,
            formatted_query,
            reader,
            timer,
            stats,
            query_settings,
        )
    except Exception as cause:
        if isinstance(cause, RateLimitExceeded):
            stats = update_with_status(QueryStatus.RATE_LIMITED)
        else:
            with configure_scope() as scope:
                if isinstance(cause, ClickhouseError):
                    scope.fingerprint = ["{{default}}", str(cause.code)]
                logger.exception("Error running query: %s\n%s", sql, cause)
                stats = update_with_status(QueryStatus.ERROR)
        raise QueryException({"stats": stats, "sql": sql}) from cause
    else:
        stats = update_with_status(QueryStatus.SUCCESS)
        return QueryResult(result, {"stats": stats, "sql": sql})
def query_runner(query: Query, settings: RequestSettings, reader: Reader) -> QueryResult:
    assert query.get_from_clause().table_name == expected_table
    return QueryResult({}, {})
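The stub runners above all follow the same pattern: inspect or ignore the incoming query and hand back a canned QueryResult instead of talking to ClickHouse. Below is a minimal sketch of that pattern for reuse in tests, assuming the same QueryResult, Query, QuerySettings, and Reader types used in the surrounding snippets; make_canned_runner is a hypothetical helper for illustration, not part of the codebase.

def make_canned_runner(data):  # hypothetical helper, illustration only
    def runner(query: Query, settings: QuerySettings, reader: Reader) -> QueryResult:
        # Ignore the query and return a fixed payload, as the stubs above do.
        return QueryResult(
            {"data": data},
            {"stats": {}, "sql": "", "experiments": {}},
        )

    return runner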
from snuba.reader import Column, Result
from snuba.web import QueryExtraData, QueryResult, transform_column_names

TEST_CASES = [
    pytest.param(
        QueryResult(
            result=Result(
                meta=[
                    Column(name="_snuba_event_id", type="String"),
                    Column(name="_snuba_duration", type="UInt32"),
                    Column(name="_snuba_message", type="String"),
                ],
                data=[
                    {
                        "_snuba_event_id": "asd",
                        "_snuba_duration": 123,
                        "_snuba_message": "msg",
                    },
                    {
                        "_snuba_event_id": "sdf",
                        "_snuba_duration": 321,
                        "_snuba_message": "msg2",
                    },
                ],
            ),
            extra=QueryExtraData(stats={}, sql="...", experiments={}),
        ),
        QueryResult(
            result=Result(
                meta=[
                    Column(name="event_id", type="String"),
                    Column(name="duration", type="UInt32"),
def test() -> None:
    cv = threading.Condition()
    query_result = QueryResult({}, {"stats": {}, "sql": "", "experiments": {}})

    def callback_func(
        primary: Optional[Tuple[str, QueryResult]],
        other: List[Tuple[str, QueryResult]],
    ) -> None:
        with cv:
            cv.notify()

    mock_callback = Mock(side_effect=callback_func)

    query_body = {
        "query": """
        MATCH (events)
        SELECT type, project_id
        WHERE project_id = 1
        AND timestamp >= toDateTime('2020-01-01 12:00:00')
        AND timestamp < toDateTime('2020-01-02 12:00:00')
        """,
        "dataset": "events",
    }

    events = get_dataset("events")
    query, _ = parse_snql_query(query_body["query"], events)

    errors_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.ERRORS)
        ),
    )
    errors_ro_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.ERRORS_RO)
        ),
    )

    delegator = PipelineDelegator(
        query_pipeline_builders={
            "errors": errors_pipeline,
            "errors_ro": errors_ro_pipeline,
        },
        selector_func=lambda query, referrer: ("errors", ["errors_ro"]),
        split_rate_limiter=True,
        ignore_secondary_exceptions=True,
        callback_func=mock_callback,
    )

    runner_call_count = 0
    runner_settings: MutableSequence[QuerySettings] = []

    def query_runner(
        query: Union[Query, CompositeQuery[Table]],
        settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        nonlocal runner_call_count
        nonlocal runner_settings

        runner_call_count += 1
        runner_settings.append(settings)
        return query_result

    set_config("pipeline_split_rate_limiter", 1)

    with cv:
        query_settings = HTTPQuerySettings(referrer="ref")
        delegator.build_execution_pipeline(
            Request(
                id="asd",
                original_body=query_body,
                query=query,
                snql_anonymized="",
                query_settings=query_settings,
                attribution_info=AttributionInfo(
                    get_app_id("ref"), "ref", None, None, None
                ),
            ),
            query_runner,
        ).execute()
        cv.wait(timeout=5)

    assert runner_call_count == 2
    assert len(runner_settings) == 2

    settings, settings_ro = runner_settings
    # Validate that settings have been duplicated
    assert id(settings) != id(settings_ro)

    assert mock_callback.call_args == call(
        query,
        query_settings,
        "ref",
        Result("errors", query_result, ANY),
        [Result("errors_ro", query_result, ANY)],
    )