def record_query(
    request: Request, timer: Timer, query_metadata: SnubaQueryMetadata
) -> None:
    """
    Records a request after it has been parsed and validated, whether
    we actually ran a query or not.
    """
    if settings.RECORD_QUERIES:
        # Send to redis
        # We convert this to a dict before passing it to state in order to avoid a
        # circular dependency, where state would depend on the higher level
        # QueryMetadata class
        state.record_query(query_metadata.to_dict())

        final = str(request.query.get_final())
        referrer = request.referrer or "none"
        timer.send_metrics_to(
            metrics,
            tags={
                "status": query_metadata.status.value,
                "referrer": referrer,
                "final": final,
            },
            mark_tags={"final": final, "referrer": referrer},
        )
        _add_tags(timer, request)
def parse_and_run_query(
    dataset: Dataset,
    request: Request,
    timer: Timer,
    robust: bool = False,
    concurrent_queries_gauge: Optional[Gauge] = None,
) -> QueryResult:
    """
    Runs a Snuba Query, then records the metadata about each split query
    that was run.
    """
    query_metadata = SnubaQueryMetadata(
        request=request,
        dataset=get_dataset_name(dataset),
        timer=timer,
        query_list=[],
    )

    try:
        result = _run_query_pipeline(
            dataset=dataset,
            request=request,
            timer=timer,
            query_metadata=query_metadata,
            robust=robust,
            concurrent_queries_gauge=concurrent_queries_gauge,
        )
        if not request.settings.get_dry_run():
            record_query(request, timer, query_metadata, result.extra)
    except QueryException as error:
        record_query(request, timer, query_metadata, error.extra)
        raise error

    return result
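# Illustration only (not Snuba code): a stripped-down stand-in for the control
# flow above, showing that metadata is recorded on both the success and the
# failure path, that the dry-run guard skips recording only on success, and
# that the exception is re-raised so callers still observe it. `run_and_record`,
# `run_pipeline`, `record` and `QueryError` are hypothetical stand-ins.
from typing import Any, Callable


class QueryError(Exception):
    """Stand-in for QueryException, which carries an `extra` payload."""

    def __init__(self, extra: Any) -> None:
        super().__init__("query failed")
        self.extra = extra


def run_and_record(
    run_pipeline: Callable[[], Any],
    record: Callable[[Any], None],
    dry_run: bool,
) -> Any:
    try:
        result = run_pipeline()
        if not dry_run:
            record(result)
    except QueryError as error:
        # Failures are always recorded, then propagated to the caller.
        record(error.extra)
        raise
    return result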
def parse_and_run_query(
    dataset: Dataset,
    request: Request,
    timer: Timer,
    robust: bool = False,
    concurrent_queries_gauge: Optional[Gauge] = None,
) -> QueryResult:
    """
    Runs a Snuba Query, then records the metadata about each split query
    that was run.
    """
    # from_clause = request.query.get_from_clause()
    start, end = None, None
    entity_name = "unknown"
    if isinstance(request.query, LogicalQuery):
        entity_key = request.query.get_from_clause().key
        entity = get_entity(entity_key)
        entity_name = entity_key.value
        if entity.required_time_column is not None:
            start, end = get_time_range(request.query, entity.required_time_column)

    query_metadata = SnubaQueryMetadata(
        request=request,
        start_timestamp=start,
        end_timestamp=end,
        dataset=get_dataset_name(dataset),
        entity=entity_name,
        timer=timer,
        query_list=[],
        projects=ProjectsFinder().visit(request.query),
        snql_anonymized=request.snql_anonymized,
    )

    try:
        result = _run_query_pipeline(
            dataset=dataset,
            request=request,
            timer=timer,
            query_metadata=query_metadata,
            robust=robust,
            concurrent_queries_gauge=concurrent_queries_gauge,
        )
        _set_query_final(request, result.extra)
        if not request.query_settings.get_dry_run():
            record_query(request, timer, query_metadata, result.extra)
    except QueryException as error:
        _set_query_final(request, error.extra)
        record_query(request, timer, query_metadata, error.extra)
        raise error

    return result
def record_query(
    request: Request,
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
    extra_data: Mapping[str, Any],
) -> None:
    """
    Records a request after it has been parsed and validated, whether
    we actually ran a query or not.
    """
    if settings.RECORD_QUERIES:
        # Send to redis
        # We convert this to a dict before passing it to state in order to avoid a
        # circular dependency, where state would depend on the higher level
        # QueryMetadata class
        state.record_query(query_metadata.to_dict())

        _record_timer_metrics(request, timer, query_metadata)
        _record_attribution_metrics(request, query_metadata, extra_data)
        _add_tags(timer, extra_data.get("experiments"), query_metadata)
def parse_and_run_query(
    dataset: Dataset, request: Request, timer: Timer
) -> QueryResult:
    """
    Runs a Snuba Query, then records the metadata about each split query
    that was run.
    """
    # Record a copy of the request: the pipeline may rewrite request.query in
    # place, and the recorded metadata should reflect the query as submitted.
    request_copy = copy.deepcopy(request)
    query_metadata = SnubaQueryMetadata(
        request=request_copy,
        dataset=get_dataset_name(dataset),
        timer=timer,
        query_list=[],
    )

    try:
        result = _run_query_pipeline(
            request=request, timer=timer, query_metadata=query_metadata
        )
        record_query(request_copy, timer, query_metadata)
    except QueryException as error:
        record_query(request_copy, timer, query_metadata)
        raise error

    return result
def test_simple() -> None:
    request_body = {
        "selected_columns": ["event_id"],
        "orderby": "event_id",
        "sample": 0.1,
        "limit": 100,
        "offset": 50,
        "project": 1,
    }

    query = Query(
        Entity(EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model())
    )

    request = Request(
        id=uuid.UUID("a" * 32).hex,
        original_body=request_body,
        query=query,
        snql_anonymized="",
        query_settings=HTTPQuerySettings(referrer="search"),
        attribution_info=AttributionInfo(
            get_app_id("default"), "search", None, None, None
        ),
    )

    time = TestingClock()
    timer = Timer("test", clock=time)
    time.sleep(0.01)

    message = SnubaQueryMetadata(
        request=request,
        start_timestamp=datetime.utcnow() - timedelta(days=3),
        end_timestamp=datetime.utcnow(),
        dataset="events",
        timer=timer,
        query_list=[
            ClickhouseQueryMetadata(
                sql="select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100",
                sql_anonymized="select event_id from sentry_dist sample 0.1 prewhere project_id in ($I) limit 50, 100",
                start_timestamp=datetime.utcnow() - timedelta(days=3),
                end_timestamp=datetime.utcnow(),
                stats={"sample": 10, "error_code": 386},
                status=QueryStatus.SUCCESS,
                profile=ClickhouseQueryProfile(
                    time_range=10,
                    table="events",
                    all_columns={"timestamp", "tags"},
                    multi_level_condition=False,
                    where_profile=FilterProfile(
                        columns={"timestamp"},
                        mapping_cols={"tags"},
                    ),
                    groupby_cols=set(),
                    array_join_cols=set(),
                ),
                trace_id="b" * 32,
            )
        ],
        projects={2},
        snql_anonymized=request.snql_anonymized,
        entity=EntityKey.EVENTS.value,
    ).to_dict()

    processor = (
        get_writable_storage(StorageKey.QUERYLOG)
        .get_table_writer()
        .get_stream_loader()
        .get_processor()
    )

    assert processor.process_message(
        message, KafkaMessageMetadata(0, 0, datetime.now())
    ) == InsertBatch(
        [
            {
                "request_id": str(uuid.UUID("a" * 32)),
                "request_body": '{"limit": 100, "offset": 50, "orderby": "event_id", "project": 1, "sample": 0.1, "selected_columns": ["event_id"]}',
                "referrer": "search",
                "dataset": "events",
                "projects": [2],
                "organization": None,
                "timestamp": timer.for_json()["timestamp"],
                "duration_ms": 10,
                "status": "success",
                "clickhouse_queries.sql": [
                    "select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100"
                ],
                "clickhouse_queries.status": ["success"],
                "clickhouse_queries.trace_id": [str(uuid.UUID("b" * 32))],
                "clickhouse_queries.duration_ms": [0],
                "clickhouse_queries.stats": ['{"error_code": 386, "sample": 10}'],
                "clickhouse_queries.final": [0],
                "clickhouse_queries.cache_hit": [0],
                "clickhouse_queries.sample": [10.0],
                "clickhouse_queries.max_threads": [0],
                "clickhouse_queries.num_days": [10],
                "clickhouse_queries.clickhouse_table": [""],
                "clickhouse_queries.query_id": [""],
                "clickhouse_queries.is_duplicate": [0],
                "clickhouse_queries.consistent": [0],
                "clickhouse_queries.all_columns": [["tags", "timestamp"]],
                "clickhouse_queries.or_conditions": [False],
                "clickhouse_queries.where_columns": [["timestamp"]],
                "clickhouse_queries.where_mapping_columns": [["tags"]],
                "clickhouse_queries.groupby_columns": [[]],
                "clickhouse_queries.array_join_columns": [[]],
            }
        ],
        None,
    )
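# Sanity note on the literals expected above (standard-library behaviour only,
# not an assumption about the querylog processor): the "request_body" string
# matches json.dumps(request_body, sort_keys=True), and the expected
# "request_id" / "clickhouse_queries.trace_id" values are the hyphenated str()
# form of the UUIDs, whereas the Request id was built from the 32-character
# .hex form.
import json
import uuid

request_body = {
    "selected_columns": ["event_id"],
    "orderby": "event_id",
    "sample": 0.1,
    "limit": 100,
    "offset": 50,
    "project": 1,
}

assert json.dumps(request_body, sort_keys=True) == (
    '{"limit": 100, "offset": 50, "orderby": "event_id", "project": 1, '
    '"sample": 0.1, "selected_columns": ["event_id"]}'
)
assert uuid.UUID("a" * 32).hex == "a" * 32
assert str(uuid.UUID("a" * 32)) == "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"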
def test_simple() -> None:
    request_body = {
        "selected_columns": ["event_id"],
        "orderby": "event_id",
        "sample": 0.1,
        "limit": 100,
        "offset": 50,
        "project": 1,
    }

    query = Query(get_storage(StorageKey.EVENTS).get_schema().get_data_source())

    request = Request(
        uuid.UUID("a" * 32).hex,
        request_body,
        query,
        HTTPRequestSettings(),
        "search",
    )

    time = TestingClock()
    timer = Timer("test", clock=time)
    time.sleep(0.01)

    message = SnubaQueryMetadata(
        request=request,
        dataset="events",
        timer=timer,
        query_list=[
            ClickhouseQueryMetadata(
                sql="select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100",
                stats={"sample": 10},
                status=QueryStatus.SUCCESS,
                profile=ClickhouseQueryProfile(
                    time_range=10,
                    table="events",
                    all_columns={"timestamp", "tags"},
                    multi_level_condition=False,
                    where_profile=FilterProfile(
                        columns={"timestamp"},
                        mapping_cols={"tags"},
                    ),
                    groupby_cols=set(),
                    array_join_cols=set(),
                ),
                trace_id="b" * 32,
            )
        ],
    ).to_dict()

    processor = (
        get_writable_storage(StorageKey.QUERYLOG)
        .get_table_writer()
        .get_stream_loader()
        .get_processor()
    )

    assert processor.process_message(
        message, KafkaMessageMetadata(0, 0, datetime.now())
    ) == InsertBatch(
        [
            {
                "request_id": str(uuid.UUID("a" * 32)),
                "request_body": '{"limit": 100, "offset": 50, "orderby": "event_id", "project": 1, "sample": 0.1, "selected_columns": ["event_id"]}',
                "referrer": "search",
                "dataset": "events",
                "projects": [1],
                "organization": None,
                "timestamp": timer.for_json()["timestamp"],
                "duration_ms": 10,
                "status": "success",
                "clickhouse_queries.sql": [
                    "select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100"
                ],
                "clickhouse_queries.status": ["success"],
                "clickhouse_queries.trace_id": [str(uuid.UUID("b" * 32))],
                "clickhouse_queries.duration_ms": [0],
                "clickhouse_queries.stats": ['{"sample": 10}'],
                "clickhouse_queries.final": [0],
                "clickhouse_queries.cache_hit": [0],
                "clickhouse_queries.sample": [10.0],
                "clickhouse_queries.max_threads": [0],
                "clickhouse_queries.num_days": [10],
                "clickhouse_queries.clickhouse_table": [""],
                "clickhouse_queries.query_id": [""],
                "clickhouse_queries.is_duplicate": [0],
                "clickhouse_queries.consistent": [0],
                "clickhouse_queries.all_columns": [["tags", "timestamp"]],
                "clickhouse_queries.or_conditions": [False],
                "clickhouse_queries.where_columns": [["timestamp"]],
                "clickhouse_queries.where_mapping_columns": [["tags"]],
                "clickhouse_queries.groupby_columns": [[]],
                "clickhouse_queries.array_join_columns": [[]],
            }
        ],
    )