def test_query_formatter(
    query: Union[ProcessableQuery, CompositeQuery[Entity]],
    formatted: TExpression,
) -> None:
    """Format the query and verify it matches the expected representation."""
    result = format_query(query)  # type: ignore
    assert result == formatted
    # The formatter must never emit blank lines.
    assert all(line for line in result)
def __repr__(self) -> str:
    """Return the multi-line tracing representation of this query."""
    from snuba.query.formatters.tracing import format_query

    # NOTE (Vlad): mypy rejects passing this generic
    # CompositeQuery[TSimpleDataSource] to format_query, which expects the
    # concrete parametrization:
    #
    #   Argument 1 to "format_query" has incompatible type
    #   "ProcessableQuery[TSimpleDataSource]"; expected
    #   "Union[ProcessableQuery[SimpleDataSource], CompositeQuery[SimpleDataSource]]"
    #
    # TSimpleDataSource is bound to SimpleDataSource, so every type
    # parametrizing this class is a subtype of SimpleDataSource; mypy is not
    # smart enough to prove that, hence the explicit cast. Making
    # TSimpleDataSource covariant would almost work, except covariant type
    # variables cannot be used as parameters:
    # https://github.com/python/mypy/issues/7049
    concrete_query = cast(CompositeQuery[SimpleDataSource], self)
    return "\n".join(format_query(concrete_query))
def test_query_formatter(
    query: Union[LogicalQuery, CompositeQuery[Entity]], formatted: TExpression
) -> None:
    """Check that formatting the query yields the expected output."""
    actual = format_query(query)
    assert actual == formatted
def callback_func(
    storage: str,
    query: Query,
    request_settings: RequestSettings,
    referrer: str,
    results: List[Result[QueryResult]],
) -> None:
    """Compare the primary query result against the remaining results.

    Emits `query_result` match/mismatch metrics and a `diff_ms` timing for
    every secondary result, and reports the first observed discrepancy
    (length difference or first differing row) to Sentry.

    Pops the primary result off the front of ``results`` (the list is
    mutated). Sampled/turbo queries are timed but never diffed.
    """
    # Captures whether any of the queries involved was a cache hit or a
    # duplicate, as cache hits may be a cause of inconsistency between
    # results. Doesn't attempt to distinguish between all of the specific
    # scenarios (one or both queries, or splits of those queries, could have
    # hit the cache). Generator expressions let any() short-circuit instead
    # of materializing a full list first.
    cache_hit = any(
        result.result.extra["stats"].get("cache_hit", 0) for result in results
    )
    is_duplicate = not cache_hit and any(
        result.result.extra["stats"].get("is_duplicate", 0) for result in results
    )
    consistent = request_settings.get_consistent()

    if not results:
        metrics.increment(
            "query_result",
            tags={"storage": storage, "match": "empty", "referrer": referrer},
        )
        return

    # Tag values shared by every metric/Sentry event below; built once.
    common_tags = {
        "referrer": referrer,
        "cache_hit": str(cache_hit),
        "is_duplicate": str(is_duplicate),
        "consistent": str(consistent),
    }

    primary_result = results.pop(0)
    primary_result_data = primary_result.result.result["data"]

    for result in results:
        result_data = result.result.result["data"]
        metrics.timing(
            "diff_ms",
            round((result.execution_time - primary_result.execution_time) * 1000),
            tags=common_tags,
        )
        # Do not bother diffing the actual results of sampled queries.
        if request_settings.get_turbo() or query.get_sample() not in (None, 1.0):
            return
        if result_data == primary_result_data:
            metrics.increment(
                "query_result",
                tags={"storage": storage, "match": "true", **common_tags},
            )
            continue

        # Do not log cache hits to Sentry as it creates too much noise.
        if cache_hit:
            continue
        reason = assign_reason_category(result_data, primary_result_data, referrer)
        metrics.increment(
            "query_result",
            tags={
                "storage": storage,
                "match": "false",
                "reason": reason,
                **common_tags,
            },
        )
        sentry_tags = {"storage": storage, "reason": reason, **common_tags}
        if len(result_data) != len(primary_result_data):
            sentry_sdk.capture_message(
                f"Non matching {storage} result - different length",
                level="warning",
                tags=sentry_tags,
                extras={
                    "query": format_query(query),
                    "primary_result": len(primary_result_data),
                    "other_result": len(result_data),
                },
            )
            break
        # Lengths are equal here, so zip covers every row exactly.
        # Avoid sending too much data to Sentry - just one row for now.
        for primary_row, other_row in zip(primary_result_data, result_data):
            if other_row != primary_row:
                sentry_sdk.capture_message(
                    "Non matching result - different result",
                    level="warning",
                    tags=sentry_tags,
                    extras={
                        "query": format_query(query),
                        "primary_result": primary_row,
                        "other_result": other_row,
                    },
                )
                break
def callback_func(
    storage: str, query: Query, referrer: str, results: List[Result[QueryResult]]
) -> None:
    """Compare the primary query result against the remaining results.

    Emits `query_result` match/mismatch metrics and a `diff_ms` timing for
    every secondary result, and reports the first observed discrepancy
    (length difference or first differing row) to Sentry.

    Pops the primary result off the front of ``results`` (the list is
    mutated).
    """
    if not results:
        metrics.increment(
            "query_result",
            tags={"storage": storage, "match": "empty", "referrer": referrer},
        )
        return

    primary_result = results.pop(0)
    primary_result_data = primary_result.result.result["data"]

    for result in results:
        result_data = result.result.result["data"]
        metrics.timing(
            "diff_ms",
            round((result.execution_time - primary_result.execution_time) * 1000),
            tags={"referrer": referrer},
        )
        if result_data == primary_result_data:
            metrics.increment(
                "query_result",
                tags={"storage": storage, "match": "true", "referrer": referrer},
            )
            continue

        metrics.increment(
            "query_result",
            tags={"storage": storage, "match": "false", "referrer": referrer},
        )
        if len(result_data) != len(primary_result_data):
            sentry_sdk.capture_message(
                f"Non matching {storage} result - different length",
                level="warning",
                tags={"referrer": referrer, "storage": storage},
                extras={
                    "query": format_query(query),
                    "primary_result": len(primary_result_data),
                    "other_result": len(result_data),
                },
            )
            break
        # Lengths are equal here, so zip covers every row exactly.
        # Avoid sending too much data to Sentry - just one row for now.
        for primary_row, other_row in zip(primary_result_data, result_data):
            if other_row != primary_row:
                sentry_sdk.capture_message(
                    "Non matching result - different result",
                    level="warning",
                    tags={"referrer": referrer, "storage": storage},
                    extras={
                        "query": format_query(query),
                        "primary_result": primary_row,
                        "other_result": other_row,
                    },
                )
                break