def install(self):
    with configure_scope() as scope:
        @scope.add_event_processor
        def processor(event, hint):
            if "modules" not in event:
                event["modules"] = dict(_get_installed_modules())
            return event
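# A minimal sketch of the helper the processor above relies on, assuming it
# enumerates installed distributions via importlib.metadata; the SDK's actual
# implementation may differ.
from importlib.metadata import distributions

def _get_installed_modules():
    # Yield (name, version) pairs for every installed distribution.
    for dist in distributions():
        name = dist.metadata["Name"]
        if name is not None:
            yield name.lower(), dist.version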
def setup_once():
    try:
        data = dict(_read_classad(os.environ['_CONDOR_JOB_AD']))
    except (KeyError, IOError):
        pass
    else:
        with configure_scope() as scope:
            scope.set_tag('htcondor.cluster_id', '{}.{}'.format(
                data['ClusterId'], data['ProcId']))
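# Hypothetical sketch of _read_classad, assuming the HTCondor job ad is a
# plain-text file of `Name = Value` lines; the real parser may handle more
# of the ClassAd syntax than this.
def _read_classad(path):
    with open(path) as f:
        for line in f:
            if "=" in line:
                key, _, value = line.partition("=")
                yield key.strip(), value.strip().strip('"')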
def install(self):
    with configure_scope() as scope:
        @scope.add_error_processor
        def processor(event, exc_info):
            exc = exc_info[1]
            if _last_seen.get(None) is exc:
                # Same exception object already reported: drop the event.
                return None
            _last_seen.set(exc)
            return event
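# Sketch of the module-level state the processor above assumes: a ContextVar
# remembering the last exception object seen, so the same exception captured
# twice (e.g. logged and then re-raised) yields only one event. The
# `.get(None)` call above supplies None as the default when nothing is set.
from contextvars import ContextVar

_last_seen = ContextVar("last_seen")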
def raw_query(
    # TODO: Passing the whole clickhouse query here is needed as long
    # as the execute method depends on it. Otherwise we can make this
    # file rely either entirely on clickhouse query or entirely on
    # the formatter.
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    request_settings: RequestSettings,
    formatted_query: FormattedQuery,
    reader: Reader,
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
    stats: MutableMapping[str, Any],
    trace_id: Optional[str] = None,
) -> QueryResult:
    """
    Submits a raw SQL query to the DB and does some post-processing on it to
    fix some of the formatting issues in the result JSON.
    This function is not supposed to depend on anything higher level than the
    clickhouse query. If this function ends up depending on the dataset,
    something is wrong.
    """
    all_confs = state.get_all_configs()
    query_settings: MutableMapping[str, Any] = {
        k.split("/", 1)[1]: v
        for k, v in all_confs.items()
        if k.startswith("query_settings/")
    }

    timer.mark("get_configs")

    sql = formatted_query.get_sql()

    update_with_status = partial(
        update_query_metadata_and_stats,
        clickhouse_query,
        sql,
        timer,
        stats,
        query_metadata,
        query_settings,
        trace_id,
    )

    execute_query_strategy = (
        execute_query_with_readthrough_caching
        if state.get_config("use_readthrough_query_cache", 1)
        else execute_query_with_caching
    )

    try:
        result = execute_query_strategy(
            clickhouse_query,
            request_settings,
            formatted_query,
            reader,
            timer,
            stats,
            query_settings,
        )
    except Exception as cause:
        if isinstance(cause, RateLimitExceeded):
            stats = update_with_status(QueryStatus.RATE_LIMITED)
        else:
            with configure_scope() as scope:
                if isinstance(cause, ClickhouseError):
                    scope.fingerprint = ["{{default}}", str(cause.code)]
                logger.exception("Error running query: %s\n%s", sql, cause)
            stats = update_with_status(QueryStatus.ERROR)
        raise QueryException({"stats": stats, "sql": sql}) from cause
    else:
        stats = update_with_status(QueryStatus.SUCCESS)
        return QueryResult(result, {"stats": stats, "sql": sql})
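# Illustration of the functools.partial pattern used above: the query, SQL,
# timer and metadata are bound once, so each outcome branch only supplies the
# final QueryStatus. The names below are stand-ins, not Snuba's helpers.
from functools import partial

def _record_status(sql, stats, status):
    return {**stats, "sql": sql, "status": status}

update = partial(_record_status, "SELECT 1", {"attempt": 1})
assert update("success") == {"attempt": 1, "sql": "SELECT 1", "status": "success"}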
def raw_query(
    # TODO: Passing the whole clickhouse query here is needed as long
    # as the execute method depends on it. Otherwise we can make this
    # file rely either entirely on clickhouse query or entirely on
    # the formatter.
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    query_settings: QuerySettings,
    formatted_query: FormattedQuery,
    reader: Reader,
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
    stats: MutableMapping[str, Any],
    trace_id: Optional[str] = None,
    robust: bool = False,
) -> QueryResult:
    """
    Submits a raw SQL query to the DB and does some post-processing on it to
    fix some of the formatting issues in the result JSON.
    This function is not supposed to depend on anything higher level than the
    clickhouse query. If this function ends up depending on the dataset,
    something is wrong.
    """
    all_confs = state.get_all_configs()
    clickhouse_query_settings: MutableMapping[str, Any] = {
        k.split("/", 1)[1]: v
        for k, v in all_confs.items()
        if k.startswith("query_settings/")
    }

    timer.mark("get_configs")

    sql = formatted_query.get_sql()

    update_with_status = partial(
        update_query_metadata_and_stats,
        clickhouse_query,
        sql,
        timer,
        stats,
        query_metadata,
        clickhouse_query_settings,
        trace_id,
    )

    execute_query_strategy = (
        execute_query_with_readthrough_caching
        if state.get_config("use_readthrough_query_cache", 1)
        else execute_query_with_caching
    )

    try:
        result = execute_query_strategy(
            clickhouse_query,
            query_settings,
            formatted_query,
            reader,
            timer,
            stats,
            clickhouse_query_settings,
            robust=robust,
        )
    except Exception as cause:
        if isinstance(cause, RateLimitExceeded):
            stats = update_with_status(QueryStatus.RATE_LIMITED)
        else:
            error_code = None
            with configure_scope() as scope:
                if isinstance(cause, ClickhouseError):
                    error_code = cause.code
                    scope.fingerprint = ["{{default}}", str(cause.code)]
                    if scope.span:
                        if cause.code == errors.ErrorCodes.TOO_SLOW:
                            sentry_sdk.set_tag("timeout", "predicted")
                        elif cause.code == errors.ErrorCodes.TIMEOUT_EXCEEDED:
                            sentry_sdk.set_tag("timeout", "query_timeout")
                        elif cause.code in (
                            errors.ErrorCodes.SOCKET_TIMEOUT,
                            errors.ErrorCodes.NETWORK_ERROR,
                        ):
                            sentry_sdk.set_tag("timeout", "network")
                elif isinstance(
                    cause,
                    (TimeoutError, ExecutionTimeoutError, TigerExecutionTimeoutError),
                ):
                    if scope.span:
                        sentry_sdk.set_tag("timeout", "cache_timeout")
                logger.exception("Error running query: %s\n%s", sql, cause)
            stats = update_with_status(QueryStatus.ERROR, error_code=error_code)
        raise QueryException(
            {
                "stats": stats,
                "sql": sql,
                "experiments": clickhouse_query.get_experiments(),
            }
        ) from cause
    else:
        stats = update_with_status(QueryStatus.SUCCESS, result["profile"])
        return QueryResult(
            result,
            {
                "stats": stats,
                "sql": sql,
                "experiments": clickhouse_query.get_experiments(),
            },
        )
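# Sketch of the fingerprinting idea in the except branch above: overriding
# the Sentry fingerprint with the ClickHouse error code groups failures into
# one issue per code rather than one per message. FakeClickhouseError and the
# code value are stand-ins for illustration.
import sentry_sdk
from sentry_sdk import configure_scope

class FakeClickhouseError(Exception):
    def __init__(self, code):
        super().__init__("clickhouse error {}".format(code))
        self.code = code

with configure_scope() as scope:
    cause = FakeClickhouseError(159)  # assumption: a timeout-style error code
    scope.fingerprint = ["{{default}}", str(cause.code)]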