Esempio n. 1
0
    def install(self):
        """Attach an event processor that records installed modules.

        The processor lazily computes the module list: it only fills in
        the "modules" entry when the outgoing event does not already
        carry one, and leaves the event otherwise untouched.
        """
        with configure_scope() as scope:

            def _attach_modules(event, hint):
                if "modules" not in event:
                    event["modules"] = dict(_get_installed_modules())
                return event

            scope.add_event_processor(_attach_modules)
Esempio n. 2
0
 def setup_once():
     """Tag the Sentry scope with the HTCondor ``cluster.proc`` id.

     Best-effort by design: when the job classad file is not configured,
     unreadable, or missing the expected attributes, the function does
     nothing instead of raising.
     """
     try:
         data = dict(_read_classad(os.environ['_CONDOR_JOB_AD']))
         # Read both classad attributes inside the try-block: previously
         # a classad missing ClusterId/ProcId raised an uncaught KeyError
         # in the else-branch, defeating the best-effort intent above.
         cluster_id = '{}.{}'.format(data['ClusterId'], data['ProcId'])
     except (KeyError, IOError):
         pass
     else:
         with configure_scope() as scope:
             scope.set_tag('htcondor.cluster_id', cluster_id)
Esempio n. 3
0
    def install(self):
        """Attach an error processor that drops duplicate exceptions.

        Consecutive reports of the very same exception object are
        suppressed: the processor compares identity against the last
        exception recorded in ``_last_seen`` and discards the event
        (by returning ``None``) when it matches.
        """
        with configure_scope() as scope:

            def _dedupe(event, exc_info):
                current_exc = exc_info[1]
                if _last_seen.get(None) is current_exc:
                    # Same exception object as last time: drop the event.
                    return None
                _last_seen.set(current_exc)
                return event

            scope.add_error_processor(_dedupe)
Esempio n. 4
0
def raw_query(
    # TODO: Passing the whole clickhouse query here is needed as long
    # as the execute method depends on it. Otherwise we can make this
    # file rely either entirely on clickhouse query or entirely on
    # the formatter.
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    request_settings: RequestSettings,
    formatted_query: FormattedQuery,
    reader: Reader,
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
    stats: MutableMapping[str, Any],
    trace_id: Optional[str] = None,
) -> QueryResult:
    """
    Submits a raw SQL query to the DB and does some post-processing on it to
    fix some of the formatting issues in the result JSON.
    This function is not supposed to depend on anything higher level than the clickhouse
    query. If this function ends up depending on the dataset, something is wrong.
    """
    # Collect every runtime config under the "query_settings/" namespace,
    # stripping the prefix before handing the settings to the executor.
    prefix = "query_settings/"
    query_settings: MutableMapping[str, Any] = {
        key[len(prefix):]: value
        for key, value in state.get_all_configs().items()
        if key.startswith(prefix)
    }

    timer.mark("get_configs")

    sql = formatted_query.get_sql()

    # Pre-bind everything but the final QueryStatus so each exit path
    # below only supplies the status.
    update_with_status = partial(
        update_query_metadata_and_stats,
        clickhouse_query,
        sql,
        timer,
        stats,
        query_metadata,
        query_settings,
        trace_id,
    )

    # Read-through caching is the default strategy; the config flag lets
    # operators fall back to plain caching.
    if state.get_config("use_readthrough_query_cache", 1):
        execute_query_strategy = execute_query_with_readthrough_caching
    else:
        execute_query_strategy = execute_query_with_caching

    try:
        result = execute_query_strategy(
            clickhouse_query,
            request_settings,
            formatted_query,
            reader,
            timer,
            stats,
            query_settings,
        )
    except RateLimitExceeded as cause:
        stats = update_with_status(QueryStatus.RATE_LIMITED)
        raise QueryException({"stats": stats, "sql": sql}) from cause
    except Exception as cause:
        with configure_scope() as scope:
            if isinstance(cause, ClickhouseError):
                # Group Sentry issues by ClickHouse error code as well.
                scope.fingerprint = ["{{default}}", str(cause.code)]
            logger.exception("Error running query: %s\n%s", sql, cause)
        stats = update_with_status(QueryStatus.ERROR)
        raise QueryException({"stats": stats, "sql": sql}) from cause

    stats = update_with_status(QueryStatus.SUCCESS)
    return QueryResult(result, {"stats": stats, "sql": sql})
Esempio n. 5
0
def raw_query(
    # TODO: Passing the whole clickhouse query here is needed as long
    # as the execute method depends on it. Otherwise we can make this
    # file rely either entirely on clickhouse query or entirely on
    # the formatter.
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    query_settings: QuerySettings,
    formatted_query: FormattedQuery,
    reader: Reader,
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
    stats: MutableMapping[str, Any],
    trace_id: Optional[str] = None,
    robust: bool = False,
) -> QueryResult:
    """
    Submits a raw SQL query to the DB and does some post-processing on it to
    fix some of the formatting issues in the result JSON.
    This function is not supposed to depend on anything higher level than the clickhouse
    query. If this function ends up depending on the dataset, something is wrong.

    Raises QueryException (chained to the original cause) on any failure,
    after recording the outcome in the query metadata/stats.
    """
    # Collect the runtime ClickHouse settings: every config key under the
    # "query_settings/" namespace is forwarded with the prefix stripped.
    all_confs = state.get_all_configs()
    clickhouse_query_settings: MutableMapping[str, Any] = {
        k.split("/", 1)[1]: v
        for k, v in all_confs.items()
        if k.startswith("query_settings/")
    }

    timer.mark("get_configs")

    sql = formatted_query.get_sql()

    # Pre-bind everything but the final status so each exit path below
    # only has to supply the QueryStatus (plus optional extras).
    update_with_status = partial(
        update_query_metadata_and_stats,
        clickhouse_query,
        sql,
        timer,
        stats,
        query_metadata,
        clickhouse_query_settings,
        trace_id,
    )

    # Read-through caching is the default; the config flag allows falling
    # back to the plain caching strategy.
    execute_query_strategy = (
        execute_query_with_readthrough_caching
        if state.get_config("use_readthrough_query_cache", 1)
        else execute_query_with_caching
    )

    try:
        result = execute_query_strategy(
            clickhouse_query,
            query_settings,
            formatted_query,
            reader,
            timer,
            stats,
            clickhouse_query_settings,
            robust=robust,
        )
    except Exception as cause:
        if isinstance(cause, RateLimitExceeded):
            stats = update_with_status(QueryStatus.RATE_LIMITED)
        else:
            error_code = None
            with configure_scope() as scope:
                if isinstance(cause, ClickhouseError):
                    error_code = cause.code
                    # Group Sentry issues by ClickHouse error code rather
                    # than by stack trace alone.
                    scope.fingerprint = ["{{default}}", str(cause.code)]
                    if scope.span:
                        # Tag the active tracing span with the specific
                        # kind of timeout, when the error code maps to one.
                        if cause.code == errors.ErrorCodes.TOO_SLOW:
                            sentry_sdk.set_tag("timeout", "predicted")
                        elif cause.code == errors.ErrorCodes.TIMEOUT_EXCEEDED:
                            sentry_sdk.set_tag("timeout", "query_timeout")
                        elif cause.code in (
                            errors.ErrorCodes.SOCKET_TIMEOUT,
                            errors.ErrorCodes.NETWORK_ERROR,
                        ):
                            sentry_sdk.set_tag("timeout", "network")
                elif isinstance(
                    cause,
                    (TimeoutError, ExecutionTimeoutError, TigerExecutionTimeoutError),
                ):
                    # Timeouts raised by the caching layer itself.
                    if scope.span:
                        sentry_sdk.set_tag("timeout", "cache_timeout")

                logger.exception("Error running query: %s\n%s", sql, cause)
            stats = update_with_status(QueryStatus.ERROR, error_code=error_code)
        raise QueryException(
            {
                "stats": stats,
                "sql": sql,
                "experiments": clickhouse_query.get_experiments(),
            }
        ) from cause
    else:
        # Success: record the profile data returned by the executor.
        stats = update_with_status(QueryStatus.SUCCESS, result["profile"])
        return QueryResult(
            result,
            {
                "stats": stats,
                "sql": sql,
                "experiments": clickhouse_query.get_experiments(),
            },
        )