Code Example #1
File: test_data.py Project: getsentry/snuba
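This test helper builds a subscription request and either expects parse_and_run_query to raise the given exception or asserts that the aggregate column in the result matches the expected value.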
    def compare_conditions(
        self,
        subscription: SubscriptionData,
        exception: Optional[Type[Exception]],
        aggregate: str,
        value: Union[int, float],
    ) -> None:
        timer = Timer("test")
        if exception is not None:
            with pytest.raises(exception):
                request = subscription.build_request(
                    self.dataset,
                    datetime.utcnow(),
                    100,
                    timer,
                )
                parse_and_run_query(self.dataset, request, timer)
            return

        request = subscription.build_request(
            self.dataset,
            datetime.utcnow(),
            100,
            timer,
        )
        result = parse_and_run_query(self.dataset, request, timer)

        assert result.result["data"][0][aggregate] == value
Code Example #2
File: subscription.py Project: ruezetle/snuba
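Subscription creation runs the query once up front to validate that it can execute before persisting the subscription to Redis.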
    def create(self, data: SubscriptionData, timer: Timer) -> SubscriptionIdentifier:
        # We want to test the query out here to make sure it's valid and can run
        request = data.build_request(self.dataset, datetime.utcnow(), None, timer)
        parse_and_run_query(self.dataset, request, timer)
        identifier = SubscriptionIdentifier(
            self.__partitioner.build_partition_id(data),
            uuid1(),
        )
        RedisSubscriptionDataStore(redis_client, self.dataset, identifier.partition).create(
            identifier.uuid,
            data,
        )
        return identifier
Code Example #3
File: worker.py Project: isabella232/snuba
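A subscription worker task: it records scheduling latency, builds a request for the tick's upper offset, and passes a deep copy to parse_and_run_query because the request can be mutated during processing.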
    def __execute(self, task: ScheduledTask[Subscription],
                  tick: Tick) -> Tuple[Request, Result]:
        # Measure the amount of time that elapsed between this task being
        # scheduled and it beginning to execute.
        self.__metrics.timing("executor.latency",
                              (time.time() - task.timestamp.timestamp()) *
                              1000)

        # XXX: The ``query`` name is taken from the web views so that all query
        # performance metrics are reported to the same spot, regardless of
        # execution environment.
        timer = Timer("query")

        request = task.task.data.build_request(
            self.__dataset,
            task.timestamp,
            tick.offsets.upper,
            timer,
        )

        with self.__concurrent_gauge:
            # XXX: The ``extra`` is discarded from ``QueryResult`` since it is
            # not particularly useful in this context and duplicates data that
            # is already being published to the query log.
            # XXX: The ``request`` instance is copied when passed to
            # ``parse_and_run_query`` since it can/will be mutated during
            # processing.
            return (
                request,
                parse_and_run_query(self.__dataset, copy.deepcopy(request),
                                    timer).result,
            )
Code Example #4
File: executor_consumer.py Project: getsentry/snuba
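The executor-consumer variant of the same task: identical latency accounting, with robust=True and a concurrent-queries gauge forwarded to query execution.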
    def __execute_query(self, task: ScheduledSubscriptionTask,
                        tick_upper_offset: int) -> Tuple[Request, Result]:
        # Measure the amount of time that elapsed between the task's scheduled
        # time and it beginning to execute.
        self.__metrics.timing("executor.latency",
                              (time.time() - task.timestamp.timestamp()) *
                              1000)

        timer = Timer("query")

        with self.__concurrent_gauge:
            request = task.task.subscription.data.build_request(
                self.__dataset,
                task.timestamp,
                tick_upper_offset,
                timer,
                self.__metrics,
                "subscriptions_executor",
            )

            result = parse_and_run_query(
                self.__dataset,
                request,
                timer,
                robust=True,
                concurrent_queries_gauge=self.__concurrent_clickhouse_gauge,
            ).result

            return (request, result)
Code Example #5
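A condition test that either expects an exception from parse_and_run_query or asserts a fixed count of 10 in the result.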
    def test_conditions(self, subscription: SubscriptionData,
                        exception: Optional[Type[Exception]]) -> None:
        timer = Timer("test")
        if exception is not None:
            with pytest.raises(exception):
                request = subscription.build_request(
                    self.dataset,
                    datetime.utcnow(),
                    100,
                    timer,
                )
                result = parse_and_run_query(self.dataset, request, timer)
            return

        request = subscription.build_request(
            self.dataset,
            datetime.utcnow(),
            100,
            timer,
        )
        result = parse_and_run_query(self.dataset, request, timer)
        assert result.result["data"][0]["count"] == 10
Code Example #6
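An HTTP view: it builds a request from the POST body, runs the query, and maps QueryException causes to response statuses (429 for rate limiting, 500 otherwise).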
def dataset_query(dataset: Dataset, body, timer: Timer) -> Response:
    assert http_request.method == "POST"

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build_with_extensions(
            dataset.get_extensions(), HTTPRequestSettings
        )

    request = build_request(body, schema, timer, dataset, http_request.referrer)

    try:
        result = parse_and_run_query(dataset, request, timer)
    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": "rate limit exceeded",
            }
        elif isinstance(cause, ClickhouseError):
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
Code Example #7
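A happy-path condition test: a count subscription over a 500-minute window expected to return 10.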
    def test_conditions(self) -> None:
        subscription = SubscriptionData(
            project_id=self.project_id,
            conditions=[["platform", "IN", ["a"]]],
            aggregations=[["count()", "", "count"]],
            time_window=timedelta(minutes=500),
            resolution=timedelta(minutes=1),
        )
        timer = Timer("test")
        request = subscription.build_request(
            self.dataset,
            datetime.utcnow(),
            100,
            timer,
        )
        result = parse_and_run_query(self.dataset, request, timer)
        assert result.result["data"][0]["count"] == 10
Code Example #8
File: worker.py Project: pombredanne/snuba
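A non-consistent retry path: the request is rebuilt with consistent=False settings and executed robustly, attaching the ClickHouse concurrency gauge only when the original query was not consistent.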
            def run_non_consistent() -> Result:
                request_copy = Request(
                    id=request.id,
                    body=copy.deepcopy(request.body),
                    query=copy.deepcopy(request.query),
                    settings=SubscriptionRequestSettings(consistent=False),
                    referrer=request.referrer,
                )

                return parse_and_run_query(
                    self.__dataset,
                    request_copy,
                    timer,
                    robust=True,
                    concurrent_queries_gauge=self.__concurrent_clickhouse_gauge
                    if not is_consistent_query else None,
                ).result
Code Example #9
File: views.py Project: ruezetle/snuba
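An earlier view-layer wrapper that translates RawQueryException into an error payload with SQL and stats, returning 429 for rate limiting and 500 otherwise.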
def run_query(dataset: Dataset, request: Request, timer: Timer) -> WebQueryResult:
    try:
        result = parse_and_run_query(dataset, request, timer)
        payload = {**result.result, "timing": timer.for_json()}
        if settings.STATS_IN_RESPONSE or request.settings.get_debug():
            payload.update(result.extra)
        return WebQueryResult(payload, 200)
    except RawQueryException as e:
        return WebQueryResult(
            {
                "error": {"type": e.err_type, "message": e.message, **e.meta},
                "sql": e.sql,
                "stats": e.stats,
                "timing": timer.for_json(),
            },
            429 if e.err_type == "rate-limited" else 500,
        )
Code Example #10
File: views.py Project: jiankunking/snuba
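A similar wrapper from another fork; here the return value of parse_and_run_query is unpacked directly into the response payload.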
def run_query(dataset: Dataset, request: Request, timer: Timer) -> QueryResult:
    try:
        return QueryResult(
            {
                **parse_and_run_query(dataset, request, timer),
                "timing":
                timer.for_json(),
            },
            200,
        )
    except RawQueryException as e:
        return QueryResult(
            {
                "error": {
                    "type": e.err_type,
                    "message": e.message,
                    **e.meta
                },
                "sql": e.sql,
                "stats": e.stats,
                "timing": timer.for_json(),
            },
            429 if e.err_type == "rate-limited" else 500,
        )
Code Example #11
File: views.py Project: chhetripradeep/snuba
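A view supporting both the legacy and SnQL parsers; it tracks SnQL adoption metrics (simple, subquery, join) in addition to the usual error mapping.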
def dataset_query(
    dataset: Dataset, body: MutableMapping[str, Any], timer: Timer, language: Language
) -> Response:
    assert http_request.method == "POST"
    referrer = http_request.referrer or "<unknown>"  # mypy

    if language == Language.SNQL:
        metrics.increment("snql.query.incoming", tags={"referrer": referrer})
        parser: Callable[
            [RequestParts, RequestSettings, Dataset],
            Union[Query, CompositeQuery[Entity]],
        ] = partial(parse_snql_query, [])
    else:
        parser = parse_legacy_query

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build_with_extensions(
            dataset.get_default_entity().get_extensions(), HTTPRequestSettings, language
        )

    request = build_request(
        body, parser, HTTPRequestSettings, schema, dataset, timer, referrer
    )

    try:
        result = parse_and_run_query(dataset, request, timer)

        # Some metrics to track the adoption of SnQL
        query_type = "simple"
        if language == Language.SNQL:
            if isinstance(request.query, CompositeQuery):
                if isinstance(request.query.get_from_clause(), JoinClause):
                    query_type = "join"
                else:
                    query_type = "subquery"

            metrics.increment(
                "snql.query.success", tags={"referrer": referrer, "type": query_type}
            )

    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": "rate limit exceeded",
            }
        elif isinstance(cause, ClickhouseError):
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        if language == Language.SNQL:
            metrics.increment(
                "snql.query.failed", tags={"referrer": referrer, "status": f"{status}"},
            )

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
Code Example #12
File: subscription.py Project: pombredanne/snuba
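The validation step in isolation: build a request and run it, discarding the result.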
    def _test_request(self, data: SubscriptionData, timer: Timer) -> None:
        request = data.build_request(self.dataset, datetime.utcnow(), None, timer)
        parse_and_run_query(self.dataset, request, timer)
Code Example #13
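An end-to-end test: it writes an event, issues a query whose SelectedExpression names differ from the internal expression aliases, and verifies that the result columns use the user-facing names.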
def test_transform_column_names() -> None:
    """
    Runs a simple query containing selected expression names that
    do not match the aliases of the expressions themselves.
    It verifies that the names of the columns in the result correspond
    to the SelectedExpression names and not to the expression aliases
    (which are supposed to be internal).
    """
    events_storage = get_entity(EntityKey.EVENTS).get_writable_storage()

    event_id = uuid.uuid4().hex

    event_date = datetime.utcnow()
    write_unprocessed_events(
        events_storage,
        [
            InsertEvent(
                {
                    "event_id": event_id,
                    "group_id": 10,
                    "primary_hash": uuid.uuid4().hex,
                    "project_id": 1,
                    "message": "a message",
                    "platform": "python",
                    "datetime": event_date.strftime(settings.PAYLOAD_DATETIME_FORMAT),
                    "data": {"received": time.time()},
                    "organization_id": 1,
                    "retention_days": settings.DEFAULT_RETENTION_DAYS,
                }
            )
        ],
    )

    query = Query(
        Entity(EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model()),
        selected_columns=[
            # The selected expression names are those provided by the
            # user in the query and those the user expects in the response.
            # Aliases will be internal names to prevent shadowing.
            SelectedExpression("event_id", Column("_snuba_event_id", None, "event_id")),
            SelectedExpression(
                "message",
                FunctionCall(
                    "_snuba_message",
                    "ifNull",
                    (Column(None, None, "message"), Literal(None, "")),
                ),
            ),
        ],
    )
    query_settings = HTTPRequestSettings()
    apply_query_extensions(
        query,
        {
            "timeseries": {
                "from_date": (event_date - timedelta(minutes=5)).strftime(
                    settings.PAYLOAD_DATETIME_FORMAT
                ),
                "to_date": (event_date + timedelta(minutes=1)).strftime(
                    settings.PAYLOAD_DATETIME_FORMAT
                ),
                "granularity": 3600,
            },
            "project": {"project": [1]},
        },
        query_settings,
    )

    dataset = get_dataset("events")
    timer = Timer("test")

    result = parse_and_run_query(
        dataset,
        Request(
            id="asd", body={}, query=query, settings=query_settings, referrer="asd",
        ),
        timer,
    )

    data = result.result["data"]
    assert data == [{"event_id": event_id, "message": "a message"}]
    meta = result.result["meta"]

    assert meta == [
        MetaColumn(name="event_id", type="String"),
        MetaColumn(name="message", type="String"),
    ]
Code Example #14
File: views.py Project: getsentry/snuba
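A later version of the same view: SnQL-only parsing, sampled shutdown-file detection metrics, and finer-grained error statuses, including 400 for over-long queries.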
def dataset_query(
    dataset: Dataset, body: MutableMapping[str, Any], timer: Timer
) -> Response:
    assert http_request.method == "POST"
    referrer = http_request.referrer or "<unknown>"  # mypy

    # Try to detect if new requests are being sent to the api
    # after the shutdown command has been issued, and if so
    # how long after. I don't want to do a disk check for
    # every query, so randomly sample until the shutdown file
    # is detected, and then log everything
    if IS_SHUTTING_DOWN or random.random() < 0.05:
        if IS_SHUTTING_DOWN or check_down_file_exists():
            tags = {"dataset": get_dataset_name(dataset)}
            metrics.increment("post.shutdown.query", tags=tags)
            diff = time.time() - (shutdown_time() or 0.0)  # this should never be None
            metrics.timing("post.shutdown.query.delay", diff, tags=tags)

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build(HTTPQuerySettings)

    request = build_request(
        body, parse_snql_query, HTTPQuerySettings, schema, dataset, timer, referrer
    )

    try:
        result = parse_and_run_query(dataset, request, timer)
    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": str(cause),
            }
            logger.warning(
                str(cause),
                exc_info=True,
            )
        elif isinstance(cause, ClickhouseError):
            status = get_http_status_for_clickhouse_error(cause)
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, QueryTooLongException):
            status = 400
            details = {"type": "query-too-long", "message": str(cause)}
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.query_settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
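All of these examples share one calling convention: build a Request, create a Timer, and hand both to parse_and_run_query together with the Dataset. The sketch below distills that pattern; the import paths are assumptions based on the snuba repository layout and may differ between the project versions shown above, and running it requires a live ClickHouse-backed snuba installation.

# A minimal sketch of the shared calling convention, not a verbatim
# excerpt from any project above. Import paths are assumptions from
# the snuba repository layout and may vary across versions.
from typing import Any, Mapping

from snuba.datasets.factory import get_dataset
from snuba.request import Request
from snuba.utils.metrics.timer import Timer
from snuba.web.query import parse_and_run_query


def run_query_with_timing(dataset_name: str, request: Request) -> Mapping[str, Any]:
    # Resolve the dataset by name, as several examples above do with
    # get_dataset("events").
    dataset = get_dataset(dataset_name)
    # The same Timer instance threads through execution so that all
    # phases report into one timing breakdown.
    timer = Timer("query")
    result = parse_and_run_query(dataset, request, timer)
    # QueryResult exposes rows under result.result["data"], column
    # metadata under result.result["meta"], and debug/stats payloads
    # under result.extra.
    return {**result.result, "timing": timer.for_json()}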