Example #1
    def compare_conditions(
        self,
        subscription: SubscriptionData,
        exception: Optional[Type[Exception]],
        aggregate: str,
        value: Union[int, float],
    ) -> None:
        timer = Timer("test")
        if exception is not None:
            with pytest.raises(exception):
                request = subscription.build_request(
                    self.dataset,
                    datetime.utcnow(),
                    100,
                    timer,
                )
                parse_and_run_query(self.dataset, request, timer)
            return

        request = subscription.build_request(
            self.dataset,
            datetime.utcnow(),
            100,
            timer,
        )
        result = parse_and_run_query(self.dataset, request, timer)

        assert result.result["data"][0][aggregate] == value
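
Comparison helpers like the one above (and the similar one in Example #5 below) are meant to be driven by pytest parametrization, covering both an expected result and an expected exception in one test. A minimal, Snuba-independent sketch of that pattern, where the hypothetical divide() stands in for build_request plus parse_and_run_query:

from typing import Optional, Type

import pytest


# Hypothetical stand-in for building and running a query: it either
# returns a value or raises, just like the helpers above.
def divide(a: float, b: float) -> float:
    return a / b


@pytest.mark.parametrize(
    "a, b, exception, expected",
    [
        (10, 2, None, 5.0),  # happy path: assert on the returned value
        (10, 0, ZeroDivisionError, None),  # failure path: assert the raise
    ],
)
def test_divide(
    a: float, b: float, exception: Optional[Type[Exception]], expected: Optional[float]
) -> None:
    if exception is not None:
        with pytest.raises(exception):
            divide(a, b)
        return
    assert divide(a, b) == expected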
Example #2
    def create(self, data: SubscriptionData, timer: Timer) -> SubscriptionIdentifier:
        # We want to test the query out here to make sure it's valid and can run
        request = data.build_request(self.dataset, datetime.utcnow(), None, timer)
        parse_and_run_query(self.dataset, request, timer)
        identifier = SubscriptionIdentifier(
            self.__partitioner.build_partition_id(data),
            uuid1(),
        )
        RedisSubscriptionDataStore(
            redis_client, self.dataset, identifier.partition
        ).create(
            identifier.uuid,
            data,
        )
        return identifier
Example #3
    def __execute(self, task: ScheduledTask[Subscription],
                  tick: Tick) -> Tuple[Request, Result]:
        # Measure the amount of time that elapsed between this task being
        # scheduled and it beginning to execute.
        self.__metrics.timing("executor.latency",
                              (time.time() - task.timestamp.timestamp()) *
                              1000)

        # XXX: The ``query`` name is taken from the web views so that all query
        # performance metrics are reported to the same spot, regardless of
        # execution environment.
        timer = Timer("query")

        request = task.task.data.build_request(
            self.__dataset,
            task.timestamp,
            tick.offsets.upper,
            timer,
        )

        with self.__concurrent_gauge:
            # XXX: The ``extra`` is discarded from ``QueryResult`` since it is
            # not particularly useful in this context and duplicates data that
            # is already being published to the query log.
            # XXX: The ``request`` instance is copied when passed to
            # ``parse_and_run_query`` since it can/will be mutated during
            # processing.
            return (
                request,
                parse_and_run_query(self.__dataset, copy.deepcopy(request),
                                    timer).result,
            )
Example #4
    def __execute_query(self, task: ScheduledSubscriptionTask,
                        tick_upper_offset: int) -> Tuple[Request, Result]:
        # Measure the amount of time that elapsed between the task's scheduled
        # time and it beginning to execute.
        self.__metrics.timing("executor.latency",
                              (time.time() - task.timestamp.timestamp()) *
                              1000)

        timer = Timer("query")

        with self.__concurrent_gauge:
            request = task.task.subscription.data.build_request(
                self.__dataset,
                task.timestamp,
                tick_upper_offset,
                timer,
                self.__metrics,
                "subscriptions_executor",
            )

            result = parse_and_run_query(
                self.__dataset,
                request,
                timer,
                robust=True,
                concurrent_queries_gauge=self.__concurrent_clickhouse_gauge,
            ).result

            return (request, result)
Example #5
    def test_conditions(self, subscription: SubscriptionData,
                        exception: Optional[Type[Exception]]) -> None:
        timer = Timer("test")
        if exception is not None:
            with pytest.raises(exception):
                request = subscription.build_request(
                    self.dataset,
                    datetime.utcnow(),
                    100,
                    timer,
                )
                parse_and_run_query(self.dataset, request, timer)
            return

        request = subscription.build_request(
            self.dataset,
            datetime.utcnow(),
            100,
            timer,
        )
        result = parse_and_run_query(self.dataset, request, timer)
        assert result.result["data"][0]["count"] == 10
Example #6
def dataset_query(dataset: Dataset, body, timer: Timer) -> Response:
    assert http_request.method == "POST"

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build_with_extensions(
            dataset.get_extensions(), HTTPRequestSettings
        )

    request = build_request(body, schema, timer, dataset, http_request.referrer)

    try:
        result = parse_and_run_query(dataset, request, timer)
    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": "rate limit exceeded",
            }
        elif isinstance(cause, ClickhouseError):
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
Example #7
    def test_conditions(self) -> None:
        subscription = SubscriptionData(
            project_id=self.project_id,
            conditions=[["platform", "IN", ["a"]]],
            aggregations=[["count()", "", "count"]],
            time_window=timedelta(minutes=500),
            resolution=timedelta(minutes=1),
        )
        timer = Timer("test")
        request = subscription.build_request(
            self.dataset,
            datetime.utcnow(),
            100,
            timer,
        )
        result = parse_and_run_query(self.dataset, request, timer)
        assert result.result["data"][0]["count"] == 10
Example #8
            def run_non_consistent() -> Result:
                request_copy = Request(
                    id=request.id,
                    body=copy.deepcopy(request.body),
                    query=copy.deepcopy(request.query),
                    settings=SubscriptionRequestSettings(consistent=False),
                    referrer=request.referrer,
                )

                return parse_and_run_query(
                    self.__dataset,
                    request_copy,
                    timer,
                    robust=True,
                    concurrent_queries_gauge=self.__concurrent_clickhouse_gauge
                    if not is_consistent_query else None,
                ).result
Example #9
def run_query(dataset: Dataset, request: Request, timer: Timer) -> WebQueryResult:
    try:
        result = parse_and_run_query(dataset, request, timer)
        payload = {**result.result, "timing": timer.for_json()}
        if settings.STATS_IN_RESPONSE or request.settings.get_debug():
            payload.update(result.extra)
        return WebQueryResult(payload, 200)
    except RawQueryException as e:
        return WebQueryResult(
            {
                "error": {"type": e.err_type, "message": e.message, **e.meta},
                "sql": e.sql,
                "stats": e.stats,
                "timing": timer.for_json(),
            },
            429 if e.err_type == "rate-limited" else 500,
        )
Example #10
def run_query(dataset: Dataset, request: Request, timer: Timer) -> QueryResult:
    try:
        return QueryResult(
            {
                **parse_and_run_query(dataset, request, timer),
                "timing":
                timer.for_json(),
            },
            200,
        )
    except RawQueryException as e:
        return QueryResult(
            {
                "error": {
                    "type": e.err_type,
                    "message": e.message,
                    **e.meta
                },
                "sql": e.sql,
                "stats": e.stats,
                "timing": timer.for_json(),
            },
            429 if e.err_type == "rate-limited" else 500,
        )
Example #11
def dataset_query(
    dataset: Dataset, body: MutableMapping[str, Any], timer: Timer, language: Language
) -> Response:
    assert http_request.method == "POST"
    referrer = http_request.referrer or "<unknown>"  # mypy

    if language == Language.SNQL:
        metrics.increment("snql.query.incoming", tags={"referrer": referrer})
        parser: Callable[
            [RequestParts, RequestSettings, Dataset],
            Union[Query, CompositeQuery[Entity]],
        ] = partial(parse_snql_query, [])
    else:
        parser = parse_legacy_query

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build_with_extensions(
            dataset.get_default_entity().get_extensions(), HTTPRequestSettings, language
        )

    request = build_request(
        body, parser, HTTPRequestSettings, schema, dataset, timer, referrer
    )

    try:
        result = parse_and_run_query(dataset, request, timer)

        # Some metrics to track the adoption of SnQL
        query_type = "simple"
        if language == Language.SNQL:
            if isinstance(request.query, CompositeQuery):
                if isinstance(request.query.get_from_clause(), JoinClause):
                    query_type = "join"
                else:
                    query_type = "subquery"

            metrics.increment(
                "snql.query.success", tags={"referrer": referrer, "type": query_type}
            )

    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": "rate limit exceeded",
            }
        elif isinstance(cause, ClickhouseError):
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        if language == Language.SNQL:
            metrics.increment(
                "snql.query.failed", tags={"referrer": referrer, "status": f"{status}"},
            )

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
Example #12
    def _test_request(self, data: SubscriptionData, timer: Timer) -> None:
        # Build and run the subscription query once to verify that it is valid.
        request = data.build_request(self.dataset, datetime.utcnow(), None, timer)
        parse_and_run_query(self.dataset, request, timer)
Example #13
def test_transform_column_names() -> None:
    """
    Runs a simple query containing selected expressions names that
    do not match the aliases of the expressions themselves.
    It verifies that the names of the columns in the result correspond
    to the SelectedExpression names and not to the expression aliases
    (which are supposed to be internal).
    """
    events_storage = get_entity(EntityKey.EVENTS).get_writable_storage()

    event_id = uuid.uuid4().hex

    event_date = datetime.utcnow()
    write_unprocessed_events(
        events_storage,
        [
            InsertEvent(
                {
                    "event_id": event_id,
                    "group_id": 10,
                    "primary_hash": uuid.uuid4().hex,
                    "project_id": 1,
                    "message": "a message",
                    "platform": "python",
                    "datetime": event_date.strftime(settings.PAYLOAD_DATETIME_FORMAT),
                    "data": {"received": time.time()},
                    "organization_id": 1,
                    "retention_days": settings.DEFAULT_RETENTION_DAYS,
                }
            )
        ],
    )

    query = Query(
        Entity(EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model()),
        selected_columns=[
            # The selected expression names are those provided by the
            # user in the query and those the user expects in the response.
            # Aliases will be internal names to prevent shadowing.
            SelectedExpression("event_id", Column("_snuba_event_id", None, "event_id")),
            SelectedExpression(
                "message",
                FunctionCall(
                    "_snuba_message",
                    "ifNull",
                    (Column(None, None, "message"), Literal(None, "")),
                ),
            ),
        ],
    )
    query_settings = HTTPRequestSettings()
    apply_query_extensions(
        query,
        {
            "timeseries": {
                "from_date": (event_date - timedelta(minutes=5)).strftime(
                    settings.PAYLOAD_DATETIME_FORMAT
                ),
                "to_date": (event_date + timedelta(minutes=1)).strftime(
                    settings.PAYLOAD_DATETIME_FORMAT
                ),
                "granularity": 3600,
            },
            "project": {"project": [1]},
        },
        query_settings,
    )

    dataset = get_dataset("events")
    timer = Timer("test")

    result = parse_and_run_query(
        dataset,
        Request(
            id="asd", body={}, query=query, settings=query_settings, referrer="asd",
        ),
        timer,
    )

    data = result.result["data"]
    assert data == [{"event_id": event_id, "message": "a message"}]
    meta = result.result["meta"]

    assert meta == [
        MetaColumn(name="event_id", type="String"),
        MetaColumn(name="message", type="String"),
    ]
Example #14
def dataset_query(
    dataset: Dataset, body: MutableMapping[str, Any], timer: Timer
) -> Response:
    assert http_request.method == "POST"
    referrer = http_request.referrer or "<unknown>"  # mypy

    # Try to detect if new requests are being sent to the api
    # after the shutdown command has been issued, and if so
    # how long after. I don't want to do a disk check for
    # every query, so randomly sample until the shutdown file
    # is detected, and then log everything
    if IS_SHUTTING_DOWN or random.random() < 0.05:
        if IS_SHUTTING_DOWN or check_down_file_exists():
            tags = {"dataset": get_dataset_name(dataset)}
            metrics.increment("post.shutdown.query", tags=tags)
            diff = time.time() - (shutdown_time() or 0.0)  # this should never be None
            metrics.timing("post.shutdown.query.delay", diff, tags=tags)

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build(HTTPQuerySettings)

    request = build_request(
        body, parse_snql_query, HTTPQuerySettings, schema, dataset, timer, referrer
    )

    try:
        result = parse_and_run_query(dataset, request, timer)
    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": str(cause),
            }
            logger.warning(
                str(cause),
                exc_info=True,
            )
        elif isinstance(cause, ClickhouseError):
            status = get_http_status_for_clickhouse_error(cause)
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, QueryTooLongException):
            status = 400
            details = {"type": "query-too-long", "message": str(cause)}
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.query_settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
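
Stripped of test fixtures and HTTP plumbing, every example above follows the same calling convention: build a Request against a Dataset, create a Timer, pass all three to parse_and_run_query, and read rows from result.result["data"]. A condensed sketch of that shared shape; the import paths are assumptions based on the Snuba source tree these examples come from and may differ between versions:

from datetime import datetime
from typing import Any, Mapping, Sequence

# Assumed import paths; adjust to the Snuba version in use.
from snuba.datasets.dataset import Dataset
from snuba.subscriptions.data import SubscriptionData
from snuba.utils.metrics.timer import Timer
from snuba.web.query import parse_and_run_query


def run_subscription_query(
    dataset: Dataset, subscription: SubscriptionData
) -> Sequence[Mapping[str, Any]]:
    # Build the Request from the subscription, exactly as the test
    # examples above do; 100 is the offset value they pass.
    timer = Timer("example")
    request = subscription.build_request(dataset, datetime.utcnow(), 100, timer)
    # Run the query and return the result rows.
    result = parse_and_run_query(dataset, request, timer)
    return result.result["data"]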