Example #1
    def build_request(
        self,
        dataset: Dataset,
        timestamp: datetime,
        offset: Optional[int],
        timer: Timer,
        metrics: Optional[MetricsBackend] = None,
        referrer: str = SUBSCRIPTION_REFERRER,
    ) -> Request:
        schema = RequestSchema.build(SubscriptionQuerySettings)

        request = build_request(
            {"query": self.query},
            parse_snql_query,
            SubscriptionQuerySettings,
            schema,
            dataset,
            timer,
            referrer,
            [
                self.entity_subscription.validate_query,
                partial(self.add_conditions, timestamp, offset),
            ],
        )
        return request
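# A minimal, self-contained sketch (pure Python; names hypothetical) of the
# functools.partial pattern used above: add_conditions takes (timestamp,
# offset, conditions), and partial pre-binds the first two so the request
# pipeline can later invoke it with just the parsed query.
from datetime import datetime
from functools import partial
from typing import List, Optional, Tuple

def add_conditions(
    timestamp: datetime, offset: Optional[int], conditions: List[Tuple]
) -> None:
    conditions.append(("timestamp", "<", timestamp))
    if offset is not None:
        conditions.append(("offset", "<=", offset))

bound = partial(add_conditions, datetime(2021, 1, 1), 5)
conditions: List[Tuple] = []
bound(conditions)  # same as add_conditions(datetime(2021, 1, 1), 5, conditions)
assert len(conditions) == 2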
Example #2
 def build_request(self, dataset: Dataset, timestamp: datetime,
                   offset: Optional[int], timer: Timer) -> Request:
     """
     Returns a Request that can be used to run a query via `parse_and_run_query`.
     :param dataset: The Dataset to build the request for
     :param timestamp: Date that the query should run up until
     :param offset: Maximum offset we should query for
     """
     schema = RequestSchema.build_with_extensions(
         dataset.get_extensions(),
         SubscriptionRequestSettings,
     )
     extra_conditions: Sequence[Condition] = []
     if offset is not None:
         extra_conditions = [[["ifnull", ["offset", 0]], "<=", offset]]
     return build_request(
         {
             "project": self.project_id,
             "conditions": [*self.conditions, *extra_conditions],
             "aggregations": self.aggregations,
             "from_date": (timestamp - self.time_window).isoformat(),
             "to_date": timestamp.isoformat(),
         },
         schema,
         timer,
         dataset,
         SUBSCRIPTION_REFERRER,
     )
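# A hedged illustration (not from the source) of the legacy condition triple
# built above: [[fn, args], op, literal] renders roughly as the SQL predicate
# ifNull(offset, 0) <= <offset>.
from typing import Any, List, Optional

def offset_conditions(offset: Optional[int]) -> List[Any]:
    return [] if offset is None else [[["ifnull", ["offset", 0]], "<=", offset]]

assert offset_conditions(None) == []
assert offset_conditions(10) == [[["ifnull", ["offset", 0]], "<=", 10]]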
Example #3
    def build_request(
        self,
        dataset: Dataset,
        timestamp: datetime,
        offset: Optional[int],
        timer: Timer,
        metrics: Optional[MetricsBackend] = None,
    ) -> Request:
        schema = RequestSchema.build_with_extensions(
            {},
            SubscriptionRequestSettings,
            Language.SNQL,
        )

        request = build_request(
            {"query": self.query},
            partial(
                parse_snql_query,
                [
                    self.validate_subscription,
                    partial(self.add_conditions, timestamp, offset),
                ],
            ),
            SubscriptionRequestSettings,
            schema,
            dataset,
            timer,
            SUBSCRIPTION_REFERRER,
        )
        return request
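# A hedged sketch (pure Python; names hypothetical) of the partial-parser
# pattern above: pre-binding the validator list turns the parser into a
# callable with the plain parser signature that build_request expects.
from functools import partial
from typing import Any, Callable, Dict, List

def parse_query(validators: List[Callable[[Dict[str, Any]], Any]],
                body: Dict[str, Any]) -> Dict[str, Any]:
    query = dict(body)
    for validate in validators:
        validate(query)
    return query

parser = partial(parse_query, [lambda q: q.setdefault("limit", 1000)])
assert parser({"query": "MATCH (events)"})["limit"] == 1000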
Example #4
def dataset_query(dataset: Dataset, body, timer: Timer) -> Response:
    assert http_request.method == "POST"

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build_with_extensions(
            dataset.get_extensions(), HTTPRequestSettings
        )

    request = build_request(body, schema, timer, dataset, http_request.referrer)

    try:
        result = parse_and_run_query(dataset, request, timer)
    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": "rate limit exceeded",
            }
        elif isinstance(cause, ClickhouseError):
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
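# A hedged, stand-alone sketch (pure Python) of the chaining contract the
# handler above relies on: the query pipeline wraps the real error with
# `raise ... from`, so __cause__ is always set and the bare `raise` branch
# only fires if a QueryException was raised without a chained cause.
class QueryException(Exception):
    extra: dict = {}

try:
    try:
        raise ValueError("boom")  # stand-in for RateLimitExceeded et al.
    except ValueError as err:
        raise QueryException("wrapped") from err
except QueryException as exc:
    assert isinstance(exc.__cause__, ValueError)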
Example #5
def test_build_request(body: MutableMapping[str, Any], language: Language,
                       condition: Expression) -> None:
    dataset = get_dataset("events")
    entity = dataset.get_default_entity()
    schema = RequestSchema.build_with_extensions(
        entity.get_extensions(),
        HTTPRequestSettings,
        language,
    )

    request = build_request(
        body,
        parse_legacy_query if language == Language.LEGACY else partial(
            parse_snql_query, []),
        HTTPRequestSettings,
        schema,
        dataset,
        Timer("test"),
        "my_request",
    )

    expected_query = Query(
        from_clause=Entity(EntityKey.EVENTS, entity.get_data_model()),
        selected_columns=[
            SelectedExpression(
                name="time",
                expression=Column(alias="_snuba_time",
                                  table_name=None,
                                  column_name="time"),
            ),
            SelectedExpression("count",
                               FunctionCall("_snuba_count", "count", tuple())),
        ],
        condition=condition,
        groupby=[Column("_snuba_time", None, "time")],
        limit=1000,
        granularity=60,
    )

    assert request.referrer == "my_request"
    assert dict(request.body) == body
    status, differences = request.query.equals(expected_query)
    assert status, f"Query mismatch: {differences}"
Example #6
def dataset_query(
    dataset: Dataset, body: MutableMapping[str, Any], timer: Timer, language: Language
) -> Response:
    assert http_request.method == "POST"
    referrer = http_request.referrer or "<unknown>"  # mypy

    if language == Language.SNQL:
        metrics.increment("snql.query.incoming", tags={"referrer": referrer})
        parser: Callable[
            [RequestParts, RequestSettings, Dataset],
            Union[Query, CompositeQuery[Entity]],
        ] = partial(parse_snql_query, [])
    else:
        parser = parse_legacy_query

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build_with_extensions(
            dataset.get_default_entity().get_extensions(), HTTPRequestSettings, language
        )

    request = build_request(
        body, parser, HTTPRequestSettings, schema, dataset, timer, referrer
    )

    try:
        result = parse_and_run_query(dataset, request, timer)

        # Some metrics to track the adoption of SnQL
        query_type = "simple"
        if language == Language.SNQL:
            if isinstance(request.query, CompositeQuery):
                if isinstance(request.query.get_from_clause(), JoinClause):
                    query_type = "join"
                else:
                    query_type = "subquery"

            metrics.increment(
                "snql.query.success", tags={"referrer": referrer, "type": query_type}
            )

    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": "rate limit exceeded",
            }
        elif isinstance(cause, ClickhouseError):
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        if language == Language.SNQL:
            metrics.increment(
                "snql.query.failed", tags={"referrer": referrer, "status": f"{status}"},
            )

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
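# A hedged, self-contained sketch of the SnQL adoption bookkeeping above,
# using stand-in classes instead of Snuba's real AST types: a CompositeQuery
# over a JoinClause counts as "join", any other CompositeQuery as "subquery",
# and a plain Query as "simple".
class Query:
    pass

class JoinClause:
    pass

class CompositeQuery(Query):
    def __init__(self, from_clause: object) -> None:
        self._from = from_clause

    def get_from_clause(self) -> object:
        return self._from

def classify(query: Query) -> str:
    if isinstance(query, CompositeQuery):
        if isinstance(query.get_from_clause(), JoinClause):
            return "join"
        return "subquery"
    return "simple"

assert classify(CompositeQuery(JoinClause())) == "join"
assert classify(CompositeQuery(object())) == "subquery"
assert classify(Query()) == "simple"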
Example #7
def dataset_query(
    dataset: Dataset, body: MutableMapping[str, Any], timer: Timer
) -> Response:
    assert http_request.method == "POST"
    referrer = http_request.referrer or "<unknown>"  # mypy

    # Try to detect whether new requests are still being sent to the API
    # after the shutdown command has been issued, and if so, how long
    # after. A disk check on every query would be too expensive, so
    # randomly sample until the shutdown file is detected, and then log
    # everything.
    if IS_SHUTTING_DOWN or random.random() < 0.05:
        if IS_SHUTTING_DOWN or check_down_file_exists():
            tags = {"dataset": get_dataset_name(dataset)}
            metrics.increment("post.shutdown.query", tags=tags)
            diff = time.time() - (shutdown_time() or 0.0)  # this should never be None
            metrics.timing("post.shutdown.query.delay", diff, tags=tags)

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build(HTTPQuerySettings)

    request = build_request(
        body, parse_snql_query, HTTPQuerySettings, schema, dataset, timer, referrer
    )

    try:
        result = parse_and_run_query(dataset, request, timer)
    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": str(cause),
            }
            logger.warning(
                str(cause),
                exc_info=True,
            )
        elif isinstance(cause, ClickhouseError):
            status = get_http_status_for_clickhouse_error(cause)
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, QueryTooLongException):
            status = 400
            details = {"type": "query-too-long", "message": str(cause)}
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.query_settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
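# A hedged sketch (hypothetical helper and path) of the sampling strategy in
# the shutdown check above: hit the disk for only ~5% of queries until the
# down-file is first seen, after which every query is recorded.
import os
import random

DOWN_FILE_PATH = "/tmp/snuba.down"  # hypothetical location

def should_record_shutdown_query(is_shutting_down: bool) -> bool:
    if is_shutting_down:
        return True
    return random.random() < 0.05 and os.path.exists(DOWN_FILE_PATH)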
Example #8
def test_nullable_field_casting(entity: Entity,
                                expected_table_name: str) -> None:
    dataset_name = "discover"

    query_str = """MATCH (discover)
    SELECT
        uniq(sdk_version)
    WHERE
        timestamp >= toDateTime('2021-07-25T15:02:10') AND
        timestamp < toDateTime('2021-07-26T15:02:10') AND
        project_id IN tuple(5492900)
    """

    # ----- create the request object as if it came in through our API -----
    query_body = {
        "query": query_str,
        "debug": True,
        "dataset": dataset_name,
        "turbo": False,
        "consistent": False,
    }

    dataset = get_dataset(dataset_name)

    schema = RequestSchema.build(HTTPQuerySettings)

    request = build_request(
        query_body,
        parse_snql_query,
        HTTPQuerySettings,
        schema,
        dataset,
        Timer(name="bloop"),
        "some_referrer",
    )

    # --------------------------------------------------------------------

    def query_verifier(
        query: Union[Query, CompositeQuery[Table]],
        settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        # This extends StringifyVisitor only because it already implements all
        # the other visit methods.
        class NullCastingVerifier(StringifyVisitor):
            def __init__(self) -> None:
                self.sdk_version_cast_to_null = False
                super().__init__()

            def visit_function_call(self, exp: FunctionCall) -> str:
                if (exp.function_name == "cast"
                        and exp.alias == "_snuba_sdk_version"
                        and exp.parameters == (
                            Column(None, None, "sdk_version"),
                            Literal(None, "Nullable(String)"),
                        )):
                    self.sdk_version_cast_to_null = True
                return super().visit_function_call(exp)

        for select_expr in query.get_selected_columns():
            verifier = NullCastingVerifier()
            select_expr.expression.accept(verifier)
            assert verifier.sdk_version_cast_to_null

        return QueryResult(
            result={
                "meta": [],
                "data": [],
                "totals": {}
            },
            extra={
                "stats": {},
                "sql": "",
                "experiments": {}
            },
        )

    entity.get_query_pipeline_builder().build_execution_pipeline(
        request, query_verifier).execute()
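# A hedged illustration (hand-built AST, mirroring the expectation checked
# above): the discover entity wraps the nullable column so the generated SQL
# reads roughly `cast(sdk_version, 'Nullable(String)') AS _snuba_sdk_version`.
expected_cast = FunctionCall(
    "_snuba_sdk_version",
    "cast",
    (Column(None, None, "sdk_version"), Literal(None, "Nullable(String)")),
)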
Example #9
def test_span_id_promotion(entity: Entity, expected_table_name: str) -> None:
    """In order to save space in the contexts column and provide faster query
    performance, we promote span_id to a proper column and don't store it in the
    actual contexts object in the DB.

    The client, however, still queries by `contexts[trace.span_id]` and expects
    a hex string rather than the 64-bit uint we store it as.

    This test makes sure that our query pipeline will do the proper column promotion and conversion
    """

    dataset_name = "discover"

    # The client queries by contexts[trace.span_id] even though that's not how we store it
    query_str = f"""MATCH (discover)
    SELECT
        contexts[trace.span_id]
    WHERE
        timestamp >= toDateTime('2021-07-25T15:02:10') AND
        timestamp < toDateTime('2021-07-26T15:02:10') AND
        contexts[trace.span_id] = '{span_id_hex}' AND
        project_id IN tuple(5492900)
    """

    # ----- create the request object as if it came in through our API -----
    query_body = {
        "query": query_str,
        "debug": True,
        "dataset": dataset_name,
        "turbo": False,
        "consistent": False,
    }

    dataset = get_dataset(dataset_name)

    schema = RequestSchema.build(HTTPQuerySettings)

    request = build_request(
        query_body,
        parse_snql_query,
        HTTPQuerySettings,
        schema,
        dataset,
        Timer(name="bloop"),
        "some_referrer",
    )

    # --------------------------------------------------------------------

    def query_verifier(
        query: Union[Query, CompositeQuery[Table]],
        settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        assert isinstance(query, Query)
        # Local and CI environments use different table names
        # (errors_local vs. errors_dist, discover_local vs. discover_dist),
        # so we check with `in` instead of `==`.
        assert expected_table_name in query.get_from_clause().table_name
        assert query.get_selected_columns() == [
            SelectedExpression(
                name="contexts[trace.span_id]",
                # the select converts the span_id into a lowercase hex string
                expression=FunctionCall(
                    "_snuba_contexts[trace.span_id]",
                    "lower",
                    (FunctionCall(None, "hex",
                                  (Column(None, None, "span_id"), )), ),
                ),
            )
        ]

        class SpanIdVerifier(NoopVisitor):
            def __init__(self) -> None:
                self.found_span_condition = False
                super().__init__()

            def visit_function_call(self, exp: FunctionCall) -> None:
                if exp.function_name == "equals" and exp.parameters[
                        0] == Column(None, None, "span_id"):
                    self.found_span_condition = True
                    # and here we can see that the hex string the client queried us with
                    # has been converted to the correct uint64
                    assert exp.parameters[1] == Literal(
                        None, span_id_as_uint64)
                return super().visit_function_call(exp)

        verifier = SpanIdVerifier()
        condition = query.get_condition()
        assert condition is not None
        condition.accept(verifier)
        assert verifier.found_span_condition

        return QueryResult(
            result={
                "meta": [],
                "data": [],
                "totals": {}
            },
            extra={
                "stats": {},
                "sql": "",
                "experiments": {}
            },
        )

    entity.get_query_pipeline_builder().build_execution_pipeline(
        request, query_verifier).execute()
Example #10
def test_tags_hashmap_optimization() -> None:
    entity = get_entity(EntityKey.DISCOVER)
    dataset_name = "discover"
    query_str = """
    MATCH (discover)
    SELECT count() AS count
    WHERE
        timestamp >= toDateTime('2021-07-12T19:45:01') AND
        timestamp < toDateTime('2021-08-11T19:45:01') AND
        project_id IN tuple(300688) AND
        ifNull(tags[duration_group], '') != '' AND
        ifNull(tags[duration_group], '') = '<10s'
    LIMIT 50
    """

    # ----- create the request object as if it came in through our API -----
    query_body = {
        "query": query_str,
        "debug": True,
        "dataset": dataset_name,
        "turbo": False,
        "consistent": False,
    }

    dataset = get_dataset(dataset_name)

    schema = RequestSchema.build(HTTPQuerySettings)

    request = build_request(
        query_body,
        parse_snql_query,
        HTTPQuerySettings,
        schema,
        dataset,
        Timer(name="bloop"),
        "some_referrer",
    )
    # --------------------------------------------------------------------

    def query_verifier(query: Query, settings: QuerySettings, reader: Reader) -> None:
        class ConditionVisitor(NoopVisitor):
            def __init__(self) -> None:
                self.found_hashmap_condition = False

            def visit_function_call(self, exp: FunctionCall) -> None:
                assert exp.function_name != "arrayElement"
                if (
                    exp.function_name == "has"
                    and isinstance(exp.parameters[0], Column)
                    and exp.parameters[0].column_name == "_tags_hash_map"
                ):
                    self.found_hashmap_condition = True
                return super().visit_function_call(exp)

        visitor = ConditionVisitor()
        condition = query.get_condition()
        assert condition is not None  # guard: get_condition() is Optional
        condition.accept(visitor)
        assert visitor.found_hashmap_condition

    entity.get_query_pipeline_builder().build_execution_pipeline(
        request, query_verifier
    ).execute()
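# A hedged conceptual sketch (hypothetical helper; the real rewrite lives in
# Snuba's query processors): an `ifNull(tags[key], '') = 'value'` condition is
# replaced by a membership check on a precomputed hash column, so ClickHouse
# never has to evaluate arrayElement per row.
def hashmap_condition(key: str, value: str) -> str:
    return f"has(_tags_hash_map, cityHash64('{key}={value}'))"

assert (
    hashmap_condition("duration_group", "<10s")
    == "has(_tags_hash_map, cityHash64('duration_group=<10s'))"
)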