Example #1
    def build_request(
        self,
        dataset: Dataset,
        timestamp: datetime,
        offset: Optional[int],
        timer: Timer,
        metrics: Optional[MetricsBackend] = None,
        referrer: str = SUBSCRIPTION_REFERRER,
    ) -> Request:
        schema = RequestSchema.build(SubscriptionQuerySettings)

        request = build_request(
            {"query": self.query},
            parse_snql_query,
            SubscriptionQuerySettings,
            schema,
            dataset,
            timer,
            referrer,
            [
                self.entity_subscription.validate_query,
                partial(self.add_conditions, timestamp, offset),
            ],
        )
        return request
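
The custom processors passed to build_request above are plain callables; partial(self.add_conditions, timestamp, offset) pre-binds the timestamp and offset so the parser can later invoke the processor with only the parsed query. A minimal stdlib sketch of that pattern, with a hypothetical add_conditions standing in for the subscription's method:

from datetime import datetime
from functools import partial
from typing import Optional

def add_conditions(timestamp: datetime, offset: Optional[int], query: dict) -> None:
    # Hypothetical stand-in for SubscriptionData.add_conditions: append a time
    # bound and, when present, an offset bound to the query's conditions.
    conditions = query.setdefault("conditions", [])
    conditions.append(["timestamp", "<", timestamp.isoformat()])
    if offset is not None:
        conditions.append([["ifnull", ["offset", 0]], "<=", offset])

processor = partial(add_conditions, datetime(2022, 1, 1), 5)
query: dict = {}
processor(query)  # same as add_conditions(datetime(2022, 1, 1), 5, query)
print(query["conditions"])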
Example #2
def sdk_distribution(*, timer: Timer):
    request = validate_request_content(
        parse_request_body(http_request),
        RequestSchema(
            schemas.SDK_STATS_BASE_SCHEMA,
            SETTINGS_SCHEMA,
            schemas.SDK_STATS_EXTENSIONS_SCHEMA,
        ),
        timer,
    )

    request.query.set_aggregations([
        ['uniq', 'project_id', 'projects'],
        ['count()', None, 'count'],
    ])
    request.query.add_groupby(['sdk_name', 'rtime'])
    request.extensions['project'] = {
        'project': [],
    }

    dataset = get_dataset('events')
    ensure_table_exists(dataset)

    query_result = parse_and_run_query(dataset, request, timer)
    return (json.dumps(query_result.result,
                       for_json=True,
                       default=lambda obj: obj.isoformat()
                       if isinstance(obj, datetime) else obj),
            query_result.status, {
                'Content-Type': 'application/json'
            })
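
The for_json=True keyword in the serialization above is simplejson-specific, which suggests json here is simplejson rather than the standard library. With stdlib json the same datetime handling can be expressed with just the default hook; a minimal sketch:

from datetime import datetime
import json

def _default(obj):
    # Serialize datetimes as ISO-8601 strings; raise for anything else so
    # unsupported types fail loudly instead of being passed through silently.
    if isinstance(obj, datetime):
        return obj.isoformat()
    raise TypeError(f"{type(obj)!r} is not JSON serializable")

print(json.dumps({"rtime": datetime(2021, 7, 26, 15, 2, 10)}, default=_default))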
Example #3
def test_split_request() -> None:
    payload = {
        "turbo": False,
        "consistent": False,
        "debug": False,
        "dry_run": False,
        "legacy": False,
        "team": "sns",
        "feature": "attribution",
        "app_id": "foobar",
        "query": """MATCH (something) dontcare""",
    }
    schema = RequestSchema.build(HTTPQuerySettings)
    parts = schema.validate(payload)
    assert set(parts.query_settings.keys()) == {
        "turbo",
        "consistent",
        "debug",
        "dry_run",
        "legacy",
        "referrer",
    }
    assert set(parts.attribution_info.keys()) == {
        "team",
        "feature",
        "app_id",
        "parent_api",
        "referrer",
    }
    assert set(parts.query.keys()) == {"query"}
Example #4
    def build_request(
        self,
        dataset: Dataset,
        timestamp: datetime,
        offset: Optional[int],
        timer: Timer,
        metrics: Optional[MetricsBackend] = None,
    ) -> Request:
        schema = RequestSchema.build_with_extensions(
            {},
            SubscriptionRequestSettings,
            Language.SNQL,
        )

        request = build_request(
            {"query": self.query},
            partial(
                parse_snql_query,
                [
                    self.validate_subscription,
                    partial(self.add_conditions, timestamp, offset),
                ],
            ),
            SubscriptionRequestSettings,
            schema,
            dataset,
            timer,
            SUBSCRIPTION_REFERRER,
        )
        return request
Example #5
def build_request(
    self, dataset: Dataset, timestamp: datetime, offset: Optional[int], timer: Timer
) -> Request:
    """
    Returns a Request that can be used to run a query via `parse_and_run_query`.
    :param dataset: The Dataset to build the request for
    :param timestamp: Date that the query should run up until
    :param offset: Maximum offset we should query for
    """
    schema = RequestSchema.build_with_extensions(
        dataset.get_extensions(), SubscriptionRequestSettings,
    )
    extra_conditions: Sequence[Condition] = []
    if offset is not None:
        extra_conditions = [[["ifnull", ["offset", 0]], "<=", offset]]
    return validate_request_content(
        {
            "project": self.project_id,
            "conditions": [*self.conditions, *extra_conditions],
            "aggregations": self.aggregations,
            "from_date": (timestamp - self.time_window).isoformat(),
            "to_date": timestamp.isoformat(),
        },
        schema,
        timer,
        dataset,
        SUBSCRIPTION_REFERRER,
    )
Example #6
def validate_request_content(body, schema: RequestSchema, timer) -> Request:
    try:
        request = schema.validate(body)
    except jsonschema.ValidationError as error:
        raise BadRequest(str(error)) from error

    timer.mark('validate_schema')

    return request
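
The helper wraps RequestSchema.validate, which raises jsonschema.ValidationError for malformed bodies; the handler converts that into a 400-style error. A minimal self-contained sketch of the underlying pattern, with a hypothetical BadRequest standing in for the framework exception:

import jsonschema

SCHEMA = {
    "type": "object",
    "properties": {"query": {"type": "string"}},
    "required": ["query"],
}

class BadRequest(Exception):
    """Hypothetical stand-in for the web framework's 400 exception."""

def validate_body(body: dict) -> dict:
    try:
        jsonschema.validate(instance=body, schema=SCHEMA)
    except jsonschema.ValidationError as error:
        raise BadRequest(str(error)) from error
    return body

validate_body({"query": "MATCH (events) SELECT count() BY project_id"})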
Example #7
File: views.py  Project: Appva/snuba
def validate_request_content(body, schema: RequestSchema, timer,
                             dataset: Dataset) -> Request:
    source = dataset.get_dataset_schemas().get_read_schema().get_data_source()
    try:
        request = schema.validate(body, source)
    except jsonschema.ValidationError as error:
        raise BadRequest(str(error)) from error

    timer.mark('validate_schema')

    return request
Example #8
def validate_request_content(body, schema: RequestSchema, timer: Timer,
                             dataset: Dataset, referrer: str) -> Request:
    with sentry_sdk.start_span(description="validate_request_content",
                               op="validate") as span:
        try:
            request = schema.validate(body, dataset, referrer)
            span.set_data("snuba_query", request.body)
        except jsonschema.ValidationError as error:
            raise BadRequest(str(error)) from error

        timer.mark("validate_schema")

    return request
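
Later versions wrap validation in a Sentry span and attach the raw body via span.set_data, so slow or failing validations show up in tracing. A minimal sketch of that wrapping, assuming sentry_sdk.init has been called elsewhere:

import sentry_sdk

def validate_with_span(body: dict) -> dict:
    with sentry_sdk.start_span(op="validate", description="validate_request_content") as span:
        # Attach the raw body to the span so failing or slow validations are
        # visible in tracing alongside the query that caused them.
        span.set_data("snuba_query", body)
        # ... schema validation would go here ...
        return body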
Example #9
def snql_dataset_query_view(*, dataset: Dataset, timer: Timer) -> Union[Response, str]:
    if http_request.method == "GET":
        schema = RequestSchema.build(HTTPQuerySettings)
        return render_template(
            "query.html",
            query_template=json.dumps(schema.generate_template(), indent=4),
        )
    elif http_request.method == "POST":
        body = parse_request_body(http_request)
        _trace_transaction(dataset)
        return dataset_query(dataset, body, timer)
    else:
        assert False, "unexpected fallthrough"
Example #10
def dataset_query(dataset: Dataset, body, timer: Timer) -> Response:
    assert http_request.method == "POST"

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build_with_extensions(
            dataset.get_extensions(), HTTPRequestSettings
        )

    request = build_request(body, schema, timer, dataset, http_request.referrer)

    try:
        result = parse_and_run_query(dataset, request, timer)
    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": "rate limit exceeded",
            }
        elif isinstance(cause, ClickhouseError):
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
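
The error handling above inspects exception.__cause__, which is only populated when the lower-level error was chained with "raise QueryException(...) from cause"; the bare raise in the final branch guards against unchained exceptions. A small self-contained sketch of that mapping, with hypothetical exception classes standing in for Snuba's:

class QueryException(Exception):
    """Hypothetical wrapper raised by the query pipeline."""

class RateLimitExceeded(Exception):
    """Hypothetical rate-limit error chained as the cause."""

def status_for(exception: QueryException) -> int:
    cause = exception.__cause__
    if isinstance(cause, RateLimitExceeded):
        return 429
    if isinstance(cause, Exception):
        return 500
    raise exception  # the exception should have been chained

try:
    try:
        raise RateLimitExceeded("concurrent limit reached")
    except RateLimitExceeded as cause:
        raise QueryException("query failed") from cause
except QueryException as exc:
    print(status_for(exc))  # 429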
Example #11
def build_request(
    body: MutableMapping[str, Any],
    parser: Parser,
    settings_class: Union[Type[HTTPRequestSettings], Type[SubscriptionRequestSettings]],
    schema: RequestSchema,
    dataset: Dataset,
    timer: Timer,
    referrer: str,
) -> Request:
    with sentry_sdk.start_span(description="build_request", op="validate") as span:
        try:
            request_parts = schema.validate(body)
            if settings_class == HTTPRequestSettings:
                settings = {
                    **request_parts.settings,
                    "consistent": _consistent_override(
                        request_parts.settings.get("consistent", False), referrer
                    ),
                }
                settings_obj: Union[
                    HTTPRequestSettings, SubscriptionRequestSettings
                ] = settings_class(**settings)
            elif settings_class == SubscriptionRequestSettings:
                settings_obj = settings_class(
                    consistent=_consistent_override(True, referrer)
                )

            query = parser(request_parts, settings_obj, dataset)

            request_id = uuid.uuid4().hex
            request = Request(
                request_id,
                # TODO: Replace this with the actual query raw body.
                # this can have an impact on subscriptions so we need
                # to be careful with the change.
                ChainMap(request_parts.query, *request_parts.extensions.values()),
                query,
                settings_obj,
                referrer,
            )
        except (InvalidJsonRequestException, InvalidQueryException) as exception:
            record_invalid_request(timer, referrer)
            raise exception
        except Exception as exception:
            record_error_building_request(timer, referrer)
            raise exception

        span.set_data("snuba_query", request.body)

        timer.mark("validate_schema")
        return request
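
The request body here is assembled with collections.ChainMap, which layers the query dict over each extension dict so lookups fall through left to right without copying. A quick stdlib illustration with made-up extension values:

from collections import ChainMap

query = {"selected_columns": ["event_id"]}
extensions = {"project": {"project": [1]}, "timeseries": {"granularity": 60}}

body = ChainMap(query, *extensions.values())
print(body["selected_columns"])  # found in the query mapping
print(body["granularity"])       # falls through to the timeseries extension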
Example #12
def dataset_query_view(*, dataset: Dataset, timer: Timer):
    if http_request.method == "GET":
        schema = RequestSchema.build_with_extensions(
            dataset.get_extensions(), HTTPRequestSettings
        )
        return render_template(
            "query.html",
            query_template=json.dumps(schema.generate_template(), indent=4,),
        )
    elif http_request.method == "POST":
        body = parse_request_body(http_request)
        return dataset_query(dataset, body, timer)
    else:
        assert False, "unexpected fallthrough"
Example #13
def dataset_query_view(*, dataset_name: str, timer: Timer):
    dataset = get_dataset(dataset_name)
    if http_request.method == 'GET':
        schema = RequestSchema.build_with_extensions(dataset.get_extensions())
        return render_template(
            'query.html',
            query_template=json.dumps(
                schema.generate_template(),
                indent=4,
            ),
        )
    elif http_request.method == 'POST':
        body = parse_request_body(http_request)
        return dataset_query(dataset, body, timer)
    else:
        assert False, 'unexpected fallthrough'
Example #14
def dataset_query(dataset: Dataset, body, timer: Timer) -> Response:
    assert http_request.method == "POST"
    ensure_table_exists(dataset)
    return format_result(
        run_query(
            dataset,
            validate_request_content(
                body,
                RequestSchema.build_with_extensions(dataset.get_extensions(),
                                                    HTTPRequestSettings),
                timer,
                dataset,
                http_request.referrer,
            ),
            timer,
        ))
Example #15
def dataset_query_view(*, dataset: Dataset, timer: Timer) -> Union[Response, str]:
    if http_request.method == "GET":
        schema = RequestSchema.build_with_extensions(
            dataset.get_default_entity().get_extensions(),
            HTTPRequestSettings,
            Language.LEGACY,
        )
        return render_template(
            "query.html",
            query_template=json.dumps(schema.generate_template(), indent=4,),
        )
    elif http_request.method == "POST":
        body = parse_request_body(http_request)
        _trace_transaction(dataset)
        return dataset_query(dataset, body, timer, Language.LEGACY)
    else:
        assert False, "unexpected fallthrough"
Example #16
def build_request(body, schema: RequestSchema, timer: Timer, dataset: Dataset,
                  referrer: str) -> Request:
    with sentry_sdk.start_span(description="build_request",
                               op="validate") as span:
        try:
            request = schema.validate(body, dataset, referrer)
        except (InvalidJsonRequestException,
                InvalidQueryException) as exception:
            record_invalid_request(timer, referrer)
            raise exception
        except Exception as exception:
            record_error_building_request(timer, referrer)
            raise exception

        span.set_data("snuba_query", request.body)

        timer.mark("validate_schema")
        return request
Example #17
def test_build_request(body: MutableMapping[str, Any], language: Language,
                       condition: Expression) -> None:
    dataset = get_dataset("events")
    entity = dataset.get_default_entity()
    schema = RequestSchema.build_with_extensions(
        entity.get_extensions(),
        HTTPRequestSettings,
        language,
    )

    request = build_request(
        body,
        parse_legacy_query if language == Language.LEGACY else partial(
            parse_snql_query, []),
        HTTPRequestSettings,
        schema,
        dataset,
        Timer("test"),
        "my_request",
    )

    expected_query = Query(
        from_clause=Entity(EntityKey.EVENTS, entity.get_data_model()),
        selected_columns=[
            SelectedExpression(
                name="time",
                expression=Column(alias="_snuba_time",
                                  table_name=None,
                                  column_name="time"),
            ),
            SelectedExpression("count",
                               FunctionCall("_snuba_count", "count", tuple())),
        ],
        condition=condition,
        groupby=[Column("_snuba_time", None, "time")],
        limit=1000,
        granularity=60,
    )

    assert request.referrer == "my_request"
    assert dict(request.body) == body
    status, differences = request.query.equals(expected_query)
    assert status == True, f"Query mismatch: {differences}"
Example #18
def dataset_query(dataset, body, timer):
    assert http_request.method == 'POST'
    ensure_table_exists(dataset)

    schema = RequestSchema.build_with_extensions(dataset.get_extensions())
    query_result = parse_and_run_query(
        dataset,
        validate_request_content(body, schema, timer),
        timer,
    )

    def json_default(obj):
        if isinstance(obj, datetime):
            return obj.isoformat()
        elif isinstance(obj, UUID):
            return str(obj)
        return obj

    return (json.dumps(query_result.result,
                       for_json=True,
                       default=json_default), query_result.status, {
                           'Content-Type': 'application/json'
                       })
Example #19
def dataset_query(
    dataset: Dataset, body: MutableMapping[str, Any], timer: Timer
) -> Response:
    assert http_request.method == "POST"
    referrer = http_request.referrer or "<unknown>"  # mypy

    # Try to detect if new requests are being sent to the api
    # after the shutdown command has been issued, and if so
    # how long after. I don't want to do a disk check for
    # every query, so randomly sample until the shutdown file
    # is detected, and then log everything
    if IS_SHUTTING_DOWN or random.random() < 0.05:
        if IS_SHUTTING_DOWN or check_down_file_exists():
            tags = {"dataset": get_dataset_name(dataset)}
            metrics.increment("post.shutdown.query", tags=tags)
            diff = time.time() - (shutdown_time() or 0.0)  # this should never be None
            metrics.timing("post.shutdown.query.delay", diff, tags=tags)

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build(HTTPQuerySettings)

    request = build_request(
        body, parse_snql_query, HTTPQuerySettings, schema, dataset, timer, referrer
    )

    try:
        result = parse_and_run_query(dataset, request, timer)
    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": str(cause),
            }
            logger.warning(
                str(cause),
                exc_info=True,
            )
        elif isinstance(cause, ClickhouseError):
            status = get_http_status_for_clickhouse_error(cause)
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, QueryTooLongException):
            status = 400
            details = {"type": "query-too-long", "message": str(cause)}
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.query_settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
Example #20
def test_span_id_promotion(entity: Entity, expected_table_name: str) -> None:
    """In order to save space in the contexts column and provide faster query
    performance, we promote span_id to a proper column and don't store it in the
    actual contexts object in the DB.

    The client however, still queries by `contexts[trace.span_id]` and expects that
    it is a hex string rather than a 64 bit uint (which is what we store it as)

    This test makes sure that our query pipeline will do the proper column promotion and conversion
    """

    dataset_name = "discover"

    # The client queries by contexts[trace.span_id] even though that's not how we store it
    query_str = f"""MATCH (discover)
    SELECT
        contexts[trace.span_id]
    WHERE
        timestamp >= toDateTime('2021-07-25T15:02:10') AND
        timestamp < toDateTime('2021-07-26T15:02:10') AND
        contexts[trace.span_id] = '{span_id_hex}' AND
        project_id IN tuple(5492900)
    """

    # ----- create the request object as if it came in through our API -----
    query_body = {
        "query": query_str,
        "debug": True,
        "dataset": dataset_name,
        "turbo": False,
        "consistent": False,
    }

    dataset = get_dataset(dataset_name)

    schema = RequestSchema.build(HTTPQuerySettings)

    request = build_request(
        query_body,
        parse_snql_query,
        HTTPQuerySettings,
        schema,
        dataset,
        Timer(name="bloop"),
        "some_referrer",
    )

    # --------------------------------------------------------------------

    def query_verifier(
        query: Union[Query, CompositeQuery[Table]],
        settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        assert isinstance(query, Query)
        # in local and CI there's a table name difference
        # errors_local vs errors_dist and discover_local vs discover_dist
        # so we check using `in` instead of `==`
        assert expected_table_name in query.get_from_clause().table_name
        assert query.get_selected_columns() == [
            SelectedExpression(
                name="contexts[trace.span_id]",
                # the select converts the span_id into a lowecase hex string
                expression=FunctionCall(
                    "_snuba_contexts[trace.span_id]",
                    "lower",
                    (FunctionCall(None, "hex",
                                  (Column(None, None, "span_id"), )), ),
                ),
            )
        ]

        class SpanIdVerifier(NoopVisitor):
            def __init__(self) -> None:
                self.found_span_condition = False
                super().__init__()

            def visit_function_call(self, exp: FunctionCall) -> None:
                if exp.function_name == "equals" and exp.parameters[
                        0] == Column(None, None, "span_id"):
                    self.found_span_condition = True
                    # and here we can see that the hex string the client queried us with
                    # has been converted to the correct uint64
                    assert exp.parameters[1] == Literal(
                        None, span_id_as_uint64)
                return super().visit_function_call(exp)

        verifier = SpanIdVerifier()
        condition = query.get_condition()
        assert condition is not None
        condition.accept(verifier)
        assert verifier.found_span_condition

        return QueryResult(
            result={
                "meta": [],
                "data": [],
                "totals": {}
            },
            extra={
                "stats": {},
                "sql": "",
                "experiments": {}
            },
        )

    entity.get_query_pipeline_builder().build_execution_pipeline(
        request, query_verifier).execute()
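
The promotion described in the docstring stores span_id in ClickHouse as a UInt64 while clients send and expect a hex string; the select converts it back with lower(hex(span_id)) and the condition converts the literal the other way. A quick sketch of those two conversions with an illustrative value (the test's span_id_hex fixture is defined elsewhere):

span_id_hex = "9cdc3a1b173e4d4b"          # illustrative value, not the test fixture
span_id_as_uint64 = int(span_id_hex, 16)  # what the condition compares against in ClickHouse
assert format(span_id_as_uint64, "016x") == span_id_hex  # lower(hex(...)) on the way back out
print(span_id_as_uint64)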
Example #21
def test_nullable_field_casting(entity: Entity,
                                expected_table_name: str) -> None:
    dataset_name = "discover"

    query_str = """MATCH (discover)
    SELECT
        uniq(sdk_version)
    WHERE
        timestamp >= toDateTime('2021-07-25T15:02:10') AND
        timestamp < toDateTime('2021-07-26T15:02:10') AND
        project_id IN tuple(5492900)
    """

    # ----- create the request object as if it came in through our API -----
    query_body = {
        "query": query_str,
        "debug": True,
        "dataset": dataset_name,
        "turbo": False,
        "consistent": False,
    }

    dataset = get_dataset(dataset_name)

    schema = RequestSchema.build(HTTPQuerySettings)

    request = build_request(
        query_body,
        parse_snql_query,
        HTTPQuerySettings,
        schema,
        dataset,
        Timer(name="bloop"),
        "some_referrer",
    )

    # --------------------------------------------------------------------

    def query_verifier(
        query: Union[Query, CompositeQuery[Table]],
        settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        # The only reason this extends StringifyVisitor is because it has all the other
        # visit methods implemented.
        class NullCastingVerifier(StringifyVisitor):
            def __init__(self) -> None:
                self.sdk_version_cast_to_null = False
                super().__init__()

            def visit_function_call(self, exp: FunctionCall) -> str:
                if (exp.function_name == "cast"
                        and exp.alias == "_snuba_sdk_version"
                        and exp.parameters == (
                            Column(None, None, "sdk_version"),
                            Literal(None, "Nullable(String)"),
                        )):
                    self.sdk_version_cast_to_null = True
                return super().visit_function_call(exp)

        for select_expr in query.get_selected_columns():
            verifier = NullCastingVerifier()
            select_expr.expression.accept(verifier)
            assert verifier.sdk_version_cast_to_null

        return QueryResult(
            result={
                "meta": [],
                "data": [],
                "totals": {}
            },
            extra={
                "stats": {},
                "sql": "",
                "experiments": {}
            },
        )

    entity.get_query_pipeline_builder().build_execution_pipeline(
        request, query_verifier).execute()
Example #22
def build_request(
    body: MutableMapping[str, Any],
    parser: Parser,
    settings_class: Union[Type[HTTPQuerySettings], Type[SubscriptionQuerySettings]],
    schema: RequestSchema,
    dataset: Dataset,
    timer: Timer,
    referrer: str,
    custom_processing: Optional[CustomProcessors] = None,
) -> Request:
    with sentry_sdk.start_span(description="build_request", op="validate") as span:
        try:
            request_parts = schema.validate(body)
            if settings_class == HTTPQuerySettings:
                query_settings: MutableMapping[str, bool | str] = {
                    **request_parts.query_settings,
                    "consistent": _consistent_override(
                        request_parts.query_settings.get("consistent", False), referrer
                    ),
                }
                query_settings["referrer"] = referrer
                # TODO: referrer probably doesn't need to be passed in, it should be from the body
                settings_obj: Union[
                    HTTPQuerySettings, SubscriptionQuerySettings
                ] = settings_class(
                    **query_settings,
                )
            elif settings_class == SubscriptionQuerySettings:
                settings_obj = settings_class(
                    consistent=_consistent_override(True, referrer),
                )
            query, snql_anonymized = parser(
                request_parts, settings_obj, dataset, custom_processing
            )

            project_ids = get_object_ids_in_query_ast(query, "project_id")
            if project_ids is not None and len(project_ids) == 1:
                sentry_sdk.set_tag("snuba_project_id", project_ids.pop())

            org_ids = get_object_ids_in_query_ast(query, "org_id")
            if org_ids is not None and len(org_ids) == 1:
                sentry_sdk.set_tag("snuba_org_id", org_ids.pop())
            attribution_info = dict(request_parts.attribution_info)
            # TODO: clean this up
            attribution_info["app_id"] = get_app_id(
                request_parts.attribution_info["app_id"]
            )
            attribution_info["referrer"] = referrer

            request_id = uuid.uuid4().hex
            request = Request(
                id=request_id,
                # TODO: Replace this with the actual query raw body.
                # this can have an impact on subscriptions so we need
                # to be careful with the change.
                original_body=body,
                query=query,
                attribution_info=AttributionInfo(**attribution_info),
                query_settings=settings_obj,
                snql_anonymized=snql_anonymized,
            )
        except (InvalidJsonRequestException, InvalidQueryException) as exception:
            record_invalid_request(timer, referrer)
            raise exception
        except Exception as exception:
            record_error_building_request(timer, referrer)
            raise exception

        span.set_data(
            "snuba_query_parsed",
            repr(query).split("\n"),
        )
        span.set_data(
            "snuba_query_raw",
            textwrap.wrap(repr(request.original_body), 100, break_long_words=False),
        )
        sentry_sdk.add_breadcrumb(
            category="query_info",
            level="info",
            message="snuba_query_raw",
            data={
                "query": textwrap.wrap(
                    repr(request.original_body), 100, break_long_words=False
                )
            },
        )

        timer.mark("validate_schema")
        return request
Example #23
def dataset_query(
    dataset: Dataset, body: MutableMapping[str, Any], timer: Timer, language: Language
) -> Response:
    assert http_request.method == "POST"
    referrer = http_request.referrer or "<unknown>"  # mypy

    if language == Language.SNQL:
        metrics.increment("snql.query.incoming", tags={"referrer": referrer})
        parser: Callable[
            [RequestParts, RequestSettings, Dataset],
            Union[Query, CompositeQuery[Entity]],
        ] = partial(parse_snql_query, [])
    else:
        parser = parse_legacy_query

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build_with_extensions(
            dataset.get_default_entity().get_extensions(), HTTPRequestSettings, language
        )

    request = build_request(
        body, parser, HTTPRequestSettings, schema, dataset, timer, referrer
    )

    try:
        result = parse_and_run_query(dataset, request, timer)

        # Some metrics to track the adoption of SnQL
        query_type = "simple"
        if language == Language.SNQL:
            if isinstance(request.query, CompositeQuery):
                if isinstance(request.query.get_from_clause(), JoinClause):
                    query_type = "join"
                else:
                    query_type = "subquery"

            metrics.increment(
                "snql.query.success", tags={"referrer": referrer, "type": query_type}
            )

    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": "rate limit exceeded",
            }
        elif isinstance(cause, ClickhouseError):
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        if language == Language.SNQL:
            metrics.increment(
                "snql.query.failed", tags={"referrer": referrer, "status": f"{status}"},
            )

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
Example #24
def test_tags_hashmap_optimization() -> None:
    entity = get_entity(EntityKey.DISCOVER)
    dataset_name = "discover"
    query_str = """
    MATCH (discover)
    SELECT count() AS count
    WHERE
        timestamp >= toDateTime('2021-07-12T19:45:01') AND
        timestamp < toDateTime('2021-08-11T19:45:01') AND
        project_id IN tuple(300688)
        AND ifNull(tags[duration_group], '') != '' AND
        ifNull(tags[duration_group], '') = '<10s'
    LIMIT 50
    """

    # ----- create the request object as if it came in through our API -----
    query_body = {
        "query": query_str,
        "debug": True,
        "dataset": dataset_name,
        "turbo": False,
        "consistent": False,
    }

    dataset = get_dataset(dataset_name)

    schema = RequestSchema.build(HTTPQuerySettings)

    request = build_request(
        query_body,
        parse_snql_query,
        HTTPQuerySettings,
        schema,
        dataset,
        Timer(name="bloop"),
        "some_referrer",
    )
    # --------------------------------------------------------------------

    def query_verifier(query: Query, settings: QuerySettings, reader: Reader) -> None:
        class ConditionVisitor(NoopVisitor):
            def __init__(self) -> None:
                self.found_hashmap_condition = False

            def visit_function_call(self, exp: FunctionCall) -> None:
                assert exp.function_name != "arrayElement"
                if (
                    exp.function_name == "has"
                    and isinstance(exp.parameters[0], Column)
                    and exp.parameters[0].column_name == "_tags_hash_map"
                ):
                    self.found_hashmap_condition = True
                return super().visit_function_call(exp)

        visitor = ConditionVisitor()
        query.get_condition().accept(visitor)
        assert visitor.found_hashmap_condition

    entity.get_query_pipeline_builder().build_execution_pipeline(
        request, query_verifier
    ).execute()