Esempio n. 1
0
def test_split_request() -> None:
    payload = {
        "turbo": False,
        "consistent": False,
        "debug": False,
        "dry_run": False,
        "legacy": False,
        "team": "sns",
        "feature": "attribution",
        "app_id": "foobar",
        "query": """MATCH (something) dontcare""",
    }
    schema = RequestSchema.build(HTTPQuerySettings)
    parts = schema.validate(payload)
    assert set(parts.query_settings.keys()) == {
        "turbo",
        "consistent",
        "debug",
        "dry_run",
        "legacy",
        "referrer",
    }
    assert set(parts.attribution_info.keys()) == {
        "team",
        "feature",
        "app_id",
        "parent_api",
        "referrer",
    }
    assert set(parts.query.keys()) == {"query"}
Esempio n. 2
0
    def build_request(
        self,
        dataset: Dataset,
        timestamp: datetime,
        offset: Optional[int],
        timer: Timer,
        metrics: Optional[MetricsBackend] = None,
        referrer: str = SUBSCRIPTION_REFERRER,
    ) -> Request:
        schema = RequestSchema.build(SubscriptionQuerySettings)

        request = build_request(
            {"query": self.query},
            parse_snql_query,
            SubscriptionQuerySettings,
            schema,
            dataset,
            timer,
            referrer,
            [
                self.entity_subscription.validate_query,
                partial(self.add_conditions, timestamp, offset),
            ],
        )
        return request
Esempio n. 3
0
def snql_dataset_query_view(*, dataset: Dataset, timer: Timer) -> Union[Response, str]:
    if http_request.method == "GET":
        schema = RequestSchema.build(HTTPQuerySettings)
        return render_template(
            "query.html",
            query_template=json.dumps(schema.generate_template(), indent=4),
        )
    elif http_request.method == "POST":
        body = parse_request_body(http_request)
        _trace_transaction(dataset)
        return dataset_query(dataset, body, timer)
    else:
        assert False, "unexpected fallthrough"
Esempio n. 4
0
def test_build_request(body: MutableMapping[str, Any],
                       condition: Expression) -> None:
    dataset = get_dataset("events")
    entity = dataset.get_default_entity()
    schema = RequestSchema.build(HTTPQuerySettings)

    request = build_request(
        body,
        parse_snql_query,
        HTTPQuerySettings,
        schema,
        dataset,
        Timer("test"),
        "my_request",
    )

    expected_query = Query(
        from_clause=Entity(EntityKey.EVENTS, entity.get_data_model()),
        selected_columns=[
            SelectedExpression(
                name="time",
                expression=Column(alias="_snuba_time",
                                  table_name=None,
                                  column_name="time"),
            ),
            SelectedExpression("count",
                               FunctionCall("_snuba_count", "count", tuple())),
        ],
        condition=condition,
        groupby=[Column("_snuba_time", None, "time")],
        limit=1000,
        granularity=60,
    )

    assert request.referrer == "my_request"
    assert dict(request.original_body) == body
    status, differences = request.query.equals(expected_query)
    assert status == True, f"Query mismatch: {differences}"
Esempio n. 5
0
def dataset_query(
    dataset: Dataset, body: MutableMapping[str, Any], timer: Timer
) -> Response:
    assert http_request.method == "POST"
    referrer = http_request.referrer or "<unknown>"  # mypy

    # Try to detect if new requests are being sent to the api
    # after the shutdown command has been issued, and if so
    # how long after. I don't want to do a disk check for
    # every query, so randomly sample until the shutdown file
    # is detected, and then log everything
    if IS_SHUTTING_DOWN or random.random() < 0.05:
        if IS_SHUTTING_DOWN or check_down_file_exists():
            tags = {"dataset": get_dataset_name(dataset)}
            metrics.increment("post.shutdown.query", tags=tags)
            diff = time.time() - (shutdown_time() or 0.0)  # this should never be None
            metrics.timing("post.shutdown.query.delay", diff, tags=tags)

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build(HTTPQuerySettings)

    request = build_request(
        body, parse_snql_query, HTTPQuerySettings, schema, dataset, timer, referrer
    )

    try:
        result = parse_and_run_query(dataset, request, timer)
    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": str(cause),
            }
            logger.warning(
                str(cause),
                exc_info=True,
            )
        elif isinstance(cause, ClickhouseError):
            status = get_http_status_for_clickhouse_error(cause)
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, QueryTooLongException):
            status = 400
            details = {"type": "query-too-long", "message": str(cause)}
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.query_settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
Esempio n. 6
0
def test_nullable_field_casting(entity: Entity,
                                expected_table_name: str) -> None:
    dataset_name = "discover"

    query_str = """MATCH (discover)
    SELECT
        uniq(sdk_version)
    WHERE
        timestamp >= toDateTime('2021-07-25T15:02:10') AND
        timestamp < toDateTime('2021-07-26T15:02:10') AND
        project_id IN tuple(5492900)
    """

    # ----- create the request object as if it came in through our API -----
    query_body = {
        "query": query_str,
        "debug": True,
        "dataset": dataset_name,
        "turbo": False,
        "consistent": False,
    }

    dataset = get_dataset(dataset_name)

    schema = RequestSchema.build(HTTPQuerySettings)

    request = build_request(
        query_body,
        parse_snql_query,
        HTTPQuerySettings,
        schema,
        dataset,
        Timer(name="bloop"),
        "some_referrer",
    )

    # --------------------------------------------------------------------

    def query_verifier(
        query: Union[Query, CompositeQuery[Table]],
        settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        # The only reason this extends StringifyVisitor is because it has all the other
        # visit methods implemented.
        class NullCastingVerifier(StringifyVisitor):
            def __init__(self) -> None:
                self.sdk_version_cast_to_null = False
                super().__init__()

            def visit_function_call(self, exp: FunctionCall) -> str:
                if (exp.function_name == "cast"
                        and exp.alias == "_snuba_sdk_version"
                        and exp.parameters == (
                            Column(None, None, "sdk_version"),
                            Literal(None, "Nullable(String)"),
                        )):
                    self.sdk_version_cast_to_null = True
                return super().visit_function_call(exp)

        for select_expr in query.get_selected_columns():
            verifier = NullCastingVerifier()
            select_expr.expression.accept(verifier)
            assert verifier.sdk_version_cast_to_null

        return QueryResult(
            result={
                "meta": [],
                "data": [],
                "totals": {}
            },
            extra={
                "stats": {},
                "sql": "",
                "experiments": {}
            },
        )

    entity.get_query_pipeline_builder().build_execution_pipeline(
        request, query_verifier).execute()
Esempio n. 7
0
def test_span_id_promotion(entity: Entity, expected_table_name: str) -> None:
    """In order to save space in the contexts column and provide faster query
    performance, we promote span_id to a proper column and don't store it in the
    actual contexts object in the DB.

    The client however, still queries by `contexts[trace.span_id]` and expects that
    it is a hex string rather than a 64 bit uint (which is what we store it as)

    This test makes sure that our query pipeline will do the proper column promotion and conversion
    """

    dataset_name = "discover"

    # The client queries by contexts[trace.span_id] even though that's not how we store it
    query_str = f"""MATCH (discover)
    SELECT
        contexts[trace.span_id]
    WHERE
        timestamp >= toDateTime('2021-07-25T15:02:10') AND
        timestamp < toDateTime('2021-07-26T15:02:10') AND
        contexts[trace.span_id] = '{span_id_hex}' AND
        project_id IN tuple(5492900)
    """

    # ----- create the request object as if it came in through our API -----
    query_body = {
        "query": query_str,
        "debug": True,
        "dataset": dataset_name,
        "turbo": False,
        "consistent": False,
    }

    dataset = get_dataset(dataset_name)

    schema = RequestSchema.build(HTTPQuerySettings)

    request = build_request(
        query_body,
        parse_snql_query,
        HTTPQuerySettings,
        schema,
        dataset,
        Timer(name="bloop"),
        "some_referrer",
    )

    # --------------------------------------------------------------------

    def query_verifier(
        query: Union[Query, CompositeQuery[Table]],
        settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        assert isinstance(query, Query)
        # in local and CI there's a table name difference
        # errors_local vs errors_dist and discover_local vs discover_dist
        # so we check using `in` instead of `==`
        assert expected_table_name in query.get_from_clause().table_name
        assert query.get_selected_columns() == [
            SelectedExpression(
                name="contexts[trace.span_id]",
                # the select converts the span_id into a lowecase hex string
                expression=FunctionCall(
                    "_snuba_contexts[trace.span_id]",
                    "lower",
                    (FunctionCall(None, "hex",
                                  (Column(None, None, "span_id"), )), ),
                ),
            )
        ]

        class SpanIdVerifier(NoopVisitor):
            def __init__(self) -> None:
                self.found_span_condition = False
                super().__init__()

            def visit_function_call(self, exp: FunctionCall) -> None:
                if exp.function_name == "equals" and exp.parameters[
                        0] == Column(None, None, "span_id"):
                    self.found_span_condition = True
                    # and here we can see that the hex string the client queried us with
                    # has been converted to the correct uint64
                    assert exp.parameters[1] == Literal(
                        None, span_id_as_uint64)
                return super().visit_function_call(exp)

        verifier = SpanIdVerifier()
        condition = query.get_condition()
        assert condition is not None
        condition.accept(verifier)
        assert verifier.found_span_condition

        return QueryResult(
            result={
                "meta": [],
                "data": [],
                "totals": {}
            },
            extra={
                "stats": {},
                "sql": "",
                "experiments": {}
            },
        )

    entity.get_query_pipeline_builder().build_execution_pipeline(
        request, query_verifier).execute()
Esempio n. 8
0
def test_tags_hashmap_optimization() -> None:
    entity = get_entity(EntityKey.DISCOVER)
    dataset_name = "discover"
    query_str = """
    MATCH (discover)
    SELECT count() AS count
    WHERE
        timestamp >= toDateTime('2021-07-12T19:45:01') AND
        timestamp < toDateTime('2021-08-11T19:45:01') AND
        project_id IN tuple(300688)
        AND ifNull(tags[duration_group], '') != '' AND
        ifNull(tags[duration_group], '') = '<10s'
    LIMIT 50
    """

    # ----- create the request object as if it came in through our API -----
    query_body = {
        "query": query_str,
        "debug": True,
        "dataset": dataset_name,
        "turbo": False,
        "consistent": False,
    }

    dataset = get_dataset(dataset_name)

    schema = RequestSchema.build(HTTPQuerySettings)

    request = build_request(
        query_body,
        parse_snql_query,
        HTTPQuerySettings,
        schema,
        dataset,
        Timer(name="bloop"),
        "some_referrer",
    )
    # --------------------------------------------------------------------

    def query_verifier(query: Query, settings: QuerySettings, reader: Reader) -> None:
        class ConditionVisitor(NoopVisitor):
            def __init__(self) -> None:
                self.found_hashmap_condition = False

            def visit_function_call(self, exp: FunctionCall) -> None:
                assert exp.function_name != "arrayElement"
                if (
                    exp.function_name == "has"
                    and isinstance(exp.parameters[0], Column)
                    and exp.parameters[0].column_name == "_tags_hash_map"
                ):
                    self.found_hashmap_condition = True
                return super().visit_function_call(exp)

        visitor = ConditionVisitor()
        query.get_condition().accept(visitor)
        assert visitor.found_hashmap_condition

    entity.get_query_pipeline_builder().build_execution_pipeline(
        request, query_verifier
    ).execute()