예제 #1
0
def test_select_storage(query_body: MutableMapping[str, Any],
                        is_subscription: bool, expected_table: str) -> None:
    """
    Runs a sessions query through the execution pipeline of the dataset's
    default entity and asserts, from inside the query runner, that the
    table name of the FROM clause is ``expected_table``.

    ``is_subscription`` picks SubscriptionQuerySettings over
    HTTPQuerySettings for the request.
    """
    dataset = get_dataset("sessions")
    snql_query = json_to_snql(query_body, "sessions")
    query, snql_anonymized = parse_snql_query(str(snql_query), dataset)
    # The body stored on the request is the one produced by the SnQL query
    # object, not the mapping the test received.
    query_body = json.loads(snql_query.snuba())

    settings_class = (SubscriptionQuerySettings
                      if is_subscription else HTTPQuerySettings)
    attribution = AttributionInfo(get_app_id("default"), "blah", None, None,
                                  None)

    request = Request(
        id="a",
        original_body=query_body,
        query=query,
        snql_anonymized=snql_anonymized,
        query_settings=settings_class(referrer=""),
        attribution_info=attribution,
    )

    def query_runner(query: Query, settings: QuerySettings,
                     reader: Reader) -> QueryResult:
        # The only check of this test: which table the query reads from.
        table_name = query.get_from_clause().table_name
        assert table_name == expected_table
        return QueryResult({}, {})

    pipeline_builder = dataset.get_default_entity().get_query_pipeline_builder()
    pipeline_builder.build_execution_pipeline(request, query_runner).execute()
예제 #2
0
def test_events_processing() -> None:
    """
    Parses a SnQL query on the events dataset that selects
    ``tags[transaction]`` and ``contexts[browser.name]``, runs it through
    the default entity's execution pipeline and asserts, from inside the
    query runner, on the selected columns the runner receives.
    """
    query_body = {
        "query": """
        MATCH (events)
        SELECT tags[transaction], contexts[browser.name]
        WHERE project_id = 1
        AND timestamp >= toDateTime('2020-01-01 12:00:00')
        AND timestamp < toDateTime('2020-01-02 12:00:00')
        """,
        "dataset": "events",
    }

    dataset = get_dataset("events")
    entity = dataset.get_default_entity()

    query, snql_anonymized = parse_snql_query(query_body["query"], dataset)
    attribution = AttributionInfo(get_app_id("blah"), "blah", None, None,
                                  None)
    request = Request(
        id="",
        original_body=query_body,
        query=query,
        snql_anonymized=snql_anonymized,
        query_settings=HTTPQuerySettings(referrer=""),
        attribution_info=attribution,
    )

    def query_runner(query: Query, settings: QuerySettings,
                     reader: Reader) -> QueryResult:
        # tags[transaction] is expected as a plain column reference.
        transaction = Column("_snuba_tags[transaction]", None,
                             "transaction_name")
        # contexts[browser.name] is expected as
        # arrayElement(contexts.value, indexOf(contexts.key, 'browser.name')).
        key_position = FunctionCall(
            None,
            "indexOf",
            (
                Column(None, None, "contexts.key"),
                Literal(None, "browser.name"),
            ),
        )
        browser_name = FunctionCall(
            "_snuba_contexts[browser.name]",
            "arrayElement",
            (Column(None, None, "contexts.value"), key_position),
        )

        assert query.get_selected_columns() == [
            SelectedExpression("tags[transaction]", transaction),
            SelectedExpression("contexts[browser.name]", browser_name),
        ]
        return QueryResult({}, {})

    pipeline_builder = entity.get_query_pipeline_builder()
    pipeline_builder.build_execution_pipeline(request, query_runner).execute()
예제 #3
0
def build_request(
    body: MutableMapping[str, Any],
    parser: Parser,
    settings_class: Union[Type[HTTPQuerySettings], Type[SubscriptionQuerySettings]],
    schema: RequestSchema,
    dataset: Dataset,
    timer: Timer,
    referrer: str,
    custom_processing: Optional[CustomProcessors] = None,
) -> Request:
    """
    Validate ``body`` against ``schema``, parse it with ``parser`` and wrap
    the outcome in a ``Request``.

    The whole operation runs inside a sentry span. When the parsed query
    references exactly one ``project_id`` (or ``org_id``), that id is set as
    a sentry tag. After a successful build the parsed query and the raw body
    are attached to the span, the raw body is also added as a breadcrumb,
    and ``timer`` is marked with ``"validate_schema"``.

    :param body: The request body; stored unchanged as ``original_body``.
    :param parser: Callable invoked with the validated request parts, the
        settings object, ``dataset`` and ``custom_processing``; it returns
        the query and its anonymized form.
    :param settings_class: ``HTTPQuerySettings`` or
        ``SubscriptionQuerySettings``.
    :param schema: Schema used to validate ``body``.
    :param dataset: Dataset handed to ``parser``.
    :param timer: Timer passed to the metric helpers and marked on success.
    :param referrer: Referrer stored in the settings (HTTP case) and in the
        attribution info.
    :param custom_processing: Optional processors forwarded to ``parser``.
    :return: The built ``Request`` with a freshly generated id.
    :raises InvalidJsonRequestException, InvalidQueryException: re-raised
        after ``record_invalid_request`` has been called.
    :raises Exception: any other failure is re-raised after
        ``record_error_building_request`` has been called.
    """
    with sentry_sdk.start_span(description="build_request", op="validate") as span:
        try:
            request_parts = schema.validate(body)
            # NOTE(review): a settings_class other than the two handled
            # below leaves settings_obj unbound; the resulting error is
            # reported through the generic handler further down.
            if settings_class == HTTPQuerySettings:
                query_settings: MutableMapping[str, bool | str] = {
                    **request_parts.query_settings,
                    "consistent": _consistent_override(
                        request_parts.query_settings.get("consistent", False), referrer
                    ),
                }
                query_settings["referrer"] = referrer
                # TODO: referrer probably doesn't need to be passed in, it should be from the body
                settings_obj: Union[
                    HTTPQuerySettings, SubscriptionQuerySettings
                ] = settings_class(
                    **query_settings,
                )
            elif settings_class == SubscriptionQuerySettings:
                settings_obj = settings_class(
                    consistent=_consistent_override(True, referrer),
                )
            query, snql_anonymized = parser(
                request_parts, settings_obj, dataset, custom_processing
            )

            # Tag the sentry scope only when the query targets exactly one id.
            project_ids = get_object_ids_in_query_ast(query, "project_id")
            if project_ids is not None and len(project_ids) == 1:
                sentry_sdk.set_tag("snuba_project_id", project_ids.pop())

            org_ids = get_object_ids_in_query_ast(query, "org_id")
            if org_ids is not None and len(org_ids) == 1:
                sentry_sdk.set_tag("snuba_org_id", org_ids.pop())

            attribution_info = dict(request_parts.attribution_info)
            # TODO: clean this up
            attribution_info["app_id"] = get_app_id(
                request_parts.attribution_info["app_id"]
            )
            attribution_info["referrer"] = referrer

            request_id = uuid.uuid4().hex
            request = Request(
                id=request_id,
                # TODO: Replace this with the actual query raw body.
                # this can have an impact on subscriptions so we need
                # to be careful with the change.
                original_body=body,
                query=query,
                attribution_info=AttributionInfo(**attribution_info),
                query_settings=settings_obj,
                snql_anonymized=snql_anonymized,
            )
        except (InvalidJsonRequestException, InvalidQueryException):
            record_invalid_request(timer, referrer)
            # Bare raise re-raises the active exception with its traceback.
            raise
        except Exception:
            record_error_building_request(timer, referrer)
            raise

        span.set_data(
            "snuba_query_parsed",
            repr(query).split("\n"),
        )

        # The wrapped raw body goes to both the span and the breadcrumb, so
        # compute it once instead of running repr() and wrap() twice.
        raw_body_lines = textwrap.wrap(
            repr(request.original_body), 100, break_long_words=False
        )
        span.set_data(
            "snuba_query_raw",
            raw_body_lines,
        )
        sentry_sdk.add_breadcrumb(
            category="query_info",
            level="info",
            message="snuba_query_raw",
            # Separate list object so the two payloads stay independent.
            data={"query": list(raw_body_lines)},
        )

        timer.mark("validate_schema")
        return request
예제 #4
0
def test_transform_column_names() -> None:
    """
    Writes one event, then runs a query whose selected expression names
    differ from the aliases of the expressions themselves.
    The assertions check that the keys in the result data and the names in
    the result meta are the SelectedExpression names, not the expression
    aliases (which are meant to be internal).
    """
    events_storage = get_entity(EntityKey.EVENTS).get_writable_storage()
    assert events_storage is not None

    event_id = uuid.uuid4().hex
    event_date = datetime.utcnow()

    event = InsertEvent({
        "event_id": event_id,
        "group_id": 10,
        "primary_hash": uuid.uuid4().hex,
        "project_id": 1,
        "message": "a message",
        "platform": "python",
        "datetime": event_date.strftime(settings.PAYLOAD_DATETIME_FORMAT),
        "data": {"received": time.time()},
        "organization_id": 1,
        "retention_days": settings.DEFAULT_RETENTION_DAYS,
    })
    write_unprocessed_events(events_storage, [event])

    # Names ("event_id", "message") are what the user provides in the query
    # and expects back in the response; the "_snuba_"-prefixed aliases are
    # internal names that prevent shadowing.
    event_id_expression = SelectedExpression(
        "event_id",
        Column("_snuba_event_id", None, "event_id"),
    )
    message_expression = SelectedExpression(
        "message",
        FunctionCall(
            "_snuba_message",
            "ifNull",
            (Column(None, None, "message"), Literal(None, "")),
        ),
    )
    query = Query(
        Entity(EntityKey.EVENTS,
               get_entity(EntityKey.EVENTS).get_data_model()),
        selected_columns=[event_id_expression, message_expression],
    )
    query_settings = HTTPQuerySettings(referrer="asd")

    dataset = get_dataset("events")
    timer = Timer("test")

    request = Request(
        id="asd",
        original_body={},
        query=query,
        snql_anonymized="",
        query_settings=query_settings,
        attribution_info=AttributionInfo(get_app_id("blah"), "blah", None,
                                         None, None),
    )
    result = parse_and_run_query(dataset, request, timer)

    data = result.result["data"]
    assert data == [{"event_id": event_id, "message": "a message"}]

    meta = result.result["meta"]
    assert meta == [
        MetaColumn(name="event_id", type="String"),
        MetaColumn(name="message", type="String"),
    ]
예제 #5
0
def test_simple() -> None:
    """
    Builds a SnubaQueryMetadata for a single successful ClickHouse query,
    serializes it with ``to_dict`` and asserts that the querylog storage's
    message processor turns it into the expected InsertBatch row.
    """
    request_body = {
        "selected_columns": ["event_id"],
        "orderby": "event_id",
        "sample": 0.1,
        "limit": 100,
        "offset": 50,
        "project": 1,
    }

    events_entity = Entity(EntityKey.EVENTS,
                           get_entity(EntityKey.EVENTS).get_data_model())
    query = Query(events_entity)

    request = Request(
        id=uuid.UUID("a" * 32).hex,
        original_body=request_body,
        query=query,
        snql_anonymized="",
        query_settings=HTTPQuerySettings(referrer="search"),
        attribution_info=AttributionInfo(get_app_id("default"), "search", None,
                                         None, None),
    )

    # Named `clock` so the local does not read like the stdlib time module.
    clock = TestingClock()
    timer = Timer("test", clock=clock)
    clock.sleep(0.01)

    sql = "select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100"
    sql_anonymized = "select event_id from sentry_dist sample 0.1 prewhere project_id in ($I) limit 50, 100"
    profile = ClickhouseQueryProfile(
        time_range=10,
        table="events",
        all_columns={"timestamp", "tags"},
        multi_level_condition=False,
        where_profile=FilterProfile(
            columns={"timestamp"},
            mapping_cols={"tags"},
        ),
        groupby_cols=set(),
        array_join_cols=set(),
    )

    metadata = SnubaQueryMetadata(
        request=request,
        start_timestamp=datetime.utcnow() - timedelta(days=3),
        end_timestamp=datetime.utcnow(),
        dataset="events",
        timer=timer,
        query_list=[
            ClickhouseQueryMetadata(
                sql=sql,
                sql_anonymized=sql_anonymized,
                start_timestamp=datetime.utcnow() - timedelta(days=3),
                end_timestamp=datetime.utcnow(),
                stats={"sample": 10, "error_code": 386},
                status=QueryStatus.SUCCESS,
                profile=profile,
                trace_id="b" * 32,
            )
        ],
        projects={2},
        snql_anonymized=request.snql_anonymized,
        entity=EntityKey.EVENTS.value,
    )
    message = metadata.to_dict()

    querylog_writer = get_writable_storage(StorageKey.QUERYLOG).get_table_writer()
    processor = querylog_writer.get_stream_loader().get_processor()

    # Process first, build the expectation afterwards: same evaluation
    # order as comparing the two expressions directly in one assert.
    processed = processor.process_message(
        message, KafkaMessageMetadata(0, 0, datetime.now())
    )

    expected_row = {
        "request_id": str(uuid.UUID("a" * 32)),
        "request_body": '{"limit": 100, "offset": 50, "orderby": "event_id", "project": 1, "sample": 0.1, "selected_columns": ["event_id"]}',
        "referrer": "search",
        "dataset": "events",
        "projects": [2],
        "organization": None,
        "timestamp": timer.for_json()["timestamp"],
        "duration_ms": 10,
        "status": "success",
        "clickhouse_queries.sql": [
            "select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100"
        ],
        "clickhouse_queries.status": ["success"],
        "clickhouse_queries.trace_id": [str(uuid.UUID("b" * 32))],
        "clickhouse_queries.duration_ms": [0],
        "clickhouse_queries.stats": ['{"error_code": 386, "sample": 10}'],
        "clickhouse_queries.final": [0],
        "clickhouse_queries.cache_hit": [0],
        "clickhouse_queries.sample": [10.0],
        "clickhouse_queries.max_threads": [0],
        "clickhouse_queries.num_days": [10],
        "clickhouse_queries.clickhouse_table": [""],
        "clickhouse_queries.query_id": [""],
        "clickhouse_queries.is_duplicate": [0],
        "clickhouse_queries.consistent": [0],
        "clickhouse_queries.all_columns": [["tags", "timestamp"]],
        "clickhouse_queries.or_conditions": [False],
        "clickhouse_queries.where_columns": [["timestamp"]],
        "clickhouse_queries.where_mapping_columns": [["tags"]],
        "clickhouse_queries.groupby_columns": [[]],
        "clickhouse_queries.array_join_columns": [[]],
    }

    assert processed == InsertBatch([expected_row], None)
예제 #6
0
def test() -> None:
    """
    Executes one events query through a PipelineDelegator whose selector
    returns "errors" as primary and "errors_ro" as secondary pipeline.

    The assertions check that the query runner ran twice, that the two runs
    received distinct settings objects, and that the callback was called
    with the query, the settings, the referrer and both results.
    """
    callback_done = threading.Condition()
    query_result = QueryResult({}, {"stats": {}, "sql": "", "experiments": {}})

    def callback_func(primary: Optional[Tuple[str, QueryResult]],
                      other: List[Tuple[str, QueryResult]]) -> None:
        # Signals the wait() at the end of the `with callback_done` block.
        with callback_done:
            callback_done.notify()

    mock_callback = Mock(side_effect=callback_func)

    query_body = {
        "query": """
        MATCH (events)
        SELECT type, project_id
        WHERE project_id = 1
        AND timestamp >= toDateTime('2020-01-01 12:00:00')
        AND timestamp < toDateTime('2020-01-02 12:00:00')
        """,
        "dataset": "events",
    }

    events = get_dataset("events")
    query, _ = parse_snql_query(query_body["query"], events)

    def single_storage_pipeline(storage_key: StorageKey) -> SimplePipelineBuilder:
        # One pipeline builder reading from exactly one storage.
        plan_builder = SingleStorageQueryPlanBuilder(
            storage=get_storage(storage_key))
        return SimplePipelineBuilder(query_plan_builder=plan_builder)

    errors_pipeline = single_storage_pipeline(StorageKey.ERRORS)
    errors_ro_pipeline = single_storage_pipeline(StorageKey.ERRORS_RO)

    delegator = PipelineDelegator(
        query_pipeline_builders={
            "errors": errors_pipeline,
            "errors_ro": errors_ro_pipeline,
        },
        selector_func=lambda query, referrer: ("errors", ["errors_ro"]),
        split_rate_limiter=True,
        ignore_secondary_exceptions=True,
        callback_func=mock_callback,
    )

    runner_call_count = 0
    runner_settings: MutableSequence[QuerySettings] = []

    def query_runner(
        query: Union[Query, CompositeQuery[Table]],
        settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        # Only the counter is rebound; the list is mutated in place.
        nonlocal runner_call_count

        runner_call_count += 1
        runner_settings.append(settings)
        return query_result

    set_config("pipeline_split_rate_limiter", 1)

    with callback_done:
        query_settings = HTTPQuerySettings(referrer="ref")
        request = Request(
            id="asd",
            original_body=query_body,
            query=query,
            snql_anonymized="",
            query_settings=query_settings,
            attribution_info=AttributionInfo(get_app_id("ref"), "ref", None,
                                             None, None),
        )
        pipeline = delegator.build_execution_pipeline(request, query_runner)
        pipeline.execute()
        callback_done.wait(timeout=5)

    assert runner_call_count == 2
    assert len(runner_settings) == 2
    primary_settings, secondary_settings = runner_settings
    # Validate that settings have been duplicated
    assert id(primary_settings) != id(secondary_settings)

    expected_call = call(
        query,
        query_settings,
        "ref",
        Result("errors", query_result, ANY),
        [Result("errors_ro", query_result, ANY)],
    )
    assert mock_callback.call_args == expected_call
예제 #7
0
def test_sessions_processing() -> None:
    """
    Parses a SnQL query on the sessions dataset that selects
    ``duration_quantiles``, ``sessions`` and ``users``, runs it through the
    default entity's execution pipeline and asserts, from inside the query
    runner, on the selected columns the runner receives.
    """
    query_body = {
        "query": """
        MATCH (sessions)
        SELECT duration_quantiles, sessions, users
        WHERE org_id = 1
        AND project_id = 1
        AND started >= toDateTime('2020-01-01T12:00:00')
        AND started < toDateTime('2020-01-02T12:00:00')
        """,
        "dataset": "sessions",
    }

    dataset = get_dataset("sessions")
    query, snql_anonymized = parse_snql_query(query_body["query"], dataset)
    attribution = AttributionInfo(get_app_id("default"), "", None, None, None)
    request = Request(
        id="a",
        original_body=query_body,
        query=query,
        snql_anonymized=snql_anonymized,
        query_settings=HTTPQuerySettings(referrer=""),
        attribution_info=attribution,
    )

    def query_runner(query: Query, settings: QuerySettings,
                     reader: Reader) -> QueryResult:
        # duration_quantiles: quantilesIfMerge(<levels>)(duration_quantiles)
        quantile_levels = tuple(
            Literal(None, level) for level in (0.5, 0.75, 0.9, 0.95, 0.99, 1))
        duration_quantiles = CurriedFunctionCall(
            "_snuba_duration_quantiles",
            FunctionCall(None, "quantilesIfMerge", quantile_levels),
            (Column(None, None, "duration_quantiles"), ),
        )

        # sessions: countIfMerge(sessions) + sumIfMerge(sessions_preaggr)
        raw_sessions = FunctionCall(None, "countIfMerge",
                                    (Column(None, None, "sessions"), ))
        preaggregated_sessions = FunctionCall(
            None, "sumIfMerge", (Column(None, None, "sessions_preaggr"), ))
        sessions_total = FunctionCall(
            "_snuba_sessions",
            "plus",
            (raw_sessions, preaggregated_sessions),
        )

        # users: uniqIfMerge(users)
        users = FunctionCall("_snuba_users", "uniqIfMerge",
                             (Column(None, None, "users"), ))

        assert query.get_selected_columns() == [
            SelectedExpression("duration_quantiles", duration_quantiles),
            SelectedExpression("sessions", sessions_total),
            SelectedExpression("users", users),
        ]
        return QueryResult({}, {})

    pipeline_builder = dataset.get_default_entity().get_query_pipeline_builder()
    pipeline_builder.build_execution_pipeline(request, query_runner).execute()
예제 #8
0
def test_metrics_processing(
    entity_name: str,
    column_name: str,
    entity_key: EntityKey,
    translated_value: Expression,
) -> None:
    """
    Parses a SnQL query on the metrics dataset that selects ``column_name``
    grouped by org_id, project_id and tags[10], runs it through the
    execution pipeline of the entity identified by ``entity_key`` and
    asserts, from inside the query runner, that ``column_name`` arrives as
    ``translated_value`` after the three group-by columns.

    Dev features are switched on, the disabled datasets are cleared and the
    factory, storage factory and cluster modules are reloaded before the
    query is parsed.
    """
    settings.ENABLE_DEV_FEATURES = True
    settings.DISABLED_DATASETS = set()

    for module in (factory, storage_factory, cluster):
        importlib.reload(module)

    conditions = [
        "timestamp >= toDateTime('2021-05-17 19:42:01')",
        "timestamp < toDateTime('2021-05-17 23:42:01')",
        "org_id = 1",
        "project_id = 1",
    ]
    snql = (f"MATCH ({entity_name}) "
            f"SELECT {column_name} BY org_id, project_id, tags[10] "
            "WHERE " + " AND ".join(conditions))
    query_body = {"query": snql}

    metrics_dataset = get_dataset("metrics")
    # The anonymized form is not used; the request carries an empty string.
    query, _ = parse_snql_query(query_body["query"], metrics_dataset)

    request = Request(
        id="",
        original_body=query_body,
        query=query,
        snql_anonymized="",
        query_settings=HTTPQuerySettings(referrer=""),
        attribution_info=AttributionInfo(get_app_id("blah"), "blah", None,
                                         None, None),
    )

    def query_runner(
        query: Union[Query, CompositeQuery[Table]],
        settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        # tags[10] is expected as
        # arrayElement(tags.value, indexOf(tags.key, 10)).
        tag_position = FunctionCall(
            None,
            "indexOf",
            (Column(None, None, "tags.key"), Literal(None, 10)),
        )
        tag_value = FunctionCall(
            "_snuba_tags[10]",
            "arrayElement",
            (Column(None, None, "tags.value"), tag_position),
        )

        expected_columns = [
            SelectedExpression("org_id",
                               Column("_snuba_org_id", None, "org_id")),
            SelectedExpression("project_id",
                               Column("_snuba_project_id", None,
                                      "project_id")),
            SelectedExpression("tags[10]", tag_value),
            SelectedExpression(column_name, translated_value),
        ]
        assert query.get_selected_columns() == expected_columns

        return QueryResult(
            result={"meta": [], "data": [], "totals": {}},
            extra={"stats": {}, "sql": "", "experiments": {}},
        )

    pipeline_builder = get_entity(entity_key).get_query_pipeline_builder()
    pipeline_builder.build_execution_pipeline(request, query_runner).execute()