예제 #1
0
def test_col_split_conditions(
    id_column: str, project_column: str, timestamp_column: str, query, expected_result
) -> None:
    """Check whether the column splitter produces a result for a query body.

    The raw ``query`` body is parsed against the ``events`` dataset, turned
    into a query plan, and the plan's query is handed to a
    ``ColumnSplitQueryStrategy``. The test asserts that "``execute`` returned
    something other than ``None``" equals ``expected_result``.
    """
    events_dataset = get_dataset("events")
    query = parse_query(query, events_dataset)
    splitter = ColumnSplitQueryStrategy(id_column, project_column, timestamp_column)
    request = Request("a", query, HTTPRequestSettings(), {}, "r")
    entity = get_entity(query.get_from_clause().key)
    plan = entity.get_query_plan_builder().build_plan(request)

    def do_query(
        query: ClickhouseQuery, request_settings: RequestSettings,
    ) -> QueryResult:
        # Stub runner: always answers with a single canned row keyed by the
        # three column names under test. The row is rebuilt on every call so
        # no state is shared between invocations.
        canned_row = {
            id_column: "asd123",
            project_column: 123,
            timestamp_column: "2019-10-01 22:33:42",
        }
        return QueryResult({"data": [canned_row]}, {})

    outcome = splitter.execute(plan.query, HTTPRequestSettings(), do_query)
    produced_result = outcome is not None
    assert produced_result == expected_result
예제 #2
0
def test_no_split(dataset_name: str, id_column: str, project_column: str,
                  timestamp_column: str) -> None:
    """Run a bare query through the execution strategy and check it is untouched.

    A ``ClickhouseQuery`` built from nothing but the first storage's data
    source is executed through a ``SimpleQueryPlanExecutionStrategy``
    configured with both the column and the time splitter. The stub runner
    asserts that the query it receives equals the one created here
    (presumably because neither splitter should engage, as the test name
    suggests).
    """
    events = get_dataset(dataset_name)
    query = ClickhouseQuery(
        events.get_default_entity().get_all_storages()
        [0].get_schema().get_data_source(), )
    # Keep the original query under a distinct name: inside ``do_query`` the
    # ``query`` parameter shadows this variable, so comparing ``query`` with
    # ``query`` there would only compare the argument with itself.
    expected_query = query

    def do_query(
        query: ClickhouseQuery,
        request_settings: RequestSettings,
        reader: Reader,
    ) -> QueryResult:
        # The runner must receive the query exactly as it was submitted.
        assert query == expected_query
        return QueryResult({}, {})

    strategy = SimpleQueryPlanExecutionStrategy(
        ClickhouseCluster("localhost", 1024, "default", "", "default", 80,
                          set(), True),
        [],
        [
            ColumnSplitQueryStrategy(
                id_column=id_column,
                project_column=project_column,
                timestamp_column=timestamp_column,
            ),
            TimeSplitQueryStrategy(timestamp_col=timestamp_column),
        ],
    )

    strategy.execute(query, HTTPRequestSettings(), do_query)
예제 #3
0
def test_col_split(
    dataset_name: str,
    id_column: str,
    project_column: str,
    timestamp_column: str,
    first_query_data: Sequence[MutableMapping[str, Any]],
    second_query_data: Sequence[MutableMapping[str, Any]],
) -> None:
    """Drive a query through the execution strategy with a two-answer stub.

    The stub runner returns ``first_query_data`` when the selected columns
    match the keys of its first row, ``second_query_data`` when they match
    the keys of that sequence's first row, and raises ``ValueError`` for any
    other column selection.
    """

    def do_query(
        query: ClickhouseQuery,
        request_settings: RequestSettings,
        reader: Reader[SqlQuery],
    ) -> QueryResult:
        selected_cols = query.get_selected_columns()
        # The legacy column list and the AST column list must agree.
        ast_column_names = [
            selected.expression.column_name
            for selected in query.get_selected_columns_from_ast() or []
            if isinstance(selected.expression, Column)
        ]
        assert selected_cols == ast_column_names

        if selected_cols == list(first_query_data[0].keys()):
            return QueryResult({"data": first_query_data}, {})
        if selected_cols == list(second_query_data[0].keys()):
            return QueryResult({"data": second_query_data}, {})
        raise ValueError(f"Unexpected selected columns: {selected_cols}")

    events = get_dataset(dataset_name)

    # The query selects the columns of the second data set, both in the
    # legacy body and in the AST selected expressions.
    legacy_body = {
        "selected_columns": list(second_query_data[0].keys()),
        "conditions": [""],
        "orderby": "events.event_id",
        "sample": 10,
        "limit": 100,
        "offset": 50,
    }
    data_source = events.get_all_storages()[0].get_schema().get_data_source()
    ast_selection = [
        SelectedExpression(name=col_name, expression=Column(None, None, col_name))
        for col_name in second_query_data[0].keys()
    ]
    query = ClickhouseQuery(
        LogicalQuery(legacy_body, data_source, selected_columns=ast_selection)
    )

    cluster = ClickhouseCluster(
        "localhost", 1024, "default", "", "default", 80, set(), True
    )
    splitters = [
        ColumnSplitQueryStrategy(id_column, project_column, timestamp_column),
        TimeSplitQueryStrategy(timestamp_col=timestamp_column),
    ]
    strategy = SimpleQueryPlanExecutionStrategy(cluster, [], splitters)

    strategy.execute(query, HTTPRequestSettings(), do_query)
예제 #4
0
def test_col_split_conditions(id_column: str, project_column: str,
                              timestamp_column: str, query,
                              expected_result) -> None:
    """Check whether the column splitter produces a result for a query body.

    The raw ``query`` body is parsed against the ``events`` dataset and passed
    straight to a ``ColumnSplitQueryStrategy``. The test asserts that
    "``execute`` returned something other than ``None``" equals
    ``expected_result``.
    """
    events_dataset = get_dataset("events")
    query = parse_query(query, events_dataset)
    splitter = ColumnSplitQueryStrategy(
        id_column, project_column, timestamp_column
    )

    def do_query(query: ClickhouseQuery,
                 request_settings: RequestSettings = None) -> QueryResult:
        # Stub runner: always answers with a single canned row keyed by the
        # three column names under test, rebuilt on every call.
        canned_row = {
            id_column: "asd123",
            project_column: 123,
            timestamp_column: "2019-10-01 22:33:42",
        }
        return QueryResult({"data": [canned_row]}, {})

    outcome = splitter.execute(query, HTTPRequestSettings(), do_query)
    produced_result = outcome is not None
    assert produced_result == expected_result
예제 #5
0
def test_col_split(
    dataset_name: str,
    id_column: str,
    project_column: str,
    timestamp_column: str,
    first_query_data: Sequence[MutableMapping[str, Any]],
    second_query_data: Sequence[MutableMapping[str, Any]],
) -> None:
    """Drive a query through the execution strategy with a two-answer stub.

    The stub runner looks at the names of the plain ``Column`` expressions the
    query selects: it returns ``first_query_data`` when they match the keys of
    that sequence's first row, ``second_query_data`` when they match the keys
    of the second sequence's first row, and raises ``ValueError`` otherwise.
    """

    def do_query(
        query: ClickhouseQuery,
        query_settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        # Only plain Column expressions contribute a name; others are ignored.
        selected_col_names = [
            selected.expression.column_name
            for selected in query.get_selected_columns() or []
            if isinstance(selected.expression, Column)
        ]

        if selected_col_names == list(first_query_data[0].keys()):
            return QueryResult({"data": first_query_data}, {})
        if selected_col_names == list(second_query_data[0].keys()):
            return QueryResult({"data": second_query_data}, {})
        raise ValueError(
            f"Unexpected selected columns: {selected_col_names}")

    events = get_dataset(dataset_name)

    # The query reads from the first storage of the default entity and
    # selects the columns of the second data set.
    first_storage = events.get_default_entity().get_all_storages()[0]
    data_source = first_storage.get_schema().get_data_source()
    selection = [
        SelectedExpression(name=col_name, expression=Column(None, None, col_name))
        for col_name in second_query_data[0].keys()
    ]
    query = ClickhouseQuery(data_source, selected_columns=selection)

    cluster = ClickhouseCluster(
        "localhost", 1024, "default", "", "default", 80, set(), True
    )
    splitters = [
        ColumnSplitQueryStrategy(id_column, project_column, timestamp_column),
        TimeSplitQueryStrategy(timestamp_col=timestamp_column),
    ]
    strategy = SimpleQueryPlanExecutionStrategy(cluster, [], splitters)

    strategy.execute(query, HTTPQuerySettings(), do_query)
예제 #6
0
def test_set_limit_on_split_query():
    """Verify the limit seen by the runner on the second split query.

    A query selecting every column of the first ``events`` storage, with a
    limit of 420, is executed by a ``ColumnSplitQueryStrategy``. The stub
    runner answers the first call with two rows; on any later call it asserts
    the incoming query's limit is 2. Finally the test asserts the runner was
    invoked exactly twice.
    """
    storage = get_dataset("events").get_default_entity().get_all_storages()[0]
    events_table = Table("events", storage.get_schema().get_columns())
    every_column = [
        SelectedExpression(column.name, Column(None, None, column.name))
        for column in storage.get_schema().get_columns()
    ]
    query = ClickhouseQuery(
        events_table,
        selected_columns=every_column,
        limit=420,
    )

    query_run_count = 0

    def do_query(query: ClickhouseQuery,
                 query_settings: QuerySettings) -> QueryResult:
        nonlocal query_run_count
        query_run_count += 1

        if query_run_count != 1:
            # Every call after the first must carry a limit of 2.
            assert query.get_limit() == 2
            return QueryResult({}, {})

        # First call: two rows that differ only in their timestamp.
        first_rows = [
            {"event_id": "a", "project_id": "1", "timestamp": stamp}
            for stamp in (" 2019-10-01 22:33:42", " 2019-10-01 22:44:42")
        ]
        return QueryResult(result={"data": first_rows}, extra={})

    splitter = ColumnSplitQueryStrategy(
        id_column="event_id",
        project_column="project_id",
        timestamp_column="timestamp",
    )
    splitter.execute(query, HTTPQuerySettings(), do_query)
    assert query_run_count == 2
예제 #7
0
파일: test_split.py 프로젝트: alexef/snuba
def test_no_split(
    dataset_name: str, id_column: str, project_column: str, timestamp_column: str
) -> None:
    """Run a simple query through the execution strategy and check it is untouched.

    A ``ClickhouseQuery`` wrapping a ``LogicalQuery`` that selects only
    ``event_id`` is executed through a ``SimpleQueryPlanExecutionStrategy``
    configured with both the column and the time splitter. The stub runner
    asserts that the query it receives equals the one created here
    (presumably because neither splitter should engage, as the test name
    suggests).
    """
    events = get_dataset(dataset_name)
    query = ClickhouseQuery(
        LogicalQuery(
            {
                "selected_columns": ["event_id"],
                "conditions": [""],
                "orderby": "event_id",
                "sample": 10,
                "limit": 100,
                "offset": 50,
            },
            events.get_all_storages()[0].get_schema().get_data_source(),
        )
    )
    # Keep the original query under a distinct name: inside ``do_query`` the
    # ``query`` parameter shadows this variable, so comparing ``query`` with
    # ``query`` there would only compare the argument with itself.
    expected_query = query

    def do_query(
        query: ClickhouseQuery,
        request_settings: RequestSettings,
        reader: Reader[SqlQuery],
    ) -> QueryResult:
        # The runner must receive the query exactly as it was submitted.
        assert query == expected_query
        return QueryResult({}, {})

    strategy = SimpleQueryPlanExecutionStrategy(
        ClickhouseCluster("localhost", 1024, "default", "", "default", 80, set(), True),
        [],
        [
            ColumnSplitQueryStrategy(
                id_column=id_column,
                project_column=project_column,
                timestamp_column=timestamp_column,
            ),
            TimeSplitQueryStrategy(timestamp_col=timestamp_column),
        ],
    )

    strategy.execute(query, HTTPRequestSettings(), do_query)
예제 #8
0
                    "sentry:dist": "dist",
                    "sentry:user": "******",
                },
                "contexts": {"trace.trace_id": "trace_id"},
            }
        ),
        MappingOptimizer("tags", "_tags_hash_map", "tags_hash_map_enabled"),
        ArrayJoinKeyValueOptimizer("tags"),
        UUIDColumnProcessor(set(["event_id", "trace_id"])),
        EventsBooleanContextsProcessor(),
        PrewhereProcessor(
            [
                "event_id",
                "release",
                "message",
                "transaction_name",
                "environment",
                "project_id",
            ]
        ),
    ],
    query_splitters=[
        ColumnSplitQueryStrategy(
            id_column="event_id",
            project_column="project_id",
            timestamp_column="timestamp",
        ),
        TimeSplitQueryStrategy(timestamp_col="timestamp"),
    ],
)