Exemple #1
0
def test_no_split(dataset_name: str, id_column: str, project_column: str,
                  timestamp_column: str) -> None:
    events = get_dataset(dataset_name)
    query = ClickhouseQuery(
        events.get_default_entity().get_all_storages()
        [0].get_schema().get_data_source(), )

    def do_query(
        query: ClickhouseQuery,
        request_settings: RequestSettings,
        reader: Reader,
    ) -> QueryResult:
        assert query == query
        return QueryResult({}, {})

    strategy = SimpleQueryPlanExecutionStrategy(
        ClickhouseCluster("localhost", 1024, "default", "", "default", 80,
                          set(), True),
        [],
        [
            ColumnSplitQueryStrategy(
                id_column=id_column,
                project_column=project_column,
                timestamp_column=timestamp_column,
            ),
            TimeSplitQueryStrategy(timestamp_col=timestamp_column),
        ],
    )

    strategy.execute(query, HTTPRequestSettings(), do_query)
Exemple #2
0
def test_col_split(
    dataset_name: str,
    id_column: str,
    project_column: str,
    timestamp_column: str,
    first_query_data: Sequence[MutableMapping[str, Any]],
    second_query_data: Sequence[MutableMapping[str, Any]],
) -> None:
    def do_query(
        query: ClickhouseQuery,
        request_settings: RequestSettings,
        reader: Reader[SqlQuery],
    ) -> QueryResult:
        selected_cols = query.get_selected_columns()
        assert selected_cols == [
            c.expression.column_name
            for c in query.get_selected_columns_from_ast() or []
            if isinstance(c.expression, Column)
        ]
        if selected_cols == list(first_query_data[0].keys()):
            return QueryResult({"data": first_query_data}, {})
        elif selected_cols == list(second_query_data[0].keys()):
            return QueryResult({"data": second_query_data}, {})
        else:
            raise ValueError(f"Unexpected selected columns: {selected_cols}")

    events = get_dataset(dataset_name)
    query = ClickhouseQuery(
        LogicalQuery(
            {
                "selected_columns": list(second_query_data[0].keys()),
                "conditions": [""],
                "orderby": "events.event_id",
                "sample": 10,
                "limit": 100,
                "offset": 50,
            },
            events.get_all_storages()[0].get_schema().get_data_source(),
            selected_columns=[
                SelectedExpression(name=col_name,
                                   expression=Column(None, None, col_name))
                for col_name in second_query_data[0].keys()
            ],
        ))

    strategy = SimpleQueryPlanExecutionStrategy(
        ClickhouseCluster("localhost", 1024, "default", "", "default", 80,
                          set(), True),
        [],
        [
            ColumnSplitQueryStrategy(id_column, project_column,
                                     timestamp_column),
            TimeSplitQueryStrategy(timestamp_col=timestamp_column),
        ],
    )

    strategy.execute(query, HTTPRequestSettings(), do_query)
Exemple #3
0
def test_col_split(
    dataset_name: str,
    id_column: str,
    project_column: str,
    timestamp_column: str,
    first_query_data: Sequence[MutableMapping[str, Any]],
    second_query_data: Sequence[MutableMapping[str, Any]],
) -> None:
    def do_query(
        query: ClickhouseQuery,
        query_settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        selected_col_names = [
            c.expression.column_name
            for c in query.get_selected_columns() or []
            if isinstance(c.expression, Column)
        ]
        if selected_col_names == list(first_query_data[0].keys()):
            return QueryResult({"data": first_query_data}, {})
        elif selected_col_names == list(second_query_data[0].keys()):
            return QueryResult({"data": second_query_data}, {})
        else:
            raise ValueError(
                f"Unexpected selected columns: {selected_col_names}")

    events = get_dataset(dataset_name)
    query = ClickhouseQuery(
        events.get_default_entity().get_all_storages()
        [0].get_schema().get_data_source(),
        selected_columns=[
            SelectedExpression(name=col_name,
                               expression=Column(None, None, col_name))
            for col_name in second_query_data[0].keys()
        ],
    )

    strategy = SimpleQueryPlanExecutionStrategy(
        ClickhouseCluster("localhost", 1024, "default", "", "default", 80,
                          set(), True),
        [],
        [
            ColumnSplitQueryStrategy(id_column, project_column,
                                     timestamp_column),
            TimeSplitQueryStrategy(timestamp_col=timestamp_column),
        ],
    )

    strategy.execute(query, HTTPQuerySettings(), do_query)
Exemple #4
0
def build_plan(table_name: str, storage_set: StorageSetKey) -> ClickhouseQueryPlan:
    return ClickhouseQueryPlan(
        Query(Table(table_name, ColumnSet([]))),
        SimpleQueryPlanExecutionStrategy(
            get_cluster(storage_set), db_query_processors=[],
        ),
        storage_set,
        plan_query_processors=[],
        db_query_processors=[],
    )
Exemple #5
0
def test_no_split(
    dataset_name: str, id_column: str, project_column: str, timestamp_column: str
) -> None:
    events = get_dataset(dataset_name)
    query = ClickhouseQuery(
        LogicalQuery(
            {
                "selected_columns": ["event_id"],
                "conditions": [""],
                "orderby": "event_id",
                "sample": 10,
                "limit": 100,
                "offset": 50,
            },
            events.get_all_storages()[0].get_schema().get_data_source(),
        )
    )

    def do_query(
        query: ClickhouseQuery,
        request_settings: RequestSettings,
        reader: Reader[SqlQuery],
    ) -> QueryResult:
        assert query == query
        return QueryResult({}, {})

    strategy = SimpleQueryPlanExecutionStrategy(
        ClickhouseCluster("localhost", 1024, "default", "", "default", 80, set(), True),
        [],
        [
            ColumnSplitQueryStrategy(
                id_column=id_column,
                project_column=project_column,
                timestamp_column=timestamp_column,
            ),
            TimeSplitQueryStrategy(timestamp_col=timestamp_column),
        ],
    )

    strategy.execute(query, HTTPRequestSettings(), do_query)