Example #1
def test_failures(
    query_body: MutableMapping[str, Any],
    expected_exception: Type[InvalidQueryException],
) -> None:
    with pytest.raises(expected_exception):
        events = get_dataset("events")
        parse_query(query_body, events)
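The pytest parametrization that drives this test is elided from the snippet. A minimal sketch of what the decorator could look like, reusing the two failure modes shown in Examples #2 and #13 (the concrete bodies are illustrative, not taken from the source):

@pytest.mark.parametrize(
    "query_body, expected_exception",
    [
        # Two expressions claiming the same alias (see Example #2).
        (
            {
                "selected_columns": [["f1", ["column1"], "alias"]],
                "aggregations": [["testF", ["platform"], "alias"]],
            },
            AliasShadowingException,
        ),
        # Aliases that reference each other (see Example #13).
        (
            {"selected_columns": [["f1", ["f2"], "f1"], ["f2", ["f1"], "f2"]]},
            CyclicAliasException,
        ),
    ],
)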
Example #2
def test_shadowing() -> None:
    with pytest.raises(AliasShadowingException):
        parse_query(
            {
                "selected_columns": [
                    ["f1", ["column1", "column2"], "f1_alias"],
                    ["f2", [], "f2_alias"],
                ],
                "aggregations": [["testF", ["platform", "field2"], "f1_alias"
                                  ]  # Shadowing!
                                 ],
            },
            get_dataset("events"),
        )
Example #3
def test_find_projects(
    query_body: MutableMapping[str, Any], expected_projects: Set[int]
) -> None:
    events = get_dataset("events")
    query = identity_translate(parse_query(query_body, events))
    project_ids_ast = get_project_ids_in_query_ast(query, "project_id")
    assert project_ids_ast == expected_projects
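As above, the parametrization is omitted from the snippet. A hypothetical pair of cases built from the `project_id` conditions used throughout these examples:

@pytest.mark.parametrize(
    "query_body, expected_projects",
    [
        (
            {
                "selected_columns": ["event_id"],
                "conditions": [("project_id", "IN", [1, 2])],
            },
            {1, 2},
        ),
        (
            {
                "selected_columns": ["event_id"],
                "conditions": [("project_id", "=", 1)],
            },
            {1},
        ),
    ],
)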
Example #4
def test_get_time_range() -> None:
    """
    Test finding the time range of a query.
    """
    body = {
        "selected_columns": ["event_id"],
        "conditions": [
            # Cannot test complex conditions based on explicit calls to
            # the `and` and `or` functions, because they would not be
            # parsed as datetime by the old parser.
            ("timestamp", ">=", "2019-09-18T10:00:00"),
            ("timestamp", ">=", "2000-09-18T10:00:00"),
            ("timestamp", "<", "2019-09-19T12:00:00"),
            [("timestamp", "<", "2019-09-18T12:00:00"),
             ("project_id", "IN", [1])],
            ("project_id", "IN", [1]),
        ],
    }

    events = get_dataset("events")
    query = parse_query(body, events)
    processors = events.get_default_entity().get_query_processors()
    for processor in processors:
        if isinstance(processor, TimeSeriesProcessor):
            processor.process_query(query, HTTPRequestSettings())

    from_date_ast, to_date_ast = get_time_range(identity_translate(query),
                                                "timestamp")
    assert (from_date_ast is not None and isinstance(from_date_ast, datetime)
            and from_date_ast.isoformat() == "2019-09-18T10:00:00")
    assert (to_date_ast is not None and isinstance(to_date_ast, datetime)
            and to_date_ast.isoformat() == "2019-09-19T12:00:00")
Example #5
def test_col_split_conditions(
    id_column: str, project_column: str, timestamp_column: str, query, expected_result
) -> None:
    dataset = get_dataset("events")
    query = parse_query(query, dataset)
    splitter = ColumnSplitQueryStrategy(id_column, project_column, timestamp_column)
    request = Request("a", query, HTTPRequestSettings(), {}, "r")
    entity = get_entity(query.get_from_clause().key)
    plan = entity.get_query_plan_builder().build_plan(request)

    def do_query(
        query: ClickhouseQuery, request_settings: RequestSettings,
    ) -> QueryResult:
        return QueryResult(
            {
                "data": [
                    {
                        id_column: "asd123",
                        project_column: 123,
                        timestamp_column: "2019-10-01 22:33:42",
                    }
                ]
            },
            {},
        )

    assert (
        splitter.execute(plan.query, HTTPRequestSettings(), do_query) is not None
    ) == expected_result
Example #6
def test_get_time_range() -> None:
    """
    Test finding the time range of a query.
    """
    body = {
        "selected_columns": ["event_id"],
        "conditions": [
            ("timestamp", ">=", "2019-09-18T10:00:00"),
            ("timestamp", ">=", "2000-09-18T10:00:00"),
            ("timestamp", "<", "2019-09-19T12:00:00"),
            [("timestamp", "<", "2019-09-18T12:00:00"),
             ("project_id", "IN", [1])],
            ("project_id", "IN", [1]),
        ],
    }

    events = get_dataset("events")
    query = parse_query(body, events)
    processors = events.get_query_processors()
    for processor in processors:
        if isinstance(processor, TimeSeriesProcessor):
            processor.process_query(query, HTTPRequestSettings())

    from_date_ast, to_date_ast = get_time_range(ClickhouseQuery(query),
                                                "timestamp")
    assert (from_date_ast is not None and isinstance(from_date_ast, datetime)
            and from_date_ast.isoformat() == "2019-09-18T10:00:00")
    assert (to_date_ast is not None and isinstance(to_date_ast, datetime)
            and to_date_ast.isoformat() == "2019-09-19T12:00:00")
Example #7
    def validate(self, value, dataset: Dataset, referrer: str) -> Request:
        value = validate_jsonschema(value, self.__composite_schema)

        query_body = {
            key: value.pop(key)
            for key in self.__query_schema["properties"].keys() if key in value
        }
        settings = {
            key: value.pop(key)
            for key in self.__settings_schema["properties"].keys()
            if key in value
        }

        extensions = {}
        for extension_name, extension_schema in self.__extension_schemas.items():
            extensions[extension_name] = {
                key: value.pop(key)
                for key in extension_schema["properties"].keys()
                if key in value
            }

        query = parse_query(query_body, dataset)
        return Request(query, self.__setting_class(**settings), extensions,
                       referrer)
Example #8
def test() -> None:
    cv = threading.Condition()
    query_result = QueryResult({}, {"stats": {}, "sql": ""})
    mock_query_runner = Mock(return_value=query_result)

    def callback_func(args: List[Tuple[str, QueryResult]]) -> None:
        with cv:
            cv.notify()

    mock_callback = Mock(side_effect=callback_func)

    query_body = {
        "selected_columns": ["type", "project_id"],
    }

    events = get_dataset("events")
    query = parse_query(query_body, events)

    events_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.EVENTS)
        ),
    )

    errors_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.ERRORS)
        ),
    )

    delegator = PipelineDelegator(
        query_pipeline_builders={
            "events": events_pipeline,
            "errors": errors_pipeline
        },
        selector_func=lambda query, referrer: ("events", ["errors"]),
        callback_func=mock_callback,
    )

    with cv:
        request_settings = HTTPRequestSettings()
        delegator.build_execution_pipeline(
            Request(
                "",
                query_body,
                query,
                request_settings,
                "ref",
            ),
            mock_query_runner,
        ).execute()
        cv.wait(timeout=5)

    # The selector returned ("events", ["errors"]), so the runner executed once
    # for the primary pipeline and once for the secondary one.
    assert mock_query_runner.call_count == 2

    assert mock_callback.call_args == call(
        query,
        request_settings,
        "ref",
        [
            Result("events", query_result, ANY),
            Result("errors", query_result, ANY)
        ],
    )
Example #9
    def validate(self, value, dataset: Dataset, referrer: str) -> Request:
        try:
            value = validate_jsonschema(value, self.__composite_schema)
        except jsonschema.ValidationError as error:
            raise JsonSchemaValidationException(str(error)) from error

        query_body = {
            key: value.pop(key)
            for key in self.__query_schema["properties"].keys() if key in value
        }
        settings = {
            key: value.pop(key)
            for key in self.__settings_schema["properties"].keys()
            if key in value
        }

        extensions = {}
        for extension_name, extension_schema in self.__extension_schemas.items():
            extensions[extension_name] = {
                key: value.pop(key)
                for key in extension_schema["properties"].keys()
                if key in value
            }

        query = parse_query(query_body, dataset)

        request_id = uuid.uuid4().hex
        return Request(request_id, query, self.__setting_class(**settings),
                       extensions, referrer)
Example #10
def test_data_source(
    query_body: MutableMapping[str, Any],
    expected_entity: EntityKey,
) -> None:
    dataset = get_dataset("discover")
    query = parse_query(query_body, dataset)

    assert query.get_from_clause().key == expected_entity
Example #11
def test_format_expressions(query_body: MutableMapping[str, Any],
                            expected_query: Query) -> None:
    state.set_config("query_parsing_expand_aliases", 1)
    events = get_dataset("events")
    query = parse_query(query_body, events)

    eq, reason = query.equals(expected_query)
    assert eq, reason
Example #12
def parse_legacy_query(
    request_parts: RequestParts,
    settings: RequestSettings,
    dataset: Dataset,
) -> Union[Query, CompositeQuery[Entity]]:
    query = parse_query(request_parts.query, dataset)
    apply_query_extensions(query, request_parts.extensions, settings)
    return query
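A sketch of how this helper might be called; the keyword construction of RequestParts is an assumption based on the two fields it is shown to carry above:

parts = RequestParts(query={"selected_columns": ["event_id"]}, extensions={})
query = parse_legacy_query(parts, HTTPRequestSettings(), get_dataset("events"))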
Example #13
def test_circular_aliases() -> None:
    with pytest.raises(CyclicAliasException):
        parse_query(
            {
                "selected_columns": [
                    ["f1", ["column1", "f2"], "f1"],
                    ["f2", ["f1"], "f2"],
                ],
            },
            get_dataset("events"),
        )

    with pytest.raises(CyclicAliasException):
        parse_query(
            {"selected_columns": [["f1", [["f2", ["c"], "f2"]], "c"]]},
            get_dataset("events"),
        )
Example #14
def test_alias_validation(query_body: MutableMapping[str, Any],
                          expected_result: bool) -> None:
    events = get_dataset("events")
    query = parse_query(query_body, events)
    query_plan = events.get_query_plan_builder().build_plan(
        Request("", query, HTTPRequestSettings(), {}, ""))

    assert query_plan.query.validate_aliases() == expected_result
Example #15
def test_tags_processor(query_body, expected_query) -> None:
    state.set_config("ast_tag_processor_enabled", 1)
    dataset = get_dataset("transactions")
    query = parse_query(query_body, dataset)
    request_settings = HTTPRequestSettings()

    assert (
        DictClickhouseQuery(dataset, query, request_settings).format_sql()
        == expected_query
    )
Example #16
def test_sessions_processing() -> None:
    query_body = {
        "selected_columns": ["duration_quantiles", "sessions", "users"],
        "conditions": [
            ["org_id", "=", 1],
            ["project_id", "=", 1],
            ["started", ">", "2020-01-01 12:00:00"],
        ],
    }

    sessions = get_dataset("sessions")
    query = parse_query(query_body, sessions)
    request = Request("", query_body, query, HTTPRequestSettings(), "")

    def query_runner(query: Query, settings: RequestSettings,
                     reader: Reader) -> QueryResult:
        quantiles = tuple(
            Literal(None, quant) for quant in [0.5, 0.75, 0.9, 0.95, 0.99, 1])
        assert query.get_selected_columns() == [
            SelectedExpression(
                "duration_quantiles",
                CurriedFunctionCall(
                    "_snuba_duration_quantiles",
                    FunctionCall(
                        None,
                        "quantilesIfMerge",
                        quantiles,
                    ),
                    (Column(None, None, "duration_quantiles"), ),
                ),
            ),
            SelectedExpression(
                "sessions",
                FunctionCall(
                    "_snuba_sessions",
                    "plus",
                    (
                        FunctionCall(None, "countIfMerge",
                                     (Column(None, None, "sessions"), )),
                        FunctionCall(
                            None,
                            "sumIfMerge",
                            (Column(None, None, "sessions_preaggr"), ),
                        ),
                    ),
                ),
            ),
            SelectedExpression(
                "users",
                FunctionCall("_snuba_users", "uniqIfMerge",
                             (Column(None, None, "users"), )),
            ),
        ]
        return QueryResult({}, {})

    pipeline_builder = sessions.get_default_entity().get_query_pipeline_builder()
    pipeline_builder.build_execution_pipeline(request, query_runner).execute()
Example #17
def test_time_split_ast() -> None:
    """
    Test that the time split strategy transforms the query properly
    on the AST representation.
    """
    found_timestamps = []

    def do_query(
        query: ClickhouseQuery,
        request_settings: RequestSettings,
    ) -> QueryResult:
        from_date_ast, to_date_ast = get_time_range(query, "timestamp")
        assert from_date_ast is not None and isinstance(
            from_date_ast, datetime)
        assert to_date_ast is not None and isinstance(to_date_ast, datetime)

        found_timestamps.append(
            (from_date_ast.isoformat(), to_date_ast.isoformat()))

        return QueryResult({"data": []}, {})

    body = {
        "selected_columns": [
            "event_id",
            "level",
            "logger",
            "server_name",
            "transaction",
            "timestamp",
            "project_id",
        ],
        "conditions": [
            ("timestamp", ">=", "2019-09-18T10:00:00"),
            ("timestamp", "<", "2019-09-19T12:00:00"),
            ("project_id", "IN", [1]),
        ],
        "limit":
        10,
        "orderby": ["-timestamp"],
    }

    query = parse_query(body, get_dataset("events"))
    entity = get_entity(query.get_from_clause().key)
    settings = HTTPRequestSettings()
    for p in entity.get_query_processors():
        p.process_query(query, settings)

    clickhouse_query = identity_translate(query)
    splitter = TimeSplitQueryStrategy("timestamp")
    splitter.execute(clickhouse_query, settings, do_query)

    # The strategy queries the most recent slice of the range first and widens
    # each subsequent window (here 1 hour, then 10 hours, then the remainder).
    assert found_timestamps == [
        ("2019-09-19T11:00:00", "2019-09-19T12:00:00"),
        ("2019-09-19T01:00:00", "2019-09-19T11:00:00"),
        ("2019-09-18T10:00:00", "2019-09-19T01:00:00"),
    ]
Example #18
def test_find_projects(query_body: MutableMapping[str, Any],
                       expected_projects: Set[int]) -> None:
    events = get_dataset("events")
    query = parse_query(query_body, events)

    query = ClickhouseQuery(query)
    project_ids = get_project_ids_in_query(query, "project_id")
    assert project_ids == expected_projects

    project_ids_ast = get_project_ids_in_query_ast(query, "project_id")
    assert project_ids_ast == expected_projects
Example #19
def parse_and_process(query_body: MutableMapping[str, Any]) -> ClickhouseQuery:
    dataset = get_dataset("transactions")
    query = parse_query(query_body, dataset)
    request = Request("a", query, HTTPRequestSettings(), {}, "r")
    for p in dataset.get_query_processors():
        p.process_query(query, request.settings)
    plan = dataset.get_query_plan_builder().build_plan(request)

    ArrayJoinKeyValueOptimizer("tags").process_query(plan.query,
                                                     request.settings)
    return plan.query
Example #20
def test_alias_validation(query_body: MutableMapping[str, Any],
                          expected_result: bool) -> None:
    events = get_dataset("events")
    query = parse_query(query_body, events)
    settings = HTTPRequestSettings()
    query_plan = (
        events.get_default_entity()
        .get_query_pipeline_builder()
        .build_planner(query, settings)
        .build_best_plan()
    )
    execute_all_clickhouse_processors(query_plan, settings)

    assert query_plan.query.validate_aliases() == expected_result
Example #21
def test_nested_optimizer(query_body, expected_condition) -> None:
    query = parse_query(query_body, get_dataset("transactions"))
    request_settings = HTTPRequestSettings()
    processor = NestedFieldConditionOptimizer(
        nested_col="tags",
        flattened_col="tags_map",
        timestamp_cols={"start_ts", "finish_ts"},
        beginning_of_time=datetime(2019, 12, 11, 0, 0, 0),
    )
    processor.process_query(query, request_settings)

    assert query.get_conditions() == expected_condition
Example #22
    def validate(
        self, value: MutableMapping[str, Any], dataset: Dataset, referrer: str
    ) -> Request:
        try:
            value = validate_jsonschema(value, self.__composite_schema)
        except jsonschema.ValidationError as error:
            raise JsonSchemaValidationException(str(error)) from error

        query_body = {
            key: value.pop(key)
            for key in self.__query_schema["properties"].keys()
            if key in value
        }
        settings = {
            key: value.pop(key)
            for key in self.__settings_schema["properties"].keys()
            if key in value
        }

        class_name = self.__setting_class
        if isinstance(class_name, type(HTTPRequestSettings)):
            settings_obj: Union[
                HTTPRequestSettings, SubscriptionRequestSettings
            ] = class_name(**settings)
        elif isinstance(class_name, type(SubscriptionRequestSettings)):
            settings_obj = class_name()

        extensions = {}
        for extension_name, extension_schema in self.__extension_schemas.items():
            extensions[extension_name] = {
                key: value.pop(key)
                for key in extension_schema["properties"].keys()
                if key in value
            }

        if self.__language == Language.SNQL:
            query = parse_snql_query(query_body["query"], dataset)
        else:
            query = parse_query(query_body, dataset)
            apply_query_extensions(query, extensions, settings_obj)

        request_id = uuid.uuid4().hex
        return Request(
            request_id,
            # TODO: Replace this with the actual query raw body.
            # this can have an impact on subscriptions so we need
            # to be careful with the change.
            ChainMap(query_body, *extensions.values()),
            query,
            settings_obj,
            referrer,
        )
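The branch on self.__language marks the migration point between the two query languages: a SnQL request carries the whole query as a single string under the "query" key, while a legacy request spreads it across the structured body. An illustrative SnQL body (a syntax sketch only, not validated against the parser):

snql_body = {"query": "MATCH (events) SELECT event_id WHERE project_id = 1"}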
Example #23
def test_select_storage(query_body: MutableMapping[str, Any],
                        expected_table: str) -> None:
    sessions = get_dataset("sessions")
    query = parse_query(query_body, sessions)
    request = Request("", query_body, query, HTTPRequestSettings(), "")

    def query_runner(query: Query, settings: RequestSettings,
                     reader: Reader) -> QueryResult:
        assert query.get_from_clause().table_name == expected_table
        return QueryResult({}, {})

    pipeline_builder = sessions.get_default_entity().get_query_pipeline_builder()
    pipeline_builder.build_execution_pipeline(request, query_runner).execute()
Example #24
def test_events_processing() -> None:
    query_body = {
        "selected_columns": ["tags[transaction]", "contexts[browser.name]"]
    }

    events_dataset = get_dataset("events")
    events_entity = events_dataset.get_default_entity()
    events_storage = events_entity.get_writable_storage()

    query = parse_query(query_body, events_dataset)
    request = Request("", query_body, query, HTTPRequestSettings(), "")

    def query_runner(query: Query, settings: RequestSettings,
                     reader: Reader) -> QueryResult:

        if events_storage.get_storage_key() == StorageKey.EVENTS:
            transaction_col_name = "transaction"
        else:
            transaction_col_name = "transaction_name"

        assert query.get_selected_columns_from_ast() == [
            SelectedExpression(
                "tags[transaction]",
                Column("_snuba_tags[transaction]", None, transaction_col_name),
            ),
            SelectedExpression(
                "contexts[browser.name]",
                FunctionCall(
                    "_snuba_contexts[browser.name]",
                    "arrayElement",
                    (
                        Column(None, None, "contexts.value"),
                        FunctionCall(
                            None,
                            "indexOf",
                            (
                                Column(None, None, "contexts.key"),
                                Literal(None, "browser.name"),
                            ),
                        ),
                    ),
                ),
            ),
        ]
        return QueryResult({}, {})

    events_entity.get_query_pipeline_builder().build_execution_pipeline(
        request, query_runner).execute()
Example #25
def parse_and_process(query_body: MutableMapping[str, Any]) -> ClickhouseQuery:
    dataset = get_dataset("transactions")
    query = parse_query(query_body, dataset)
    request = Request("a", query_body, query, HTTPRequestSettings(), "r")
    entity = get_entity(query.get_from_clause().key)
    for p in entity.get_query_processors():
        p.process_query(query, request.settings)

    ArrayJoinKeyValueOptimizer("tags").process_query(query, request.settings)

    query_plan = SingleStorageQueryPlanBuilder(
        storage=entity.get_writable_storage(),
        mappers=transaction_translator,
    ).build_and_rank_plans(query, request.settings)[0]

    return query_plan.query
Example #26
def test_prewhere(
    query_body: MutableMapping[str, Any],
    keys: Sequence[str],
    new_ast_condition: Optional[Expression],
    new_prewhere_ast_condition: Optional[Expression],
) -> None:
    settings.MAX_PREWHERE_CONDITIONS = 2
    events = get_dataset("events")
    query = parse_query(query_body, events)
    query.set_data_source(TableSource("my_table", ColumnSet([]), None, keys))

    request_settings = HTTPRequestSettings()
    processor = PrewhereProcessor()
    processor.process_query(Query(query), request_settings)

    assert query.get_condition_from_ast() == new_ast_condition
    assert query.get_prewhere_ast() == new_prewhere_ast_condition
Example #27
def test_nested_optimizer(query_body, expected_condition) -> None:
    transactions = get_dataset("transactions")
    query = parse_query(query_body, transactions)
    request_settings = HTTPRequestSettings()
    request = Request("", query, request_settings, {}, "")

    query_plan = transactions.get_query_plan_builder().build_plan(request)
    processor = NestedFieldConditionOptimizer(
        nested_col="tags",
        flattened_col="tags_map",
        timestamp_cols={"start_ts", "finish_ts"},
        beginning_of_time=datetime(2019, 12, 11, 0, 0, 0),
    )
    clickhouse_query = query_plan.query
    processor.process_query(clickhouse_query, request_settings)

    assert clickhouse_query.get_conditions() == expected_condition
Example #28
def test_format_expressions(query_body: MutableMapping[str, Any],
                            expected_query: Query) -> None:
    events = get_dataset("events")
    query = parse_query(query_body, events)

    # We cannot just run == on the query objects. The content of the two
    # objects is different: one holds only the AST, the other the AST plus
    # the raw body.
    assert (
        query.get_selected_columns_from_ast()
        == expected_query.get_selected_columns_from_ast()
    )
    assert query.get_groupby_from_ast() == expected_query.get_groupby_from_ast()
    assert query.get_condition_from_ast() == expected_query.get_condition_from_ast()
    assert query.get_arrayjoin_from_ast() == expected_query.get_arrayjoin_from_ast()
    assert query.get_having_from_ast() == expected_query.get_having_from_ast()
    assert query.get_orderby_from_ast() == expected_query.get_orderby_from_ast()
Example #29
def test_data_source(
    query_body: MutableMapping[str, Any], expected_table: str,
) -> None:
    request_settings = HTTPRequestSettings()
    dataset = get_dataset("discover")
    query = parse_query(query_body, dataset)
    request = Request("a", query, request_settings, {}, "r")
    for processor in get_dataset("discover").get_query_processors():
        processor.process_query(request.query, request.settings)

    plan = dataset.get_query_plan_builder().build_plan(request)

    for physical_processor in plan.plan_processors:
        physical_processor.process_query(plan.query, request.settings)

    assert plan.query.get_data_source().format_from() == expected_table, json.dumps(
        query_body
    )
Example #30
def test_sessions_processing() -> None:
    query_body = {
        "selected_columns": ["duration_quantiles", "sessions", "users"]
    }

    sessions = get_dataset("sessions")
    query = parse_query(query_body, sessions)
    request = Request("", query, HTTPRequestSettings(), {}, "")

    query_plan = (
        sessions.get_default_entity().get_query_plan_builder().build_plan(request)
    )
    for clickhouse_processor in query_plan.plan_processors:
        clickhouse_processor.process_query(query_plan.query, request.settings)

    def query_runner(query: Query, settings: RequestSettings,
                     reader: Reader[SqlQuery]) -> QueryResult:
        assert query.get_selected_columns_from_ast() == [
            SelectedExpression(
                "duration_quantiles",
                CurriedFunctionCall(
                    "duration_quantiles",
                    FunctionCall(
                        None,
                        "quantilesIfMerge",
                        (Literal(None, 0.5), Literal(None, 0.9)),
                    ),
                    (Column(None, None, "duration_quantiles"), ),
                ),
            ),
            SelectedExpression(
                "sessions",
                FunctionCall("sessions", "countIfMerge",
                             (Column(None, None, "sessions"), )),
            ),
            SelectedExpression(
                "users",
                FunctionCall("users", "uniqIfMerge",
                             (Column(None, None, "users"), )),
            ),
        ]
        return QueryResult({}, {})

    query_plan.execution_strategy.execute(query_plan.query, request.settings,
                                          query_runner)