def test_format_expressions(pre_format: Query, expected_query: Query) -> None:
    copy = deepcopy(pre_format)
    BasicFunctionsProcessor().process_query(copy, HTTPRequestSettings())
    assert (copy.get_selected_columns_from_ast() ==
            expected_query.get_selected_columns_from_ast())
    assert copy.get_groupby_from_ast() == expected_query.get_groupby_from_ast()
    assert copy.get_condition_from_ast(
    ) == expected_query.get_condition_from_ast()
Example #2
0
def test_project_extension_query_processing(
    raw_data: dict,
    expected_conditions: Sequence[Condition],
    expected_ast_conditions: Expression,
):
    extension = ProjectExtension(
        processor=ProjectExtensionProcessor(project_column="project_id")
    )
    valid_data = validate_jsonschema(raw_data, extension.get_schema())
    query = Query({"conditions": []}, TableSource("my_table", ColumnSet([])),)
    request_settings = HTTPRequestSettings()

    extension.get_processor().process_query(query, valid_data, request_settings)

    assert query.get_conditions() == expected_conditions
    assert query.get_condition_from_ast() == expected_ast_conditions
Example #3
0
def test_format_expressions(query_body: MutableMapping[str, Any],
                            expected_query: Query) -> None:
    events = get_dataset("events")
    query = parse_query(query_body, events)

    # We cannot just run == on the query objects. The content of the two
    # objects is different, being one the AST and the ont the AST + raw body
    assert (query.get_selected_columns_from_ast() ==
            expected_query.get_selected_columns_from_ast())
    assert query.get_groupby_from_ast() == expected_query.get_groupby_from_ast(
    )
    assert query.get_condition_from_ast(
    ) == expected_query.get_condition_from_ast()
    assert query.get_arrayjoin_from_ast(
    ) == expected_query.get_arrayjoin_from_ast()
    assert query.get_having_from_ast() == expected_query.get_having_from_ast()
    assert query.get_orderby_from_ast() == expected_query.get_orderby_from_ast(
    )
Example #4
0
    def __get_filter_tags(self, query: Query) -> List[str]:
        """
        Identifies the tag names we can apply the arrayFilter optimization on.
        Which means: if the tags_key column is in the select clause and there are
        one or more top level conditions on the tags_key column.
        """
        if not state.get_config("ast_tag_processor_enabled", 0):
            return []

        select_clause = query.get_selected_columns_from_ast() or []

        tags_key_found = any(col.column_name == "tags_key"
                             for expression in select_clause
                             for col in expression if isinstance(col, Column))

        if not tags_key_found:
            return []

        def extract_tags_from_condition(
            cond: Optional[Expression], ) -> Optional[List[str]]:
            if not cond:
                return []
            if any(
                    is_binary_condition(cond, BooleanFunctions.OR)
                    for cond in cond):
                return None
            return self.__extract_top_level_tag_conditions(cond)

        cond_tags_key = extract_tags_from_condition(
            query.get_condition_from_ast())
        if cond_tags_key is None:
            # This means we found an OR. Cowardly we give up even though there could
            # be cases where this condition is still optimizable.
            return []
        having_tags_key = extract_tags_from_condition(
            query.get_having_from_ast())
        if having_tags_key is None:
            # Same as above
            return []

        return cond_tags_key + having_tags_key
def test_query_extension_processing(
    raw_data: dict,
    expected_conditions: Sequence[Condition],
    expected_ast_condition: Expression,
    expected_granularity: int,
):
    state.set_config("max_days", 1)
    extension = TimeSeriesExtension(
        default_granularity=60,
        default_window=timedelta(days=5),
        timestamp_column="timestamp",
    )
    valid_data = validate_jsonschema(raw_data, extension.get_schema())
    query = Query({"conditions": []}, TableSource("my_table", ColumnSet([])),)

    request_settings = HTTPRequestSettings()

    extension.get_processor().process_query(query, valid_data, request_settings)
    assert query.get_conditions() == expected_conditions
    assert query.get_condition_from_ast() == expected_ast_condition
    assert query.get_granularity() == expected_granularity
Example #6
0
    def __init__(
        self,
        query: Query,
        settings: RequestSettings,
    ) -> None:
        # Snuba query structure
        # Referencing them here directly since it makes it easier
        # to process this query independently from the Snuba Query
        # and there is no risk in doing so since they are immutable.
        self.__selected_columns = query.get_selected_columns_from_ast()
        self.__condition = query.get_condition_from_ast()
        self.__groupby = query.get_groupby_from_ast()
        self.__having = query.get_having_from_ast()
        self.__orderby = query.get_orderby_from_ast()
        self.__data_source = query.get_data_source()
        self.__arrayjoin = query.get_arrayjoin_from_ast()
        self.__granularity = query.get_granularity()
        self.__limit = query.get_limit()
        self.__limitby = query.get_limitby()
        self.__offset = query.get_offset()

        if self.__having:
            assert self.__groupby, "found HAVING clause with no GROUP BY"

        # Clickhouse specific fields. Some are still in the Snuba
        # query and have to be moved.
        self.__turbo = settings.get_turbo()
        self.__final = query.get_final()
        self.__sample = query.get_sample()
        self.__hastotals = query.has_totals()
        # TODO: Pre where processing will become a step in Clickhouse Query processing
        # instead of being pulled from the Snuba Query
        self.__prewhere = query.get_prewhere_ast()

        self.__settings = settings
        self.__formatted_query: Optional[str] = None
Example #7
0
class TestProjectExtensionWithGroups(BaseTest):
    def setup_method(self, test_method):
        super().setup_method(test_method)
        raw_data = {"project": 2}

        self.extension = ProjectExtension(
            processor=ProjectWithGroupsProcessor(project_column="project_id")
        )
        self.valid_data = validate_jsonschema(raw_data, self.extension.get_schema())
        self.query = Query({"conditions": []}, TableSource("my_table", ColumnSet([])),)

    def test_with_turbo(self):
        request_settings = HTTPRequestSettings(turbo=True)

        self.extension.get_processor().process_query(
            self.query, self.valid_data, request_settings
        )

        assert self.query.get_conditions() == [("project_id", "IN", [2])]
        assert self.query.get_condition_from_ast() == build_in("project_id", [2])

    def test_without_turbo_with_projects_needing_final(self):
        request_settings = HTTPRequestSettings()
        replacer.set_project_needs_final(2)

        self.extension.get_processor().process_query(
            self.query, self.valid_data, request_settings
        )

        assert self.query.get_conditions() == [("project_id", "IN", [2])]
        assert self.query.get_condition_from_ast() == build_in("project_id", [2])
        assert self.query.get_final()

    def test_without_turbo_without_projects_needing_final(self):
        request_settings = HTTPRequestSettings()

        self.extension.get_processor().process_query(
            self.query, self.valid_data, request_settings
        )

        assert self.query.get_conditions() == [("project_id", "IN", [2])]
        assert self.query.get_condition_from_ast() == build_in("project_id", [2])
        assert not self.query.get_final()

    def test_when_there_are_not_many_groups_to_exclude(self):
        request_settings = HTTPRequestSettings()
        state.set_config("max_group_ids_exclude", 5)
        replacer.set_project_exclude_groups(2, [100, 101, 102])

        self.extension.get_processor().process_query(
            self.query, self.valid_data, request_settings
        )

        expected = [
            ("project_id", "IN", [2]),
            (["assumeNotNull", ["group_id"]], "NOT IN", [100, 101, 102]),
        ]
        assert self.query.get_conditions() == expected
        assert self.query.get_condition_from_ast() == FunctionCall(
            None,
            BooleanFunctions.AND,
            (
                FunctionCall(
                    None,
                    "notIn",
                    (
                        FunctionCall(
                            None, "assumeNotNull", (Column(None, "group_id", None),)
                        ),
                        FunctionCall(
                            None,
                            "tuple",
                            (
                                Literal(None, 100),
                                Literal(None, 101),
                                Literal(None, 102),
                            ),
                        ),
                    ),
                ),
                build_in("project_id", [2]),
            ),
        )
        assert not self.query.get_final()

    def test_when_there_are_too_many_groups_to_exclude(self):
        request_settings = HTTPRequestSettings()
        state.set_config("max_group_ids_exclude", 2)
        replacer.set_project_exclude_groups(2, [100, 101, 102])

        self.extension.get_processor().process_query(
            self.query, self.valid_data, request_settings
        )

        assert self.query.get_conditions() == [("project_id", "IN", [2])]
        assert self.query.get_condition_from_ast() == build_in("project_id", [2])
        assert self.query.get_final()
Example #8
0
def test_replace_expression():
    """
    Create a query with the new AST and replaces a function with a different function
    replaces f1(...) with tag(f1)
    """
    column1 = Column(None, "c1", "t1")
    column2 = Column(None, "c2", "t1")
    function_1 = FunctionCall("alias", "f1", (column1, column2))
    function_2 = FunctionCall("alias", "f2", (column2,))

    condition = binary_condition(
        None, ConditionFunctions.EQ, function_1, Literal(None, "1")
    )

    orderby = OrderBy(OrderByDirection.ASC, function_2)

    query = Query(
        {},
        TableSource("my_table", ColumnSet([])),
        selected_columns=[function_1],
        array_join=None,
        condition=condition,
        groupby=[function_1],
        having=None,
        order_by=[orderby],
    )

    def replace(exp: Expression) -> Expression:
        if isinstance(exp, FunctionCall) and exp.function_name == "f1":
            return FunctionCall(exp.alias, "tag", (Literal(None, "f1"),))
        return exp

    query.transform_expressions(replace)

    expected_query = Query(
        {},
        TableSource("my_table", ColumnSet([])),
        selected_columns=[FunctionCall("alias", "tag", (Literal(None, "f1"),))],
        array_join=None,
        condition=binary_condition(
            None,
            ConditionFunctions.EQ,
            FunctionCall("alias", "tag", (Literal(None, "f1"),)),
            Literal(None, "1"),
        ),
        groupby=[FunctionCall("alias", "tag", (Literal(None, "f1"),))],
        having=None,
        order_by=[orderby],
    )

    assert (
        query.get_selected_columns_from_ast()
        == expected_query.get_selected_columns_from_ast()
    )
    assert query.get_condition_from_ast() == expected_query.get_condition_from_ast()
    assert query.get_groupby_from_ast() == expected_query.get_groupby_from_ast()
    assert query.get_having_from_ast() == expected_query.get_having_from_ast()
    assert query.get_orderby_from_ast() == expected_query.get_orderby_from_ast()

    assert list(query.get_all_expressions()) == list(
        expected_query.get_all_expressions()
    )