Beispiel #1
0
def test_first_level_conditions() -> None:
    """Exercise the first-level AND / OR condition flattening helpers.

    Four shapes are checked:
      * nested ANDs are flattened into a single list,
      * an ``and`` call wrapped in ``equals(..., 1)`` is flattened as well,
      * an OR whose operand is an AND keeps that AND as a single entry,
      * an OR wrapped in an ``EQ`` comparison against 1 is split into
        its OR operands.
    """

    def equals_test(table: str, column: str):
        # Builds `table.column = 'test'` with no aliases.
        return binary_condition(
            ConditionFunctions.EQ,
            Column(None, table, column),
            Literal(None, "test"),
        )

    def both(left, right):
        # Builds a fresh `left AND right` node on every call.
        return binary_condition(BooleanFunctions.AND, left, right)

    c1 = equals_test("table1", "column1")
    c2 = equals_test("table2", "column2")
    c3 = equals_test("table3", "column3")

    # (c1 AND c2) AND c3
    nested_and = binary_condition(BooleanFunctions.AND, both(c1, c2), c3)
    assert get_first_level_and_conditions(nested_and) == [c1, c2, c3]

    # equals(and(c1, c2), 1) AND c3
    wrapped_and = FunctionCall(
        None,
        "equals",
        (FunctionCall(None, "and", (c1, c2)), Literal(None, 1)),
    )
    and_with_wrapper = binary_condition(BooleanFunctions.AND, wrapped_and, c3)
    assert get_first_level_and_conditions(and_with_wrapper) == [c1, c2, c3]

    # (c1 AND c2) OR c3
    or_over_and = binary_condition(BooleanFunctions.OR, both(c1, c2), c3)
    expected_or_operands = [both(c1, c2), c3]
    assert get_first_level_or_conditions(or_over_and) == expected_or_operands

    # (c1 OR (c2 AND c3)) = 1
    wrapped_or = binary_condition(
        ConditionFunctions.EQ,
        binary_condition(BooleanFunctions.OR, c1, both(c2, c3)),
        Literal(None, 1),
    )
    expected_wrapped_operands = [c1, both(c2, c3)]
    assert get_first_level_or_conditions(wrapped_or) == expected_wrapped_operands
Beispiel #2
0
    def visit_function_call(self, exp: FunctionCall) -> str:
        """Format a function call expression as a string.

        Special cases, checked in order:
          * ``array`` is rendered with bracket syntax ``[...]``,
          * ``tuple`` with more than one parameter is rendered with
            parenthesis syntax ``(...)``,
          * AND is rendered as its first-level operands joined by " AND ",
          * OR is rendered as its first-level operands joined by " OR ",
            wrapped in parentheses.

        Everything else is rendered as ``name(params)`` with the function
        name escaped. The alias is applied on every path except the AND and
        OR ones, which return the joined string as is.
        """
        name = exp.function_name

        if name == "array":
            # Workaround for https://github.com/ClickHouse/ClickHouse/issues/11622
            # Some distributed queries fail when arrays are passed as
            # array(1,2,3) and work when they are passed as [1, 2, 3]
            bracketed = f"[{self.__visit_params(exp.parameters)}]"
            return self._alias(bracketed, exp.alias)

        if name == "tuple" and len(exp.parameters) > 1:
            # Some distributed queries fail when tuples are passed as
            # tuple(1,2,3) and work when they are passed as (1, 2, 3).
            # To be safe this is only done for tuples with more than one
            # element, otherwise clickhouse will interpret (1) -> 1 which
            # will break things like 1 IN tuple(1)
            parenthesized = f"({self.__visit_params(exp.parameters)})"
            return self._alias(parenthesized, exp.alias)

        if name == BooleanFunctions.AND:
            return " AND ".join(
                operand.accept(self)
                for operand in get_first_level_and_conditions(exp)
            )

        if name == BooleanFunctions.OR:
            disjunction = " OR ".join(
                operand.accept(self)
                for operand in get_first_level_or_conditions(exp)
            )
            return f"({disjunction})"

        generic_call = f"{escape_identifier(name)}({self.__visit_params(exp.parameters)})"
        return self._alias(generic_call, exp.alias)
 def is_skippable_condition(conditions: Expression) -> bool:
     """
     Tell whether a condition can be ignored when looking for filter keys.

     A condition composed of a bunch of has(column, ...) conditions OR'ed
     together can be ignored because these are the conditions used for the
     bloom filter index on the array column.

     Returns True as soon as, for one of the names in ``column_names``
     (not defined in this block; presumably provided by the enclosing
     scope), every first-level OR operand matches has(<that column>, <str>).
     Returns False otherwise.
     """

     def only_has_checks_on(name) -> bool:
         # Every first-level OR operand must be has(<name>, <string literal>).
         pattern = FunctionCall(
             String("has"),
             (Column(column_name=String(name)), Literal(Any(str))),
         )
         return all(
             pattern.match(operand)
             for operand in get_first_level_or_conditions(conditions)
         )

     return any(only_has_checks_on(name) for name in column_names)
Beispiel #4
0
    def visit_function_call(self, exp: FunctionCall) -> str:
        """Format a function call expression as a string.

        ``array`` calls are rendered with bracket syntax, AND / OR calls are
        rendered as their first-level operands joined by the operator (OR is
        additionally wrapped in parentheses), and any other call is rendered
        as ``name(params)`` with the function name escaped. The alias is
        applied on the ``array`` and generic paths only.
        """
        name = exp.function_name

        if name == "array":
            # Workaround for https://github.com/ClickHouse/ClickHouse/issues/11622
            # Some distributed queries fail when arrays are passed as
            # array(1,2,3) and work when they are passed as [1, 2, 3]
            bracketed = f"[{self.__visit_params(exp.parameters)}]"
            return self._alias(bracketed, exp.alias)

        if name == BooleanFunctions.AND:
            return " AND ".join(
                operand.accept(self)
                for operand in get_first_level_and_conditions(exp)
            )

        if name == BooleanFunctions.OR:
            disjunction = " OR ".join(
                operand.accept(self)
                for operand in get_first_level_or_conditions(exp)
            )
            return f"({disjunction})"

        generic_call = f"{escape_identifier(name)}({self.__visit_params(exp.parameters)})"
        return self._alias(generic_call, exp.alias)
Beispiel #5
0
 def __classify_combined_conditions(self, condition: Expression) -> ConditionClass:
     """
     Classify a condition, recursing through AND / OR combinations.

     Anything that is not a FunctionExpr is IRRELEVANT. An AND or OR is
     split into its first-level operands, each operand is classified
     recursively, and the results are merged: NOT_OPTIMIZABLE wins over
     OPTIMIZABLE, which wins over IRRELEVANT. Any other function call is
     handed to __classify_condition.
     """
     if not isinstance(condition, FunctionExpr):
         return ConditionClass.IRRELEVANT

     name = condition.function_name
     if name == BooleanFunctions.AND:
         operands = get_first_level_and_conditions(condition)
     elif name == BooleanFunctions.OR:
         operands = get_first_level_or_conditions(condition)
     else:
         return self.__classify_condition(condition)

     # Every operand is classified before the results are merged.
     verdicts = {self.__classify_combined_conditions(op) for op in operands}
     for outcome in (ConditionClass.NOT_OPTIMIZABLE, ConditionClass.OPTIMIZABLE):
         if outcome in verdicts:
             return outcome
     return ConditionClass.IRRELEVANT
Beispiel #6
0
def test_first_level_conditions() -> None:
    """Exercise the first-level AND / OR condition flattening helpers.

    Nested ANDs must be flattened into a single list, while an OR whose
    operand is an AND must keep that AND as a single entry.
    """

    def equals_test(table: str, column: str):
        # Builds `table.column = 'test'` with no aliases.
        return binary_condition(
            None,
            ConditionFunctions.EQ,
            Column(None, table, column),
            Literal(None, "test"),
        )

    def both(left, right):
        # Builds a fresh, unaliased `left AND right` node on every call.
        return binary_condition(None, BooleanFunctions.AND, left, right)

    c1 = equals_test("table1", "column1")
    c2 = equals_test("table2", "column2")
    c3 = equals_test("table3", "column3")

    # (c1 AND c2) AND c3
    nested_and = binary_condition(None, BooleanFunctions.AND, both(c1, c2), c3)
    assert get_first_level_and_conditions(nested_and) == [c1, c2, c3]

    # (c1 AND c2) OR c3
    or_over_and = binary_condition(None, BooleanFunctions.OR, both(c1, c2), c3)
    expected_or_operands = [both(c1, c2), c3]
    assert get_first_level_or_conditions(or_over_and) == expected_or_operands
Beispiel #7
0
    def _get_condition_without_redundant_checks(
        self, condition: Expression, query: Query
    ) -> Expression:
        """Optimizes the case where the query condition contains the following:

        valueOf('my_tag') != '' AND valueOf('my_tag') == "something"
                          ^                            ^
                          |                            |
                      existence check               value check

        the existence check in this clause is redundant and prevents the hashmap
        optimization from being applied.

        This function will remove all tag existence checks
        from the condition IFF they are ANDed with a value check for the *same tag name*

        Args:
            condition: the expression to prune. Anything that is not an AND
                or OR function expression is returned unchanged.
            query: forwarded as is to the recursive calls; it is not
                otherwise read by this method.

        Returns:
            The condition with the redundant existence checks removed.

        Side effects:
            This function works by flattening first level AND conditions to find clauses where
            existence checks and value checks are ANDed together. When the AND conditions are recombined,
            they are not guaranteed to be in the same structure (but are guaranteed to be functionally equivalent)

            Example:
                ┌───┐         ┌───┐
                │AND│         │AND│
                ├──┬┘         └┬──┤
                │  │           │  │
             ┌──┴┐ c           a ┌┴──┐
             │AND│    becomes    │AND│
             └┬─┬┘               ├──┬┘
              │ │                │  │
              a b                b  c
        """
        if not isinstance(condition, FunctionExpr):
            return condition
        elif condition.function_name == BooleanFunctions.OR:
            # Each OR operand is pruned independently and the OR is rebuilt.
            sub_conditions = get_first_level_or_conditions(condition)
            pruned_conditions = [
                self._get_condition_without_redundant_checks(c, query)
                for c in sub_conditions
            ]
            return combine_or_conditions(pruned_conditions)
        elif condition.function_name == BooleanFunctions.AND:
            sub_conditions = get_first_level_and_conditions(condition)

            # First pass: record which operands are existence checks (by
            # position) and which tag names have a value check.
            tag_eq_match_strings = set()
            matched_tag_exists_conditions = {}
            for condition_id, cond in enumerate(sub_conditions):
                tag_exist_match = None
                for tag_exists_pattern in self.__tag_exists_patterns:
                    tag_exist_match = tag_exists_pattern.match(cond)
                    if tag_exist_match:
                        matched_tag_exists_conditions[condition_id] = tag_exist_match
                        # Stop at the first matching pattern. Without this a
                        # later non-matching pattern would reset
                        # tag_exist_match and the operand would wrongly fall
                        # through to the value check test below.
                        break
                if not tag_exist_match:
                    eq_match = self.__optimizable_pattern.match(cond)
                    if eq_match:
                        tag_eq_match_strings.add(eq_match.string(KEY_MAPPING_PARAM))

            # Second pass: keep everything except the existence checks whose
            # tag also has a value check among the same AND operands.
            useful_conditions = []
            for condition_id, cond in enumerate(sub_conditions):
                tag_exist_match = matched_tag_exists_conditions.get(condition_id)
                if tag_exist_match:
                    requested_tag = tag_exist_match.string("key")
                    if requested_tag in tag_eq_match_strings:
                        # the clause is redundant, thus we continue the loop
                        # and do not add it to useful_conditions
                        continue
                useful_conditions.append(
                    self._get_condition_without_redundant_checks(cond, query)
                )
            return combine_and_conditions(useful_conditions)
        else:
            return condition