Example #1
0
    def process_query(self, query: Query,
                      request_settings: RequestSettings) -> None:
        """
        Swap subscript accesses on mapping columns for their promoted column
        whenever ``self.__specs`` declares one for the accessed key.

        The promoted column is wrapped in ``toString`` when casting is
        enabled and the column is not already a (non fixed) String.
        """
        def promote_subscript(exp: Expression) -> Expression:
            subscript = match_subscriptable_reference(exp)
            if subscript is None or subscript.column_name not in self.__specs:
                return exp

            promoted = self.__specs[subscript.column_name].get(subscript.key)
            if promoted is None:
                return exp

            schema_columns = query.get_from_clause().get_columns()
            promoted_type = schema_columns.get(promoted, None)
            type_name = str(promoted_type) if promoted_type else None

            # Mapping columns only hold strings and clients rely on that, so
            # anything that is not a plain String has to go through toString.
            is_plain_string = (bool(type_name) and "String" in type_name
                               and "FixedString" not in type_name)
            if self.__cast_to_string and not is_plain_string:
                uncast = Column(None, subscript.table_name, promoted)
                return FunctionCall(exp.alias, "toString", (uncast, ))

            return Column(exp.alias, subscript.table_name, promoted)

        query.transform_expressions(promote_subscript)
    def process_query(self, query: Query,
                      request_settings: RequestSettings) -> None:
        """
        Wrap the ``group_id`` and ``message`` columns into function calls.

        ``group_id`` becomes ``nullIf(group_id, 0)`` and ``message`` becomes
        ``coalesce(search_message, message)``. The alias of the original
        column is moved onto the wrapping function.
        """
        def process_column(exp: Expression) -> Expression:
            if not isinstance(exp, Column):
                return exp

            name = exp.column_name
            if name == "group_id":
                arguments = (
                    Column(None, exp.table_name, name),
                    Literal(None, 0),
                )
                return FunctionCall(exp.alias, "nullIf", arguments)

            if name == "message":
                # message was renamed to search_message without a backfill,
                # so a record carries one field or the other.
                # TODO this can be removed once all data has search_message
                # filled in.
                arguments = (
                    Column(None, exp.table_name, "search_message"),
                    Column(None, exp.table_name, name),
                )
                return FunctionCall(exp.alias, "coalesce", arguments)

            return exp

        query.transform_expressions(process_column)
Example #3
0
    def process_query(self, query: Query,
                      query_settings: QuerySettings) -> None:
        """
        Rewrite the arrayJoin expressions matched by
        ``self.__array_join_pattern`` into an optimized form when possible.

        Two cases are optimized:

        - every column in ``self.all_columns`` is array joined in the query:
          the expression is replaced by a (possibly filtered) tuple mapping;
        - exactly one column is array joined, it is one of
          ``self.key_columns`` and it has a single-column filter: the
          expression is replaced by a filtered keys mapping.

        Any other expression is left untouched.
        """
        array_joins_in_query = self.__get_array_joins_in_query(query)

        tuple_alias = self.__get_unused_alias(query)

        single_filtered, multiple_filtered = self.get_filtered_arrays(
            query, self.key_columns)

        def replace_expression(expr: Expression) -> Expression:
            match = self.__array_join_pattern.match(expr)

            # The arrayJoins we are looking for are not present, so skip this entirely
            if match is None:
                return expr

            # All of the possible array joins are present
            if array_joins_in_query == set(self.all_columns):
                tuple_index = self.__find_tuple_index(match.string("col"))

                if single_filtered or multiple_filtered:
                    # Translate the filtered column names into tuple indexes.
                    single_index_filtered = {
                        self.__find_tuple_index(column_name): filtered
                        for column_name, filtered in single_filtered.items()
                    }

                    multiple_indices_filtered = {
                        tuple(
                            self.__find_tuple_index(column)
                            for column in column_names): filtered
                        for column_names, filtered in multiple_filtered.items()
                    }

                    return filtered_mapping_tuples(
                        expr.alias,
                        tuple_alias,
                        tuple_index,
                        self.all_columns,
                        single_index_filtered,
                        multiple_indices_filtered,
                    )

                return unfiltered_mapping_tuples(expr.alias, tuple_alias,
                                                 tuple_index, self.all_columns)

            # Only array join present is one of the key columns
            elif len(array_joins_in_query) == 1 and any(
                    column in array_joins_in_query
                    for column in self.key_columns):
                # Read the only element without removing it: this closure
                # runs once per expression, and popping would empty the
                # shared collection so later occurrences of the same
                # arrayJoin would be left unoptimized.
                column_name = next(iter(array_joins_in_query))
                if column_name in single_filtered:
                    return filtered_mapping_keys(expr.alias, column_name,
                                                 single_filtered[column_name])

            # No viable optimization
            return expr

        query.transform_expressions(replace_expression)
Example #4
0
    def process_query(self, query: Query, query_settings: QuerySettings) -> None:
        """
        Replace each expression matched by ``self.__array_has_pattern`` with
        the sub-expression the pattern captured under the ``has`` name.
        """
        def replace_expression(expr: Expression) -> Expression:
            found = self.__array_has_pattern.match(expr)
            # Nothing captured means this is not the condition we look for.
            return expr if found is None else found.expression("has")

        query.transform_expressions(replace_expression)
Example #5
0
    def process_query(self, query: Query, request_settings: RequestSettings) -> None:
        """
        Process the query expressions, giving the condition special treatment.

        ``query.transform_expressions`` is called with
        ``skip_transform_condition=True``; the condition is then transformed
        here, first with the optimizable-condition processor and, only if
        that changed nothing, with the regular expression processor.
        """
        query.transform_expressions(
            self._process_expressions, skip_transform_condition=True
        )

        condition = query.get_condition()
        if condition is None:
            return

        optimized = condition.transform(self.__process_optimizable_condition)
        if optimized == condition:
            # No optimization applied: fall back to the generic processing.
            new_condition = condition.transform(self._process_expressions)
        else:
            new_condition = optimized

        query.set_ast_condition(new_condition)
Example #6
0
    def process_query(self, query: Query,
                      request_settings: RequestSettings) -> None:
        """
        Normalize the device.simulator / device.online / device.charging
        context lookups to the strings "True" / "False".

        A lookup shaped like
        ``arrayElement(contexts.value, indexOf(contexts.key, <key>))`` is
        wrapped as ``if(<lookup> IN ('1', 'True'), 'True', 'False')``.
        """
        boolean_context_keys = Or([
            String("device.simulator"),
            String("device.online"),
            String("device.charging"),
        ])
        key_position = FunctionCall(
            String("indexOf"),
            (
                Column(None, String("contexts.key")),
                Literal(boolean_context_keys, ),
            ),
        )
        matcher = FunctionCall(
            String("arrayElement"),
            (
                Column(None, String("contexts.value")),
                key_position,
            ),
        )

        def process_column(exp: Expression) -> Expression:
            if not matcher.match(exp):
                return exp

            # The alias moves to the outer "if"; the wrapped lookup has none.
            inner = replace(exp, alias=None)
            truthy_values = literals_tuple(
                None,
                [LiteralExpr(None, "1"),
                 LiteralExpr(None, "True")],
            )
            is_truthy = binary_condition(ConditionFunctions.IN, inner,
                                         truthy_values)
            return FunctionCallExpr(
                exp.alias,
                "if",
                (
                    is_truthy,
                    LiteralExpr(None, "True"),
                    LiteralExpr(None, "False"),
                ),
            )

        query.transform_expressions(process_column)
Example #7
0
    def process_query(self, query: Query, query_settings: QuerySettings) -> None:
        """
        Replace every ``user`` column with ``nullIf(user, '')``.

        The alias of the original column is moved onto the ``nullIf`` call
        and the inner column keeps the table name of the original one.
        """
        def process_column(exp: Expression) -> Expression:
            if isinstance(exp, Column) and exp.column_name == "user":
                return FunctionCall(
                    exp.alias,
                    "nullIf",
                    (
                        # Keep the table qualifier of the original column
                        # rather than resetting it to None, so a qualified
                        # reference still points to the same table.
                        Column(None, exp.table_name, exp.column_name),
                        Literal(None, ""),
                    ),
                )

            return exp

        query.transform_expressions(process_column)
Example #8
0
    def process_query(self, query: Query,
                      query_settings: QuerySettings) -> None:
        """
        Replace the expressions matching the
        ``notEquals(type, 'transaction')`` pattern with the literal ``1``.
        """
        # The pattern does not depend on the visited expression, so it is
        # built once here instead of once per expression in the callback.
        matcher = FunctionCall(
            String("notEquals"),
            (Column(None, String("type")), Literal(String("transaction"))),
        )

        def replace_exp(exp: Expression) -> Expression:
            if matcher.match(exp):
                return LiteralExpr(None, 1)

            return exp

        query.transform_expressions(replace_exp)
Example #9
0
    def process_query(self, query: Query,
                      query_settings: QuerySettings) -> None:
        """
        Process the query expressions, giving the condition special treatment.

        The condition is skipped by ``query.transform_expressions`` and
        transformed here instead. The optimizable-condition processor is only
        attempted when the condition contains no unoptimizable part; when it
        leaves the condition unchanged, the regular processor runs.
        """
        query.transform_expressions(self._process_expressions,
                                    skip_transform_condition=True)

        condition = query.get_condition()
        if condition is None:
            return

        optimizable = not self.__contains_unoptimizable_condition(condition)
        if optimizable:
            processed = condition.transform(
                self.__process_optimizable_condition)
            if condition == processed:
                # Nothing was optimized: apply the generic processing.
                processed = processed.transform(self._process_expressions)
        else:
            processed = condition.transform(self._process_expressions)

        query.set_ast_condition(processed)
    def process_query(self, query: Query,
                      request_settings: RequestSettings) -> None:
        """
        Replace every ``group_id`` column with ``nullIf(group_id, 0)``.

        The alias of the original column is moved onto the ``nullIf`` call.
        """
        def process_column(exp: Expression) -> Expression:
            is_group_id = (isinstance(exp, Column)
                           and exp.column_name == "group_id")
            if not is_group_id:
                return exp

            arguments = (
                Column(None, exp.table_name, exp.column_name),
                Literal(None, 0),
            )
            return FunctionCall(exp.alias, "nullIf", arguments)

        query.transform_expressions(process_column)
Example #11
0
    def process_query(self, query: Query, request_settings: RequestSettings) -> None:
        """
        Normalize ``toString(<promoted device context column>)`` calls to the
        strings "", "True" or "False".

        The call is wrapped as
        ``multiIf(x = '', '', x IN ('1', 'True'), 'True', 'False')``.
        """
        # We care only of promoted contexts, so we do not need to match
        # the original nested expression.
        promoted_contexts = Or(
            [
                String("device_simulator"),
                String("device_online"),
                String("device_charging"),
            ]
        )
        matcher = FunctionCall(
            String("toString"), (Column(None, promoted_contexts),),
        )

        def replace_exp(exp: Expression) -> Expression:
            if matcher.match(exp) is None:
                return exp

            # The alias moves to the outer multiIf; the wrapped call has none.
            inner = replace(exp, alias=None)
            is_empty = binary_condition(
                None, ConditionFunctions.EQ, inner, Literal(None, "")
            )
            is_truthy = binary_condition(
                None,
                ConditionFunctions.IN,
                inner,
                literals_tuple(None, [Literal(None, "1"), Literal(None, "True")]),
            )
            return FunctionCallExpr(
                exp.alias,
                "multiIf",
                (
                    is_empty,
                    Literal(None, ""),
                    is_truthy,
                    Literal(None, "True"),
                    Literal(None, "False"),
                ),
            )

        query.transform_expressions(replace_exp)
Example #12
0
    def process_query(self, query: Query, request_settings: RequestSettings) -> None:
        """
        Replace every ``event_id`` column with
        ``replaceAll(toString(event_id), '-', '')``.

        The alias of the original column is moved onto the ``replaceAll``
        call and the inner column keeps the table name of the original one.
        """
        def process_column(exp: Expression) -> Expression:
            if isinstance(exp, Column) and exp.column_name == "event_id":
                # Keep the table qualifier of the original column rather than
                # resetting it to None, so a qualified reference still points
                # to the same table.
                unaliased = Column(None, exp.table_name, exp.column_name)
                return FunctionCall(
                    exp.alias,
                    "replaceAll",
                    (
                        FunctionCall(None, "toString", (unaliased,)),
                        Literal(None, "-"),
                        Literal(None, ""),
                    ),
                )

            return exp

        query.transform_expressions(process_column)
Example #13
0
    def process_query(self, query: Query,
                      query_settings: QuerySettings) -> None:
        """
        Cast the columns listed in ``self.mismatched_null_columns`` to a
        nullable type wherever they appear inside an aggregation function.

        Only function calls whose name is in ``AGGREGATION_FUNCTIONS`` are
        rewritten; columns used elsewhere are left as they are.
        """
        def cast_column_to_nullable(exp: Expression) -> Expression:
            if not isinstance(exp, Column):
                return exp
            if exp.column_name not in self.mismatched_null_columns:
                return exp

            # Depending on the order of the storage, the dictionary holds
            # either the nullable or the non-nullable version of the column.
            # Whichever it is, the mismatch on the merge table means it has
            # to be cast as nullable anyway.
            mismatched = self.mismatched_null_columns[exp.column_name]
            already_nullable = _col_is_nullable(mismatched)
            schema_type = mismatched.type.for_schema()
            if already_nullable:
                target_type = schema_type
            else:
                target_type = f"Nullable({schema_type})"

            # The alias moves up to the cast function.
            unaliased = Column(
                None,
                table_name=exp.table_name,
                column_name=exp.column_name,
            )
            return FunctionCall(
                exp.alias,
                "cast",
                (unaliased, Literal(None, target_type)),
            )

        def transform_aggregate_functions_with_mismatched_nullable_parameters(
            exp: Expression, ) -> Expression:
            is_aggregation = (isinstance(exp, FunctionCall)
                              and exp.function_name in AGGREGATION_FUNCTIONS)
            return exp.transform(
                cast_column_to_nullable) if is_aggregation else exp

        query.transform_expressions(
            transform_aggregate_functions_with_mismatched_nullable_parameters)
Example #14
0
    def process_query(self, query: Query,
                      request_settings: RequestSettings) -> None:
        """
        Optimize the arrayJoin calls on the key and value columns of the
        mapping column this processor is configured with.

        When both the key and the value arrayJoin are in the query, they are
        replaced by a single arrayJoin on key-value pairs (filtered when key
        filters were found). When only one is present and key filters were
        found, it is replaced by a filtered keys expression. Otherwise the
        expression is left untouched.
        """
        key_col = key_column(self.__column_name)
        val_col = val_column(self.__column_name)

        arrayjoin_pattern = FunctionCall(
            String("arrayJoin"),
            (Column(column_name=Param(
                "col",
                Or([String(key_col), String(val_col)]),
            ), ), ),
        )

        # Names of the columns that are actually array joined in the query.
        pattern_matches = (arrayjoin_pattern.match(e)
                           for e in query.get_all_expressions())
        arrayjoins_in_query = {
            found.string("col")
            for found in pattern_matches if found is not None
        }

        filtered_keys = [
            LiteralExpr(None, key)
            for key in get_filtered_mapping_keys(query, self.__column_name)
        ]

        # Ensures the alias we apply to the arrayJoin is not already taken.
        used_aliases = {exp.alias for exp in query.get_all_expressions()}
        pair_alias_root = f"snuba_all_{self.__column_name}"
        pair_alias = pair_alias_root
        suffix = 0
        while pair_alias in used_aliases:
            suffix += 1
            pair_alias = f"{pair_alias_root}_{suffix}"

        def replace_expression(expr: Expression) -> Expression:
            """
            Applies the appropriate optimization on a single arrayJoin expression.
            """
            match = arrayjoin_pattern.match(expr)
            if match is None:
                return expr

            if arrayjoins_in_query == {key_col, val_col}:
                # Both arrayJoin(col.key) and arrayJoin(col.value) are in the
                # query: arrayJoin the key-value pairs once instead of
                # joining keys and values independently. The key sits at
                # position 1 of the pair and the value at position 2.
                position = 1 if match.string("col") == key_col else 2
                array_index = LiteralExpr(None, position)

                if filtered_keys:
                    return _filtered_mapping_pairs(
                        expr.alias,
                        self.__column_name,
                        pair_alias,
                        filtered_keys,
                        array_index,
                    )
                return _unfiltered_mapping_pairs(expr.alias,
                                                 self.__column_name,
                                                 pair_alias, array_index)

            if filtered_keys:
                # Only one between arrayJoin(col.key) and arrayJoin(col.value)
                # is present, and it is arrayJoin(col.key) since we found
                # filtered keys.
                return _filtered_mapping_keys(expr.alias, self.__column_name,
                                              filtered_keys)

            # No viable optimization
            return expr

        query.transform_expressions(replace_expression)
Example #15
0
 def process_query(self, query: Query, query_settings: QuerySettings) -> None:
     """Hand ``self._process_expressions`` to ``query.transform_expressions``."""
     expression_processor = self._process_expressions
     query.transform_expressions(expression_processor)
Example #16
0
def test_replace_expression() -> None:
    """
    Build a query on the AST and check that ``transform_expressions`` swaps
    one function for another: every ``f1(...)`` becomes ``tag('f1')``.
    """
    first_col = Column(None, "t1", "c1")
    second_col = Column(None, "t1", "c2")
    f1_call = FunctionCall("alias", "f1", (first_col, second_col))
    f2_call = FunctionCall("alias", "f2", (second_col,))
    orderby = OrderBy(OrderByDirection.ASC, f2_call)

    query = Query(
        Table("my_table", ColumnSet([])),
        selected_columns=[SelectedExpression("alias", f1_call)],
        array_join=None,
        condition=binary_condition(
            ConditionFunctions.EQ, f1_call, Literal(None, "1")
        ),
        groupby=[f1_call],
        having=None,
        prewhere=binary_condition(
            ConditionFunctions.EQ, f1_call, Literal(None, "2")
        ),
        order_by=[orderby],
    )

    def swap_f1_for_tag(exp: Expression) -> Expression:
        is_f1 = isinstance(exp, FunctionCall) and exp.function_name == "f1"
        if not is_f1:
            return exp
        return FunctionCall(exp.alias, "tag", (Literal(None, "f1"),))

    query.transform_expressions(swap_f1_for_tag)

    def tag_call() -> FunctionCall:
        # What every f1 call is expected to have turned into.
        return FunctionCall("alias", "tag", (Literal(None, "f1"),))

    expected_query = Query(
        Table("my_table", ColumnSet([])),
        selected_columns=[SelectedExpression("alias", tag_call())],
        array_join=None,
        condition=binary_condition(
            ConditionFunctions.EQ, tag_call(), Literal(None, "1")
        ),
        groupby=[tag_call()],
        prewhere=binary_condition(
            ConditionFunctions.EQ, tag_call(), Literal(None, "2")
        ),
        having=None,
        order_by=[orderby],
    )

    assert query.get_selected_columns() == expected_query.get_selected_columns()
    assert query.get_condition() == expected_query.get_condition()
    assert query.get_groupby() == expected_query.get_groupby()
    assert query.get_having() == expected_query.get_having()
    assert query.get_orderby() == expected_query.get_orderby()

    actual_expressions = list(query.get_all_expressions())
    expected_expressions = list(expected_query.get_all_expressions())
    assert actual_expressions == expected_expressions
 def process_query(self, query: Query,
                   request_settings: RequestSettings) -> None:
     """Hand ``self._process_expressions`` to ``query.transform_expressions``."""
     expression_processor = self._process_expressions
     query.transform_expressions(expression_processor)