Ejemplo n.º 1
0
 def __init__(self, uuid_columns: Set[str]) -> None:
     """
     Build the matchers used to recognise conditions on UUID columns.

     :param uuid_columns: Names of the columns treated as UUID columns.
         The set is stored as given and also turned into a column-name
         matcher that accepts any of these names.
     """
     self.__unique_uuid_columns = uuid_columns
     # Must be assigned before formatted_uuid_pattern() is called below,
     # since the patterns built here are wrapped around this matcher.
     self.__uuid_column_match = Or([String(name) for name in uuid_columns])

     # IN / NOT IN: the formatted UUID pattern on the left, and a
     # `tuple(...)` call (captured as "params") on the right.
     membership_operators = Or(
         (String(ConditionFunctions.IN), String(ConditionFunctions.NOT_IN))
     )
     in_operands = (
         self.formatted_uuid_pattern(),
         Param("params", FunctionCallMatch(String("tuple"), None)),
     )
     self.uuid_in_condition = FunctionCallMatch(membership_operators, in_operands)

     # Every other operator listed in FUNCTION_TO_OPERATOR: each operand is
     # either a literal (captured as "literal_<n>") or the formatted UUID
     # pattern built with the "_<n>" suffix.
     excluded = (ConditionFunctions.IN, ConditionFunctions.NOT_IN)
     other_operators = Or(
         [String(name) for name in FUNCTION_TO_OPERATOR if name not in excluded]
     )
     first_operand = Or(
         (
             Param("literal_0", LiteralMatch(AnyOptionalString())),
             self.formatted_uuid_pattern("_0"),
         )
     )
     second_operand = Or(
         (
             Param("literal_1", LiteralMatch(AnyOptionalString())),
             self.formatted_uuid_pattern("_1"),
         )
     )
     self.uuid_condition = FunctionCallMatch(
         other_operators, (first_operand, second_operand)
     )

     # No value yet; this constructor only initialises the slot.
     self.formatted: Optional[str] = None
Ejemplo n.º 2
0
def extract_granularity_from_query(query: Query, column: str) -> Optional[int]:
    """
    Extract the `granularity` from the `groupby` statement of the query.

    Every sub-expression of every `groupby` entry is tested against two
    shapes, in this order:

    1. One of `toStartOfHour`, `toStartOfMinute`, `toStartOfDay` or `toDate`
       applied to `column`. The granularity is then looked up in
       `GRANULARITY_MAPPING` by function name.
    2. `toDateTime(multiply(intDiv(toUInt32(column), N), N), <str>)`. The
       granularity is then the captured integer literal `N`.

    The matches are essentially the reverse of
    `TimeSeriesProcessor.__group_time_function`.

    :param query: The query whose `groupby` is inspected.
    :param column: Name of the time column to look for.
    :return: The granularity of the first matching expression, or None when
        nothing in the `groupby` matches.
    """
    grouped_expressions = query.get_groupby()

    time_column = ColumnMatch(None, String(column))

    # Shape 1: a rounding function applied directly to the column.
    rounding_names = Or(
        [
            String("toStartOfHour"),
            String("toStartOfMinute"),
            String("toStartOfDay"),
            String("toDate"),
        ]
    )
    rounding_match = FunctionCallMatch(
        Param("time_fn", rounding_names),
        (time_column,),
        with_optionals=True,
    )

    # Shape 2: integer bucketing. Both integer literals are captured under
    # the same "granularity" name.
    bucket_index = FunctionCallMatch(
        String("intDiv"),
        (
            FunctionCallMatch(String("toUInt32"), (time_column,)),
            LiteralMatch(Param("granularity", Any(int))),
        ),
    )
    bucket_start = FunctionCallMatch(
        String("multiply"),
        (bucket_index, LiteralMatch(Param("granularity", Any(int)))),
    )
    bucketing_match = FunctionCallMatch(
        String("toDateTime"),
        (bucket_start, LiteralMatch(Any(str))),
    )

    for grouped in grouped_expressions:
        for candidate in grouped:
            rounded = rounding_match.match(candidate)
            if rounded is not None:
                return GRANULARITY_MAPPING[rounded.string("time_fn")]

            bucketed = bucketing_match.match(candidate)
            if bucketed is not None:
                return bucketed.integer("granularity")

    return None
Ejemplo n.º 3
0
class DefaultIfNullFunctionMapper(FunctionCallMapper):
    """
    If a function is being called on a column that doesn't exist, or is being
    called on NULL, change the entire function to be NULL.

    A parameter is recognised as NULL when, after translation, it matches
    `identity(<None literal>)`.
    """

    function_match = FunctionCallMatch(
        StringMatch("identity"),
        (LiteralMatch(value=Any(type(None))),),
    )

    def attempt_map(
        self,
        expression: FunctionCall,
        children_translator: SnubaClickhouseStrictTranslator,
    ) -> Optional[FunctionCall]:
        """
        Collapse `expression` when any translated parameter is a NULL identity.

        :param expression: The function call being translated.
        :param children_translator: Translator applied to every parameter
            before it is inspected.
        :return: `identity(Literal(None, None), expression.alias)` when at
            least one translated parameter matches `function_match`,
            otherwise None. Logical functions are never collapsed.
        """
        # HACK: Quick fix to avoid this function dropping important conditions from the query
        if expression.function_name in {"and", "or", "xor"}:
            return None

        # Translate every parameter up front, before any matching happens.
        translated = tuple(
            param.accept(children_translator) for param in expression.parameters
        )

        # All impossible columns will have been converted to the identity function.
        # So a single identity parameter is enough to collapse the whole expression.
        has_null_identity = any(
            self.function_match.match(arg) is not None for arg in translated
        )
        if not has_null_identity:
            return None

        return identity(Literal(None, None), expression.alias)
Ejemplo n.º 4
0
class DefaultIfNullCurriedFunctionMapper(CurriedFunctionCallMapper):
    """
    If a curried function is being called on a column that doesn't exist, or is being
    called on NULL, change the entire function to be NULL.

    A parameter is recognised when, after translation, it matches
    `identity(<literal>)`.
    """

    function_match = FunctionCallMatch(
        StringMatch("identity"),
        (LiteralMatch(),),
    )

    def attempt_map(
        self,
        expression: CurriedFunctionCall,
        children_translator: SnubaClickhouseStrictTranslator,
    ) -> Optional[Union[CurriedFunctionCall, FunctionCall]]:
        """
        Collapse `expression` when any translated parameter is an identity call.

        :param expression: The curried function call being translated.
        :param children_translator: Translator applied to the internal
            function and to every outer parameter.
        :return: `identity(Literal(None, None), expression.alias)` when at
            least one translated parameter matches `function_match`,
            otherwise None.
        """
        # The internal function is translated (and type-checked) even though
        # only the outer parameters decide the outcome.
        translated_inner = expression.internal_function.accept(children_translator)
        assert isinstance(translated_inner, FunctionCall)  # mypy

        translated = tuple(
            param.accept(children_translator) for param in expression.parameters
        )

        # All impossible columns that have been converted to NULL will be the identity function.
        # So a single identity parameter is enough to collapse the whole expression.
        has_identity = any(
            self.function_match.match(arg) is not None for arg in translated
        )
        if not has_identity:
            return None

        return identity(Literal(None, None), expression.alias)
Ejemplo n.º 5
0
 def process_query(self, query: Query,
                   query_settings: QuerySettings) -> None:
     """
     Check that every `uniq` call in the HAVING clause is also selected.

     Each expression yielded by iterating the HAVING clause is tested
     against a `uniq(...)` function-call matcher. The matched calls are
     handed to `_ExpressionOrAliasMatcher`, which then visits the expression
     of every selected column. If not all of them are reported as found, the
     mismatch is either raised or logged, depending on the
     `throw_on_uniq_select_and_having` runtime config.

     :param query: The query to inspect. It is only read, not modified.
     :param query_settings: Unused by this method.
     :raises MismatchedAggregationException: When a mismatch is detected and
         the `throw_on_uniq_select_and_having` config is truthy.
     """
     having_clause = query.get_having()
     if not having_clause:
         return None
     selected_columns = query.get_selected_columns()
     uniq_matcher = Param("function", FunctionCallMatch(String("uniq")))
     found_functions = []
     for exp in having_clause:
         match = uniq_matcher.match(exp)
         if match is not None:
             found_functions.append(match.expression("function"))

     # `found_functions` is always a list (possibly empty), so it needs no
     # `is not None` guard before being handed to the matcher.
     matcher = _ExpressionOrAliasMatcher(found_functions)
     for col in selected_columns:
         col.expression.accept(matcher)
     if all(matcher.found_expressions):
         return None

     should_throw = get_config("throw_on_uniq_select_and_having", False)
     error = MismatchedAggregationException(
         "Aggregation is in HAVING clause but not SELECT",
         query=str(query))
     if should_throw:
         raise error

     # No exception is being handled here, so `exc_info=True` would attach
     # an empty "NoneType: None" record. Pass the error instance explicitly
     # so the log carries the actual exception.
     logging.warning(
         "Aggregation is in HAVING clause but not SELECT",
         exc_info=error,
         extra=cast(Dict[str, Any], error.to_dict()),
     )
Ejemplo n.º 6
0
 def formatted_uuid_pattern(self, suffix: str = "") -> FunctionCallMatch:
     """
     Build a matcher for `replaceAll(toString(<uuid column>), ...)`.

     :param suffix: Appended to the capture name so that several patterns
         can coexist in one matcher; the column is captured as
         `"formatted_uuid_column" + suffix`.
     :return: A `replaceAll` function-call matcher whose first parameter is
         `toString` applied to one of the configured UUID columns.
     """
     uuid_column = Param(
         "formatted_uuid_column" + suffix,
         ColumnMatch(None, self.__uuid_column_match),
     )
     stringified_column = FunctionCallMatch(String("toString"), (uuid_column,))
     # NOTE(review): with_optionals=True presumably lets the remaining
     # replaceAll arguments go unlisted here; confirm against FunctionCallMatch.
     return FunctionCallMatch(
         String("replaceAll"),
         (stringified_column,),
         with_optionals=True,
     )
Ejemplo n.º 7
0
    def __init__(self, columns: Set[str]):
        """
        Build the condition matchers for the given column names.

        :param columns: Names of the columns whose conditions should be
            recognised. Stored as given on `self.columns`.
        """
        self.columns = columns

        # Building blocks shared by the matchers below.
        target_column = Param(
            "col", ColumnMatch(None, Or([String(name) for name in columns]))
        )
        string_literal = Param("literal", LiteralMatch(AnyMatch(str)))

        membership_names = (ConditionFunctions.IN, ConditionFunctions.NOT_IN)
        comparison_operator = Param(
            "operator",
            Or(
                [
                    String(name)
                    for name in FUNCTION_TO_OPERATOR
                    if name not in membership_names
                ]
            ),
        )
        membership_operator = Param(
            "operator",
            Or((String(ConditionFunctions.IN), String(ConditionFunctions.NOT_IN))),
        )

        # Binary conditions: the literal may sit on either side of the
        # operator, while `has` is only matched as has(col, literal).
        literal_first = FunctionCallMatch(
            comparison_operator, (string_literal, target_column)
        )
        column_first = FunctionCallMatch(
            comparison_operator, (target_column, string_literal)
        )
        has_condition = FunctionCallMatch(
            Param("operator", String("has")), (target_column, string_literal)
        )
        self.__condition_matcher = Or([literal_first, column_first, has_condition])

        # IN / NOT IN: the column against a `tuple(...)` whose parameters
        # are all literals; the tuple call is captured as "tuple".
        literal_tuple = Param(
            "tuple",
            FunctionCallMatch(String("tuple"), all_parameters=LiteralMatch()),
        )
        self.__in_condition_matcher = FunctionCallMatch(
            membership_operator, (target_column, literal_tuple)
        )
Ejemplo n.º 8
0
 def build_match(
     col: str, ops: Sequence[str], param_type: Any
 ) -> Or[Expression]:
     """
     Build a matcher for conditions on `col` against literals of `param_type`.

     :param col: Name of the column the condition must refer to.
     :param ops: Function names accepted for the plain binary condition.
     :param param_type: Type handed to `AnyMatch` for the literal operand(s).
     :return: A matcher accepting either `<op>(col, literal)` for any op in
         `ops`, or `IN(col, array|tuple(literal, ...))`.
     """
     target_column = ColumnMatch(alias_match, StringMatch(col))

     binary_condition = FunctionCallMatch(
         Or([StringMatch(name) for name in ops]),
         (target_column, LiteralMatch(AnyMatch(param_type))),
     )

     # The IN condition has to be checked separately since each parameter
     # has to be checked individually.
     literal_collection = FunctionCallMatch(
         Or([StringMatch("array"), StringMatch("tuple")]),
         all_parameters=LiteralMatch(AnyMatch(param_type)),
     )
     in_condition = FunctionCallMatch(
         StringMatch(ConditionFunctions.IN),
         (target_column, literal_collection),
     )

     return Or([binary_condition, in_condition])
Ejemplo n.º 9
0
class DefaultIfNullCurriedFunctionMapper(CurriedFunctionCallMapper):
    """
    If a curried function is being called on a column that doesn't exist, or is being
    called on NULL, change the entire function to be NULL.

    A translated parameter counts as NULL when it is either a literal whose
    value is None, or a call matching `ifNull(<literal>, <literal>)`.
    """

    function_match = FunctionCallMatch(
        StringMatch("ifNull"),
        (LiteralMatch(), LiteralMatch()),
    )

    def attempt_map(
        self,
        expression: CurriedFunctionCall,
        children_translator: SnubaClickhouseStrictTranslator,
    ) -> Optional[CurriedFunctionCall]:
        """
        Rewrite `expression` when every translated parameter is NULL.

        :param expression: The curried function call being translated.
        :param children_translator: Translator applied to the internal
            function and to every outer parameter.
        :return: A curried call to `<internal function name>OrNull`, keeping
            the translated internal parameters and replacing each outer
            parameter with a NULL literal, when there is at least one outer
            parameter and all of them are NULL. Otherwise None.
        """
        translated_inner = expression.internal_function.accept(children_translator)
        assert isinstance(translated_inner, FunctionCall)  # mypy

        translated = tuple(
            param.accept(children_translator) for param in expression.parameters
        )

        # Handle wrapped functions that have been converted to ifNull(NULL, NULL):
        # those count as NULL, as do plain literals whose value is None.
        every_param_is_null = all(
            self.function_match.match(arg) is not None
            or (isinstance(arg, Literal) and arg.value is None)
            for arg in translated
        )
        if not (every_param_is_null and translated):
            return None

        # Currently curried function mappers require returning other curried functions.
        # So return this to keep the mapper happy.
        return CurriedFunctionCall(
            alias=expression.alias,
            internal_function=FunctionCall(
                None,
                f"{translated_inner.function_name}OrNull",
                translated_inner.parameters,
            ),
            parameters=tuple(Literal(None, None) for _ in translated),
        )
Ejemplo n.º 10
0
class DefaultIfNullFunctionMapper(FunctionCallMapper):
    """
    If a function is being called on a column that doesn't exist, or is being
    called on NULL, change the entire function to be NULL.

    A translated parameter counts as NULL when it is either a literal whose
    value is None, or a call matching `ifNull(<literal>, <literal>)`.
    """

    function_match = FunctionCallMatch(
        StringMatch("ifNull"),
        (LiteralMatch(), LiteralMatch()),
    )

    def attempt_map(
        self,
        expression: FunctionCall,
        children_translator: SnubaClickhouseStrictTranslator,
    ) -> Optional[FunctionCall]:
        """
        Rewrite `expression` when every translated parameter is NULL.

        :param expression: The function call being translated.
        :param children_translator: Translator applied to every parameter
            before it is inspected.
        :return: `ifNull(NULL, NULL)` carrying the original alias when there
            is at least one parameter and all of them are NULL. Otherwise
            None.
        """
        translated = tuple(
            param.accept(children_translator) for param in expression.parameters
        )

        # Handle wrapped functions that have been converted to ifNull(NULL, NULL):
        # those count as NULL, as do plain literals whose value is None.
        every_param_is_null = all(
            self.function_match.match(arg) is not None
            or (isinstance(arg, Literal) and arg.value is None)
            for arg in translated
        )
        if not (every_param_is_null and translated):
            return None

        # Currently function mappers require returning other functions. So return this
        # to keep the mapper happy.
        return FunctionCall(
            expression.alias,
            "ifNull",
            (Literal(None, None), Literal(None, None)),
        )
Ejemplo n.º 11
0
    # {"a": f(x), "x": g(k)} -> {"a": f(g(k)), "x": g(k)}
    aliased_expressions = {
        exp.alias: exp
        for exp in query.get_all_expressions() if exp.alias is not None
    }
    fully_resolved_aliases = {
        alias: exp.accept(
            AliasExpanderVisitor(aliased_expressions, [], expand_nested=True))
        for alias, exp in aliased_expressions.items()
    }

    visitor = AliasExpanderVisitor(fully_resolved_aliases, [])
    query.transform(visitor)


# Matcher for calls to the `arrayJoin` function.
# NOTE(review): the parameters pattern is None, which presumably means the
# arguments are not constrained; confirm against FunctionCallMatch.
ARRAYJOIN_FUNCTION_MATCH = FunctionCallMatch(StringMatch("arrayJoin"), None)


def _validate_arrayjoin(query: Query) -> None:
    # TODO: Actually validate arrayjoin. For now log how it is used.
    body_arrayjoin = ""
    arrayjoin = query.get_arrayjoin_from_ast()
    if arrayjoin is not None:
        if isinstance(arrayjoin, Column):
            body_arrayjoin = arrayjoin.column_name

    array_joins = set()
    if body_arrayjoin:
        array_joins.add(body_arrayjoin)
    for exp in query.get_all_expressions():
        match = ARRAYJOIN_FUNCTION_MATCH.match(exp)
Ejemplo n.º 12
0
        selected_entity = match_query_to_entity(query, EVENTS_COLUMNS,
                                                TRANSACTIONS_COLUMNS)

        track_bad_query(query, selected_entity, EVENTS_COLUMNS,
                        TRANSACTIONS_COLUMNS)

        return selected_entity


# Metrics are emitted under the "api.discover" name.
metrics = MetricsWrapper(environment.metrics, "api.discover")
logger = logging.getLogger(__name__)

# Matches a call to any function listed in BINARY_OPERATORS (captured as
# "function") whose first parameter is either a column named "type" or a
# literal built with LiteralMatch(None), and whose second parameter is any
# column or literal (captured as "event_type").
EVENT_CONDITION = FunctionCallMatch(
    Param("function", Or([StringMatch(op) for op in BINARY_OPERATORS])),
    (
        Or([ColumnMatch(None, StringMatch("type")),
            LiteralMatch(None)]),
        Param("event_type", Or([ColumnMatch(), LiteralMatch()])),
    ),
)

# Matches calls to `apdex` or `failure_rate`; the parameters pattern is None.
# NOTE(review): the name suggests these only apply to transactions; confirm
# against the code that uses this matcher.
TRANSACTION_FUNCTIONS = FunctionCallMatch(
    Or([StringMatch("apdex"),
        StringMatch("failure_rate")]), None)

# Matches calls to `isHandled` or `notHandled`; the parameters pattern is None.
# NOTE(review): the name suggests these only apply to events; confirm
# against the code that uses this matcher.
EVENT_FUNCTIONS = FunctionCallMatch(
    Or([StringMatch("isHandled"),
        StringMatch("notHandled")]), None)


def match_query_to_entity(
    query: Query,
Ejemplo n.º 13
0
    def __init__(
        self,
        time_group_columns: Mapping[str, str],
        time_parse_columns: Sequence[str],
    ) -> None:
        """
        Store the column mapping and build the time-condition matcher.

        :param time_group_columns: Column names that should be mapped to
            different columns.
        :param time_parse_columns: Columns that, if used in a condition,
            should be compared with datetimes.
        """
        # Column names that should be mapped to different columns.
        self.__time_replace_columns = time_group_columns

        # The columns in time_parse_columns might overlap with the columns that
        # get replaced, so besides the bare column we also have to search for
        # its transformed forms. The column name is captured as "column_name".
        parsed_column = ColumnMatch(
            None,
            Param("column_name", Or([String(name) for name in time_parse_columns])),
        )

        comparison_operators = Or(
            [
                String(name)
                for name in (
                    ConditionFunctions.GT,
                    ConditionFunctions.GTE,
                    ConditionFunctions.LT,
                    ConditionFunctions.LTE,
                    ConditionFunctions.EQ,
                    ConditionFunctions.NEQ,
                )
            ]
        )

        # Transformed form 1: a rounding function applied to the column.
        rounded_column = FunctionCallMatch(
            Or(
                [
                    String("toStartOfHour"),
                    String("toStartOfMinute"),
                    String("toStartOfDay"),
                    String("toDate"),
                ]
            ),
            (parsed_column,),
            with_optionals=True,
        )

        # Transformed form 2:
        # toDateTime(multiply(intDiv(toUInt32(column), <int>), <int>), <str>)
        bucket_index = FunctionCallMatch(
            String("intDiv"),
            (
                FunctionCallMatch(String("toUInt32"), (parsed_column,)),
                LiteralMatch(Any(int)),
            ),
        )
        bucket_start = FunctionCallMatch(
            String("multiply"),
            (bucket_index, LiteralMatch(Any(int))),
        )
        bucketed_column = FunctionCallMatch(
            String("toDateTime"),
            (bucket_start, LiteralMatch(Any(str))),
        )

        # The left side is any of the three column forms; the right side is a
        # string literal captured as "literal".
        self.condition_match = FunctionCallMatch(
            comparison_operators,
            (
                Or([parsed_column, rounded_column, bucketed_column]),
                Param("literal", LiteralMatch(Any(str))),
            ),
        )
Ejemplo n.º 14
0
from snuba.utils.metrics.wrapper import MetricsWrapper

# Short aliases for the two entity keys, plus a string marker for the
# combined case.
EVENTS = EntityKey.EVENTS
TRANSACTIONS = EntityKey.TRANSACTIONS
EVENTS_AND_TRANSACTIONS = "events_and_transactions"

# Metrics are emitted under the "api.discover" name.
metrics = MetricsWrapper(environment.metrics, "api.discover")
logger = logging.getLogger(__name__)

# Matches a call to any function listed in BINARY_OPERATORS (captured as
# "function") whose first parameter refers to "type", either as a column or
# as a literal, and whose second parameter is any column or literal
# (captured as "event_type").
# NOTE(review): FunctionCallMatch, ColumnMatch and LiteralMatch each take an
# extra leading positional argument here, presumably an alias or table
# pattern from a different version of the matcher API; confirm the
# signatures before changing any argument.
EVENT_CONDITION = FunctionCallMatch(
    None,
    Param("function", Or([StringMatch(op) for op in BINARY_OPERATORS])),
    (
        Or(
            [
                ColumnMatch(None, None, StringMatch("type")),
                LiteralMatch(StringMatch("type"), None),
            ]
        ),
        Param("event_type", Or([ColumnMatch(), LiteralMatch()])),
    ),
)


def match_query_to_table(
    query: Query, events_only_columns: ColumnSet, transactions_only_columns: ColumnSet
) -> Union[EntityKey, str]:
    # First check for a top level condition on the event type
    condition = query.get_condition_from_ast()
    event_types = set()
    if condition:
Ejemplo n.º 15
0
    def __init__(self, columns: Set[str], optimize_ordering: bool = False):
        """
        Build the condition matchers for the given column names.

        :param columns: Names of the columns whose conditions should be
            recognised. Stored as given on `self.columns`.
        :param optimize_ordering: When truthy, the ordering operators
            (GT, GTE, LT, LTE) are added to the regular operator matcher.
            Otherwise they are added to the "unoptimizable" operator matcher.
        """
        self.columns = columns
        self.optimize_ordering = optimize_ordering

        ordering_operators = (
            ConditionFunctions.GT,
            ConditionFunctions.GTE,
            ConditionFunctions.LT,
            ConditionFunctions.LTE,
        )

        # The ordering operators go to exactly one of the two name lists.
        optimizable_names = [
            ConditionFunctions.EQ,
            ConditionFunctions.NEQ,
            ConditionFunctions.IS_NULL,
            ConditionFunctions.IS_NOT_NULL,
        ]
        unoptimizable_names = [
            ConditionFunctions.LIKE,
            ConditionFunctions.NOT_LIKE,
        ]
        if self.optimize_ordering:
            optimizable_names.extend(ordering_operators)
        else:
            unoptimizable_names.extend(ordering_operators)

        # Building blocks shared by the matchers below.
        target_column = Param(
            "col", ColumnMatch(None, Or([String(name) for name in columns]))
        )
        string_literal = Param("literal", LiteralMatch(AnyMatch(str)))
        optimizable_operator = Param(
            "operator", Or([String(name) for name in optimizable_names])
        )
        unoptimizable_operator = Param(
            "operator", Or([String(name) for name in unoptimizable_names])
        )
        membership_operator = Param(
            "operator",
            Or((String(ConditionFunctions.IN), String(ConditionFunctions.NOT_IN))),
        )

        # Binary conditions: the literal may sit on either side of the
        # operator, while `has` is only matched as has(col, literal).
        self.__condition_matcher = Or(
            [
                FunctionCallMatch(
                    optimizable_operator, (string_literal, target_column)
                ),
                FunctionCallMatch(
                    optimizable_operator, (target_column, string_literal)
                ),
                FunctionCallMatch(
                    Param("operator", String("has")),
                    (target_column, string_literal),
                ),
            ]
        )

        # IN / NOT IN: the column against a `tuple(...)` whose parameters
        # are all literals; the tuple call is captured as "tuple".
        literal_tuple = Param(
            "tuple",
            FunctionCallMatch(String("tuple"), all_parameters=LiteralMatch()),
        )
        self.__in_condition_matcher = FunctionCallMatch(
            membership_operator, (target_column, literal_tuple)
        )

        # Same two operand orders as above, for the unoptimizable operators.
        self.__unoptimizable_condition_matcher = Or(
            [
                FunctionCallMatch(
                    unoptimizable_operator, (string_literal, target_column)
                ),
                FunctionCallMatch(
                    unoptimizable_operator, (target_column, string_literal)
                ),
            ]
        )
Ejemplo n.º 16
0
 def __post_init__(self) -> None:
     """Build `function_match` so it accepts a call to any name in `function_names`."""
     name_patterns = [StringMatch(name) for name in self.function_names]
     self.function_match = FunctionCallMatch(Or(name_patterns))
Ejemplo n.º 17
0
        if not isinstance(exp, Column):
            return exp

        parts = exp.column_name.split(".", 1)
        if len(parts) != 2 or parts[0] not in aliases:
            raise ParsingException(
                f"column {exp.column_name} must be qualified in a join query"
            )

        return Column(exp.alias, parts[0], parts[1])

    query.transform_expressions(transform)


# Matches `toDateTime(<string literal>)`; the literal parameter is captured
# under the name "date_string".
DATETIME_MATCH = FunctionCallMatch(
    StringMatch("toDateTime"), (Param("date_string", LiteralMatch(AnyMatch(str))),)
)


def _parse_datetime_literals(
    query: Union[CompositeQuery[QueryEntity], LogicalQuery]
) -> None:
    def parse(exp: Expression) -> Expression:
        result = DATETIME_MATCH.match(exp)
        if result is not None:
            date_string = result.expression("date_string")
            assert isinstance(date_string, Literal)  # mypy
            assert isinstance(date_string.value, str)  # mypy
            return Literal(exp.alias, parse_datetime(date_string.value))

        return exp