Example #1
0
 def test_escape(self):
     """Check escaping of quote characters, dates, datetimes and lists."""
     # Single quotes are backslash-escaped inside a quoted literal.
     assert escape_literal(r"'") == r"'\''"
     assert escape_literal(r"\'") == r"'\\\''"
     # Temporal values are wrapped in ClickHouse conversion functions.
     assert escape_literal(date(2001, 1, 1)) == "toDate('2001-01-01')"
     expected_datetime = "toDateTime('2001-01-01T01:01:01')"
     assert escape_literal(datetime(2001, 1, 1, 1, 1, 1)) == expected_datetime
     # Sequences become parenthesised tuples with each element escaped.
     assert (escape_literal([1, 'a', date(2001, 1, 1)])
             == "(1, 'a', toDate('2001-01-01'))")
Example #2
0
def conditions_expr(dataset,
                    conditions,
                    query: Query,
                    parsing_context: ParsingContext,
                    depth=0):
    """
    Return a boolean expression suitable for putting in the WHERE clause of the
    query.  The expression is constructed by ANDing groups of OR expressions.
    Expansion of columns is handled, as is replacement of columns with aliases,
    if the column has already been expanded and aliased elsewhere.

    depth encodes the position in the nested condition structure: 0 is the
    top-level AND list, 1 is an OR group, and anything deeper that is not a
    single condition raises InvalidConditionException.
    """
    from snuba.clickhouse.columns import Array

    # An empty/missing condition list renders as an empty WHERE fragment.
    if not conditions:
        return ''

    if depth == 0:
        # dedupe conditions at top level, but keep them in order
        # (an OrderedDict keyed by the rendered SQL acts as an ordered set;
        # empty strings produced by empty sub-groups are dropped below).
        sub = OrderedDict(
            (conditions_expr(dataset, cond, query, parsing_context, depth + 1),
             None) for cond in conditions)
        return u' AND '.join(s for s in sub.keys() if s)
    elif is_condition(conditions):
        lhs, op, lit = dataset.process_condition(conditions)

        # facilitate deduping IN conditions by sorting them.
        if op in ('IN', 'NOT IN') and isinstance(lit, tuple):
            lit = tuple(sorted(lit))

        # If the LHS is a simple column name that refers to an array column
        # (and we are not arrayJoining on that column, which would make it
        # scalar again) and the RHS is a scalar value, we assume that the user
        # actually means to check if any (or all) items in the array match the
        # predicate, so we return an `any(x == value for x in array_column)`
        # type expression. We assume that operators looking for a specific value
        # (IN, =, LIKE) are looking for rows where any array value matches, and
        # exclusionary operators (NOT IN, NOT LIKE, !=) are looking for rows
        # where all elements match (eg. all NOT LIKE 'foo').
        columns = dataset.get_dataset_schemas().get_read_schema().get_columns()
        if (isinstance(lhs, str) and lhs in columns
                and isinstance(columns[lhs].type, Array)
                and columns[lhs].base_name != query.get_arrayjoin()
                and not isinstance(lit, (list, tuple))):
            any_or_all = 'arrayExists' if op in POSITIVE_OPERATORS else 'arrayAll'
            return u'{}(x -> assumeNotNull(x {} {}), {})'.format(
                any_or_all, op, escape_literal(lit),
                column_expr(dataset, lhs, query, parsing_context))
        else:
            # Plain scalar comparison: `<column> <op> <literal>`.
            return u'{} {} {}'.format(
                column_expr(dataset, lhs, query, parsing_context), op,
                escape_literal(lit))

    elif depth == 1:
        # Members of an OR group, one level below the top-level AND list.
        sub = (conditions_expr(dataset, cond, query, parsing_context,
                               depth + 1) for cond in conditions)
        sub = [s for s in sub if s]
        res = u' OR '.join(sub)
        # Parenthesise only multi-member groups so the OR binds tighter
        # than the surrounding ANDs.
        return u'({})'.format(res) if len(sub) > 1 else res
    else:
        raise InvalidConditionException(str(conditions))
Example #3
0
 def test_escape(self):
     """Check escaping of quote characters, dates, datetimes and lists."""
     # Quote characters are backslash-escaped inside a quoted literal.
     assert escape_literal(r"'") == r"'\''"
     assert escape_literal(r"\'") == r"'\\\''"
     # Temporal values carry the 'Universal' timezone in the conversion call.
     assert escape_literal(date(2001, 1, 1)) == "toDate('2001-01-01', 'Universal')"
     expected_datetime = "toDateTime('2001-01-01T01:01:01', 'Universal')"
     assert escape_literal(datetime(2001, 1, 1, 1, 1, 1)) == expected_datetime
     # Lists are rendered as tuples of escaped elements.
     assert escape_literal(
         [1, "a", date(2001, 1, 1)]
     ) == "(1, 'a', toDate('2001-01-01', 'Universal'))"
Example #4
0
def column_expr(dataset,
                column_name,
                query: Query,
                parsing_context: ParsingContext,
                alias=None,
                aggregate=None):
    """
    Certain special column names expand into more complex expressions. Return
    a 2-tuple of:
        (expanded column expression, sanitized alias)

    Needs the body of the request for some extra data used to expand column expressions.

    NOTE(review): several branches (e.g. the quoted-literal one) return a
    plain string rather than a 2-tuple — confirm against callers.
    """
    # Either a column or an aggregate is required; an aggregate also needs
    # something to name the result (a column name or an explicit alias).
    assert column_name or aggregate
    assert not aggregate or (aggregate and (column_name or alias))
    column_name = column_name or ''

    if is_function(column_name, 0):
        # The "column" is actually a nested function expression.
        return complex_column_expr(dataset, column_name, query,
                                   parsing_context)
    elif isinstance(column_name, (list, tuple)) and aggregate:
        # Aggregate over a list of columns: rewrite as a function call.
        return complex_column_expr(dataset, [aggregate, column_name, alias],
                                   query, parsing_context)
    elif isinstance(column_name, str) and QUOTED_LITERAL_RE.match(column_name):
        # A quoted literal: strip the surrounding quotes and escape.
        return escape_literal(column_name[1:-1])
    else:
        # Plain column: let the dataset expand it (tags, contexts, etc.).
        expr = dataset.column_expr(column_name, query, parsing_context)

    if aggregate:
        expr = function_expr(aggregate, expr)

    alias = escape_alias(alias or column_name)
    return alias_expr(expr, alias, parsing_context)
    def __tag_expr(
        self,
        column_name: str,
        table_alias: str = "",
    ) -> str:
        """
        Build the expression that yields the value of one named tag.

        A tag that has been "promoted" to a top-level column resolves to
        that column; any other tag is looked up in the nested key/value
        arrays of the tags/contexts map.
        """
        nested_col, tag_key = NESTED_COL_EXPR_RE.match(column_name).group(1, 2)
        promoted = self.__promoted_columns
        if nested_col in promoted:
            # Translate the requested tag through the mapping before
            # checking whether it was promoted.
            resolved = self.__get_tag_column_map()[nested_col].get(tag_key, tag_key)
            if resolved in promoted[nested_col]:
                return qualified_column(
                    self.__string_col(resolved), table_alias)

        # Fall back to an indexOf lookup in the nested arrays.
        qualified = qualified_column(nested_col, table_alias)
        return u'{col}.value[indexOf({col}.key, {tag})]'.format(
            col=qualified, tag=escape_literal(tag_key))
Example #6
0
    def __tag_expr(
        self,
        parsed_col: ParsedNestedColumn,
        table_alias: str = "",
    ) -> str:
        """
        Build the expression that yields the value of one named tag.

        Promoted tags resolve to their dedicated top-level column; all
        other tags are fetched from the nested key/value arrays.
        """
        assert parsed_col.tag_name
        requested_tag = parsed_col.tag_name
        nested_col = parsed_col.col_name
        promoted = self.__promoted_columns
        if nested_col in promoted:
            # Map the requested tag to its actual column name, if any,
            # before checking whether it was promoted.
            resolved = self.__get_tag_column_map()[nested_col].get(
                requested_tag, requested_tag)
            if resolved in promoted[nested_col]:
                return qualified_column(
                    self.__string_col(resolved), table_alias)

        # Fall back to an indexOf lookup in the nested arrays.
        qualified = qualified_column(nested_col, table_alias)
        return "{col}.value[indexOf({col}.key, {tag})]".format(
            col=qualified, tag=escape_literal(requested_tag))
Example #7
0
def column_expr(
    dataset,
    column_name,
    query: Query,
    parsing_context: ParsingContext,
    alias=None,
    aggregate=None,
):
    """
    Certain special column names expand into more complex expressions. Return
    a 2-tuple of:
        (expanded column expression, sanitized alias)

    Needs the body of the request for some extra data used to expand column expressions.

    NOTE(review): several branches (e.g. the quoted-literal one) return a
    plain string rather than a 2-tuple — confirm against callers.
    """
    # Either a column or an aggregate is required; an aggregate also needs
    # something to name the result (a column name or an explicit alias).
    assert column_name or aggregate
    assert not aggregate or (aggregate and (column_name or alias))
    column_name = column_name or ""

    if is_function(column_name, 0):
        # The "column" is actually a nested function expression.
        return complex_column_expr(dataset, column_name, query, parsing_context)
    elif isinstance(column_name, (list, tuple)) and aggregate:
        # Aggregate over a list of columns: rewrite as a function call.
        return complex_column_expr(
            dataset, [aggregate, column_name, alias], query, parsing_context
        )
    elif isinstance(column_name, str) and QUOTED_LITERAL_RE.match(column_name):
        # A quoted literal: strip the surrounding quotes and escape.
        return escape_literal(column_name[1:-1])
    else:
        # Plain column: let the dataset expand it (tags, contexts, etc.).
        expr = dataset.column_expr(column_name, query, parsing_context)
    if aggregate:
        expr = function_expr(aggregate, expr)

    # in the ORDER BY clause, column_expr may receive column names prefixed with
    # `-`. This is meant to be used for ORDER BY ... DESC.
    # This means we need to keep the `-` outside of the aliased expression when
    # we produce something like (COL AS alias) otherwise we build an invalid
    # syntax.
    # Worse, since escape_alias already does half of this work and keeps `-`
    # outside of the escaped expression we end up in this situation:
    #
    # -events.event_id becomes (-events.event_id AS -`events.event_id`)
    #
    # Thus here we strip the `-` before processing escaping and aliases and we
    # attach it back to the expression right before returning so that
    # -events.event_id becomes -(events.event_id AS `events.event_id`)
    # or
    # -`events.event_id`
    # if the alias already existed.
    #
    # The proper solution would be to strip the `-` before getting to column
    # processing, but this will be done with the new column abstraction.
    negate, col = NEGATE_RE.match(column_name).groups()
    alias = escape_alias(alias or col)
    # The dataset expansion may itself have produced a leading `-`; strip it
    # too so the negation sits outside the alias expression.
    expr_negate, expr = NEGATE_RE.match(expr).groups()
    # expr_negate and negate should never be inconsistent with each other. Though
    # will ensure this works properly before moving the `-` stripping at the beginning
    # of the method to cover tags as well.
    return f"{negate or expr_negate}{alias_expr(expr, alias, parsing_context)}"
Example #8
0
def complex_column_expr(dataset,
                        expr,
                        query: Query,
                        parsing_context: ParsingContext,
                        depth=0):
    """
    Render a nested function expression (``[name, [args], alias]``) into a
    SQL function-call string, recursing into embedded function arguments.

    Raises ValueError when ``expr`` is not a function at this depth.
    """
    function_tuple = is_function(expr, depth)
    if function_tuple is None:
        raise ValueError(
            'complex_column_expr was given an expr %s that is not a function at depth %d.'
            % (expr, depth))

    name, args, alias = function_tuple
    rendered = []
    idx = 0
    arg_count = len(args)
    while idx < arg_count:
        # A function may be spread across two consecutive slots (its name
        # followed by its argument list), so peek at a pair first.
        pair = args[idx:idx + 2]
        if is_function(pair, depth + 1):
            rendered.append(
                complex_column_expr(
                    dataset, pair, query, parsing_context, depth + 1))
            idx += 2
            continue
        arg = args[idx]
        idx += 1
        if is_function(arg, depth + 1):  # Embedded function
            rendered.append(
                complex_column_expr(
                    dataset, arg, query, parsing_context, depth + 1))
        elif isinstance(arg, str):
            rendered.append(column_expr(dataset, arg, query, parsing_context))
        else:
            rendered.append(escape_literal(arg))

    ret = function_expr(name, ', '.join(rendered))
    return alias_expr(ret, alias, parsing_context) if alias else ret
Example #9
0
 def literal_builder(
     val: Optional[Union[str, datetime, date, List[Any], Tuple[Any], numbers.Number]]
 ) -> Any:
     """Render a literal value by delegating to ``escape_literal``."""
     escaped = escape_literal(val)
     return escaped
Example #10
0
 def simple_condition_builder(lhs: str, op: str, literal: Any) -> str:
     """Render a binary condition as ``<lhs> <op> <escaped literal>``."""
     return f"{lhs} {op} {escape_literal(literal)}"
Example #11
0
 def unpack_array_condition_builder(lhs: str, op: str, literal: Any) -> str:
     """
     Render a condition over an array column by wrapping the scalar
     comparison in arrayExists (for operators in POSITIVE_OPERATORS)
     or arrayAll (for exclusionary operators).
     """
     quantifier = "arrayExists" if op in POSITIVE_OPERATORS else "arrayAll"
     return f"{quantifier}(x -> assumeNotNull(x {op} {escape_literal(literal)}), {lhs})"