Example 1
def conditions_expr(dataset,
                    conditions,
                    query: Query,
                    parsing_context: ParsingContext,
                    depth=0):
    """
    Return a boolean expression suitable for putting in the WHERE clause of the
    query.  The expression is constructed by ANDing groups of OR expressions.
    Expansion of columns is handled, as is replacement of columns with aliases,
    if the column has already been expanded and aliased elsewhere.
    """
    from snuba.clickhouse.columns import Array

    if not conditions:
        return ''

    if depth == 0:
        # dedupe conditions at top level, but keep them in order
        sub = OrderedDict(
            (conditions_expr(dataset, cond, query, parsing_context, depth + 1),
             None) for cond in conditions)
        return u' AND '.join(s for s in sub.keys() if s)
    elif is_condition(conditions):
        lhs, op, lit = dataset.process_condition(conditions)

        # facilitate deduping IN conditions by sorting them.
        if op in ('IN', 'NOT IN') and isinstance(lit, tuple):
            lit = tuple(sorted(lit))

        # If the LHS is a simple column name that refers to an array column
        # (and we are not arrayJoining on that column, which would make it
        # scalar again) and the RHS is a scalar value, we assume that the user
        # actually means to check if any (or all) items in the array match the
        # predicate, so we return an `any(x == value for x in array_column)`
        # type expression. We assume that operators looking for a specific value
        # (IN, =, LIKE) are looking for rows where any array value matches, and
        # exclusionary operators (NOT IN, NOT LIKE, !=) are looking for rows
        # where all elements match (eg. all NOT LIKE 'foo').
        columns = dataset.get_dataset_schemas().get_read_schema().get_columns()
        if (isinstance(lhs, str) and lhs in columns
                and isinstance(columns[lhs].type, Array)
                and columns[lhs].base_name != query.get_arrayjoin()
                and not isinstance(lit, (list, tuple))):
            any_or_all = 'arrayExists' if op in POSITIVE_OPERATORS else 'arrayAll'
            return u'{}(x -> assumeNotNull(x {} {}), {})'.format(
                any_or_all, op, escape_literal(lit),
                column_expr(dataset, lhs, query, parsing_context))
        else:
            return u'{} {} {}'.format(
                column_expr(dataset, lhs, query, parsing_context), op,
                escape_literal(lit))

    elif depth == 1:
        sub = (conditions_expr(dataset, cond, query, parsing_context,
                               depth + 1) for cond in conditions)
        sub = [s for s in sub if s]
        res = u' OR '.join(sub)
        return u'({})'.format(res) if len(sub) > 1 else res
    else:
        raise InvalidConditionException(str(conditions))
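
The condition format this function walks is worth seeing concretely: the top level is a list of entries that are ANDed together, and an entry that is itself a list of simple conditions is ORed. A minimal standalone sketch of those nesting rules (not the Snuba implementation; render and its repr-based escaping are illustrative only):

def is_condition(cond):
    # A simple condition is a 3-item [lhs, operator, literal] sequence
    # whose middle element is a string operator.
    return (isinstance(cond, (list, tuple)) and len(cond) == 3
            and isinstance(cond[1], str))

def render(conditions, depth=0):
    if is_condition(conditions):
        lhs, op, lit = conditions
        return u'{} {} {}'.format(lhs, op, repr(lit))
    if depth == 0:
        # Top level: AND the sub-expressions together.
        return u' AND '.join(render(c, depth + 1) for c in conditions)
    # Depth 1: OR the simple conditions, parenthesizing when needed.
    sub = [render(c, depth + 1) for c in conditions]
    return u'({})'.format(u' OR '.join(sub)) if len(sub) > 1 else sub[0]

print(render([
    ['project_id', '=', 1],
    [['type', '=', 'error'], ['type', '=', 'transaction']],
]))
# project_id = 1 AND (type = 'error' OR type = 'transaction')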
Example 2
def all_referenced_columns(query: Query):
    """
    Return the set of all columns that are used by a query.
    """
    col_exprs: MutableSequence[Any] = []

    if query.get_arrayjoin():
        col_exprs.extend(to_list(query.get_arrayjoin()))
    if query.get_groupby():
        col_exprs.extend(to_list(query.get_groupby()))
    if query.get_orderby():
        col_exprs.extend(to_list(query.get_orderby()))
    if query.get_selected_columns():
        col_exprs.extend(to_list(query.get_selected_columns()))

    # Conditions need flattening as they can be nested as AND/OR
    if query.get_conditions():
        flat_conditions = list(
            chain(*[[c] if is_condition(c) else c
                    for c in query.get_conditions()]))
        col_exprs.extend([c[0] for c in flat_conditions])

    if query.get_aggregations():
        col_exprs.extend([a[1] for a in query.get_aggregations()])

    # Return the set of all columns referenced in any expression
    return set(chain(*[columns_in_expr(ex) for ex in col_exprs]))
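
The flattening idiom for conditions is compact enough to misread: nested (OR) groups are spliced into one flat list before each condition's LHS is taken. A small self-contained example, with a minimal stand-in for is_condition:

from itertools import chain

def is_condition(cond):
    return len(cond) == 3 and isinstance(cond[1], str)

conditions = [
    ['project_id', '=', 1],
    [['environment', '=', 'prod'], ['release', 'IN', ['a', 'b']]],
]
flat = list(chain(*[[c] if is_condition(c) else c for c in conditions]))
print([c[0] for c in flat])
# ['project_id', 'environment', 'release']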
Example 3
def detect_table(query: Query, events_only_columns: ColumnSet,
                 transactions_only_columns: ColumnSet) -> str:
    """
    Given a query, we attempt to guess whether it is better to fetch data from the
    "events" or "transactions" storage. This is going to be wrong in some cases.
    """
    # First check for a top level condition that matches either
    # type = error or type = transaction.
    conditions = query.get_conditions()
    if conditions:
        for condition in conditions:
            if is_condition(condition):
                if tuple(condition) == ("type", "=", "error"):
                    return EVENTS
                elif tuple(condition) == ("type", "=", "transaction"):
                    return TRANSACTIONS

    # Check for any conditions that reference a table specific field
    condition_columns = query.get_columns_referenced_in_conditions()
    if any(events_only_columns.get(col) for col in condition_columns):
        return EVENTS
    if any(transactions_only_columns.get(col) for col in condition_columns):
        return TRANSACTIONS

    # Check for any other references to a table specific field
    all_referenced_columns = query.get_all_referenced_columns()
    if any(events_only_columns.get(col) for col in all_referenced_columns):
        return EVENTS
    if any(
            transactions_only_columns.get(col)
            for col in all_referenced_columns):
        return TRANSACTIONS

    # Use events by default
    return EVENTS
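
The routing is first-match-wins: a type condition decides immediately, then events-only columns are checked before transactions-only ones, with events as the fallback. A toy illustration, using plain dicts in place of ColumnSet (whose get() likewise returns None for unknown columns):

EVENTS, TRANSACTIONS = 'events', 'transactions'
events_only = {'group_id': 'UInt64'}
transactions_only = {'duration': 'Float64'}

def route(referenced_columns):
    if any(events_only.get(col) for col in referenced_columns):
        return EVENTS
    if any(transactions_only.get(col) for col in referenced_columns):
        return TRANSACTIONS
    return EVENTS  # default

print(route(['duration']))   # transactions
print(route(['timestamp']))  # events (the default)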
Example 4
 def __replace_col_in_condition(
     self,
     condition: Condition,
     old_column: str,
     new_column: str,
 ) -> Condition:
     """
     Replaces a column in a structured condition. This sits one level above
     replace_col_in_expression, since conditions take the form
     [expression, operator, literal]. (Strictly speaking the right hand side
     should be an expression as well, but this constraint is imposed by the
     query schema, and get_all_referenced_columns behaves accordingly.)
     Conditions can also be nested.
     """
     if is_condition(condition):
         return [
             self.__replace_col_in_expression(condition[0], old_column,
                                              new_column),
             condition[1],
             condition[2],
         ]
     elif isinstance(condition, (tuple, list)):
         # nested condition
         return [
             self.__replace_col_in_condition(cond, old_column, new_column)
             for cond in condition
         ]
     else:
         # Don't know what this is
         return condition
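
A standalone sketch of the same recursion, handling only plain string columns on the left hand side (the real __replace_col_in_expression also descends into function expressions):

def is_condition(cond):
    return len(cond) == 3 and isinstance(cond[1], str)

def replace_col(condition, old_column, new_column):
    if is_condition(condition):
        lhs = new_column if condition[0] == old_column else condition[0]
        return [lhs, condition[1], condition[2]]
    if isinstance(condition, (tuple, list)):
        # Nested condition: recurse into each element.
        return [replace_col(c, old_column, new_column) for c in condition]
    return condition

print(replace_col(
    [['ts', '>=', '2020-01-01'], [['ts', '<', '2020-02-01'], ['col', '=', 1]]],
    'ts', 'timestamp',
))
# [['timestamp', '>=', '2020-01-01'],
#  [['timestamp', '<', '2020-02-01'], ['col', '=', 1]]]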
Example 5
 def __add_flat_conditions(
         self,
         col_exprs: MutableSequence[Any],
         conditions: Optional[Sequence[Condition]] = None) -> None:
     if conditions:
         flat_conditions = list(
             chain(*[[c] if is_condition(c) else c for c in conditions]))
         col_exprs.extend([c[0] for c in flat_conditions])
Example 6
def parse_and_run_query(dataset, request: Request, timer) -> QueryResult:
    from_date, to_date = TimeSeriesExtensionProcessor.get_time_limit(
        request.extensions['timeseries'])

    extensions = dataset.get_extensions()

    for name, extension in extensions.items():
        extension.get_processor().process_query(request.query,
                                                request.extensions[name],
                                                request.settings)
    request.query.add_conditions(dataset.default_conditions())

    if request.settings.get_turbo():
        request.query.set_final(False)

    prewhere_conditions = []
    # Add any condition to PREWHERE if:
    # - It is a single top-level condition (not OR-nested), and
    # - Any of its referenced columns are in dataset.get_prewhere_keys()
    prewhere_candidates = [(util.columns_in_expr(cond[0]), cond)
                           for cond in request.query.get_conditions()
                           if util.is_condition(cond) and any(
                               col in dataset.get_prewhere_keys()
                               for col in util.columns_in_expr(cond[0]))]
    # Use the condition that has the highest priority (based on the
    # position of its columns in the prewhere keys list)
    prewhere_candidates = sorted(
        [(min(dataset.get_prewhere_keys().index(col)
              for col in cols if col in dataset.get_prewhere_keys()), cond)
         for cols, cond in prewhere_candidates],
        key=lambda priority_and_col: priority_and_col[0])
    if prewhere_candidates:
        prewhere_conditions = [cond for _, cond in prewhere_candidates
                               ][:settings.MAX_PREWHERE_CONDITIONS]
        request.query.set_conditions(
            list(
                filter(lambda cond: cond not in prewhere_conditions,
                       request.query.get_conditions())))

    source = dataset.get_dataset_schemas().get_read_schema().get_data_source()
    # TODO: consider moving the performance logic and the pre_where generation into
    # ClickhouseQuery since they are Clickhouse specific
    query = ClickhouseQuery(dataset, request.query, request.settings,
                            prewhere_conditions)
    timer.mark('prepare_query')

    stats = {
        'clickhouse_table': source,
        'final': request.query.get_final(),
        'referrer': http_request.referrer,
        'num_days': (to_date - from_date).days,
        'sample': request.query.get_sample(),
    }

    return raw_query(request, query, clickhouse_ro, timer, stats)
Example 7
 def _get_prewhere_candidates(
     self, query: Query, prewhere_keys: Sequence[str]
 ) -> Sequence[Tuple[Iterable[str], Condition]]:
     # Add any condition to PREWHERE if:
     # - It is a single top-level condition (not OR-nested), and
     # - Any of its referenced columns are in prewhere_keys
     conditions = query.get_conditions()
     if not conditions:
         return []
     return [(util.columns_in_expr(cond[0]), cond) for cond in conditions
             if util.is_condition(cond) and cond[1] in ALLOWED_OPERATORS
             and any(col in prewhere_keys
                     for col in util.columns_in_expr(cond[0]))]
Example 8
    def find_project_id_sets(
        conditions: Sequence[Condition]) -> Sequence[Set[int]]:
        """
        Scans a potentially nested sequence of conditions.
        For each simple condition adds to the output the set of project ids referenced
        by the condition.
        For each nested condition, it assumes it is a union of simple conditions
        (the only valid case supported by the Query object) and adds the union of the
        referenced project ids to the output.
        """
        project_id_sets: List[Set[int]] = list()
        for c in conditions:
            if is_condition(c):
                # This is a simple condition. Can extract the project ids directly.
                # Supports these kinds of conditions
                # ["col", "=", 1]
                # ["col", "IN", [1,2,3]]
                # ["col", "IN", (1,2,3)]
                if c[0] == project_column:
                    if c[1] == "=" and isinstance(c[2], int):
                        project_id_sets.append({c[2]})
                    elif c[1] == "IN" and all(
                            isinstance(project, int) for project in c[2]):
                        project_id_sets.append(set(c[2]))

            elif all(is_condition(second_level) for second_level in c):
                # This is supposed to be a union of simple conditions. Need to union
                # the sets of project ids.
                sets_to_unite = find_project_id_sets(c)
                if sets_to_unite:
                    project_id_sets.append(
                        reduce(lambda x, y: x | y, sets_to_unite))
            else:
                raise ValueError(f"Invalid condition {conditions}")

        return project_id_sets
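
A self-contained version of the same logic. Since top-level conditions are ANDed, a caller would typically intersect the returned sets to get the effective project ids (an assumption about usage here, not shown in the excerpt):

from functools import reduce

def is_condition(cond):
    return len(cond) == 3 and isinstance(cond[1], str)

def find_project_id_sets(conditions, project_column='project_id'):
    project_id_sets = []
    for c in conditions:
        if is_condition(c):
            if c[0] == project_column:
                if c[1] == '=' and isinstance(c[2], int):
                    project_id_sets.append({c[2]})
                elif c[1] == 'IN':
                    project_id_sets.append(set(c[2]))
        else:
            # Nested condition: a union (OR) of simple conditions.
            nested = find_project_id_sets(c, project_column)
            if nested:
                project_id_sets.append(reduce(lambda x, y: x | y, nested))
    return project_id_sets

sets = find_project_id_sets([
    ['project_id', 'IN', [1, 2, 3]],
    [['project_id', '=', 2], ['project_id', '=', 3]],
])
print(sets)                              # [{1, 2, 3}, {2, 3}]
print(reduce(lambda x, y: x & y, sets))  # {2, 3}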
Example 9
 def process_query(self, query: Query, request_settings: RequestSettings,) -> None:
     max_prewhere_conditions: int = (
         self.__max_prewhere_conditions or settings.MAX_PREWHERE_CONDITIONS
     )
     prewhere_keys = query.get_data_source().get_prewhere_candidates()
     if not prewhere_keys:
         return
     prewhere_conditions: Sequence[Condition] = []
     # Add any condition to PREWHERE if:
     # - It is a single top-level condition (not OR-nested), and
     # - Any of its referenced columns are in prewhere_keys
     conditions = query.get_conditions()
     if not conditions:
         return
     prewhere_candidates = [
         (util.columns_in_expr(cond[0]), cond)
         for cond in conditions
         if util.is_condition(cond)
         and any(col in prewhere_keys for col in util.columns_in_expr(cond[0]))
     ]
     # Use the condition that has the highest priority (based on the
     # position of its columns in the prewhere keys list)
     prewhere_candidates = sorted(
         [
             (
                 min(
                     prewhere_keys.index(col) for col in cols if col in prewhere_keys
                 ),
                 cond,
             )
             for cols, cond in prewhere_candidates
         ],
         key=lambda priority_and_col: priority_and_col[0],
     )
     if prewhere_candidates:
         prewhere_conditions = [cond for _, cond in prewhere_candidates][
             :max_prewhere_conditions
         ]
         query.set_conditions(
             list(filter(lambda cond: cond not in prewhere_conditions, conditions))
         )
     query.set_prewhere(prewhere_conditions)
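
The priority step is easiest to see with concrete data. Assuming prewhere_keys is ordered from highest to lowest priority, each condition is ranked by the best (lowest) index among its referenced columns:

prewhere_keys = ['project_id', 'event_id', 'group_id']
candidates = [
    (['group_id'], ['group_id', '=', 7]),
    (['project_id', 'timestamp'], ['project_id', 'IN', [1, 2]]),
]
ranked = sorted(
    [
        (min(prewhere_keys.index(col) for col in cols if col in prewhere_keys), cond)
        for cols, cond in candidates
    ],
    key=lambda priority_and_cond: priority_and_cond[0],
)
print([cond for _, cond in ranked])
# [['project_id', 'IN', [1, 2]], ['group_id', '=', 7]]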
Example 10
    def __is_optimizable(self, condition: Condition,
                         column: str) -> Optional[OptimizableCondition]:
        """
        Recognizes whether the condition can be optimized. The condition must:
        - be a top level condition (no nested OR),
        - be in the form tag[t] = value or tag[t] != value (both EQ and NEQ
          conditions are optimized),
        - not wrap the tag reference in a function, with the exception of
          ifNull.
        """
        if not is_condition(condition):
            return None
        if condition[1] not in [Operand.EQ.value, Operand.NEQ.value]:
            return None
        if not isinstance(condition[2], str):
            # We can only support literals for now.
            return None
        lhs = condition[0]

        # This unpacks the ifNull function. It is just an optimization to make
        # this class more useful, since the product wraps tag access in ifNull
        # very often and it is a trivial function to unpack. We could extend it
        # to more functions later.
        function_expr = is_function(lhs, 0)
        if function_expr and function_expr[0] == "ifNull" and len(
                function_expr[1]) > 0:
            lhs = function_expr[1][0]
        if not isinstance(lhs, str):
            return None

        # Now we have a condition in the form of: ["tags[something]", "=", "a string"]
        tag = NESTED_COL_EXPR_RE.match(lhs)
        if tag and tag[1] == self.__nested_col:
            # tag[0] is the full expression that matches the re.
            nested_col_key = tag[2]
            return OptimizableCondition(
                nested_col_key=nested_col_key,
                operand=Operand.EQ if condition[1] == "=" else Operand.NEQ,
                value=condition[2],
            )
        return None
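
NESTED_COL_EXPR_RE is defined elsewhere in Snuba; the pattern below is a plausible stand-in (an assumption, not the real regex) that captures the column name and the bracketed key separately:

import re

# Hypothetical approximation of the real pattern.
NESTED_COL_EXPR_RE = re.compile(r'^([a-zA-Z0-9_.]+)\[([a-zA-Z0-9_.:-]+)\]$')

tag = NESTED_COL_EXPR_RE.match('tags[sentry:release]')
if tag:
    # tag[0] is the full expression, tag[1] the nested column,
    # tag[2] the key inside the brackets.
    print(tag[1], tag[2])  # tags sentry:release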
Example 11
        def extract_tags_from_condition(
            cond: Sequence[Condition],
        ) -> Optional[List[str]]:
            if not cond:
                return []

            ret = []
            for c in cond:
                if not is_condition(c):
                    # This is an OR
                    return None

                if c[1] == "=" and c[0] == "tags_key" and isinstance(c[2], str):
                    ret.append(str(c[2]))

                elif (
                    c[1] == "IN"
                    and c[0] == "tags_key"
                    and isinstance(c[2], (list, tuple))
                ):
                    ret.extend([str(tag) for tag in c[2]])

            return ret
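
A quick check of the helper's behavior on a mixed condition list, using the minimal is_condition stand-in from the earlier sketches:

def is_condition(cond):
    return len(cond) == 3 and isinstance(cond[1], str)

def extract_tags_from_condition(cond):
    if not cond:
        return []
    ret = []
    for c in cond:
        if not is_condition(c):
            return None  # OR-nested: give up
        if c[1] == '=' and c[0] == 'tags_key' and isinstance(c[2], str):
            ret.append(str(c[2]))
        elif c[1] == 'IN' and c[0] == 'tags_key' and isinstance(c[2], (list, tuple)):
            ret.extend(str(tag) for tag in c[2])
    return ret

print(extract_tags_from_condition([
    ['tags_key', 'IN', ['browser', 'os']],
    ['project_id', '=', 1],
]))
# ['browser', 'os']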
Example 12
    def process_query(self, query: Query,
                      request_settings: RequestSettings) -> None:
        conditions = query.get_conditions()
        if not conditions:
            return

        # Enable the processor only if we have enough data in the flattened
        # columns, which have been populated since BEGINNING_OF_TIME. If the
        # query starts earlier than that, we do not apply the optimization.
        if self.__beginning_of_time:
            apply_optimization = False
            for condition in conditions:
                if (is_condition(condition) and isinstance(condition[0], str)
                        and condition[0] in self.__timestamp_cols
                        and condition[1] in (">=", ">")
                        and isinstance(condition[2], str)):
                    try:
                        start_ts = parse_datetime(condition[2])
                        if (start_ts -
                                self.__beginning_of_time).total_seconds() > 0:
                            apply_optimization = True
                    except Exception:
                        # We should not get here: it would mean the from
                        # timestamp is malformed. Returning is just for safety.
                        logger.error(
                            "Cannot parse start date for NestedFieldOptimizer: %r",
                            condition,
                        )
                        return
            if not apply_optimization:
                return

        # Do not use flattened tags if tags are being unpacked anyway. In that
        # case using flattened tags would only load an additional column, thus
        # making the query heavier and slower.
        if self.__has_tags(query.get_arrayjoin_from_ast()):
            return
        if query.get_groupby_from_ast():
            for expression in query.get_groupby_from_ast():
                if self.__has_tags(expression):
                    return
        if self.__has_tags(query.get_having_from_ast()):
            return

        if query.get_orderby_from_ast():
            for orderby in query.get_orderby_from_ast():
                if self.__has_tags(orderby.expression):
                    return

        new_conditions = []
        positive_like_expression: List[str] = []
        negative_like_expression: List[str] = []

        for c in conditions:
            keyvalue = self.__is_optimizable(c, self.__nested_col)
            if not keyvalue:
                new_conditions.append(c)
            else:
                expression = f"{escape_field(keyvalue.nested_col_key)}={escape_field(keyvalue.value)}"
                if keyvalue.operand == Operand.EQ:
                    positive_like_expression.append(expression)
                else:
                    negative_like_expression.append(expression)

        if positive_like_expression:
            # Positive conditions "=" are all merged together in one LIKE expression
            positive_like_expression = sorted(positive_like_expression)
            like_formatted = f"%|{'|%|'.join(positive_like_expression)}|%"
            new_conditions.append(
                [self.__flattened_col, "LIKE", like_formatted])

        for expression in negative_like_expression:
            # Negative conditions "!=" cannot be merged together. We can still transform
            # them into NOT LIKE statements, but each condition has to be one
            # statement.
            not_like_formatted = f"%|{expression}|%"
            new_conditions.append(
                [self.__flattened_col, "NOT LIKE", not_like_formatted])

        query.set_conditions(new_conditions)
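
The LIKE pattern format deserves a concrete example. Assuming the flattened column stores tags as |k1=v1|k2=v2|...|, a single pattern can assert several key/value pairs at once (in sorted order, so equivalent queries produce identical SQL):

positive = sorted(['browser=firefox', 'os=linux'])
like_formatted = "%|{}|%".format('|%|'.join(positive))
print(like_formatted)
# %|browser=firefox|%|os=linux|%

# A negative condition instead becomes its own NOT LIKE statement:
print("%|{}|%".format('browser=chrome'))
# %|browser=chrome|%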
Example 13
def parse_conditions(
    operand_builder: Callable[[Any, ColumnSet, Set[str]], TExpression],
    and_builder: Callable[[Sequence[TExpression]], Optional[TExpression]],
    or_builder: Callable[[Sequence[TExpression]], Optional[TExpression]],
    unpack_array_condition_builder: Callable[[TExpression, str, Any],
                                             TExpression],
    simple_condition_builder: Callable[[TExpression, str, Any], TExpression],
    entity: Entity,
    conditions: Any,
    arrayjoin_cols: Set[str],
    depth: int = 0,
) -> Optional[TExpression]:
    """
    Return a boolean expression suitable for putting in the WHERE clause of the
    query.  The expression is constructed by ANDing groups of OR expressions.
    Expansion of columns is handled, as is replacement of columns with aliases,
    if the column has already been expanded and aliased elsewhere.

    operand_builder: Builds the TExpression representing the left hand side
      of a simple condition. This can be as nested as the user wants
    and_builder / or_builder: Combine a list of expressions in AND/OR
    unpack_array_condition_builder: Deals with a special case where we unpack conditions
      on array columns. More details in the code.
    simple_condition_builder: Generates a simple condition made by expression on the
      left hand side, an operator and a literal on the right hand side.
    """
    from snuba.clickhouse.columns import Array

    if not conditions:
        return None

    if depth == 0:
        # dedupe conditions at top level, but keep them in order
        sub = OrderedDict((
            parse_conditions(
                operand_builder,
                and_builder,
                or_builder,
                unpack_array_condition_builder,
                simple_condition_builder,
                entity,
                cond,
                arrayjoin_cols,
                depth + 1,
            ),
            None,
        ) for cond in conditions)
        return and_builder([s for s in sub.keys() if s])
    elif is_condition(conditions):
        try:
            lhs, op, lit = conditions
        except Exception as cause:
            raise ParsingException(f"Cannot process condition {conditions}",
                                   cause) from cause

        # facilitate deduping IN conditions by sorting them.
        if op in ("IN", "NOT IN") and isinstance(lit, tuple):
            lit = tuple(sorted(lit))

        # If the LHS is a simple column name that refers to an array column
        # (and we are not arrayJoining on that column, which would make it
        # scalar again) and the RHS is a scalar value, we assume that the user
        # actually means to check if any (or all) items in the array match the
        # predicate, so we return an `any(x == value for x in array_column)`
        # type expression. We assume that operators looking for a specific value
        # (IN, =, LIKE) are looking for rows where any array value matches, and
        # exclusionary operators (NOT IN, NOT LIKE, !=) are looking for rows
        # where all elements match (eg. all NOT LIKE 'foo').
        columns = entity.get_data_model()
        if (isinstance(lhs, str) and lhs in columns
                and isinstance(columns[lhs].type, Array)
                and columns[lhs].base_name not in arrayjoin_cols
                and columns[lhs].flattened not in arrayjoin_cols
                and not isinstance(lit, (list, tuple))):
            return unpack_array_condition_builder(
                operand_builder(lhs, entity.get_data_model(), arrayjoin_cols),
                op,
                lit,
            )
        else:
            return simple_condition_builder(
                operand_builder(lhs, entity.get_data_model(), arrayjoin_cols),
                op,
                lit,
            )

    elif depth == 1:
        sub_expression = (parse_conditions(
            operand_builder,
            and_builder,
            or_builder,
            unpack_array_condition_builder,
            simple_condition_builder,
            entity,
            cond,
            arrayjoin_cols,
            depth + 1,
        ) for cond in conditions)
        return or_builder([s for s in sub_expression if s])
    else:
        raise InvalidConditionException(str(conditions))
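
The builder-callback design means the same traversal can emit SQL text or an AST, depending on what the caller injects. Hypothetical string-producing builders matching the signatures above:

from typing import Optional, Sequence

def and_builder(expressions: Sequence[str]) -> Optional[str]:
    return ' AND '.join(expressions) if expressions else None

def or_builder(expressions: Sequence[str]) -> Optional[str]:
    if not expressions:
        return None
    if len(expressions) == 1:
        return expressions[0]
    return '({})'.format(' OR '.join(expressions))

def simple_condition_builder(lhs: str, op: str, literal: object) -> str:
    return '{} {} {}'.format(lhs, op, repr(literal))

print(and_builder([
    simple_condition_builder('project_id', '=', 1),
    or_builder([
        simple_condition_builder('type', '=', 'error'),
        simple_condition_builder('type', '=', 'transaction'),
    ]),
]))
# project_id = 1 AND (type = 'error' OR type = 'transaction')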
Example 14
def _identify_condition(condition: AnyType, field: str, operator: str) -> bool:
    return (is_condition(condition) and condition[0] == field
            and condition[1] == operator)
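
Example usage, with a minimal stand-in for snuba.util.is_condition so the snippet runs on its own:

def is_condition(cond):
    return len(cond) == 3 and isinstance(cond[1], str)

def _identify_condition(condition, field, operator):
    return (is_condition(condition) and condition[0] == field
            and condition[1] == operator)

print(_identify_condition(['type', '=', 'error'], 'type', '='))  # True
print(_identify_condition([['a', '=', 1]], 'type', '='))         # False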
Example 15
def parse_and_run_query(validated_body, timer):
    body = deepcopy(validated_body)
    turbo = body.get('turbo', False)
    max_days, table, date_align, config_sample, force_final, max_group_ids_exclude = state.get_configs([
        ('max_days', None),
        ('clickhouse_table', settings.CLICKHOUSE_TABLE),
        ('date_align_seconds', 1),
        ('sample', 1),
        # 1: always use FINAL, 0: never use final, undefined/None: use project setting.
        ('force_final', 0 if turbo else None),
        ('max_group_ids_exclude', settings.REPLACER_MAX_GROUP_IDS_TO_EXCLUDE),
    ])
    stats = {}
    to_date = util.parse_datetime(body['to_date'], date_align)
    from_date = util.parse_datetime(body['from_date'], date_align)
    assert from_date <= to_date

    if max_days is not None and (to_date - from_date).days > max_days:
        from_date = to_date - timedelta(days=max_days)

    where_conditions = body.get('conditions', [])
    where_conditions.extend([
        ('timestamp', '>=', from_date),
        ('timestamp', '<', to_date),
        ('deleted', '=', 0),
    ])
    # NOTE: we rely entirely on the schema to make sure that regular snuba
    # queries are required to send a project_id filter. Some other special
    # internal query types do not require a project_id filter.
    project_ids = util.to_list(body['project'])
    if project_ids:
        where_conditions.append(('project_id', 'IN', project_ids))

    having_conditions = body.get('having', [])

    aggregate_exprs = [
        util.column_expr(col, body, alias, agg)
        for (agg, col, alias) in body['aggregations']
    ]
    groupby = util.to_list(body['groupby'])
    group_exprs = [util.column_expr(gb, body) for gb in groupby]

    selected_cols = [util.column_expr(util.tuplify(colname), body)
                     for colname in body.get('selected_columns', [])]

    select_exprs = group_exprs + aggregate_exprs + selected_cols
    select_clause = u'SELECT {}'.format(', '.join(select_exprs))

    from_clause = u'FROM {}'.format(table)

    # For now, we only need FINAL if:
    #    1. The project has been marked as needing FINAL (in redis) because of recent
    #       replacements (and it affects too many groups for us just to exclude
    #       those groups from the query)
    #    OR
    #    2. the force_final setting = 1
    needs_final, exclude_group_ids = get_projects_query_flags(project_ids)
    if len(exclude_group_ids) > max_group_ids_exclude:
        # Cap the number of groups to exclude by query and flip to using FINAL if necessary
        needs_final = True
        exclude_group_ids = []

    used_final = False
    if force_final == 1 or (force_final is None and needs_final):
        from_clause = u'{} FINAL'.format(from_clause)
        used_final = True
    elif exclude_group_ids:
        where_conditions.append(('group_id', 'NOT IN', exclude_group_ids))

    sample = body.get('sample', settings.TURBO_SAMPLE_RATE if turbo else config_sample)
    if sample != 1:
        from_clause = u'{} SAMPLE {}'.format(from_clause, sample)

    joins = []

    if 'arrayjoin' in body:
        joins.append(u'ARRAY JOIN {}'.format(body['arrayjoin']))
    join_clause = ' '.join(joins)

    where_clause = ''
    if where_conditions:
        where_conditions = list(set(util.tuplify(where_conditions)))
        where_clause = u'WHERE {}'.format(util.conditions_expr(where_conditions, body))

    prewhere_conditions = []
    if settings.PREWHERE_KEYS:
        # Add any condition to PREWHERE if:
        # - It is a single top-level condition (not OR-nested), and
        # - Any of its referenced columns are in PREWHERE_KEYS
        prewhere_candidates = [
            (util.columns_in_expr(cond[0]), cond)
            for cond in where_conditions if util.is_condition(cond) and
            any(col in settings.PREWHERE_KEYS for col in util.columns_in_expr(cond[0]))
        ]
        # Use the condition that has the highest priority (based on the
        # position of its columns in the PREWHERE_KEYS list)
        prewhere_candidates = sorted([
            (min(settings.PREWHERE_KEYS.index(col) for col in cols if col in settings.PREWHERE_KEYS), cond)
            for cols, cond in prewhere_candidates
        ])
        if prewhere_candidates:
            prewhere_conditions = [cond for _, cond in prewhere_candidates][:settings.MAX_PREWHERE_CONDITIONS]

    prewhere_clause = ''
    if prewhere_conditions:
        prewhere_clause = u'PREWHERE {}'.format(util.conditions_expr(prewhere_conditions, body))

    having_clause = ''
    if having_conditions:
        assert groupby, 'found HAVING clause with no GROUP BY'
        having_clause = u'HAVING {}'.format(util.conditions_expr(having_conditions, body))

    group_clause = ', '.join(util.column_expr(gb, body) for gb in groupby)
    if group_clause:
        if body.get('totals', False):
            group_clause = 'GROUP BY ({}) WITH TOTALS'.format(group_clause)
        else:
            group_clause = 'GROUP BY ({})'.format(group_clause)

    order_clause = ''
    if body.get('orderby'):
        orderby = [util.column_expr(util.tuplify(ob), body) for ob in util.to_list(body['orderby'])]
        orderby = [u'{} {}'.format(
            ob.lstrip('-'),
            'DESC' if ob.startswith('-') else 'ASC'
        ) for ob in orderby]
        order_clause = u'ORDER BY {}'.format(', '.join(orderby))

    limitby_clause = ''
    if 'limitby' in body:
        limitby_clause = 'LIMIT {} BY {}'.format(*body['limitby'])

    limit_clause = ''
    if 'limit' in body:
        limit_clause = 'LIMIT {}, {}'.format(body.get('offset', 0), body['limit'])

    sql = ' '.join([c for c in [
        select_clause,
        from_clause,
        join_clause,
        prewhere_clause,
        where_clause,
        group_clause,
        having_clause,
        order_clause,
        limitby_clause,
        limit_clause
    ] if c])

    timer.mark('prepare_query')

    stats.update({
        'clickhouse_table': table,
        'final': used_final,
        'referrer': request.referrer,
        'num_days': (to_date - from_date).days,
        'num_projects': len(project_ids),
        'sample': sample,
    })

    return util.raw_query(
        validated_body, sql, clickhouse_ro, timer, stats
    )
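
The FINAL-versus-exclusion trade-off above can be summarized as a small decision function (the threshold and return values here are illustrative, not the production API):

def choose_strategy(needs_final, exclude_group_ids, max_group_ids_exclude=500):
    # Excluding a short list of replaced groups is cheaper than FINAL,
    # but past the threshold we fall back to FINAL.
    if needs_final or len(exclude_group_ids) > max_group_ids_exclude:
        return 'FINAL', []
    if exclude_group_ids:
        return 'NOT IN', exclude_group_ids
    return 'PLAIN', []

print(choose_strategy(False, [1, 2, 3]))         # ('NOT IN', [1, 2, 3])
print(choose_strategy(False, list(range(501))))  # ('FINAL', [])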