class TestProjectExtensionWithGroups(BaseTest):
    """Exercises ProjectWithGroupsProcessor: the project IN filter,
    FINAL promotion for projects with pending replacements, and
    group-id exclusion bounded by ``max_group_ids_exclude``."""

    def setup_method(self, test_method):
        super().setup_method(test_method)
        raw_data = {'project': 2}
        self.extension = ProjectExtension(
            processor=ProjectWithGroupsProcessor()
        )
        self.valid_data = validate_jsonschema(raw_data, self.extension.get_schema())
        self.query = Query({
            "conditions": []
        })

    def _settings(self, turbo=False):
        # Every test uses consistent=False / debug=False; only turbo varies.
        return RequestSettings(turbo=turbo, consistent=False, debug=False)

    def _run(self, settings):
        # Apply the extension processor to the shared query fixture.
        self.extension.get_processor().process_query(
            self.query, self.valid_data, settings
        )

    def test_with_turbo(self):
        # Turbo mode: only the project filter is added, no FINAL logic.
        self._run(self._settings(turbo=True))
        assert self.query.get_conditions() == [('project_id', 'IN', [2])]

    def test_without_turbo_with_projects_needing_final(self):
        replacer.set_project_needs_final(2)
        self._run(self._settings())
        assert self.query.get_conditions() == [('project_id', 'IN', [2])]
        assert self.query.get_final()

    def test_without_turbo_without_projects_needing_final(self):
        self._run(self._settings())
        assert self.query.get_conditions() == [('project_id', 'IN', [2])]
        assert not self.query.get_final()

    def test_when_there_are_not_many_groups_to_exclude(self):
        # Below the limit: groups are excluded via NOT IN, no FINAL.
        state.set_config('max_group_ids_exclude', 5)
        replacer.set_project_exclude_groups(2, [100, 101, 102])
        self._run(self._settings())
        expected = [
            ('project_id', 'IN', [2]),
            (['assumeNotNull', ['group_id']], 'NOT IN', [100, 101, 102])
        ]
        assert self.query.get_conditions() == expected
        assert not self.query.get_final()

    def test_when_there_are_too_many_groups_to_exclude(self):
        # Above the limit: fall back to FINAL instead of a huge NOT IN.
        state.set_config('max_group_ids_exclude', 2)
        replacer.set_project_exclude_groups(2, [100, 101, 102])
        self._run(self._settings())
        assert self.query.get_conditions() == [('project_id', 'IN', [2])]
        assert self.query.get_final()
def __init__(
    self,
    query: Query,
    settings: RequestSettings,
) -> None:
    """Capture an immutable snapshot of a Snuba Query plus request
    settings so the Clickhouse-side representation can be processed
    independently of the original Query object.

    :param query: the Snuba query whose fields are copied here.
    :param settings: per-request settings (only turbo is read here).
    """
    # Snuba query structure
    # Referencing them here directly since it makes it easier
    # to process this query independently from the Snuba Query
    # and there is no risk in doing so since they are immutable.
    self.__selected_columns = query.get_selected_columns_from_ast()
    self.__condition = query.get_condition_from_ast()
    self.__groupby = query.get_groupby_from_ast()
    self.__having = query.get_having_from_ast()
    self.__orderby = query.get_orderby_from_ast()
    self.__data_source = query.get_data_source()
    self.__arrayjoin = query.get_arrayjoin_from_ast()
    self.__granularity = query.get_granularity()
    self.__limit = query.get_limit()
    self.__limitby = query.get_limitby()
    self.__offset = query.get_offset()

    # HAVING without GROUP BY is not valid SQL; fail fast here rather
    # than at query execution time.
    if self.__having:
        assert self.__groupby, "found HAVING clause with no GROUP BY"

    # Clickhouse specific fields. Some are still in the Snuba
    # query and have to be moved.
    self.__turbo = settings.get_turbo()
    self.__final = query.get_final()
    self.__sample = query.get_sample()
    self.__hastotals = query.has_totals()
    # TODO: Pre where processing will become a step in Clickhouse Query processing
    # instead of being pulled from the Snuba Query
    self.__prewhere = query.get_prewhere_ast()

    self.__settings = settings
    # Lazily-built SQL string; None until first formatted.
    self.__formatted_query: Optional[str] = None
def __init__(
    self,
    dataset: Dataset,
    query: Query,
    settings: RequestSettings,
) -> None:
    """Format the Snuba query into a single Clickhouse SQL string.

    Builds each clause (SELECT, FROM, ARRAY JOIN, PREWHERE, WHERE,
    GROUP BY, HAVING, ORDER BY, LIMIT BY, LIMIT) independently and
    joins the non-empty ones with single spaces into
    ``self.__formatted_query``.

    :param dataset: used by column_expr/conditions_expr to resolve columns.
    :param query: the Snuba query to format.
    :param settings: request settings; turbo may force a SAMPLE rate.
    """
    parsing_context = ParsingContext()

    # Aggregations arrive as (function, column, alias) triples.
    aggregate_exprs = [
        column_expr(dataset, col, query, parsing_context, alias, agg)
        for (agg, col, alias) in query.get_aggregations()
    ]
    groupby = util.to_list(query.get_groupby())
    group_exprs = [
        column_expr(dataset, gb, query, parsing_context) for gb in groupby
    ]
    column_names = query.get_selected_columns() or []
    selected_cols = [
        column_expr(dataset, util.tuplify(colname), query, parsing_context)
        for colname in column_names
    ]
    # SELECT lists groupby expressions first, then aggregates, then columns.
    select_clause = u"SELECT {}".format(
        ", ".join(group_exprs + aggregate_exprs + selected_cols))

    from_clause = u"FROM {}".format(query.get_data_source().format_from())
    if query.get_final():
        from_clause = u"{} FINAL".format(from_clause)

    # SAMPLE only applies to data sources that support it; an explicit
    # sample on the query wins over the turbo default rate.
    if not query.get_data_source().supports_sample():
        sample_rate = None
    else:
        if query.get_sample():
            sample_rate = query.get_sample()
        elif settings.get_turbo():
            sample_rate = snuba_settings.TURBO_SAMPLE_RATE
        else:
            sample_rate = None
    if sample_rate:
        from_clause = u"{} SAMPLE {}".format(from_clause, sample_rate)

    join_clause = ""
    if query.get_arrayjoin():
        join_clause = u"ARRAY JOIN {}".format(query.get_arrayjoin())

    where_clause = ""
    if query.get_conditions():
        where_clause = u"WHERE {}".format(
            conditions_expr(dataset, query.get_conditions(), query, parsing_context))

    prewhere_clause = ""
    if query.get_prewhere():
        prewhere_clause = u"PREWHERE {}".format(
            conditions_expr(dataset, query.get_prewhere(), query, parsing_context))

    group_clause = ""
    if groupby:
        group_clause = "GROUP BY ({})".format(", ".join(
            column_expr(dataset, gb, query, parsing_context) for gb in groupby))
        if query.has_totals():
            group_clause = "{} WITH TOTALS".format(group_clause)

    having_clause = ""
    having_conditions = query.get_having()
    if having_conditions:
        # HAVING without GROUP BY would be invalid SQL.
        assert groupby, "found HAVING clause with no GROUP BY"
        having_clause = u"HAVING {}".format(
            conditions_expr(dataset, having_conditions, query, parsing_context))

    order_clause = ""
    if query.get_orderby():
        orderby = [
            column_expr(dataset, util.tuplify(ob), query, parsing_context)
            for ob in util.to_list(query.get_orderby())
        ]
        # A leading "-" on the expression means descending order.
        orderby = [
            u"{} {}".format(ob.lstrip("-"), "DESC" if ob.startswith("-") else "ASC")
            for ob in orderby
        ]
        order_clause = u"ORDER BY {}".format(", ".join(orderby))

    limitby_clause = ""
    if query.get_limitby() is not None:
        limitby_clause = "LIMIT {} BY {}".format(*query.get_limitby())

    limit_clause = ""
    if query.get_limit() is not None:
        # Clickhouse "LIMIT offset, count" form.
        limit_clause = "LIMIT {}, {}".format(query.get_offset(), query.get_limit())

    # Join only the clauses that were actually produced.
    self.__formatted_query = " ".join([
        c for c in [
            select_clause,
            from_clause,
            join_clause,
            prewhere_clause,
            where_clause,
            group_clause,
            having_clause,
            order_clause,
            limitby_clause,
            limit_clause,
        ] if c
    ])
def __init__(
    self,
    dataset: Dataset,
    query: Query,
    settings: RequestSettings,
    prewhere_conditions: Sequence[str],
) -> None:
    """Format the Snuba query into a single Clickhouse SQL string.

    Variant that takes pre-computed PREWHERE conditions rather than
    reading them off the query. Each clause is built independently
    and the non-empty ones are space-joined into
    ``self.__formatted_query``.

    :param dataset: used by column_expr/conditions_expr to resolve columns.
    :param query: the Snuba query to format.
    :param settings: request settings; turbo may force a SAMPLE rate.
    :param prewhere_conditions: conditions to render into PREWHERE.
    """
    parsing_context = ParsingContext()

    # Aggregations arrive as (function, column, alias) triples.
    aggregate_exprs = [
        column_expr(dataset, col, query, parsing_context, alias, agg)
        for (agg, col, alias) in query.get_aggregations()
    ]
    groupby = util.to_list(query.get_groupby())
    group_exprs = [
        column_expr(dataset, gb, query, parsing_context) for gb in groupby
    ]
    column_names = query.get_selected_columns() or []
    selected_cols = [
        column_expr(dataset, util.tuplify(colname), query, parsing_context)
        for colname in column_names
    ]
    # SELECT lists groupby expressions first, then aggregates, then columns.
    select_clause = u'SELECT {}'.format(
        ', '.join(group_exprs + aggregate_exprs + selected_cols))

    from_clause = u'FROM {}'.format(query.get_data_source().format_from())
    if query.get_final():
        from_clause = u'{} FINAL'.format(from_clause)

    # An explicit sample on the query wins over the turbo default rate.
    if query.get_sample():
        sample_rate = query.get_sample()
    elif settings.get_turbo():
        sample_rate = snuba_settings.TURBO_SAMPLE_RATE
    else:
        sample_rate = None
    if sample_rate:
        from_clause = u'{} SAMPLE {}'.format(from_clause, sample_rate)

    join_clause = ''
    if query.get_arrayjoin():
        join_clause = u'ARRAY JOIN {}'.format(query.get_arrayjoin())

    where_clause = ''
    if query.get_conditions():
        where_clause = u'WHERE {}'.format(
            conditions_expr(dataset, query.get_conditions(), query, parsing_context))

    # PREWHERE comes from the caller here, not from the query object.
    prewhere_clause = ''
    if prewhere_conditions:
        prewhere_clause = u'PREWHERE {}'.format(
            conditions_expr(dataset, prewhere_conditions, query, parsing_context))

    group_clause = ''
    if groupby:
        group_clause = 'GROUP BY ({})'.format(', '.join(
            column_expr(dataset, gb, query, parsing_context) for gb in groupby))
        if query.has_totals():
            group_clause = '{} WITH TOTALS'.format(group_clause)

    having_clause = ''
    having_conditions = query.get_having()
    if having_conditions:
        # HAVING without GROUP BY would be invalid SQL.
        assert groupby, 'found HAVING clause with no GROUP BY'
        having_clause = u'HAVING {}'.format(
            conditions_expr(dataset, having_conditions, query, parsing_context))

    order_clause = ''
    if query.get_orderby():
        orderby = [
            column_expr(dataset, util.tuplify(ob), query, parsing_context)
            for ob in util.to_list(query.get_orderby())
        ]
        # A leading "-" on the expression means descending order.
        orderby = [
            u'{} {}'.format(ob.lstrip('-'), 'DESC' if ob.startswith('-') else 'ASC')
            for ob in orderby
        ]
        order_clause = u'ORDER BY {}'.format(', '.join(orderby))

    limitby_clause = ''
    if query.get_limitby() is not None:
        limitby_clause = 'LIMIT {} BY {}'.format(*query.get_limitby())

    limit_clause = ''
    if query.get_limit() is not None:
        # Clickhouse "LIMIT offset, count" form.
        limit_clause = 'LIMIT {}, {}'.format(query.get_offset(), query.get_limit())

    # Join only the clauses that were actually produced.
    self.__formatted_query = ' '.join([
        c for c in [
            select_clause,
            from_clause,
            join_clause,
            prewhere_clause,
            where_clause,
            group_clause,
            having_clause,
            order_clause,
            limitby_clause,
            limit_clause
        ] if c
    ])
class TestProjectExtensionWithGroups(BaseTest):
    """Exercises ProjectWithGroupsProcessor on both the legacy condition
    list and the AST condition: project IN filter, FINAL promotion for
    projects with pending replacements, and group-id exclusion bounded
    by ``max_group_ids_exclude``."""

    def setup_method(self, test_method):
        super().setup_method(test_method)
        raw_data = {"project": 2}
        self.extension = ProjectExtension(
            processor=ProjectWithGroupsProcessor(project_column="project_id")
        )
        self.valid_data = validate_jsonschema(raw_data, self.extension.get_schema())
        self.query = Query(
            {"conditions": []},
            TableSource("my_table", ColumnSet([])),
        )

    def _run(self, settings):
        # Apply the extension processor to the shared query fixture.
        self.extension.get_processor().process_query(
            self.query, self.valid_data, settings
        )

    def test_with_turbo(self):
        # Turbo mode: only the project filter is added, no FINAL logic.
        self._run(HTTPRequestSettings(turbo=True))
        assert self.query.get_conditions() == [("project_id", "IN", [2])]
        assert self.query.get_condition_from_ast() == build_in("project_id", [2])

    def test_without_turbo_with_projects_needing_final(self):
        replacer.set_project_needs_final(2)
        self._run(HTTPRequestSettings())
        assert self.query.get_conditions() == [("project_id", "IN", [2])]
        assert self.query.get_condition_from_ast() == build_in("project_id", [2])
        assert self.query.get_final()

    def test_without_turbo_without_projects_needing_final(self):
        self._run(HTTPRequestSettings())
        assert self.query.get_conditions() == [("project_id", "IN", [2])]
        assert self.query.get_condition_from_ast() == build_in("project_id", [2])
        assert not self.query.get_final()

    def test_when_there_are_not_many_groups_to_exclude(self):
        # Below the limit: groups are excluded via NOT IN, no FINAL.
        state.set_config("max_group_ids_exclude", 5)
        replacer.set_project_exclude_groups(2, [100, 101, 102])
        self._run(HTTPRequestSettings())

        expected = [
            ("project_id", "IN", [2]),
            (["assumeNotNull", ["group_id"]], "NOT IN", [100, 101, 102]),
        ]
        assert self.query.get_conditions() == expected

        # AST form: notIn(assumeNotNull(group_id), tuple(100, 101, 102))
        # ANDed with the project filter.
        group_exclusion = FunctionCall(
            None,
            "notIn",
            (
                FunctionCall(
                    None, "assumeNotNull", (Column(None, "group_id", None),)
                ),
                FunctionCall(
                    None,
                    "tuple",
                    (
                        Literal(None, 100),
                        Literal(None, 101),
                        Literal(None, 102),
                    ),
                ),
            ),
        )
        assert self.query.get_condition_from_ast() == FunctionCall(
            None,
            BooleanFunctions.AND,
            (group_exclusion, build_in("project_id", [2])),
        )
        assert not self.query.get_final()

    def test_when_there_are_too_many_groups_to_exclude(self):
        # Above the limit: fall back to FINAL instead of a huge NOT IN.
        state.set_config("max_group_ids_exclude", 2)
        replacer.set_project_exclude_groups(2, [100, 101, 102])
        self._run(HTTPRequestSettings())

        assert self.query.get_conditions() == [("project_id", "IN", [2])]
        assert self.query.get_condition_from_ast() == build_in("project_id", [2])
        assert self.query.get_final()