class TestProjectExtensionWithGroups(BaseTest):
    """
    Runs the ProjectWithGroupsProcessor (wrapped in a ProjectExtension)
    against an empty query for project 2 and checks the conditions and
    FINAL flag it leaves on the query.
    """

    def setup_method(self, test_method):
        """Build a fresh extension, validated payload and empty query."""
        super().setup_method(test_method)

        self.extension = ProjectExtension(processor=ProjectWithGroupsProcessor())
        self.valid_data = validate_jsonschema(
            {'project': 2},
            self.extension.get_schema(),
        )
        self.query = Query({"conditions": []})

    def _process(self, request_settings):
        """Apply the extension's processor to the query under test."""
        processor = self.extension.get_processor()
        processor.process_query(self.query, self.valid_data, request_settings)

    def _assert_only_project_condition(self):
        """The query must carry the project filter and nothing else."""
        assert self.query.get_conditions() == [('project_id', 'IN', [2])]

    def test_with_turbo(self):
        """In turbo mode only the project condition is added."""
        request_settings = RequestSettings(turbo=True, consistent=False, debug=False)

        self._process(request_settings)

        self._assert_only_project_condition()

    def test_without_turbo_with_projects_needing_final(self):
        """A project flagged as needing final turns FINAL on."""
        request_settings = RequestSettings(turbo=False, consistent=False, debug=False)
        replacer.set_project_needs_final(2)

        self._process(request_settings)

        self._assert_only_project_condition()
        assert self.query.get_final()

    def test_without_turbo_without_projects_needing_final(self):
        """Without any replacement state the query is not FINAL."""
        request_settings = RequestSettings(turbo=False, consistent=False, debug=False)

        self._process(request_settings)

        self._assert_only_project_condition()
        assert not self.query.get_final()

    def test_when_there_are_not_many_groups_to_exclude(self):
        """Excluded groups within the configured maximum become a NOT IN condition."""
        request_settings = RequestSettings(turbo=False, consistent=False, debug=False)
        state.set_config('max_group_ids_exclude', 5)
        replacer.set_project_exclude_groups(2, [100, 101, 102])

        self._process(request_settings)

        project_condition = ('project_id', 'IN', [2])
        groups_condition = (['assumeNotNull', ['group_id']], 'NOT IN', [100, 101, 102])
        assert self.query.get_conditions() == [project_condition, groups_condition]
        assert not self.query.get_final()

    def test_when_there_are_too_many_groups_to_exclude(self):
        """More excluded groups than the configured maximum falls back to FINAL."""
        request_settings = RequestSettings(turbo=False, consistent=False, debug=False)
        state.set_config('max_group_ids_exclude', 2)
        replacer.set_project_exclude_groups(2, [100, 101, 102])

        self._process(request_settings)

        self._assert_only_project_condition()
        assert self.query.get_final()
Exemple #2
0
    def __init__(
        self,
        query: Query,
        settings: RequestSettings,
    ) -> None:
        """
        Capture the parts of a Snuba query needed to produce the Clickhouse query.

        :param query: Snuba query; its AST clauses, data source, paging
            fields and Clickhouse-specific flags are read once here.
        :param settings: Request settings; stored as a whole and also read
            for the turbo flag.
        :raises AssertionError: If the query has a HAVING clause but no
            GROUP BY clause.
        """
        # Snuba query structure. The values are referenced directly rather
        # than through the Snuba Query so this object can be processed
        # independently of it; that is safe because they are immutable.
        self.__selected_columns = query.get_selected_columns_from_ast()
        self.__condition = query.get_condition_from_ast()
        self.__groupby = query.get_groupby_from_ast()
        self.__having = query.get_having_from_ast()
        self.__orderby = query.get_orderby_from_ast()
        self.__data_source = query.get_data_source()
        self.__arrayjoin = query.get_arrayjoin_from_ast()
        self.__granularity = query.get_granularity()
        self.__limit = query.get_limit()
        self.__limitby = query.get_limitby()
        self.__offset = query.get_offset()

        # HAVING is only accepted together with GROUP BY.
        assert (
            not self.__having or self.__groupby
        ), "found HAVING clause with no GROUP BY"

        # Clickhouse-specific fields. Some of them still live on the Snuba
        # query and have to be moved.
        self.__turbo = settings.get_turbo()
        self.__final = query.get_final()
        self.__sample = query.get_sample()
        self.__hastotals = query.has_totals()
        # TODO: Pre where processing will become a step in Clickhouse Query
        # processing instead of being pulled from the Snuba Query
        self.__prewhere = query.get_prewhere_ast()

        self.__settings = settings
        # No formatted query string is stored at construction time.
        self.__formatted_query: Optional[str] = None
Exemple #3
0
    def __init__(
        self,
        dataset: Dataset,
        query: Query,
        settings: RequestSettings,
    ) -> None:
        """
        Render ``query`` as a SQL string and store it on the instance.

        Clauses are emitted in this order, skipping the empty ones: SELECT,
        FROM (optionally with FINAL and SAMPLE), ARRAY JOIN, PREWHERE, WHERE,
        GROUP BY (optionally WITH TOTALS), HAVING, ORDER BY, LIMIT BY, LIMIT.

        :param dataset: Passed through to ``column_expr`` / ``conditions_expr``.
        :param query: Query whose clauses are rendered.
        :param settings: Request settings; the turbo flag selects the turbo
            sample rate when the query carries no sample rate of its own.
        :raises AssertionError: If HAVING conditions are present without a
            GROUP BY.
        """
        # A single context is handed to every expression rendered below, so
        # the rendering calls are kept in a fixed order (output presumably
        # depends on what the context has already seen -- confirm against
        # column_expr).
        context = ParsingContext()

        def render_column(expr, *extra):
            return column_expr(dataset, expr, query, context, *extra)

        def render_conditions(conditions):
            return conditions_expr(dataset, conditions, query, context)

        # SELECT: computed as aggregations, groups, plain columns; emitted as
        # groups, aggregations, plain columns.
        aggregates = [
            render_column(col, alias, agg)
            for (agg, col, alias) in query.get_aggregations()
        ]
        groupby = util.to_list(query.get_groupby())
        groups = [render_column(gb) for gb in groupby]
        selected = [
            render_column(util.tuplify(name))
            for name in query.get_selected_columns() or []
        ]
        select_clause = "SELECT " + ", ".join(groups + aggregates + selected)

        # FROM, with optional FINAL and SAMPLE modifiers.
        data_source = query.get_data_source()
        from_clause = "FROM {}".format(data_source.format_from())
        if query.get_final():
            from_clause += " FINAL"

        sample_rate = None
        if data_source.supports_sample():
            # The query's own sample rate wins over the turbo default.
            sample_rate = query.get_sample() or (
                snuba_settings.TURBO_SAMPLE_RATE if settings.get_turbo() else None
            )
        if sample_rate:
            from_clause += " SAMPLE {}".format(sample_rate)

        arrayjoin = query.get_arrayjoin()
        join_clause = "ARRAY JOIN {}".format(arrayjoin) if arrayjoin else ""

        # WHERE is rendered before PREWHERE even though it is emitted after it.
        conditions = query.get_conditions()
        where_clause = (
            "WHERE {}".format(render_conditions(conditions)) if conditions else ""
        )

        prewhere = query.get_prewhere()
        prewhere_clause = (
            "PREWHERE {}".format(render_conditions(prewhere)) if prewhere else ""
        )

        group_clause = ""
        if groupby:
            # Rendered afresh instead of reusing the SELECT strings: the
            # result may differ once the shared context has seen the
            # expression (presumably alias reuse -- confirm).
            group_clause = "GROUP BY ({})".format(
                ", ".join(render_column(gb) for gb in groupby)
            )
            if query.has_totals():
                group_clause += " WITH TOTALS"

        having_clause = ""
        having_conditions = query.get_having()
        if having_conditions:
            assert groupby, "found HAVING clause with no GROUP BY"
            having_clause = "HAVING {}".format(render_conditions(having_conditions))

        order_clause = ""
        orderby = query.get_orderby()
        if orderby:
            rendered = [
                render_column(util.tuplify(ob)) for ob in util.to_list(orderby)
            ]
            # A leading "-" on the rendered expression means descending.
            order_clause = "ORDER BY " + ", ".join(
                "{} {}".format(ob.lstrip("-"), "DESC" if ob.startswith("-") else "ASC")
                for ob in rendered
            )

        limitby = query.get_limitby()
        limitby_clause = (
            "LIMIT {} BY {}".format(*limitby) if limitby is not None else ""
        )

        limit_clause = ""
        limit = query.get_limit()
        if limit is not None:
            limit_clause = "LIMIT {}, {}".format(query.get_offset(), limit)

        clauses = [
            select_clause,
            from_clause,
            join_clause,
            prewhere_clause,
            where_clause,
            group_clause,
            having_clause,
            order_clause,
            limitby_clause,
            limit_clause,
        ]
        self.__formatted_query = " ".join(filter(None, clauses))
Exemple #4
0
    def __init__(
        self,
        dataset: Dataset,
        query: Query,
        settings: RequestSettings,
        prewhere_conditions: Sequence[str],
    ) -> None:
        """
        Render ``query`` as a SQL string and store it on the instance.

        Clauses are emitted in this order, skipping the empty ones: SELECT,
        FROM (optionally with FINAL and SAMPLE), ARRAY JOIN, PREWHERE, WHERE,
        GROUP BY (optionally WITH TOTALS), HAVING, ORDER BY, LIMIT BY, LIMIT.

        :param dataset: Passed through to ``column_expr`` / ``conditions_expr``.
        :param query: Query whose clauses are rendered.
        :param settings: Request settings; the turbo flag selects the turbo
            sample rate when the query carries no sample rate of its own.
        :param prewhere_conditions: Conditions rendered into the PREWHERE
            clause; no clause is emitted when this is empty.
        :raises AssertionError: If HAVING conditions are present without a
            GROUP BY.
        """
        # A single context is handed to every expression rendered below, so
        # the rendering calls are kept in a fixed order (output presumably
        # depends on what the context has already seen -- confirm against
        # column_expr).
        context = ParsingContext()

        def render_column(expr, *extra):
            return column_expr(dataset, expr, query, context, *extra)

        def render_conditions(conditions):
            return conditions_expr(dataset, conditions, query, context)

        # SELECT: computed as aggregations, groups, plain columns; emitted as
        # groups, aggregations, plain columns.
        aggregates = [
            render_column(col, alias, agg)
            for (agg, col, alias) in query.get_aggregations()
        ]
        groupby = util.to_list(query.get_groupby())
        groups = [render_column(gb) for gb in groupby]
        selected = [
            render_column(util.tuplify(name))
            for name in query.get_selected_columns() or []
        ]
        select_clause = 'SELECT ' + ', '.join(groups + aggregates + selected)

        # FROM, with optional FINAL and SAMPLE modifiers.
        from_clause = 'FROM {}'.format(query.get_data_source().format_from())
        if query.get_final():
            from_clause += ' FINAL'

        # The query's own sample rate wins over the turbo default.
        sample_rate = query.get_sample() or (
            snuba_settings.TURBO_SAMPLE_RATE if settings.get_turbo() else None
        )
        if sample_rate:
            from_clause += ' SAMPLE {}'.format(sample_rate)

        arrayjoin = query.get_arrayjoin()
        join_clause = 'ARRAY JOIN {}'.format(arrayjoin) if arrayjoin else ''

        # WHERE is rendered before PREWHERE even though it is emitted after it.
        conditions = query.get_conditions()
        where_clause = (
            'WHERE {}'.format(render_conditions(conditions)) if conditions else ''
        )

        prewhere_clause = (
            'PREWHERE {}'.format(render_conditions(prewhere_conditions))
            if prewhere_conditions
            else ''
        )

        group_clause = ''
        if groupby:
            # Rendered afresh instead of reusing the SELECT strings: the
            # result may differ once the shared context has seen the
            # expression (presumably alias reuse -- confirm).
            group_clause = 'GROUP BY ({})'.format(
                ', '.join(render_column(gb) for gb in groupby)
            )
            if query.has_totals():
                group_clause += ' WITH TOTALS'

        having_clause = ''
        having_conditions = query.get_having()
        if having_conditions:
            assert groupby, 'found HAVING clause with no GROUP BY'
            having_clause = 'HAVING {}'.format(render_conditions(having_conditions))

        order_clause = ''
        orderby = query.get_orderby()
        if orderby:
            rendered = [
                render_column(util.tuplify(ob)) for ob in util.to_list(orderby)
            ]
            # A leading '-' on the rendered expression means descending.
            order_clause = 'ORDER BY ' + ', '.join(
                '{} {}'.format(ob.lstrip('-'), 'DESC' if ob.startswith('-') else 'ASC')
                for ob in rendered
            )

        limitby = query.get_limitby()
        limitby_clause = (
            'LIMIT {} BY {}'.format(*limitby) if limitby is not None else ''
        )

        limit_clause = ''
        limit = query.get_limit()
        if limit is not None:
            limit_clause = 'LIMIT {}, {}'.format(query.get_offset(), limit)

        clauses = [
            select_clause, from_clause, join_clause, prewhere_clause,
            where_clause, group_clause, having_clause, order_clause,
            limitby_clause, limit_clause,
        ]
        self.__formatted_query = ' '.join(filter(None, clauses))
Exemple #5
0
class TestProjectExtensionWithGroups(BaseTest):
    """
    Runs the ProjectWithGroupsProcessor (wrapped in a ProjectExtension)
    against an empty query for project 2 and checks both the legacy
    conditions and the AST condition it leaves on the query, plus the
    FINAL flag.
    """

    def setup_method(self, test_method):
        """Build a fresh extension, validated payload and empty query."""
        super().setup_method(test_method)

        processor = ProjectWithGroupsProcessor(project_column="project_id")
        self.extension = ProjectExtension(processor=processor)
        self.valid_data = validate_jsonschema(
            {"project": 2}, self.extension.get_schema()
        )
        self.query = Query({"conditions": []}, TableSource("my_table", ColumnSet([])))

    def _process(self, request_settings):
        """Apply the extension's processor to the query under test."""
        processor = self.extension.get_processor()
        processor.process_query(self.query, self.valid_data, request_settings)

    def _assert_only_project_condition(self):
        """Both query representations must hold just the project filter."""
        assert self.query.get_conditions() == [("project_id", "IN", [2])]
        assert self.query.get_condition_from_ast() == build_in("project_id", [2])

    def test_with_turbo(self):
        """In turbo mode only the project condition is added."""
        request_settings = HTTPRequestSettings(turbo=True)

        self._process(request_settings)

        self._assert_only_project_condition()

    def test_without_turbo_with_projects_needing_final(self):
        """A project flagged as needing final turns FINAL on."""
        request_settings = HTTPRequestSettings()
        replacer.set_project_needs_final(2)

        self._process(request_settings)

        self._assert_only_project_condition()
        assert self.query.get_final()

    def test_without_turbo_without_projects_needing_final(self):
        """Without any replacement state the query is not FINAL."""
        request_settings = HTTPRequestSettings()

        self._process(request_settings)

        self._assert_only_project_condition()
        assert not self.query.get_final()

    def test_when_there_are_not_many_groups_to_exclude(self):
        """Excluded groups within the configured maximum become a NOT IN condition."""
        request_settings = HTTPRequestSettings()
        state.set_config("max_group_ids_exclude", 5)
        replacer.set_project_exclude_groups(2, [100, 101, 102])

        self._process(request_settings)

        project_condition = ("project_id", "IN", [2])
        groups_condition = (["assumeNotNull", ["group_id"]], "NOT IN", [100, 101, 102])
        assert self.query.get_conditions() == [project_condition, groups_condition]

        # Expected AST: and(notIn(assumeNotNull(group_id), tuple(...)), <project filter>)
        group_column = FunctionCall(
            None, "assumeNotNull", (Column(None, "group_id", None),)
        )
        excluded_groups = FunctionCall(
            None,
            "tuple",
            tuple(Literal(None, group_id) for group_id in (100, 101, 102)),
        )
        not_in_groups = FunctionCall(None, "notIn", (group_column, excluded_groups))
        assert self.query.get_condition_from_ast() == FunctionCall(
            None,
            BooleanFunctions.AND,
            (not_in_groups, build_in("project_id", [2])),
        )
        assert not self.query.get_final()

    def test_when_there_are_too_many_groups_to_exclude(self):
        """More excluded groups than the configured maximum falls back to FINAL."""
        request_settings = HTTPRequestSettings()
        state.set_config("max_group_ids_exclude", 2)
        replacer.set_project_exclude_groups(2, [100, 101, 102])

        self._process(request_settings)

        self._assert_only_project_condition()
        assert self.query.get_final()