Example #1
    def column_expr(
        self,
        column_name,
        query: Query,
        parsing_context: ParsingContext,
        table_alias: str = "",
    ):
        detected_dataset = detect_table(query, self.__events_columns,
                                        self.__transactions_columns)

        if detected_dataset == TRANSACTIONS:
            if column_name == "time":
                return self.time_expr("finish_ts", query.get_granularity(),
                                      table_alias)
            if column_name == "type":
                return "'transaction'"
            if column_name == "timestamp":
                return "finish_ts"
            if column_name == "username":
                return "user_name"
            if column_name == "email":
                return "user_email"
            if column_name == "transaction":
                return "transaction_name"
            if column_name == "message":
                return "transaction_name"
            if column_name == "title":
                return "transaction_name"
            if column_name == "group_id":
                # TODO: We return 0 here instead of NULL so conditions like group_id
                # in (1, 2, 3) will work, since Clickhouse won't run a query like:
                # SELECT (NULL AS group_id) FROM transactions WHERE group_id IN (1, 2, 3)
                # When we have the query AST, we should solve this by transforming the
                # nonsensical conditions instead.
                return "0"
            if column_name == "geo_country_code":
                column_name = "contexts[geo.country_code]"
            if column_name == "geo_region":
                column_name = "contexts[geo.region]"
            if column_name == "geo_city":
                column_name = "contexts[geo.city]"
            if self.__events_columns.get(column_name):
                return "NULL"
        else:
            if column_name == "time":
                return self.time_expr("timestamp", query.get_granularity(),
                                      table_alias)
            if column_name == "release":
                column_name = "tags[sentry:release]"
            if column_name == "dist":
                column_name = "tags[sentry:dist]"
            if column_name == "user":
                column_name = "tags[sentry:user]"
            if self.__transactions_columns.get(column_name):
                return "NULL"

        return get_dataset(detected_dataset).column_expr(
            column_name, query, parsing_context)
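
The method above translates logical column names into physical expressions depending on which dataset the query was detected to target. As a rough, self-contained illustration of that mapping-dispatch idea (TRANSACTIONS_COLUMN_MAP and translate_column below are hypothetical names, not part of Snuba's API):

TRANSACTIONS_COLUMN_MAP = {
    "username": "user_name",
    "email": "user_email",
    "transaction": "transaction_name",
    "geo_city": "contexts[geo.city]",
}

def translate_column(column_name: str) -> str:
    # Columns with no mapping pass through unchanged, mirroring the
    # fall-through to get_dataset(...).column_expr(...) above.
    return TRANSACTIONS_COLUMN_MAP.get(column_name, column_name)

assert translate_column("email") == "user_email"
assert translate_column("project_id") == "project_id"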
Example #2
def test_full_query():
    query = Query(
        {
            "selected_columns": ["c1", "c2", "c3"],
            "conditions": [["c1", "=", "a"]],
            "arrayjoin": "tags",
            "having": [["c4", "=", "c"]],
            "groupby": ["project_id"],
            "aggregations": [["count()", "", "count"]],
            "orderby": "event_id",
            "limitby": (100, "environment"),
            "sample": 10,
            "limit": 100,
            "offset": 50,
            "totals": True,
            "granularity": 60,
        },
        TableSource("my_table", ColumnSet([])),
    )

    assert query.get_selected_columns() == ["c1", "c2", "c3"]
    assert query.get_aggregations() == [["count()", "", "count"]]
    assert query.get_groupby() == ["project_id"]
    assert query.get_conditions() == [["c1", "=", "a"]]
    assert query.get_arrayjoin() == "tags"
    assert query.get_having() == [["c4", "=", "c"]]
    assert query.get_orderby() == "event_id"
    assert query.get_limitby() == (100, "environment")
    assert query.get_sample() == 10
    assert query.get_limit() == 100
    assert query.get_offset() == 50
    assert query.has_totals() is True
    assert query.get_granularity() == 60

    assert query.get_data_source().format_from() == "my_table"
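
The assertions read back exactly the fields passed in the body dict, which suggests the accessors are thin wrappers over it. A minimal sketch of that shape (MiniQuery is hypothetical, not Snuba's Query):

class MiniQuery:
    def __init__(self, body: dict) -> None:
        self.__body = body

    def get_limit(self):
        return self.__body.get("limit")

    def get_granularity(self):
        return self.__body.get("granularity")

mini = MiniQuery({"limit": 100, "granularity": 60})
assert mini.get_limit() == 100
assert mini.get_granularity() == 60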
Example #3
def test_query_extension_processing(
    raw_data: dict,
    expected_conditions: Sequence[Condition],
    expected_granularity: int,
):
    state.set_config('max_days', 1)
    extension = TimeSeriesExtension(
        default_granularity=60,
        default_window=datetime.timedelta(days=5),
        timestamp_column='timestamp',
    )
    valid_data = validate_jsonschema(raw_data, extension.get_schema())
    query = Query(
        {"conditions": []},
        TableSource("my_table", ColumnSet([])),
    )

    request_settings = RequestSettings(turbo=False,
                                       consistent=False,
                                       debug=False)

    extension.get_processor().process_query(query, valid_data,
                                            request_settings)
    assert query.get_conditions() == expected_conditions
    assert query.get_granularity() == expected_granularity
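
The test feeds validated extension data through a processor that mutates the query in place. A hedged sketch of what a time-series processor typically contributes, namely timestamp window conditions (process_timeseries and its argument shape are illustrative, not Snuba's API):

def process_timeseries(conditions: list, data: dict, column: str = "timestamp") -> None:
    # Append the [from, to) window in the same nested-list condition
    # format the assertions above compare against.
    conditions.append([column, ">=", data["from_date"]])
    conditions.append([column, "<", data["to_date"]])

conds: list = []
process_timeseries(conds, {"from_date": "2020-01-01T00:00:00", "to_date": "2020-01-02T00:00:00"})
assert conds == [
    ["timestamp", ">=", "2020-01-01T00:00:00"],
    ["timestamp", "<", "2020-01-02T00:00:00"],
]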
Example #4
    def column_expr(self,
                    column_name,
                    query: Query,
                    parsing_context: ParsingContext,
                    table_alias: str = ""):
        if column_name in self.__time_group_columns:
            return self.__time_expr(column_name, query.get_granularity(),
                                    table_alias)
        else:
            return super().column_expr(column_name, query, parsing_context,
                                       table_alias)
Example #5
def test_edit_query():
    query = Query(
        {
            "selected_columns": ["c1", "c2", "c3"],
            "conditions": [["c1", "=", "a"]],
            "arrayjoin": "tags",
            "having": [["c4", "=", "c"]],
            "groupby": ["project_id"],
            "aggregations": [["count()", "", "count"]],
            "orderby": "event_id",
            "limitby": (100, "environment"),
            "sample": 10,
            "limit": 100,
            "offset": 50,
            "totals": True,
        },
        TableSource("my_table", ColumnSet([])),
    )

    query.set_selected_columns(["c4"])
    assert query.get_selected_columns() == ["c4"]

    query.set_aggregations([["different_agg()", "", "something"]])
    assert query.get_aggregations() == [["different_agg()", "", "something"]]

    query.add_groupby(["more", "more2"])
    assert query.get_groupby() == ["project_id", "more", "more2"]

    query.add_conditions([["c5", "=", "9"]])
    assert query.get_conditions() == [
        ["c1", "=", "a"],
        ["c5", "=", "9"],
    ]

    query.set_conditions([["c6", "=", "10"]])
    assert query.get_conditions() == [
        ["c6", "=", "10"],
    ]

    query.set_arrayjoin("not_tags")
    assert query.get_arrayjoin() == "not_tags"

    query.set_granularity(7200)
    assert query.get_granularity() == 7200

    query.set_prewhere([["pc6", "=", "10"]])
    assert query.get_prewhere() == [["pc6", "=", "10"]]
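
The difference between the add_* and set_* mutators exercised above is append versus replace. The same semantics on a plain list, for illustration only:

conditions = [["c1", "=", "a"]]
conditions.extend([["c5", "=", "9"]])   # add_conditions: append
assert conditions == [["c1", "=", "a"], ["c5", "=", "9"]]
conditions = [["c6", "=", "10"]]        # set_conditions: replace
assert conditions == [["c6", "=", "10"]]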
Example #6
    def column_expr(
        self,
        column_name,
        query: Query,
        parsing_context: ParsingContext,
        table_alias: str = "",
    ):
        # We want to permit functions here, so make sure we are not trying
        # to look up lists in the dictionary, where an unhashable key would
        # raise a TypeError.
        if isinstance(column_name, str) and column_name in self.__time_group_columns:
            real_column = self.__time_group_columns[column_name]
            return self.time_expr(real_column, query.get_granularity(),
                                  table_alias)
        else:
            return super().column_expr(column_name, query, parsing_context,
                                       table_alias)
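
Examples #4 and #6 both override column_expr to special-case time-bucket columns and delegate everything else to the parent. A self-contained sketch of that pattern (BaseExpr, TimeGroupExpr, and the toStartOfHour mapping are hypothetical):

class BaseExpr:
    def column_expr(self, column_name):
        return column_name

class TimeGroupExpr(BaseExpr):
    TIME_GROUP_COLUMNS = {"time": "timestamp", "rtime": "received"}

    def column_expr(self, column_name):
        # The isinstance guard matters: column_name may be a nested list
        # describing a function call, and unhashable values raise a
        # TypeError when used in a dict membership test.
        if isinstance(column_name, str) and column_name in self.TIME_GROUP_COLUMNS:
            return f"toStartOfHour({self.TIME_GROUP_COLUMNS[column_name]})"
        return super().column_expr(column_name)

assert TimeGroupExpr().column_expr("time") == "toStartOfHour(timestamp)"
assert TimeGroupExpr().column_expr(["f", ["c1"]]) == ["f", ["c1"]]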
Example #7
def test_query_extension_processing(
    raw_data: dict,
    expected_conditions: Sequence[Condition],
    expected_ast_condition: Expression,
    expected_granularity: int,
):
    state.set_config("max_days", 1)
    extension = TimeSeriesExtension(
        default_granularity=60,
        default_window=timedelta(days=5),
        timestamp_column="timestamp",
    )
    valid_data = validate_jsonschema(raw_data, extension.get_schema())
    query = Query({"conditions": []}, TableSource("my_table", ColumnSet([])))

    request_settings = HTTPRequestSettings()

    extension.get_processor().process_query(query, valid_data, request_settings)
    assert query.get_conditions() == expected_conditions
    assert query.get_condition_from_ast() == expected_ast_condition
    assert query.get_granularity() == expected_granularity
Example #8
    def __init__(
        self,
        query: Query,
        settings: RequestSettings,
    ) -> None:
        # Snuba query structure.
        # We reference these fields directly since that makes it easier to
        # process this query independently from the Snuba Query, and there
        # is no risk in doing so because they are immutable.
        self.__selected_columns = query.get_selected_columns_from_ast()
        self.__condition = query.get_condition_from_ast()
        self.__groupby = query.get_groupby_from_ast()
        self.__having = query.get_having_from_ast()
        self.__orderby = query.get_orderby_from_ast()
        self.__data_source = query.get_data_source()
        self.__arrayjoin = query.get_arrayjoin_from_ast()
        self.__granularity = query.get_granularity()
        self.__limit = query.get_limit()
        self.__limitby = query.get_limitby()
        self.__offset = query.get_offset()

        if self.__having:
            assert self.__groupby, "found HAVING clause with no GROUP BY"

        # Clickhouse specific fields. Some are still in the Snuba
        # query and have to be moved.
        self.__turbo = settings.get_turbo()
        self.__final = query.get_final()
        self.__sample = query.get_sample()
        self.__hastotals = query.has_totals()
        # TODO: PREWHERE processing will become a step in Clickhouse query
        # processing instead of being pulled from the Snuba Query.
        self.__prewhere = query.get_prewhere_ast()

        self.__settings = settings
        self.__formatted_query: Optional[str] = None
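
The constructor snapshots the relevant fields out of the Snuba query up front so this query can then be processed and formatted on its own. A minimal sketch of that snapshot-and-cache idea with hypothetical names:

from typing import Optional

class MiniClickhouseQuery:
    def __init__(self, limit: Optional[int], offset: Optional[int]) -> None:
        # Snapshot the fields at construction time.
        self.__limit = limit
        self.__offset = offset
        self.__formatted_query: Optional[str] = None

    def format_sql(self) -> str:
        # Cache the formatted string, as the __formatted_query field hints.
        if self.__formatted_query is None:
            clauses = []
            if self.__limit is not None:
                clauses.append(f"LIMIT {self.__limit}")
            if self.__offset is not None:
                clauses.append(f"OFFSET {self.__offset}")
            self.__formatted_query = " ".join(clauses)
        return self.__formatted_query

assert MiniClickhouseQuery(100, 50).format_sql() == "LIMIT 100 OFFSET 50"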