Exemple #1
0
    def test_environment_filter(self):
        """A single environment filter becomes EQ; a bracketed list becomes IN."""
        single_env_query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "environment:prod",
            ["environment"],
        )
        expected_single = [Condition(Column("environment"), Op.EQ, "prod")]
        expected_single.extend(self.default_conditions)
        self.assertCountEqual(single_env_query.where, expected_single)
        single_env_query.get_snql_query().validate()

        multi_env_query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "environment:[dev, prod]",
            ["environment"],
        )
        expected_multi = [Condition(Column("environment"), Op.IN, ["dev", "prod"])]
        expected_multi.extend(self.default_conditions)
        self.assertCountEqual(multi_env_query.where, expected_multi)
        multi_env_query.get_snql_query().validate()
Exemple #2
0
 def test_count_if(self):
     """count_if fields resolve to countIf(equals/notEquals(...)) aggregates."""
     query = QueryBuilder(
         Dataset.Discover,
         self.params,
         "",
         selected_columns=[
             "count_if(event.type,equals,transaction)",
             'count_if(event.type,notEquals,"transaction")',
         ],
     )
     self.assertCountEqual(query.where, self.default_conditions)
     # Both count_if variants compare Column("type") to "transaction"; only the
     # inner comparison function and the generated alias differ.
     expected = [
         Function(
             "countIf",
             [Function(operator, [Column("type"), "transaction"])],
             alias,
         )
         for operator, alias in (
             ("equals", "count_if_event_type_equals_transaction"),
             ("notEquals", "count_if_event_type_notEquals__transaction"),
         )
     ]
     self.assertCountEqual(query.aggregates, expected)
Exemple #3
0
 def test_count_if_with_tags(self):
     """count_if on a tag key resolves the column to tags[<key>]."""
     query = QueryBuilder(
         Dataset.Discover,
         self.params,
         "",
         selected_columns=[
             "count_if(foo,equals,bar)",
             'count_if(foo,notEquals,"baz")',
         ],
     )
     self.assertCountEqual(query.where, self.default_conditions)
     expected = [
         Function(
             "countIf",
             [Function(operator, [Column("tags[foo]"), rhs])],
             alias,
         )
         for operator, rhs, alias in (
             ("equals", "bar", "count_if_foo_equals_bar"),
             ("notEquals", "baz", "count_if_foo_notEquals__baz"),
         )
     ]
     self.assertCountEqual(query.aggregates, expected)
Exemple #4
0
    def test_project_alias_column(self):
        """The virtual `project` column becomes a transform() of project_id to slug."""
        # TODO(snql-boolean): Update this to match the corresponding test in test_filter
        first = self.create_project()
        second = self.create_project()
        project_ids = [first.id, second.id]
        self.params["project_id"] = project_ids
        query = QueryBuilder(
            Dataset.Discover, self.params, selected_columns=["project"]
        )

        self.assertCountEqual(
            query.where,
            [
                Condition(Column("project_id"), Op.IN, project_ids),
                Condition(Column("timestamp"), Op.GTE, self.start),
                Condition(Column("timestamp"), Op.LT, self.end),
            ],
        )
        self.assertCountEqual(
            query.select,
            [
                Function(
                    "transform",
                    [
                        Column("project_id"),
                        project_ids,
                        [first.slug, second.slug],
                        "",
                    ],
                    "project",
                )
            ],
        )
Exemple #5
0
 def setUp(self):
     """Build the fixed date window, project list, and params dict shared by the tests."""
     self.start = datetime.datetime(2015, 5, 18, 10, 15, 1, tzinfo=timezone.utc)
     self.end = datetime.datetime(2015, 5, 19, 10, 15, 1, tzinfo=timezone.utc)
     self.projects = [1, 2, 3]
     self.params = {
         "project_id": self.projects,
         "start": self.start,
         "end": self.end,
     }
     # These conditions should always be on a query when self.params is passed
     self.default_conditions = [
         Condition(Column("timestamp"), Op.GTE, self.start),
         Condition(Column("timestamp"), Op.LT, self.end),
         Condition(Column("project_id"), Op.IN, self.projects),
     ]
Exemple #6
0
    def test_environment_param(self):
        """An empty-string environment param means IS NULL (OR'd in); plain values use IN."""
        self.params["environment"] = ["", "prod"]
        query = QueryBuilder(
            Dataset.Discover, self.params, selected_columns=["environment"]
        )
        null_or_prod = Or([
            Condition(Column("environment"), Op.IS_NULL),
            Condition(Column("environment"), Op.EQ, "prod"),
        ])
        self.assertCountEqual(
            query.where, [*self.default_conditions, null_or_prod]
        )
        query.get_snql_query().validate()

        self.params["environment"] = ["dev", "prod"]
        query = QueryBuilder(
            Dataset.Discover, self.params, selected_columns=["environment"]
        )
        self.assertCountEqual(
            query.where,
            [
                *self.default_conditions,
                Condition(Column("environment"), Op.IN, ["dev", "prod"]),
            ],
        )
        query.get_snql_query().validate()
    def test_join_query(self) -> None:
        """A two-entity join (events grouped into groupedmessage) round-trips through SnQL."""
        events = Entity("events", "ev")
        groups = Entity("groupedmessage", "gm")
        match = Join([Relationship(events, "grouped", groups)])
        where = [
            Condition(Column("project_id", events), Op.EQ, self.project_id),
            Condition(Column("project_id", groups), Op.EQ, self.project_id),
            Condition(Column("timestamp", events), Op.GTE, self.base_time),
            Condition(Column("timestamp", events), Op.LT, self.next_time),
        ]
        query = (
            Query("discover", match)
            .set_select([
                Column("group_id", events),
                Column("status", groups),
                Function("avg", [Column("retention_days", events)], "avg"),
            ])
            .set_groupby([Column("group_id", events), Column("status", groups)])
            .set_where(where)
        )

        response = self.post("/discover/snql", data=query.snuba())
        data = json.loads(response.data)

        assert response.status_code == 200
        assert data["data"] == []
    def test_escape_edge_cases(self) -> None:
        """Backslashes and newlines in a condition value survive serialization."""
        conditions = [
            Condition(Column("project_id"), Op.EQ, self.project_id),
            Condition(Column("timestamp"), Op.GTE, self.base_time),
            Condition(Column("timestamp"), Op.LT, self.next_time),
            Condition(Column("environment"), Op.EQ, "\\' \n \\n \\"),
        ]
        query = (
            Query("events", Entity("events"))
            .set_select([Function("count", [], "times_seen")])
            .set_where(conditions)
        )

        response = self.post("/events/snql", data=query.snuba())
        data = json.loads(response.data)
        assert response.status_code == 200, data
    def test_sub_query(self) -> None:
        """An outer query can use an inner Query as its FROM clause."""
        inner_where = [
            Condition(Column("type"), Op.NEQ, "transaction"),
            Condition(Column("project_id"), Op.EQ, self.project_id),
            Condition(Column("timestamp"), Op.GTE, self.base_time),
            Condition(Column("timestamp"), Op.LT, self.next_time),
        ]
        inner_query = (
            Query("discover", Entity("discover_events"))
            .set_select([Function("count", [], "count")])
            .set_groupby([Column("project_id"), Column("tags[custom_tag]")])
            .set_where(inner_where)
        )

        # Outer query averages the per-group counts produced by the subquery.
        query = (
            Query("discover", inner_query)
            .set_select([Function("avg", [Column("count")], "avg_count")])
            .set_orderby([
                OrderBy(Function("avg", [Column("count")], "avg_count"),
                        Direction.ASC)
            ])
            .set_limit(1000)
        )

        response = self.post("/discover/snql", data=query.snuba())
        data = json.loads(response.data)
        assert response.status_code == 200, data
        assert data["data"] == [{"avg_count": 1.0}]
Exemple #10
0
def test_entity_validate_match(query: Query,
                               exception: Optional[Exception]) -> None:
    """validate_match raises the expected error (or none) for the given query."""
    where = [
        Condition(Column("required1"), Op.IN, [1, 2, 3]),
        Condition(Column("required2"), Op.EQ, 1),
        Condition(Column("time"), Op.GTE, BEFORE),
        Condition(Column("time"), Op.LT, AFTER),
    ]
    query = query.set_where(where)

    if exception is None:
        validate_match(query, SEARCHER)
    else:
        with pytest.raises(type(exception), match=re.escape(str(exception))):
            validate_match(query, SEARCHER)
Exemple #11
0
def test_columns_with_entities(
    column_name: str,
    entity: Any,
    translated: str,
    exception: Optional[Exception],
) -> None:
    """Entity-qualified columns either raise or expose name/entity and translate."""
    if exception is None:
        col = Column(column_name, entity)
        assert col.name == column_name
        assert col.entity == entity
        assert TRANSLATOR.visit(col) == translated
    else:
        with pytest.raises(type(exception), match=re.escape(str(exception))):
            Column(column_name, entity)
Exemple #12
0
def test_columns(
    column_name: str,
    valid: tuple[str, Optional[str], Optional[str], Optional[str]],
    translated: str,
    exception: Optional[Exception],
) -> None:
    """Column names parse into (name, subscriptable, key) parts or raise."""
    if exception is None:
        col = Column(column_name)
        assert col.name == valid[0]
        assert col.subscriptable == valid[1]
        assert col.key == valid[2]
        assert TRANSLATOR.visit(col) == translated
    else:
        with pytest.raises(type(exception), match=re.escape(str(exception))):
            Column(column_name)
Exemple #13
0
    def test_basic(self) -> None:
        """A grouped count over a one-day window around a single event returns it."""
        now = datetime.now()
        self._insert_event_for_time(now)

        window = timedelta(days=1)
        query = (
            Query(dataset="events", match=Entity("events"))
            .set_select([Function("count", [], "count")])
            .set_groupby([Column("project_id")])
            .set_where([
                Condition(Column("project_id"), Op.EQ, self.project.id),
                Condition(Column("timestamp"), Op.GTE, now - window),
                Condition(Column("timestamp"), Op.LT, now + window),
            ])
        )

        result = snuba.raw_snql_query(query)
        assert len(result["data"]) == 1
        assert result["data"][0] == {"count": 1, "project_id": self.project.id}
Exemple #14
0
    def test_correct_times_seen_aggregate(self):
        """times_seen aggregates differently per interval: count() sub-hour, sum daily."""
        ten_minute_query = _make_query(
            "statsPeriod=6h&interval=10m&groupBy=category&field=sum(times_seen)",
            {"organization_id": 1},
            True,
        )
        expected = Function("count()", [Column("times_seen")], "times_seen")
        assert expected in ten_minute_query.select_params

        daily_query = _make_query(
            "statsPeriod=6h&interval=1d&groupBy=category&field=sum(times_seen)",
            {"organization_id": 1},
            True,
        )
        expected = Function("sum", [Column("times_seen")], "times_seen")
        assert expected in daily_query.select_params
Exemple #15
0
 def test_correct_reason_mapping(self):
     """The legacy reason name `spike_protection` maps to `smart_rate_limit`."""
     query = _make_query(
         "statsPeriod=4d&interval=1d&groupBy=category&reason=spike_protection&field=sum(quantity)",
         {"organization_id": 1},
     )
     expected = Condition(Column("reason"), Op.IN, ["smart_rate_limit"])
     assert expected in query.conditions
Exemple #16
0
def parse_extension_condition(col: str,
                              values: Any,
                              always_in: bool = False) -> Optional[Condition]:
    """
    Create an SDK condition using the values passed as extensions in the
    legacy API.

    Returns ``None`` when *values* is neither an int nor a list/tuple.

    :param col: The column that the automatic condition applies to.
    :type col: str
    :param values: The RHS values of the condition. Could be a single scalar
        or a sequence of values.
    :type values: Any
    :param always_in: Some conditions always use an IN condition, even if
        there is a single value.
    :type always_in: bool

    """

    column = Column(col)
    if isinstance(values, int):
        if not always_in:
            return Condition(column, Op.EQ, values)
        # Promote the scalar to a one-element tuple so the IN branch handles it.
        values = (values, )

    if isinstance(values, (list, tuple)):
        parsed: Sequence[Any] = tuple(map(parse_scalar, values))
        return Condition(column, Op.IN, parsed)

    return None
Exemple #17
0
def test_entity_validate_match(
    conditions: ConditionGroup,
    entity: Entity,
    exception: Optional[Exception],
) -> None:
    """validate_required_columns accepts or rejects the given where clause."""
    base = Query(
        dataset="test",
        match=entity,
        select=[Column("test1"), Column("required1")],
    )
    query = base.set_where(conditions)

    if exception is None:
        validate_required_columns(query)
    else:
        with pytest.raises(type(exception), match=re.escape(str(exception))):
            validate_required_columns(query)
Exemple #18
0
    def column(self, name: str) -> Column:
        """Resolve an unresolved sentry field name into a snql Column.

        :param name: The unresolved sentry name.
        """
        return Column(self.resolve_column_name(name))
Exemple #19
0
    def aliased_column(self, name: str, alias: str) -> SelectType:
        """Resolve *name* to a snql column, aliased to *alias* when they differ.

        :param name: The unresolved sentry name.
        :param alias: The expected alias in the result.
        """

        # TODO: This method should use an aliased column from the SDK once
        # that is available to skip these hacks that we currently have to
        # do aliasing.
        resolved = self.resolve_column_name(name)

        # When the resolved snuba column already carries the requested alias
        # there is nothing to do. This also sidesteps `tags[...]` columns,
        # which the sdk cannot alias a second time.
        if alias == resolved:
            return Column(resolved)

        # Otherwise wrap the column so the result row comes back under the
        # expected alias rather than the raw snuba column name.
        return AliasedExpression(Column(resolved), alias)
Exemple #20
0
    def test_correct_outcome_mapping(self):
        """A textual `outcome=accepted` filter resolves to the Outcome enum value."""
        query = _make_query(
            "statsPeriod=4d&interval=1d&groupBy=category&outcome=accepted&field=sum(quantity)",
            {"organization_id": 1},
        )

        expected = Condition(Column("outcome"), Op.IN, [Outcome.ACCEPTED])
        assert expected in query.conditions
Exemple #21
0
def _check_releases_have_health_data(
    organization_id: int,
    project_ids: List[int],
    release_versions: List[str],
    start: datetime,
    end: datetime,
) -> Set[str]:
    """
    Returns a set of all release versions that have health data within a given period of time.
    """
    # Nothing to look up -> nothing can have health data.
    if not release_versions:
        return set()

    where = [
        Condition(Column("started"), Op.GTE, start),
        Condition(Column("started"), Op.LT, end),
        Condition(Column("org_id"), Op.EQ, organization_id),
        Condition(Column("project_id"), Op.IN, project_ids),
        Condition(Column("release"), Op.IN, release_versions),
    ]
    query = Query(
        dataset="sessions",
        match=Entity("sessions"),
        select=[Column("release")],
        groupby=[Column("release")],
        where=where,
    )
    result = snuba.raw_snql_query(
        query, referrer="snuba.sessions.check_releases_have_health_data"
    )
    return {row["release"] for row in result["data"]}
Exemple #22
0
class TimeseriesQueryBuilder(QueryFilter):  # type: ignore
    """Builds timeseries queries: grouped by the time bucket, selecting aggregates only."""

    time_column = Column("time")

    def __init__(
        self,
        dataset: Dataset,
        params: ParamsType,
        granularity: int,
        query: Optional[str] = None,
        selected_columns: Optional[List[str]] = None,
        equations: Optional[List[str]] = None,
        functions_acl: Optional[List[str]] = None,
        limit: Optional[int] = 10000,
    ):
        super().__init__(
            dataset,
            params,
            auto_fields=False,
            functions_acl=functions_acl,
            equation_config={"auto_add": True, "aggregates_only": True},
        )
        self.where, self.having = self.resolve_conditions(
            query, use_aggregate_conditions=False
        )

        self.limit = Limit(limit) if limit is not None else None

        # params depends on parse_query, and conditions being resolved first
        # since there may be projects in conditions
        self.where += self.resolve_params()
        self.columns = self.resolve_select(selected_columns, equations)
        self.granularity = Granularity(granularity)

        # This is a timeseries, the groupby will always be time
        self.groupby = [self.time_column]

    @property
    def select(self) -> List[SelectType]:
        """The aggregate expressions to select; a timeseries must have at least one."""
        if self.aggregates:
            # Casting for now since QueryFields/QueryFilter are only partially typed
            return cast(List[SelectType], self.aggregates)
        raise InvalidSearchQuery("Cannot query a timeseries without a Y-Axis")

    def get_snql_query(self) -> Query:
        """Assemble the final snql Query, ordered by the time bucket ascending."""
        return Query(
            dataset=self.dataset.value,
            match=Entity(self.dataset.value),
            select=self.select,
            where=self.where,
            having=self.having,
            groupby=self.groupby,
            orderby=[OrderBy(self.time_column, Direction.ASC)],
            granularity=self.granularity,
            limit=self.limit,
        )
Exemple #23
0
 def get_conditions(self, query: QueryDict, params: Mapping[Any, Any]) -> List[Any]:
     """Translate the request's dimension filters and scoping params into snql conditions."""
     conditions: List[Any] = [
         Condition(Column("timestamp"), Op.GTE, self.start),
         Condition(Column("timestamp"), Op.LT, self.end),
     ]
     # Dimension filters only apply when the request supplied at least one value.
     for filter_name, dimension in DIMENSION_MAP.items():
         resolved_filter = dimension.resolve_filter(query.getlist(filter_name, []))
         if len(resolved_filter) > 0:
             conditions.append(Condition(Column(filter_name), Op.IN, resolved_filter))
     if "project_id" in params:
         conditions.append(
             Condition(Column("project_id"), Op.IN, params["project_id"]),
         )
     if "organization_id" in params:
         conditions.append(
             Condition(Column("org_id"), Op.EQ, params["organization_id"]),
         )
     return conditions
Exemple #24
0
    def test_filter_keys(self):
        """organization_id / project_id params become org_id EQ / project_id IN conditions."""
        query = _make_query(
            "statsPeriod=6h&interval=10m&groupBy=category&field=sum(times_seen)",
            {"organization_id": 1},
            True,
        )
        assert Condition(Column("org_id"), Op.EQ, 1) in query.conditions

        query = _make_query(
            "statsPeriod=6h&interval=1d&groupBy=category&field=sum(times_seen)",
            {"organization_id": 1, "project_id": [1, 2, 3, 4, 5]},
            True,
        )
        assert Condition(Column("org_id"), Op.EQ, 1) in query.conditions
        expected = Condition(Column("project_id"), Op.IN, [1, 2, 3, 4, 5])
        assert expected in query.conditions
    def test_cache(self):
        """Minimal test to verify if use_cache works"""
        query = Query(
            "events",
            Entity("events"),
            select=[Column("event_id")],
            where=[
                Condition(Column("project_id"), Op.EQ, self.project.id),
                Condition(Column("timestamp"), Op.GTE,
                          timezone.now() - timedelta(days=1)),
                Condition(Column("timestamp"), Op.LT, timezone.now()),
            ],
            limit=Limit(1),
        )
        results = snuba.raw_snql_query(query, use_cache=True)

        assert results["data"] == []
Exemple #26
0
def build_key_errors(interval, project):
    """Return the 3 most frequent error groups for *project* within *interval*.

    :param interval: ``(start, stop)`` date pair; stop is inclusive (one day is added).
    :param project: The project whose events are counted.
    :return: List of ``(group_id, count)`` tuples, most frequent first.
    """
    start, stop = interval

    # Take the 3 most frequently occurring events
    query = Query(
        dataset=Dataset.Events.value,
        match=Entity("events"),
        select=[Column("group_id"), Function("count", [])],
        where=[
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, stop + timedelta(days=1)),
            Condition(Column("project_id"), Op.EQ, project.id),
        ],
        groupby=[Column("group_id")],
        orderby=[OrderBy(Function("count", []), Direction.DESC)],
        limit=Limit(3),
    )
    rows = raw_snql_query(query, referrer="reports.key_errors")["data"]
    return [(row["group_id"], row["count()"]) for row in rows]
Exemple #27
0
    def test_correct_category_mapping(self):
        """category=error expands to the default/error/security data categories."""
        query = _make_query(
            "statsPeriod=4d&interval=1d&category=error&field=sum(quantity)",
            {"organization_id": 1},
        )

        expected = Condition(
            Column("category"),
            Op.IN,
            [DataCategory.DEFAULT, DataCategory.ERROR, DataCategory.SECURITY],
        )
        assert expected in query.conditions
Exemple #28
0
    def test_project_in_condition_filters(self):
        """Filtering on a single project slug narrows project_id to an EQ condition."""
        # TODO(snql-boolean): Update this to match the corresponding test in test_filter
        first = self.create_project()
        second = self.create_project()
        self.params["project_id"] = [first.id, second.id]
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            f"project:{first.slug}",
            selected_columns=["environment"],
        )

        expected = [
            Condition(Column("project_id"), Op.EQ, first.id),
            Condition(Column("timestamp"), Op.GTE, self.start),
            Condition(Column("timestamp"), Op.LT, self.end),
        ]
        self.assertCountEqual(query.where, expected)
Exemple #29
0
    def __init__(
        self,
        query: QueryDict,
        params: Mapping[Any, Any],
        allow_minute_resolution: Optional[bool] = True,
    ):
        """Parse and validate an outcomes stats request into query components."""
        raw_fields = query.getlist("field", [])
        raw_groupby = query.getlist("groupBy", [])
        if not raw_fields:
            raise InvalidField('At least one "field" is required.')
        self.fields = {}
        self.query: List[Any] = []  # not used but needed for compat with sessions logic
        allowed_resolution = (
            AllowedResolution.one_minute
            if allow_minute_resolution
            else AllowedResolution.one_hour
        )
        start, end, rollup = get_constrained_date_range(query, allowed_resolution)
        self.dataset, self.match = _outcomes_dataset(rollup)
        self.rollup = rollup
        self.start = start
        self.end = end

        # Resolve each requested field to its select expression.
        self.select_params = []
        for key in raw_fields:
            if key not in COLUMN_MAP:
                raise InvalidField(f'Invalid field: "{key}"')
            field = COLUMN_MAP[key]
            self.select_params.append(field.select_params(self.dataset))
            self.fields[key] = field

        # Resolve groupBy names against the known grouping dimensions.
        self.groupby = []
        for key in raw_groupby:
            if key not in GROUPBY_MAP:
                raise InvalidField(f'Invalid groupBy: "{key}"')
            self.groupby.append(GROUPBY_MAP[key])

        # A category constraint (either a filter or a grouping) is mandatory.
        if not query.getlist("category", []) and "category" not in raw_groupby:
            raise InvalidQuery("Query must have category as groupby or filter")

        # Snuba columns required by the selected fields and groupings (deduplicated).
        query_columns = set()
        for field in self.fields.values():
            query_columns.update(field.get_snuba_columns(raw_groupby))
        for groupby in self.groupby:
            query_columns.update(groupby.get_snuba_columns())
        self.query_columns = list(query_columns)

        # Snuba groupby keys (deduplicated), plus their Column counterparts.
        query_groupby = set()
        for groupby in self.groupby:
            query_groupby.update(groupby.get_snuba_groupby())
        self.query_groupby = list(query_groupby)
        self.group_by = [Column(key) for key in self.query_groupby]

        self.conditions = self.get_conditions(query, params)
Exemple #30
0
 def test_orderby_duplicate_columns(self):
     """Ordering by a column selected twice still yields a single orderby entry."""
     query = QueryBuilder(
         Dataset.Discover,
         self.params,
         selected_columns=["user.email", "user.email"],
         orderby=["user.email"],
     )
     expected = [OrderBy(Column("email"), Direction.ASC)]
     self.assertCountEqual(query.orderby, expected)