Example #1
0
def test_query(query: Query, exception: Optional[Exception]) -> None:
    """Validate ``query``; when ``exception`` is given, expect it to be raised."""
    if exception is None:
        query.validate()
        return

    with pytest.raises(type(exception), match=re.escape(str(exception))):
        query.validate()
Example #2
0
def test_invalid_subquery() -> None:
    """Outer queries must reject inner queries that have no select clause."""
    no_select = re.escape(
        "inner query is invalid: query must have at least one expression in select"
    )

    # Inner query handed straight to the constructor.
    with pytest.raises(InvalidQueryError, match=no_select):
        Query("discover", Query(dataset="discover", match=Entity("events"))).set_select(
            [Column("event_id"), Column("title")]
        )

    # Valid inner query swapped out later via set_match.
    with pytest.raises(InvalidQueryError, match=no_select):
        Query(
            "discover",
            Query(
                dataset="discover",
                match=Entity("events"),
                select=[Column("title"), Column("timestamp")],
            ),
        ).set_match(Query(dataset="discover", match=Entity("events"))).set_select(
            [Function("uniq", [Column("new_event")], "uniq_event"), Column("title")]
        )
Example #3
0
def test_invalid_query() -> None:
    """Constructor-level and condition-level validation errors."""
    bad_dataset = re.escape("queries must have a valid dataset")

    with pytest.raises(InvalidQueryError, match=bad_dataset):
        Query(dataset=1, match=Entity("events"))  # type: ignore

    with pytest.raises(InvalidQueryError, match=bad_dataset):
        Query(dataset="", match=Entity("events"))

    with pytest.raises(
        InvalidQueryError, match=re.escape("queries must have a valid Entity")
    ):
        Query(dataset="discover", match="events")  # type: ignore

    # An AliasedExpression is not a legal LHS for a condition.
    with pytest.raises(
        InvalidConditionError,
        match=re.escape(
            "invalid condition: LHS of a condition must be a Column, CurriedFunction or Function, not <class 'snuba_sdk.aliased_expression.AliasedExpression'>"
        ),
    ):
        Query("discover", Entity("events")).set_select(
            [AliasedExpression(Column("transaction"), "tn")]
        ).set_where(
            [Condition(AliasedExpression(Column("project_id"), "pi"), Op.IN, (1,))]  # type: ignore
        )
    def test_sub_query(self) -> None:
        """Aggregate over an inner query and check the averaged result."""
        inner_query = (
            Query("discover", Entity("discover_events"))
            .set_select([Function("count", [], "count")])
            .set_groupby([Column("project_id"), Column("tags[custom_tag]")])
            .set_where(
                [
                    Condition(Column("type"), Op.NEQ, "transaction"),
                    Condition(Column("project_id"), Op.EQ, self.project_id),
                    Condition(Column("timestamp"), Op.GTE, self.base_time),
                    Condition(Column("timestamp"), Op.LT, self.next_time),
                ]
            )
        )

        query = (
            Query("discover", inner_query)
            .set_select([Function("avg", [Column("count")], "avg_count")])
            .set_orderby(
                [OrderBy(Function("avg", [Column("count")], "avg_count"), Direction.ASC)]
            )
            .set_limit(1000)
        )

        response = self.post("/discover/snql", data=query.snuba())
        data = json.loads(response.data)
        assert response.status_code == 200, data
        assert data["data"] == [{"avg_count": 1.0}]
Example #5
0
    def test_simple(self) -> None:
        """Grouping sessions by (org_id, project_id) yields one row per project."""
        query = Query(
            dataset="sessions",
            match=Entity("org_sessions"),
            select=[Column("org_id"), Column("project_id")],
            groupby=[Column("org_id"), Column("project_id")],
            where=[
                Condition(
                    Column("started"), Op.GTE, datetime.utcnow() - timedelta(hours=6)
                ),
                Condition(Column("started"), Op.LT, datetime.utcnow()),
            ],
            granularity=Granularity(3600),
        )

        response = self.app.post("/sessions/snql", data=query.snuba())
        data = json.loads(response.data)

        assert response.status_code == 200, response.data
        assert len(data["data"]) == 2
        first_row, second_row = data["data"]
        assert first_row["org_id"] == self.org_id
        assert first_row["project_id"] == self.project_id
        assert second_row["org_id"] == self.org_id
        assert second_row["project_id"] == self.project_id2
def test_invalid_query() -> None:
    """Dataset and match arguments are validated at construction time."""
    bad_dataset = re.escape("queries must have a valid dataset")

    with pytest.raises(InvalidQuery, match=bad_dataset):
        Query(dataset=1, match=Entity("events"))  # type: ignore

    with pytest.raises(InvalidQuery, match=bad_dataset):
        Query(dataset="", match=Entity("events"))

    with pytest.raises(InvalidQuery, match=re.escape("queries must have a valid Entity")):
        Query(dataset="discover", match="events")  # type: ignore
Example #7
0
    def test_orderby(self) -> None:
        """Rows come back ordered by org_id, ascending then descending."""
        self.project_id3 = next(self.id_iter)
        self.org_id2 = next(self.id_iter)
        self.generate_session_events(self.org_id2, self.project_id3)

        query = Query(
            dataset="sessions",
            match=Entity("org_sessions"),
            select=[Column("org_id"), Column("project_id")],
            groupby=[Column("org_id"), Column("project_id")],
            where=[
                Condition(
                    Column("started"), Op.GTE, datetime.utcnow() - timedelta(hours=6)
                ),
                Condition(Column("started"), Op.LT, datetime.utcnow()),
            ],
            granularity=Granularity(3600),
            orderby=[OrderBy(Column("org_id"), Direction.ASC)],
        )

        response = self.app.post("/sessions/snql", data=query.snuba())
        data = json.loads(response.data)
        assert response.status_code == 200, response.data

        expected_asc = [
            (self.org_id, self.project_id),
            (self.org_id, self.project_id2),
            (self.org_id2, self.project_id3),
        ]
        assert len(data["data"]) == 3
        for row, (org, project) in zip(data["data"], expected_asc):
            assert row["org_id"] == org
            assert row["project_id"] == project

        # Flip the direction: the second org should now come first.
        query = query.set_orderby([OrderBy(Column("org_id"), Direction.DESC)])
        response = self.app.post("/sessions/snql", data=query.snuba())
        data = json.loads(response.data)
        assert response.status_code == 200, response.data

        expected_desc = [
            (self.org_id2, self.project_id3),
            (self.org_id, self.project_id),
            (self.org_id, self.project_id2),
        ]
        assert len(data["data"]) == 3
        for row, (org, project) in zip(data["data"], expected_desc):
            assert row["org_id"] == org
            assert row["project_id"] == project
Example #8
0
def _get_full_hierarchical_hashes(group: Group, hash: str) -> Optional[Sequence[str]]:
    """Return the hierarchical_hashes of the first event in ``group`` whose
    hashes contain ``hash``, or ``None`` when no such event exists."""
    where = _get_group_filters(group) + [
        Condition(
            Function("has", [Column("hierarchical_hashes"), hash]),
            Op.EQ,
            1,
        ),
    ]
    query = (
        Query("events", Entity("events"))
        .set_select([Column("hierarchical_hashes")])
        .set_where(where)
    )

    rows = snuba.raw_snql_query(
        query, referrer="group_split.get_full_hierarchical_hashes"
    )["data"]

    return rows[0]["hierarchical_hashes"] if rows else None
def test_translate_query(
    query: Query,
    clauses: Sequence[str],
    extras: Optional[Sequence[Tuple[str, bool]]],
) -> None:
    """The query serializes to the JSON body built from clauses plus extras."""
    body: MutableMapping[str, Any] = {
        "dataset": "discover",
        "query": " ".join(clauses),
    }
    if extras:
        body.update(dict(extras))
    assert query.snuba() == json.dumps(body)
Example #10
0
def test_join_validate_match(
    conditions: ConditionGroup,
    entity: Entity,
    exception: Optional[Exception],
) -> None:
    """Required-column validation on a two-entity join; raise iff expected."""
    other = Entity("test_b", "tb", None, SCHEMA)
    where = [
        Condition(Column("required1", other), Op.IN, [1, 2, 3]),
        Condition(Column("required2", other), Op.EQ, 1),
        Condition(Column("time", other), Op.GTE, BEFORE),
        Condition(Column("time", other), Op.LT, AFTER),
    ]
    where.extend(conditions)

    query = Query(
        dataset="test",
        match=Join([Relationship(entity, "has", other)]),
        select=[Column("test1", entity), Column("required1", other)],
        where=where,
    )

    if exception is None:
        validate_required_columns(query)
    else:
        with pytest.raises(type(exception), match=re.escape(str(exception))):
            validate_required_columns(query)
Example #11
0
def _check_releases_have_health_data(
    organization_id: int,
    project_ids: List[int],
    release_versions: List[str],
    start: datetime,
    end: datetime,
) -> Set[str]:
    """
    Returns a set of all release versions that have health data within a given period of time.
    """
    if not release_versions:
        return set()

    conditions = [
        Condition(Column("started"), Op.GTE, start),
        Condition(Column("started"), Op.LT, end),
        Condition(Column("org_id"), Op.EQ, organization_id),
        Condition(Column("project_id"), Op.IN, project_ids),
        Condition(Column("release"), Op.IN, release_versions),
    ]
    query = Query(
        dataset="sessions",
        match=Entity("sessions"),
        select=[Column("release")],
        groupby=[Column("release")],
        where=conditions,
    )

    rows = snuba.raw_snql_query(
        query, referrer="snuba.sessions.check_releases_have_health_data"
    )["data"]
    return {row["release"] for row in rows}
Example #12
0
def test_entity_validate_match(
    conditions: ConditionGroup,
    entity: Entity,
    exception: Optional[Exception],
) -> None:
    """Required-column validation for a single-entity query; raise iff expected."""
    query = Query(
        dataset="test",
        match=entity,
        select=[Column("test1"), Column("required1")],
    ).set_where(conditions)

    if exception is None:
        validate_required_columns(query)
    else:
        with pytest.raises(type(exception), match=re.escape(str(exception))):
            validate_required_columns(query)
    def test_join_query(self) -> None:
        """Join events to groupedmessage and aggregate retention_days."""
        ev = Entity("events", "ev")
        gm = Entity("groupedmessage", "gm")

        query = (
            Query("discover", Join([Relationship(ev, "grouped", gm)]))
            .set_select(
                [
                    Column("group_id", ev),
                    Column("status", gm),
                    Function("avg", [Column("retention_days", ev)], "avg"),
                ]
            )
            .set_groupby([Column("group_id", ev), Column("status", gm)])
            .set_where(
                [
                    Condition(Column("project_id", ev), Op.EQ, self.project_id),
                    Condition(Column("project_id", gm), Op.EQ, self.project_id),
                    Condition(Column("timestamp", ev), Op.GTE, self.base_time),
                    Condition(Column("timestamp", ev), Op.LT, self.next_time),
                ]
            )
        )

        response = self.post("/discover/snql", data=query.snuba())
        data = json.loads(response.data)

        assert response.status_code == 200
        assert data["data"] == []
Example #14
0
    def visit(self, query: main.Query) -> QVisited:
        """Dispatch every query field to its ``_visit_<field>`` handler and
        fold the per-field results together via ``_combine``."""
        rendered = {
            field: getattr(self, f"_visit_{field}")(getattr(query, field))
            for field in query.get_fields()
        }
        return self._combine(query, rendered)
Example #15
0
    def _combine(self, query: main.Query, returns: Mapping[str, str]) -> str:
        """Join the rendered clauses into one string; fields that live outside
        the SQL become ``-- NAME: value`` header lines when pretty-printing a
        top-level (non-inner) query."""
        # These fields are encoded outside of the SQL
        to_skip = (
            "dataset",
            "consistent",
            "turbo",
            "debug",
            "dry_run",
            "legacy",
            "parent_api",
            "team",
            "feature",
        )

        top_level_pretty = self.pretty and not self.is_inner
        separator = "\n" if top_level_pretty else " "
        formatted = separator.join(
            returns[clause]
            for clause in query.get_fields()
            if clause not in to_skip and returns[clause]
        )

        if top_level_pretty:
            header = "".join(
                f"-- {skip.upper()}: {returns[skip]}\n"
                for skip in to_skip
                if returns.get(skip)
            )
            formatted = f"{header}{formatted}"

        return formatted
Example #16
0
def get_levels_overview(group):
    """Return a LevelsOverview for ``group``.

    Raises NoEvents when the group has no events, MergedIssues when more than
    one primary hash is present, and NotHierarchical when there are no levels.
    """
    query = (
        Query("events", Entity("events"))
        .set_select(
            [
                Column("primary_hash"),
                Function(
                    "max",
                    [Function("length", [Column("hierarchical_hashes")])],
                    "num_levels",
                ),
                _current_level_expr(group),
            ]
        )
        .set_where(_get_group_filters(group))
        .set_groupby([Column("primary_hash")])
    )

    res = snuba.raw_snql_query(
        query, referrer="api.group_hashes_levels.get_levels_overview"
    )

    rows = res["data"]
    if not rows:
        raise NoEvents()
    if len(rows) > 1:
        raise MergedIssues()

    assert len(rows) == 1
    fields = rows[0]

    if fields["num_levels"] <= 0:
        raise NotHierarchical()

    # TODO: Cache this if it takes too long. This is called from multiple
    # places, grouping overview and then again in the new-issues endpoint.

    return LevelsOverview(
        current_level=fields["current_level"] - 1,
        only_primary_hash=fields["primary_hash"],
        num_levels=fields["num_levels"],
    )
def _get_hash_for_parent_level(group: Group, id: int,
                               levels_overview: LevelsOverview) -> str:
    """Return the hierarchical hash at parent level ``id`` for ``group``.

    Results are cached per (group, level). The cache never needs explicit
    invalidation because the group ID changes on every level change.
    """
    # If this is violated, there cannot be a 1:1 mapping between level and hash.
    assert 0 <= id < levels_overview.current_level

    # No idea if the query is slow, caching just because I can.
    cache_key = f"group-parent-level-hash:{group.id}:{id}"

    # Annotate once as Optional: cache.get returns None on a miss. The
    # original re-annotated the name on reassignment, which is invalid under
    # PEP 526 tooling and needed a `# type: ignore`.
    return_hash: Optional[str] = cache.get(cache_key)

    if return_hash is None:
        query = (
            Query("events", Entity("events"))
            .set_select(
                [
                    Function(
                        "arrayElement",
                        [Column("hierarchical_hashes"), id + 1],
                        "hash",
                    )
                ]
            )
            .set_where(_get_group_filters(group))
            .set_limit(1)
        )

        return_hash = get_path(snuba.raw_snql_query(query), "data", 0, "hash")
        cache.set(cache_key, return_hash)

    # Fail loudly on a missing/empty hash rather than returning None.
    assert return_hash
    return return_hash
Example #18
0
 def get_snql_query(self) -> Query:
     """Assemble the SnQL query from the builder's accumulated clauses."""
     dataset_name = self.dataset.value
     return Query(
         dataset=dataset_name,
         match=Entity(dataset_name),
         select=self.select,
         where=self.where,
         groupby=self.groupby,
         limit=self.limit,
     )
def test_pretty_print_query(
    query: Query,
    clauses: Sequence[str],
    extras: Optional[Sequence[Tuple[str, bool]]],
) -> None:
    """print() emits -- comment headers for extras, then the clauses."""
    header_lines = ["-- DATASET: discover\n"]
    if extras:
        header_lines.extend(f"-- {key.upper()}: {value}\n" for key, value in extras)

    expected = "".join(header_lines) + "\n".join(clauses)
    assert query.print() == expected
Example #20
0
 def get_snql_query(self) -> Query:
     """Build the time-series SnQL query, ordered ascending by time."""
     dataset_name = self.dataset.value
     return Query(
         dataset=dataset_name,
         match=Entity(dataset_name),
         select=self.select,
         where=self.where,
         having=self.having,
         groupby=self.groupby,
         orderby=[OrderBy(self.time_column, Direction.ASC)],
         granularity=self.granularity,
         limit=self.limit,
     )
    def test_escape_edge_cases(self) -> None:
        """Backslashes, quotes and newlines in a value must round-trip safely."""
        tricky_value = "\\' \n \\n \\"
        query = (
            Query("events", Entity("events"))
            .set_select([Function("count", [], "times_seen")])
            .set_where(
                [
                    Condition(Column("project_id"), Op.EQ, self.project_id),
                    Condition(Column("timestamp"), Op.GTE, self.base_time),
                    Condition(Column("timestamp"), Op.LT, self.next_time),
                    Condition(Column("environment"), Op.EQ, tricky_value),
                ]
            )
        )

        response = self.post("/events/snql", data=query.snuba())
        data = json.loads(response.data)
        assert response.status_code == 200, data
Example #22
0
def build_project_usage_outcomes(start__stop, project):
    """Return (accepted errors, dropped errors, accepted transactions,
    dropped transactions) quantity totals for ``project`` over the interval."""
    start, stop = start__stop

    # XXX(epurkhiser): Tsdb used to use day buckets, where the end would
    # represent a whole day. Snuba queries more accurately thus we must
    # capture the entire last day
    end = stop + timedelta(days=1)

    query = Query(
        dataset=Dataset.Outcomes.value,
        match=Entity("outcomes"),
        select=[
            Column("outcome"),
            Column("category"),
            Function("sum", [Column("quantity")], "total"),
        ],
        where=[
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, end),
            Condition(Column("project_id"), Op.EQ, project.id),
            Condition(Column("org_id"), Op.EQ, project.organization_id),
            Condition(
                Column("outcome"),
                Op.IN,
                [Outcome.ACCEPTED, Outcome.FILTERED, Outcome.RATE_LIMITED],
            ),
            Condition(
                Column("category"),
                Op.IN,
                [*DataCategory.error_categories(), DataCategory.TRANSACTION],
            ),
        ],
        groupby=[Column("outcome"), Column("category")],
        granularity=Granularity(ONE_DAY),
    )
    data = raw_snql_query(query, referrer="reports.outcomes")["data"]

    def _is_error(row):
        return row["category"] in DataCategory.error_categories()

    def _is_transaction(row):
        return row["category"] == DataCategory.TRANSACTION

    def _total(outcome, category_pred):
        # Sum the rows matching both the outcome and the category split.
        return sum(
            row["total"]
            for row in data
            if category_pred(row) and row["outcome"] == outcome
        )

    return (
        _total(Outcome.ACCEPTED, _is_error),        # accepted errors
        _total(Outcome.RATE_LIMITED, _is_error),    # dropped errors
        _total(Outcome.ACCEPTED, _is_transaction),  # accepted transactions
        _total(Outcome.RATE_LIMITED, _is_transaction),  # dropped transactions
    )
Example #23
0
def run_outcomes_query_timeseries(query: QueryDefinition) -> ResultSet:
    """Execute the timeseries variant of an outcomes query and format its rows."""
    snql_query = Query(
        dataset=query.dataset.value,
        match=Entity(query.match),
        select=query.select_params,
        # Bucket by the timestamp column on top of the requested grouping.
        groupby=query.group_by + [Column(TS_COL)],
        where=query.conditions,
        limit=Limit(10000),
        offset=Offset(0),
        granularity=Granularity(query.rollup),
    )
    result = raw_snql_query(snql_query, referrer="outcomes.timeseries")
    return _format_rows(result["data"], query)
Example #24
0
    def get_snql_query(self) -> Query:
        """Validate the having clause, then build the full SnQL query."""
        self.validate_having_clause()

        dataset_name = self.dataset.value
        return Query(
            dataset=dataset_name,
            match=Entity(dataset_name),
            select=self.select,
            where=self.where,
            having=self.having,
            groupby=self.groupby,
            orderby=self.orderby,
            limit=self.limit,
            offset=self.offset,
        )
Example #25
0
def test_entity_validate_match(query: Query,
                               exception: Optional[Exception]) -> None:
    """validate_match with all required columns present; raise iff expected."""
    required_conditions = [
        Condition(Column("required1"), Op.IN, [1, 2, 3]),
        Condition(Column("required2"), Op.EQ, 1),
        Condition(Column("time"), Op.GTE, BEFORE),
        Condition(Column("time"), Op.LT, AFTER),
    ]
    query = query.set_where(required_conditions)

    if exception is None:
        validate_match(query, SEARCHER)
    else:
        with pytest.raises(type(exception), match=re.escape(str(exception))):
            validate_match(query, SEARCHER)
Example #26
0
    def test_basic(self) -> None:
        """A single inserted event is counted once for this project."""
        now = datetime.now()
        self._insert_event_for_time(now)

        query = (
            Query(dataset="events", match=Entity("events"))
            .set_select([Function("count", [], "count")])
            .set_groupby([Column("project_id")])
            .set_where(
                [
                    Condition(Column("project_id"), Op.EQ, self.project.id),
                    Condition(Column("timestamp"), Op.GTE, now - timedelta(days=1)),
                    Condition(Column("timestamp"), Op.LT, now + timedelta(days=1)),
                ]
            )
        )

        result = snuba.raw_snql_query(query)
        assert len(result["data"]) == 1
        assert result["data"][0] == {"count": 1, "project_id": self.project.id}
Example #27
0
def _get_project_releases_count(
    organization_id: int,
    project_ids: Sequence[int],
    scope: str,
    stats_period: Optional[str] = None,
    environments: Optional[Sequence[str]] = None,
) -> int:
    """
    Fetches the total count of releases/project combinations
    """
    # Special rule that we support sorting by the last 24h only.
    if stats_period is None or scope.endswith("_24h"):
        stats_period = "24h"

    _, stats_start, _ = get_rollup_starts_and_buckets(stats_period)

    where = [
        Condition(Column("started"), Op.GTE, stats_start),
        Condition(Column("started"), Op.LT, datetime.now()),
        Condition(Column("project_id"), Op.IN, project_ids),
        Condition(Column("org_id"), Op.EQ, organization_id),
    ]
    if environments is not None:
        where.append(Condition(Column("environment"), Op.IN, environments))

    # Filter out releases with zero users when sorting by either `users` or
    # `crash_free_users`.
    having = (
        [Condition(Column("users"), Op.GT, 0)]
        if scope in ("users", "crash_free_users")
        else []
    )

    query = Query(
        dataset="sessions",
        match=Entity("sessions"),
        select=[
            Function(
                "uniqExact",
                [Column("release"), Column("project_id")],
                alias="count",
            )
        ],
        where=where,
        having=having,
    )
    data = snuba.raw_snql_query(
        query, referrer="snuba.sessions.get_project_releases_count"
    )["data"]
    return data[0]["count"] if data else 0
Example #28
0
    def get_snql_query(self) -> Query:
        """Validate the having clause, then build the sampled, turbo-aware query."""
        self.validate_having_clause()

        dataset_name = self.dataset.value
        return Query(
            dataset=dataset_name,
            match=Entity(dataset_name, sample=self.sample_rate),
            select=self.columns,
            array_join=self.array_join,
            where=self.where,
            having=self.having,
            groupby=self.groupby,
            orderby=self.orderby,
            limit=self.limit,
            offset=self.offset,
            limitby=self.limitby,
            turbo=self.turbo,
        )
    def test_cache(self):
        """Minimal test to verify if use_cache works"""
        query = Query(
            "events",
            Entity("events"),
            select=[Column("event_id")],
            where=[
                Condition(Column("project_id"), Op.EQ, self.project.id),
                Condition(
                    Column("timestamp"), Op.GTE, timezone.now() - timedelta(days=1)
                ),
                Condition(Column("timestamp"), Op.LT, timezone.now()),
            ],
            limit=Limit(1),
        )
        results = snuba.raw_snql_query(query, use_cache=True)

        assert results["data"] == []
Example #30
0
 def query_p95(interval):
     """p95 transaction duration per transaction name over the interval."""
     start, stop = interval
     query = Query(
         dataset=Dataset.Transactions.value,
         match=Entity("transactions"),
         select=[
             Column("transaction_name"),
             Function("quantile(0.95)", [Column("duration")], "p95"),
         ],
         where=[
             Condition(Column("finish_ts"), Op.GTE, start),
             Condition(Column("finish_ts"), Op.LT, stop + timedelta(days=1)),
             Condition(Column("transaction_name"), Op.IN, transaction_names),
             Condition(Column("project_id"), Op.EQ, project.id),
         ],
         groupby=[Column("transaction_name")],
     )
     return raw_snql_query(query, referrer="reports.key_transactions.p95")