Example #1
    def __init__(
        self,
        dataset: Dataset,
        params: ParamsType,
        granularity: int,
        query: Optional[str] = None,
        selected_columns: Optional[List[str]] = None,
        equations: Optional[List[str]] = None,
        functions_acl: Optional[List[str]] = None,
        limit: Optional[int] = 10000,
    ):
        super().__init__(
            dataset,
            params,
            auto_fields=False,
            functions_acl=functions_acl,
            equation_config={
                "auto_add": True,
                "aggregates_only": True
            },
        )
        self.where, self.having = self.resolve_conditions(
            query, use_aggregate_conditions=False)

        self.limit = None if limit is None else Limit(limit)

        # Resolving params depends on parse_query and the conditions being
        # resolved first, since there may be projects in the conditions
        self.where += self.resolve_params()
        self.columns = self.resolve_select(selected_columns, equations)
        self.granularity = Granularity(granularity)

        # This is a timeseries, so the groupby will always be the time column
        self.groupby = [self.time_column]
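The builder above always groups its timeseries by the time column and buckets rows by the given granularity, expressed in seconds. A minimal sketch of the Granularity wrapper used throughout these examples (assuming the top-level imports provided by recent snuba_sdk releases):

from snuba_sdk import Granularity

hourly = Granularity(3600)    # one bucket per hour
daily = Granularity(86400)    # one bucket per day

# The wrapped value is exposed as .granularity (see Example #11 below).
assert hourly.granularity == 3600
assert daily.granularity == 86400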
Example #2
    def test_simple(self) -> None:
        query = Query(
            dataset="sessions",
            match=Entity("org_sessions"),
            select=[Column("org_id"), Column("project_id")],
            groupby=[Column("org_id"), Column("project_id")],
            where=[
                Condition(Column("started"), Op.GTE,
                          datetime.utcnow() - timedelta(hours=6)),
                Condition(Column("started"), Op.LT, datetime.utcnow()),
            ],
            granularity=Granularity(3600),
        )

        response = self.app.post(
            "/sessions/snql",
            data=query.snuba(),
        )
        data = json.loads(response.data)
        assert response.status_code == 200, response.data
        assert len(data["data"]) == 2
        assert data["data"][0]["org_id"] == self.org_id
        assert data["data"][0]["project_id"] == self.project_id
        assert data["data"][1]["org_id"] == self.org_id
        assert data["data"][1]["project_id"] == self.project_id2
Example #3
def build_project_usage_outcomes(start__stop, project):
    start, stop = start__stop

    # XXX(epurkhiser): Tsdb used to use day buckets, where the end would
    # represent a whole day. Snuba queries are more accurate, so we must
    # capture the entire last day
    end = stop + timedelta(days=1)

    query = Query(
        dataset=Dataset.Outcomes.value,
        match=Entity("outcomes"),
        select=[
            Column("outcome"),
            Column("category"),
            Function("sum", [Column("quantity")], "total"),
        ],
        where=[
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, end),
            Condition(Column("project_id"), Op.EQ, project.id),
            Condition(Column("org_id"), Op.EQ, project.organization_id),
            Condition(
                Column("outcome"), Op.IN,
                [Outcome.ACCEPTED, Outcome.FILTERED, Outcome.RATE_LIMITED]),
            Condition(
                Column("category"),
                Op.IN,
                [*DataCategory.error_categories(), DataCategory.TRANSACTION],
            ),
        ],
        groupby=[Column("outcome"), Column("category")],
        granularity=Granularity(ONE_DAY),
    )
    data = raw_snql_query(query, referrer="reports.outcomes")["data"]

    return (
        # Accepted errors
        sum(row["total"] for row in data
            if row["category"] in DataCategory.error_categories()
            and row["outcome"] == Outcome.ACCEPTED),
        # Dropped errors
        sum(row["total"] for row in data
            if row["category"] in DataCategory.error_categories()
            and row["outcome"] == Outcome.RATE_LIMITED),
        # Accepted transactions
        sum(row["total"] for row in data
            if row["category"] == DataCategory.TRANSACTION
            and row["outcome"] == Outcome.ACCEPTED),
        # Dropped transactions
        sum(row["total"] for row in data
            if row["category"] == DataCategory.TRANSACTION
            and row["outcome"] == Outcome.RATE_LIMITED),
    )
Example #4
def run_outcomes_query_timeseries(query: QueryDefinition) -> ResultSet:
    snql_query = Query(
        dataset=query.dataset.value,
        match=Entity(query.match),
        select=query.select_params,
        groupby=query.group_by + [Column(TS_COL)],
        where=query.conditions,
        limit=Limit(10000),
        offset=Offset(0),
        granularity=Granularity(query.rollup),
    )
    result_timeseries = raw_snql_query(snql_query, referrer="outcomes.timeseries")
    return _format_rows(result_timeseries["data"], query)
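The query above caps results at 10,000 rows. If a follow-up page were needed, the same immutable set_* pattern shown in Examples #5 and #12 could be used; this is a hedged sketch that assumes a set_offset method exists alongside set_orderby/set_granularity and that the types are importable from the snuba_sdk top level:

from snuba_sdk import Column, Entity, Limit, Offset, Query

q = Query(
    dataset="outcomes",
    match=Entity("outcomes"),
    select=[Column("outcome")],
    limit=Limit(10000),
    offset=Offset(0),
)

# set_offset (assumed) returns a new Query; q itself is left unchanged.
q_page_2 = q.set_offset(10000)
assert q.offset == Offset(0)
assert q_page_2.offset == Offset(10000)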
Example #5
    def test_orderby(self) -> None:
        self.project_id3 = next(self.id_iter)
        self.org_id2 = next(self.id_iter)
        self.generate_session_events(self.org_id2, self.project_id3)

        query = Query(
            dataset="sessions",
            match=Entity("org_sessions"),
            select=[Column("org_id"), Column("project_id")],
            groupby=[Column("org_id"), Column("project_id")],
            where=[
                Condition(Column("started"), Op.GTE,
                          datetime.utcnow() - timedelta(hours=6)),
                Condition(Column("started"), Op.LT, datetime.utcnow()),
            ],
            granularity=Granularity(3600),
            orderby=[OrderBy(Column("org_id"), Direction.ASC)],
        )

        response = self.app.post(
            "/sessions/snql",
            data=query.snuba(),
        )
        data = json.loads(response.data)
        assert response.status_code == 200, response.data
        assert len(data["data"]) == 3
        assert data["data"][0]["org_id"] == self.org_id
        assert data["data"][0]["project_id"] == self.project_id
        assert data["data"][1]["org_id"] == self.org_id
        assert data["data"][1]["project_id"] == self.project_id2
        assert data["data"][2]["org_id"] == self.org_id2
        assert data["data"][2]["project_id"] == self.project_id3

        query = query.set_orderby([OrderBy(Column("org_id"), Direction.DESC)])
        response = self.app.post(
            "/sessions/snql",
            data=query.snuba(),
        )
        data = json.loads(response.data)
        assert response.status_code == 200, response.data
        assert len(data["data"]) == 3
        assert data["data"][0]["org_id"] == self.org_id2
        assert data["data"][0]["project_id"] == self.project_id3
        assert data["data"][1]["org_id"] == self.org_id
        assert data["data"][1]["project_id"] == self.project_id
        assert data["data"][2]["org_id"] == self.org_id
        assert data["data"][2]["project_id"] == self.project_id2
Example #6
def _get_snuba_query(
    org_id: int,
    query: QueryDefinition,
    entity_key: EntityKey,
    metric_id: int,
    columns: Sequence[str],
    series: bool,
    extra_conditions: List[Condition],
    remove_groupby: Set[Column],
) -> Query:
    """Build the snuba query"""
    conditions = [
        Condition(Column("org_id"), Op.EQ, org_id),
        Condition(Column("project_id"), Op.IN,
                  query.filter_keys["project_id"]),
        Condition(Column("metric_id"), Op.EQ, metric_id),
        Condition(Column(TS_COL_QUERY), Op.GTE, query.start),
        Condition(Column(TS_COL_QUERY), Op.LT, query.end),
    ]
    conditions += _get_filter_conditions(org_id, query.conditions)
    conditions += extra_conditions

    groupby_tags = [field for field in query.raw_groupby if field != "project"]

    tag_keys = {field: _resolve(field) for field in groupby_tags}
    groupby = {
        field: Column(f"tags[{tag_id}]")
        for field, tag_id in tag_keys.items()
        if tag_id is not None  # exclude unresolved keys from groupby
    }

    if "project" in query.raw_groupby:
        groupby["project"] = Column("project_id")

    full_groupby = set(groupby.values()) - remove_groupby
    if series:
        full_groupby.add(Column(TS_COL_GROUP))

    return Query(
        dataset=Dataset.Metrics.value,
        match=Entity(entity_key.value),
        select=[Column(column) for column in columns],
        groupby=list(full_groupby),
        where=conditions,
        granularity=Granularity(query.rollup),
    )
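The groupby above maps each requested tag field to an indirect tags[<id>] column and silently drops fields whose key cannot be resolved. A self-contained sketch of that mapping with a stand-in resolver (the real _resolve indexer lookup is not part of this excerpt):

from typing import Dict, Optional

from snuba_sdk import Column

def _resolve_stub(field: str) -> Optional[int]:
    # Stand-in for the metrics indexer lookup used by _get_snuba_query.
    return {"release": 10, "environment": 11}.get(field)

groupby_tags = ["release", "environment", "unknown_tag"]
tag_keys = {field: _resolve_stub(field) for field in groupby_tags}
groupby: Dict[str, Column] = {
    field: Column(f"tags[{tag_id}]")
    for field, tag_id in tag_keys.items()
    if tag_id is not None  # "unknown_tag" is excluded from the groupby
}
assert set(groupby) == {"release", "environment"}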
Example #7
    def _get_crash_free_rate_data(
        org_id: int,
        project_ids: Sequence[int],
        start: datetime,
        end: datetime,
        rollup: int,
    ) -> Dict[int, Dict[str, float]]:

        data: Dict[int, Dict[str, float]] = {}

        session_status = tag_key(org_id, "session.status")

        count_query = Query(
            dataset=Dataset.Metrics.value,
            match=Entity(EntityKey.MetricsCounters.value),
            select=[Column("value")],
            where=[
                Condition(Column("org_id"), Op.EQ, org_id),
                Condition(Column("project_id"), Op.IN, project_ids),
                Condition(Column("metric_id"), Op.EQ,
                          metric_id(org_id, "session")),
                Condition(Column("timestamp"), Op.GTE, start),
                Condition(Column("timestamp"), Op.LT, end),
            ],
            groupby=[
                Column("project_id"),
                Column(session_status),
            ],
            granularity=Granularity(rollup),
        )

        count_data = raw_snql_query(
            count_query,
            referrer="release_health.metrics.get_crash_free_data",
            use_cache=False)["data"]

        for row in count_data:
            project_data = data.setdefault(row["project_id"], {})
            tag_value = reverse_tag_value(org_id, row[session_status])
            project_data[tag_value] = row["value"]

        return data
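The data mapping built above holds per-project session counts keyed by session.status. A hedged sketch of deriving a crash-free percentage from one project's counts (the downstream helper is not shown in this excerpt, and the "init"/"crashed" status values are assumptions about the session.status tag):

from typing import Dict, Optional

def crash_free_percentage(project_data: Dict[str, float]) -> Optional[float]:
    # project_data maps session.status values to session counts for one
    # project, i.e. one entry of the `data` dict returned above.
    total = project_data.get("init", 0.0)
    if not total:
        return None  # no sessions in the window
    crashed = project_data.get("crashed", 0.0)
    return 100.0 * (1.0 - crashed / total)

assert crash_free_percentage({"init": 256.0, "crashed": 4.0}) == 98.4375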
Example #8
    def get_oldest_health_data_for_releases(
        self,
        project_releases: Sequence[ProjectRelease],
    ) -> Mapping[ProjectRelease, str]:

        now = datetime.now(pytz.utc)
        start = now - timedelta(days=90)

        project_ids: List[ProjectId] = [x[0] for x in project_releases]
        org_id = self._get_org_id(project_ids)
        release_column_name = tag_key(org_id, "release")
        releases = [x[1] for x in project_releases]
        releases_ids = [
            release_id for release_id in
            [try_get_string_index(org_id, release) for release in releases]
            if release_id is not None
        ]

        query_cols = [
            Column("project_id"),
            Column(release_column_name),
            Function("min", [Column("bucketed_time")], "oldest"),
        ]

        group_by = [
            Column("project_id"),
            Column(release_column_name),
        ]

        where_clause = [
            Condition(Column("org_id"), Op.EQ, org_id),
            Condition(Column("project_id"), Op.IN, project_ids),
            Condition(Column("metric_id"), Op.EQ, metric_id(org_id,
                                                            "session")),
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, now),
            Condition(Column(release_column_name), Op.IN, releases_ids),
        ]

        query = Query(
            dataset=Dataset.Metrics.value,
            match=Entity("metrics_counters"),
            select=query_cols,
            where=where_clause,
            groupby=group_by,
            granularity=Granularity(3600),
        )
        rows = raw_snql_query(
            query,
            referrer="release_health.metrics.get_oldest_health_data_for_releases",
            use_cache=False,
        )["data"]

        result = {}

        for row in rows:
            result[
                row["project_id"],
                reverse_tag_value(org_id, row[release_column_name]),
            ] = row["oldest"]

        return result
     groupby=[Column("title")],
     where=[
         Condition(Column("timestamp"), Op.GT, NOW),
         Condition(Function("toHour", [Column("timestamp")]), Op.LTE,
                   NOW),
         Condition(Column("project_id"), Op.IN,
                   Function("tuple", [1, 2, 3])),
     ],
     having=[
         Condition(Function("uniq", [Column("event_id")]), Op.GT, 1)
     ],
     orderby=[OrderBy(Column("title"), Direction.ASC)],
     limitby=LimitBy(Column("title"), 5),
     limit=Limit(10),
     offset=Offset(1),
     granularity=Granularity(3600),
     totals=Totals(True),
 ),
 (
     "MATCH (events SAMPLE 1000)",
     "SELECT title, uniq(event_id) AS uniq_events, quantile(0.5)(duration) AS p50",
     "BY title",
     ("WHERE timestamp > toDateTime('2021-01-02T03:04:05.000006') "
      "AND toHour(timestamp) <= toDateTime('2021-01-02T03:04:05.000006') "
      "AND project_id IN tuple(1, 2, 3)"),
     "HAVING uniq(event_id) > 1",
     "ORDER BY title ASC",
     "LIMIT 5 BY title",
     "LIMIT 10",
     "OFFSET 1",
     "GRANULARITY 3600",
Example #10
def build_project_series(start__stop, project):
    start, stop = start__stop
    rollup = ONE_DAY

    resolution, series = tsdb.get_optimal_rollup_series(start, stop, rollup)
    assert resolution == rollup, "resolution does not match requested value"

    clean = partial(clean_series, start, stop, rollup)

    def zerofill_clean(data):
        return clean(zerofill(data, start, stop, rollup, fill_default=0))

    # Note: this section can be removed
    issue_ids = project.group_set.filter(status=GroupStatus.RESOLVED,
                                         resolved_at__gte=start,
                                         resolved_at__lt=stop).values_list(
                                             "id", flat=True)

    # TODO: The TSDB calls could be replaced with a SnQL call here
    tsdb_range_resolved = _query_tsdb_groups_chunked(tsdb.get_range, issue_ids,
                                                     start, stop, rollup)
    resolved_error_series = reduce(
        merge_series,
        map(clean, tsdb_range_resolved.values()),
        clean([(timestamp, 0) for timestamp in series]),
    )
    # end

    # Use outcomes to compute total errors and transactions
    outcomes_query = Query(
        dataset=Dataset.Outcomes.value,
        match=Entity("outcomes"),
        select=[
            Column("time"),
            Column("category"),
            Function("sum", [Column("quantity")], "total"),
        ],
        where=[
            Condition(Column("timestamp"), Op.GTE, start),
            Condition(Column("timestamp"), Op.LT, stop + timedelta(days=1)),
            Condition(Column("project_id"), Op.EQ, project.id),
            Condition(Column("org_id"), Op.EQ, project.organization_id),
            Condition(Column("outcome"), Op.EQ, Outcome.ACCEPTED),
            Condition(
                Column("category"),
                Op.IN,
                [*DataCategory.error_categories(), DataCategory.TRANSACTION],
            ),
        ],
        groupby=[Column("time"), Column("category")],
        granularity=Granularity(rollup),
        orderby=[OrderBy(Column("time"), Direction.ASC)],
    )
    outcome_series = raw_snql_query(outcomes_query,
                                    referrer="reports.outcome_series")
    total_error_series = OrderedDict()
    for v in outcome_series["data"]:
        if v["category"] in DataCategory.error_categories():
            timestamp = int(to_timestamp(parse_snuba_datetime(v["time"])))
            total_error_series[timestamp] = total_error_series.get(
                timestamp, 0) + v["total"]

    total_error_series = zerofill_clean(list(total_error_series.items()))
    transaction_series = [(int(to_timestamp(parse_snuba_datetime(v["time"]))),
                           v["total"]) for v in outcome_series["data"]
                          if v["category"] == DataCategory.TRANSACTION]
    transaction_series = zerofill_clean(transaction_series)

    error_series = merge_series(
        resolved_error_series,
        total_error_series,
        lambda resolved, total: (resolved, total - resolved),  # Resolved, Unresolved
    )

    # Format of this series: [(resolved, unresolved, transactions)]
    return merge_series(
        error_series,
        transaction_series,
        lambda errors, transactions: errors + (transactions,),
    )
Example #11
def test_granularity(value: Any, exception: Optional[Exception]) -> None:
    if exception is not None:
        with pytest.raises(type(exception), match=re.escape(str(exception))):
            Granularity(value)
    else:
        assert Granularity(value).granularity == value
Example #12
 def set_granularity(self, granularity: int) -> "Query":
     return self._replace("granularity", Granularity(granularity))