Example #1
# Imports reconstructed for this excerpt; `logger`, `MAX_SECONDS` and
# `CHUNK_SIZE` are module-level definitions in the original task module.
import time
from collections import defaultdict
from datetime import datetime, timedelta

from snuba_sdk import Column, Condition, Direction, Entity, Granularity, Op, OrderBy, Query

from sentry.utils import metrics, snuba


def monitor_release_adoption(**kwargs):
    metrics.incr("sentry.tasks.monitor_release_adoption.start", sample_rate=1.0)
    # 1. Query snuba for all project ids that have sessions.
    with metrics.timer(
        "sentry.tasks.monitor_release_adoption.aggregate_projects.loop", sample_rate=1.0
    ):
        aggregated_projects = defaultdict(list)
        start_time = time.time()
        offset = 0
        while (time.time() - start_time) < MAX_SECONDS:
            query = (
                Query(
                    dataset="sessions",
                    match=Entity("org_sessions"),
                    select=[
                        Column("org_id"),
                        Column("project_id"),
                    ],
                    groupby=[Column("org_id"), Column("project_id")],
                    where=[
                        Condition(
                            Column("started"), Op.GTE, datetime.utcnow() - timedelta(hours=6)
                        ),
                        Condition(Column("started"), Op.LT, datetime.utcnow()),
                    ],
                    granularity=Granularity(3600),
                    orderby=[
                        OrderBy(Column("org_id"), Direction.ASC),
                        OrderBy(Column("project_id"), Direction.ASC),
                    ],
                )
                .set_limit(CHUNK_SIZE + 1)
                .set_offset(offset)
            )
            data = snuba.raw_snql_query(query, referrer="tasks.monitor_release_adoption")["data"]
            count = len(data)
            more_results = count > CHUNK_SIZE
            offset += CHUNK_SIZE

            if more_results:
                data = data[:-1]

            for row in data:
                aggregated_projects[row["org_id"]].append(row["project_id"])

            if not more_results:
                break

        else:
            logger.info(
                "monitor_release_adoption.loop_timeout",
                extra={"offset": offset},
            )
    with metrics.timer(
        "sentry.tasks.monitor_release_adoption.process_projects_with_sessions", sample_rate=1.0
    ):
        for org_id in aggregated_projects:
            process_projects_with_sessions.delay(org_id, aggregated_projects[org_id])
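
A note on the loop above: it asks Snuba for `CHUNK_SIZE + 1` rows but only consumes `CHUNK_SIZE`, using the extra row purely as a signal that another page exists, and the `while`/`else` fires the timeout log only when `MAX_SECONDS` elapses without a `break`. A minimal, self-contained sketch of the same over-fetch pagination idiom (`fetch_page` is a stand-in for any offset-based fetcher, not part of the original code):

def iterate_all_rows(fetch_page, chunk_size=1000):
    # Over-fetch by one row per page: the extra row only signals that
    # another page exists and is never yielded.
    offset = 0
    while True:
        rows = fetch_page(limit=chunk_size + 1, offset=offset)
        more_results = len(rows) > chunk_size
        if more_results:
            rows = rows[:chunk_size]
        yield from rows
        if not more_results:
            break
        offset += chunk_size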
Example #2
    def test_sub_query(self) -> None:
        inner_query = (
            Query("discover", Entity("discover_events"))
            .set_select([Function("count", [], "count")])
            .set_groupby([Column("project_id"), Column("tags[custom_tag]")])
            .set_where(
                [
                    Condition(Column("type"), Op.NEQ, "transaction"),
                    Condition(Column("project_id"), Op.EQ, self.project_id),
                    Condition(Column("timestamp"), Op.GTE, self.base_time),
                    Condition(Column("timestamp"), Op.LT, self.next_time),
                ]
            )
        )

        query = (
            Query("discover", inner_query)
            .set_select([Function("avg", [Column("count")], "avg_count")])
            .set_orderby(
                [OrderBy(Function("avg", [Column("count")], "avg_count"), Direction.ASC)]
            )
            .set_limit(1000)
        )

        response = self.post("/discover/snql", data=query.snuba())
        data = json.loads(response.data)
        assert response.status_code == 200, data
        assert data["data"] == [{"avg_count": 1.0}]
Example #3
    def test_generate_order_by_clause(self):
        for derived_metric_name in DERIVED_METRICS.keys():
            if derived_metric_name == self.crash_free_fake.metric_name:
                continue
            derived_metric_obj = DERIVED_METRICS[derived_metric_name]
            assert derived_metric_obj.generate_orderby_clause(
                projects=[self.project], direction=Direction.ASC
            ) == [
                OrderBy(
                    derived_metric_obj.generate_select_statements([self.project])[0],
                    Direction.ASC,
                )
            ]

        with pytest.raises(DerivedMetricParseException):
            self.crash_free_fake.generate_orderby_clause(
                projects=[self.project], direction=Direction.DESC
            )
Example #4
def test_build_snuba_query_orderby(mock_now, mock_now2, mock_indexer):
    mock_indexer.resolve = MockIndexer().resolve
    query_params = MultiValueDict(
        {
            # "staging" is an arbitrary release, but it must be a string that
            # exists in the mock indexer.
            "query": ["release:staging"],
            "groupBy": ["session.status", "environment"],
            "field": [
                "sum(sentry.sessions.session)",
            ],
            "orderBy": ["-sum(sentry.sessions.session)"],
            "limit": [3],
        }
    )
    query_definition = QueryDefinition(query_params)
    snuba_queries = SnubaQueryBuilder([PseudoProject(1, 1)], query_definition).get_snuba_queries()

    counter_queries = snuba_queries.pop("metrics_counters")
    assert not snuba_queries
    assert counter_queries["series"] is None  # No series query because of orderBy

    assert counter_queries["totals"] == Query(
        dataset="metrics",
        match=Entity("metrics_counters"),
        select=[Function("sum", [Column("value")], "value")],
        groupby=[
            Column("metric_id"),
            Column("tags[8]"),
            Column("tags[2]"),
        ],
        where=[
            Condition(Column("org_id"), Op.EQ, 1),
            Condition(Column("project_id"), Op.IN, [1]),
            Condition(Column("metric_id"), Op.IN, [9]),
            Condition(Column("timestamp"), Op.GTE, datetime(2021, 5, 28, 0, tzinfo=pytz.utc)),
            Condition(Column("timestamp"), Op.LT, datetime(2021, 8, 26, 0, tzinfo=pytz.utc)),
            Condition(Column("tags[6]", entity=None), Op.IN, [10]),
        ],
        orderby=[OrderBy(Column("value"), Direction.DESC)],
        limit=Limit(3),
        offset=Offset(0),
        granularity=Granularity(query_definition.rollup),
    )
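
All of the opaque integers in the expected query come from the metrics indexer, which maps strings to ids before they reach Snuba: the `groupBy` fields become `tags[8]` and `tags[2]`, the `release:staging` filter becomes `tags[6] IN [10]`, and the metric name becomes `metric_id IN [9]`. A hypothetical mapping consistent with those assertions (the real `MockIndexer` defines its own table):

HYPOTHETICAL_INDEXER_IDS = {
    "session.status": 8,           # groupBy -> tags[8]
    "environment": 2,              # groupBy -> tags[2]
    "release": 6,                  # filter key -> tags[6]
    "staging": 10,                 # filter value -> IN [10]
    "sentry.sessions.session": 9,  # metric name -> metric_id IN [9]
}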
Example #5
    def test_sessions_query(self) -> None:
        query = (
            Query("sessions", Entity("sessions"))
            .set_select([Column("project_id"), Column("release")])
            .set_groupby([Column("project_id"), Column("release")])
            .set_where(
                [
                    Condition(Column("project_id"), Op.IN, [self.project_id]),
                    Condition(Column("org_id"), Op.EQ, self.org_id),
                    Condition(Column("started"), Op.GTE, datetime(2021, 1, 1, 17, 5, 59, 554860)),
                    Condition(Column("started"), Op.LT, datetime(2022, 1, 1, 17, 6, 0, 554981)),
                ]
            )
            .set_orderby([OrderBy(Column("sessions"), Direction.DESC)])
            .set_limit(100)
        )

        response = self.post("/sessions/snql", data=query.snuba())
        data = json.loads(response.data)

        assert response.status_code == 200
        assert data["data"] == []
Example #6
    def test_tags_in_groupby(self) -> None:
        query = (
            Query("events", Entity("events"))
            .set_select(
                [
                    Function("count", [], "times_seen"),
                    Function("min", [Column("timestamp")], "first_seen"),
                    Function("max", [Column("timestamp")], "last_seen"),
                ]
            )
            .set_groupby([Column("tags[k8s-app]")])
            .set_where(
                [
                    Condition(Column("project_id"), Op.EQ, self.project_id),
                    Condition(Column("timestamp"), Op.GTE, self.base_time),
                    Condition(Column("timestamp"), Op.LT, self.next_time),
                    Condition(Column("tags[k8s-app]"), Op.NEQ, ""),
                    Condition(Column("type"), Op.NEQ, "transaction"),
                ]
            )
            .set_orderby(
                [OrderBy(Function("max", [Column("timestamp")], "last_seen"), Direction.DESC)]
            )
            .set_limit(1000)
        )

        response = self.post("/events/snql", data=query.snuba())
        data = json.loads(response.data)
        assert response.status_code == 200, data
Example #7
    def test_simple_query(self) -> None:
        query = (
            Query("discover", Entity("discover_events"))
            .set_select([Function("count", [], "count")])
            .set_groupby([Column("project_id"), Column("tags[custom_tag]")])
            .set_where(
                [
                    Condition(Column("type"), Op.NEQ, "transaction"),
                    Condition(Column("project_id"), Op.EQ, self.project_id),
                    Condition(Column("timestamp"), Op.GTE, self.base_time),
                    Condition(Column("timestamp"), Op.LT, self.next_time),
                ]
            )
            .set_orderby([OrderBy(Function("count", [], "count"), Direction.ASC)])
            .set_limit(1000)
            .set_consistent(True)
            .set_debug(True)
        )

        response = self.post("/discover/snql", data=query.snuba())
        data = json.loads(response.data)

        assert response.status_code == 200, data
        assert data["stats"]["consistent"]
        assert data["data"] == [
            {
                "count": 1,
                "tags[custom_tag]": "custom_value",
                "project_id": self.project_id,
            }
        ]
Example #8
    def test_arrayjoin(self) -> None:
        query = (
            Query("events", Entity("events"))
            .set_select(
                [
                    Function("count", [], "times_seen"),
                    Function("min", [Column("timestamp")], "first_seen"),
                    Function("max", [Column("timestamp")], "last_seen"),
                ]
            )
            .set_groupby([Column("exception_frames.filename")])
            .set_array_join([Column("exception_frames.filename")])
            .set_where(
                [
                    Condition(Column("exception_frames.filename"), Op.LIKE, "%.java"),
                    Condition(Column("project_id"), Op.EQ, self.project_id),
                    Condition(Column("timestamp"), Op.GTE, self.base_time),
                    Condition(Column("timestamp"), Op.LT, self.next_time),
                ]
            )
            .set_orderby(
                [OrderBy(Function("max", [Column("timestamp")], "last_seen"), Direction.DESC)]
            )
            .set_limit(1000)
        )

        response = self.post("/events/snql", data=query.snuba())
        data = json.loads(response.data)
        assert response.status_code == 200, data
        assert len(data["data"]) == 6
Example #9
def _get_snuba_query(
    org_id: int,
    query: QueryDefinition,
    entity_key: EntityKey,
    metric_id: int,
    columns: List[SelectableExpression],
    series: bool,
    limit_state: _LimitState,
    extra_conditions: List[Condition],
) -> Optional[Query]:
    """Build the snuba query

    Return None if the initial totals query returned no results.
    """
    conditions = [
        Condition(Column("org_id"), Op.EQ, org_id),
        Condition(Column("project_id"), Op.IN,
                  query.filter_keys["project_id"]),
        Condition(Column("metric_id"), Op.EQ, metric_id),
        Condition(Column(TS_COL_QUERY), Op.GTE, query.start),
        Condition(Column(TS_COL_QUERY), Op.LT, query.end),
    ]
    conditions += _get_filter_conditions(org_id, query.conditions)
    conditions += extra_conditions

    groupby = {}
    for field in query.raw_groupby:
        if field == "session.status":
            # This will be handled by conditional aggregates
            continue

        if field == "project":
            groupby["project"] = Column("project_id")
            continue

        try:
            groupby[field] = Column(resolve_tag_key(field))
        except MetricIndexNotFound:
            # exclude unresolved keys from groupby
            pass

    full_groupby = list(set(groupby.values()))

    if series:
        full_groupby.append(Column(TS_COL_GROUP))

    query_args = dict(
        dataset=Dataset.Metrics.value,
        match=Entity(entity_key.value),
        select=columns,
        groupby=full_groupby,
        where=conditions,
        granularity=Granularity(query.rollup),
    )

    # In case of group by, either set a limit or use the groups from the
    # first query to limit the results:
    if query.raw_groupby:
        if not limit_state.initialized:
            # Set limit and order by to be consistent with sessions_v2
            max_groups = SNUBA_LIMIT // len(get_timestamps(query))
            query_args["limit"] = Limit(max_groups)
            query_args["orderby"] = [OrderBy(columns[0], Direction.DESC)]
        else:
            if limit_state.limiting_conditions is None:
                # Initial query returned no results, no need to run any more queries
                return None

            query_args["where"] += limit_state.limiting_conditions
            query_args["limit"] = Limit(SNUBA_LIMIT)

    return Query(**query_args)
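
The comments above describe two-phase limiting: the first (totals) query runs with `Limit(max_groups)` and an orderby, and later queries are pinned to exactly the groups it returned via `limit_state.limiting_conditions`. A minimal sketch of a state object exposing just the two attributes this function reads (the real `_LimitState` carries more logic for deriving those conditions):

from dataclasses import dataclass
from typing import List, Optional

from snuba_sdk import Condition


@dataclass
class LimitStateSketch:
    # False until the initial totals query has run.
    initialized: bool = False
    # Conditions pinning follow-up queries to the groups returned by the
    # initial query; None signals that the initial query came back empty.
    limiting_conditions: Optional[List[Condition]] = None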
Example #10
def sum_sessions_and_releases(org_id, project_ids):
    # Takes a single org id and a list of project ids; returns counts of
    # releases and sessions across all environments for the given projects
    # over the last 6 hours.
    start_time = time.time()
    offset = 0
    totals = defaultdict(dict)
    with metrics.timer(
        "sentry.tasks.monitor_release_adoption.process_projects_with_sessions.loop"
    ):
        while (time.time() - start_time) < MAX_SECONDS:
            with metrics.timer(
                "sentry.tasks.monitor_release_adoption.process_projects_with_sessions.query"
            ):
                query = (
                    Query(
                        dataset="sessions",
                        match=Entity("sessions"),
                        select=[
                            Column("sessions"),
                        ],
                        groupby=[
                            Column("org_id"),
                            Column("project_id"),
                            Column("release"),
                            Column("environment"),
                        ],
                        where=[
                            Condition(
                                Column("started"), Op.GTE, datetime.utcnow() - timedelta(hours=6)
                            ),
                            Condition(Column("started"), Op.LT, datetime.utcnow()),
                            Condition(Column("org_id"), Op.EQ, org_id),
                            Condition(Column("project_id"), Op.IN, project_ids),
                        ],
                        granularity=Granularity(21600),
                        orderby=[
                            OrderBy(Column("org_id"), Direction.ASC),
                            OrderBy(Column("project_id"), Direction.ASC),
                        ],
                    )
                    .set_limit(CHUNK_SIZE + 1)
                    .set_offset(offset)
                )

                data = snuba.raw_snql_query(
                    query, referrer="tasks.process_projects_with_sessions.session_count"
                )["data"]
                count = len(data)
                more_results = count > CHUNK_SIZE
                offset += CHUNK_SIZE

                if more_results:
                    data = data[:-1]

                for row in data:
                    row_totals = totals[row["project_id"]].setdefault(
                        row["environment"],
                        {"total_sessions": 0, "releases": defaultdict(int)},
                    )
                    row_totals["total_sessions"] += row["sessions"]
                    row_totals["releases"][row["release"]] += row["sessions"]

            if not more_results:
                break
        else:
            logger.info(
                "process_projects_with_sessions.loop_timeout",
                extra={"org_id": org_id, "project_ids": project_ids},
            )
    return totals
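
The returned `totals` is a two-level mapping: project id, then environment, with a session total and a per-release breakdown underneath. For a single Snuba row carrying 10 sessions, the accumulated structure looks like this (ids and names invented for illustration):

totals = {
    123: {                              # project_id
        "production": {                 # environment
            "total_sessions": 10,
            "releases": {"1.0.0": 10},  # sessions attributed per release
        },
    },
}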
Example #11
    def test_suspect_spans_lambdas(self) -> None:
        query = (
            Query("discover", Entity("discover_transactions"))
            .set_select(
                [
                    Column("spans.op"),
                    Column("spans.group"),
                    Function(
                        "arrayReduce",
                        [
                            "sumIf",
                            Column("spans.exclusive_time_32"),
                            Function(
                                "arrayMap",
                                [
                                    Lambda(
                                        ["x", "y"],
                                        Function(
                                            "if",
                                            [
                                                Function(
                                                    "equals",
                                                    [
                                                        Function(
                                                            "and",
                                                            [
                                                                Function("equals", [Identifier("x"), "db"]),
                                                                Function(
                                                                    "equals",
                                                                    [Identifier("y"), "05029609156d8133"],
                                                                ),
                                                            ],
                                                        ),
                                                        1,
                                                    ],
                                                ),
                                                1,
                                                0,
                                            ],
                                        ),
                                    ),
                                    Column("spans.op"),
                                    Column("spans.group"),
                                ],
                            ),
                        ],
                        "array_spans_exclusive_time",
                    ),
                ]
            )
            .set_where(
                [
                    Condition(Column("transaction_name"), Op.EQ, "/api/do_things"),
                    Condition(Function("has", [Column("spans.op"), "db"]), Op.EQ, 1),
                    Condition(
                        Function("has", [Column("spans.group"), "05029609156d8133"]), Op.EQ, 1
                    ),
                    Condition(Column("duration"), Op.LT, 900000.0),
                    Condition(Column("finish_ts"), Op.GTE, self.base_time),
                    Condition(Column("finish_ts"), Op.LT, self.next_time),
                    Condition(Column("project_id"), Op.IN, (self.project_id,)),
                ]
            )
            .set_orderby([OrderBy(Column("array_spans_exclusive_time"), Direction.DESC)])
            .set_limit(10)
        )

        response = self.post("/discover/snql", data=query.snuba())
        resp = json.loads(response.data)
        assert response.status_code == 200, resp
        data = resp["data"]
        assert len(data) == 1
        assert data[0]["array_spans_exclusive_time"] > 0
Example #12
    pytest.param(
        Query(
            dataset="discover",
            match=Entity("events", "ev", 0.2),
            select=[
                Column("title"),
                Column("tags[release:1]"),
                Function("uniq", [Column("event_id")], "uniq_events"),
            ],
            groupby=[Column("title"), Column("tags[release:1]")],
            where=[
                Condition(Column("timestamp"), Op.GT, NOW),
                Condition(Function("toHour", [Column("timestamp")]), Op.LTE, NOW),
                Condition(Column("project_id"), Op.IN, Function("tuple", [1, 2, 3])),
            ],
            having=[Condition(Function("uniq", [Column("event_id")]), Op.GT, 1)],
            orderby=[OrderBy(Column("title"), Direction.ASC)],
            limitby=LimitBy([Column("title"), Column("event_id")], 5),
            limit=Limit(10),
            offset=Offset(1),
            granularity=Granularity(3600),
            debug=Debug(True),
        ),
        id="complex query",
    ),
    pytest.param(
        Query("discover", Entity("events", None, 0.2))
        .set_select([Column("event_id")])
        .set_where([Condition(Column("timestamp"), Op.GT, NOW)])
        .set_limit(10)
        .set_offset(1)
        .set_granularity(3600),
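
Two details worth calling out in this fragment: the `Entity` constructor's optional second and third arguments are an alias and a sampling rate, and `LimitBy` caps rows per distinct key tuple on top of the overall `Limit`. Assuming the keyword names match the positional form used above:

from snuba_sdk import Column, Entity, LimitBy

# Aliased entity sampled at 20%; equivalent to Entity("events", "ev", 0.2).
sampled = Entity("events", alias="ev", sample=0.2)
# At most 5 rows per distinct (title, event_id) pair, 10 rows overall.
per_group_cap = LimitBy([Column("title"), Column("event_id")], 5)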
Example #13
def test_build_snuba_query_orderby(mock_now, mock_now2, monkeypatch):
    monkeypatch.setattr("sentry.sentry_metrics.indexer.resolve", MockIndexer().resolve)
    query_params = MultiValueDict(
        {
            # "staging" is an arbitrary release, but it must be a string that
            # exists in the mock indexer.
            "query": ["release:staging"],
            "groupBy": ["session.status", "environment"],
            "field": [
                "sum(sentry.sessions.session)",
            ],
            "orderBy": ["-sum(sentry.sessions.session)"],
        }
    )
    query_definition = QueryDefinition(query_params, paginator_kwargs={"limit": 3})
    snuba_queries, _ = SnubaQueryBuilder([PseudoProject(1, 1)], query_definition).get_snuba_queries()

    counter_queries = snuba_queries.pop("metrics_counters")
    assert not snuba_queries

    op = "sum"
    metric_name = "sentry.sessions.session"
    select = Function(
        OP_TO_SNUBA_FUNCTION["metrics_counters"]["sum"],
        [Column("value"), Function("equals", [Column("metric_id"), resolve_weak(metric_name)])],
        alias=f"{op}({metric_name})",
    )

    assert counter_queries["totals"] == Query(
        dataset="metrics",
        match=Entity("metrics_counters"),
        select=[select],
        groupby=[
            Column("tags[8]"),
            Column("tags[2]"),
        ],
        where=[
            Condition(Column("org_id"), Op.EQ, 1),
            Condition(Column("project_id"), Op.IN, [1]),
            Condition(Column("timestamp"), Op.GTE, datetime(2021, 5, 28, 0, tzinfo=pytz.utc)),
            Condition(Column("timestamp"), Op.LT, datetime(2021, 8, 26, 0, tzinfo=pytz.utc)),
            Condition(Column("tags[6]", entity=None), Op.IN, [10]),
            Condition(Column("metric_id"), Op.IN, [9]),
        ],
        orderby=[OrderBy(select, Direction.DESC)],
        limit=Limit(3),
        offset=Offset(0),
        granularity=Granularity(query_definition.rollup),
    )
    assert counter_queries["series"] == Query(
        dataset="metrics",
        match=Entity("metrics_counters"),
        select=[select],
        groupby=[
            Column("tags[8]"),
            Column("tags[2]"),
            Column("bucketed_time"),
        ],
        where=[
            Condition(Column("org_id"), Op.EQ, 1),
            Condition(Column("project_id"), Op.IN, [1]),
            Condition(Column("timestamp"), Op.GTE, datetime(2021, 5, 28, 0, tzinfo=pytz.utc)),
            Condition(Column("timestamp"), Op.LT, datetime(2021, 8, 26, 0, tzinfo=pytz.utc)),
            Condition(Column("tags[6]", entity=None), Op.IN, [10]),
            Condition(Column("metric_id"), Op.IN, [9]),
        ],
        orderby=[OrderBy(select, Direction.DESC)],
        limit=Limit(6480),
        offset=Offset(0),
        granularity=Granularity(query_definition.rollup),
    )
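
A sanity check on the series limit: the window from 2021-05-28 to 2021-08-26 spans 90 days, which is 2160 buckets assuming the default hourly rollup, and the totals limit of 3 groups times 2160 buckets gives exactly the `Limit(6480)` asserted above:

from datetime import datetime

buckets = int((datetime(2021, 8, 26) - datetime(2021, 5, 28)).total_seconds() // 3600)
assert buckets == 2160      # 90 days of hourly intervals
assert 3 * buckets == 6480  # totals limit x buckets = series limit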