Example #1
from datetime import datetime

from sentry.snuba import discover


def test_zerofill():
    results = discover.zerofill({}, datetime(2019, 1, 2, 0, 0),
                                datetime(2019, 1, 9, 23, 59, 59), 86400,
                                "time")
    results_desc = discover.zerofill({}, datetime(2019, 1, 2, 0, 0),
                                     datetime(2019, 1, 9, 23, 59, 59), 86400,
                                     "-time")

    assert results == list(reversed(results_desc))

    # One bucket for each day from January 2 through January 9
    assert len(results) == 8

    assert results[0]["time"] == 1546387200
    assert results[7]["time"] == 1546992000
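
For reference, here is a minimal sketch of the behaviour this test exercises (an illustration, not the actual Sentry implementation): `zerofill` appears to emit one row per `rollup`-second bucket between `start` and `end`, keeping any existing rows and inserting empty `{"time": ...}` rows for the gaps, with the order reversed when the orderby is "-time".

from datetime import timezone


def zerofill_sketch(data, start, end, rollup, orderby):
    # Sketch only: treat naive datetimes as UTC and snap to rollup boundaries.
    def to_epoch(dt):
        return int(dt.replace(tzinfo=timezone.utc).timestamp())

    buckets = {row["time"]: row for row in data}
    first = to_epoch(start) // rollup * rollup
    last = to_epoch(end) // rollup * rollup
    filled = [buckets.get(t, {"time": t})
              for t in range(first, last + rollup, rollup)]
    return list(reversed(filled)) if orderby.startswith("-") else filled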
Example #2
        def get_event_stats(
            query_columns: Sequence[str],
            query: str,
            params: Dict[str, str],
            rollup: int,
            zerofill_results: bool,
            comparison_delta: Optional[datetime] = None,
        ) -> SnubaTSResult:
            with sentry_sdk.start_span(
                    op="discover.discover",
                    description="timeseries.filter_transform"):
                builder = TimeseriesQueryBuilder(
                    Dataset.Discover,
                    params,
                    rollup,
                    query=query,
                    selected_columns=query_columns,
                    functions_acl=[
                        "array_join", "percentileArray", "sumArray"
                    ],
                )

                span_op_column = builder.resolve_function(
                    "array_join(spans_op)")
                span_group_column = builder.resolve_function(
                    "array_join(spans_group)")

                # Add spans.op and spans.group to the group by because we
                # need them in the query for the array join optimizer in
                # Snuba to take effect, but the TimeseriesQueryBuilder
                # removes all non-aggregates from the select clause.
                builder.groupby.extend([span_op_column, span_group_column])

                builder.add_conditions([
                    Condition(
                        Function("tuple", [span_op_column, span_group_column]),
                        Op.IN,
                        Function("tuple",
                                 [Function("tuple", [span.op, span.group])]),
                    ),
                ])

                snql_query = builder.get_snql_query()
                results = raw_snql_query(
                    snql_query,
                    "api.organization-events-spans-performance-stats")

            with sentry_sdk.start_span(
                    op="discover.discover",
                    description="timeseries.transform_results"):
                result = discover.zerofill(
                    results["data"],
                    params["start"],
                    params["end"],
                    rollup,
                    "time",
                )

            return SnubaTSResult({"data": result}, params["start"],
                                 params["end"], rollup)
Example #3
def calculate_incident_start(query, projects, groups):
    """
    Attempts to automatically calculate the date that an incident began,
    based on the events related to the incident.
    """
    params = {}
    if groups:
        params["group_ids"] = [g.id for g in groups]
        end = max(g.last_seen for g in groups) + timedelta(seconds=1)
    else:
        end = timezone.now()

    params["start"] = end - INCIDENT_START_PERIOD
    params["end"] = end

    if projects:
        params["project_id"] = [p.id for p in projects]

    filter = get_filter(query, params)
    rollup = int(INCIDENT_START_ROLLUP.total_seconds())

    result = raw_query(
        aggregations=[("count()", "", "count"),
                      ("min", "timestamp", "first_seen")],
        orderby="time",
        groupby=["time"],
        rollup=rollup,
        referrer="incidents.calculate_incident_start",
        limit=10000,
        start=filter.start,
        end=filter.end,
        conditions=filter.conditions,
        filter_keys=filter.filter_keys,
    )["data"]
    # TODO: Start could be the period before the first period we find
    result = zerofill(result, params["start"], params["end"], rollup, "time")

    # We want to linearly scale scores from 100% for the most recent bucket
    # down to 50% for the oldest, which biases the result towards newer data.
    negative_weight = (1.0 / len(result)) / 2
    multiplier = 1.0
    cur_spike_max_count = -1
    cur_spike_start = None
    cur_spike_end = None
    max_height = 0
    incident_start = None
    cur_height = 0
    prev_count = 0

    def get_row_first_seen(row, default=None):
        first_seen = default
        if "first_seen" in row:
            first_seen = parse_date(row["first_seen"]).replace(tzinfo=pytz.utc)
        return first_seen

    def calculate_start(spike_start, spike_end):
        """
        We arbitrarily choose a date about 1/3 into the incident period. We
        could potentially improve this if we want by analyzing the period in
        more detail and choosing a date that most closely fits with being 1/3
        up the spike.
        """
        spike_length = spike_end - spike_start
        return spike_start + (spike_length / 3)

    for row in reversed(result):
        cur_count = row.get("count", 0)
        if cur_count < prev_count or cur_count > 0 and cur_count == prev_count:
            cur_height = cur_spike_max_count - cur_count
        elif cur_count > 0 or prev_count > 0 or cur_height > 0:
            # Now that we've got the height of the current spike, compare it
            # to the current max. We scale the value by `multiplier` so that
            # we favour newer results.
            cur_height *= multiplier
            if cur_height > max_height:
                # If we detect that we have a new highest peak, then set a new
                # incident start date
                incident_start = calculate_start(cur_spike_start,
                                                 cur_spike_end)
                max_height = cur_height

            cur_height = 0
            cur_spike_max_count = cur_count
            cur_spike_end = get_row_first_seen(row)

        # We attempt to get the first_seen value from the row here. If the row
        # doesn't have it (because it's a zerofilled row), then just use the
        # previous value. This allows us to have the start of a spike always be
        # a bucket that contains at least one element.
        cur_spike_start = get_row_first_seen(row, cur_spike_start)
        prev_count = cur_count
        multiplier -= negative_weight

    if (cur_height > max_height or not incident_start) and cur_spike_start:
        incident_start = calculate_start(cur_spike_start, cur_spike_end)

    if not incident_start:
        incident_start = timezone.now()

    return incident_start
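
A quick worked example of the down-weighting used above (the bucket count is illustrative): with 8 zerofilled buckets, `negative_weight` is `(1.0 / 8) / 2 = 0.0625`, so spike heights are scaled by 1.0 for the newest bucket down to 0.5625 for the oldest, which is what biases `incident_start` towards recent spikes.

# Illustrative arithmetic for the multiplier decay with 8 buckets.
n = 8
negative_weight = (1.0 / n) / 2          # 0.0625
multipliers = [1.0 - i * negative_weight for i in range(n)]
assert multipliers[0] == 1.0
assert multipliers[-1] == 0.5625         # oldest bucket is weighted ~56%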
Example #4
def timeseries_query(
    selected_columns: Sequence[str],
    query: str,
    params: Dict[str, str],
    rollup: int,
    referrer: str,
    zerofill_results: bool = True,
    comparison_delta: Optional[timedelta] = None,
    functions_acl: Optional[List[str]] = None,
    use_snql: Optional[bool] = False,
) -> SnubaTSResult:
    """
    High-level API for doing arbitrary user timeseries queries against events.
    This API should match that of sentry.snuba.discover.timeseries_query.
    """
    metrics_compatible = False
    equations, columns = categorize_columns(selected_columns)
    if comparison_delta is None and not equations:
        metrics_compatible = True

    if metrics_compatible:
        try:
            metrics_query = TimeseriesMetricQueryBuilder(
                params,
                rollup,
                query=query,
                selected_columns=columns,
                functions_acl=functions_acl,
            )
            result = metrics_query.run_query(referrer + ".metrics-enhanced")
            result = discover.transform_results(result, metrics_query.function_alias_map, {}, None)
            result["data"] = (
                discover.zerofill(
                    result["data"],
                    params["start"],
                    params["end"],
                    rollup,
                    "time",
                )
                if zerofill_results
                else result["data"]
            )
            return SnubaTSResult(
                {"data": result["data"], "isMetricsData": True},
                params["start"],
                params["end"],
                rollup,
            )
        # Re-raise InvalidSearchQuery since the same thing will happen with discover
        except InvalidSearchQuery as error:
            raise error
        # any remaining errors mean we should try again with discover
        except IncompatibleMetricsQuery:
            metrics_compatible = False

    # This isn't a query we can enhance with metrics
    if not metrics_compatible:
        return discover.timeseries_query(
            selected_columns,
            query,
            params,
            rollup,
            referrer,
            zerofill_results,
            comparison_delta,
            functions_acl,
            use_snql,
        )
    return SnubaTSResult()
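
A hypothetical call sketch (column, query, and id values are made up): the function first tries the metrics-enhanced path and transparently falls back to `discover.timeseries_query` when the query cannot be served from metrics.

from datetime import datetime, timedelta, timezone

# Hypothetical invocation; the params values below are illustrative only.
now = datetime.now(timezone.utc)
stats = timeseries_query(
    selected_columns=["count()"],
    query="event.type:transaction",
    params={
        "start": now - timedelta(days=1),
        "end": now,
        "project_id": [1],
        "organization_id": 1,
    },
    rollup=3600,
    referrer="api.organization-events-stats",
)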