Example #1
    def test_zerofill(self):
        results = zerofill({}, datetime(2019, 1, 2, 0, 0),
                           datetime(2019, 1, 9, 23, 59, 59), 86400, "time")
        results_desc = zerofill({}, datetime(2019, 1, 2, 0, 0),
                                datetime(2019, 1, 9, 23, 59, 59), 86400,
                                "-time")

        assert results == list(reversed(results_desc))

        # One daily bucket for each of Jan 2 through Jan 9 (8 days)
        assert len(results) == 8

        assert results[0]["time"] == 1546387200
        assert results[7]["time"] == 1546992000
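The test above pins down zerofill's observable behaviour: one bucket per rollup interval between start and end, with timestamps emitted as epoch seconds and the order reversed for "-time". Below is a minimal sketch of a function with that behaviour; zerofill_sketch is a hypothetical stand-in, not Sentry's actual implementation, and the real zerofill may attach additional fields to the filled rows.

from datetime import datetime

import pytz


def zerofill_sketch(data, start, end, rollup, orderby):
    # Index any real rows by their bucket timestamp (epoch seconds).
    by_time = {row["time"]: row for row in data}
    # Snap the start down to a bucket boundary; buckets are `rollup` seconds wide.
    start_ts = int(start.replace(tzinfo=pytz.utc).timestamp() // rollup) * rollup
    end_ts = int(end.replace(tzinfo=pytz.utc).timestamp())
    rows = []
    for ts in range(start_ts, end_ts, rollup):
        # Emit the real row when one exists, otherwise an empty placeholder bucket.
        rows.append(by_time.get(ts, {"time": ts}))
    # A leading "-" on the order field means newest-first.
    if orderby.startswith("-"):
        rows.reverse()
    return rows


# Mirrors the assertions in the test above: 8 daily buckets for Jan 2-9, 2019.
buckets = zerofill_sketch({}, datetime(2019, 1, 2),
                          datetime(2019, 1, 9, 23, 59, 59), 86400, "time")
assert len(buckets) == 8 and buckets[0]["time"] == 1546387200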
Example #2
def calculate_incident_start(query, projects, groups):
    """
    Attempts to automatically calculate the date an incident began, based on
    the events related to the incident.
    """
    params = {}
    if groups:
        params["issue.id"] = [g.id for g in groups]
        end = max(g.last_seen for g in groups) + timedelta(seconds=1)
    else:
        end = timezone.now()

    params["start"] = end - INCIDENT_START_PERIOD
    params["end"] = end

    if projects:
        params["project_id"] = [p.id for p in projects]

    filter = get_filter(query, params)
    rollup = int(INCIDENT_START_ROLLUP.total_seconds())

    result = raw_query(
        aggregations=[("count()", "", "count"),
                      ("min", "timestamp", "first_seen")],
        orderby="time",
        groupby=["time"],
        rollup=rollup,
        referrer="incidents.calculate_incident_start",
        limit=10000,
        start=filter.start,
        end=filter.end,
        conditions=filter.conditions,
        filter_keys=filter.filter_keys,
    )["data"]
    # TODO: Start could be the period before the first period we find
    result = zerofill(result, params["start"], params["end"], rollup, "time")

    # We want to linearly scale scores from 100% for the most recent bucket
    # down to 50% for the oldest. This biases the result towards newer data.
    negative_weight = (1.0 / len(result)) / 2
    multiplier = 1.0
    cur_spike_max_count = -1
    cur_spike_start = None
    cur_spike_end = None
    max_height = 0
    incident_start = None
    cur_height = 0
    prev_count = 0

    def get_row_first_seen(row, default=None):
        first_seen = default
        if "first_seen" in row:
            first_seen = parse_date(row["first_seen"]).replace(tzinfo=pytz.utc)
        return first_seen

    def calculate_start(spike_start, spike_end):
        """
        We arbitrarily choose a date about 1/3 into the incident period. We
        could potentially improve this if we want by analyzing the period in
        more detail and choosing a date that most closely fits with being 1/3
        up the spike.
        """
        spike_length = spike_end - spike_start
        return spike_start + (spike_length / 3)

    for row in reversed(result):
        cur_count = row.get("count", 0)
        if (cur_count < prev_count
                or (cur_count > 0 and cur_count == prev_count)):
            cur_height = cur_spike_max_count - cur_count
        elif cur_count > 0 or prev_count > 0 or cur_height > 0:
            # Now we've got the height of the current spike; compare it to the
            # current max. We scale the value by `multiplier` so that we
            # favour newer results
            cur_height *= multiplier
            if cur_height > max_height:
                # If we detect that we have a new highest peak, then set a new
                # incident start date
                incident_start = calculate_start(cur_spike_start,
                                                 cur_spike_end)
                max_height = cur_height

            cur_height = 0
            cur_spike_max_count = cur_count
            cur_spike_end = get_row_first_seen(row)

        # We attempt to get the first_seen value from the row here. If the row
        # doesn't have it (because it's a zerofilled row), then just use the
        # previous value. This allows us to have the start of a spike always be
        # a bucket that contains at least one element.
        cur_spike_start = get_row_first_seen(row, cur_spike_start)
        prev_count = cur_count
        multiplier -= negative_weight

    if (cur_height > max_height or not incident_start) and cur_spike_start:
        incident_start = calculate_start(cur_spike_start, cur_spike_end)

    if not incident_start:
        incident_start = timezone.now()

    return incident_start
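To make the weighting above concrete: with N buckets, negative_weight is 1/(2N), so multiplier decays linearly from 1.0 for the newest bucket to roughly 0.5 for the oldest. The sketch below is purely illustrative; spike_multipliers is a hypothetical helper, not part of the function above.

def spike_multipliers(num_buckets):
    # Reproduces the decay applied in calculate_incident_start: start at 1.0
    # and subtract (1 / num_buckets) / 2 after each bucket, newest first.
    negative_weight = (1.0 / num_buckets) / 2
    multiplier = 1.0
    multipliers = []
    for _ in range(num_buckets):
        multipliers.append(multiplier)
        multiplier -= negative_weight
    return multipliers


# With 4 buckets the newest spike height is kept at 100% and the oldest at 62.5%.
assert spike_multipliers(4) == [1.0, 0.875, 0.75, 0.625]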
Example #3
def timeseries_query(selected_columns,
                     query,
                     params,
                     rollup,
                     reference_event=None,
                     referrer=None):
    """
    High-level API for doing arbitrary user timeseries queries against events.

    This function operates on the public event schema; virtual fields and
    aggregate functions are supported in both the selected columns and the
    query conditions.

    It is intended only for timeseries-based results and therefore requires
    the `rollup` parameter.

    Returns a SnubaTSResult object whose data has been zerofilled to cover
    any gaps.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id,
                            environment.
    rollup (int) The bucket width in seconds.
    reference_event (ReferenceEvent) A reference event object. Used to generate additional
                    conditions based on the provided reference.
    referrer (str|None) A referrer string to help locate the origin of this query.
    """
    snuba_filter = get_filter(query, params)
    snuba_args = {
        "start": snuba_filter.start,
        "end": snuba_filter.end,
        "conditions": snuba_filter.conditions,
        "filter_keys": snuba_filter.filter_keys,
    }
    if not snuba_args["start"] and not snuba_args["end"]:
        raise InvalidSearchQuery(
            "Cannot get timeseries result without a start and end.")

    snuba_args.update(
        resolve_field_list(selected_columns, snuba_args, auto_fields=False))
    if reference_event:
        ref_conditions = create_reference_event_conditions(reference_event)
        if ref_conditions:
            snuba_args["conditions"].extend(ref_conditions)

    # Resolve the public aliases into the discover dataset names.
    snuba_args, _ = resolve_discover_aliases(snuba_args)
    if not snuba_args["aggregations"]:
        raise InvalidSearchQuery(
            "Cannot get timeseries result with no aggregation.")

    result = raw_query(
        aggregations=snuba_args.get("aggregations"),
        conditions=snuba_args.get("conditions"),
        filter_keys=snuba_args.get("filter_keys"),
        start=snuba_args.get("start"),
        end=snuba_args.get("end"),
        rollup=rollup,
        orderby="time",
        groupby=["time"],
        dataset=Dataset.Discover,
        limit=10000,
        referrer=referrer,
    )
    result = zerofill(result["data"], snuba_args["start"], snuba_args["end"],
                      rollup, "time")

    return SnubaTSResult(result, snuba_filter.start, snuba_filter.end, rollup)
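A hypothetical usage sketch follows; the project id, query string, and referrer are made-up values for illustration, and the Django import is assumed to be available in the surrounding Sentry codebase.

from datetime import timedelta

from django.utils import timezone

end = timezone.now()
params = {
    "start": end - timedelta(hours=24),
    "end": end,
    "project_id": [1],  # assumed project id for illustration
}
# Hourly error counts for the last 24 hours, zerofilled for empty buckets.
result = timeseries_query(
    selected_columns=["count()"],
    query="event.type:error",
    params=params,
    rollup=3600,
    referrer="example.timeseries",
)
# `result` is a SnubaTSResult wrapping the zerofilled rows together with the
# start, end and rollup that produced them.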