def test_zerofill():
    results = discover.zerofill(
        {}, datetime(2019, 1, 2, 0, 0), datetime(2019, 1, 9, 23, 59, 59), 86400, "time"
    )
    results_desc = discover.zerofill(
        {}, datetime(2019, 1, 2, 0, 0), datetime(2019, 1, 9, 23, 59, 59), 86400, "-time"
    )

    assert results == list(reversed(results_desc))

    # Buckets for Jan 2, 3, 4, 5, 6, 7, 8, 9
    assert len(results) == 8
    assert results[0]["time"] == 1546387200
    assert results[7]["time"] == 1546992000
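
# Hedged sketch (not the production discover.zerofill): an illustrative helper
# showing the bucketing behaviour the test above relies on -- one dict per
# `rollup` seconds between `start` and `end`, reversed when the orderby column
# is prefixed with "-". The helper name and the omission of merging real rows
# from `data` are assumptions made purely for clarity.
from datetime import timezone as _tz


def _zerofill_sketch(data, start, end, rollup, orderby):
    # Snap start down to a rollup boundary, then emit one empty bucket per
    # interval up to and including the bucket that contains `end`.
    start_ts = int(start.replace(tzinfo=_tz.utc).timestamp()) // rollup * rollup
    end_ts = int(end.replace(tzinfo=_tz.utc).timestamp())
    buckets = [{"time": ts} for ts in range(start_ts, end_ts + 1, rollup)]
    return list(reversed(buckets)) if orderby.startswith("-") else buckets


# For the window used in test_zerofill this yields the same 8 daily buckets,
# 1546387200 (2019-01-02T00:00Z) through 1546992000 (2019-01-09T00:00Z).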
def get_event_stats(
    query_columns: Sequence[str],
    query: str,
    params: Dict[str, str],
    rollup: int,
    zerofill_results: bool,
    comparison_delta: Optional[datetime] = None,
) -> SnubaTSResult:
    with sentry_sdk.start_span(
        op="discover.discover", description="timeseries.filter_transform"
    ):
        builder = TimeseriesQueryBuilder(
            Dataset.Discover,
            params,
            rollup,
            query=query,
            selected_columns=query_columns,
            functions_acl=["array_join", "percentileArray", "sumArray"],
        )

        span_op_column = builder.resolve_function("array_join(spans_op)")
        span_group_column = builder.resolve_function("array_join(spans_group)")

        # Adding spans.op and spans.group to the group by because we need them
        # in the query to help the array join optimizer in snuba take effect,
        # but the TimeseriesQueryBuilder removes all non aggregates from the
        # select clause.
        builder.groupby.extend([span_op_column, span_group_column])

        builder.add_conditions(
            [
                Condition(
                    Function("tuple", [span_op_column, span_group_column]),
                    Op.IN,
                    Function("tuple", [Function("tuple", [span.op, span.group])]),
                ),
            ]
        )

        snql_query = builder.get_snql_query()
        results = raw_snql_query(
            snql_query, "api.organization-events-spans-performance-stats"
        )

    with sentry_sdk.start_span(
        op="discover.discover", description="timeseries.transform_results"
    ):
        result = discover.zerofill(
            results["data"],
            params["start"],
            params["end"],
            rollup,
            "time",
        )

    return SnubaTSResult({"data": result}, params["start"], params["end"], rollup)
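
# Hedged illustration of the tuple-IN filter built above, written with plain
# snuba-sdk primitives. The Column names and the example op/group values are
# assumptions; the production code uses the aliases returned by
# builder.resolve_function("array_join(...)") rather than raw columns.
from snuba_sdk import Column, Condition, Function, Op

_span_filter = Condition(
    Function("tuple", [Column("spans_op"), Column("spans_group")]),
    Op.IN,
    # One (op, group) pair; more pairs would be appended as extra inner tuples.
    Function("tuple", [Function("tuple", ["http.client", "ab1234cd5678ef90"])]),
)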
def calculate_incident_start(query, projects, groups):
    """
    Attempts to automatically calculate the date that an incident began,
    based on the events related to the incident.
    """
    params = {}
    if groups:
        params["group_ids"] = [g.id for g in groups]
        end = max(g.last_seen for g in groups) + timedelta(seconds=1)
    else:
        end = timezone.now()

    params["start"] = end - INCIDENT_START_PERIOD
    params["end"] = end

    if projects:
        params["project_id"] = [p.id for p in projects]

    filter = get_filter(query, params)
    rollup = int(INCIDENT_START_ROLLUP.total_seconds())

    result = raw_query(
        aggregations=[("count()", "", "count"), ("min", "timestamp", "first_seen")],
        orderby="time",
        groupby=["time"],
        rollup=rollup,
        referrer="incidents.calculate_incident_start",
        limit=10000,
        start=filter.start,
        end=filter.end,
        conditions=filter.conditions,
        filter_keys=filter.filter_keys,
    )["data"]
    # TODO: Start could be the period before the first period we find
    result = zerofill(result, params["start"], params["end"], rollup, "time")

    # We want to linearly scale scores from 100% value at the most recent to
    # 50% at the oldest. This gives a bias towards newer results.
    negative_weight = (1.0 / len(result)) / 2
    multiplier = 1.0
    cur_spike_max_count = -1
    cur_spike_start = None
    cur_spike_end = None
    max_height = 0
    incident_start = None
    cur_height = 0
    prev_count = 0

    def get_row_first_seen(row, default=None):
        first_seen = default
        if "first_seen" in row:
            first_seen = parse_date(row["first_seen"]).replace(tzinfo=pytz.utc)
        return first_seen

    def calculate_start(spike_start, spike_end):
        """
        We arbitrarily choose a date about 1/3 into the incident period. We
        could potentially improve this if we want by analyzing the period in
        more detail and choosing a date that most closely fits with being
        1/3 up the spike.
        """
        spike_length = spike_end - spike_start
        return spike_start + (spike_length / 3)

    for row in reversed(result):
        cur_count = row.get("count", 0)
        if cur_count < prev_count or cur_count > 0 and cur_count == prev_count:
            cur_height = cur_spike_max_count - cur_count
        elif cur_count > 0 or prev_count > 0 or cur_height > 0:
            # Now we've got the height of the current spike, compare it to the
            # current max. We decrease the value by `multiplier` so that we
            # favour newer results
            cur_height *= multiplier
            if cur_height > max_height:
                # If we detect that we have a new highest peak, then set a new
                # incident start date
                incident_start = calculate_start(cur_spike_start, cur_spike_end)
                max_height = cur_height

            cur_height = 0
            cur_spike_max_count = cur_count
            cur_spike_end = get_row_first_seen(row)

        # We attempt to get the first_seen value from the row here. If the row
        # doesn't have it (because it's a zerofilled row), then just use the
        # previous value. This allows us to have the start of a spike always be
        # a bucket that contains at least one element.
        cur_spike_start = get_row_first_seen(row, cur_spike_start)
        prev_count = cur_count
        multiplier -= negative_weight

    if (cur_height > max_height or not incident_start) and cur_spike_start:
        incident_start = calculate_start(cur_spike_start, cur_spike_end)

    if not incident_start:
        incident_start = timezone.now()

    return incident_start
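
# Tiny worked example of the "1/3 into the spike" heuristic used by
# calculate_start above; the timestamps are illustrative, not real data.
from datetime import datetime as _dt

_spike_start = _dt(2019, 1, 1, 12, 0)
_spike_end = _dt(2019, 1, 1, 15, 0)
# A 3-hour spike places the estimated incident start 1 hour in, at 13:00.
assert _spike_start + (_spike_end - _spike_start) / 3 == _dt(2019, 1, 1, 13, 0)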
def timeseries_query(
    selected_columns: Sequence[str],
    query: str,
    params: Dict[str, str],
    rollup: int,
    referrer: str,
    zerofill_results: bool = True,
    comparison_delta: Optional[timedelta] = None,
    functions_acl: Optional[List[str]] = None,
    use_snql: Optional[bool] = False,
) -> SnubaTSResult:
    """
    High-level API for doing arbitrary user timeseries queries against events.

    This API should match that of sentry.snuba.discover.timeseries_query
    """
    metrics_compatible = False
    equations, columns = categorize_columns(selected_columns)
    if comparison_delta is None and not equations:
        metrics_compatible = True

    if metrics_compatible:
        try:
            metrics_query = TimeseriesMetricQueryBuilder(
                params,
                rollup,
                query=query,
                selected_columns=columns,
                functions_acl=functions_acl,
            )
            result = metrics_query.run_query(referrer + ".metrics-enhanced")
            result = discover.transform_results(
                result, metrics_query.function_alias_map, {}, None
            )
            result["data"] = (
                discover.zerofill(
                    result["data"],
                    params["start"],
                    params["end"],
                    rollup,
                    "time",
                )
                if zerofill_results
                else result["data"]
            )
            return SnubaTSResult(
                {"data": result["data"], "isMetricsData": True},
                params["start"],
                params["end"],
                rollup,
            )
        # raise Invalid Queries since the same thing will happen with discover
        except InvalidSearchQuery as error:
            raise error
        # any remaining errors mean we should try again with discover
        except IncompatibleMetricsQuery:
            metrics_compatible = False

    # This isn't a query we can enhance with metrics
    if not metrics_compatible:
        return discover.timeseries_query(
            selected_columns,
            query,
            params,
            rollup,
            referrer,
            zerofill_results,
            comparison_delta,
            functions_acl,
            use_snql,
        )
    return SnubaTSResult()
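
# Hedged usage sketch: the call shape mirrors discover.timeseries_query, so an
# endpoint can swap this module in without changing its arguments. Every value
# below (columns, query string, params, rollup, referrer) is an assumption for
# illustration only.
#
# result = timeseries_query(
#     selected_columns=["count()"],
#     query="event.type:transaction",
#     params={
#         "start": start,          # datetime
#         "end": end,              # datetime
#         "project_id": [project.id],
#         "organization_id": organization.id,
#     },
#     rollup=3600,
#     referrer="api.organization-events-stats",
# )
# result.data["data"] holds zerofilled hourly buckets unless
# zerofill_results=False was passed.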