def validate_aggregations(self, aggregations):
    """
    Convert raw aggregation values into `AlertRuleAggregations` members.

    :param aggregations: Iterable of raw values to convert.
    :return: A list with one `AlertRuleAggregations` member per input value,
    in input order.
    :raises serializers.ValidationError: If converting any value raises
    `ValueError`. The message lists the value of every
    `AlertRuleAggregations` member.
    """
    parsed = []
    try:
        for raw_value in aggregations:
            parsed.append(AlertRuleAggregations(raw_value))
    except ValueError:
        valid_values = [member.value for member in AlertRuleAggregations]
        message = "Invalid aggregation, valid values are %s" % valid_values
        raise serializers.ValidationError(message)
    return parsed
def build_subscription_update(self, subscription=None, time_delta=None, value=None):
    """
    Build a subscription update payload dict.

    :param subscription: Optional subscription the update is for. When given,
    its `subscription_id` is used and `values` holds a single entry keyed by
    the name found at index 2 of the `alert_aggregation_to_snuba` entry for the
    subscription's first aggregation. When omitted, a random hex id is
    generated and `values` is empty.
    :param time_delta: Optional offset added to the current time to produce
    the timestamp. When omitted, the current time is used.
    :param value: Value to report for the aggregation. When omitted, a random
    integer between 0 and 100 (inclusive) is used. Ignored when no
    subscription is given.
    :return: A dict with the keys `subscription_id`, `values`, `timestamp`,
    `interval`, `partition` and `offset`.
    """
    if time_delta is None:
        timestamp = int(time())
    else:
        timestamp = int(to_timestamp(timezone.now() + time_delta))

    if subscription:
        snuba_aggregation = alert_aggregation_to_snuba[
            AlertRuleAggregations(subscription.aggregations[0])
        ]
        if value is None:
            value = randint(0, 100)
        values = {snuba_aggregation[2]: value}
        subscription_id = subscription.subscription_id
    else:
        values = {}
        subscription_id = uuid4().hex

    return {
        "subscription_id": subscription_id,
        "values": values,
        "timestamp": timestamp,
        "interval": 1,
        "partition": 1,
        "offset": 1,
    }
def process_update(self, subscription_update):
    """
    Evaluate one subscription update against the alert rule's thresholds.

    Updates are skipped when this processor has no `alert_rule` attribute, or
    when the update's timestamp is not newer than `self.last_update`.
    Otherwise the value for the rule's first aggregation is compared against
    the alert and resolve thresholds, and the matching trigger method is
    called inside a transaction. When neither condition holds, both trigger
    counters are reset to zero. The rule stats are then persisted.

    :param subscription_update: Dict containing at least a `timestamp` and a
    `values` mapping keyed by aggregation name.
    """
    if not hasattr(self, "alert_rule"):
        # The alert rule has been removed, so there is nothing to evaluate.
        metrics.incr("incidents.alert_rules.no_alert_rule_for_subscription")
        logger.error(
            "Received an update for a subscription, but no associated alert rule exists"
        )
        # TODO: Delete subscription here.
        return

    update_timestamp = subscription_update["timestamp"]
    if update_timestamp <= self.last_update:
        metrics.incr("incidents.alert_rules.skipping_already_processed_update")
        return
    self.last_update = update_timestamp

    rule = self.alert_rule

    # TODO: At the moment we only have individual aggregations. Handle
    # multiple later.
    snuba_name = alert_aggregation_to_snuba[AlertRuleAggregations(rule.aggregations[0])][2]
    current_value = subscription_update["values"][snuba_name]

    threshold_type = AlertRuleThresholdType(rule.threshold_type)
    crosses_alert, crosses_resolve = self.THRESHOLD_TYPE_OPERATORS[threshold_type]

    if crosses_alert(current_value, rule.alert_threshold) and not self.active_incident:
        with transaction.atomic():
            self.trigger_alert_threshold()
    elif (
        # TODO: Need to make `resolve_threshold` nullable so that it can be
        # optional
        rule.resolve_threshold is not None
        and crosses_resolve(current_value, rule.resolve_threshold)
        and self.active_incident
    ):
        with transaction.atomic():
            self.trigger_resolve_threshold()
    else:
        self.alert_triggers = 0
        self.resolve_triggers = 0

    # The rule stats are written only after the transaction above has
    # committed. That way an update is never lost: if the process dies here,
    # nothing gets rolled back. The cost is that an update may be processed
    # twice. Usually that is harmless, but if someone closes a triggered
    # incident before the next update arrives we might alert twice.
    self.update_alert_rule_stats()
def test(self):
    # End-to-end check: publish a subscription update to the topic, run the
    # `QuerySubscriptionConsumer`, and verify that the callback registered for
    # incident subscriptions runs and leaves an open, alert-triggered incident
    # for the rule.
    registered_callback = subscriber_registry[INCIDENTS_SNUBA_SUBSCRIPTION_TYPE]

    def interrupt_after_callback(*args, **kwargs):
        # Run the real callback, then raise so that the consumer stops and
        # the outcome can be inspected. The offset is never committed as a
        # result, which does not matter for this check.
        registered_callback(*args, **kwargs)
        raise KeyboardInterrupt()

    first_aggregation = AlertRuleAggregations(self.subscription.aggregations[0])
    value_name = alert_aggregation_to_snuba[first_aggregation][2]
    subscriber_registry[INCIDENTS_SNUBA_SUBSCRIPTION_TYPE] = interrupt_after_callback

    # A value one above the alert threshold.
    payload = {
        "subscription_id": self.subscription.subscription_id,
        "values": {value_name: self.rule.alert_threshold + 1},
        "timestamp": 1235,
        "interval": 5,
        "partition": 50,
        "offset": 10,
    }
    message = {"version": 1, "payload": payload}
    self.producer.produce(self.topic, json.dumps(message))
    self.producer.flush()

    def open_incident_exists():
        matching = Incident.objects.filter(
            type=IncidentType.ALERT_TRIGGERED.value,
            status=IncidentStatus.OPEN.value,
            alert_rule=self.rule,
        )
        return matching.exists()

    consumer = QuerySubscriptionConsumer("hi", topic=self.topic)
    with self.assertChanges(open_incident_exists, before=False, after=True):
        consumer.run()
def update_alert_rule(
    alert_rule,
    name=None,
    threshold_type=None,
    query=None,
    aggregations=None,
    time_window=None,
    alert_threshold=None,
    resolve_threshold=None,
    threshold_period=None,
):
    """
    Updates an alert rule.

    Only the fields that are passed are changed. For `threshold_type`,
    `query`, `alert_threshold` and `resolve_threshold`, `None` means "leave
    unchanged", so falsy values such as a threshold of `0` or an empty query
    string are applied. The remaining fields are left unchanged when given
    any falsy value.

    :param alert_rule: The alert rule to update
    :param name: Name for the alert rule. This will be used as part of the
    incident name, and must be unique per project.
    :param threshold_type: An AlertRuleThresholdType
    :param query: An event search query to subscribe to and monitor for alerts
    :param aggregations: A list of AlertRuleAggregations that we want to fetch
    for this alert rule
    :param time_window: Time period to aggregate over, in minutes.
    :param alert_threshold: Value that the subscription needs to reach to
    trigger the alert
    :param resolve_threshold: Value that the subscription needs to reach to
    resolve the alert
    :param threshold_period: How many update periods the value of the
    subscription needs to exceed the threshold before triggering
    :return: The updated `AlertRule`
    :raises AlertRuleNameAlreadyUsedError: If `name` differs from the rule's
    current name and another rule in the same project already uses it.
    """
    if (
        name
        and alert_rule.name != name
        and AlertRule.objects.filter(project=alert_rule.project, name=name).exists()
    ):
        raise AlertRuleNameAlreadyUsedError()

    old_subscription_id = None
    subscription_id = None
    updated_fields = {}
    if name:
        updated_fields["name"] = name
    if threshold_type is not None:
        updated_fields["threshold_type"] = threshold_type.value
    if query is not None:
        validate_alert_rule_query(query)
        updated_fields["query"] = query
    if aggregations:
        updated_fields["aggregations"] = [a.value for a in aggregations]
    if time_window:
        updated_fields["time_window"] = time_window
    # Compare against None rather than relying on truthiness: 0 is a
    # legitimate threshold and must not be silently dropped.
    if alert_threshold is not None:
        updated_fields["alert_threshold"] = alert_threshold
    if resolve_threshold is not None:
        updated_fields["resolve_threshold"] = resolve_threshold
    if threshold_period:
        updated_fields["threshold_period"] = threshold_period

    # `query` is checked with `is not None` so that an empty query string,
    # which is stored on the rule above, also refreshes the subscription.
    if query is not None or aggregations or time_window:
        old_subscription_id = alert_rule.subscription_id
        # If updating any details of the query, create a new subscription
        subscription_id = create_snuba_subscription(
            alert_rule.project,
            SnubaDatasets(alert_rule.dataset),
            query if query is not None else alert_rule.query,
            aggregations
            if aggregations
            else [AlertRuleAggregations(agg) for agg in alert_rule.aggregations],
            time_window if time_window else alert_rule.time_window,
            DEFAULT_ALERT_RULE_RESOLUTION,
        )
        updated_fields["subscription_id"] = subscription_id

    try:
        alert_rule.update(**updated_fields)
    except Exception:
        # If we error for some reason and have a valid subscription_id then
        # attempt to delete from snuba to avoid orphaned subscriptions.
        if subscription_id:
            delete_snuba_subscription(subscription_id)
        raise

    if old_subscription_id:
        # Once we're set up correctly, remove the previous subscription id.
        delete_snuba_subscription(old_subscription_id)

    return alert_rule