def delete_rule(request):
    """
    Tags: rules
    ---
    Delete a rule given its UUID.
    READ permission required on Cloud.
    EDIT_RULES permission required on Machine
    ---
    rule:
      in: path
      type: string
      required: true
      description: the unique identifier of the rule to be deleted
    """
    auth_context = auth_context_from_request(request)
    rule_id = request.matchdict.get('rule')

    try:
        # Only rules owned by the requester's organization are visible.
        lookup = {'owner_id': auth_context.owner.id, 'id': rule_id}
        rule = Rule.objects.get(**lookup)

        # The controller needs the auth context before it deletes the rule.
        rule.ctl.set_auth_context(auth_context)
        rule.ctl.delete()

        # Delete related notifications.
        stale_notifications = Notification.objects(
            owner=auth_context.owner,
            rtype='rule',
            rid=rule_id,
        )
        stale_notifications.delete()
    except Rule.DoesNotExist:
        raise RuleNotFoundError()

    return Response('OK', 200)
def toggle_rule(request):
    """
    Tags: rules
    ---
    Enable or disable a rule
    Permits Owners to temporarily disable or re-enable a rule's evaluation
    ---
    rule:
      in: path
      type: string
      required: true
      description: the UUID of the rule to be updated
    action:
      in: query
      type: string
      required: true
      description: the action to perform (enable, disable)
    """
    auth_context = auth_context_from_request(request)
    request_params = params_from_request(request)
    action = request_params.get('action')
    rule_id = request.matchdict.get('rule')

    # Guard clauses: ownership first, then presence and validity of `action`.
    if not auth_context.is_owner():
        raise UnauthorizedError('Restricted to Owners')
    if not action:
        raise RequiredParameterMissingError('action')

    allowed_actions = ('enable', 'disable')
    if action not in allowed_actions:
        raise BadRequestError('Action must be one of (enable, disable)')

    try:
        lookup = {'owner_id': auth_context.owner.id, 'id': rule_id}
        rule = Rule.objects.get(**lookup)
        # `action` is the name of the controller method to invoke, which is
        # why it was restricted to the allowed names above.
        toggle = getattr(rule.ctl, action)
        toggle()
    except Rule.DoesNotExist:
        raise RuleNotFoundError()

    return Response('OK', 200)
def rename_rule(request):
    """
    Tags: rules
    ---
    Rename a rule
    ---
    rule:
      in: path
      type: string
      required: true
      description: the UUID of the rule to be updated
    title:
      in: query
      type: string
      required: true
      description: the rule's new title
    """
    auth_context = auth_context_from_request(request)
    request_params = params_from_request(request)
    title = request_params.get('title')
    rule_id = request.matchdict.get('rule')

    # Guard clauses: ownership first, then presence of the new title.
    if not auth_context.is_owner():
        raise UnauthorizedError('Restricted to Owners')
    if not title:
        raise RequiredParameterMissingError('title')

    try:
        lookup = {'owner_id': auth_context.owner.id, 'id': rule_id}
        rule = Rule.objects.get(**lookup)
        rule.ctl.rename(title)
    except Rule.DoesNotExist:
        raise RuleNotFoundError()

    return Response('OK', 200)
def update_rule(request):
    """
    Tags: rules
    ---
    Update a rule given its UUID
    The expected request body is the same as for the `add_rule` endpoint.
    The difference is that none of the parameters are required. Only the
    specified parameters will be updated, leaving the rest unchanged.
    READ permission required on cloud
    EDIT_RULES permission required on machine
    ---
    rule_id:
      in: path
      type: string
      required: true
      description: the UUID of the rule to be updated
    """
    auth_context = auth_context_from_request(request)

    # Work on a plain dict copy so the request's own params are not touched.
    request_params = params_from_request(request)
    params = dict(request_params.copy())

    rule_id = request.matchdict.get('rule')

    try:
        lookup = {'owner_id': auth_context.owner.id, 'id': rule_id}
        rule = Rule.objects.get(**lookup)

        # The controller needs the auth context before it applies the update.
        rule.ctl.set_auth_context(auth_context)
        rule.ctl.update(**params)

        # Delete related notifications.
        stale_notifications = Notification.objects(
            owner=auth_context.owner,
            rtype='rule',
            rid=rule_id,
        )
        stale_notifications.delete()
    except Rule.DoesNotExist:
        raise RuleNotFoundError()

    return rule.as_dict()
def triggered(request):
    """
    Tags: rules
    ---
    Process a trigger sent by the alert service.
    Based on the parameters of the request, this method will initiate
    actions to mitigate the conditions that triggered the rule and notify
    the users.
    ---
    rule_id:
      type: string
      required: true
      description: the UUID of the rule that got triggered
    value:
      type: integer
      required: true
      description: >
        the value that triggered the rule by exceeding the threshold
    incident:
      type: string
      required: true
      description: the incident's UUID
    resource:
      type: string
      required: true
      description: the UUID of the resource for which the rule got triggered
    triggered:
      type: integer
      required: true
      description: 0 if the specified incident got resolved/untriggered
    triggered_now:
      type: integer
      required: true
      description: |
        0 in case this is not the first time the specified incident has
        raised an alert
    firing_since:
      type: string
      required: true
      description: |
        the time at which the rule raised an alert and sent a trigger to
        this API endpoint
    pending_since:
      type: string
      required: true
      description: |
        the time at which the rule evaluated to True and entered pending
        state. A rule can remain in pending state if a TriggerOffset has
        been configured. Datetime needed
    resolved_since:
      type: string
      required: true
      description: >
        the time at which the incident with the specified UUID resolved.
        Datetime needed
    """
    # Do not publicly expose this API endpoint?
    if config.CILIA_SECRET_KEY != request.headers.get('Cilia-Secret-Key'):
        raise UnauthorizedError()

    params = params_from_request(request)

    # Every key that is read unconditionally below must be listed here, so
    # that an incomplete request is rejected with a
    # RequiredParameterMissingError instead of an unhandled KeyError.
    # `resource` and `rule_id` come last so that the error raised for the
    # other keys keeps its original precedence.
    keys = (
        'value',
        'incident',
        'triggered',
        'triggered_now',
        'firing_since',
        'pending_since',
        'resolved_since',
        'resource',
        'rule_id',
    )
    for key in keys:
        if key not in params:
            raise RequiredParameterMissingError(key)

    # Get the rule's UUID.
    # TODO rule_id = request.matchdict['rule']
    rule_id = params['rule_id']

    # Get resource and incidents ids.
    incident_id = str(params['incident'])
    resource_id = str(params['resource'])

    # Get timestamps. `pending_since` is required above but is not
    # currently used by this view.
    firing_since = str(params['firing_since'])
    # pending_since = str(params['pending_since'])
    resolved_since = str(params['resolved_since'])

    try:
        value = params['value']
        value = float(value)
    except (TypeError, ValueError) as err:
        log.error('Failed to cast "%s" to float: %r', value, err)
        raise BadRequestError('Failed to convert %s to float' % value)

    def int_to_bool(param):
        """Interpret an integer-like flag as a boolean; empty means False."""
        try:
            return bool(int(param or 0))
        except (ValueError, TypeError) as err:
            log.error('Failed to cast int to bool: %r', err)
            raise BadRequestError('Failed to convert %s to boolean' % param)

    # Get flags indicating whether the incident has been (just) triggered.
    triggered = int_to_bool(params['triggered'])
    triggered_now = int_to_bool(params['triggered_now'])

    # Get the timestamp at which the rule's state changed. The resolution
    # time takes precedence over the firing time when it is non-empty.
    try:
        timestamp = resolved_since or firing_since
        timestamp = int(get_datetime(timestamp).strftime('%s'))
    except ValueError as err:
        log.error('Failed to cast datetime obj to unix timestamp: %r', err)
        raise BadRequestError(err)

    try:
        rule = Rule.objects.get(id=rule_id)
    except Rule.DoesNotExist:
        raise RuleNotFoundError()

    # Validate resource, if the rule is resource-bound.
    if not rule.is_arbitrary():
        resource_type = rule.resource_model_name
        Model = get_resource_model(resource_type)
        try:
            resource = Model.objects.get(id=resource_id, owner=rule.owner_id)
        except Model.DoesNotExist:
            raise NotFoundError('%s %s' % (resource_type, resource_id))
        if is_resource_missing(resource):
            raise NotFoundError('%s %s' % (resource_type, resource_id))
    else:
        resource_type = resource_id = None

    # Record the trigger, if it's a no-data, to refer to it later.
    # NOTE(review): `resource` is only bound in the resource-bound branch
    # above; this presumably relies on a NoDataRule never being arbitrary.
    # Confirm against the rule models.
    if isinstance(rule, NoDataRule):
        if triggered:
            NoDataRuleTracker.add(rule.id, resource.id)
        else:
            NoDataRuleTracker.remove(rule.id, resource.id)

    # Run chain of rule's actions.
    run_chained_actions(
        rule.id, incident_id, resource_id, resource_type,
        value, triggered, triggered_now, timestamp,
    )
    return Response('OK', 200)
def run_chained_actions(rule_id, incident_id, resource_id, resource_type,
                        value, triggered, triggered_now, timestamp):
    """Run a Rule's actions.

    Runs actions based on the rule's state.

    This method will initially check whether a NoData alert has been raised
    in order to call a special NoData action.

    In case a rule is re-triggered or its state transitions from the
    untriggered to triggered state and vice versa, relevant events will also
    be logged.

    When a monitoring rule is triggered, this method will asynchronously
    apply a celery chain of the rule's actions that have been specified by
    user. The chain's tasks are executed sequentially, meaning that the next
    task in the chain is executed once the one that preceded it has completed
    successfully.

    It is IMPORTANT to create a chain of immutable tasks (using the `.si`
    callable, instead of the more regularly used `.s`), since by default a
    chained task provides its output as the following task's input, which may
    yield unexpected behavior.

    """
    try:
        rule = Rule.objects.get(id=rule_id)
    except Rule.DoesNotExist:
        raise RuleNotFoundError()

    def action_args(act):
        # Positional arguments shared by every `run_action_by_id` invocation.
        return (
            rule_id, incident_id, act.id, resource_id, resource_type,
            value, triggered, timestamp,
        )

    # Log (un)triggered alert. Logging is skipped only when the rule is
    # triggered but this is not the first trigger of the incident.
    if triggered_now or not triggered:
        if rule.is_arbitrary():
            logged_resource = rule.owner
        else:
            Model = get_resource_model(resource_type)
            logged_resource = Model.objects.get(id=resource_id,
                                                owner=rule.owner_id)
        _log_alert(logged_resource, rule, value, triggered, timestamp,
                   incident_id, rule.actions[0])

    # If the rule got un-triggered or re-triggered, just send a notification
    # if a NotificationAction has been specified.
    if not triggered or not triggered_now:
        first_action = rule.actions[0]
        if isinstance(first_action, NotificationAction):
            run_action_by_id.delay(*action_args(first_action))
        return

    # Get a list of task signatures for every task, excluding the first one.
    followups = [
        run_action_by_id.si(*action_args(act)) for act in rule.actions[1:]
    ]

    # If there are multiple actions, build a celery chain.
    pipeline = reduce(operator.or_, followups) if followups else followups

    # Get the task signature of the first action, which was omitted above.
    first_action = rule.actions[0]
    first_task = run_action_by_id.si(*action_args(first_action))

    # Buffer no-data alerts so that we can decide on false-positives.
    if isinstance(first_action, NoDataAction):
        first_task.set(countdown=config.NO_DATA_ALERT_BUFFER_PERIOD)

    # Apply all tasks asynchronously. There are 3 scenarios here:
    #  a. If there's only a single task, and not a celery chain, just apply
    #     it
    #  b. If there's a celery chain, group it with the first task, if it's
    #     a NotificationAction, in order for the NotificationAction to not
    #     block the rest of the chain by running them in parallel
    #  c. If there's a celery chain, pipe it to the first task, if that is
    #     not a NotificationAction
    # TODO Allow multiple NotificationAction's. Permit users to specify
    # more than a single notification that will notify them of the outcome
    # of the previously executed task in the chain, whether it succeeded
    # or not.
    if not pipeline:
        first_task.apply_async()
    elif isinstance(first_action, NotificationAction):
        from celery import group
        group(first_task, pipeline)()
    else:
        (first_task | pipeline).apply_async()