예제 #1
0
def delete_rule(request):
    """
    Tags: rules
    ---
    Delete the rule identified by the given UUID.

    Notifications referring to the rule are deleted along with it.

    READ permission required on Cloud.
    EDIT_RULES permission required on Machine

    ---

    rule:
      in: path
      type: string
      required: true
      description: the unique identifier of the rule to be deleted

    """
    auth_context = auth_context_from_request(request)
    rule_id = request.matchdict.get('rule')
    try:
        # The lookup is scoped to the requesting owner's rules.
        rule = Rule.objects.get(id=rule_id, owner_id=auth_context.owner.id)
        rule.ctl.set_auth_context(auth_context)
        rule.ctl.delete()
        # Clean up the notifications that point to the deleted rule.
        related = Notification.objects(
            rid=rule_id, rtype='rule', owner=auth_context.owner)
        related.delete()
    except Rule.DoesNotExist:
        raise RuleNotFoundError()
    return Response('OK', 200)
예제 #2
0
def toggle_rule(request):
    """
    Tags: rules
    ---
    Enable or disable a rule

    Lets Owners temporarily switch a rule's evaluation off and back on

    ---

    rule:
      in: path
      type: string
      required: true
      description: the UUID of the rule to be updated

    action:
      in: query
      type: string
      required: true
      description: the action to perform (enable, disable)

    """
    auth_context = auth_context_from_request(request)
    action = params_from_request(request).get('action')
    rule_id = request.matchdict.get('rule')

    # Guard clauses: ownership first, then presence and validity of `action`.
    if not auth_context.is_owner():
        raise UnauthorizedError('Restricted to Owners')

    if not action:
        raise RequiredParameterMissingError('action')

    supported_actions = ('enable', 'disable')
    if action not in supported_actions:
        raise BadRequestError('Action must be one of (enable, disable)')

    try:
        rule = Rule.objects.get(id=rule_id, owner_id=auth_context.owner.id)
        # `action` has been validated above, so it is one of the two values.
        if action == 'enable':
            rule.ctl.enable()
        else:
            rule.ctl.disable()
    except Rule.DoesNotExist:
        raise RuleNotFoundError()
    return Response('OK', 200)
예제 #3
0
def rename_rule(request):
    """
    Tags: rules
    ---
    Give a rule a new title

    ---

    rule:
      in: path
      type: string
      required: true
      description: the UUID of the rule to be updated

    title:
      in: query
      type: string
      required: true
      description: the rule's new title

    """
    auth_context = auth_context_from_request(request)
    new_title = params_from_request(request).get('title')
    rule_id = request.matchdict.get('rule')

    # Renaming is restricted to Owners.
    if not auth_context.is_owner():
        raise UnauthorizedError('Restricted to Owners')

    # An empty title is treated the same as a missing one.
    if not new_title:
        raise RequiredParameterMissingError('title')

    try:
        # The lookup is scoped to the requesting owner's rules.
        target = Rule.objects.get(
            id=rule_id, owner_id=auth_context.owner.id)
        target.ctl.rename(new_title)
    except Rule.DoesNotExist:
        raise RuleNotFoundError()
    return Response('OK', 200)
예제 #4
0
def update_rule(request):
    """
    Tags: rules
    ---
    Update a rule given its UUID

    The expected request body is the same as for the `add_rule` endpoint. The
    difference is that none of the parameters are required. Only the specified
    parameters will be updated, leaving the rest unchanged.

    Notifications related to the rule are deleted once it has been updated.

    READ permission required on cloud
    EDIT_RULES permission required on machine

    ---

    rule:
      in: path
      type: string
      required: true
      description: the UUID of the rule to be updated

    """
    auth_context = auth_context_from_request(request)
    # Work on a plain dict copy so the parameters can be expanded as keyword
    # arguments below.
    params = dict(params_from_request(request).copy())
    # The path parameter is read from the route's match dict as `rule`.
    rule_id = request.matchdict.get('rule')
    try:
        # The lookup is scoped to the requesting owner's rules.
        rule = Rule.objects.get(owner_id=auth_context.owner.id, id=rule_id)
        rule.ctl.set_auth_context(auth_context)
        rule.ctl.update(**params)
        Notification.objects(  # Delete related notifications.
            owner=auth_context.owner,
            rtype='rule',
            rid=rule_id).delete()
    except Rule.DoesNotExist:
        raise RuleNotFoundError()
    return rule.as_dict()
예제 #5
0
def triggered(request):
    """
    Tags: rules
    ---
    Process a trigger sent by the alert service.

    Based on the parameters of the request, this method will initiate actions
    to mitigate the conditions that triggered the rule and notify the users.

    ---

    rule_id:
     type: string
     required: true
     description: the UUID of the rule that got triggered
    value:
     type: integer
     required: true
     description: >
       the value that triggered the rule by exceeding the threshold
    incident:
     type: string
     required: true
     description: the incident's UUID
    resource:
     type: string
     required: true
     description: the UUID of the resource for which the rule got triggered
    triggered:
     type: integer
     required: true
     description: 0 if the specified incident got resolved/untriggered
    triggered_now:
     type: integer
     required: true
     description: |
       0 in case this is not the first time the specified incident has
       raised an alert
    firing_since:
     type: string
     required: true
     description: |
       the time at which the rule raised an alert and sent a trigger to
       this API endpoint
    pending_since:
     type: string
     required: true
     description: |
       the time at which the rule evaluated to True and entered pending
       state. A rule can remain in pending state if a TriggerOffset has
       been configured. Datetime needed
    resolved_since:
     type: string
     required: true
     description: >
       the time at which the incident with the specified UUID resolved.
       Datetime needed

    """
    # Do not publicly expose this API endpoint?
    if config.CILIA_SECRET_KEY != request.headers.get('Cilia-Secret-Key'):
        raise UnauthorizedError()

    params = params_from_request(request)

    # Every key that is read unconditionally below must be listed here, so
    # that a missing one is reported as a missing parameter rather than
    # surfacing as a KeyError. `resource` and `rule_id` are checked last in
    # order to keep the reporting order of the other keys unchanged.
    keys = (
        'value',
        'incident',
        'triggered',
        'triggered_now',
        'firing_since',
        'pending_since',
        'resolved_since',
        'resource',
        'rule_id',
    )
    for key in keys:
        if key not in params:
            raise RequiredParameterMissingError(key)

    # Get the rule's UUID.
    # TODO rule_id = request.matchdict['rule']
    rule_id = params['rule_id']

    # Get resource and incidents ids.
    incident_id = str(params['incident'])
    resource_id = str(params['resource'])

    # Get timestamps. `pending_since` is required, but not used here.
    firing_since = str(params['firing_since'])
    resolved_since = str(params['resolved_since'])

    try:
        value = params['value']
        value = float(value)
    except (TypeError, ValueError) as err:
        log.error('Failed to cast "%s" to float: %r', value, err)
        raise BadRequestError('Failed to convert %s to float' % value)

    def int_to_bool(param):
        # Falsy input (None, '', 0) is treated as 0, i.e. False.
        try:
            return bool(int(param or 0))
        except (ValueError, TypeError) as err:
            log.error('Failed to cast int to bool: %r', err)
            raise BadRequestError('Failed to convert %s to boolean' % param)

    # Get flags indicating whether the incident has been (just) triggered.
    triggered = int_to_bool(params['triggered'])
    triggered_now = int_to_bool(params['triggered_now'])

    # Get the timestamp at which the rule's state changed. The resolution
    # time takes precedence over the firing time, if it is non-empty.
    # NOTE(review): '%s' is a platform-specific strftime directive - confirm
    # that it is supported on all target platforms.
    try:
        timestamp = resolved_since or firing_since
        timestamp = int(get_datetime(timestamp).strftime('%s'))
    except ValueError as err:
        log.error('Failed to cast datetime obj to unix timestamp: %r', err)
        raise BadRequestError(err)

    try:
        rule = Rule.objects.get(id=rule_id)
    except Rule.DoesNotExist:
        raise RuleNotFoundError()

    # Validate resource, if the rule is resource-bound.
    if not rule.is_arbitrary():
        resource_type = rule.resource_model_name
        Model = get_resource_model(resource_type)
        try:
            resource = Model.objects.get(id=resource_id, owner=rule.owner_id)
        except Model.DoesNotExist:
            raise NotFoundError('%s %s' % (resource_type, resource_id))
        if is_resource_missing(resource):
            raise NotFoundError('%s %s' % (resource_type, resource_id))
    else:
        resource_type = resource_id = None

    # Record the trigger, if it's a no-data, to refer to it later.
    # NOTE(review): `resource` is only bound for resource-bound rules, so
    # this presumably relies on no-data rules never being arbitrary - confirm.
    if isinstance(rule, NoDataRule):
        if triggered:
            NoDataRuleTracker.add(rule.id, resource.id)
        else:
            NoDataRuleTracker.remove(rule.id, resource.id)
    # Run chain of rule's actions.
    run_chained_actions(
        rule.id,
        incident_id,
        resource_id,
        resource_type,
        value,
        triggered,
        triggered_now,
        timestamp,
    )
    return Response('OK', 200)
예제 #6
0
def run_chained_actions(rule_id, incident_id, resource_id, resource_type,
                        value, triggered, triggered_now, timestamp):
    """Run the actions of the rule with the given id.

    Which actions run depends on the rule's state. An alert is logged when
    the rule has just been triggered or has been untriggered; a mere
    re-trigger of an ongoing incident is not logged.

    If the rule got un-triggered or re-triggered, only its first action is
    dispatched, and only if that is a NotificationAction.

    If the rule has just been triggered, all of its actions are applied
    asynchronously. Actions following the first one are piped into a celery
    chain, whose tasks execute sequentially: each task starts once the one
    preceding it has completed successfully.

    It is IMPORTANT that the chain consists of immutable task signatures
    (created with `.si` rather than the more commonly used `.s`), since by
    default a chained task passes its output to the next task as input, which
    may yield unexpected behavior.

    Raises RuleNotFoundError, if no rule exists with the given id.

    """
    try:
        rule = Rule.objects.get(id=rule_id)
    except Rule.DoesNotExist:
        raise RuleNotFoundError()

    def action_args(action):
        # Positional arguments with which `run_action_by_id` is invoked.
        return (rule_id, incident_id, action.id, resource_id, resource_type,
                value, triggered, timestamp)

    # Log (un)triggered alert. Re-triggers are not logged.
    if triggered_now or not triggered:
        if rule.is_arbitrary():
            resource = rule.owner
        else:
            Model = get_resource_model(resource_type)
            resource = Model.objects.get(id=resource_id, owner=rule.owner_id)
        _log_alert(resource, rule, value, triggered, timestamp, incident_id,
                   rule.actions[0])

    # If the rule got un-triggered or re-triggered, just send a notification
    # if a NotificationAction has been specified.
    if not triggered or not triggered_now:
        first_action = rule.actions[0]
        if isinstance(first_action, NotificationAction):
            run_action_by_id.delay(*action_args(first_action))
        return

    # Create the task signatures of all actions but the first one and, if
    # there are any, pipe them into a single celery chain.
    tail = [run_action_by_id.si(*action_args(action))
            for action in rule.actions[1:]]
    if tail:
        tail = reduce(operator.or_, tail)

    # Create the task signature of the first action, which was left out above.
    first_action = rule.actions[0]
    head = run_action_by_id.si(*action_args(first_action))

    # Buffer no-data alerts so that we can decide on false-positives.
    if isinstance(first_action, NoDataAction):
        head.set(countdown=config.NO_DATA_ALERT_BUFFER_PERIOD)

    # Apply all tasks asynchronously. There are 3 scenarios here:
    # a. There is just the first task and no celery chain: apply it as is.
    # b. There is a celery chain and the first task is a NotificationAction:
    #    group the two, so that they run in parallel and the notification
    #    does not block the rest of the chain.
    # c. There is a celery chain and the first task is not a
    #    NotificationAction: pipe the first task to the chain.
    # TODO Allow multiple NotificationAction's. Permit users to specify
    # more than a single notification that will notify them of the outcome
    # of the previously executed task in the chain, whether it succeeded
    # or not.
    if not tail:
        head.apply_async()
    elif isinstance(first_action, NotificationAction):
        from celery import group
        group(head, tail)()
    else:
        (head | tail).apply_async()