Example #1
0
def transfer_ownership_to_user(request):
    """
    Tags: ownership

    ---

    Transfer ownership of a resource

    If a resource isn't owned by the requesting user, then an UnauthorizedError
    error will be thrown, unless the requesting user is a member of the Owners
    team.

    """
    auth_context = auth_context_from_request(request)
    params = params_from_request(request)

    # The target user is mandatory and must exist before anything is touched.
    if not params.get('user_id'):
        raise RequiredParameterMissingError('user_id')
    try:
        new_owner = User.objects.get(id=params['user_id'])
    except User.DoesNotExist:
        raise NotFoundError('User with id %s' % params['user_id'])

    # `resources` maps a resource type name to an iterable of resource ids.
    # `.items()` is used instead of `.iteritems()` so that the loop works on
    # both Python 2 and Python 3 dictionaries.
    for rtype, rids in params.get('resources', {}).items():
        Model = get_resource_model(rtype)
        for rid in rids:
            try:
                # Only resources belonging to the requester's owner context
                # are looked up; anything else is reported as not found.
                resource = Model.objects.get(owner=auth_context.owner, id=rid)
                resource.transfer_ownership(auth_context, new_owner)
            except Model.DoesNotExist:
                # NOTE(review): resources processed before this point have
                # already been transferred - confirm that partial application
                # is acceptable.
                raise NotFoundError('%s with id %s' % (rtype, rid))

    trigger_session_update(auth_context.owner)

    return Response('OK', 200)
Example #2
0
def run_action_by_id(self, rule_id, incident_id, action_id, resource_id,
                     resource_type, value, triggered, timestamp):
    """Execute one action of a Rule as an asynchronous task.

    The action is looked up by `action_id` among the actions of the rule
    identified by `rule_id`. Tasks like this one are usually part of a celery
    chain, where each task is registered as a callback of its predecessor.
    When a task raises, the chain moves to the failure state and none of the
    remaining tasks are executed.

    On transient failures the task is retried up to `self.max_retries` times,
    with a delay derived from `self.default_retry_delay`. While a task is
    being retried, celery does not advance to the next task of the chain.

    Be very CAREFUL when raising or suppressing exceptions in here, since
    doing so directly affects whether the rest of the chain runs.

    """
    rule = Rule.objects.get(id=rule_id)
    action = rule.actions.get(id=action_id)

    # Arbitrary rules are not bound to a resource, so there is nothing to
    # fetch for them.
    resource = None
    if not rule.is_arbitrary():
        assert resource_type in rtype_to_classpath, resource_type
        resource = get_resource_model(resource_type).objects.get(
            id=resource_id, owner=rule.owner_id)

    def record_failure(exc):
        # Shared by all the handlers below that log the failed run.
        _log_alert(resource, rule, value, triggered, timestamp, incident_id,
                   error=str(exc))

    try:
        action.run(resource, value, triggered, timestamp, incident_id)
    except (ServiceUnavailableError, CloudUnavailableError) as err:
        # Errors caused by SSH connectivity issues or by the cloud provider's
        # API being unresponsive. The failure is only logged once no retries
        # are left.
        if self.request.retries >= self.max_retries:
            record_failure(err)
        # Linear back-off: the delay grows with every attempt to lower the
        # chances of running into the same error again. Once max_retries has
        # been exceeded, retry() re-raises the original exception.
        backoff = self.default_retry_delay * (self.request.retries + 1)
        self.retry(exc=err, countdown=backoff)
    except MachineUnauthorizedError as err:
        # Logged for auditing purposes and re-raised on purpose, so that the
        # chain's execution stops.
        record_failure(err)
        raise
    except MistError as err:
        log.error("Error running %s: %r", action, err)
        record_failure(err)
        raise
    except Exception as err:
        log.error("Error running %s: %r", action, err)
        raise
Example #3
0
def tag_resources(request):
    """
    Tags: tags
    ---
    Batch operation for adding/removing tags from a list of resources.
    This api call provides the ability to modify the tags of a large number
    of resources. For each resource a list of dicts is passed with a key, a
    value and optionally an op field. The op field should be either '+' or '-'
    and defines whether or not the tag should be added or removed from the
    resource. If no op value is defined in the dict then '+' is assumed.
    Resources that cannot be found are skipped.
    ---
    tags:
      required: true
      type: array
    resource:
      required: true
      type: object
    """

    auth_context = auth_context_from_request(request)
    params = params_from_request(request)

    # FIXME: This implementation is far from OK. We need to re-code the way
    # tags are handled and make sure that RBAC is properly enforced on tags
    for resource in params:
        # list of dicts of key-value pairs
        resource_tags = resource.get('tags', '')
        # dict of resource info
        resource_data = resource.get('resource', '')

        if not resource_data:
            raise RequiredParameterMissingError("resources")
        if not resource_tags:
            raise RequiredParameterMissingError("tags")
        if not resource_data.get('type') or not resource_data.get('item_id'):
            raise BadRequestError('No type or rid provided for some of the '
                                  'resources')

        # ui send this var only for machine. image, network, location
        cloud_id = resource_data.get('cloud_id')

        if cloud_id:
            auth_context.check_perm('cloud', 'read', cloud_id)
        elif resource_data['type'] in [
                'machine', 'image', 'network', 'volume'
        ]:
            raise RequiredParameterMissingError("cloud_id")
        else:
            # Drop an empty cloud_id, if one was sent. The key is usually
            # absent for resource types that do not live in a cloud, so it
            # must not be assumed to exist (a plain `del` raises KeyError).
            resource_data.pop('cloud_id', None)

        rtype = resource_data['type']
        rid = resource_data['item_id']

        # Machines are looked up by `machine_id`, every other resource type
        # by its `id`.
        query = {}
        if rtype == 'machine':
            query['machine_id'] = rid
        else:
            query['id'] = rid

        if cloud_id:
            query['cloud'] = cloud_id

        try:
            resource_obj = get_resource_model(rtype).objects.get(**query)
        except me.DoesNotExist:
            # if the resource can not be found just go on and process the next
            continue

        # SEC require EDIT_TAGS permission on resource
        auth_context.check_perm(rtype, 'edit_tags', resource_obj.id)

        # split the tags into two lists: those that will be added and those
        # that will be removed
        tags_to_add = [
            (tag['key'], tag['value']) for tag in resource_tags
            if tag.get('op', '+') == '+'
        ]
        # also extract the keys from all the tags to be deleted
        tags_to_remove = [
            tag['key'] for tag in resource_tags
            if tag.get('op', '+') == '-'
        ]

        # SEC only Org Owners may edit the secure tags
        tags = dict(tags_to_add)
        if not modify_security_tags(auth_context, tags, resource_obj):
            auth_context._raise(rtype, 'edit_security_tags')

        # Snapshot the tags before modifying them, so that a diff can be
        # published afterwards.
        old_tags = get_tags_for_resource(auth_context.owner, resource_obj)
        if tags_to_add:
            add_tags_to_resource(auth_context.owner, resource_obj, tags_to_add)
        if tags_to_remove:
            remove_tags_from_resource(auth_context.owner, resource_obj,
                                      tags_to_remove)

        if rtype in ['machine', 'network', 'volume', 'zone', 'record']:
            new_tags = get_tags_for_resource(auth_context.owner, resource_obj)
            # Prefer the type-specific external id (e.g. `machine_id`) and
            # fall back to the generic `external_id` attribute.
            try:
                external_id = getattr(resource_obj, rtype + '_id')
            except AttributeError:
                external_id = getattr(resource_obj, 'external_id')
            patch = jsonpatch.JsonPatch.from_diff(old_tags, new_tags).patch
            # Re-root every patch operation under the resource's tags.
            for item in patch:
                item['path'] = '/%s-%s/tags%s' % (resource_obj.id, external_id,
                                                  item['path'])
            if amqp_owner_listening(resource_obj.cloud.owner.id):
                amqp_publish_user(auth_context.owner.id,
                                  routing_key='patch_%ss' % rtype,
                                  data={
                                      'cloud_id': resource_obj.cloud.id,
                                      'patch': patch
                                  })
    return OK
Example #4
0
def tag_resources(request):
    """
    Batch operation for adding/removing tags from a list of resources.
    This api call provides the ability to modify the tags of a large number
    of resources. For each resource a list of dicts is passed with a key, a
    value and optionally an op field. The op field should be either '+' or '-'
    and defines whether or not the tag should be added or removed from the
    resource. If no op value is defined in the dict then '+' is assumed.
    Resources that cannot be found are skipped.
    ---
    tags:
      required: true
      type: list
    resource:
      required: true
      type: dict
    """

    auth_context = auth_context_from_request(request)
    params = params_from_request(request)

    # FIXME: This implementation is far from OK. We need to re-code the way
    # tags are handled and make sure that RBAC is properly enforced on tags

    for resource in params:
        # list of dicts of key-value pairs
        resource_tags = resource.get('tags', '')
        # dict of resource info
        resource_data = resource.get('resource', '')

        if not resource_data:
            raise RequiredParameterMissingError("resources")
        if not resource_tags:
            raise RequiredParameterMissingError("tags")
        if not resource_data.get('type') or not resource_data.get('item_id'):
            raise BadRequestError('No type or rid provided for some of the '
                                  'resources')

        # ui send this var only for machine. image, network, location
        cloud_id = resource_data.get('cloud_id')

        if cloud_id:
            auth_context.check_perm('cloud', 'read', cloud_id)
        elif resource_data['type'] in [
                'machine', 'image', 'network', 'location'
        ]:
            raise RequiredParameterMissingError("cloud_id")
        else:
            # Drop an empty cloud_id, if one was sent. The key is usually
            # absent for resource types that do not live in a cloud, so it
            # must not be assumed to exist (a plain `del` raises KeyError).
            resource_data.pop('cloud_id', None)

        rtype = resource_data['type']
        rid = resource_data['item_id']

        # Machines are looked up by `machine_id`. Every other resource type
        # is looked up by `id`, restricted to entries whose `deleted` field
        # is unset.
        query = {}
        if rtype == 'machine':
            query['machine_id'] = rid
        else:
            query['id'] = rid
            query['deleted'] = None

        if cloud_id:
            query['cloud'] = cloud_id

        try:
            resource_obj = get_resource_model(rtype).objects.get(**query)
        except me.DoesNotExist:
            # if the resource can not be found just go on and process the next
            continue

        # SEC require EDIT_TAGS permission on resource
        auth_context.check_perm(rtype, 'edit_tags', resource_obj.id)

        # split the tags into two lists: those that will be added and those
        # that will be removed
        tags_to_add = [
            (tag['key'], tag['value']) for tag in resource_tags
            if tag.get('op', '+') == '+'
        ]
        # also extract the keys from all the tags to be deleted. This has to
        # be a real list: a lazy `map` object is always truthy on Python 3,
        # which would make the emptiness check below meaningless.
        tags_to_remove = [
            tag['key'] for tag in resource_tags
            if tag.get('op', '+') == '-'
        ]

        # SEC only Org Owners may edit the secure tags
        tags = dict(tags_to_add)
        if not modify_security_tags(auth_context, tags, resource_obj):
            auth_context._raise(rtype, 'edit_security_tags')

        if tags_to_add:
            add_tags_to_resource(auth_context.owner, resource_obj, tags_to_add)
        if tags_to_remove:
            remove_tags_from_resource(auth_context.owner, resource_obj,
                                      tags_to_remove)

    return OK
Example #5
0
 def condition_resource_cls(self):
     """Return what `get_resource_model` yields for `resource_model_name`."""
     model_name = self.resource_model_name
     return get_resource_model(model_name)
Example #6
0
def triggered(request):
    """
    Tags: rules
    ---
    Process a trigger sent by the alert service.

    Based on the parameters of the request, this method will initiate actions
    to mitigate the conditions that triggered the rule and notify the users.

    ---

    value:
     type: integer
     required: true
     description: >
       the value that triggered the rule by exceeding the threshold
    incident:
     type: string
     required: true
     description: the incident's UUID
    resource:
     type: string
     required: true
     description: the UUID of the resource for which the rule got triggered
    triggered:
     type: integer
     required: true
     description: 0 if the specified incident got resolved/untriggered
    triggered_now:
     type: integer
     required: true
     description: |
       0 in case this is not the first time the specified incident has
       raised an alert
    firing_since:
     type: string
     required: true
     description: |
       the time at which the rule raised an alert and sent a trigger to
       this API endpoint
    pending_since:
     type: string
     required: true
     description: |
       the time at which the rule evaluated to True and entered pending
       state. A rule can remain in pending state if a TriggerOffset has
       been configured. Datetime needed
    resolved_since:
     type: string
     required: true
     description: >
       the time at which the incident with the specified UUID resolved.\
       Datetime needed

    """
    # Do not publicly expose this API endpoint?
    if config.CILIA_SECRET_KEY != request.headers.get('Cilia-Secret-Key'):
        raise UnauthorizedError()

    params = params_from_request(request)

    # Every parameter that is read unconditionally further down has to be
    # validated here. Otherwise a missing `resource` or `rule_id` surfaces as
    # an unhandled KeyError instead of a proper "missing parameter" error.
    keys = (
        'value',
        'incident',
        'triggered',
        'triggered_now',
        'firing_since',
        'pending_since',
        'resolved_since',
        'resource',
        'rule_id',
    )
    for key in keys:
        if key not in params:
            raise RequiredParameterMissingError(key)

    # Get the rule's UUID.
    # TODO rule_id = request.matchdict['rule']
    rule_id = params['rule_id']

    # Get resource and incidents ids.
    incident_id = str(params['incident'])
    resource_id = str(params['resource'])

    # Get timestamps.
    firing_since = str(params['firing_since'])
    # pending_since = str(params['pending_since'])
    resolved_since = str(params['resolved_since'])

    try:
        value = params['value']
        value = float(value)
    except (TypeError, ValueError) as err:
        log.error('Failed to cast "%s" to float: %r', value, err)
        raise BadRequestError('Failed to convert %s to float' % value)

    def int_to_bool(param):
        # Falsy inputs (None, '', 0) are treated as 0, i.e. False.
        try:
            return bool(int(param or 0))
        except (ValueError, TypeError) as err:
            log.error('Failed to cast int to bool: %r', err)
            raise BadRequestError('Failed to convert %s to boolean' % param)

    # Get flags indicating whether the incident has been (just) triggered.
    triggered = int_to_bool(params['triggered'])
    triggered_now = int_to_bool(params['triggered_now'])

    # Get the timestamp at which the rule's state changed. The resolution
    # time takes precedence over the firing time, if it is non-empty.
    # NOTE(review): strftime('%s') is a platform-specific extension - confirm
    # it is available on all deployment targets.
    try:
        timestamp = resolved_since or firing_since
        timestamp = int(get_datetime(timestamp).strftime('%s'))
    except ValueError as err:
        log.error('Failed to cast datetime obj to unix timestamp: %r', err)
        raise BadRequestError(err)

    try:
        rule = Rule.objects.get(id=rule_id)
    except Rule.DoesNotExist:
        raise RuleNotFoundError()

    # Validate resource, if the rule is resource-bound.
    if not rule.is_arbitrary():
        resource_type = rule.resource_model_name
        Model = get_resource_model(resource_type)
        try:
            resource = Model.objects.get(id=resource_id, owner=rule.owner_id)
        except Model.DoesNotExist:
            raise NotFoundError('%s %s' % (resource_type, resource_id))
        if is_resource_missing(resource):
            raise NotFoundError('%s %s' % (resource_type, resource_id))
    else:
        resource_type = resource_id = None

    # Record the trigger, if it's a no-data, to refer to it later.
    # NOTE(review): `resource` is only bound for resource-bound rules; this
    # presumably relies on no-data rules never being arbitrary - confirm.
    if isinstance(rule, NoDataRule):
        if triggered:
            NoDataRuleTracker.add(rule.id, resource.id)
        else:
            NoDataRuleTracker.remove(rule.id, resource.id)
    # Run chain of rule's actions.
    run_chained_actions(
        rule.id,
        incident_id,
        resource_id,
        resource_type,
        value,
        triggered,
        triggered_now,
        timestamp,
    )
    return Response('OK', 200)
Example #7
0
 def selector_resource_cls(self):
     """Return what `get_resource_model` yields for `resource_model_name`."""
     model_name = self.resource_model_name
     return get_resource_model(model_name)
Example #8
0
def run_chained_actions(rule_id, incident_id, resource_id, resource_type,
                        value, triggered, triggered_now, timestamp):
    """Dispatch the actions of a Rule according to the rule's state.

    An alert is logged whenever the rule has just been triggered or has been
    un-triggered. If the rule got un-triggered or re-triggered, only the first
    action is run, and only if it is a NotificationAction.

    Otherwise, all of the rule's actions are scheduled asynchronously. The
    actions following the first one are combined into a celery chain, whose
    tasks run sequentially: each task starts once its predecessor has
    completed successfully.

    The tasks MUST be immutable signatures (`.si` rather than `.s`). By
    default a chained task passes its output to the next task as input, which
    may lead to unexpected behavior.

    """
    try:
        rule = Rule.objects.get(id=rule_id)
    except Rule.DoesNotExist:
        raise RuleNotFoundError()

    def task_args(act):
        # Positional arguments of `run_action_by_id` for the given action.
        return (rule_id, incident_id, act.id, resource_id, resource_type,
                value, triggered, timestamp)

    # Log the alert, unless this is a re-trigger of an already firing rule.
    if triggered_now or not triggered:
        if rule.is_arbitrary():
            resource = rule.owner
        else:
            resource = get_resource_model(resource_type).objects.get(
                id=resource_id, owner=rule.owner_id)
        _log_alert(resource, rule, value, triggered, timestamp, incident_id,
                   rule.actions[0])

    # Un-triggered or re-triggered: at most send out a notification, if the
    # first action is a NotificationAction, and stop.
    if not (triggered and triggered_now):
        notifier = rule.actions[0]
        if isinstance(notifier, NotificationAction):
            run_action_by_id.delay(*task_args(notifier))
        return

    # Immutable signatures for every action but the first one.
    followups = [run_action_by_id.si(*task_args(act))
                 for act in rule.actions[1:]]

    # With one or more follow-up actions, pipe them into a celery chain.
    pipeline = reduce(operator.or_, followups) if followups else followups

    # Signature of the first action, which was left out above.
    first_action = rule.actions[0]
    head = run_action_by_id.si(*task_args(first_action))

    # Buffer no-data alerts so that we can decide on false-positives.
    if isinstance(first_action, NoDataAction):
        head.set(countdown=config.NO_DATA_ALERT_BUFFER_PERIOD)

    # Apply all tasks asynchronously. One of three things happens:
    # a. A single task and no chain: the task is simply applied.
    # b. A chain exists and the first task is a NotificationAction: the two
    #    are grouped so that they run in parallel and the notification does
    #    not block the rest of the chain.
    # c. A chain exists and the first task is not a NotificationAction: the
    #    first task is piped into the chain.
    # TODO Allow multiple NotificationAction's. Permit users to specify
    # more than a single notification that will notify them of the outcome
    # of the previously executed task in the chain, whether it succeeded
    # or not.
    if not pipeline:
        head.apply_async()
        return
    if isinstance(first_action, NotificationAction):
        from celery import group
        group(head, pipeline)()
        return
    operator.or_(head, pipeline).apply_async()