コード例 #1
0
class AlertStateBase(SparseModel):
    """Records a period of time during which a particular
       issue affected a particular element of the system"""

    class Meta:
        abstract = True
        unique_together = ('alert_item_type', 'alert_item_id', 'alert_type',
                           'active')
        ordering = ['id']
        app_label = 'chroma_core'

    table_name = 'chroma_core_alertstate'

    # Generic reference to the object the alert is about.
    alert_item_type = models.ForeignKey(ContentType, null=True)
    alert_item_id = models.PositiveIntegerField(null=True)
    # FIXME: generic foreign key does not automatically set up deletion
    # of this when the alert_item is deleted -- do it manually
    alert_item = GenericForeignKey('alert_item_type', 'alert_item_id')

    alert_type = models.CharField(max_length=128)

    begin = models.DateTimeField(help_text="Time at which the alert started",
                                 default=timezone.now)
    end = models.DateTimeField(help_text="Time at which the alert was resolved\
            if active is false, else time that the alert was last checked (e.g.\
            time when we last checked an offline target was still not offline)",
                               null=True)

    # Stored in the 'message' column; read it through message(), which
    # populates it lazily.
    _message = models.TextField(
        db_column='message',
        null=True,
        help_text=
        'Message associated with the Alert. Created at Alert creation time')

    # Note: use True and None instead of True and False so that
    # unique-together constraint only applied to active alerts
    active = models.NullBooleanField()

    # whether a user has manually dismissed alert
    dismissed = models.BooleanField(default=False,
                                    help_text="True denotes that the user "
                                    "has acknowledged this alert.")

    severity = models.IntegerField(default=logging.INFO,
                                   help_text=("String indicating the "
                                              "severity of the alert, "
                                              "one of %s") %
                                   STR_TO_SEVERITY.keys())

    # This is only used by one event ClientConnectEvent but it is critical and so needs to be searchable etc
    # for that reason it can't use the variant
    lustre_pid = models.IntegerField(null=True)

    # Subclasses set this, used as a default in .notify()
    default_severity = logging.INFO

    # For historical compatibility anything called Alert will send an alert email and anything else won't.
    # This can obviously be overridden by any particular event but gives us a like for like behaviour.
    @property
    def require_mail_alert(self):
        """True when the string form of this instance's type contains ``Alert'>``."""
        return "Alert\'>" in str(type(self))

    def get_active_bool(self):
        """Return ``active`` coerced to a plain bool (None becomes False)."""
        return bool(self.active)

    def set_active_bool(self, value):
        """Store a truthy ``value`` as True and a falsy one as None (never False)."""
        self.active = True if value else None

    active_bool = property(get_active_bool, set_active_bool)

    def to_dict(self):
        """
        :return: A dict of plain values describing this alert
        """
        from chroma_core.lib.util import time_str

        # message() may generate and persist the text on first use, so call it once only.
        message = self.message()
        return {
            # FIXME: Still need to figure out whether to pass enum or display string.
            'alert_severity': 'alert',
            'alert_item': str(self.alert_item),
            'alert_message': message,
            'message': message,
            'active': bool(self.active),
            'begin': time_str(self.begin),
            # Fall back to the begin time while no end has been recorded.
            'end': time_str(self.end if self.end is not None else self.begin),
            'id': self.id,
            'alert_item_id': self.alert_item_id,
            'alert_item_content_type_id': self.alert_item_type_id,
        }

    @property
    def affected_objects(self):
        """
        :return: A list of objects other than the alert_item that are affected by this alert
        """
        return []

    def end_event(self):
        """Hook returning an event for the end of the alert; the base class has none."""
        return None

    def alert_message(self):
        """Build the message text for this alert; subclasses must implement this."""
        raise NotImplementedError()

    def message(self):
        """
        :return: The stored message, generating and saving it first if it is not stored yet
        """
        # The first time this is called _message will be None, so we have to call alert_message to
        # create the message and then save it. This will occur once for each message.
        # In the future for new alerts we will try and create them when the Alert is created but
        # at the time this patch is produced that is tricky.
        # The purpose of this is to make it so that Alerts can continue to operate when the data required
        # to create the message no longer exists.
        # It's a small step for HYD-5736 and a move towards a more efficient model.
        if self._message is None:
            self._message = self.alert_message()
            self.save()

        return self._message

    def affected_targets(self, affect_target):
        """Hook taking an ``affect_target`` callable; the base class does nothing with it."""
        pass

    @classmethod
    def subclasses(cls):
        """
        :return: Every direct and indirect subclass of ``cls``, each followed by its own subclasses
        """
        all_subclasses = []
        for subclass in cls.__subclasses__():
            all_subclasses.append(subclass)
            all_subclasses.extend(subclass.subclasses())
        return all_subclasses

    @classmethod
    def filter_by_item(cls, item):
        """
        :param item: The object whose active alerts are wanted
        :return: A queryset of active alerts of this class whose alert_item is ``item``
        """
        if hasattr(item, 'content_type'):
            # A DowncastMetaclass object
            return cls.objects.filter(active=True,
                                      alert_item_id=item.id,
                                      alert_item_type=item.content_type)

        item_class = item.__class__
        return cls.objects.filter(
            active=True,
            alert_item_id=item.pk,
            alert_item_type__model=item_class.__name__.lower(),
            alert_item_type__app_label=item_class._meta.app_label)

    @classmethod
    def filter_by_item_id(cls, item_class, item_id):
        """
        :param item_class: Model class of the alert item
        :param item_id: Primary key of the alert item
        :return: A queryset of active alerts of this class for that item
        """
        return cls.objects.filter(
            active=True,
            alert_item_id=item_id,
            alert_item_type__model=item_class.__name__.lower(),
            alert_item_type__app_label=item_class._meta.app_label)

    @classmethod
    def notify(cls, alert_item, active, **kwargs):
        """Notify an alert in the default severity level for that alert"""

        return cls._notify(alert_item, active, **kwargs)

    @classmethod
    def notify_warning(cls, alert_item, active, **kwargs):
        """Notify an alert in at most the WARNING severity level"""

        # Note: this replaces any 'attrs_to_save' supplied by the caller.
        kwargs['attrs_to_save'] = {
            'severity': min(cls.default_severity, logging.WARNING)
        }
        return cls._notify(alert_item, active, **kwargs)

    @classmethod
    def _notify(cls, alert_item, active, **kwargs):
        """Downcast ``alert_item`` when it has a content_type, then raise or lower the alert."""
        if hasattr(alert_item, 'content_type'):
            alert_item = alert_item.downcast()

        if active:
            return cls.high(alert_item, **kwargs)

        # NOTE(review): low() is not defined in the class as shown here --
        # confirm it is provided elsewhere.
        return cls.low(alert_item, **kwargs)

    @classmethod
    def _get_attrs_to_save(cls, kwargs):
        """
        Split ``kwargs`` in place into query filters and data that is only saved.

        :param kwargs: Dict of keyword arguments; 'attrs_to_save' and every key that is
                       not a db field attname of this model are removed from it
        :return: Dict of the attributes to save with the alert
        """
        # Prepare data to be saved with alert, but not effect the filter_by_item() below
        # e.g. Only one alert type per alert item can be active, so we don't need to filter on severity.
        attrs_to_save = kwargs.pop('attrs_to_save', {})

        # Add any properties to the attrs_to_save that are not db fields, we can't search on
        # non db fields after all. Some alerts have custom fields and they will be searched out here.
        fields = [field.attname for field in cls._meta.fields]
        # Iterate over a snapshot of the keys: kwargs is modified inside the loop, and
        # iterating a live keys() view while popping raises RuntimeError on Python 3.
        for attr in list(kwargs):
            if attr not in fields:
                attrs_to_save[attr] = kwargs.pop(attr)

        return attrs_to_save

    @classmethod
    def high(cls, alert_item, **kwargs):
        """
        Raise the alert on ``alert_item``, reusing the matching active alert if one exists.

        :param alert_item: The object the alert is about
        :param kwargs: Field filters, plus optional 'attrs_to_save' and non-field attributes
        :return: The alert, or None when alert_item.not_deleted is not True or when the
                 insert collided with a concurrent one
        """
        if hasattr(alert_item,
                   'not_deleted') and alert_item.not_deleted != True:
            return None

        attrs_to_save = cls._get_attrs_to_save(kwargs)

        try:
            alert_state = cls.filter_by_item(alert_item).get(**kwargs)
        except cls.DoesNotExist:
            kwargs.update(attrs_to_save)

            kwargs.setdefault('alert_type', cls.__name__)
            kwargs.setdefault('severity', cls.default_severity)

            alert_state = cls(
                active=True,
                dismissed=False,  # Users dismiss, not the software
                alert_item=alert_item,
                **kwargs)
            try:
                alert_state._message = alert_state.alert_message()
                alert_state.save()
                job_log.info(
                    "AlertState: Raised %s on %s "
                    "at severity %s" %
                    (cls, alert_state.alert_item, alert_state.severity))
            except IntegrityError as e:
                job_log.warning(
                    "AlertState: IntegrityError %s saving %s : %s : %s" %
                    (e, cls.__name__, alert_item, kwargs))
                # Handle colliding inserts: drop out here, no need to update
                # the .end of the existing record as we are logically concurrent
                # with the creator.
                return None
        return alert_state
コード例 #2
0
class AlertResource(SeverityResource):
    """
    Notification of a bad health state.  Alerts refer to particular objects (such as
    servers or targets), and can either be active (indicating this is a current
    problem) or inactive (indicating this is a historical record of a problem).
    """

    message = fields.CharField(
        readonly=True,
        help_text="Human readable description of the alert, about one sentence",
    )

    alert_item = fields.CharField(help_text="URI of affected item")

    affected = fields.ListField(
        null=True,
        help_text=(
            "List of objects which are affected by the alert (e.g. a target "
            "alert also affects the file system to which the target belongs)"
        ),
    )

    alert_item_str = fields.CharField(
        readonly=True,
        help_text="A human readable noun describing the object that is the subject of the alert",
    )

    record_type = fields.CharField(
        attribute="record_type",
        help_text="The type of the alert described as a Python classes",
        enumerations=[class_.__name__ for class_ in util.all_subclasses(AlertStateBase)],
    )

    severity = fields.CharField(
        attribute="severity",
        help_text="String indicating the severity of the alert, one of %s" % STR_TO_SEVERITY.keys(),
        enumerations=STR_TO_SEVERITY.keys(),
    )

    def prepend_urls(self):
        """Expose the extra ``<resource_name>/dismiss_all`` URL, served by dismiss_all()."""
        pattern = r"^(?P<resource_name>%s)/dismiss_all%s$" % (self._meta.resource_name, trailing_slash())
        view = self.wrap_view("dismiss_all")
        return [url(pattern, view, name="api_alert_dismiss_all")]

    def dismiss_all(self, request, **kwargs):
        """
        Mark undismissed alerts as dismissed, leaving alone the ones that are both
        active and of severity 40 or 30.

        Anything other than a PUT from an authenticated user gets HttpUnauthorized;
        otherwise the response is HttpNoContent.
        """
        authenticated_put = request.method == "PUT" and request.user.is_authenticated()
        if not authenticated_put:
            return http.HttpUnauthorized()

        undismissed = AlertState.objects.filter(dismissed=False)
        undismissed.exclude(active=True, severity__in=[40, 30]).update(dismissed=True)

        return http.HttpNoContent()

    def dehydrate_alert_item(self, bundle):
        """Return the resource URI of the alert's item."""
        from chroma_api.urls import api

        subject = bundle.obj.alert_item
        return api.get_resource_uri(subject)

    def dehydrate_alert_item_str(self, bundle):
        """Return ``str()`` of the alert's item."""
        subject = bundle.obj.alert_item
        return str(subject)

    def dehydrate_message(self, bundle):
        """Return the alert's message text."""
        alert = bundle.obj
        return alert.message()

    def dehydrate_affected(self, bundle):
        """
        Return the resource URIs of everything the alert affects: its
        affected_objects, whatever affected_targets() reports through the
        callback, and the alert item itself, with duplicates removed.
        """
        from chroma_api.urls import api

        alert = bundle.obj
        collected = []

        def affect_target(target):
            # A target drags in its own filesystem or, for an "mgs" target that is
            # not a filesystem member, every filesystem in managedfilesystem_set.
            collected.append(target)
            if target.filesystem_member:
                collected.append(target.filesystem)
                return
            if target.target_type == "mgs":
                collected.extend(target.managedfilesystem_set.all())

        collected.extend(alert.affected_objects)
        alert.affected_targets(affect_target)
        collected.append(alert.alert_item)

        unique_objects = set(collected)
        return [api.get_resource_uri(obj) for obj in unique_objects]

    def build_filters(self, filters=None):
        """
        Build the filters as the parent class does, then replace an
        'active_bool__exact' entry with 'active__exact' set to True or None.
        """
        built = super(AlertResource, self).build_filters(filters)

        # Map False to None and 'active_bool' to 'active'
        if "active_bool__exact" in built:
            requested = built.pop("active_bool__exact")
            built["active__exact"] = True if requested else None

        return built

    class Meta:
        queryset = AlertState.objects.order_by("-begin")
        resource_name = "alert"

        filtering = {
            "begin": SeverityResource.ALL_FILTER_DATE,
            "end": SeverityResource.ALL_FILTER_DATE,
            "message": SeverityResource.ALL_FILTER_STR,
            "active": SeverityResource.ALL_FILTER_BOOL,
            "dismissed": SeverityResource.ALL_FILTER_BOOL,
            "id": SeverityResource.ALL_FILTER_INT,
            "severity": SeverityResource.ALL_FILTER_ENUMERATION,
            "created_at": SeverityResource.ALL_FILTER_DATE,
            "alert_type": SeverityResource.ALL_FILTER_ENUMERATION,
            "alert_item_id": SeverityResource.ALL_FILTER_INT,
            "lustre_pid": SeverityResource.ALL_FILTER_INT,
            "record_type": SeverityResource.ALL_FILTER_ENUMERATION,
        }

        ordering = ["begin", "end", "active"]
        serializer = DateSerializer()
        authorization = DjangoAuthorization()
        authentication = AnonymousAuthentication()
        list_allowed_methods = ["get"]
        detail_allowed_methods = ["get", "patch", "put"]
        always_return_data = True
コード例 #3
0
class AlertResource(LongPollingAPI, SeverityResource):
    """
    Notification of a bad health state.  Alerts refer to particular objects (such as
    servers or targets), and can either be active (indicating this is a current
    problem) or inactive (indicating this is a historical record of a problem).
    """

    message = fields.CharField(readonly=True,
                               help_text=("Human readable description "
                                          "of the alert, about one sentence"))

    alert_item = fields.CharField(help_text="URI of affected item")

    affected = fields.ListField(
        null=True,
        help_text=("List of objects which are affected by the alert "
                   "(e.g. a target alert also affects the file system to "
                   "which the target belongs)"))

    alert_item_str = fields.CharField(
        readonly=True,
        help_text=("A human readable noun describing the object "
                   "that is the subject of the alert"))

    record_type = fields.CharField(
        attribute='record_type',
        help_text="The type of the alert described as a Python classes",
        enumerations=[
            class_.__name__ for class_ in util.all_subclasses(AlertStateBase)
        ])

    severity = fields.CharField(attribute='severity',
                                help_text=("String indicating the "
                                           "severity of the alert, "
                                           "one of %s") %
                                STR_TO_SEVERITY.keys(),
                                enumerations=STR_TO_SEVERITY.keys())

    # Long polling should return when any of the tables below changes or has changed.
    long_polling_tables = [AlertState, LNetOfflineAlert]

    def dispatch(self, request_type, request, **kwargs):
        """Hand every request to handle_long_polling_dispatch() and return its result."""
        return self.handle_long_polling_dispatch(request_type, request,
                                                 **kwargs)

    def prepend_urls(self):
        """Expose the extra ``<resource_name>/dismiss_all`` URL, served by dismiss_all()."""
        return [
            url(r'^(?P<resource_name>%s)/dismiss_all%s$' %
                (self._meta.resource_name, trailing_slash()),
                self.wrap_view('dismiss_all'),
                name='api_alert_dismiss_all'),
        ]

    def dismiss_all(self, request, **kwargs):
        """
        Mark undismissed alerts as dismissed, except those that are still active
        with severity 40 or 30.

        :return: HttpUnauthorized unless the request is a PUT from an authenticated
                 user, otherwise HttpNoContent
        """
        if (request.method != 'PUT') or (not request.user.is_authenticated()):
            return http.HttpUnauthorized()

        # NOTE(review): 40 and 30 presumably are the logging.ERROR and
        # logging.WARNING levels -- confirm against the severity definitions.
        AlertState.objects.filter(dismissed=False).exclude(
            active=True, severity__in=[40, 30]).update(dismissed=True)

        return http.HttpNoContent()

    def dehydrate_alert_item(self, bundle):
        """Return the resource URI of the alert's item."""
        # Imported at call time, exactly as dehydrate_affected() does, so this
        # method does not rely on a module-level `api` name being present.
        from chroma_api.urls import api

        return api.get_resource_uri(bundle.obj.alert_item)

    def dehydrate_alert_item_str(self, bundle):
        """Return ``str()`` of the alert's item."""
        return str(bundle.obj.alert_item)

    def dehydrate_message(self, bundle):
        """Return the alert's message text."""
        return bundle.obj.message()

    def dehydrate_affected(self, bundle):
        """
        Return the resource URIs of everything the alert affects: its
        affected_objects, whatever affected_targets() reports through the
        callback, and the alert item itself, with duplicates removed.
        """
        from chroma_api.urls import api

        alert = bundle.obj

        affected_objects = []

        def affect_target(target):
            # A target drags in its own filesystem or, for an "mgs" target that is
            # not a filesystem member, every filesystem in managedfilesystem_set.
            affected_objects.append(target)
            if target.filesystem_member:
                affected_objects.append(target.filesystem)
            elif target.target_type == "mgs":
                affected_objects.extend(target.managedfilesystem_set.all())

        affected_objects.extend(alert.affected_objects)

        alert.affected_targets(affect_target)

        affected_objects.append(alert.alert_item)

        return [api.get_resource_uri(ao) for ao in set(affected_objects)]

    def build_filters(self, filters=None):
        """
        Build the filters as the parent classes do, then replace an
        'active_bool__exact' entry with 'active__exact' set to True or None.
        """
        filters = super(AlertResource, self).build_filters(filters)

        # Map False to None and 'active_bool' to 'active'
        if 'active_bool__exact' in filters:
            active_bool = filters.pop('active_bool__exact')
            filters['active__exact'] = True if active_bool else None

        return filters

    class Meta:
        queryset = AlertState.objects.order_by('-begin')
        resource_name = 'alert'

        filtering = {
            'begin': SeverityResource.ALL_FILTER_DATE,
            'end': SeverityResource.ALL_FILTER_DATE,
            'message': SeverityResource.ALL_FILTER_STR,
            'active': SeverityResource.ALL_FILTER_BOOL,
            'dismissed': SeverityResource.ALL_FILTER_BOOL,
            'id': SeverityResource.ALL_FILTER_INT,
            'severity': SeverityResource.ALL_FILTER_ENUMERATION,
            'created_at': SeverityResource.ALL_FILTER_DATE,
            'alert_type': SeverityResource.ALL_FILTER_ENUMERATION,
            'alert_item_id': SeverityResource.ALL_FILTER_INT,
            'lustre_pid': SeverityResource.ALL_FILTER_INT,
            'record_type': SeverityResource.ALL_FILTER_ENUMERATION
        }

        ordering = ['begin', 'end', 'active']
        authorization = DjangoAuthorization()
        authentication = AnonymousAuthentication()
        list_allowed_methods = ['get']
        detail_allowed_methods = ['get', 'patch', 'put']
        always_return_data = True
コード例 #4
0
class AlertStateBase(SparseModel):
    """Records a period of time during which a particular
       issue affected a particular element of the system"""

    class Meta:
        unique_together = ("alert_item_type", "alert_item_id", "alert_type", "active")
        ordering = ["id"]
        app_label = "chroma_core"
        db_table = "chroma_core_alertstate"

    table_name = "chroma_core_alertstate"

    # Generic reference to the object the alert is about.
    alert_item_type = models.ForeignKey(ContentType, null=True, on_delete=CASCADE)
    alert_item_id = models.PositiveIntegerField(null=True)
    # FIXME: generic foreign key does not automatically set up deletion
    # of this when the alert_item is deleted -- do it manually
    alert_item = GenericForeignKey("alert_item_type", "alert_item_id")

    alert_type = models.CharField(max_length=128)

    begin = models.DateTimeField(help_text="Time at which the alert started", default=timezone.now)
    end = models.DateTimeField(
        help_text="Time at which the alert was resolved\
            if active is false, else time that the alert was last checked (e.g.\
            time when we last checked an offline target was still not offline)",
        null=True,
    )

    # Stored in the 'message' column; read it through message(), which
    # populates it lazily.
    _message = models.TextField(
        db_column="message", null=True, help_text="Message associated with the Alert. Created at Alert creation time"
    )

    # Note: use True and None instead of True and False so that
    # unique-together constraint only applied to active alerts
    active = models.NullBooleanField()

    # whether a user has manually dismissed alert
    dismissed = models.BooleanField(
        default=False, help_text="True denotes that the user " "has acknowledged this alert."
    )

    # NOTE(review): on Python 3 "%s" % STR_TO_SEVERITY.keys() renders as
    # "dict_keys([...])"; left untouched here because the text is part of the
    # field definition -- confirm before changing.
    severity = models.IntegerField(
        default=logging.INFO,
        help_text=("String indicating the " "severity of the alert, " "one of %s") % STR_TO_SEVERITY.keys(),
    )

    # This is only used by one event ClientConnectEvent but it is critical and so needs to be searchable etc
    # for that reason it can't use the variant
    lustre_pid = models.IntegerField(null=True)

    # Subclasses set this, used as a default in .notify()
    default_severity = logging.INFO

    # For historical compatibility anything called Alert will send an alert email and anything else won't.
    # This can obviously be overridden by any particular event but gives us a like for like behaviour.
    @property
    def require_mail_alert(self):
        """True when the string form of this instance's type contains ``Alert'>``."""
        return "Alert'>" in str(type(self))

    def get_active_bool(self):
        """Return ``active`` coerced to a plain bool (None becomes False)."""
        return bool(self.active)

    def set_active_bool(self, value):
        """Store a truthy ``value`` as True and a falsy one as None (never False)."""
        self.active = True if value else None

    active_bool = property(get_active_bool, set_active_bool)

    def to_dict(self):
        """
        :return: A dict of plain values describing this alert
        """
        from chroma_core.lib.util import time_str

        # message() may generate and persist the text on first use, so call it once only.
        message = self.message()
        return {
            "alert_severity": "alert",  # FIXME: Still need to figure out whether to pass enum or display string.
            "alert_item": str(self.alert_item),
            "alert_message": message,
            "message": message,
            "active": bool(self.active),
            "begin": time_str(self.begin),
            # Fall back to the begin time while no end has been recorded.
            "end": time_str(self.end if self.end is not None else self.begin),
            "id": self.id,
            "alert_item_id": self.alert_item_id,
            "alert_item_content_type_id": self.alert_item_type_id,
        }

    @property
    def affected_objects(self):
        """
        :return: A list of objects other than the alert_item that are affected by this alert
        """
        return []

    def end_event(self):
        """Hook returning an event to register when the alert is lowered; the base class has none."""
        return None

    def alert_message(self):
        """Build the message text for this alert; subclasses must implement this."""
        raise NotImplementedError()

    def message(self):
        """
        :return: The stored message, generating and saving it first if it is not stored yet
        """
        # The first time this is called _message will be None, so we have to call alert_message to
        # create the message and then save it. This will occur once for each message.
        # In the future for new alerts we will try and create them when the Alert is created but
        # at the time this patch is produced that is tricky.
        # The purpose of this is to make it so that Alerts can continue to operate when the data required
        # to create the message no longer exists.
        # It's a small step for HYD-5736 and a move towards a more efficient model.
        if self._message is None:
            self._message = self.alert_message()
            self.save()

        return self._message

    def affected_targets(self, affect_target):
        """Hook taking an ``affect_target`` callable; the base class does nothing with it."""
        pass

    @classmethod
    def subclasses(cls):
        """
        :return: Every direct and indirect subclass of ``cls``, each followed by its own subclasses
        """
        all_subclasses = []
        for subclass in cls.__subclasses__():
            all_subclasses.append(subclass)
            all_subclasses.extend(subclass.subclasses())
        return all_subclasses

    @classmethod
    def filter_by_item(cls, item):
        """
        :param item: The object whose active alerts are wanted
        :return: A queryset of active alerts of this class whose alert_item is ``item``
        """
        if hasattr(item, "content_type"):
            # A DowncastMetaclass object
            return cls.objects.filter(active=True, alert_item_id=item.id, alert_item_type=item.content_type)

        item_class = item.__class__
        return cls.objects.filter(
            active=True,
            alert_item_id=item.pk,
            alert_item_type__model=item_class.__name__.lower(),
            alert_item_type__app_label=item_class._meta.app_label,
        )

    @classmethod
    def filter_by_item_id(cls, item_class, item_id):
        """
        :param item_class: Model class of the alert item
        :param item_id: Primary key of the alert item
        :return: A queryset of active alerts of this class for that item
        """
        return cls.objects.filter(
            active=True,
            alert_item_id=item_id,
            alert_item_type__model=item_class.__name__.lower(),
            alert_item_type__app_label=item_class._meta.app_label,
        )

    @classmethod
    def notify(cls, alert_item, active, **kwargs):
        """Notify an alert in the default severity level for that alert"""

        return cls._notify(alert_item, active, **kwargs)

    @classmethod
    def notify_warning(cls, alert_item, active, **kwargs):
        """Notify an alert in at most the WARNING severity level"""

        # Note: this replaces any 'attrs_to_save' supplied by the caller.
        kwargs["attrs_to_save"] = {"severity": min(cls.default_severity, logging.WARNING)}
        return cls._notify(alert_item, active, **kwargs)

    @classmethod
    def _notify(cls, alert_item, active, **kwargs):
        """Downcast ``alert_item`` when it has a content_type, then raise or lower the alert."""
        if hasattr(alert_item, "content_type"):
            alert_item = alert_item.downcast()

        if active:
            return cls.high(alert_item, **kwargs)
        return cls.low(alert_item, **kwargs)

    @classmethod
    def _get_attrs_to_save(cls, kwargs):
        """
        Split ``kwargs`` in place into query filters and data that is only saved.

        :param kwargs: Dict of keyword arguments; 'attrs_to_save' and every key that is
                       not a db field attname of this model are removed from it
        :return: Dict of the attributes to save with the alert
        """
        # Prepare data to be saved with alert, but not effect the filter_by_item() below
        # e.g. Only one alert type per alert item can be active, so we don't need to filter on severity.
        attrs_to_save = kwargs.pop("attrs_to_save", {})

        # Add any properties to the attrs_to_save that are not db fields, we can't search on
        # non db fields after all. Some alerts have custom fields and they will be searched out here.
        fields = [field.attname for field in cls._meta.fields]
        # Iterate over a snapshot of the keys: kwargs is modified inside the loop, and
        # iterating a live keys() view while popping raises RuntimeError on Python 3.
        for attr in list(kwargs):
            if attr not in fields:
                attrs_to_save[attr] = kwargs.pop(attr)

        return attrs_to_save

    @classmethod
    def high(cls, alert_item, **kwargs):
        """
        Raise the alert on ``alert_item``, reusing the matching active alert if one exists.

        :param alert_item: The object the alert is about
        :param kwargs: Field filters, plus optional 'attrs_to_save' and non-field attributes
        :return: The alert, or None when alert_item.not_deleted is not True or when the
                 insert collided with a concurrent one
        """
        if hasattr(alert_item, "not_deleted") and alert_item.not_deleted != True:
            return None

        attrs_to_save = cls._get_attrs_to_save(kwargs)

        try:
            alert_state = cls.filter_by_item(alert_item).get(**kwargs)
        except cls.DoesNotExist:
            kwargs.update(attrs_to_save)

            kwargs.setdefault("alert_type", cls.__name__)
            kwargs.setdefault("severity", cls.default_severity)

            alert_state = cls(
                active=True, dismissed=False, alert_item=alert_item, **kwargs  # Users dismiss, not the software
            )
            try:
                alert_state._message = alert_state.alert_message()
                alert_state.save()
                job_log.info(
                    "AlertState: Raised %s on %s "
                    "at severity %s" % (cls, alert_state.alert_item, alert_state.severity)
                )
            except IntegrityError as e:
                job_log.warning(
                    "AlertState: IntegrityError %s saving %s : %s : %s" % (e, cls.__name__, alert_item, kwargs)
                )
                # Handle colliding inserts: drop out here, no need to update
                # the .end of the existing record as we are logically concurrent
                # with the creator.
                return None
        return alert_state

    @classmethod
    def low(cls, alert_item, **kwargs):
        """
        Lower the active alert on ``alert_item``: set its end time, clear ``active`` and save.

        :param alert_item: The object the alert is about
        :param kwargs: Field filters, plus optional 'end_time' (defaults to now)
        :return: The lowered alert, or None when no matching active alert exists
        """
        # The caller may provide an end_time rather than wanting now()
        end_time = kwargs.pop("end_time", timezone.now())

        # currently, no attrs are saved when an attr is lowered, so just filter them out of kwargs
        cls._get_attrs_to_save(kwargs)

        try:
            alert_state = cls.filter_by_item(alert_item).get(**kwargs)
            alert_state.end = end_time
            alert_state.active = None
            alert_state.save()

            # We optionally emit an event when alerts are lowered: we don't do that
            # for the beginning because that is implicit in the alert itself, whereas
            # the end can reasonably have a different message.
            end_event = alert_state.end_event()
            if end_event:
                end_event.register_event(
                    end_event.alert_item,
                    severity=end_event.severity,
                    message_str=end_event.message_str,
                    alert=end_event.alert,
                )
        except cls.DoesNotExist:
            alert_state = None

        return alert_state

    @classmethod
    def register_event(cls, alert_item, **kwargs):
        """
        Record an event on ``alert_item`` by raising an alert and lowering it straight away.

        :param alert_item: The object the event is about
        :param kwargs: Attributes saved with the event
        """
        # Events are Alerts with no duration, so just go high/low.
        alert_state = cls.high(alert_item, attrs_to_save=kwargs)
        if alert_state is None:
            # high() returns None when it did not produce an alert (item not
            # marked not_deleted, or a colliding insert), so there is no begin
            # time to reuse and nothing for this call to lower.
            return
        cls.low(alert_item, end_time=alert_state.begin, attrs_to_save=kwargs)

    def cast(self, target_class):
        """
        Works exactly as the super except because we duplicate record_type with alert_type. We should remove in the
        future, but for now this fixes that up.
        :param target_class: The class to cast this alert to
        :return: The alert returned by the parent cast(), with its message regenerated
        """
        # If the save fails for some reason then this change will have no effect.
        self.alert_type = target_class._meta.object_name

        new_alert = super(AlertStateBase, self).cast(target_class)

        # The message may well have changed so regenerate it.
        new_alert._message = None
        new_alert.message()

        return new_alert