Ejemplo n.º 1
0
 def __init__(self):
     super().__init__()
     self.version = version.version
     self.ruleset = RuleSet()
     self.patternset = PatternSet()
     self.triggers = defaultdict(
         list)  # event_class_id -> [trigger1, ..., triggerN]
     self.templates = {
     }  # event_class_id -> (body_template,subject_template)
     self.post_process = {}  # event_class_id -> [rule1, ..., ruleN]
     self.alter_handlers = []
     self.unclassified_codebook_depth = 5
     self.unclassified_codebook = {}  # object id -> [<codebook>]
     self.handlers = {}  # event class id -> [<handler>]
     self.dedup_filter = DedupFilter()
     self.suppress_filter = SuppressFilter()
     self.abduct_detector = AbductDetector()
     # Default link event action, when interface is not in inventory
     self.default_link_action = None
     # Reporting
     self.last_ts = None
     self.stats = {}
     #
     self.slot_number = 0
     self.total_slots = 0
     self.pool_partitions: Dict[str, int] = {}
     #
     self.cable_abduct_ecls: Optional[EventClass] = None
Ejemplo n.º 2
0
 def handle(self, paths, profile, format, progress=False, *args, **options):
     assert profile_loader.get_profile(
         profile), "Invalid profile: %s" % profile
     t0 = time.time()
     ruleset = RuleSet()
     ruleset.load()
     self.print("Ruleset load in %.2fms" % ((time.time() - t0) * 1000))
     reader = getattr(self, "read_%s" % format, None)
     assert reader, "Invalid format %s" % format
     self.managed_object = ManagedObject(id=1,
                                         name="test",
                                         address="127.0.0.1",
                                         profile_name=profile)
     t0 = time.time()
     stats = defaultdict(int)
     total = 0
     for p in paths:
         if not os.path.isfile(p):
             continue
         for f in iter_open(p):
             for event in reader(f):
                 e_vars = event.raw_vars.copy()
                 if event.source == "SNMP Trap":
                     e_vars.update(MIB.resolve_vars(event.raw_vars))
                 rule, r_vars = ruleset.find_rule(event, e_vars)
                 stats[rule.event_class.name] += 1
                 total += 1
                 if progress and total % 1000 == 0:
                     self.print("%d records processed" % total)
     dt = time.time() - t0
     self.print("%d events processed in %.2fms (%.fevents/sec)" %
                (total, dt * 1000, float(total) / dt))
     if stats:
         # Prepare statistics
         s_data = sorted([(k, stats[k]) for k in stats],
                         key=operator.itemgetter(1),
                         reverse=True)
         s_total = sum(stats[k] for k in stats if not self.is_ignored(k))
         data = [["Events", "%", "Event class"]]
         for ecls, qty in s_data:
             data += [[
                 str(qty),
                 "%3.2f%%" % (float(stats[ecls] * 100) / float(total)), ecls
             ]]
         # Calculate classification quality
         data += [[
             "",
             "%3.2f%%" % (float(s_total * 100) / total),
             "Classification Quality"
         ]]
         # Ruleset hit rate
         rs_rate = float(metrics["rules_checked"].value) / float(total)
         data += [["", "%.2f" % rs_rate, "Rule checks per event"]]
         # Dump table
         self.print("Event classes summary:")
         self.print(format_table([4, 6, 10], data))
Ejemplo n.º 3
0
 def __init__(self):
     super(ClassifierService, self).__init__()
     self.version = version.version
     self.ruleset = RuleSet()
     self.triggers = defaultdict(list)  # event_class_id -> [trigger1, ..., triggerN]
     self.templates = {}  # event_class_id -> (body_template,subject_template)
     self.post_process = {}  # event_class_id -> [rule1, ..., ruleN]
     self.suppression = {}  # event_class_id -> (condition, suppress)
     self.alter_handlers = []
     self.unclassified_codebook_depth = 5
     self.unclassified_codebook = {}  # object id -> [<codebook>]
     self.handlers = {}  # event class id -> [<handler>]
     # Default link event action, when interface is not in inventory
     self.default_link_action = None
     # Reporting
     self.last_ts = None
     self.stats = {}
Ejemplo n.º 4
0
class ClassifierService(TornadoService):
    """
    Events-classification service
    """

    name = "classifier"
    leader_group_name = "classifier-%(pool)s"
    pooled = True
    use_mongo = True
    process_name = "noc-%(name).10s-%(pool).5s"

    # SNMP OID pattern
    rx_oid = re.compile(r"^(\d+\.){6,}")

    interface_cache = cachetools.TTLCache(maxsize=10000, ttl=60)

    def __init__(self):
        super().__init__()
        self.version = version.version
        self.ruleset = RuleSet()
        self.patternset = PatternSet()
        self.triggers = defaultdict(
            list)  # event_class_id -> [trigger1, ..., triggerN]
        self.templates = {
        }  # event_class_id -> (body_template,subject_template)
        self.post_process = {}  # event_class_id -> [rule1, ..., ruleN]
        self.alter_handlers = []
        self.unclassified_codebook_depth = 5
        self.unclassified_codebook = {}  # object id -> [<codebook>]
        self.handlers = {}  # event class id -> [<handler>]
        self.dedup_filter = DedupFilter()
        self.suppress_filter = SuppressFilter()
        self.abduct_detector = AbductDetector()
        # Default link event action, when interface is not in inventory
        self.default_link_action = None
        # Reporting
        self.last_ts = None
        self.stats = {}
        #
        self.slot_number = 0
        self.total_slots = 0
        self.pool_partitions: Dict[str, int] = {}
        #
        self.cable_abduct_ecls: Optional[EventClass] = None

    async def on_activate(self):
        """
        Load rules from database after loading config
        """
        self.logger.info("Using rule lookup solution: %s",
                         config.classifier.lookup_handler)
        self.ruleset.load()
        self.patternset.load()
        self.load_triggers()
        self.load_link_action()
        self.load_handlers()
        # Heat up MIB cache
        MIBData.preload()
        self.slot_number, self.total_slots = await self.acquire_slot()
        await self.subscribe_stream("events.%s" % config.pool,
                                    self.slot_number, self.on_event)
        report_callback = PeriodicCallback(self.report, 1000)
        report_callback.start()

    def load_triggers(self):
        self.logger.info("Loading triggers")
        self.triggers = {}
        n = 0
        cn = 0
        self.alter_handlers = []
        ec = [(c.name, c.id) for c in EventClass.objects.all()]
        for t in EventTrigger.objects.all():
            self.logger.debug("Trigger '%s' for classes:", t.name)
            for c_name, c_id in ec:
                if re.search(t.event_class_re, c_name, re.IGNORECASE):
                    if (t.handler and t.condition == "True"
                            and t.selector is None and t.time_pattern is None
                            and t.template is None
                            and t.notification_group is None):
                        # Alter handlers
                        self.alter_handlers += [(c_id, t.is_enabled, t.handler)
                                                ]
                    elif t.is_enabled:
                        # Register trigger
                        h = t.handler
                        if h:
                            try:
                                h = get_handler(h)
                            except ImportError:
                                self.logger.error(
                                    "Failed to load handler '%s'. Ignoring", h)
                                h = None
                        if c_id in self.triggers:
                            self.triggers[c_id] += [Trigger(t, handler=h)]
                        else:
                            self.triggers[c_id] = [Trigger(t, handler=h)]
                        cn += 1
                        self.logger.debug("    %s", c_name)
            n += 1
        self.logger.info("%d triggers has been loaded to %d classes", n, cn)

    def load_link_action(self):
        self.default_link_action = None
        if config.classifier.default_interface_profile:
            p = InterfaceProfile.objects.filter(
                name=config.classifier.default_interface_profile).first()
            if p:
                self.logger.info("Setting default link event action to %s",
                                 p.link_events)
                self.default_link_action = p.link_events

    def load_handlers(self):
        self.logger.info("Loading handlers")
        self.handlers = {}
        # Process altered handlers
        enabled = defaultdict(list)  # event class id -> [handlers]
        disabled = defaultdict(list)  # event class id -> [handlers]
        for ec_id, status, handler in self.alter_handlers:
            if status:
                if handler in disabled[ec_id]:
                    disabled[ec_id].remove(handler)
                if handler not in enabled[ec_id]:
                    enabled[ec_id] += [handler]
            else:
                if handler not in disabled[ec_id]:
                    disabled[ec_id] += [handler]
                if handler in enabled[ec_id]:
                    enabled[ec_id].remove(handler)
        self.alter_handlers = []
        # Load handlers
        for ec in EventClass.objects.filter():
            handlers = (ec.handlers or []) + enabled[ec.id]
            if not handlers:
                continue
            self.logger.debug("    <%s>: %s", ec.name, ", ".join(handlers))
            hl = []
            for h in handlers:
                if h in disabled[ec.id]:
                    self.logger.debug("        disabling handler %s", h)
                    continue
                # Resolve handler
                try:
                    hh = get_handler(h)
                    hl += [hh]
                except ImportError:
                    self.logger.error("Failed to load handler '%s'. Ignoring",
                                      h)
            if hl:
                self.handlers[ec.id] = hl
        self.logger.info("Handlers are loaded")

    def retry_failed_events(self):
        """
        Return failed events to the unclassified queue if
        failed on other version of classifier
        """
        if FailedEvent.objects.count() == 0:
            return
        self.logger.info("Recovering failed events")
        wm = datetime.datetime.now() - datetime.timedelta(
            seconds=86400)  # @todo: use config
        dc = FailedEvent.objects.filter(timestamp__lt=wm).count()
        if dc > 0:
            self.logger.info("%d failed events are deprecated and removed", dc)
            FailedEvent.objects.filter(timestamp__lt=wm).delete()
        for e in FailedEvent.objects.filter(version__ne=self.version):
            e.mark_as_new(
                "Reclassification has been requested by noc-classifer")
            self.logger.debug("Failed event %s has been recovered", e.id)

    @classmethod
    @cachetools.cachedmethod(operator.attrgetter("interface_cache"))
    def get_interface(cls, managed_object_id, name):
        """
        Get interface instance
        """
        return noc.inv.models.interface.Interface.objects.filter(
            managed_object=managed_object_id, name=name).first()

    async def classify_event(self, event, data):
        """
        Perform event classification.
        Classification steps are:

        1. Format SNMP values accordind to MIB definitions (for SNMP events only)
        2. Find matching classification rule
        3. Calculate rule variables

        :param event: Event to classify
        :type event: NewEvent
        :returns: Classification status (CR_*)
        """
        metrics[E_SRC_METRICS.get(event.source, E_SRC_OTHER)] += 1
        is_unknown = False
        #
        pre_event = data.pop("$event", None)
        # Resolve MIB variables for SNMP Traps
        resolved_vars = {"profile": event.managed_object.profile.name}
        # Store event variables
        event.raw_vars = data
        if event.source == E_SRC_SNMP_TRAP:
            resolved_vars.update(MIB.resolve_vars(event.raw_vars))
        event.resolved_vars = resolved_vars
        # Get matched event class
        if pre_event:
            # Event is preprocessed, get class and variables
            event_class_name = pre_event.get("class")
            event_class = EventClass.get_by_name(event_class_name)
            if not event_class:
                self.logger.error(
                    "[%s|%s|%s] Failed to process event: Invalid event class '%s'",
                    event.id,
                    event.managed_object.name,
                    event.managed_object,
                    event_class_name,
                )
                metrics[CR_FAILED] += 1
                return  # Drop malformed message
            event.event_class = event_class
            event.vars = pre_event.get("vars", {})
        else:
            # Prevent unclassified events flood
            if self.check_unclassified_syslog_flood(event):
                return
            # Find matched event class
            c_vars = event.raw_vars.copy()
            c_vars.update({
                k: smart_text(fm_unescape(resolved_vars[k]))
                for k in resolved_vars
            })
            rule, vars = self.ruleset.find_rule(event, c_vars)
            if rule is None:
                # Something goes wrong.
                # No default rule found. Exit immediately
                self.logger.error("No default rule found. Exiting")
                os._exit(1)
            if rule.to_drop:
                # Silently drop event if declared by action
                self.logger.info(
                    "[%s|%s|%s] Dropped by action",
                    event.id,
                    event.managed_object.name,
                    event.managed_object.address,
                )
                metrics[CR_DELETED] += 1
                return
            if rule.is_unknown_syslog:
                # Append to codebook
                msg = event.raw_vars.get("message", "")
                cb = self.get_msg_codebook(msg)
                o_id = event.managed_object.id
                if o_id not in self.unclassified_codebook:
                    self.unclassified_codebook[o_id] = []
                cbs = [cb] + self.unclassified_codebook[o_id]
                cbs = cbs[:self.unclassified_codebook_depth]
                self.unclassified_codebook[o_id] = cbs
            self.logger.debug(
                "[%s|%s|%s] Matching rule: %s",
                event.id,
                event.managed_object.name,
                event.managed_object.address,
                rule.name,
            )
            event.event_class = rule.event_class
            # Calculate rule variables
            event.vars = self.ruleset.eval_vars(event, event.event_class, vars)
            message = "Classified as '%s' by rule '%s'" % (
                event.event_class.name, rule.name)
            event.log += [
                EventLog(
                    timestamp=datetime.datetime.now(),
                    from_status="N",
                    to_status="A",
                    message=message,
                )
            ]
            is_unknown = rule.is_unknown
        # Event class found, process according to rules
        self.logger.info(
            "[%s|%s|%s] Event class: %s (%s)",
            event.id,
            event.managed_object.name,
            event.managed_object.address,
            event.event_class.name,
            event.vars,
        )
        # Deduplication
        if self.deduplicate_event(event):
            return
        # Suppress repeats
        if self.suppress_repeats(event):
            return
        # Activate event
        event.expires = event.timestamp + datetime.timedelta(
            seconds=event.event_class.ttl)
        event.save()
        # Fill deduplication filter
        self.dedup_filter.register(event)
        # Fill suppress filter
        self.suppress_filter.register(event)
        # Call handlers
        if self.call_event_handlers(event):
            return
        # Additionally check link events
        if await self.check_link_event(event):
            return
        # Call triggers
        if self.call_event_triggers(event):
            return
        # Finally dispose event to further processing by correlator
        if event.to_dispose:
            await self.dispose_event(event)
        if is_unknown:
            metrics[CR_UNKNOWN] += 1
        elif pre_event:
            metrics[CR_PREPROCESSED] += 1
        else:
            metrics[CR_CLASSIFIED] += 1

    async def dispose_event(self, event):
        self.logger.info(
            "[%s|%s|%s] Disposing",
            event.id,
            event.managed_object.name,
            event.managed_object.address,
        )
        # Calculate partition
        fm_pool = event.managed_object.get_effective_fm_pool().name
        stream = f"dispose.{fm_pool}"
        num_partitions = self.pool_partitions.get(fm_pool)
        if not num_partitions:
            num_partitions = await self.get_stream_partitions(stream)
            self.pool_partitions[fm_pool] = num_partitions
        partition = int(event.managed_object.id) % num_partitions
        self.publish(
            orjson.dumps({
                "event_id": str(event.id),
                "event": event.to_json()
            }),
            stream=stream,
            partition=partition,
        )
        metrics[CR_DISPOSED] += 1

    def deduplicate_event(self, event: ActiveEvent) -> bool:
        """
        Deduplicate event when necessary
        :param event:
        :param vars:
        :return: True, if event is duplication of existent one
        """
        de_id = self.dedup_filter.find(event)
        if not de_id:
            return False
        self.logger.info(
            "[%s|%s|%s] Duplicates event %s. Discarding",
            event.id,
            event.managed_object.name,
            event.managed_object.address,
            de_id,
        )
        # de.log_message("Duplicated event %s has been discarded" % event.id)
        metrics[CR_DUPLICATED] += 1
        return True

    def suppress_repeats(self, event: ActiveEvent) -> bool:
        """
        Suppress repeated events
        :param event:
        :param vars:
        :return:
        """
        se_id = self.suppress_filter.find(event)
        if not se_id:
            return False
        self.logger.info(
            "[%s|%s|%s] Suppressed by event %s",
            event.id,
            event.managed_object.name,
            event.managed_object.address,
            se_id,
        )
        # Update suppressing event
        ActiveEvent.log_suppression(se_id, event.timestamp)
        # Delete suppressed event
        metrics[CR_SUPPRESSED] += 1
        return True

    def call_event_handlers(self, event):
        """
        Call handlers associated with event class
        :param event:
        :return:
        """
        if event.event_class.id not in self.handlers:
            return False
        event_id = event.id  # Temporary store id
        for h in self.handlers[event.event_class.id]:
            try:
                h(event)
            except Exception:
                error_report()
            if event.to_drop:
                self.logger.info(
                    "[%s|%s|%s] Dropped by handler",
                    event.id,
                    event.managed_object.name,
                    event.managed_object.address,
                )
                event.id = event_id  # Restore event id
                event.delete()
                metrics[CR_DELETED] += 1
                return True
        return False

    def call_event_triggers(self, event):
        """
        Call triggers associated with event class
        :param event:
        :return:
        """
        if event.event_class.id not in self.triggers:
            return False
        event_id = event.id
        for t in self.triggers[event.event_class.id]:
            try:
                t.call(event)
            except Exception:
                error_report()
            if event.to_drop:
                # Delete event and stop processing
                self.logger.info(
                    "[%s|%s|%s] Dropped by trigger %s",
                    event_id,
                    event.managed_object.name,
                    event.managed_object.address,
                    t.name,
                )
                event.id = event_id  # Restore event id
                event.delete()
                metrics[CR_DELETED] += 1
                return True
        else:
            return False

    def check_unclassified_syslog_flood(self, event):
        """
        Check if incoming messages is in unclassified codebook
        :param event:
        :return:
        """
        if event.source != E_SRC_SYSLOG or len(event.log):
            return False
        pcbs = self.unclassified_codebook.get(event.managed_object.id)
        if not pcbs:
            return False
        msg = event.raw_vars.get("message", "")
        cb = self.get_msg_codebook(msg)
        for pcb in pcbs:
            if self.is_codebook_match(cb, pcb):
                # Signature is already seen, suppress
                metrics[CR_UDUPLICATED] += 1
                return True
        return False

    async def check_link_event(self, event):
        """
        Additional link events check
        :param event:
        :return: True - stop processing, False - continue
        """
        if not event.event_class.link_event or "interface" not in event.vars:
            return False
        if_name = event.managed_object.get_profile().convert_interface_name(
            event.vars["interface"])
        iface = self.get_interface(event.managed_object.id, if_name)
        if iface:
            self.logger.info(
                "[%s|%s|%s] Found interface %s",
                event.id,
                event.managed_object.name,
                event.managed_object.address,
                iface.name,
            )
            action = iface.profile.link_events
        else:
            self.logger.info(
                "[%s|%s|%s] Interface not found:%s",
                event.id,
                event.managed_object.name,
                event.managed_object.address,
                if_name,
            )
            action = self.default_link_action
        # Abduct detection
        link_status = event.get_hint("link_status")
        if (link_status is not None and iface
                and iface.profile.enable_abduct_detection
                and event.managed_object.object_profile.abduct_detection_window
                and
                event.managed_object.object_profile.abduct_detection_threshold
            ):
            ts = int(event.timestamp.timestamp())
            if link_status:
                self.abduct_detector.register_up(ts, iface)
            else:
                if self.abduct_detector.register_down(ts, iface):
                    await self.raise_abduct_event(event)
        # Link actions
        if action == "I":
            # Ignore
            if iface:
                self.logger.info(
                    "[%s|%s|%s] Marked as ignored by interface profile '%s' (%s)",
                    event.id,
                    event.managed_object.name,
                    event.managed_object.address,
                    iface.profile.name,
                    iface.name,
                )
            else:
                self.logger.info(
                    "[%s|%s|%s] Marked as ignored by default interface profile",
                    event.id,
                    event.managed_object.name,
                    event.managed_object.address,
                )
            metrics[CR_DELETED] += 1
            return True
        elif action == "L":
            # Do not dispose
            if iface:
                self.logger.info(
                    "[%s|%s|%s] Marked as not disposable by interface profile '%s' (%s)",
                    event.id,
                    event.managed_object.name,
                    event.managed_object.address,
                    iface.profile.name,
                    iface.name,
                )
            else:
                self.logger.info(
                    "[%s|%s|%s] Marked as not disposable by default interface",
                    event.id,
                    event.managed_object.name,
                    event.managed_object.address,
                )
            event.do_not_dispose()
        return False

    async def on_event(self, msg: Message):
        # Decode message
        event = orjson.loads(msg.value)
        object = event.get("object")
        data = event.get("data")
        # Process event
        event_ts = datetime.datetime.fromtimestamp(event.get("ts"))
        # Generate or reuse existing object id
        event_id = ObjectId(event.get("id"))
        # Calculate message processing delay
        lag = (time.time() - float(msg.timestamp) / NS) * 1000
        metrics["lag_us"] = int(lag * 1000)
        self.logger.debug("[%s] Receiving new event: %s (Lag: %.2fms)",
                          event_id, data, lag)
        metrics[CR_PROCESSED] += 1
        # Resolve managed object
        mo = ManagedObject.get_by_id(object)
        if not mo:
            self.logger.info("[%s] Unknown managed object id %s. Skipping",
                             event_id, object)
            metrics[CR_UOBJECT] += 1
            return
        self.logger.info("[%s|%s|%s] Managed object found", event_id, mo.name,
                         mo.address)
        # Process event
        source = data.pop("source", "other")
        event = ActiveEvent(
            id=event_id,
            timestamp=event_ts,
            start_timestamp=event_ts,
            managed_object=mo,
            source=source,
            repeats=1,
        )  # raw_vars will be filled by classify_event()
        # Ignore event
        if self.patternset.find_ignore_rule(event, data):
            self.logger.debug("Ignored event %s vars %s", event, data)
            metrics[CR_IGNORED] += 1
            return
        # Classify event
        try:
            await self.classify_event(event, data)
        except Exception as e:
            self.logger.error("[%s|%s|%s] Failed to process event: %s",
                              event.id, mo.name, mo.address, e)
            metrics[CR_FAILED] += 1
            return
        self.logger.info("[%s|%s|%s] Event processed successfully", event.id,
                         mo.name, mo.address)

    async def report(self):
        t = perf_counter()
        if self.last_ts:
            r = []
            for m in CR:
                ov = self.stats.get(m, 0)
                nv = metrics[m].value
                r += ["%s: %d" % (m[7:], nv - ov)]
                self.stats[m] = nv
            nt = metrics[CR_PROCESSED].value
            ot = self.stats.get(CR_PROCESSED, 0)
            total = nt - ot
            self.stats[CR_PROCESSED] = nt
            dt = t - self.last_ts
            if total:
                speed = total / dt
                self.logger.info("REPORT: %d events in %.2fms. %.2fev/s (%s)" %
                                 (total, dt * 1000, speed, ", ".join(r)))
        self.last_ts = t

    rx_non_alpha = re.compile(r"[^a-z]+")
    rx_spaces = re.compile(r"\s+")

    def get_msg_codebook(self, s):
        """
        Generate message codebook vector
        """
        x = self.rx_non_alpha.sub(" ", s.lower())
        x = self.rx_spaces.sub(" ", x)
        return x.strip()

    def is_codebook_match(self, cb1, cb2):
        """
        Check codebooks for match
        """
        return cb1 == cb2

    async def raise_abduct_event(self, event: ActiveEvent) -> None:
        """
        Create Cable Abduct Event and dispose it to correlator
        :param event:
        :return:
        """
        if not self.cable_abduct_ecls:
            self.cable_abduct_ecls = EventClass.get_by_name(CABLE_ABDUCT)
        abd_event = ActiveEvent(
            timestamp=event.timestamp,
            start_timestamp=event.timestamp,
            managed_object=event.managed_object,
            source=event.source,
            repeats=1,
            event_class=self.cable_abduct_ecls,
        )
        abd_event.save()
        await self.dispose_event(abd_event)
Ejemplo n.º 5
0
def ruleset():
    ruleset = RuleSet()
    ruleset.load()
    return ruleset
Ejemplo n.º 6
0
class ClassifierService(Service):
    """
    Events-classification service
    """
    name = "classifier"
    leader_group_name = "classifier-%(pool)s"
    pooled = True
    process_name = "noc-%(name).10s-%(pool).5s"

    # SNMP OID pattern
    rx_oid = re.compile(r"^(\d+\.){6,}")

    interface_cache = TTLCache(maxsize=10000, ttl=60)

    def __init__(self):
        super(ClassifierService, self).__init__()
        self.version = version.version
        self.ruleset = RuleSet()
        self.triggers = defaultdict(
            list)  # event_class_id -> [trigger1, ..., triggerN]
        self.templates = {
        }  # event_class_id -> (body_template,subject_template)
        self.post_process = {}  # event_class_id -> [rule1, ..., ruleN]
        self.suppression = {}  # event_class_id -> (condition, suppress)
        self.alter_handlers = []
        self.unclassified_codebook_depth = 5
        self.unclassified_codebook = {}  # object id -> [<codebook>]
        self.handlers = {}  # event class id -> [<handler>]
        # Default link event action, when interface is not in inventory
        self.default_link_action = None
        # Reporting
        self.last_ts = None
        self.stats = {}

    def on_activate(self):
        """
        Load rules from database after loading config
        """
        self.logger.info("Using rule lookup solution: %s",
                         config.classifier.lookup_handler)
        self.ruleset.load()
        self.load_triggers()
        self.load_suppression()
        self.load_link_action()
        self.load_handlers()
        self.subscribe("events.%s" % config.pool, "fmwriter", self.on_event)
        report_callback = tornado.ioloop.PeriodicCallback(
            self.report, 1000, self.ioloop)
        report_callback.start()

    def load_triggers(self):
        self.logger.info("Loading triggers")
        self.triggers = {}
        n = 0
        cn = 0
        self.alter_handlers = []
        ec = [(c.name, c.id) for c in EventClass.objects.all()]
        for t in EventTrigger.objects.all():
            self.logger.debug("Trigger '%s' for classes:", t.name)
            for c_name, c_id in ec:
                if re.search(t.event_class_re, c_name, re.IGNORECASE):
                    if (t.handler and t.condition == "True"
                            and t.selector is None and t.time_pattern is None
                            and t.template is None
                            and t.notification_group is None):
                        # Alter handlers
                        self.alter_handlers += [(c_id, t.is_enabled, t.handler)
                                                ]
                    elif t.is_enabled:
                        # Register trigger
                        h = t.handler
                        if h:
                            h = self.resolve_handler(h)
                        if c_id in self.triggers:
                            self.triggers[c_id] += [Trigger(t, handler=h)]
                        else:
                            self.triggers[c_id] = [Trigger(t, handler=h)]
                        cn += 1
                        self.logger.debug("    %s", c_name)
            n += 1
        self.logger.info("%d triggers has been loaded to %d classes", n, cn)

    def load_suppression(self):
        """
        Load suppression rules
        """
        def compile_rule(s):
            """
            Compile suppression rule
            """
            x = [
                "'timestamp__gte': event.timestamp - datetime.timedelta(seconds=%d)"
                % s["window"],
                "'timestamp__lte': event.timestamp + datetime.timedelta(seconds=%d)"
                % s["window"]
            ]
            if len(s["event_class"]) == 1:
                x += ["'event_class': ObjectId('%s')" % s["event_class"][0]]
            else:
                x += [
                    "'event_class__in: [%s]" %
                    ", ".join(["ObjectId('%s')" % c for c in s["event_class"]])
                ]
            for k, v in s["match_condition"].items():
                x += ["'%s': %s" % (k, v)]
            return compile("{%s}" % ", ".join(x), "<string>", "eval")

        self.logger.info("Loading suppression rules")
        self.suppression = {}
        for c in EventClass.objects.filter(repeat_suppression__exists=True):
            # Read event class rules
            suppression = []
            for r in c.repeat_suppression:
                to_skip = False
                for s in suppression:
                    if (s["condition"] == r.condition
                            and s["window"] == r.window
                            and s["suppress"] == r.suppress
                            and s["match_condition"] == r.match_condition
                            and r.event_class.id not in s["event_class"]):
                        s["event_class"] += [r.event_class.id]
                        s["name"] += ", " + r.name
                        to_skip = True
                        break
                if to_skip:
                    continue
                suppression += [{
                    "name": r.name,
                    "condition": r.condition,
                    "window": r.window,
                    "suppress": r.suppress,
                    "match_condition": r.match_condition,
                    "event_class": [r.event_class.id]
                }]
            # Compile suppression rules
            self.suppression[c.id] = [
                (compile_rule(s), "%s::%s" % (c.name, s["name"]),
                 s["suppress"]) for s in suppression
            ]
        self.logger.info("Suppression rules are loaded")

    def load_link_action(self):
        self.default_link_action = None
        if config.classifier.default_interface_profile:
            p = InterfaceProfile.objects.filter(
                name=config.classifier.default_interface_profile).first()
            if p:
                self.logger.info("Setting default link event action to %s",
                                 p.link_events)
                self.default_link_action = p.link_events

    def load_handlers(self):
        self.logger.info("Loading handlers")
        self.handlers = {}
        # Process altered handlers
        enabled = defaultdict(list)  # event class id -> [handlers]
        disabled = defaultdict(list)  # event class id -> [handlers]
        for ec_id, status, handler in self.alter_handlers:
            if status:
                if handler in disabled[ec_id]:
                    disabled[ec_id].remove(handler)
                if handler not in enabled[ec_id]:
                    enabled[ec_id] += [handler]
            else:
                if handler not in disabled[ec_id]:
                    disabled[ec_id] += [handler]
                if handler in enabled[ec_id]:
                    enabled[ec_id].remove(handler)
        self.alter_handlers = []
        # Load handlers
        for ec in EventClass.objects.filter():
            handlers = (ec.handlers or []) + enabled[ec.id]
            if not handlers:
                continue
            self.logger.debug("    <%s>: %s", ec.name, ", ".join(handlers))
            hl = []
            for h in handlers:
                if h in disabled[ec.id]:
                    self.logger.debug("        disabling handler %s", h)
                    continue
                # Resolve handler
                hh = self.resolve_handler(h)
                if hh:
                    hl += [hh]
            if hl:
                self.handlers[ec.id] = hl
        self.logger.info("Handlers are loaded")

    def resolve_handler(self, h):
        mn, s = h.rsplit(".", 1)
        try:
            m = __import__(mn, {}, {}, s)
        except ImportError:
            self.logger.error("Failed to load handler '%s'. Ignoring", h)
            return None
        try:
            return getattr(m, s)
        except AttributeError:
            self.logger.error("Failed to load handler '%s'. Ignoring", h)
            return None

    def retry_failed_events(self):
        """
        Return failed events to the unclassified queue if
        failed on other version of classifier
        """
        if FailedEvent.objects.count() == 0:
            return
        self.logger.info("Recovering failed events")
        wm = datetime.datetime.now() - datetime.timedelta(
            seconds=86400)  # @todo: use config
        dc = FailedEvent.objects.filter(timestamp__lt=wm).count()
        if dc > 0:
            self.logger.info("%d failed events are deprecated and removed", dc)
            FailedEvent.objects.filter(timestamp__lt=wm).delete()
        for e in FailedEvent.objects.filter(version__ne=self.version):
            e.mark_as_new(
                "Reclassification has been requested by noc-classifer")
            self.logger.debug("Failed event %s has been recovered", e.id)

    def find_matching_rule(self, event, vars):
        """
        Find first matching classification rule

        :param event: Event
        :type event: NewEvent
        :param vars: raw and resolved variables
        :type vars: dict
        :returns: Event class and extracted variables
        :rtype: tuple of (EventClass, dict)
        """
        # Get chain
        src = event.raw_vars.get("source")
        if src == "syslog":
            chain = "syslog"
            if "message" not in event.raw_vars:
                return None, None
        elif src == "SNMP Trap":
            chain = "snmp_trap"
        else:
            chain = "other"
        # Find rules lookup
        lookup = self.rules.get(event.managed_object.profile.name,
                                {}).get(chain)
        if lookup:
            for r in lookup.lookup_rules(event, vars):
                # Try to match rule
                v = r.match(event, vars)
                if v is not None:
                    self.logger.debug(
                        "[%s] Matching class for event %s found: %s (Rule: %s)",
                        event.managed_object.name, event.id,
                        r.event_class_name, r.name)
                    return r, v
        if self.default_rule:
            return self.default_rule, {}
        return None, None

    def eval_rule_variables(self, event, event_class, vars):
        """
        Evaluate rule variables
        """
        r = {}
        for ecv in event_class.vars:
            # Check variable is present
            if ecv.name not in vars:
                if ecv.required:
                    raise Exception("Required variable '%s' is not found" %
                                    ecv.name)
                else:
                    continue
            # Decode variable
            v = vars[ecv.name]
            decoder = getattr(self, "decode_%s" % ecv.type, None)
            if decoder:
                try:
                    v = decoder(event, v)
                except InterfaceTypeError:
                    raise EventProcessingFailed(
                        "Cannot decode variable '%s'. Invalid %s: %s" %
                        (ecv.name, ecv.type, repr(v)))
            r[ecv.name] = v
        return r

    def to_suppress(self, event, vars):
        """
        Check wrether event must be suppressed

        :returns: (bool, rule name, event)
        """
        ts = event.timestamp
        n_delta = None
        nearest = None
        n_name = None
        n_suppress = False
        for r, name, suppress in self.suppression[event.event_class.id]:
            q = eval(r, {}, {
                "event": event,
                "ObjectId": ObjectId,
                "datetime": datetime,
                "vars": vars
            })
            e = ActiveEvent.objects.filter(**q).order_by("-timestamp").first()
            if e:
                d = ts - e.timestamp
                if n_delta is None or d < n_delta:
                    n_delta = d
                    nearest = e
                    n_name = name
                    n_suppress = suppress
        return n_suppress, n_name, nearest

    @cachedmethod(operator.attrgetter("interface_cache"))
    def get_interface(self, managed_object_id, name):
        """
        Get interface instance
        """
        return noc.inv.models.interface.Interface.objects.filter(
            managed_object=managed_object_id, name=name).first()

    def classify_event(self, event, data):
        """
        Perform event classification.
        Classification steps are:

        1. Format SNMP values accordind to MIB definitions (for SNMP events only)
        2. Find matching classification rule
        3. Calculate rule variables

        :param event: Event to classify
        :type event: NewEvent
        :returns: Classification status (CR_*)
        """
        metrics[E_SRC_METRICS.get(event.source, E_SRC_OTHER)] += 1
        is_unknown = False
        #
        pre_event = data.pop("$event", None)
        # Resolve MIB variables for SNMP Traps
        resolved_vars = {"profile": event.managed_object.profile.name}
        # Store event variables
        event.raw_vars = data
        if event.source == E_SRC_SNMP_TRAP:
            resolved_vars.update(MIB.resolve_vars(event.raw_vars))
        event.resolved_vars = resolved_vars
        # Get matched event class
        if pre_event:
            # Event is preprocessed, get class and variables
            event_class_name = pre_event.get("class")
            event_class = EventClass.get_by_name(event_class_name)
            if not event_class:
                self.logger.error(
                    "[%s|%s|%s] Failed to process event: Invalid event class '%s'",
                    event.id, event.managed_object.name, event.managed_object,
                    event_class_name)
                metrics[CR_FAILED] += 1
                return  # Drop malformed message
            event.event_class = event_class
            event.vars = pre_event.get("vars", {})
        else:
            # Prevent unclassified events flood
            if self.check_unclassified_syslog_flood(event):
                return
            # Find matched event class
            c_vars = event.raw_vars.copy()
            c_vars.update(
                dict(
                    (k, fm_unescape(resolved_vars[k])) for k in resolved_vars))
            rule, vars = self.ruleset.find_rule(event, c_vars)
            if rule is None:
                # Something goes wrong.
                # No default rule found. Exit immediately
                self.logger.error("No default rule found. Exiting")
                os._exit(1)
            if rule.to_drop:
                # Silently drop event if declared by action
                self.logger.info("[%s|%s|%s] Dropped by action", event.id,
                                 event.managed_object.name,
                                 event.managed_object.address)
                metrics[CR_DELETED] += 1
                return
            if rule.is_unknown_syslog:
                # Append to codebook
                msg = event.raw_vars.get("message", "")
                cb = self.get_msg_codebook(msg)
                o_id = event.managed_object.id
                if o_id not in self.unclassified_codebook:
                    self.unclassified_codebook[o_id] = []
                cbs = [cb] + self.unclassified_codebook[o_id]
                cbs = cbs[:self.unclassified_codebook_depth]
                self.unclassified_codebook[o_id] = cbs
            self.logger.debug("[%s|%s|%s] Matching rule: %s", event.id,
                              event.managed_object.name,
                              event.managed_object.address, rule.name)
            event.event_class = rule.event_class
            # Calculate rule variables
            event.vars = self.ruleset.eval_vars(event, event.event_class, vars)
            message = "Classified as '%s' by rule '%s'" % (
                event.event_class.name, rule.name)
            event.log += [
                EventLog(timestamp=datetime.datetime.now(),
                         from_status="N",
                         to_status="A",
                         message=message)
            ]
            is_unknown = rule.is_unknown
        # Event class found, process according to rules
        self.logger.info("[%s|%s|%s] Event class: %s (%s)", event.id,
                         event.managed_object.name,
                         event.managed_object.address, event.event_class.name,
                         event.vars)
        # Additionally check link events
        if self.check_link_event(event):
            return
        # Deduplication
        if self.deduplicate_event(event):
            return
        # Suppress repeats
        if self.suppress_repeats(event):
            return
        # Activate event
        event.expires = event.timestamp + datetime.timedelta(
            seconds=event.event_class.ttl)
        event.save()
        # Call handlers
        if self.call_event_handlers(event):
            return
        # Call triggers
        if self.call_event_triggers(event):
            return
        # Finally dispose event to further processing by correlator
        if event.to_dispose:
            self.dispose_event(event)
        if is_unknown:
            metrics[CR_UNKNOWN] += 1
        elif pre_event:
            metrics[CR_PREPROCESSED] += 1
        else:
            metrics[CR_CLASSIFIED] += 1

    def dispose_event(self, event):
        self.logger.info("[%s|%s|%s] Disposing", event.id,
                         event.managed_object.name,
                         event.managed_object.address)
        # Heat up cache
        cache.set("activeent-%s" % event.id, event, ttl=900)
        # @todo: Use config.pool instead
        self.pub("correlator.dispose.%s" % event.managed_object.pool.name, {
            "event_id": str(event.id),
            "event": event.to_json()
        })
        metrics[CR_DISPOSED] += 1

    def deduplicate_event(self, event):
        """
        Deduplicate event when necessary
        :param event:
        :param vars:
        :return: True, if event is duplication of existent one
        """
        dw = event.event_class.deduplication_window
        if not dw:
            return False  # No deduplication for event class
        t0 = event.timestamp - datetime.timedelta(seconds=dw)
        q = {
            "managed_object": event.managed_object.id,
            "timestamp__gte": t0,
            "timestamp__lte": event.timestamp,
            "event_class": event.event_class.id,
            "id__ne": event.id
        }
        for v in event.vars:
            q["vars__%s" % v] = event.vars[v]
        de = ActiveEvent.objects.filter(**q).first()
        if de:
            self.logger.info("[%s|%s|%s] Duplicates event %s. Discarding",
                             event.id, event.managed_object.name,
                             event.managed_object.address, de.id)
            de.log_message("Duplicated event %s has been discarded" % event.id)
            metrics[CR_DUPLICATED] += 1
            return True
        else:
            return False

    def suppress_repeats(self, event):
        """
        Suppress repeated events
        :param event:
        :param vars:
        :return:
        """
        if event.event_class.id not in self.suppression:
            return False
        suppress, name, nearest = self.to_suppress(event, event.vars)
        if suppress:
            self.logger.info("[%s|%s|%s] Suppressed by rule %s", event.id,
                             event.managed_object.name,
                             event.managed_object.address, name)
            # Update suppressing event
            nearest.log_suppression(event.timestamp)
            # Delete suppressed event
            metrics[CR_SUPPRESSED] += 1
            return True
        else:
            return False

    def call_event_handlers(self, event):
        """
        Call handlers associated with event class
        :param event:
        :return:
        """
        if event.event_class.id not in self.handlers:
            return False
        event_id = event.id  # Temporary store id
        for h in self.handlers[event.event_class.id]:
            try:
                h(event)
            except Exception:
                error_report()
            if event.to_drop:
                self.logger.info("[%s|%s|%s] Dropped by handler", event.id,
                                 event.managed_object.name,
                                 event.managed_object.address)
                event.id = event_id  # Restore event id
                event.delete()
                metrics[CR_DELETED] += 1
                return True
        return False

    def call_event_triggers(self, event):
        """
        Call triggers associated with event class
        :param event:
        :return:
        """
        if event.event_class.id not in self.triggers:
            return False
        event_id = event.id
        for t in self.triggers[event.event_class.id]:
            try:
                t.call(event)
            except Exception:
                error_report()
            if event.to_drop:
                # Delete event and stop processing
                self.logger.info("[%s|%s|%s] Dropped by trigger %s", event_id,
                                 event.managed_object.name,
                                 event.managed_object.address, t.name)
                event.id = event_id  # Restore event id
                event.delete()
                metrics[CR_DELETED] += 1
                return True
        else:
            return False

    def check_unclassified_syslog_flood(self, event):
        """
        Check if incoming messages is in unclassified codebook
        :param event:
        :return:
        """
        if event.source != E_SRC_SYSLOG or len(event.log):
            return False
        pcbs = self.unclassified_codebook.get(event.managed_object.id)
        if not pcbs:
            return False
        msg = event.raw_vars.get("message", "")
        cb = self.get_msg_codebook(msg)
        for pcb in pcbs:
            if self.is_codebook_match(cb, pcb):
                # Signature is already seen, suppress
                metrics[CR_UDUPLICATED] += 1
                return True
        return False

    def check_link_event(self, event):
        """
        Additional link events check
        :param event:
        :return: True - stop processing, False - continue
        """
        if not event.event_class.link_event or "interface" not in event.vars:
            return False
        if_name = event.managed_object.get_profile().convert_interface_name(
            event.vars["interface"])
        iface = self.get_interface(event.managed_object.id, if_name)
        if iface:
            self.logger.info("[%s|%s|%s] Found interface %s", event.id,
                             event.managed_object.name,
                             event.managed_object.address, iface.name)
            action = iface.profile.link_events
        else:
            self.logger.info("[%s|%s|%s] Interface not found:%s", event.id,
                             event.managed_object.name,
                             event.managed_object.address, if_name)
            action = self.default_link_action
        if action == "I":
            # Ignore
            if iface:
                self.logger.info(
                    "[%s|%s|%s] Marked as ignored by interface profile '%s' (%s)",
                    event.id, event.managed_object.name,
                    event.managed_object.address, iface.profile.name,
                    iface.name)
            else:
                self.logger.info(
                    "[%s|%s|%s] Marked as ignored by default interface profile",
                    event.id, event.managed_object.name,
                    event.managed_object.address)
            metrics[CR_DELETED] += 1
            return True
        elif action == "L":
            # Do not dispose
            if iface:
                self.logger.info(
                    "[%s|%s|%s] Marked as not disposable by interface profile '%s' (%s)",
                    event.id, event.managed_object.name,
                    event.managed_object.address, iface.profile.name,
                    iface.name)
            else:
                self.logger.info(
                    "[%s|%s|%s] Marked as not disposable by default interface",
                    event.id, event.managed_object.name,
                    event.managed_object.address)
            event.do_not_dispose()
        return False

    def on_event(self,
                 message,
                 ts=None,
                 object=None,
                 data=None,
                 id=None,
                 *args,
                 **kwargs):
        event_ts = datetime.datetime.fromtimestamp(ts)
        # Generate or reuse existing object id
        event_id = bson.ObjectId(id)
        # Calculate messate processing delay
        lag = (time.time() - ts) * 1000
        metrics["lag_us"] = int(lag * 1000)
        self.logger.debug("[%s] Receiving new event: %s (Lag: %.2fms)",
                          event_id, data, lag)
        metrics[CR_PROCESSED] += 1
        # Resolve managed object
        mo = ManagedObject.get_by_id(object)
        if not mo:
            self.logger.info("[%s] Unknown managed object id %s. Skipping",
                             event_id, object)
            metrics[CR_UOBJECT] += 1
            return True
        self.logger.info("[%s|%s|%s] Managed object found", event_id, mo.name,
                         mo.address)
        # Process event
        source = data.pop("source", "other")
        event = ActiveEvent(
            id=event_id,
            timestamp=event_ts,
            start_timestamp=event_ts,
            managed_object=mo,
            source=source,
            repeats=1)  # raw_vars will be filled by classify_event()
        # Classify event
        try:
            self.classify_event(event, data)
        except Exception as e:
            self.logger.error("[%s|%s|%s] Failed to process event: %s",
                              event.id, mo.name, mo.address, e)
            metrics[CR_FAILED] += 1
            return False
        self.logger.info("[%s|%s|%s] Event processed successfully", event.id,
                         mo.name, mo.address)
        return True

    @tornado.gen.coroutine
    def report(self):
        t = perf_counter()
        if self.last_ts:
            r = []
            for m in CR:
                ov = self.stats.get(m, 0)
                nv = metrics[m].value
                r += ["%s: %d" % (m[7:], nv - ov)]
                self.stats[m] = nv
            nt = metrics[CR_PROCESSED].value
            ot = self.stats.get(CR_PROCESSED, 0)
            total = nt - ot
            self.stats[CR_PROCESSED] = nt
            dt = (t - self.last_ts)
            if total:
                speed = total / dt
                self.logger.info("REPORT: %d events in %.2fms. %.2fev/s (%s)" %
                                 (total, dt * 1000, speed, ", ".join(r)))
        self.last_ts = t

    rx_non_alpha = re.compile(r"[^a-z]+")
    rx_spaces = re.compile(r"\s+")

    def get_msg_codebook(self, s):
        """
        Generate message codebook vector
        """
        x = self.rx_non_alpha.sub(" ", s.lower())
        x = self.rx_spaces.sub(" ", x)
        return x.strip()

    def is_codebook_match(self, cb1, cb2):
        """
        Check codebooks for match
        """
        return cb1 == cb2