class AlarmLog(nosql.EmbeddedDocument):
    """
    Single entry of an alarm log.

    Records a message together with the status the alarm had before
    and after the logged action. Both statuses are restricted by regex
    to a single character, either "A" or "C".
    """
    meta = {"allow_inheritance": False}
    # Moment the entry has been written
    timestamp = nosql.DateTimeField()
    # Status before and after the logged action
    from_status = nosql.StringField(max_length=1, regex=r"^[AC]$",
                                    required=True)
    to_status = nosql.StringField(max_length=1, regex=r"^[AC]$",
                                  required=True)
    # Free-form log text
    message = nosql.StringField()

    def __unicode__(self):
        """
        Render the entry as "<timestamp> [<from> -> <to>]: <message>"
        """
        parts = (self.timestamp, self.from_status,
                 self.to_status, self.message)
        return u"%s [%s -> %s]: %s" % parts
class AlarmLog(nosql.EmbeddedDocument):
    """
    Single entry of an alarm log.

    Records a message together with the status the alarm had before
    and after the logged action. Both statuses are restricted by regex
    to a single character, either "A" or "C".
    """
    meta = {"strict": False, "auto_create_index": False}
    # Moment the entry has been written
    timestamp = nosql.DateTimeField()
    # Status before and after the logged action
    from_status = nosql.StringField(max_length=1, regex=r"^[AC]$",
                                    required=True)
    to_status = nosql.StringField(max_length=1, regex=r"^[AC]$",
                                  required=True)
    # Free-form log text
    message = nosql.StringField()

    def __str__(self):
        """
        Render the entry as "<timestamp> [<from> -> <to>]: <message>"
        """
        parts = (self.timestamp, self.from_status,
                 self.to_status, self.message)
        return "%s [%s -> %s]: %s" % parts
class TileCache(nosql.Document):
    """
    Cached map tile, stored in the "noc.gis.tilecache" collection
    and indexed by (map, zoom, x, y).
    """
    meta = {
        "collection": "noc.gis.tilecache",
        "allow_inheritance": False,
        "indexes": [("map", "zoom", "x", "y")]
    }
    # Tile address: map id, zoom level (0..18) and x/y position
    map = nosql.ObjectIdField()
    zoom = nosql.IntField(min_value=0, max_value=18)
    x = nosql.IntField(required=True)
    y = nosql.IntField(required=True)
    # Readiness flag, False by default
    ready = nosql.BooleanField(default=False)
    last_updated = nosql.DateTimeField()
    # Binary tile payload
    data = nosql.BinaryField()

    def __unicode__(self):
        """
        Render the tile address as "<map>/<zoom>/<x>/<y>"
        """
        address = (self.map, self.zoom, self.x, self.y)
        return "%s/%s/%s/%s" % address
class ActiveAlarm(nosql.Document):
    """
    Alarm in active state (status "A"), stored in the
    "noc.alarms.active" collection.

    Clearing an alarm (see clear_alarm) moves it to ArchivedAlarm
    with the same id.
    """
    meta = {
        "collection": "noc.alarms.active",
        "allow_inheritance": False,
        "indexes": [
            "timestamp", "discriminator", "root", "-severity", "alarm_class",
            ("timestamp", "managed_object")
        ]
    }
    status = "A"
    timestamp = nosql.DateTimeField(required=True)
    last_update = nosql.DateTimeField(required=True)
    managed_object = nosql.ForeignKeyField(ManagedObject)
    alarm_class = nosql.PlainReferenceField(AlarmClass)
    severity = nosql.IntField(required=True)
    vars = nosql.DictField()
    # Calculated alarm discriminator
    # Has meaning only for alarms with is_unique flag set
    # Calculated as sha1("value1\x00....\x00valueN").hexdigest()
    discriminator = nosql.StringField(required=False)
    log = nosql.ListField(nosql.EmbeddedDocumentField(AlarmLog))
    # Responsible person
    owner = nosql.ForeignKeyField(User, required=False)
    #
    opening_event = nosql.ObjectIdField(required=False)
    closing_event = nosql.ObjectIdField(required=False)
    # List of subscribers
    subscribers = nosql.ListField(nosql.ForeignKeyField(User))
    #
    custom_subject = nosql.StringField(required=False)
    custom_style = nosql.ForeignKeyField(Style, required=False)
    #
    reopens = nosql.IntField(required=False)
    # RCA
    # Reference to root cause (Active Alarm or Archived Alarm instance)
    root = nosql.ObjectIdField(required=False)

    def __unicode__(self):
        return u"%s" % self.id

    def save(self, *args, **kwargs):
        """
        Save the alarm, defaulting last_update to timestamp when unset
        """
        if not self.last_update:
            self.last_update = self.timestamp
        return super(ActiveAlarm, self).save(*args, **kwargs)

    def _change_root_severity(self):
        """
        Change root severity, when necessary

        Raises the severity of the root cause alarm to this alarm's
        severity when the root is currently less severe.
        """
        if not self.root:
            return
        root = get_alarm(self.root)
        if root and root.severity < self.severity:
            # Severity MUST be passed by keyword: the first positional
            # parameter of change_severity is *user*, so a positional
            # call silently left the root severity untouched
            root.change_severity(severity=self.severity)
            root.log_message("Severity has been increased by child alarm %s" % self.id)

    def change_severity(self, user="", delta=None, severity=None):
        """
        Change alarm severity

        :param user: User instance or user name to mention in the log
        :param delta: Relative change of severity. Resulting severity
            never goes below 0. Takes precedence over *severity*
        :param severity: New severity: either an object carrying
            .severity and .name attributes or a plain number
        """
        if isinstance(user, User):
            user = user.username
        if delta:
            self.severity = max(0, self.severity + delta)
            if delta > 0:
                self.log_message("%s has increased alarm severity by %s" % (user, delta))
            else:
                self.log_message("%s has decreased alarm severity by %s" % (user, delta))
        elif severity:
            if hasattr(severity, "severity"):
                # Severity object
                self.severity = severity.severity
                self.log_message("%s has changed severity to %s" % (user, severity.name))
            else:
                # Plain numeric severity
                self.severity = int(severity)
                self.log_message("%s has changed severity to %s" % (user, severity))
        self._change_root_severity()
        self.save()

    def log_message(self, message, to_save=True):
        """
        Append a message to the alarm log

        :param message: Text to log
        :param to_save: Save the alarm right after logging
        """
        self.log += [
            AlarmLog(timestamp=datetime.datetime.now(),
                     from_status=self.status,
                     to_status=self.status,
                     message=message)
        ]
        if to_save:
            self.save()

    def contribute_event(self, e, open=False, close=False):
        """
        Bind event to the alarm

        :param e: Event. Its .id, .timestamp and .alarms are used
        :param open: Register event as the opening one
        :param close: Register event as the closing one
        """
        # Set opening event when necessary
        if open:
            self.opening_event = e.id
        # Set closing event when necessary
        if close:
            self.closing_event = e.id
        # Update timestamp
        if e.timestamp < self.timestamp:
            self.timestamp = e.timestamp
        else:
            self.last_update = max(self.last_update, e.timestamp)
        self.save()
        # Update event's list of alarms
        if self.id not in e.alarms:
            e.alarms.append(self.id)
            e.save()

    def clear_alarm(self, message):
        """
        Clear alarm: copy it to ArchivedAlarm with the same id,
        delete the active record and send notifications

        :param message: Log clearing message
        :return: ArchivedAlarm instance
        """
        ts = datetime.datetime.now()
        log = self.log + [
            AlarmLog(
                timestamp=ts, from_status="A", to_status="C", message=message)
        ]
        a = ArchivedAlarm(id=self.id,
                          timestamp=self.timestamp,
                          clear_timestamp=ts,
                          managed_object=self.managed_object,
                          alarm_class=self.alarm_class,
                          severity=self.severity,
                          vars=self.vars,
                          log=log,
                          root=self.root,
                          opening_event=self.opening_event,
                          closing_event=self.closing_event,
                          discriminator=self.discriminator,
                          reopens=self.reopens)
        ct = self.alarm_class.get_control_time(self.reopens)
        if ct:
            a.control_time = datetime.datetime.now() + datetime.timedelta(
                seconds=ct)
        a.save()
        # @todo: Clear related correlator jobs
        self.delete()
        # Send notifications
        if not a.root and not self.reopens:
            a.managed_object.event(
                a.managed_object.EV_ALARM_CLEARED, {
                    "alarm": a,
                    "subject": a.subject,
                    "body": a.body,
                    "symptoms": a.alarm_class.symptoms,
                    "recommended_actions": a.alarm_class.recommended_actions,
                    "probable_causes": a.alarm_class.probable_causes
                })
        elif ct:
            # Schedule delayed job
            submit_job("fm.correlator", "control_notify",
                       key=a.id, ts=a.control_time)
        return a

    def get_template_vars(self):
        """
        Prepare template variables

        :return: Copy of alarm vars with the alarm itself added
            under the "alarm" key
        """
        vars = self.vars.copy()
        vars.update({"alarm": self})
        return vars

    @property
    def subject(self):
        """
        Alarm subject rendered from the alarm class' subject template.
        Results of 255 or more characters are shortened to
        the first 125 and the last 125 characters joined by " ... "
        """
        ctx = Context(self.get_template_vars())
        s = Template(self.alarm_class.subject_template).render(ctx)
        if len(s) >= 255:
            s = s[:125] + " ... " + s[-125:]
        return s

    @property
    def body(self):
        """
        Alarm body rendered from the alarm class' body template
        """
        ctx = Context(self.get_template_vars())
        s = Template(self.alarm_class.body_template).render(ctx)
        return s

    def change_owner(self, user):
        """
        Change alarm's owner
        """
        self.owner = user
        self.save()

    def subscribe(self, user):
        """
        Change alarm's subscribers
        """
        if user.id not in self.subscribers:
            self.subscribers += [user.id]
            self.log_message(
                "%s(%s) has been subscribed" %
                ((" ".join([user.first_name, user.last_name]), user.username)),
                to_save=False)
            self.save()

    def unsubscribe(self, user):
        """
        Remove user from alarm's subscribers
        """
        if self.is_subscribed(user):
            # NOTE(review): items are compared with user.id but dereferenced
            # via .id; depends on what ForeignKeyField yields, confirm
            self.subscribers = [u.id for u in self.subscribers
                                if u != user.id]
            self.log_message(
                "%s(%s) has been unsubscribed" %
                ((" ".join([user.first_name, user.last_name]), user.username)),
                to_save=False)
            self.save()

    def is_owner(self, user):
        return self.owner == user

    def is_subscribed(self, user):
        return user.id in self.subscribers

    @property
    def is_unassigned(self):
        return self.owner is None

    @property
    def duration(self):
        """
        Seconds passed since alarm's timestamp
        """
        dt = datetime.datetime.now() - self.timestamp
        return dt.days * 86400 + dt.seconds

    @property
    def display_duration(self):
        """
        Alarm duration up to now as "HH:MM:SS",
        prefixed with "<days>d " when it exceeds a day
        """
        duration = datetime.datetime.now() - self.timestamp
        secs = duration.seconds % 60
        # Floor division keeps the parts integer
        # regardless of the division semantics in effect
        mins = (duration.seconds // 60) % 60
        hours = (duration.seconds // 3600) % 24
        days = duration.days
        r = "%02d:%02d:%02d" % (hours, mins, secs)
        if days:
            r = "%dd %s" % (days, r)
        return r

    @property
    def effective_style(self):
        """
        Custom style when set, severity's style otherwise
        """
        if self.custom_style:
            return self.custom_style
        else:
            return AlarmSeverity.get_severity(self.severity).style

    def set_root(self, root_alarm):
        """
        Set root cause

        Does nothing when the root is already set or when the new root
        would introduce a loop.

        :param root_alarm: Alarm to be marked as the root cause
        :raises Exception: On attempt to set alarm as its own root
        """
        if self.root:
            return
        if self.id == root_alarm.id:
            raise Exception("Cannot set self as root cause")
        # Detect loop
        root = root_alarm
        while root and root.root:
            root = root.root
            if root == self.id:
                return
            root = get_alarm(root)
        # Set root
        self.root = root_alarm.id
        self.log_message("Alarm %s has been marked as root cause" % root_alarm.id)
        # self.save()  Saved by log_message
        root_alarm.log_message("Alarm %s has been marked as child" % self.id)
        self._change_root_severity()
        # Clear pending notifications
        Notification.purge_delayed("alarm:%s" % self.id)

    @classmethod
    def enable_caching(cls, ttl=600):
        """
        Enable caching of alarm_class references

        :param ttl: Value passed to the field's set_cache
        """
        cls._fields["alarm_class"].set_cache(ttl)
class ActiveAlarm(nosql.Document):
    """
    Alarm in active state (status "A"), stored in the
    "noc.alarms.active" collection.

    Clearing an alarm (see clear_alarm) copies it to ArchivedAlarm
    with the same id and deletes the active record.
    """
    meta = {
        "collection": "noc.alarms.active",
        "strict": False,
        "auto_create_index": False,
        "indexes": [
            "timestamp", "root", "-severity",
            ("alarm_class", "managed_object"),
            ("discriminator", "managed_object"),
            ("timestamp", "managed_object"),
            "escalation_tt",
            "escalation_ts",
            "adm_path",
            "segment_path",
            "container_path",
            "uplinks"
        ]
    }
    status = "A"
    timestamp = nosql.DateTimeField(required=True)
    last_update = nosql.DateTimeField(required=True)
    managed_object = nosql.ForeignKeyField(ManagedObject)
    alarm_class = nosql.PlainReferenceField(AlarmClass)
    severity = nosql.IntField(required=True)
    vars = nosql.DictField()
    # Calculated alarm discriminator
    # Has meaning only for alarms with is_unique flag set
    # Calculated as sha1("value1\x00....\x00valueN").hexdigest()
    discriminator = nosql.StringField(required=False)
    log = nosql.ListField(nosql.EmbeddedDocumentField(AlarmLog))
    # Responsible person
    owner = nosql.ForeignKeyField(User, required=False)
    #
    opening_event = nosql.ObjectIdField(required=False)
    closing_event = nosql.ObjectIdField(required=False)
    # List of subscribers
    subscribers = nosql.ListField(nosql.ForeignKeyField(User))
    #
    custom_subject = nosql.StringField(required=False)
    custom_style = nosql.ForeignKeyField(Style, required=False)
    #
    reopens = nosql.IntField(required=False)
    # RCA
    # Reference to root cause (Active Alarm or Archived Alarm instance)
    root = nosql.ObjectIdField(required=False)
    # Escalated TT ID in form
    # <external system name>:<external tt id>
    escalation_ts = nosql.DateTimeField(required=False)
    escalation_tt = nosql.StringField(required=False)
    escalation_error = nosql.StringField(required=False)
    # span context
    escalation_ctx = nosql.LongField(required=False)
    # Close tt when alarm cleared
    close_tt = nosql.BooleanField(default=False)
    # Do not clear alarm until *wait_tt* is closed
    wait_tt = nosql.StringField()
    wait_ts = nosql.DateTimeField()
    # Directly affected services summary, grouped by profiles
    # (connected to the same managed object)
    direct_services = nosql.ListField(nosql.EmbeddedDocumentField(SummaryItem))
    direct_subscribers = nosql.ListField(nosql.EmbeddedDocumentField(SummaryItem))
    # Indirectly affected services summary, grouped by profiles
    # (covered by this and all inferred alarms)
    total_objects = nosql.ListField(nosql.EmbeddedDocumentField(ObjectSummaryItem))
    total_services = nosql.ListField(nosql.EmbeddedDocumentField(SummaryItem))
    total_subscribers = nosql.ListField(nosql.EmbeddedDocumentField(SummaryItem))
    # Template and notification group to send close notification
    clear_template = nosql.ForeignKeyField(Template, required=False)
    clear_notification_group = nosql.ForeignKeyField(NotificationGroup, required=False)
    # Paths
    adm_path = nosql.ListField(nosql.IntField())
    segment_path = nosql.ListField(nosql.ObjectIdField())
    container_path = nosql.ListField(nosql.ObjectIdField())
    # Uplinks, for topology_rca only
    uplinks = nosql.ListField(nosql.IntField())

    def __unicode__(self):
        return u"%s" % self.id

    def iter_changed_datastream(self):
        """
        Yield ("alarm", <alarm id>) when config.datastream.enable_alarm
        is set, nothing otherwise
        """
        if config.datastream.enable_alarm:
            yield "alarm", self.id

    def clean(self):
        """
        Fill derived fields: default last_update to timestamp
        and copy adm_path, segment_path, container_path and uplinks
        from managed_object.data
        """
        super(ActiveAlarm, self).clean()
        if not self.last_update:
            self.last_update = self.timestamp
        data = self.managed_object.data
        self.adm_path = data.adm_path
        self.segment_path = data.segment_path
        self.container_path = data.container_path
        self.uplinks = data.uplinks

    def safe_save(self, **kwargs):
        """
        Create new alarm or update existing if still exists

        :param kwargs: Passed to save() when the alarm already has an id.
            *save_condition* defaults to {"id": self.id}
        :return: None. SaveConditionError is suppressed
        """
        if self.id:
            # Update existing only if exists
            if "save_condition" not in kwargs:
                kwargs["save_condition"] = {"id": self.id}
            try:
                self.save(**kwargs)
            except SaveConditionError:
                pass  # Race condition, closed during update
        else:
            self.save()

    def change_severity(self, user="", delta=None, severity=None, to_save=True):
        """
        Change alarm severity

        :param user: User instance or user name to mention in the log
        :param delta: Relative change of severity. Resulting severity
            never goes below 0. Takes precedence over *severity*
        :param severity: New severity: either a number
            or an object carrying .severity and .name attributes
        :param to_save: Call safe_save() at the end. Note that
            log_message() is called with its default to_save
            and saves on its own whenever a change is logged
        """
        if isinstance(user, User):
            user = user.username
        if delta:
            self.severity = max(0, self.severity + delta)
            if delta > 0:
                self.log_message(
                    "%s has increased alarm severity by %s" % (
                        user, delta))
            else:
                self.log_message(
                    "%s has decreased alarm severity by %s" % (
                        user, delta))
        elif severity:
            if type(severity) in (int, long, float):
                self.severity = int(severity)
                self.log_message(
                    "%s has changed severity to %s" % (user, severity))
            else:
                self.severity = severity.severity
                self.log_message(
                    "%s has changed severity to %s" % (user, severity.name))
        if to_save:
            self.safe_save()

    def log_message(self, message, to_save=True):
        """
        Append a message to the alarm log

        :param message: Text to log
        :param to_save: Call safe_save() right after logging
        """
        self.log += [AlarmLog(timestamp=datetime.datetime.now(),
                              from_status=self.status,
                              to_status=self.status,
                              message=message)]
        if to_save:
            self.safe_save()

    def clear_alarm(self, message, ts=None, force=False):
        """
        Clear alarm

        :param message: Log clearing message
        :param ts: Clearing timestamp
        :param force: Clear even if wait_tt is set
        :return: ArchivedAlarm instance, or None when clearing
            has been postponed until wait_tt is closed
        """
        ts = ts or datetime.datetime.now()
        if self.wait_tt and not force:
            # Wait for escalated tt to close
            if not self.wait_ts:
                # First clear attempt: remember the moment
                # and schedule the waiting job
                self.wait_ts = ts
                self.log_message("Waiting for TT to close")
                call_later(
                    "noc.services.escalator.wait_tt.wait_tt",
                    scheduler="escalator",
                    pool=self.managed_object.escalator_shard,
                    alarm_id=self.id
                )
            return
        if self.alarm_class.clear_handlers:
            # Process clear handlers
            for h in self.alarm_class.get_clear_handlers():
                try:
                    h(self)
                except Exception:
                    # Failed handler is reported, clearing goes on
                    error_report()
        log = self.log + [AlarmLog(timestamp=ts, from_status="A",
                                   to_status="C", message=message)]
        a = ArchivedAlarm(
            id=self.id,
            timestamp=self.timestamp,
            clear_timestamp=ts,
            managed_object=self.managed_object,
            alarm_class=self.alarm_class,
            severity=self.severity,
            vars=self.vars,
            log=log,
            root=self.root,
            escalation_ts=self.escalation_ts,
            escalation_tt=self.escalation_tt,
            escalation_error=self.escalation_error,
            escalation_ctx=self.escalation_ctx,
            opening_event=self.opening_event,
            closing_event=self.closing_event,
            discriminator=self.discriminator,
            reopens=self.reopens,
            direct_services=self.direct_services,
            direct_subscribers=self.direct_subscribers,
            total_objects=self.total_objects,
            total_services=self.total_services,
            total_subscribers=self.total_subscribers,
            adm_path=self.adm_path,
            segment_path=self.segment_path,
            container_path=self.container_path,
            uplinks=self.uplinks
        )
        ct = self.alarm_class.get_control_time(self.reopens)
        if ct:
            a.control_time = datetime.datetime.now() + datetime.timedelta(seconds=ct)
        a.save()
        # Send notifications
        if not a.root and not self.reopens:
            a.managed_object.event(a.managed_object.EV_ALARM_CLEARED, {
                "alarm": a,
                "subject": a.subject,
                "body": a.body,
                "symptoms": a.alarm_class.symptoms,
                "recommended_actions": a.alarm_class.recommended_actions,
                "probable_causes": a.alarm_class.probable_causes
            })
        elif ct:
            # Intentionally empty: nothing is scheduled here
            pass
        # Set checks on all consequences
        for d in self._get_collection().find({
            "root": self.id
        }, {"_id": 1, "alarm_class": 1}):
            ac = AlarmClass.get_by_id(d["alarm_class"])
            if not ac:
                continue
            t = ac.recover_time
            if not t:
                continue
            call_later(
                "noc.services.correlator.check.check_close_consequence",
                scheduler="correlator",
                pool=self.managed_object.pool.name,
                delay=t,
                alarm_id=d["_id"]
            )
        # Clear alarm
        self.delete()
        # Close TT
        # MUST be after .delete() to prevent race conditions
        if a.escalation_tt or self.clear_template:
            if self.clear_template:
                ctx = {
                    "alarm": a
                }
                subject = self.clear_template.render_subject(**ctx)
                body = self.clear_template.render_body(**ctx)
            else:
                subject = "Alarm cleared"
                body = "Alarm has been cleared"
            call_later(
                "noc.services.escalator.escalation.notify_close",
                scheduler="escalator",
                pool=self.managed_object.escalator_shard,
                max_runs=ALARM_CLOSE_RETRIES,
                alarm_id=self.id,
                tt_id=self.escalation_tt,
                subject=subject,
                body=body,
                notification_group_id=self.clear_notification_group.id if self.clear_notification_group else None,
                close_tt=self.close_tt
            )
        # Gather diagnostics
        AlarmDiagnosticConfig.on_clear(a)
        # Return archived
        return a

    def get_template_vars(self):
        """
        Prepare template variables

        :return: Copy of alarm vars with the alarm itself added
            under the "alarm" key
        """
        vars = self.vars.copy()
        vars.update({"alarm": self})
        return vars

    @property
    def subject(self):
        """
        Alarm subject: custom_subject when set, otherwise rendered
        from the alarm class' subject template. Results of 255 or more
        characters are shortened to the first 125 and the last 125
        characters joined by " ... "
        """
        if self.custom_subject:
            s = self.custom_subject
        else:
            ctx = Context(self.get_template_vars())
            s = DjangoTemplate(self.alarm_class.subject_template).render(ctx)
        if len(s) >= 255:
            s = s[:125] + " ... " + s[-125:]
        return s

    @property
    def body(self):
        """
        Alarm body rendered from the alarm class' body template
        """
        ctx = Context(self.get_template_vars())
        s = DjangoTemplate(self.alarm_class.body_template).render(ctx)
        return s

    def change_owner(self, user):
        """
        Change alarm's owner
        """
        self.owner = user
        self.save()

    def subscribe(self, user):
        """
        Change alarm's subscribers
        """
        if user.id not in self.subscribers:
            self.subscribers += [user.id]
            self.log_message("%s(%s) has been subscribed" % (
                (" ".join([user.first_name, user.last_name]),
                 user.username)
            ), to_save=False)
            self.save()

    def unsubscribe(self, user):
        """
        Remove user from alarm's subscribers
        """
        if self.is_subscribed(user):
            # NOTE(review): items are compared with user.id but dereferenced
            # via .id; depends on what ForeignKeyField yields, confirm
            self.subscribers = [u.id for u in self.subscribers
                                if u != user.id]
            self.log_message("%s(%s) has been unsubscribed" % (
                (" ".join([user.first_name, user.last_name]),
                 user.username)
            ), to_save=False)
            self.save()

    def is_owner(self, user):
        return self.owner == user

    def is_subscribed(self, user):
        return user.id in self.subscribers

    @property
    def is_unassigned(self):
        return self.owner is None

    @property
    def duration(self):
        """
        Seconds passed since alarm's timestamp
        """
        dt = datetime.datetime.now() - self.timestamp
        return dt.days * 86400 + dt.seconds

    @property
    def display_duration(self):
        """
        Alarm duration up to now as "HH:MM:SS",
        prefixed with "<days>d " when days is non-zero
        """
        duration = datetime.datetime.now() - self.timestamp
        secs = duration.seconds % 60
        mins = (duration.seconds / 60) % 60
        hours = (duration.seconds / 3600) % 24
        days = duration.days
        r = "%02d:%02d:%02d" % (hours, mins, secs)
        if days:
            r = "%dd %s" % (days, r)
        return r

    @property
    def effective_style(self):
        """
        Custom style when set, severity's style otherwise
        """
        if self.custom_style:
            return self.custom_style
        else:
            return AlarmSeverity.get_severity(self.severity).style

    def get_root(self):
        """
        Get top-level root alarm
        """
        root = self
        while root.root:
            root = get_alarm(root.root)
        return root

    def update_summary(self):
        """
        Recalculate total_objects, total_services and total_subscribers
        from alarm's own direct summaries and the totals of all
        alarms having this one as root (updated recursively first).

        When any total has changed, severity is recalculated via
        ServiceSummary.get_severity() and the alarm is saved
        with safe_save()
        """
        def update_dict(d1, d2):
            # Add counters of d2 to d1 in place
            for k in d2:
                if k in d1:
                    d1[k] += d2[k]
                else:
                    d1[k] = d2[k]

        services = SummaryItem.items_to_dict(self.direct_services)
        subscribers = SummaryItem.items_to_dict(self.direct_subscribers)
        # Alarm's own managed object counts once under its object profile
        objects = {
            self.managed_object.object_profile.id: 1
        }

        for a in ActiveAlarm.objects.filter(root=self.id):
            a.update_summary()
            update_dict(
                objects,
                SummaryItem.items_to_dict(a.total_objects)
            )
            update_dict(
                services,
                SummaryItem.items_to_dict(a.total_services)
            )
            update_dict(
                subscribers,
                SummaryItem.items_to_dict(a.total_subscribers)
            )
        obj_list = ObjectSummaryItem.dict_to_items(objects)
        svc_list = SummaryItem.dict_to_items(services)
        sub_list = SummaryItem.dict_to_items(subscribers)
        if svc_list != self.total_services or sub_list != self.total_subscribers or obj_list != self.total_objects:
            ns = ServiceSummary.get_severity({
                "service": services,
                "subscriber": subscribers,
                "objects": objects
            })
            self.total_objects = obj_list
            self.total_services = svc_list
            self.total_subscribers = sub_list
            if ns != self.severity:
                self.change_severity(severity=ns, to_save=False)
            self.safe_save()

    def set_root(self, root_alarm):
        """
        Set root cause

        Does nothing when the root is already set or when the new root
        would introduce a loop.

        :param root_alarm: Alarm to be marked as the root cause
        :raises Exception: On attempt to set alarm as its own root
        """
        if self.root:
            return
        if self.id == root_alarm.id:
            raise Exception("Cannot set self as root cause")
        # Detect loop
        root = root_alarm
        while root and root.root:
            root = root.root
            if root == self.id:
                return
            root = get_alarm(root)
        # Set root
        self.root = root_alarm.id
        self.log_message(
            "Alarm %s has been marked as root cause" % root_alarm.id)
        # self.save()  Saved by log_message
        root_alarm.log_message(
            "Alarm %s has been marked as child" % self.id)
        root_alarm.update_summary()
        # Clear pending notifications
        # Notification.purge_delayed("alarm:%s" % self.id)

    def escalate(self, tt_id, close_tt=False):
        """
        Register escalation of the alarm to trouble ticket

        Escalation fields are written directly to the active alarms
        collection. When no active record has been modified, the same
        update is applied to the archived alarms collection.

        :param tt_id: Escalated TT id
        :param close_tt: Value stored to the close_tt field
        """
        self.escalation_tt = tt_id
        self.escalation_ts = datetime.datetime.now()
        self.close_tt = close_tt
        self.log_message("Escalated to %s" % tt_id)
        q = {"_id": self.id}
        op = {
            "$set": {
                "escalation_tt": self.escalation_tt,
                "escalation_ts": self.escalation_ts,
                "close_tt": self.close_tt,
                "escalation_error": None
            }
        }
        r = ActiveAlarm._get_collection().update_one(q, op)
        if r.acknowledged and not r.modified_count:
            # Already closed, update archive
            ArchivedAlarm._get_collection().update_one(q, op)

    def set_escalation_error(self, error):
        """
        Store escalation error both in the instance
        and directly in the collection

        :param error: Error text
        """
        self.escalation_error = error
        self._get_collection().update_one(
            {"_id": self.id},
            {"$set": {
                "escalation_error": error
            }}
        )

    def set_escalation_context(self):
        """
        Store the context returned by get_current_span()
        as escalation_ctx, both in the instance and directly
        in the collection. Skipped when neither current
        nor stored context is set
        """
        current_context, current_span = get_current_span()
        if current_context or self.escalation_ctx:
            self.escalation_ctx = current_context
            self._get_collection().update_one(
                {"_id": self.id},
                {"$set": {
                    "escalation_ctx": current_context
                }}
            )

    def set_clear_notification(self, notification_group, template):
        """
        Set notification group and template to be used
        on alarm clearing and save them via safe_save()

        :param notification_group: Value for clear_notification_group
        :param template: Value for clear_template
        """
        self.clear_notification_group = notification_group
        self.clear_template = template
        self.safe_save(save_condition={
            "managed_object": {
                "$exists": True
            },
            "id": self.id
        })

    def iter_consequences(self):
        """
        Generator yielding all consequences alarm
        """
        for a in ActiveAlarm.objects.filter(root=self.id):
            yield a
            for ca in a.iter_consequences():
                yield ca

    def iter_affected(self):
        """
        Generator yielding all affected managed objects
        """
        seen = set([self.managed_object])
        yield self.managed_object
        for a in self.iter_consequences():
            if a.managed_object not in seen:
                seen.add(a.managed_object)
                yield a.managed_object

    def iter_escalated(self):
        """
        Generator yielding all escalated consequences
        """
        for a in self.iter_consequences():
            if a.escalation_tt:
                yield a
class Process(nosql.Document):
    """
    Running workflow instance, stored in the "noc.wf.processes"
    collection. Keeps the current node and the process context.
    """
    meta = {
        "collection": "noc.wf.processes",
        "allow_inheritance": False
    }
    workflow = nosql.PlainReferenceField(Workflow)
    node = nosql.PlainReferenceField(Node)
    context = nosql.RawDictField()
    start_time = nosql.DateTimeField()
    trace = nosql.BooleanField(default=False)
    sleep_time = nosql.IntField()

    class SleepException(Exception):
        """Raised by sleep() to interrupt the handler"""
        pass

    class CannotSleepError(Exception):
        """Raised by sleep() when called from conditional handler"""
        pass

    def __unicode__(self):
        return "%s at %s (%s)" % (self.workflow, self.node, self.id)

    def info(self, msg):
        """
        Log message at INFO level, prefixed with workflow and process id
        """
        line = "[%s (PID: %s)] %s" % (self.workflow, self.id, msg)
        logging.info(line)

    def update_context(self, param, value):
        """
        Set context variable
        """
        self.context[param] = value

    def step(self):
        """
        Run node handlers one after another, moving along the workflow

        :return: True when there is no next node to move to,
            False when the process has been suspended by sleep()
        """
        suspended = False
        while not suspended:
            if self.trace:
                self.info("Entering node '%s' (%s %s) with context %s" % (
                    self.node.name, self.node.handler,
                    self.node.params, self.context
                ))
            handler = self.node.handler_class()
            try:
                r = handler.run(self, self.node)
            except self.SleepException:
                suspended = True
            if self.trace:
                self.info("Leaving node '%s' with context %s" % (
                    self.node.name, self.context
                ))
            # Detect next node
            if not handler.conditional:
                next_node = self.node.next_node
            elif r:
                next_node = self.node.next_true_node
            else:
                next_node = self.node.next_false_node
            if not next_node:
                # Nowhere to go
                if self.trace:
                    self.info("Stopping at node '%s' with context %s" % (
                        self.node.name, self.context))
                return True
            # Move to next node
            if self.trace:
                self.info("Moving to node '%s'" % next_node.name)
            self.node = next_node
            self.save()
        return False  # Suspended

    def schedule(self):
        """
        Submit "wf.wfstep" job for the process
        """
        submit_job("wf.jobs", "wf.wfstep", key=self.id)

    def sleep(self, t):
        """
        Store sleep time and interrupt current handler

        :param t: Value stored to sleep_time
        :raises CannotSleepError: When current node's handler
            is conditional
        :raises SleepException: Always, after the process is saved
        """
        handler = self.node.handler_class()
        if handler.conditional:
            raise self.CannotSleepError(
                "Cannot sleep in conditional handler")
        self.sleep_time = t
        self.save()
        raise self.SleepException
class MIB(nosql.Document):
    """
    Compiled SNMP MIB, stored in the "noc.mibs" collection.

    OID data of the MIB is kept in MIBData documents referring
    to the MIB by id.
    """
    meta = {"collection": "noc.mibs", "allow_inheritance": False}
    name = nosql.StringField(required=True, unique=True)
    description = nosql.StringField(required=False)
    last_updated = nosql.DateTimeField(required=True)
    depends_on = nosql.ListField(nosql.StringField())
    # TC definitions: name -> SYNTAX
    typedefs = nosql.DictField(required=False)
    # Compiled MIB version
    version = nosql.IntField(required=False, default=0)

    MIBRequiredException = MIBRequiredException

    def __unicode__(self):
        return self.name

    def get_text(self):
        """
        Returns MIB text

        :return: Content of the first existing <name>.mib file found in
            local/share/mibs and share/mibs, empty string when none
        """
        for d in ["local/share/mibs", "share/mibs"]:
            path = os.path.join(d, self.name + ".mib")
            if os.path.isfile(path):
                with open(path) as f:
                    return f.read()
        return ""

    @classmethod
    def parse_syntax(cls, syntax):
        """
        Process part of smidump output and convert to syntax structure

        :param syntax: Dict, either an already compiled structure
            (having "base_type" key) or a part of smidump output
        :return: Dict with "base_type" and optional "display_hint" and
            "enum_map" keys. Empty dict when the referred typedef
            is not found in the referred MIB
        :raises MIBNotFoundException: When the referred MIB
            is not loaded
        """
        if "base_type" in syntax:
            # Already compiled
            return syntax
        s = {}
        if "basetype" in syntax:
            s["base_type"] = syntax["basetype"]
        if "name" in syntax and "module" in syntax:
            if syntax["module"] == "":
                # Empty module -> builtin types
                s["base_type"] = syntax["name"]
            else:
                # Resolve references
                mib = MIB.objects.filter(name=syntax["module"]).first()
                if mib is None:
                    raise MIBNotFoundException(syntax["module"])
                if not mib.typedefs or syntax["name"] not in mib.typedefs:
                    return {}
                td = mib.typedefs[syntax["name"]]
                for k in ["base_type", "display_hint", "enum_map"]:
                    if k in td:
                        s[k] = td[k]
        # KeyError here means that the base type cannot be determined
        if s["base_type"] in ("Enumeration", "Bits"):
            # Work on a copy to leave the typedef of the referred MIB intact
            enum_map = dict(s.get("enum_map", {}))
            for k in syntax:
                sk = syntax[k]
                if not isinstance(sk, dict):
                    continue
                if "nodetype" in sk and sk["nodetype"] == "namednumber":
                    enum_map[sk["number"]] = k
            s["enum_map"] = enum_map
        if "format" in syntax:
            s["display_hint"] = syntax["format"]
        return s

    @classmethod
    def load(cls, path, force=False):
        """
        Load MIB from file

        :param path: MIB path
        :param force: Load anyways
        :return: MIB object
        :raises ValueError: When the file does not exist
        :raises MIBRequiredException: When a required MIB is missed
        """
        if not os.path.exists(path):
            raise ValueError("File not found: %s" % path)
        # Build SMIPATH variable for smidump
        # to exclude locally installed MIBs
        smipath = ["share/mibs", "local/share/mibs"]
        # Pass MIB through smilint to detect missed modules
        smilint = subprocess.Popen(
            [config.get("path", "smilint"), "-m", path],
            stderr=subprocess.PIPE,
            env={"SMIPATH": ":".join(smipath)}
        )
        try:
            for l in smilint.stderr:
                match = rx_module_not_found.search(l.strip())
                if match:
                    raise MIBRequiredException("Uploaded MIB", match.group(1))
        finally:
            # Release the pipe and reap the child on every exit path
            smilint.stderr.close()
            smilint.wait()
        # Convert MIB to python module and load
        with temporary_file() as p:
            subprocess.check_call([
                config.get("path", "smidump"),
                "-k", "-q",
                "-f", "python",
                "-o", p, path
            ], env={"SMIPATH": ":".join(smipath)})
            # Strip non-ASCII characters from the generated module
            with open(p) as f:
                data = unicode(f.read(), "ascii", "ignore").encode("ascii")
            with open(p, "w") as f:
                f.write(data)
            m = imp.load_source("mib", p)
        mib_name = m.MIB["moduleName"]
        # Check module dependencies
        depends_on = {}  # MIB Name -> Object ID
        if "imports" in m.MIB:
            for i in m.MIB["imports"]:
                if "module" not in i:
                    continue
                rm = i["module"]
                if rm in depends_on:
                    continue
                md = MIB.objects.filter(name=rm).first()
                if md is None:
                    raise MIBRequiredException(mib_name, rm)
                depends_on[rm] = md
        # Get MIB latest revision date
        try:
            last_updated = datetime.datetime.strptime(
                sorted([x["date"] for x in m.MIB[mib_name]["revisions"]])[-1],
                "%Y-%m-%d %H:%M")
        except Exception:
            # No usable revision info, fall back to the epoch
            last_updated = datetime.datetime(year=1970, month=1, day=1)
        # Extract MIB typedefs
        typedefs = {}
        if "typedefs" in m.MIB:
            for t in m.MIB["typedefs"]:
                typedefs[t] = cls.parse_syntax(m.MIB["typedefs"][t])
        # Check mib already uploaded
        mib_description = m.MIB[mib_name].get("description", None)
        mib = MIB.objects.filter(name=mib_name).first()
        if force and mib:
            # Delete mib to forceful update
            MIBData.objects.filter(mib=mib.id).delete()
            mib.clean()
            mib.delete()
            mib = None
        if mib is not None:
            # Skip same version
            if mib.last_updated >= last_updated:
                return mib
            mib.description = mib_description
            mib.last_updated = last_updated
            mib.depends_on = sorted(depends_on)
            mib.typedefs = typedefs
            mib.save()
            # Delete all MIB Data
            mib.clean()
        else:
            # Create MIB
            mib = MIB(name=mib_name, description=mib_description,
                      last_updated=last_updated,
                      depends_on=sorted(depends_on),
                      typedefs=typedefs)
            mib.save()
        # Upload MIB data
        data = []
        for i in ["nodes", "notifications"]:
            if i in m.MIB:
                data += [{
                    "name": "%s::%s" % (mib_name, node),
                    "oid": v["oid"],
                    "description": v.get("description"),
                    "syntax": v["syntax"]["type"] if "syntax" in v else None
                } for node, v in m.MIB[i].items()]
        mib.load_data(data)
        # Save MIB to cache if not uploaded from cache
        lcd = os.path.join("local", "share", "mibs")
        if not os.path.isdir(lcd):  # Ensure directory exists
            os.makedirs(lcd)
        local_cache_path = os.path.join(lcd, "%s.mib" % mib_name)
        cache_path = os.path.join("share", "mibs", "%s.mib" % mib_name)
        if ((os.path.exists(local_cache_path) and
                os.path.samefile(path, local_cache_path)) or
                (os.path.exists(cache_path) and
                    os.path.samefile(path, cache_path))):
            return mib
        with open(path) as f:
            data = f.read()
        safe_rewrite(local_cache_path, data)
        return mib

    def load_data(self, data):
        """
        Load mib data from list of {oid:, name:, description:, syntax:}

        OIDs already known under another MIB are resolved according to
        MIBPreference: the MIB with the lower preference value owns
        the OID, the other name is kept as an alias.

        :param data: List of dicts
        :return: None
        :raises OIDCollision: When the collision cannot be resolved
        """
        # Get MIB preference
        mp = MIBPreference.objects.filter(mib=self.name).first()
        mib_preference = mp.preference if mp else None
        prefs = {}  # MIB Preferences cache
        # Load data
        for v in data:
            oid = v["oid"]
            oid_name = v["name"]
            description = v.get("description", None)
            o = MIBData.objects.filter(oid=oid).first()
            if o is not None:
                if o.name == oid_name:
                    # Same oid, same name: duplicated declaration.
                    # Silently skip
                    continue
                # For same MIB - leave first entry
                if oid_name.split("::", 1)[0] == o.name.split("::", 1)[0]:
                    continue
                # Try to resolve collision
                if not mib_preference:
                    # No preference for target MIB
                    raise OIDCollision(oid, oid_name, o.name,
                                       "No preference for %s" % self.name)
                o_mib = o.name.split("::")[0]
                if o_mib not in prefs:
                    mp = MIBPreference.objects.filter(mib=o_mib).first()
                    if not mp:
                        # No preference for destination MIB
                        raise OIDCollision(oid, oid_name, o.name,
                                           "No preference for %s" % o_mib)
                    prefs[o_mib] = mp.preference  # Add to cache
                o_preference = prefs[o_mib]
                if mib_preference == o_preference:
                    # Equal preferences, collision
                    raise OIDCollision(oid, oid_name, o.name,
                                       "Equal preferences")
                if mib_preference < o_preference:
                    # Replace existing
                    o.aliases = sorted(o.aliases + [o.name])
                    o.name = oid_name
                    o.mib = self.id
                    if description:
                        o.description = description
                    syntax = v.get("syntax")
                    if syntax:
                        o.syntax = MIB.parse_syntax(syntax)
                    o.save()
                else:
                    # Append to aliases
                    if oid_name not in o.aliases:
                        o.aliases = sorted(o.aliases + [oid_name])
                        o.save()
            else:
                # No OID collision found, save
                syntax = v.get("syntax")
                if syntax:
                    syntax = MIB.parse_syntax(syntax)
                MIBData(mib=self.id, oid=oid, name=oid_name,
                        description=description, syntax=syntax).save()

    @classmethod
    def get_oid(cls, name):
        """
        Get OID by name

        :param name: Symbolic name. The part split off
            by rx_tailing_numbers is appended to the resulting OID
        :return: OID or None when the name is unknown
        """
        tail = ""
        match = rx_tailing_numbers.match(name)
        if match:
            name, tail = match.groups()
        # Search by primary name
        d = MIBData.objects.filter(name=name).first()
        if not d:
            # Search by aliases
            d = MIBData.objects.filter(aliases=name).first()
        if d:
            return d.oid + tail
        return None

    @classmethod
    def get_name(cls, oid):
        """
        Get longest match name by OID

        :param oid: OID
        :return: Symbolic name followed by the unmatched tail,
            or the OID itself when nothing matches
        """
        oid = OIDAlias.rewrite(oid)
        l_oid = oid.split(".")
        rest = []
        while l_oid:
            c_oid = ".".join(l_oid)
            d = MIBData.objects.filter(oid=c_oid).first()
            if d:
                return MIBAlias.rewrite(".".join([d.name] + rest))
            else:
                # Strip the last component and retry with a shorter OID
                rest = [l_oid.pop()] + rest
        return oid

    @classmethod
    def get_name_and_syntax(cls, oid):
        """
        Get longest match name and syntax by OID

        :param oid: OID
        :return: (name, syntax)
            or (oid, None) when nothing matches
        """
        oid = OIDAlias.rewrite(oid)
        l_oid = oid.split(".")
        rest = []
        while l_oid:
            c_oid = ".".join(l_oid)
            d = MIBData.objects.filter(oid=c_oid).first()
            if d:
                name = d.name
                if rest:
                    # Components were collected last-to-first
                    name += "." + ".".join(reversed(rest))
                return (MIBAlias.rewrite(name),
                        SyntaxAlias.rewrite(name, d.syntax))
            else:
                rest += [l_oid.pop()]
        return oid, None

    @classmethod
    def get_description(cls, name):
        """
        Get longest match description by name

        :param name: Symbolic name
        :return: Description or None when the name is unknown
        """
        match = rx_tailing_numbers.match(name)
        if match:
            name, _ = match.groups()
        # Search by primary name
        d = MIBData.objects.filter(name=name).first()
        if not d:
            # Search by aliases
            d = MIBData.objects.filter(aliases=name).first()
        if d:
            return d.description
        else:
            return None

    @property
    def depended_by(self):
        """
        MIBs listing this one in their depends_on
        """
        return MIB.objects.filter(depends_on=self.name)

    def clean(self):
        """
        Gracefully wipe out MIB data

        Data without aliases is deleted. Data with aliases is kept
        with the preferable alias (lowest MIBPreference value)
        promoted to the primary name.

        NOTE(review): the name matches the clean() hook which some
        document frameworks call on save -- confirm that save()
        of this nosql wrapper does not trigger it.

        :raises Exception: When alias' MIB has no preference set
        """
        # Delete data without aliases
        MIBData.objects.filter(mib=self.id, aliases=[]).delete()
        # Dereference aliases
        prefs = {}  # MIB -> Preference
        for o in MIBData.objects.filter(mib=self.id, aliases__ne=[]):
            if not o.aliases:  # NO aliases
                o.delete()
                continue
            if len(o.aliases) == 1:
                # Only one alias
                ba = o.aliases[0]
            else:
                # Find preferable alias
                ba = None
                lp = None
                for a in o.aliases:
                    am = a.split("::")[0]
                    # Find MIB preference
                    if am not in prefs:
                        # Query the collection: a freshly constructed
                        # MIBPreference document has no .first()
                        p = MIBPreference.objects.filter(mib=am).first()
                        if p is None:
                            raise Exception("No preference for %s" % am)
                        prefs[am] = p.preference
                    p = prefs[am]
                    if lp is None or p < lp:
                        # Better
                        ba = a
                        lp = p
            # Promote preferable alias
            o.name = ba
            o.aliases = [a for a in o.aliases if a != ba]
            o.save()

    @classmethod
    def resolve_vars(cls, vars):
        """
        Resolve FM key -> value dict according to MIBs

        :param cls:
        :param vars: Dict. Keys which are not OIDs are skipped
        :return: Dict of resolved name -> rendered value. Contains
            only entries where resolving changed the key or the value
        """
        r = {}
        for k in vars:
            if not is_oid(k):
                # Nothing to resolve
                continue
            v = fm_unescape(vars[k])
            rk, syntax = cls.get_name_and_syntax(k)
            rv = v
            if syntax:
                # Format value according to syntax
                if syntax["base_type"] == "Enumeration":
                    # Expand enumerated type
                    try:
                        rv = syntax["enum_map"][str(v)]
                    except KeyError:
                        pass
                elif syntax["base_type"] == "Bits":
                    # @todo: Fix ugly hack
                    if v.startswith("="):
                        xv = int(v[1:], 16)
                    else:
                        xv = 0
                        for c in v:
                            xv = (xv << 8) + ord(c)
                    # Decode
                    b_map = syntax.get("enum_map", {})
                    b = []
                    n = 0
                    while xv:
                        if xv & 1:
                            x = str(n)
                            if x in b_map:
                                b = [b_map[x]] + b
                            else:
                                # Unnamed bit is shown as hex mask.
                                # Prepend to keep the bits decoded so far
                                b = ["%X" % (1 << n)] + b
                        n += 1
                        xv >>= 1
                    rv = "(%s)" % ",".join(b)
                else:
                    # Render according to TC
                    rv = render_tc(v, syntax["base_type"],
                                   syntax.get("display_hint", None))
                    try:
                        unicode(rv, "utf8")
                    except Exception:
                        # Escape invalid UTF8
                        rv = fm_escape(rv)
            else:
                try:
                    unicode(rv, "utf8")
                except Exception:
                    # escape invalid UTF8
                    rv = fm_escape(rv)
            if is_oid(v):
                # Resolve OID in value
                rv = MIB.get_name(v)
            if rk != k or rv != v:
                r[rk] = rv
        return r
class ArchivedAlarm(nosql.Document):
    """
    Alarm stored in the noc.alarms.archived collection (status "C")
    """
    meta = {
        "collection": "noc.alarms.archived",
        "allow_inheritance": False,
        "indexes": [
            "root",
            "control_time",
            "timestamp",
            "managed_object"
        ]
    }
    # Status code written to the log entries of this alarm
    status = "C"

    timestamp = nosql.DateTimeField(required=True)
    clear_timestamp = nosql.DateTimeField(required=True)
    managed_object = nosql.ForeignKeyField(ManagedObject)
    alarm_class = nosql.PlainReferenceField(AlarmClass)
    severity = nosql.IntField(required=True)
    vars = nosql.DictField()
    log = nosql.ListField(nosql.EmbeddedDocumentField(AlarmLog))
    # Ids of the related events
    opening_event = nosql.ObjectIdField(required=False)
    closing_event = nosql.ObjectIdField(required=False)
    # Number of reopens
    reopens = nosql.IntField(required=False)
    # Copied discriminator
    discriminator = nosql.StringField(required=False)
    # Control time: within it the alarm will be reopened
    # instead of creating the new alarm
    control_time = nosql.DateTimeField(required=False)
    # RCA
    # Reference to root cause (Active Alarm or Archived Alarm instance)
    root = nosql.ObjectIdField(required=False)

    def __unicode__(self):
        return u"%s" % self.id

    def log_message(self, message):
        """
        Append a log entry keeping the current status and save the alarm

        :param message: Text of the log entry
        """
        entry = AlarmLog(
            timestamp=datetime.datetime.now(),
            from_status=self.status,
            to_status=self.status,
            message=message
        )
        self.log += [entry]
        self.save()

    def get_template_vars(self):
        """
        Prepare template variables

        :return: Copy of the alarm vars with the alarm itself
            added under the "event" key
        """
        template_vars = self.vars.copy()
        template_vars["event"] = self
        return template_vars

    @property
    def subject(self):
        """
        Alarm class subject template rendered with the alarm variables.
        Results of 255 characters and longer are cut down to the first
        and the last 125 characters
        """
        ctx = Context(self.get_template_vars())
        template = Template(self.alarm_class.subject_template)
        rendered = template.render(ctx)
        if len(rendered) >= 255:
            rendered = rendered[:125] + " ... " + rendered[-125:]
        return rendered

    @property
    def body(self):
        """
        Alarm class body template rendered with the alarm variables
        """
        ctx = Context(self.get_template_vars())
        template = Template(self.alarm_class.body_template)
        return template.render(ctx)

    @property
    def duration(self):
        """
        Seconds between timestamp and clear_timestamp
        (microseconds are ignored)
        """
        delta = self.clear_timestamp - self.timestamp
        return delta.days * 86400 + delta.seconds

    @property
    def display_duration(self):
        """
        Duration formatted as "HH:MM:SS", prefixed with "<days>d "
        when the days part is not zero
        """
        delta = self.clear_timestamp - self.timestamp
        # timedelta.seconds is always below one day,
        # so the hours never reach 24
        total_mins, secs = divmod(delta.seconds, 60)
        hours, mins = divmod(total_mins, 60)
        if delta.days:
            return "%dd %02d:%02d:%02d" % (delta.days, hours, mins, secs)
        return "%02d:%02d:%02d" % (hours, mins, secs)

    @property
    def effective_style(self):
        """
        Style of the severity level matching the alarm severity
        """
        level = AlarmSeverity.get_severity(self.severity)
        return level.style

    def set_root(self, root_alarm):
        """
        Does nothing for archived alarms
        """
        pass

    def reopen(self, message):
        """
        Reopen alarm back

        Creates the ActiveAlarm with the same id, removes the archived
        record and the pending control_notify job. EV_ALARM_REOPENED is
        sent only for alarms without root which were not reopened before.

        :param message: Text of the "C" -> "A" log entry
        :return: Saved ActiveAlarm instance
        """
        prev_reopens = self.reopens or 0
        now = datetime.datetime.now()
        reopen_entry = AlarmLog(
            timestamp=now,
            from_status="C",
            to_status="A",
            message=message
        )
        alarm = ActiveAlarm(
            id=self.id,
            timestamp=self.timestamp,
            last_update=now,
            managed_object=self.managed_object,
            alarm_class=self.alarm_class,
            severity=self.severity,
            vars=self.vars,
            log=self.log + [reopen_entry],
            root=self.root,
            opening_event=self.opening_event,
            discriminator=self.discriminator,
            reopens=prev_reopens + 1
        )
        alarm.save()
        # @todo: Clear related correlator jobs
        self.delete()
        # Remove pending control_notify job
        remove_job("fm.correlator", "control_notify", key=alarm.id)
        # Send notifications
        # Do not set notifications for child and for previously reopened
        # alarms
        if alarm.root or prev_reopens:
            return alarm
        alarm.managed_object.event(
            alarm.managed_object.EV_ALARM_REOPENED,
            {
                "alarm": alarm,
                "subject": alarm.subject,
                "body": alarm.body,
                "symptoms": alarm.alarm_class.symptoms,
                "recommended_actions": alarm.alarm_class.recommended_actions,
                "probable_causes": alarm.alarm_class.probable_causes
            }
        )
        return alarm
class ArchivedAlarm(nosql.Document):
    """
    Alarm stored in the noc.alarms.archived collection (status "C")
    """
    meta = {
        "collection": "noc.alarms.archived",
        "strict": False,
        "auto_create_index": False,
        "indexes": [
            "root",
            "timestamp",
            "managed_object",
            ("managed_object", "discriminator", "control_time"),
            "escalation_tt",
            "escalation_ts"
        ]
    }
    # Status code written to the log entries of this alarm
    status = "C"

    timestamp = nosql.DateTimeField(required=True)
    clear_timestamp = nosql.DateTimeField(required=True)
    managed_object = nosql.ForeignKeyField(ManagedObject)
    alarm_class = nosql.PlainReferenceField(AlarmClass)
    severity = nosql.IntField(required=True)
    vars = nosql.DictField()
    log = nosql.ListField(nosql.EmbeddedDocumentField(AlarmLog))
    # Ids of the related events
    opening_event = nosql.ObjectIdField(required=False)
    closing_event = nosql.ObjectIdField(required=False)
    # Number of reopens
    reopens = nosql.IntField(required=False)
    # Copied discriminator
    discriminator = nosql.StringField(required=False)
    # Control time: within it the alarm will be reopened
    # instead of creating the new alarm
    control_time = nosql.DateTimeField(required=False)
    # RCA
    # Reference to root cause (Active Alarm or Archived Alarm instance)
    root = nosql.ObjectIdField(required=False)
    # Escalated TT ID in form
    # <external system name>:<external tt id>
    escalation_ts = nosql.DateTimeField(required=False)
    escalation_tt = nosql.StringField(required=False)
    escalation_error = nosql.StringField(required=False)
    escalation_ctx = nosql.LongField(required=False)
    escalation_close_ts = nosql.DateTimeField(required=False)
    escalation_close_error = nosql.StringField(required=False)
    escalation_close_ctx = nosql.LongField(required=False)
    # Directly affected services summary, grouped by profiles
    # (connected to the same managed object)
    direct_services = nosql.ListField(nosql.EmbeddedDocumentField(SummaryItem))
    direct_subscribers = nosql.ListField(
        nosql.EmbeddedDocumentField(SummaryItem))
    # Indirectly affected services summary, grouped by profiles
    # (covered by this and all inferred alarms)
    total_objects = nosql.ListField(
        nosql.EmbeddedDocumentField(ObjectSummaryItem))
    total_services = nosql.ListField(nosql.EmbeddedDocumentField(SummaryItem))
    total_subscribers = nosql.ListField(
        nosql.EmbeddedDocumentField(SummaryItem))
    # Paths
    adm_path = nosql.ListField(nosql.IntField())
    segment_path = nosql.ListField(nosql.ObjectIdField())
    container_path = nosql.ListField(nosql.ObjectIdField())
    # Uplinks, for topology_rca only
    uplinks = nosql.ListField(nosql.IntField())

    def __unicode__(self):
        return u"%s" % self.id

    def iter_changed_datastream(self):
        """
        Yield ("alarm", <alarm id>) when config.datastream.enable_alarm
        is set, nothing otherwise
        """
        if config.datastream.enable_alarm:
            yield "alarm", self.id

    def log_message(self, message):
        """
        Append a log entry keeping the current status and save the alarm

        :param message: Text of the log entry
        """
        self.log += [
            AlarmLog(
                timestamp=datetime.datetime.now(),
                from_status=self.status,
                to_status=self.status,
                message=message
            )
        ]
        self.save()

    def get_template_vars(self):
        """
        Prepare template variables

        :return: Copy of the alarm vars with the alarm itself
            added under the "alarm" key
        """
        vars = self.vars.copy()
        vars.update({"alarm": self})
        return vars

    @property
    def subject(self):
        """
        Alarm class subject template rendered with the alarm variables.
        Results of 255 characters and longer are cut down to the first
        and the last 125 characters
        """
        ctx = Context(self.get_template_vars())
        s = Template(self.alarm_class.subject_template).render(ctx)
        if len(s) >= 255:
            s = s[:125] + " ... " + s[-125:]
        return s

    @property
    def body(self):
        """
        Alarm class body template rendered with the alarm variables
        """
        ctx = Context(self.get_template_vars())
        return Template(self.alarm_class.body_template).render(ctx)

    @property
    def duration(self):
        """
        Seconds between timestamp and clear_timestamp
        (microseconds are ignored)
        """
        dt = self.clear_timestamp - self.timestamp
        return dt.days * 86400 + dt.seconds

    @property
    def display_duration(self):
        """
        Duration formatted as "HH:MM:SS", prefixed with "<days>d "
        when the days part is not zero
        """
        duration = self.clear_timestamp - self.timestamp
        # divmod keeps all the parts integer regardless of the meaning
        # of "/". timedelta.seconds is always below one day, so the hours
        # never reach 24
        total_mins, secs = divmod(duration.seconds, 60)
        hours, mins = divmod(total_mins, 60)
        days = duration.days
        if days:
            return "%dd %02d:%02d:%02d" % (days, hours, mins, secs)
        return "%02d:%02d:%02d" % (hours, mins, secs)

    @property
    def effective_style(self):
        """
        Style of the severity level matching the alarm severity
        """
        return AlarmSeverity.get_severity(self.severity).style

    def set_root(self, root_alarm):
        """
        Does nothing for archived alarms
        """
        pass

    def reopen(self, message):
        """
        Reopen alarm back

        Creates the ActiveAlarm with the same id, carrying over
        the escalation data, summaries, paths and uplinks, then removes
        the archived record. EV_ALARM_REOPENED is sent only for alarms
        without root which were not reopened before.

        :param message: Text of the "C" -> "A" log entry
        :return: Saved ActiveAlarm instance
        """
        reopens = self.reopens or 0
        ts = datetime.datetime.now()
        log = self.log + [
            AlarmLog(
                timestamp=ts,
                from_status="C",
                to_status="A",
                message=message
            )
        ]
        a = ActiveAlarm(
            id=self.id,
            timestamp=self.timestamp,
            last_update=ts,
            managed_object=self.managed_object,
            alarm_class=self.alarm_class,
            severity=self.severity,
            vars=self.vars,
            log=log,
            root=self.root,
            escalation_ts=self.escalation_ts,
            escalation_tt=self.escalation_tt,
            escalation_error=self.escalation_error,
            escalation_ctx=self.escalation_ctx,
            opening_event=self.opening_event,
            discriminator=self.discriminator,
            reopens=reopens + 1,
            direct_services=self.direct_services,
            direct_subscribers=self.direct_subscribers,
            total_objects=self.total_objects,
            total_services=self.total_services,
            total_subscribers=self.total_subscribers,
            adm_path=self.adm_path,
            segment_path=self.segment_path,
            container_path=self.container_path,
            uplinks=self.uplinks
        )
        a.save()
        # @todo: Clear related correlator jobs
        self.delete()
        # Send notifications
        # Do not set notifications for child and for previously reopened
        # alarms
        if not a.root and not reopens:
            a.managed_object.event(
                a.managed_object.EV_ALARM_REOPENED,
                {
                    "alarm": a,
                    "subject": a.subject,
                    "body": a.body,
                    "symptoms": a.alarm_class.symptoms,
                    "recommended_actions": a.alarm_class.recommended_actions,
                    "probable_causes": a.alarm_class.probable_causes
                }
            )
        return a

    def iter_consequences(self):
        """
        Generator yielding all consequences alarm

        Walks archived alarms referring to this one as root, depth-first:
        each direct consequence is followed by its own consequences
        """
        for a in ArchivedAlarm.objects.filter(root=self.id):
            yield a
            for ca in a.iter_consequences():
                yield ca

    def iter_affected(self):
        """
        Generator yielding all affected managed objects

        Yields the managed object of this alarm first, then the managed
        objects of all consequences, each one only once
        """
        seen = {self.managed_object}
        yield self.managed_object
        for a in self.iter_consequences():
            if a.managed_object not in seen:
                seen.add(a.managed_object)
                yield a.managed_object

    def set_escalation_close_error(self, error):
        """
        Store the escalation close error in the instance and
        in the database

        :param error: Error text
        """
        # Keep the instance in line with the stored field;
        # escalation_error is a separate field and is left untouched
        self.escalation_close_error = error
        self._get_collection().update(
            {"_id": self.id},
            {"$set": {"escalation_close_error": error}}
        )

    def close_escalation(self):
        """
        Set escalation_close_ts to the current time in the instance and
        in the database
        """
        now = datetime.datetime.now()
        self.escalation_close_ts = now
        self._get_collection().update(
            {"_id": self.id},
            {"$set": {"escalation_close_ts": now}}
        )

    def set_escalation_close_ctx(self):
        """
        Store the context returned by get_current_span() as
        escalation_close_ctx in the instance and in the database.
        Nothing is written when both the current and the previously
        stored contexts are empty
        """
        current_context, _ = get_current_span()
        if current_context or self.escalation_close_ctx:
            self.escalation_close_ctx = current_context
            self._get_collection().update(
                {"_id": self.id},
                {"$set": {"escalation_close_ctx": current_context}}
            )