async def raise_alarm(self, r, e): managed_object = self.eval_expression(r.managed_object, event=e) if not managed_object: self.logger.info("Empty managed object, ignoring") return # @todo: Make configurable if not managed_object.is_managed: self.logger.info( "Managed object is not managed. Do not raise alarm") return if e.managed_object.id != managed_object.id: metrics["alarm_change_mo"] += 1 self.logger.info("Changing managed object to %s", managed_object.name) discriminator, vars = r.get_vars(e) if r.unique: assert discriminator is not None a = ActiveAlarm.objects.filter( managed_object=managed_object.id, discriminator=discriminator).first() if not a: # Try to reopen alarm a = ArchivedAlarm.objects.filter( managed_object=managed_object.id, discriminator=discriminator, control_time__gte=e.timestamp, ).first() if a: # Reopen alarm self.logger.info( "[%s|%s|%s] %s reopens alarm %s(%s)", e.id, managed_object.name, managed_object.address, e.event_class.name, a.alarm_class.name, a.id, ) a = a.reopen("Reopened by disposition rule '%s'" % r.u_name) metrics["alarm_reopen"] += 1 if a: # Active alarm found, refresh self.logger.info( "[%s|%s|%s] Contributing event %s to active alarm %s(%s)", e.id, managed_object.name, managed_object.address, e.event_class.name, a.alarm_class.name, a.id, ) # Contribute event to alarm e.contribute_to_alarm(a) if e.timestamp < a.timestamp: # Set to earlier date a.timestamp = e.timestamp a.save() elif e.timestamp > a.last_update: # Refresh last update a.last_update = e.timestamp a.save() metrics["alarm_contribute"] += 1 return # Calculate alarm coverage summary = ServiceSummary.get_object_summary(managed_object) summary["object"] = {managed_object.object_profile.id: 1} # severity = max(ServiceSummary.get_severity(summary), 1) self.logger.info( "[%s|%s|%s] %s: Calculated alarm severity is: %s", e.id, managed_object.name, managed_object.address, r.u_name, severity, ) # Create new alarm direct_services = SummaryItem.dict_to_items(summary["service"]) direct_subscribers = SummaryItem.dict_to_items(summary["subscriber"]) a = ActiveAlarm( timestamp=e.timestamp, last_update=e.timestamp, managed_object=managed_object.id, alarm_class=r.alarm_class, severity=severity, vars=vars, discriminator=discriminator, direct_services=direct_services, direct_subscribers=direct_subscribers, total_objects=ObjectSummaryItem.dict_to_items(summary["object"]), total_services=direct_services, total_subscribers=direct_subscribers, log=[ AlarmLog( timestamp=datetime.datetime.now(), from_status="A", to_status="A", message="Alarm risen from event %s(%s) by rule '%s'" % (str(e.id), str(e.event_class.name), r.u_name), ) ], opening_event=e.id, ) a.save() e.contribute_to_alarm(a) self.logger.info( "[%s|%s|%s] %s raises alarm %s(%s): %r", e.id, managed_object.name, managed_object.address, e.event_class.name, a.alarm_class.name, a.id, a.vars, ) metrics["alarm_raise"] += 1 await self.correlate(r, a) # Notify about new alarm if not a.root: a.managed_object.event( a.managed_object.EV_ALARM_RISEN, { "alarm": a, "subject": a.subject, "body": a.body, "symptoms": a.alarm_class.symptoms, "recommended_actions": a.alarm_class.recommended_actions, "probable_causes": a.alarm_class.probable_causes, }, delay=a.alarm_class.get_notification_delay(), ) # Gather diagnostics when necessary AlarmDiagnosticConfig.on_raise(a) # Watch for escalations, when necessary if config.correlator.auto_escalation and not a.root: AlarmEscalation.watch_escalations(a)
def get_data(self): intervals = ( ("y", 31557617), # 60 * 60 * 24 * 7 * 52 ("w", 604800), # 60 * 60 * 24 * 7 ("d", 86400), # 60 * 60 * 24 ("h", 3600), # 60 * 60 ("m", 60), ("s", 1), ) def display_time(seconds): result = [] for name, count in intervals: value = seconds // count if value: seconds -= value * count if value == 1: name = name.rstrip("s") result.append("{}{}".format(value, name)) return ", ".join(result[:-1]) def sortdict(dct): kys = sorted(dct.keys()) res = OrderedDict() for x in kys: for k, v in dct.items(): if k == x: res[k] = v return res def get_container_path(self): # Get container path if not self.object: return None cp = [] if self.object.container: c = self.object.container.id while c: try: o = Object.objects.get(id=c) # @todo: Address data if o.container: cp.insert(0, {"id": o.id, "name": o.name}) c = o.container.id if o.container else None except DoesNotExist: metrics["error", ("type", "no_such_object")] += 1 break return cp if not self.object: return None # @todo: Stage # @todo: Service range # @todo: Open TT now = datetime.datetime.now() # Get object status and uptime alarms = list( ActiveAlarm.objects.filter(managed_object=self.object.id)) current_start = None duration = None if self.object.is_managed: if self.object.get_status(): if alarms: current_state = "alarm" else: current_state = "up" uptime = Uptime.objects.filter(object=self.object.id, stop=None).first() if uptime: current_start = uptime.start else: current_state = "down" outage = Outage.objects.filter(object=self.object.id, stop=None).first() if outage is not None: current_start = outage.start else: current_state = "unmanaged" if current_start: duration = now - current_start cp = get_container_path(self) # MAC addresses macs = [] o_macs = DiscoveryID.macs_for_object(self.object) if o_macs: for f, l in o_macs: if f == l: macs += [f] else: macs += ["%s - %s" % (f, l)] # Hostname hostname = "" did = DiscoveryID.objects.filter(object=self.object.id).first() if did and did.hostname: hostname = did.hostname # Links uplinks = set(self.object.data.uplinks) if len(uplinks) > 1: if self.object.segment.lost_redundancy: redundancy = "L" else: redundancy = "R" else: redundancy = "N" links = [] for _link in Link.object_links(self.object): local_interfaces = [] remote_interfaces = [] remote_objects = set() for iface in _link.interfaces: if iface.managed_object.id == self.object.id: local_interfaces += [iface] else: remote_interfaces += [iface] remote_objects.add(iface.managed_object) if len(remote_objects) == 1: ro = remote_objects.pop() if ro.id in uplinks: role = "uplink" else: role = "downlink" links += [{ "id": _link.id, "role": role, "local_interface": sorted(local_interfaces, key=lambda x: alnum_key(x.name)), "remote_object": ro, "remote_interface": sorted(remote_interfaces, key=lambda x: alnum_key(x.name)), "remote_status": "up" if ro.get_status() else "down", }] links = sorted( links, key=lambda x: (x["role"] != "uplink", alnum_key(x["local_interface"][0].name)), ) # Build global services summary service_summary = ServiceSummary.get_object_summary(self.object) # Interfaces interfaces = [] mo = ManagedObject.objects.filter(id=self.object.id) mo = mo[0] ifaces_metrics, last_ts = get_interface_metrics(mo) ifaces_metrics = ifaces_metrics[mo] objects_metrics, last_time = get_objects_metrics(mo) objects_metrics = objects_metrics.get(mo) # Sensors sensors_metrics = None s_metrics = None sensors = {} s_meta = [] STATUS = {0: "OK", 1: "Alarm"} meric_map = {} if mo.get_caps().get("Sensor | Controller"): for mc in MetricType.objects.filter(scope=MetricScope.objects.get( name="Environment")): if meric_map: meric_map["map"].update({mc.field_name: mc.name}) else: meric_map = { "table_name": mc.scope.table_name, "map": { mc.field_name: mc.name } } sensors_metrics, last_ts = get_interface_metrics(mo, meric_map) sensors_metrics = sensors_metrics[mo] m_tp = {} if mo.object_profile.metrics: for mt in mo.object_profile.metrics: if mt.get("threshold_profile"): threshold_profile = ThresholdProfile.get_by_id( mt.get("threshold_profile")) m_tp[MetricType.get_by_id( mt.get("metric_type")).name] = threshold_profile data = {} meta = [] metric_type_name = dict(MetricType.objects.filter().scalar( "name", "measure")) metric_type_field = dict(MetricType.objects.filter().scalar( "field_name", "measure")) if objects_metrics: for path, mres in objects_metrics.items(): t_v = False for key in mres: m_path = path if any(path.split("|")) else key m_path = " | ".join(kk.strip() for kk in m_path.split("|")) if m_tp.get(key): t_v = self.get_threshold_config( m_tp.get(key), int(mres[key])) val = { "name": m_path, "type": "" if m_path == "Object | SysUptime" else metric_type_name[key], "value": display_time(int(mres[key])) if m_path == "Object | SysUptime" else mres[key], "threshold": t_v, } if data.get(key): data[key] += [val] else: data[key] = [val] data = sortdict(data) for k, d in data.items(): collapsed = False if len(d) == 1: collapsed = True for dd in d: isdanger = False if dd["threshold"]: isdanger = True collapsed = True meta.append({ "name": k, "value": d, "collapsed": collapsed, "isdanger": isdanger }) for i in Interface.objects.filter(managed_object=self.object.id, type="physical"): load_in = "-" load_out = "-" errors_in = "-" errors_out = "-" iface_metrics = ifaces_metrics.get(str(i.name)) if iface_metrics: for key, value in iface_metrics.items(): metric_type = metric_type_name.get( key) or metric_type_field.get(key) if key == "Interface | Load | In": load_in = ("%s%s" % (self.humanize_speed(value, metric_type), metric_type) if value else "-") if key == "Interface | Load | Out": load_out = ("%s%s" % (self.humanize_speed(value, metric_type), metric_type) if value else "-") if key == "Interface | Errors | In": errors_in = value if value else "-" if key == "Interface | Errors | Out": errors_out = value if value else "-" interfaces += [{ "id": i.id, "name": i.name, "admin_status": i.admin_status, "oper_status": i.oper_status, "mac": i.mac or "", "full_duplex": i.full_duplex, "load_in": load_in, "load_out": load_out, "errors_in": errors_in, "errors_out": errors_out, "speed": max([i.in_speed or 0, i.out_speed or 0]) / 1000, "untagged_vlan": None, "tagged_vlan": None, "profile": i.profile, "service": i.service, "service_summary": service_summary.get("interface").get(i.id, {}), "description": i.description, }] if sensors_metrics: s_metrics = sensors_metrics.get(str(i.name)) if s_metrics: sens_metrics = [] for i_metrics in i.profile.metrics: sens_metrics.append(i_metrics.metric_type.name) for key, value in s_metrics.items(): if key not in sens_metrics: continue val = { "name": key, "type": metric_type_name[key], "value": STATUS.get(value) if metric_type_name[key] == " " else value, "threshold": None, } if sensors.get(i.name): sensors[i.name] += [val] else: sensors[i.name] = [val] si = list(i.subinterface_set.filter(enabled_afi="BRIDGE")) if len(si) == 1: si = si[0] interfaces[-1]["untagged_vlan"] = si.untagged_vlan interfaces[-1]["tagged_vlans"] = list_to_ranges( si.tagged_vlans).replace(",", ", ") if sensors: sensors = sortdict(sensors) for k, d in sensors.items(): for dd in d: isdanger = False if dd["threshold"]: isdanger = True s_meta.append({"name": k, "value": d, "isdanger": isdanger}) interfaces = sorted(interfaces, key=lambda x: alnum_key(x["name"])) # Resource groups # Service groups (i.e. server) static_services = set(self.object.static_service_groups) service_groups = [] for rg_id in self.object.effective_service_groups: rg = ResourceGroup.get_by_id(rg_id) service_groups += [{ "id": rg_id, "name": rg.name, "technology": rg.technology, "is_static": rg_id in static_services, }] # Client groups (i.e. client) static_clients = set(self.object.static_client_groups) client_groups = [] for rg_id in self.object.effective_client_groups: rg = ResourceGroup.get_by_id(rg_id) client_groups += [{ "id": rg_id, "name": rg.name, "technology": rg.technology, "is_static": rg_id in static_clients, }] # @todo: Administrative domain path # Alarms alarm_list = [] for a in alarms: alarm_list += [{ "id": a.id, "root_id": self.get_root(alarms), "timestamp": a.timestamp, "duration": now - a.timestamp, "subject": a.subject, "managed_object": a.managed_object, "service_summary": { "service": SummaryItem.items_to_dict(a.total_services), "subscriber": SummaryItem.items_to_dict(a.total_subscribers), }, "alarm_class": a.alarm_class, }] alarm_list = sorted(alarm_list, key=operator.itemgetter("timestamp")) # Maintenance maintenance = [] for ao in AffectedObjects._get_collection().find( {"affected_objects": { "object": self.object.id }}): m = Maintenance.objects.filter( id=ao["maintenance"], is_completed=False, start__lte=now + datetime.timedelta(hours=1), ).first() if m: maintenance += [{ "maintenance": m, "id": m.id, "subject": m.subject, "start": m.start, "stop": m.stop, "in_progress": m.start <= now, }] # Get Inventory inv = [] for p in self.object.get_inventory(): c = self.get_nested_inventory(p) c["name"] = p.name or self.object.name inv += [c] # Build result if self.object.platform is not None: platform = self.object.platform.name else: platform = "Unknown" if self.object.version is not None: version = self.object.version.version else: version = "" r = { "id": self.object.id, "object": self.object, "name": self.object.name, "address": self.object.address, "platform": platform, # self.object.platform.name if self.object.platform else "Unknown", "version": version, # self.object.version.version if self.object.version else "", "description": self.object.description, "object_profile": self.object.object_profile.id, "object_profile_name": self.object.object_profile.name, "hostname": hostname, "macs": ", ".join(sorted(macs)), "segment": self.object.segment, "firmware_status": FirmwarePolicy.get_status(self.object.platform, self.object.version), "firmware_recommended": FirmwarePolicy.get_recommended_version(self.object.platform), "service_summary": service_summary, "container_path": cp, "current_state": current_state, # Start of uptime/downtime "current_start": current_start, # Current uptime/downtime "current_duration": duration, "service_groups": service_groups, "client_groups": client_groups, "tt": [], "links": links, "alarms": alarm_list, "interfaces": interfaces, "metrics": meta, "sensors": s_meta, "maintenance": maintenance, "redundancy": redundancy, "inventory": self.flatten_inventory(inv), "serial_number": self.object.get_attr("Serial Number"), "attributes": list( ManagedObjectAttribute.objects.filter( managed_object=self.object.id)), "confdb": None, } try: r["confdb"] = self.object.get_confdb() except (SyntaxError, ValueError): pass return r
def get_data(self): def get_container_path(self): # Get container path if not self.object: return None cp = [] if self.object.container: c = self.object.container.id while c: try: o = Object.objects.get(id=c) # @todo: Address data if o.container: cp.insert(0, { "id": o.id, "name": o.name }) c = o.container.id if o.container else None except DoesNotExist: metrics["error", ("type", "no_such_object")] += 1 break return cp if not self.object: return None # @todo: Stage # @todo: Service range # @todo: Open TT now = datetime.datetime.now() # Get object status and uptime alarms = list(ActiveAlarm.objects.filter(managed_object=self.object.id)) current_start = None duration = None if self.object.is_managed: if self.object.get_status(): if alarms: current_state = "alarm" else: current_state = "up" uptime = Uptime.objects.filter(object=self.object.id, stop=None).first() if uptime: current_start = uptime.start else: current_state = "down" outage = Outage.objects.filter(object=self.object.id, stop=None).first() if outage is not None: current_start = outage.start else: current_state = "unmanaged" if current_start: duration = now - current_start cp = get_container_path(self) # MAC addresses macs = [] o_macs = DiscoveryID.macs_for_object(self.object) if o_macs: for f, l in o_macs: if f == l: macs += [f] else: macs += ["%s - %s" % (f, l)] # Links uplinks = set(self.object.data.uplinks) if len(uplinks) > 1: if self.object.segment.lost_redundancy: redundancy = "L" else: redundancy = "R" else: redundancy = "N" links = [] for l in Link.object_links(self.object): local_interfaces = [] remote_interfaces = [] remote_objects = set() for i in l.interfaces: if i.managed_object.id == self.object.id: local_interfaces += [i] else: remote_interfaces += [i] remote_objects.add(i.managed_object) if len(remote_objects) == 1: ro = remote_objects.pop() if ro.id in uplinks: role = "uplink" else: role = "downlink" links += [{ "id": l.id, "role": role, "local_interface": sorted( local_interfaces, key=lambda x: split_alnum(x.name) ), "remote_object": ro, "remote_interface": sorted( remote_interfaces, key=lambda x: split_alnum(x.name) ), "remote_status": "up" if ro.get_status() else "down" }] links = sorted(links, key=lambda x: (x["role"] != "uplink", split_alnum(x["local_interface"][0].name))) # Build global services summary service_summary = ServiceSummary.get_object_summary(self.object) # Interfaces interfaces = [] mo = ManagedObject.objects.filter(id=self.object.id) iface_metrics, last_ts = get_interface_metrics(mo[0]) iface_metrics = iface_metrics[mo[0]] objects_metrics, last_time = get_objects_metrics(mo[0]) objects_metrics = objects_metrics.get(mo[0]) meta = "" metric_type_name = dict(MetricType.objects.filter().scalar("name", "measure")) metric_type_field = dict(MetricType.objects.filter().scalar("field_name", "measure")) if objects_metrics is not None: if objects_metrics.get("") is not None: for key in objects_metrics.get("").keys(): if metric_type_name[key] in ["bytes", "bit/s", "bool"]: objects_metrics.get("")[key] = { "type": metric_type_name[key], "value": self.humanize_speed(objects_metrics.get("")[key], metric_type_name[key]) } else: objects_metrics.get("")[key] = { "type": metric_type_name[key], "value": objects_metrics.get("")[key] } meta = objects_metrics.get("") else: meta = {} if iface_metrics is not None: for i in Interface.objects.filter(managed_object=self.object.id, type="physical"): load_in = "-" load_out = "-" errors_in = "-" errors_out = "-" iface_get_link_name = iface_metrics.get(str(i.name)) if iface_get_link_name is not None: for key in iface_get_link_name.keys(): meta_type = metric_type_name.get(key) or metric_type_field.get(key) iface_get_link_name[key] = { "type": meta_type, "value": self.humanize_speed( str(iface_get_link_name[key]), meta_type) } if key in ['Interface | Load | In', 'Interface | Load | Out', 'Interface | Errors | In', 'Interface | Errors | Out']: try: load_in = iface_get_link_name['Interface | Load | In']["value"] + \ iface_get_link_name['Interface | Load | In']["type"] load_out = iface_get_link_name['Interface | Load | Out']["value"] + \ iface_get_link_name['Interface | Load | Out']["type"] errors_in = iface_get_link_name['Interface | Errors | In']["value"] errors_out = iface_get_link_name['Interface | Errors | Out']["value"] except TypeError: pass else: iface_get_link_name = {} interfaces += [{ "id": i.id, "name": i.name, "admin_status": i.admin_status, "oper_status": i.oper_status, "mac": i.mac or "", "full_duplex": i.full_duplex, "load_in": load_in, "load_out": load_out, "errors_in": errors_in, "errors_out": errors_out, "speed": max([i.in_speed or 0, i.out_speed or 0]) / 1000, "untagged_vlan": None, "tagged_vlan": None, "profile": i.profile, "service": i.service, "service_summary": service_summary.get("interface").get(i.id, {}) }] si = list(i.subinterface_set.filter(enabled_afi="BRIDGE")) if len(si) == 1: si = si[0] interfaces[-1]["untagged_vlan"] = si.untagged_vlan interfaces[-1]["tagged_vlans"] = list_to_ranges(si.tagged_vlans).replace(",", ", ") interfaces = sorted(interfaces, key=lambda x: split_alnum(x["name"])) # Resource groups # Service groups (i.e. server) static_services = set(self.object.static_service_groups) service_groups = [] for rg_id in self.object.effective_service_groups: rg = ResourceGroup.get_by_id(rg_id) service_groups += [{ "id": rg_id, "name": rg.name, "technology": rg.technology, "is_static": rg_id in static_services }] # Client groups (i.e. client) static_clients = set(self.object.static_client_groups) client_groups = [] for rg_id in self.object.effective_client_groups: rg = ResourceGroup.get_by_id(rg_id) client_groups += [{ "id": rg_id, "name": rg.name, "technology": rg.technology, "is_static": rg_id in static_clients }] # @todo: Administrative domain path # Alarms alarm_list = [] for a in alarms: alarm_list += [{ "id": a.id, "root_id": self.get_root(alarms), "timestamp": a.timestamp, "duration": now - a.timestamp, "subject": a.subject, "managed_object": a.managed_object, "service_summary": { "service": SummaryItem.items_to_dict(a.total_services), "subscriber": SummaryItem.items_to_dict(a.total_subscribers)}, "alarm_class": a.alarm_class }] alarm_list = sorted(alarm_list, key=operator.itemgetter("timestamp")) # Maintenance maintenance = [] for m in Maintenance.objects.filter( affected_objects__object=self.object.id, is_completed=False, start__lte=now + datetime.timedelta(hours=1) ): maintenance += [{ "maintenance": m, "id": m.id, "subject": m.subject, "start": m.start, "stop": m.stop, "in_progress": m.start <= now }] # Get Inventory inv = [] for p in self.object.get_inventory(): c = self.get_nested_inventory(p) c["name"] = p.name or self.object.name inv += [c] # Build result if self.object.platform is not None: platform = self.object.platform.name else: platform = "Unknown" if self.object.version is not None: version = self.object.version.version else: version = "" r = { "id": self.object.id, "object": self.object, "name": self.object.name, "address": self.object.address, "platform": platform, # self.object.platform.name if self.object.platform else "Unknown", "version": version, # self.object.version.version if self.object.version else "", "description": self.object.description, "object_profile": self.object.object_profile.id, "object_profile_name": self.object.object_profile.name, "macs": ", ".join(sorted(macs)), "segment": self.object.segment, "firmware_status": FirmwarePolicy.get_status( self.object.platform, self.object.version), "firmware_recommended": FirmwarePolicy.get_recommended_version(self.object.platform), "service_summary": service_summary, "container_path": cp, "current_state": current_state, # Start of uptime/downtime "current_start": current_start, # Current uptime/downtime "current_duration": duration, "service_groups": service_groups, "client_groups": client_groups, "tt": [], "links": links, "alarms": alarm_list, "interfaces": interfaces, "metrics": meta, "maintenance": maintenance, "redundancy": redundancy, "inventory": self.flatten_inventory(inv), "serial_number": self.object.get_attr("Serial Number") } return r