def test_sequence_searcher_section_start_end_same(self):
    """
    Test scenario:
     * multiple sections that end with start of the next
     * start def matches unique start
     * end def matches any start
    """
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as ftmp:
        ftmp.write(SEQ_TEST_7)
        ftmp.close()
        s = FileSearcher()
        sd = SequenceSearchDef(start=SearchDef(r"^section (2)"),
                               body=SearchDef(r"\d_\d"),
                               end=SearchDef(r"^section (\d+)"),
                               tag="seq-search-test7")
        s.add_search_term(sd, path=ftmp.name)
        results = s.search()
        sections = results.find_sequence_sections(sd)
        self.assertEqual(len(sections), 1)
        for id in sections:
            for r in sections[id]:
                if r.tag == sd.start_tag:
                    self.assertEqual(r.get(1), "2")
                elif r.tag == sd.body_tag:
                    self.assertTrue(r.get(0) in ["2_1"])

        os.remove(ftmp.name)
def get_osd_lvm_info(self):
    if not self.ceph_volume_lvm_list:
        return

    ceph_osds = self.services.get("ceph-osd")
    if not ceph_osds:
        return

    f_ceph_volume_lvm_list = mktemp_dump('\n'.join(
        self.ceph_volume_lvm_list))
    s = FileSearcher()
    sd = SequenceSearchDef(start=SearchDef(r"^=+\s+osd\.(\d+)\s+=+.*"),
                           body=SearchDef([
                               r"\s+osd\s+(fsid)\s+(\S+)\s*",
                               r"\s+(devices)\s+([\S]+)\s*"
                           ]),
                           tag="ceph-lvm")
    s.add_search_term(sd, path=f_ceph_volume_lvm_list)
    info = {}
    for results in s.search().find_sequence_sections(sd).values():
        _osd_id = None
        _info = {}
        for result in results:
            if result.tag == sd.start_tag:
                _osd_id = int(result.get(1))
            elif result.tag == sd.body_tag:
                if result.get(1) == "fsid":
                    _info["fsid"] = result.get(2)
                elif result.get(1) == "devices":
                    _info["dev"] = result.get(2)

        info[_osd_id] = _info

    os.unlink(f_ceph_volume_lvm_list)
    return info
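# Illustrative sketch, not taken from the source: judging by the regexes in
# get_osd_lvm_info() above, the "ceph-volume lvm list" output it parses is
# assumed to look roughly like this (values hypothetical):
#
#   ====== osd.1 =======
#
#         osd fsid      7c24a0a1-0000-0000-0000-000000000000
#         devices       /dev/vdb
#
# Each "=== osd.N ===" banner starts a new section and the indented
# fsid/devices lines are matched by the body expressions.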
def test_filesearcher_network_info(self):
    filepath = os.path.join(os.environ["DATA_ROOT"], 'sos_commands',
                            'networking', 'ip_-d_address')
    filepath2 = os.path.join(os.environ["DATA_ROOT"], 'sos_commands',
                             'networking', 'ip_-s_-d_link')
    ip = "10.10.101.33"
    mac = "ac:1f:6b:9e:d8:44"
    s = FileSearcher()

    sd = SearchDef(r".+({}).+".format(ip))
    s.add_search_term(sd, filepath)
    sd = SearchDef(r"^\s+link/ether\s+({})\s+.+".format(mac))
    s.add_search_term(sd, filepath2)

    results = s.search()
    self.assertEqual(set(results.files), set([filepath, filepath2]))
    self.assertEqual(len(results.find_by_path(filepath)), 1)
    self.assertEqual(len(results.find_by_path(filepath2)), 3)
    self.assertEqual(results.find_by_path(filepath)[0].linenumber, 16)
    for result in results.find_by_path(filepath):
        self.assertEqual(result.get(1), ip)

    expected = {8: mac, 15: mac, 22: mac}
    for result in results.find_by_path(filepath2):
        ln = result.linenumber
        self.assertEqual(result.tag, None)
        self.assertEqual(result.get(1), expected[ln])
def register_report_searches(self):
    """Register all sequence search definitions that we will execute
    against rabbitmqctl report.

    NOTE: the rabbitmqctl report output differs between versions 3.6.x
    and 3.8.x and we try to account for either by providing optional
    regex expressions to match either.
    """
    self._sequences = {
        "queues": {
            "searchdef": SequenceSearchDef(
                start=SearchDef([r"^Queues on ([^:]+):",
                                 (r"^Listing queues for vhost ([^:]+) "
                                  r"...")]),
                # NOTE: we don't use a list for the body here because
                # we need to know which expression matched so that we
                # can know in which order to retrieve the columns since
                # their order is inverted between 3.6.x and 3.8.x
                body=SearchDef(r"^(?:<([^.\s]+)[.0-9]+>\s+(\S+)|"
                               r"(\S+)\s+(?:\S+\s+){4}<([^.\s]+)[.0-9]"
                               r"+>)\s+.+"),
                end=SearchDef(r"^$"),
                tag="queues"),
            "callbacks": [self.get_queue_info]
        },
        "connections": {
            "searchdef": SequenceSearchDef(
                start=SearchDef([r"^Connections:$",
                                 r"^Listing connections ...$"]),
                body=SearchDef(r"^<(rabbit[^>.]*)(?:[.][0-9]+)+>.*$"),
                end=SearchDef(r"^$"),
                tag="connections"),
            "callbacks": [self.get_queue_connection_distribution]
        },
        "memory": {
            "searchdef": SequenceSearchDef(
                start=SearchDef([r"^Status of node '([^']*)'$",
                                 r"^Status of node ([^']*) ...$"]),
                body=SearchDef(r"^\s+\[{total,([0-9]+)}.+"),
                end=SearchDef(r"^$"),
                tag="memory"),
            "callbacks": [self.get_memory_used]
        },
        "partitioning": {
            "searchdef": SearchDef(
                r"^\s*{cluster_partition_handling,([^}]*)}",
                tag="cluster_partition_handling"),
            "callbacks": [self.get_partition_handling]
        }
    }
    for s in self._sequences.values():
        self.searcher.add_search_term(s["searchdef"], self.f_report)
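# Illustrative sketch (assumed line shapes, not captured rabbitmqctl
# output): the two alternates in the queues body expression capture the
# columns in opposite orders, which is how the callback can tell the
# versions apart:
#
#   3.6.x style:  <node.1.2.3>  queue-name ...      -> groups 1 and 2 match
#   3.8.x style:  queue-name ... <node.1.2.3> ...   -> groups 3 and 4 match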
def register_search_terms(self):
    self.sequence_defs.append(SequenceSearchDef(
        start=SearchDef(r"\s+port \d+: (\S+) .+"),
        body=SearchDef(r"\s+([RT]X) \S+:(\d+) \S+:(\d+) \S+:(\d+) "
                       r"\S+:(\d+) \S+:(\d+)"),
        tag="port-stats"))
    for sd in self.sequence_defs:
        self.search_obj.add_search_term(sd, self.f_dpctl)
def register_report_searches(self):
    """Register all sequence search definitions that we will execute
    against rabbitmqctl report.
    """
    self._sequences = {
        "queues": {
            "searchdef": SequenceSearchDef(
                start=SearchDef(r"^Queues on ([^:]+):"),
                body=SearchDef(r"^<([^.\s]+)[.0-9]+>\s+(\S+)\s+.+"),
                end=SearchDef(r"^$"),
                tag="queues"),
            "callbacks": [self.get_queues]
        },
        "connections": {
            "searchdef": SequenceSearchDef(
                start=SearchDef(r"^Connections:$"),
                body=SearchDef(r"^<(rabbit[^>.]*)(?:[.][0-9]+)+>.*$"),
                end=SearchDef(r"^$"),
                tag="connections"),
            "callbacks": [self.get_queue_connection_distribution]
        },
        "memory": {
            "searchdef": SequenceSearchDef(
                start=SearchDef(r"^Status of node '([^']*)'$"),
                body=SearchDef(r"^\s+\[{total,([0-9]+)}.+"),
                end=SearchDef(r"^$"),
                tag="memory"),
            "callbacks": [self.get_memory_used]
        }
    }
    for s in self._sequences.values():
        self.searcher.add_search_term(s["searchdef"], self.report_path)
def add_rpc_loop_search_terms(self):
    """Add search terms for start and end of a neutron openvswitch agent
    rpc loop.
    """
    expr = (r"^([0-9\-]+) (\S+) .+ Agent rpc_loop - iteration:([0-9]+) "
            "started.*")
    self.searchobj.add_search_term(SearchDef(expr, tag="rpc-loop-start",
                                             hint="Agent rpc_loop"),
                                   self.data_source)

    expr = (r"^([0-9\-]+) (\S+) .+ Agent rpc_loop - iteration:([0-9]+) "
            "completed..+Elapsed:([0-9.]+).+")
    self.searchobj.add_search_term(SearchDef(expr, tag="rpc-loop-end",
                                             hint="Agent rpc_loop"),
                                   self.data_source)
def get_hm_amphora_missed_heartbeats(self):
    missed_heartbeats = {}
    expr = (r"^(\S+) \S+ .+ Amphora (\S+) health message was processed "
            r"too slowly:.+")
    d = SearchDef(expr, tag="amp-missed-hb", hint="health message")
    self.searcher.add_search_term(d, self.data_sources["health-manager"])

    results = self.searcher.search()
    for r in results.find_by_tag("amp-missed-hb"):
        ts_date = r.get(1)
        amp_id = r.get(2)
        if ts_date not in missed_heartbeats:
            missed_heartbeats[ts_date] = {}

        if amp_id in missed_heartbeats[ts_date]:
            missed_heartbeats[ts_date][amp_id] += 1
        else:
            missed_heartbeats[ts_date][amp_id] = 1

    # sort each amp by occurrences
    for ts_date in missed_heartbeats:
        d = utils.sorted_dict(missed_heartbeats[ts_date],
                              key=lambda e: e[1], reverse=True)
        missed_heartbeats[ts_date] = d

    if missed_heartbeats:
        # now sort by date
        LB_CHECKS["amp-missed-heartbeats"] = \
            utils.sorted_dict(missed_heartbeats)
def detect_known_bugs():
    """Unit fails to start complaining there are members in the
    relation.
    """
    known_bugs = {
        1910958: {
            "description": ("Unit fails to start complaining there are "
                            "members in the relation."),
            "pattern": (
                r'.* manifold worker returned unexpected error: failed '
                r'to initialize uniter for "[A-Za-z0-9-]+": cannot '
                r'create relation state tracker: cannot remove persisted '
                r'state, relation \d+ has members'),
            "hint": "manifold worker returned unexpected error",
        }
    }

    s = FileSearcher()
    for bug in known_bugs:
        sd = SearchDef(known_bugs[bug]["pattern"], tag=bug,
                       hint=known_bugs[bug]["hint"])
        s.add_search_term(sd, f"{JUJU_LOG_PATH}/*")

    results = s.search()
    for bug in known_bugs:
        if results.find_by_tag(bug):
            add_known_bug(bug, known_bugs[bug].get("description"))
def register_mtu_dropped_packets_search(self):
    path = os.path.join(constants.DATA_ROOT, 'var/log/kern.log')
    if constants.USE_ALL_LOGS:
        path = path + "*"

    sdef = SearchDef(r".+\] (\S+): dropped over-mtu packet",
                     hint="dropped", tag="over-mtu")
    self.search_obj.add_search_term(sdef, path)
def register_search_terms(self):
    for d in OVS_DAEMONS:
        path = os.path.join(constants.DATA_ROOT, OVS_DAEMONS[d]["logs"])
        if constants.USE_ALL_LOGS:
            path = f"{path}*"

        sd = SearchDef(r"([0-9-]+)T([0-9:\.]+)Z.+\|(ERR|ERROR|WARN)\|.+",
                       tag=d)
        self.search_obj.add_search_term(sd, path)
def get_lb_failovers(self):
    """Get loadbalancer failover counts."""
    failovers = {}
    # the same expression is used for both sources; the tag identifies
    # whether a failover was automatic or manual.
    expr = r"^(\S+) \S+ .+ Performing failover for amphora:\s+(.+)"
    d = SearchDef(expr, tag="lb-failover-auto", hint="failover")
    self.searcher.add_search_term(d, self.data_sources["health-manager"])
    d = SearchDef(expr, tag="lb-failover-manual", hint="failover")
    self.searcher.add_search_term(d, self.data_sources["worker"])

    results = self.searcher.search()
    for fo_type in ["auto", "manual"]:
        for r in results.find_by_tag("lb-failover-{}".format(fo_type)):
            ts_date = r.get(1)
            payload = r.get(2)
            payload = yaml.safe_load(payload)
            lb_id = payload.get("load_balancer_id")
            if lb_id is None:
                continue

            if fo_type not in failovers:
                failovers[fo_type] = {}

            if ts_date not in failovers[fo_type]:
                failovers[fo_type][ts_date] = {}

            if lb_id in failovers[fo_type][ts_date]:
                failovers[fo_type][ts_date][lb_id] += 1
            else:
                failovers[fo_type][ts_date][lb_id] = 1

    for fo_type in failovers:
        # sort each failover by occurrences
        for ts_date in failovers[fo_type]:
            d = utils.sorted_dict(failovers[fo_type][ts_date],
                                  key=lambda e: e[1], reverse=True)
            failovers[fo_type][ts_date] = d

        # now sort the dates
        failovers[fo_type] = utils.sorted_dict(failovers[fo_type])

    if failovers:
        LB_CHECKS["lb-failovers"] = failovers
def add_bug_search_terms(self):
    """Add search terms for known bugs."""
    data_source = os.path.join(constants.DATA_ROOT, 'var/log/syslog')
    if constants.USE_ALL_LOGS:
        data_source = "{}*".format(data_source)

    for tag in self.agent_bug_search_terms:
        expr = self.agent_bug_search_terms[tag]["expr"]
        self.searchobj.add_search_term(SearchDef(expr, tag=tag),
                                       data_source)
def test_filesearcher_error(self):
    s = FileSearcher()
    with mock.patch.object(SearchResult, '__init__') as mock_init:

        def fake_init(*args, **kwargs):
            raise EOFError("some error")

        mock_init.side_effect = fake_init
        path = os.path.join(os.environ["DATA_ROOT"])
        s.add_search_term(SearchDef("."), path)
        s.search()
def _load_event_definitions(self):
    """
    Load event search definitions from yaml.

    An event has two expressions; one to identify the start and one to
    identify the end. Note that these events can be overlapping hence
    why we don't use a SequenceSearchDef.
    """
    path = os.path.join(constants.PLUGIN_YAML_DEFS, "events.yaml")
    with open(path) as fd:
        yaml_defs = yaml.safe_load(fd.read())

    if not yaml_defs:
        return

    plugin_events = yaml_defs.get(constants.PLUGIN_NAME, {})
    for group_name, group in plugin_events.get(self._yaml_root,
                                               {}).items():
        for label in group:
            event = group[label]
            start = SearchDef(event["start"]["expr"],
                              tag="{}-start".format(label),
                              hint=event["start"]["hint"])
            end = SearchDef(event["end"]["expr"],
                            tag="{}-end".format(label),
                            hint=event["end"]["hint"])
            ds = os.path.join(constants.DATA_ROOT, event["datasource"])
            if (event.get("allow-all-logs", True) and
                    constants.USE_ALL_LOGS):
                ds = "{}*".format(ds)

            if group_name not in self._event_defs:
                self._event_defs[group_name] = {}

            if label not in self._event_defs[group_name]:
                self._event_defs[group_name][label] = {}

            self._event_defs[group_name][label] = \
                {"searchdefs": [start, end], "datasource": ds}
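# Illustrative sketch of the events.yaml layout that
# _load_event_definitions() expects, inferred from the keys it accesses
# (names and expressions here are hypothetical):
#
#   <plugin-name>:
#     <yaml-root>:
#       <group-name>:
#         <event-label>:
#           start:
#             expr: '^([0-9-]+) (\S+) .+ event started'
#             hint: 'event started'
#           end:
#             expr: '^([0-9-]+) (\S+) .+ event finished'
#             hint: 'event finished'
#           datasource: var/log/some-agent.log
#           allow-all-logs: true  # optional; defaults to true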
def register_search_terms(self):
    path = os.path.join(constants.DATA_ROOT,
                        OVS_DAEMONS[self.daemon]["logs"])
    if constants.USE_ALL_LOGS:
        path = f"{path}*"

    fd = FilterDef(r"ERROR|WARN")
    self.search_obj.add_filter_term(fd, path)

    tag = "netdev_linux-no-such-device"
    sd = SearchDef(r"([0-9-]+)T([0-9:\.]+)Z.+\|(\S+): .+: No such device",
                   tag=tag)
    self.tags.append(tag)
    self.search_obj.add_search_term(sd, path)

    tag = "bridge-no-such-device"
    sd = SearchDef(r"([0-9-]+)T([0-9:\.]+)Z.+\|could not open network "
                   r"device (\S+) \(No such device\)", tag=tag)
    self.tags.append(tag)
    self.search_obj.add_search_term(sd, path)
def add_router_event_search_terms(self):
    logs_path = AGENT_LOG_PATHS["neutron"]
    if constants.USE_ALL_LOGS:
        data_source = os.path.join(constants.DATA_ROOT, logs_path,
                                   'neutron-l3-agent.log*')
    else:
        data_source = os.path.join(constants.DATA_ROOT, logs_path,
                                   'neutron-l3-agent.log')

    # router updates
    expr = (r"^([0-9-]+) (\S+) .+ Starting router update for "
            "([0-9a-z-]+), .+ update_id ([0-9a-z-]+). .+")
    self.searchobj.add_search_term(SearchDef(expr,
                                             tag="router-update-start",
                                             hint="router update"),
                                   data_source)

    expr = (r"^([0-9-]+) (\S+) .+ Finished a router update for "
            "([0-9a-z-]+), update_id ([0-9a-z-]+). Time elapsed: "
            "([0-9.]+)")
    self.searchobj.add_search_term(SearchDef(expr,
                                             tag="router-update-finish",
                                             hint="router update"),
                                   data_source)

    # router state_change_monitor + keepalived spawn
    expr = (r"^([0-9-]+) (\S+) .+ Router ([0-9a-z-]+) .+ "
            "spawn_state_change_monitor")
    self.searchobj.add_search_term(SearchDef(expr,
                                             tag="router-spawn1",
                                             hint="spawn_state_change"),
                                   data_source)

    expr = (r"^([0-9-]+) (\S+) .+ Keepalived spawned with config "
            "/var/lib/neutron/ha_confs/([0-9a-z-]+)/keepalived.conf .+")
    self.searchobj.add_search_term(SearchDef(expr,
                                             tag="router-spawn2",
                                             hint="Keepalived"),
                                   data_source)
def test_sequence_searcher_overlapping_incomplete(self):
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as ftmp:
        ftmp.write(SEQ_TEST_3)
        ftmp.close()
        s = FileSearcher()
        sd = SequenceSearchDef(start=SearchDef(
                                   r"^(a\S*) (start\S*) point\S*"),
                               body=SearchDef(r"leads to"),
                               end=SearchDef(r"^an (ending)$"),
                               tag="seq-search-test3")
        s.add_search_term(sd, path=ftmp.name)
        results = s.search()
        sections = results.find_sequence_sections(sd)
        self.assertEqual(len(sections), 1)
        for id in sections:
            for r in sections[id]:
                if r.tag == sd.start_tag:
                    self.assertEqual(r.get(1), "another")
                elif r.tag == sd.end_tag:
                    self.assertEqual(r.get(1), "ending")

        os.remove(ftmp.name)
def check_log_errors(self):
    path = os.path.join(constants.DATA_ROOT,
                        'var/log/rabbitmq/rabbit@*.log')
    if constants.USE_ALL_LOGS:
        path = f"{path}*"

    self.searcher.add_search_term(SearchDef(r".+ \S+_partitioned_network",
                                            tag="partitions"),
                                  path=path)
    results = self.searcher.search()
    if results.find_by_tag("partitions"):
        msg = ("cluster either has or has had partitions - check "
               "cluster_status")
        issues_utils.add_issue(issue_types.RabbitMQWarning(msg))
def add_agent_terms(self, service):
    """
    Add search terms for warnings, exceptions, errors etc i.e. anything
    that could count as an "issue" of interest.
    """
    data_source_template = os.path.join(constants.DATA_ROOT,
                                        AGENT_LOG_PATHS[service],
                                        '{}.log')
    if constants.USE_ALL_LOGS:
        data_source_template = "{}*".format(data_source_template)

    for agent in AGENT_DAEMON_NAMES[service]:
        data_source = data_source_template.format(agent)
        for exc_msg in self.agent_exceptions[service]:
            expr = r"^([0-9\-]+) (\S+) .+{}.*".format(exc_msg)
            self.searchobj.add_search_term(SearchDef(expr, tag=agent,
                                                     hint=exc_msg),
                                           data_source)

        for msg in self.agent_issues.get(service, []):
            expr = r"^([0-9\-]+) (\S+) .+{}.*".format(msg)
            self.searchobj.add_search_term(SearchDef(expr, tag=agent,
                                                     hint=msg),
                                           data_source)
def test_sequence_searcher_multiple_sections(self):
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as ftmp:
        ftmp.write(SEQ_TEST_5)
        ftmp.close()
        s = FileSearcher()
        sd = SequenceSearchDef(start=SearchDef(
                                   r"^(a\S*) (start\S*) point\S*"),
                               body=SearchDef(r"value is (\S+)"),
                               end=SearchDef(r"^$"),
                               tag="seq-search-test5")
        s.add_search_term(sd, path=ftmp.name)
        results = s.search()
        sections = results.find_sequence_sections(sd)
        self.assertEqual(len(sections), 2)
        for id in sections:
            for r in sections[id]:
                if r.tag == sd.start_tag:
                    self.assertEqual(r.get(1), "another")
                elif r.tag == sd.body_tag:
                    self.assertTrue(r.get(1) in ["3", "4"])
                elif r.tag == sd.end_tag:
                    self.assertEqual(r.get(0), "")

        os.remove(ftmp.name)
def test_search_filter_invert_match(self):
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as ftmp:
        ftmp.write(FILTER_TEST_1)
        ftmp.close()
        s = FileSearcher()
        fd = FilterDef(r" (ERROR)", invert_match=True)
        s.add_filter_term(fd, path=ftmp.name)
        sd = SearchDef(r".+ INFO (.+)")
        s.add_search_term(sd, path=ftmp.name)
        results = s.search().find_by_path(ftmp.name)
        self.assertEqual(len(results), 1)
        for r in results:
            self.assertEqual(r.get(1), "blah")

        os.remove(ftmp.name)
def _add_exception_searches(self):
    for svc, exprs in self._exception_exprs.items():
        logpath = SERVICE_RESOURCES[svc]["logs"]
        data_source_template = os.path.join(constants.DATA_ROOT, logpath,
                                            '{}.log')
        if constants.USE_ALL_LOGS:
            data_source_template = "{}*".format(data_source_template)

        for agent in SERVICE_RESOURCES[svc]["daemons"]:
            data_source = data_source_template.format(agent)
            fd = FilterDef(r"( ERROR | WARNING |Traceback)")
            self.searchobj.add_filter_term(fd, data_source)
            for subexpr in exprs + self._agent_issues.get(svc, []):
                # NOTE: services running under apache have their logs
                # prepended with a load of apache/mod_wsgi info so we
                # have to do it this way to account for both. We ignore
                # the apache prefix and it will not count towards the
                # result.
                expr = (r"^(?:\[[\w :\.]+\].+\]\s+)?([0-9\-]+) (\S+) "
                        ".+{}.*".format(subexpr))
                sd = SearchDef(expr, tag=agent, hint=subexpr)
                self.searchobj.add_search_term(sd, data_source)
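# Illustrative sketch (hypothetical log lines): the optional non-capturing
# prefix in the expression above lets one pattern cover both plain agent
# logs and agents running under apache/mod_wsgi:
#
#   2021-05-04 14:11:10.345 123 ERROR ... SomeException ...
#   [Tue May 04 14:11:10.345678 2021] [wsgi:error] [pid 123] 2021-05-04
#       14:11:10.345 123 ERROR ... SomeException ...
#
# Either way, groups 1 and 2 capture the agent's own date and time fields.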
def get_vrrp_transitions(self):
    if "keepalived" not in L3HA_CHECKS:
        return

    self._get_journalctl_l3_agent()
    transitions = {}
    for router in L3HA_CHECKS["keepalived"]:
        vr_id = ROUTER_VR_IDS[router]
        expr = (r"^(\S+) [0-9]+ [0-9:]+ \S+ Keepalived_vrrp"
                r"\[([0-9]+)\]: VRRP_Instance\(VR_{}\) .+ (\S+) "
                "STATE.*".format(vr_id))
        d = SearchDef(expr, tag=router)
        self.searcher.add_search_term(d, self.f_journalctl)

    results = self.searcher.search()
    for router in L3HA_CHECKS["keepalived"]:
        transitions[router] = len(results.find_by_tag(router))

    if transitions:
        L3HA_CHECKS["keepalived"] = {"transitions": {}}
        for k, v in sorted(transitions.items(), key=lambda x: x[1],
                           reverse=True):
            L3HA_CHECKS["keepalived"]["transitions"][k] = v
def get_vrrp_transitions(self):
    """
    List routers that have had a vrrp state transition along with the
    number of transitions. Excludes routers that have not had any change
    of state.
    """
    if not self.router_vrrp_pids:
        return

    self._get_journalctl_l3_agent()
    transitions = {}
    for router in self.router_vrrp_pids:
        vr_id = ROUTER_VR_IDS[router]
        expr = (r"^([0-9-]+)T\S+ \S+ Keepalived_vrrp"
                r"\[([0-9]+)\]: VRRP_Instance\(VR_{}\) .+ (\S+) "
                "STATE.*".format(vr_id))
        d = SearchDef(expr, tag=router)
        self.searcher.add_search_term(d, self.f_journalctl)

    results = self.searcher.search()
    for router in self.router_vrrp_pids:
        t_count = len(results.find_by_tag(router))
        if not t_count:
            continue

        for r in results.find_by_tag(router):
            ts_date = r.get(1)
            if router not in transitions:
                transitions[router] = {}

            if ts_date in transitions[router]:
                transitions[router][ts_date] += 1
            else:
                transitions[router][ts_date] = 1

    if transitions:
        L3HA_CHECKS["keepalived"] = {"transitions": transitions}
def get_osd_rss(self, osd_id):
    """Return memory RSS for a given OSD.

    NOTE: this assumes we have ps auxwwwm format.
    """
    ceph_osds = self.services.get("ceph-osd")
    if not ceph_osds:
        return 0

    f_osd_ps_cmds = mktemp_dump('\n'.join(ceph_osds['ps_cmds']))

    s = FileSearcher()
    # columns: USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
    sd = SearchDef(r"\S+\s+\d+\s+\S+\s+\S+\s+\d+\s+(\d+)\s+.+/ceph-osd\s+"
                   r".+--id\s+{}\s+.+".format(osd_id))
    s.add_search_term(sd, path=f_osd_ps_cmds)

    rss = 0
    # we only expect one result
    for result in s.search().find_by_path(f_osd_ps_cmds):
        rss = int(int(result.get(1)) / 1024)
        break

    os.unlink(f_osd_ps_cmds)
    return rss
def get_vrrp_transitions(self):
    if "keepalived" not in L3HA_CHECKS:
        return

    transitions = {}
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as ftmp:
        if not constants.USE_ALL_LOGS:
            date = cli_helpers.get_date(format="--iso-8601").rstrip()
        else:
            date = None

        out = cli_helpers.get_journalctl(unit="neutron-l3-agent",
                                         date=date)
        ftmp.write(''.join(out))
        ftmp.close()

        for router in L3HA_CHECKS["keepalived"]:
            vr_id = ROUTER_VR_IDS[router]
            expr = (r"^(\S+) [0-9]+ [0-9:]+ \S+ Keepalived_vrrp"
                    r"\[([0-9]+)\]: VRRP_Instance\(VR_{}\) .+ (\S+) "
                    "STATE.*".format(vr_id))
            d = SearchDef(expr, tag=router)
            self.searcher.add_search_term(d, ftmp.name)

        results = self.searcher.search()
        for router in L3HA_CHECKS["keepalived"]:
            transitions[router] = len(results.find_by_tag(router))

        os.unlink(ftmp.name)

    if transitions:
        L3HA_CHECKS["keepalived"] = {"transitions": {}}
        for k, v in sorted(transitions.items(), key=lambda x: x[1],
                           reverse=True):
            L3HA_CHECKS["keepalived"]["transitions"][k] = v
def _get_port_stats(self, name=None, mac=None):
    """Get ip link stats for the given port."""
    ip_link_show = cli_helpers.get_ip_link_show()
    stats_raw = []

    if mac:
        libvirt_mac = "fe" + mac[2:]

    exprs = []
    if mac:
        for _mac in [mac, libvirt_mac]:
            exprs.append(r"\s+link/ether\s+({})\s+.+".format(_mac))
    else:
        exprs.append(r"\d+:\s+({}):\s+.+".format(name))

    with tempfile.NamedTemporaryFile(mode='w', delete=False) as ftmp:
        ftmp.write(''.join(ip_link_show))
        ftmp.close()

        s = FileSearcher()
        sd = SequenceSearchDef(
            # match start of interface
            start=SearchDef(r"^(?:{})".format('|'.join(exprs))),
            # match body of interface
            body=SearchDef(r".+"),
            # match next interface or EOF
            end=SearchDef(r"(?:^\d+:\s+\S+:.+|^$)"),
            tag="ifaces")
        s.add_search_term(sd, path=ftmp.name)
        results = s.search()
        for section in results.find_sequence_sections(sd).values():
            for result in section:
                if result.tag == sd.body_tag:
                    stats_raw.append(result.get(0))

            # stop at first match - if matching by mac address it is
            # possible for multiple interfaces to have the same mac e.g.
            # bonds and their interfaces but we don't support that so
            # just use the first.
            break

        os.unlink(ftmp.name)

    stats = {}
    total_packets = float(0)
    if stats_raw:
        for i, line in enumerate(stats_raw):
            ret = re.compile(r"\s+[RT]X:\s+.+").findall(line)
            if ret:
                ret = re.compile(r"\s*([a-z]+)\s*").findall(line)
                if ret:
                    for j, column in enumerate(ret):
                        value = int(stats_raw[i + 1].split()[j])
                        if column == "packets":
                            total_packets = float(value)
                            continue

                        for key in ["dropped", "errors"]:
                            if column == key:
                                if not value:
                                    continue

                                percentage = int(
                                    (100 / total_packets) * value)
                                # only report if > 0% drops/errors
                                if percentage > 0:
                                    stats[key] = ("{} ({}%)".format(
                                        value, percentage))

    return stats
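# Illustrative sketch (assumed output shape, not a captured sample): the
# stats parsing above expects "ip -s -d link" style sections where an
# "RX:"/"TX:" header line names the columns and the following line holds
# the values, e.g.:
#
#   RX:  bytes packets errors dropped  missed   mcast
#     355387    1885      0       1       0       0
#
# The header's column names are extracted first and the values are then
# read from the next line by column index.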
def get_events(event_name, data_source):
    ext_event_info = {}
    events = {}
    s = FileSearcher()

    # look for sequence starter
    if event_name == "network-vif-plugged":
        sd = SearchDef(r".+\[instance: (\S+)\].+Preparing to wait for "
                       r"external event ({})-(\S+)\s+".format(event_name))
        s.add_search_term(sd, data_source)
    elif event_name == "network-changed":
        sd = SearchDef(r".+\[instance: (\S+)\].+Received event "
                       r"({})-(\S+)\s+".format(event_name))
        s.add_search_term(sd, data_source)

    master_results = s.search()

    # now start a fresh one
    s = FileSearcher()

    for file, results in master_results:
        for result in results:
            instance_id = result.get(1)
            event_id = result.get(3)
            events[event_id] = {"instance_id": instance_id,
                                "data_source": file}

            for stage in EXT_EVENT_META[event_name]["stages_keys"]:
                expr = (r".+\[instance: {}\]\s+{}\s.*\s?event\s+{}-{}.? "
                        r".+".format(instance_id, stage, event_name,
                                     event_id))
                tag = "{}_{}_{}".format(instance_id, event_id, stage)
                sd = SearchDef(expr, tag, hint=event_name)
                s.add_search_term(sd, data_source)

    results = s.search()
    for event_id in events:
        instance_id = events[event_id]["instance_id"]
        data_source = events[event_id]["data_source"]
        stages = get_state_dict(event_name)
        for stage in stages:
            tag = "{}_{}_{}".format(instance_id, event_id, stage)
            r = results.find_by_tag(tag, path=data_source)
            if r:
                stages[stage] = True

        if all([stages[stage] for stage in stages]):
            result = "succeeded"
        else:
            result = "failed"

        if event_name not in ext_event_info:
            ext_event_info[event_name] = {}

        if result not in ext_event_info[event_name]:
            ext_event_info[event_name][result] = []

        ext_event_info[event_name][result].append({
            "port": event_id,
            "instance": instance_id})

    if ext_event_info:
        for event in ext_event_info:
            if event not in EXT_EVENT_INFO:
                EXT_EVENT_INFO[event] = {}
            for result in ext_event_info[event]:
                s = ext_event_info[event][result]
                EXT_EVENT_INFO[event][result] = list(s)
from common import (
    constants,
    plugin_yaml,
)
from common.searchtools import (
    SearchDef,
    FileSearcher,
)

CEPH_LOGS = "var/log/ceph/"

DAEMON_INFO = {}

# search terms are defined here to make them easier to read.
SEARCHES = [
    SearchDef(
        r"^([0-9-]+)\S* \S+ .+ (osd.[0-9]+) reported failed by "
        r"osd.[0-9]+",
        tag="osd-reported-failed",
        hint="reported failed"),
    SearchDef(
        r"^([0-9-]+)\S* \S+ .+ (mon.\S+) calling monitor election",
        tag="mon-election-called",
        hint="calling monitor election"),
    SearchDef(
        (r"^([0-9-]+)\S* \S+ .+ ([0-9]+) slow requests are blocked .+ "
         r"\(REQUEST_SLOW\)"),
        tag="slow-requests",
        hint="REQUEST_SLOW"),
    SearchDef(
        r"^([0-9-]+)\S* .+ _verify_csum bad .+",
        tag="crc-err-bluestore",
        hint="_verify_csum"),
    SearchDef(
        r"^([0-9-]+)\S* .+ rocksdb: .+block checksum mismatch:.+",
        tag="crc-err-rocksdb",
        hint="checksum mismatch"),
    SearchDef(