def add_known_bugs_to_master_plugin():
    """Merge this plugin's known_bugs.yaml content into the master yaml.

    May only be called once per plugin and is normally run as a final
    part after all other parts have executed.
    """
    known_bugs = get_known_bugs()
    if not known_bugs:
        return

    plugin_yaml.dump(known_bugs)
def add_issues_to_master_plugin():
    """Merge this plugin's issues.yaml content into the master yaml.

    May only be called once per plugin and is normally run as a final
    part after all other parts have executed.
    """
    plugin_issues = _get_issues()
    if not plugin_issues:
        return

    plugin_yaml.dump(plugin_issues)
def filter_master_yaml():
    """Regroup the master yaml contents by filter key.

    Reads constants.MASTER_YAML_OUT, collects any FILTER_SCHEMA keys
    (e.g. issues/bugs) found in each plugin's section and regroups them
    as {key: {plugin: [items]}} so it is visible which plugin added
    each entry, then overwrites the file with the result (or leaves it
    empty if nothing matched).
    """
    with open(constants.MASTER_YAML_OUT) as fd:
        master_yaml = yaml.safe_load(fd)

    # Create a master list of issues and bugs adding info about which plugin
    # added them.
    filtered = {}
    for plugin, content in master_yaml.items():
        for key in FILTER_SCHEMA:
            if key not in content:
                continue

            # setdefault() replaces the explicit existence checks and
            # extend() replaces the manual per-item append loop.
            plugin_items = filtered.setdefault(key, {}).setdefault(plugin, [])
            plugin_items.extend(content[key])

    with open(constants.MASTER_YAML_OUT, 'w') as fd:
        if filtered:
            fd.write(plugin_yaml.dump(filtered, stdout=False))
            fd.write("\n")
        else:
            # mode 'w' has already truncated; keep the explicit empty write
            # for clarity of intent.
            fd.write("")
def filter_master_yaml():
    """Regroup the master yaml contents by filter key.

    Loads constants.MASTER_YAML_OUT and, for every FILTER_SCHEMA key
    found in a plugin's section, re-keys each item as "(<plugin>) <key>"
    so the originating plugin remains visible, then overwrites the file
    with the regrouped data (or empties it if nothing matched).
    """
    with open(constants.MASTER_YAML_OUT) as fd:
        master_yaml = yaml.safe_load(fd)

    # Create a master list of issues and bugs adding info about which plugin
    # added them.
    filtered = {}
    for plugin in master_yaml:
        for key in FILTER_SCHEMA:
            if key not in master_yaml[plugin]:
                continue

            filtered.setdefault(key, {})
            for item in master_yaml[plugin][key]:
                for item_key in item:
                    tagged_key = "({}) {}".format(plugin, item_key)
                    filtered[key][tagged_key] = item[item_key]

    with open(constants.MASTER_YAML_OUT, 'w') as fd:
        if not filtered:
            fd.write("")
        else:
            fd.write(plugin_yaml.dump(filtered,
                                      ensure_master_has_plugin=False,
                                      stdout=False))
            fd.write("\n")
def _get_log_dir_entries(subdir):
    """Return entry names under DATA_ROOT/var/log/<subdir>, or [] if the
    directory does not exist."""
    path = os.path.join(constants.DATA_ROOT, "var/log", subdir)
    if not os.path.exists(path):
        return []

    return os.listdir(path)


def get_pod_info():
    """Record the pod names found under var/log/pods in KUBERNETES_INFO."""
    pod_info = _get_log_dir_entries("pods")
    if pod_info:
        KUBERNETES_INFO["pods"] = pod_info


def get_container_info():
    """Record the container names found under var/log/containers in
    KUBERNETES_INFO."""
    container_info = _get_log_dir_entries("containers")
    if container_info:
        KUBERNETES_INFO["containers"] = container_info


if __name__ == "__main__":
    get_kubernetes_service_checker()()
    get_snap_info()
    get_pod_info()
    get_container_info()
    if KUBERNETES_INFO:
        plugin_yaml.dump(KUBERNETES_INFO)
# and is not necessarily representative of current state. if success_count > 10000: pcent = int(fail_count / (success_count / 100)) if pcent > 10: msg = ("failures are at {}% of successes (see {})".format( pcent, VMSTAT)) KERNEL_INFO["memory-checks"]["compaction"] = msg issue = issue_types.MemoryWarning("compaction " + msg) issues_utils.add_issue(issue) get_slab_major_consumers() else: KERNEL_INFO["memory-checks"] = "no issues found" def get_systemd_info(): path = os.path.join(constants.DATA_ROOT, "etc/systemd/system.conf") if os.path.exists(path): KERNEL_INFO["systemd"] = {"CPUAffinity": "not set"} for line in open(path): ret = re.compile("^CPUAffinity=(.+)").match(line) if ret: KERNEL_INFO["systemd"]["CPUAffinity"] = ret[1] if __name__ == "__main__": get_kernel_info() get_systemd_info() if KERNEL_INFO: plugin_yaml.dump(KERNEL_INFO)
# NOTE(review): __call__ below is a method of a class whose definition is
# outside this view; indentation has been reconstructed.
    def __call__(self):
        """Search the ceph daemon logs and run all log-based checks."""
        super().__call__()
        data_source = os.path.join(constants.DATA_ROOT, CEPH_LOGS,
                                   'ceph*.log')
        if constants.USE_ALL_LOGS:
            # Widen the glob to also include rotated logs.
            data_source = "{}*".format(data_source)

        s = FileSearcher()
        for search in SEARCHES:
            s.add_search_term(search, data_source)

        self.results = s.search()
        # Each process_* helper consumes self.results for its own check.
        self.process_osd_failure_reports()
        self.process_mon_elections()
        self.process_slow_requests()
        self.process_crc_bluestore()
        self.process_crc_rocksdb()
        self.process_long_heartbeat()
        self.process_heartbeat_no_reply()


def get_ceph_daemon_log_checker():
    """Factory for CephDaemonLogChecks."""
    # Do this way to make it easier to write unit tests.
    return CephDaemonLogChecks(CEPH_SERVICES_EXPRS)


if __name__ == "__main__":
    get_ceph_daemon_log_checker()()
    if DAEMON_INFO:
        DAEMON_INFO = {"daemon-events": DAEMON_INFO}
        plugin_yaml.dump(DAEMON_INFO)
helpers, plugin_yaml, ) from openstack_common import OST_PROJECTS, OST_DEP_PKGS DEP_LIST = OST_PROJECTS + OST_DEP_PKGS PKG_INFO = [] def get_pkg_info(): dpkg_l = helpers.get_dpkg_l() if not dpkg_l: return for line in dpkg_l: for dep in DEP_LIST: ret = re.compile( r"^ii\s+(python3?-)?({}[0-9a-z\-]*)\s+(\S+)\s+.+".format( dep)).match(line) if ret: pyprefix = ret[1] or "" PKG_INFO.append("{}{} {}".format(pyprefix, ret[2], ret[3])) if __name__ == "__main__": get_pkg_info() if PKG_INFO: PKG_INFO = {"dpkg": PKG_INFO} plugin_yaml.dump(PKG_INFO)
# NOTE(review): the code below is the tail of a function whose definition
# starts outside this view; indentation has been reconstructed.
            log_machines.add(ret[1])

    combined_machines = ps_machines.union(log_machines)
    for machine in combined_machines:
        agent_conf = os.path.join(constants.DATA_ROOT,
                                  "var/lib/juju/agents/machine-{}/agent.conf".
                                  format(machine))
        # Version defaults to unknown unless found in the agent config.
        version = "unknown"
        if os.path.exists(agent_conf):
            for line in open(agent_conf).readlines():
                ret = re.compile(r"upgradedToVersion:\s+(.+)").match(line)
                if ret:
                    version = ret[1]

        # A machine with an agent process in ps is "running"; any machine
        # only seen in the logs is reported as stopped.
        if machine in ps_machines:
            machines_running.add("{} (version={})".format(machine, version))
        else:
            machines_stopped.add(machine)

    if machines_running:
        JUJU_MACHINE_INFO["machines"]["running"] = list(machines_running)
    if machines_stopped:
        JUJU_MACHINE_INFO["machines"]["stopped"] = list(machines_stopped)


if __name__ == "__main__":
    get_machine_info()
    if JUJU_MACHINE_INFO:
        plugin_yaml.dump(JUJU_MACHINE_INFO)
# NOTE(review): the code below is the tail of a function whose definition
# starts outside this view; indentation has been reconstructed.
        ret = re.compile(r"^CPU\(s\):\s+([0-9]+)\s*.*").match(line)
        if ret:
            SYSTEM_INFO["num-cpus"] = int(ret[1])
            break

    uptime_output = helpers.get_uptime()
    if uptime_output:
        for line in uptime_output:
            ret = re.compile(r".+load average:\s+(.+)").match(line)
            if ret:
                SYSTEM_INFO["load"] = ret[1]
                break

    df_output = helpers.get_df()
    if df_output:
        for line in df_output:
            # Only the root filesystem entry (mountpoint "/") is of
            # interest here.
            ret = re.compile(r"(.+\/$)").match(line)
            if ret:
                SYSTEM_INFO["rootfs"] = ret[1]
                break

    if unattended_upgrades_enabled():
        SYSTEM_INFO['unattended-upgrades'] = "ENABLED"
    else:
        SYSTEM_INFO['unattended-upgrades'] = "disabled"


if __name__ == "__main__":
    get_system_info()
    if SYSTEM_INFO:
        plugin_yaml.dump(SYSTEM_INFO)
# NOTE(review): the code below is the tail of a method of CPUPinningChecker
# whose definition starts outside this view; indentation has been
# reconstructed.
                extra += "node{}: {}".format(
                    node, list_to_str(self.numa.cores(node)))

            extra += "\n{}: {}".format(self.cpu_dedicated_set_name,
                                       list_to_str(self.cpu_dedicated_set))
            self.results.add_info(
                "{} has cores from > 1 numa node".format(
                    self.cpu_dedicated_set_name), extra)

        if self.isolcpus or self.cpuaffinity:
            total_isolated = self.isolcpus.union(self.cpuaffinity)
            # NOTE(review): intersection() with no arguments returns a copy
            # of the set itself, so "nonisolated" here actually holds the
            # isolated cores - possibly a missing operand; confirm intent.
            nonisolated = set(total_isolated).intersection()
            if len(nonisolated) <= 4:
                self.results.add_warn("Host has only {} cores unpinned. This "
                                      "might cause unintended performance "
                                      "problems".format(len(nonisolated)))

    def get_results(self):
        """Fetch collected results via self.results.get()."""
        self.results.get()


if __name__ == "__main__":
    checker = CPUPinningChecker()
    checker.run_cpu_pinning_checks()
    checker.get_results()
    if CPU_PINNING_INFO:
        CPU_PINNING_INFO = {"cpu-pinning-checks": CPU_PINNING_INFO}
        plugin_yaml.dump(CPU_PINNING_INFO)
if __name__ == "__main__":
    # Register all search terms against a single searcher then run one
    # search pass whose results are shared by every check.
    searcher = FileSearcher()

    common_checks = CommonAgentChecks(searcher)
    common_checks.add_agents_issues_search_terms()

    neutron_checks = NeutronAgentChecks(searcher)
    neutron_checks.add_rpc_loop_search_terms()
    neutron_checks.add_router_event_search_terms()

    results = searcher.search()
    neutron_checks.process_rpc_loop_results(results)
    neutron_checks.process_router_event_results(results)
    common_checks.process_agent_issues_results(results)

    AGENT_CHECKS = {"agent-checks": {}}
    # Include each result set under its output key only when non-empty.
    sections = [("agent-issues", common_checks.agent_log_issues),
                ("neutron-ovs-agent", neutron_checks.ovs_agent_info),
                ("neutron-l3-agent", neutron_checks.l3_agent_info)]
    for label, info in sections:
        if info:
            AGENT_CHECKS["agent-checks"][label] = info

    if AGENT_CHECKS["agent-checks"]:
        plugin_yaml.dump(AGENT_CHECKS)
def get_instances_port_health():
    """Collect per-port stats for each instance and record them in
    NETWORK_INFO along with the number of instances checked."""
    instances = get_instances_info()
    if not instances:
        return

    port_health_info = {}
    for uuid, info in instances.items():
        for port in info['ports']:
            stats = get_port_stats(mac=port["mac"])
            if not stats:
                continue

            port_health_info.setdefault(uuid, {})[port["mac"]] = stats

    if port_health_info:
        NETWORK_INFO["port-health"] = {"num-vms-checked": len(instances),
                                       "stats": port_health_info}


if __name__ == "__main__":
    get_ns_info()
    get_config_network_info()
    get_instances_port_health()
    if NETWORK_INFO:
        NETWORK_INFO = {"network": NETWORK_INFO}
        plugin_yaml.dump(NETWORK_INFO)
type_ids.append(buckets[item]["type_id"]) # verify if the type_id list contain mixed type id if type_ids.count(type_ids[0]) != len(type_ids): bad_buckets.append(buckets[bid]["name"]) if bad_buckets: issue = CephCrushWarning("mixed crush buckets indentified (see " "--storage for more info)") issues_utils.add_issue(issue) CEPH_INFO["mixed_crush_buckets"] = bad_buckets def __call__(self): super().__call__() self.get_running_services_info() self.get_osd_info() self.get_ceph_pg_imbalance() self.get_ceph_versions_mismatch() self.get_crushmap_mixed_buckets() def get_ceph_checker(): # Do this way to make it easier to write unit tests. return CephChecks(CEPH_SERVICES_EXPRS, use_ps_axo_flags=True) if __name__ == "__main__": get_ceph_checker()() if CEPH_INFO: plugin_yaml.dump({"ceph": CEPH_INFO})
# NOTE(review): the code below is the tail of a function whose definition
# starts outside this view; indentation has been reconstructed.
    for i, line in enumerate(ip_addr_output):
        if "flannel" in line:
            if "flannel" not in NETWORK_INFO:
                NETWORK_INFO["flannel"] = {}

            # Start of a flannel interface section e.g. "flannel.1:".
            ret = re.compile(r".+(flannel\.[0-9]+):").match(line)
            if ret:
                iface = ret[1]
                NETWORK_INFO["flannel"][iface] = {}
                continue

        if iface:
            ret = re.compile(r".+\s+([0-9\.]+/[0-9]+).+\s+{}$".
                             format(iface)).match(line)
            # The i - 3 lookback ties the address line back to the
            # interface header a few lines earlier in ip addr output.
            if iface in ip_addr_output[i - 3] and ret:
                NETWORK_INFO["flannel"][iface]["addr"] = ret[1]
                iface = None

        ret = re.compile(r"^\s+vxlan id .+\s+([0-9\.]+)\s+dev\s+([0-9a-z]+).+"
                         ).match(line)
        # NOTE(review): iface may be None at this point (it is cleared after
        # the addr match above), which would make this lookup fail - confirm
        # intended key.
        if "flannel" in NETWORK_INFO and ret:
            NETWORK_INFO["flannel"][iface]["vxlan"] = "{}@{}".format(ret[1],
                                                                     ret[2])


if __name__ == "__main__":
    get_network_info()
    if NETWORK_INFO:
        plugin_yaml.dump({"network": NETWORK_INFO})
# NOTE(review): the code below is the tail of a function whose definition
# starts outside this view; indentation has been reconstructed.
            if not os.path.exists(cfg):
                continue

            for key in FEATURES[service][module]:
                for line in open(cfg).readlines():
                    ret = re.compile(
                        r"^{}\s*=\s*(.+)\s*".format(key)).match(line)
                    if ret:
                        module_features[key] = helpers.bool_str(ret[1])
                        break

                # Fall back to the known default when the key is not set in
                # the config file.
                if key not in module_features:
                    if key in DEFAULTS.get(service, {}).get(module, {}):
                        default = DEFAULTS[service][module][key]
                        module_features[key] = default

            # TODO: only include modules for which there is an actual agent
            # installed since otherwise their config is irrelevant.
            if module_features:
                if service not in SERVICE_FEATURES:
                    SERVICE_FEATURES[service] = {}
                SERVICE_FEATURES[service][module] = module_features


if __name__ == "__main__":
    get_service_features()
    if SERVICE_FEATURES:
        SERVICE_FEATURES = {"features": SERVICE_FEATURES}
        plugin_yaml.dump(SERVICE_FEATURES)
from common import (
    plugin_yaml,
)
from juju_common import (
    CHARM_MANIFEST_GLOB,
    JUJU_LIB_PATH,
)

CHARM_VERSIONS = {"charm-versions": []}


def get_charm_versions():
    """Populate CHARM_VERSIONS with sorted "<charm>-<revision>" strings
    derived from charm manifest filenames under JUJU_LIB_PATH."""
    if not os.path.exists(JUJU_LIB_PATH):
        return

    versions = []
    # Compile once rather than on every manifest iteration.
    manifest_matcher = re.compile(r".+_(\S+)-([0-9]+)$")
    for entry in glob.glob(os.path.join(JUJU_LIB_PATH, CHARM_MANIFEST_GLOB)):
        for manifest in os.listdir(entry):
            # os.listdir() already yields bare names so the former
            # os.path.basename() call was redundant.
            ret = manifest_matcher.match(manifest)
            if ret:
                versions.append("{}-{}".format(ret[1], ret[2]))

    if versions:
        CHARM_VERSIONS["charm-versions"] = sorted(versions)


if __name__ == "__main__":
    get_charm_versions()
    if CHARM_VERSIONS:
        plugin_yaml.dump(CHARM_VERSIONS)
helpers, plugin_yaml, ) BCACHE_INFO = {} def get_bcache_info(): for type in ["bcache", "nvme"]: for line in helpers.get_ls_lanR_sys_block(): ret = re.compile( r".+[0-9:]+\s+({}[0-9a-z]+)\s+.+".format(type)).match(line) if ret: if type not in BCACHE_INFO: BCACHE_INFO[type] = {} devname = ret[1] BCACHE_INFO[type][devname] = {} for line in helpers.get_udevadm_info_dev(devname): ret = re.compile(r".+\s+disk/by-dname/(.+)").match(line) if ret: BCACHE_INFO[type][devname]["dname"] = ret[1] elif "dname" not in BCACHE_INFO[type][devname]: BCACHE_INFO[type][devname]["dname"] = "<notfound>" if __name__ == "__main__": get_bcache_info() if BCACHE_INFO: plugin_yaml.dump(BCACHE_INFO)
#!/usr/bin/python3
import re

from common import (
    helpers,
    plugin_yaml,
)

VM_INFO = []


def get_vm_info():
    """Collect the uuids of Nova instance processes from ps output."""
    nova_matcher = re.compile(".+product=OpenStack Nova.+")
    uuid_matcher = re.compile(r".+uuid\s+([a-z0-9\-]+)[\s,]+.+")
    for line in helpers.get_ps():
        nova_match = nova_matcher.match(line)
        if not nova_match:
            continue

        uuid_match = uuid_matcher.match(nova_match[0])
        if uuid_match:
            VM_INFO.append(uuid_match[1])


if __name__ == "__main__":
    get_vm_info()
    if VM_INFO:
        VM_INFO = {"instances": VM_INFO}
        plugin_yaml.dump(VM_INFO)
# remove units from units_local_not_running that are just old versions of # the one currently running for unit in units_local_not_running: app = get_app_from_unit(unit) if not app or app not in app_local: units_local_not_running_filtered.add(unit) # dedup unit_nonlocal for unit in unit_nonlocal: app = get_app_from_unit(unit) version = app_nonlocal[app] if version == get_unit_version(unit): units_nonlocal_dedup.add(unit) if units_local: JUJU_UNIT_INFO["units"]["local"] = list(sorted(units_local)) if units_local_not_running_filtered: JUJU_UNIT_INFO["units"]["stopped"] = \ list(sorted(units_local_not_running_filtered)) if units_nonlocal_dedup: JUJU_UNIT_INFO["units"]["lxd"] = \ list(sorted(units_nonlocal_dedup)) if __name__ == "__main__": get_unit_info() if JUJU_UNIT_INFO: plugin_yaml.dump(JUJU_UNIT_INFO)
# NOTE(review): the code below is the tail of a function (get_events) whose
# definition starts outside this view; indentation has been reconstructed.
            if event_name not in ext_event_info:
                ext_event_info[event_name] = {}

            if result not in ext_event_info[event_name]:
                ext_event_info[event_name][result] = []

            ext_event_info[event_name][result].append({
                "port": event_id, "instance": instance_id})

    if ext_event_info:
        # Copy collected per-event results into the module-level store.
        for event in ext_event_info:
            if event not in EXT_EVENT_INFO:
                EXT_EVENT_INFO[event] = {}
            for result in ext_event_info[event]:
                s = ext_event_info[event][result]
                EXT_EVENT_INFO[event][result] = list(s)


if __name__ == "__main__":
    # Supported events - https://docs.openstack.org/api-ref/compute/?expanded=run-events-detail#create-external-events-os-server-external-events  # noqa E501
    data_source = os.path.join(constants.DATA_ROOT,
                               "var/log/nova/nova-compute.log")
    get_events("network-changed", data_source)
    get_events("network-vif-plugged", data_source)
    if EXT_EVENT_INFO:
        EXT_EVENT_INFO = {"os-server-external-events": EXT_EVENT_INFO}
        plugin_yaml.dump(EXT_EVENT_INFO)
def get_debug_log_info():
    """Record which Openstack projects have debug logging enabled in their
    config files."""
    debug_enabled = {}
    for proj in OST_PROJECTS:
        # Allow an override path, otherwise fall back to the conventional
        # etc/<proj>/<proj>.conf location.
        path = OST_ETC_OVERRIDES.get(proj)
        if path is None:
            path = os.path.join(constants.DATA_ROOT, "etc", proj,
                                "{}.conf".format(proj))

        if not os.path.exists(path):
            continue

        for line in helpers.safe_readlines(path):
            ret = re.compile(r"^debug\s*=\s*([A-Za-z]+).*").match(line)
            if ret:
                debug_enabled[proj] = helpers.bool_str(ret[1])

    if debug_enabled:
        OPENSTACK_INFO["debug-logging-enabled"] = debug_enabled


def get_openstack_service_checker():
    """Factory for OpenstackServiceChecks."""
    # Do this way to make it easier to write unit tests.
    return OpenstackServiceChecks(OST_SERVICES + OST_SERVICES_DEPS,
                                  hint_range=(0, 3))


if __name__ == "__main__":
    get_release_info()
    get_openstack_service_checker()()
    get_debug_log_info()
    if OPENSTACK_INFO:
        plugin_yaml.dump(OPENSTACK_INFO)