예제 #1
0
def add_known_bugs_to_master_plugin():
    """
    Save this plugin's known bugs as a part of the master yaml.

    The bugs are obtained from _get_known_bugs() and are only saved (with
    priority 99) when they contain a non-empty entry under
    MASTER_YAML_KNOWN_BUGS_KEY. This can only be called once per plugin and
    is typically run as the final part, after all others have executed.
    """
    known_bugs = _get_known_bugs()
    if not known_bugs:
        return

    if not known_bugs.get(MASTER_YAML_KNOWN_BUGS_KEY):
        return

    plugin_yaml.save_part(known_bugs, priority=99)
예제 #2
0
def add_issues_to_master_plugin():
    """
    Save this plugin's issues as a part of the master yaml.

    The issues are obtained from _get_issues() and are only saved (with
    priority 99) when they contain a non-empty entry under
    MASTER_YAML_ISSUES_FOUND_KEY. This can only be called once per plugin
    and is typically run as the final part, after all others have executed.
    """
    found = _get_issues()
    if not found:
        return

    if not found.get(MASTER_YAML_ISSUES_FOUND_KEY):
        return

    plugin_yaml.save_part(found, priority=99)
예제 #3
0
    def run_report_searches(self):
        """Register the report searches, run them and process the results.

        The search results are stored in self.results before the report
        callbacks are run. If self.resources is non-empty afterwards it is
        recorded under the "resources" key of RABBITMQ_INFO.
        """
        self.register_report_searches()
        self.results = self.searcher.search()
        self.run_report_callbacks()
        if self.resources:
            RABBITMQ_INFO["resources"] = self.resources

    def __call__(self):
        """Run the parent __call__, then collect the running services info
        and run the report searches, in that order.
        """
        super().__call__()
        self.get_running_services_info()
        self.run_report_searches()


def get_rabbitmq_service_checker():
    """Return a RabbitMQServiceChecks built from RMQ_SERVICES_EXPRS."""
    # Construction is wrapped in a function to make it easier to write unit
    # tests.
    checker = RabbitMQServiceChecks(RMQ_SERVICES_EXPRS, hint_range=(0, 3))
    return checker


def get_rabbitmq_package_checker():
    """Return a RabbitMQPackageChecks built from RMQ_PACKAGES."""
    # Construction is wrapped in a function to make it easier to write unit
    # tests.
    checker = RabbitMQPackageChecks(RMQ_PACKAGES)
    return checker


if __name__ == "__main__":
    # Create and run each checker in turn, then save whatever they collected.
    for factory in (get_rabbitmq_service_checker,
                    get_rabbitmq_package_checker):
        factory()()

    if RABBITMQ_INFO:
        plugin_yaml.save_part(RABBITMQ_INFO, priority=0)
예제 #4
0
        for router in L3HA_CHECKS["keepalived"]["transitions"]:
            transitions = L3HA_CHECKS["keepalived"]["transitions"][router]
            if transitions > threshold:
                max_transitions = max(transitions, max_transitions)
                warn_count += 1

        if warn_count:
            msg = ("{} router(s) have had more than {} vrrp transitions "
                   "(max={}) in the last 24 hours".format(
                       warn_count, threshold, max_transitions))
            issues_utils.add_issue(issue_types.NeutronL3HAWarning(msg))

    def __call__(self):
        """Collect neutron HA info and VRRP transitions, then check them.

        The transitions check is skipped when constants.USE_ALL_LOGS is set.
        """
        self.get_neutron_ha_info()
        self.get_vrrp_transitions()

        if constants.USE_ALL_LOGS:
            # There will likely be a large number of transitions if we look
            # across all time, so do not run the check in that case.
            return

        self.check_vrrp_transitions()


def run_checks():
    """Return a new NeutronL3HAChecks object; the caller invokes it."""
    checks = NeutronL3HAChecks()
    return checks


if __name__ == "__main__":
    checks = run_checks()
    checks()
    if L3HA_CHECKS:
        # Results are nested under the "neutron-l3ha" key of the saved part.
        part = {"neutron-l3ha": L3HA_CHECKS}
        plugin_yaml.save_part(part, priority=8)
예제 #5
0
                    continue

            if iface:
                ret = re.compile(r".+\s+([0-9\.]+/[0-9]+).+\s+{}$".format(
                    iface)).match(line)
                if iface in ip_addr_output[i - 3] and ret:
                    NETWORK_INFO[cni_type][iface]["addr"] = ret[1]
                    iface = None

            ret = re.compile(
                r"^\s+vxlan id .+\s+(\S+)\s+dev\s+([0-9a-z]+).+").match(line)

            if cni_type in NETWORK_INFO and ret:
                iface_info = "{}@{}".format(ret[1], ret[2])
                NETWORK_INFO[cni_type][iface]["vxlan"] = iface_info

    def __call__(self):
        """Run the parent __call__ and then collect the network info."""
        super().__call__()
        self.get_network_info()


def get_kubernetes_network_checks():
    """Return a new KubernetesNetworkChecks object."""
    # Construction is wrapped in a function to facilitate unit tests.
    checks = KubernetesNetworkChecks()
    return checks


if __name__ == "__main__":
    checks = get_kubernetes_network_checks()
    checks()
    if NETWORK_INFO:
        # Results are nested under the "network" key of the saved part.
        part = {"network": NETWORK_INFO}
        plugin_yaml.save_part(part, priority=1)
예제 #6
0
            for k, v in sorted(ifaces.items(), key=lambda e: e[1],
                               reverse=True):
                sorted_dict[k] = v

            KERNEL_INFO["over-mtu-dropped-packets"] = sorted_dict

    def register_mtu_dropped_packets_search(self):
        """Register the search for over-mtu dropped packet messages.

        The search is run against var/log/kern.log under
        constants.DATA_ROOT. When constants.USE_ALL_LOGS is set a trailing
        "*" is appended to the path. The search is tagged "over-mtu".
        """
        suffix = "*" if constants.USE_ALL_LOGS else ""
        path = os.path.join(constants.DATA_ROOT, 'var/log/kern.log') + suffix

        over_mtu_search = SearchDef(r".+\] (\S+): dropped over-mtu packet",
                                    hint="dropped", tag="over-mtu")
        self.search_obj.add_search_term(over_mtu_search, path)

    def __call__(self):
        """Create the file searcher, register and run the over-mtu search
        and then process the results.
        """
        # The searcher must exist before the search term is registered.
        self.search_obj = FileSearcher()
        self.register_mtu_dropped_packets_search()
        self.results = self.search_obj.search()
        self.check_mtu_dropped_packets()


def get_kernal_network_checks():
    """Return a new KernelNetworkChecks object."""
    checks = KernelNetworkChecks()
    return checks


if __name__ == "__main__":
    checks = get_kernal_network_checks()
    checks()
    # Only save a part if the checks collected something.
    if KERNEL_INFO:
        plugin_yaml.save_part(KERNEL_INFO, priority=2)
예제 #7
0
        port_health_info = {}
        for port in self.neutron_phy_ports:
            stats = self._get_port_stats(name=port)
            if stats:
                port_health_info[port] = stats

        if port_health_info:
            health = {"phy-ports": port_health_info}
            if "port-health" in NETWORK_INFO:
                NETWORK_INFO["port-health"].updated(health)
            else:
                NETWORK_INFO["port-health"] = health

    def __call__(self):
        """Run the parent __call__, then collect namespace info, config
        network info, physical port health and instance port health, in
        that order.
        """
        super().__call__()
        self.get_ns_info()
        self.get_config_network_info()
        self.get_neutron_phy_port_health()
        self.get_instances_port_health()


def get_network_checker():
    """Return a new OpenstackNetworkChecks object."""
    checker = OpenstackNetworkChecks()
    return checker


if __name__ == "__main__":
    get_network_checker()()
    if NETWORK_INFO:
        # Nest the results under the "network" key of the saved part. The
        # wrapper is passed directly so that the module-level NETWORK_INFO
        # dict is not rebound to a differently-shaped value.
        plugin_yaml.save_part({"network": NETWORK_INFO}, priority=4)
예제 #8
0
#!/usr/bin/python3
from common import plugin_yaml
from common.checks import PackageChecksBase

from openstack_common import (
    OST_PROJECTS,
    OST_DEP_PKGS,
    OST_PKG_ALIASES,
)


class OpenstackPackageChecks(PackageChecksBase):
    """Package checks for Openstack.

    Adds nothing to PackageChecksBase; all behaviour is inherited.
    """


def get_checks():
    """Return an OpenstackPackageChecks for the combined package lists.

    The expressions passed are OST_PROJECTS, OST_PKG_ALIASES and
    OST_DEP_PKGS concatenated in that order.
    """
    return OpenstackPackageChecks(
        OST_PROJECTS + OST_PKG_ALIASES + OST_DEP_PKGS)


if __name__ == "__main__":
    # Calling the checks object yields the package info to be saved.
    info = get_checks()()
    if info:
        part = {"dpkg": info}
        plugin_yaml.save_part(part, priority=3)
예제 #9
0
            result = "failed"

        if event_name not in ext_event_info:
            ext_event_info[event_name] = {}

        if result not in ext_event_info[event_name]:
            ext_event_info[event_name][result] = []

        ext_event_info[event_name][result].append({"port": event_id,
                                                   "instance": instance_id})

    if ext_event_info:
        for event in ext_event_info:
            if event not in EXT_EVENT_INFO:
                EXT_EVENT_INFO[event] = {}
            for result in ext_event_info[event]:
                s = ext_event_info[event][result]
                EXT_EVENT_INFO[event][result] = list(s)


if __name__ == "__main__":
    # Supported events - https://docs.openstack.org/api-ref/compute/?expanded=run-events-detail#create-external-events-os-server-external-events  # noqa E501
    data_source = os.path.join(constants.DATA_ROOT,
                               "var/log/nova/nova-compute.log")

    for event_name in ("network-changed", "network-vif-plugged"):
        get_events(event_name, data_source)

    if EXT_EVENT_INFO:
        # Nest the results under "os-server-external-events" in the saved
        # part. The wrapper is passed directly so that the module-level
        # EXT_EVENT_INFO dict is not rebound to a differently-shaped value.
        plugin_yaml.save_part({"os-server-external-events": EXT_EVENT_INFO},
                              priority=2)
예제 #10
0
                self._agent_log_issues[service] = {}

            self._agent_log_issues[service][agent] = e

    def process_results(self, results):
        """Process search results to see if we got any hits.

        Every agent listed under the "daemons" key of each service in
        SERVICE_RESOURCES is processed. Returns self._agent_log_issues.
        """
        for service, resources in SERVICE_RESOURCES.items():
            for agent in resources["daemons"]:
                self._process_agent_results(results, service, agent)

        return self._agent_log_issues


def run_agent_exception_checks():
    """Run the agent exception checks and record any results.

    All checks register their search terms with one shared FileSearcher,
    which is then run once. Non-empty results from a check are stored under
    the "agent-exceptions" key of AGENT_CHECKS_RESULTS.
    """
    searcher = FileSearcher()
    checks = [CommonAgentChecks(searcher)]
    for check in checks:
        check.register_search_terms()

    results = searcher.search()
    for check in checks:
        found = check.process_results(results)
        if not found:
            continue

        AGENT_CHECKS_RESULTS["agent-exceptions"] = found


if __name__ == "__main__":
    run_agent_exception_checks()
    # run_agent_exception_checks() only sets the "agent-exceptions" key when
    # there are results, so use get() rather than risk a KeyError when
    # nothing was found.
    if AGENT_CHECKS_RESULTS.get("agent-exceptions"):
        plugin_yaml.save_part(AGENT_CHECKS_RESULTS, priority=7)
예제 #11
0
            if path is None:
                path = os.path.join(constants.DATA_ROOT, "etc", proj,
                                    "{}.conf".format(proj))

            if os.path.exists(path):
                for line in cli_helpers.safe_readlines(path):
                    ret = re.compile(r"^debug\s*=\s*([A-Za-z]+).*").match(line)
                    if ret:
                        debug_enabled[proj] = cli_helpers.bool_str(ret[1])

        if debug_enabled:
            OPENSTACK_INFO["debug-logging-enabled"] = debug_enabled

    def __call__(self):
        """Run the parent __call__, then collect release info, running
        services info and debug logging info, in that order.
        """
        super().__call__()
        self.get_release_info()
        self.get_running_services_info()
        self.get_debug_log_info()


def get_openstack_service_checker():
    """Return an OpenstackServiceChecks for the services and their deps.

    The expressions passed are OST_SERVICES_EXPRS followed by
    OST_SERVICES_DEPS.
    """
    # Construction is wrapped in a function to make it easier to write unit
    # tests.
    service_exprs = OST_SERVICES_EXPRS + OST_SERVICES_DEPS
    checker = OpenstackServiceChecks(service_exprs, hint_range=(0, 3))
    return checker


if __name__ == "__main__":
    checker = get_openstack_service_checker()
    checker()
    # Only save a part if the checks collected something.
    if OPENSTACK_INFO:
        plugin_yaml.save_part(OPENSTACK_INFO, priority=0)
예제 #12
0
            if not os.path.exists(cfg):
                continue

            for key in FEATURES[service][module]:
                for line in open(cfg).readlines():
                    ret = re.compile(
                        r"^{}\s*=\s*(.+)\s*".format(key)).match(line)
                    if ret:
                        module_features[key] = cli_helpers.bool_str(ret[1])
                        break

                if key not in module_features:
                    if key in DEFAULTS.get(service, {}).get(module, {}):
                        default = DEFAULTS[service][module][key]
                        module_features[key] = default

            # TODO: only include modules for which there is an actual agent
            #       installed since otherwise their config is irrelevant.
            if module_features:
                if service not in SERVICE_FEATURES:
                    SERVICE_FEATURES[service] = {}

                SERVICE_FEATURES[service][module] = module_features


if __name__ == "__main__":
    get_service_features()
    if SERVICE_FEATURES:
        # Nest the results under the "features" key of the saved part. The
        # wrapper is passed directly so that the module-level
        # SERVICE_FEATURES dict is not rebound to a differently-shaped value.
        plugin_yaml.save_part({"features": SERVICE_FEATURES}, priority=5)
예제 #13
0
                    failovers[fo_type][ts_date][lb_id] = 1

        for fo_type in failovers:
            # sort each failover by occurences
            for ts_date in failovers[fo_type]:
                d = utils.sorted_dict(failovers[fo_type][ts_date],
                                      key=lambda e: e[1], reverse=True)
                failovers[fo_type][ts_date] = d

            # now sort the dates
            d = utils.sorted_dict(failovers[fo_type])

        if failovers:
            LB_CHECKS["lb-failovers"] = failovers

    def __call__(self):
        """Collect LB failovers and missed amphora heartbeats.

        Nothing is done when self.core is falsy.
        """
        if not self.core:
            return

        self.get_lb_failovers()
        self.get_hm_amphora_missed_heartbeats()


def run_checks():
    """Return an OctaviaLBChecks object for the octavia-common package."""
    # The package list gates the checks on whether octavia is installed.
    gate_packages = ["octavia-common"]
    return OctaviaLBChecks(gate_packages)


if __name__ == "__main__":
    checks = run_checks()
    checks()
    if LB_CHECKS:
        # Results are nested under the "octavia" key of the saved part.
        part = {"octavia": LB_CHECKS}
        plugin_yaml.save_part(part, priority=9)
예제 #14
0
        for path in self.get_sysfs_cachesets():
            path = os.path.join(path, "cache_available_percent")
            with open(path) as fd:
                value = fd.read().strip()
                limit = CACHE_AVAILABLE_PERCENT_LIMIT_LP1900438
                if int(value) <= limit:
                    msg = (
                        "bcache cache_available_percent ({}) is <= {} - "
                        "this node could be suffering from bug 1900438".format(
                            value, limit))
                    add_issue(BcacheWarning(msg))
                    add_known_bug(1900438, "see BcacheWarning for info")

    def __call__(self):
        """Run the bcache stats checks."""
        self.check_stats()


def get_bcache_dev_checks():
    """Return a new BcacheDeviceChecks object."""
    checks = BcacheDeviceChecks()
    return checks


def get_bcache_stats_checks():
    """Return a new BcacheStatsChecks object."""
    checks = BcacheStatsChecks()
    return checks


if __name__ == "__main__":
    # Create and run each set of checks in turn, then save what they found.
    for factory in (get_bcache_dev_checks, get_bcache_stats_checks):
        factory()()

    if BCACHE_INFO:
        plugin_yaml.save_part(BCACHE_INFO, priority=1)
예제 #15
0
            if machine in ps_machines:
                machines_running.add("{} (version={})".format(
                    machine, version))
            else:
                machines_stopped.add(machine)

        if machines_running:
            JUJU_MACHINE_INFO["machines"]["running"] = list(machines_running)

        if machines_stopped:
            JUJU_MACHINE_INFO["machines"]["stopped"] = list(machines_stopped)

        if not machines_running and (machines_stopped
                                     or self.get_local_running_units):
            msg = ("there is no Juju machined running on this host but it "
                   "seems there should be")
            add_issue(JujuWarning(msg))

    def __call__(self):
        """Collect the Juju machine info."""
        self.get_machine_info()


def get_machine_checks():
    """Return a new JujuMachineChecks object."""
    checks = JujuMachineChecks()
    return checks


if __name__ == "__main__":
    checks = get_machine_checks()
    checks()
    # Only save a part if any machine info was recorded.
    if JUJU_MACHINE_INFO["machines"]:
        plugin_yaml.save_part(JUJU_MACHINE_INFO, priority=0)
예제 #16
0
    def __call__(self):
        """Search the ceph logs and process the results.

        Every entry in SEARCHES is run against ceph*.log under CEPH_LOGS in
        constants.DATA_ROOT. When constants.USE_ALL_LOGS is set a trailing
        "*" is appended to the path. The results are stored in self.results
        and then handed to each of the process_* methods in turn.
        """
        super().__call__()
        log_glob = os.path.join(constants.DATA_ROOT, CEPH_LOGS, 'ceph*.log')
        if constants.USE_ALL_LOGS:
            log_glob += "*"

        searcher = FileSearcher()
        for searchdef in SEARCHES:
            searcher.add_search_term(searchdef, log_glob)

        self.results = searcher.search()

        # Each handler reads self.results; they are run in a fixed order.
        self.process_osd_failure_reports()
        self.process_mon_elections()
        self.process_slow_requests()
        self.process_crc_bluestore()
        self.process_crc_rocksdb()
        self.process_long_heartbeat()
        self.process_heartbeat_no_reply()


def get_ceph_daemon_log_checker():
    """Return a CephDaemonLogChecks built from CEPH_SERVICES_EXPRS."""
    # Construction is wrapped in a function to make it easier to write unit
    # tests.
    checker = CephDaemonLogChecks(CEPH_SERVICES_EXPRS)
    return checker


if __name__ == "__main__":
    get_ceph_daemon_log_checker()()
    if DAEMON_INFO:
        # Nest the results under the "daemon-events" key of the saved part.
        # The wrapper is passed directly so that the module-level DAEMON_INFO
        # dict is not rebound to a differently-shaped value.
        plugin_yaml.save_part({"daemon-events": DAEMON_INFO}, priority=2)
예제 #17
0
                container_info.append(container)

        if container_info:
            KUBERNETES_INFO["containers"] = container_info

    def __call__(self):
        """Collect the pod info followed by the container info."""
        self.get_pod_info()
        self.get_container_info()


def get_kubernetes_package_checker():
    """Return a KubernetesPackageChecks constructed with None."""
    # Construction is wrapped in a function to make it easier to write unit
    # tests.
    checker = KubernetesPackageChecks(None)
    return checker


def get_kubernetes_service_checker():
    """Return a new KubernetesServiceChecks object."""
    # Construction is wrapped in a function to make it easier to write unit
    # tests.
    checker = KubernetesServiceChecks()
    return checker


def get_kubernetes_resource_checker():
    """Return a new KubernetesResourceChecks object."""
    checker = KubernetesResourceChecks()
    return checker


if __name__ == "__main__":
    # Create and run each checker in turn, then save whatever they collected.
    for factory in (get_kubernetes_service_checker,
                    get_kubernetes_package_checker,
                    get_kubernetes_resource_checker):
        factory()()

    if KUBERNETES_INFO:
        plugin_yaml.save_part(KUBERNETES_INFO, priority=0)
예제 #18
0
    def check_log_errors(self):
        """Raise an issue if the rabbitmq logs show network partitions.

        Searches var/log/rabbitmq/rabbit@*.log under constants.DATA_ROOT.
        When constants.USE_ALL_LOGS is set a trailing "*" is appended to
        the path. A RabbitMQWarning issue is added if the "partitions"
        search has any results.
        """
        log_glob = os.path.join(constants.DATA_ROOT,
                                'var/log/rabbitmq/rabbit@*.log')
        if constants.USE_ALL_LOGS:
            log_glob += "*"

        partitions_search = SearchDef(r".+ \S+_partitioned_network",
                                      tag="partitions")
        self.searcher.add_search_term(partitions_search, path=log_glob)
        if not self.searcher.search().find_by_tag("partitions"):
            return

        msg = ("cluster either has or has had partitions - check "
               "cluster_status")
        issues_utils.add_issue(issue_types.RabbitMQWarning(msg))

    def __call__(self):
        """Run the parent __call__ and then check the logs for errors."""
        super().__call__()
        self.check_log_errors()


def get_rabbitmq_cluster_checker():
    """Return a new RabbitMQClusterChecks object."""
    # Construction is wrapped in a function to make it easier to write unit
    # tests.
    checker = RabbitMQClusterChecks()
    return checker


if __name__ == "__main__":
    checker = get_rabbitmq_cluster_checker()
    checker()
    # Only save a part if the checks collected something.
    if CLUSTER_INFO:
        plugin_yaml.save_part(CLUSTER_INFO, priority=1)
예제 #19
0
            CEPH_INFO["mixed_crush_buckets"] = bad_buckets

    def __call__(self):
        """Run the parent __call__, then collect OSD info and run the PG
        imbalance, versions mismatch and crushmap mixed buckets checks, in
        that order.
        """
        super().__call__()
        self.get_osd_info()
        self.get_ceph_pg_imbalance()
        self.get_ceph_versions_mismatch()
        self.get_crushmap_mixed_buckets()


def get_service_checker():
    """Return a CephServiceChecks built from CEPH_SERVICES_EXPRS."""
    # Construction is wrapped in a function to make it easier to write unit
    # tests.
    checker = CephServiceChecks(CEPH_SERVICES_EXPRS)
    return checker


def get_pkg_checker():
    """Return a CephPackageChecks built from CEPH_PKGS_CORE."""
    checker = CephPackageChecks(CEPH_PKGS_CORE)
    return checker


def get_osd_checker():
    """Return a CephOSDChecks built from CEPH_SERVICES_EXPRS."""
    # Construction is wrapped in a function to make it easier to write unit
    # tests.
    checker = CephOSDChecks(CEPH_SERVICES_EXPRS)
    return checker


if __name__ == "__main__":
    # Create and run each checker in turn, then save whatever they collected.
    for factory in (get_service_checker, get_pkg_checker, get_osd_checker):
        factory()()

    if CEPH_INFO:
        # Results are nested under the "ceph" key of the saved part.
        part = {"ceph": CEPH_INFO}
        plugin_yaml.save_part(part, priority=0)
예제 #20
0
                msg = (
                    "found {} ovs interfaces with 100% dropped packets".format(
                        len(all_dropped)))
                issues_utils.add_issue(issue_types.OpenvSwitchWarning(msg))

            if all_errors:
                msg = (
                    "found {} ovs interfaces with 100% packet errors".format(
                        len(all_errors)))
                issues_utils.add_issue(issue_types.OpenvSwitchWarning(msg))

            stats_sorted = {}
            for k in sorted(stats):
                stats_sorted[k] = stats[k]

            OVS_INFO["port-stats"] = stats_sorted


def get_checks():
    """Return a list with one new instance of each Open vSwitch check.

    The order is OpenvSwitchvSwitchdChecks, OpenvSwitchDaemonChecksCommon,
    OpenvSwitchDPChecks.
    """
    check_classes = (
        OpenvSwitchvSwitchdChecks,
        OpenvSwitchDaemonChecksCommon,
        OpenvSwitchDPChecks,
    )
    return [check_cls() for check_cls in check_classes]


if __name__ == "__main__":
    # Run each check for its side effects. A plain loop is used rather than
    # a list comprehension since the return values are not needed.
    for check in get_checks():
        check()

    if OVS_INFO:
        plugin_yaml.save_part(OVS_INFO, priority=1)
예제 #21
0
                ret = re.compile(r".+load average:\s+(.+)").match(line)
                if ret:
                    SYSTEM_INFO["load"] = ret[1]
                    break
        df_output = cli_helpers.get_df()
        if df_output:
            for line in df_output:
                ret = re.compile(r"(.+\/$)").match(line)
                if ret:
                    SYSTEM_INFO["rootfs"] = ret[1]
                    break

        if self.unattended_upgrades_enabled:
            SYSTEM_INFO['unattended-upgrades'] = "ENABLED"
        else:
            SYSTEM_INFO['unattended-upgrades'] = "disabled"

    def __call__(self):
        """Collect the system info."""
        self.get_system_info()


def get_system_checks():
    """Return a new SystemChecks object."""
    # Construction is wrapped in a function to facilitate unit tests.
    checks = SystemChecks()
    return checks


if __name__ == "__main__":
    checks = get_system_checks()
    checks()
    # Only save a part if the checks collected something.
    if SYSTEM_INFO:
        plugin_yaml.save_part(SYSTEM_INFO, priority=0)
예제 #22
0
                extra += "node{}: {}".format(
                    node, list_to_str(self.numa.cores(node)))

            extra += "\n{}: {}".format(self.cpu_dedicated_set_name,
                                       list_to_str(self.cpu_dedicated_set))

            self.results.add_info(
                "{} has cores from > 1 numa node".format(
                    self.cpu_dedicated_set_name), extra)

        if self.isolcpus or self.cpuaffinity:
            total_isolated = self.isolcpus.union(self.cpuaffinity)
            nonisolated = set(total_isolated).intersection()
            if len(nonisolated) <= 4:
                self.results.add_warn("Host has only {} cores unpinned. This "
                                      "might cause unintended performance "
                                      "problems".format(len(nonisolated)))

    def get_results(self):
        """Call get() on the results object.

        The value returned by get() is discarded, so this returns None.
        """
        collected = self.results
        collected.get()


if __name__ == "__main__":
    checker = CPUPinningChecker()
    checker.run_cpu_pinning_checks()
    checker.get_results()
    if CPU_PINNING_INFO:
        # Nest the results under the "cpu-pinning-checks" key of the saved
        # part. The wrapper is passed directly so that the module-level
        # CPU_PINNING_INFO dict is not rebound to a differently-shaped value.
        plugin_yaml.save_part({"cpu-pinning-checks": CPU_PINNING_INFO},
                              priority=6)
예제 #23
0
#!/usr/bin/python3
from common import plugin_yaml
from common.checks import PackageChecksBase

from openstack_common import (
    OST_PROJECTS,
    OST_DEP_PKGS,
    OST_PKG_ALIASES,
)

OST_PKG_INFO = {}


class OpenstackPackageChecks(PackageChecksBase):
    """Package checks that record their findings in OST_PKG_INFO."""

    def __call__(self):
        """Store self.packages under the "dpkg" key if it is non-empty."""
        found = self.packages
        if not found:
            return

        OST_PKG_INFO["dpkg"] = found


def get_checks():
    """Return an OpenstackPackageChecks for the combined package lists.

    The expressions passed are OST_PROJECTS, OST_PKG_ALIASES and
    OST_DEP_PKGS concatenated in that order.
    """
    return OpenstackPackageChecks(
        OST_PROJECTS + OST_PKG_ALIASES + OST_DEP_PKGS)


if __name__ == "__main__":
    checks = get_checks()
    checks()
    # Only save a part if the checks collected something.
    if OST_PKG_INFO:
        plugin_yaml.save_part(OST_PKG_INFO, priority=3)
예제 #24
0
class JujuCharmChecks(JujuChecksBase):
    """Checks that collect the versions of charms found on this host."""

    def get_charm_versions(self):
        """Record the charm versions found under JUJU_LIB_PATH.

        Each path matching CHARM_MANIFEST_GLOB under JUJU_LIB_PATH is listed
        and every entry whose name ends in "_<name>-<number>" contributes a
        "<name>-<number>" string. The sorted list is stored under the
        "charm-versions" key of CHARM_VERSIONS if it is non-empty. Nothing
        is done if JUJU_LIB_PATH does not exist.
        """
        if not os.path.exists(JUJU_LIB_PATH):
            return

        name_expr = re.compile(r".+_(\S+)-([0-9]+)$")
        manifest_dirs = glob.glob(os.path.join(JUJU_LIB_PATH,
                                               CHARM_MANIFEST_GLOB))
        versions = []
        for manifest_dir in manifest_dirs:
            for manifest in os.listdir(manifest_dir):
                matched = name_expr.match(os.path.basename(manifest))
                if matched:
                    versions.append("{}-{}".format(matched[1], matched[2]))

        if versions:
            CHARM_VERSIONS["charm-versions"] = sorted(versions)

    def __call__(self):
        """Collect the charm versions."""
        self.get_charm_versions()


def get_charm_checks():
    """Return a new JujuCharmChecks object."""
    checks = JujuCharmChecks()
    return checks


if __name__ == "__main__":
    get_charm_checks()()
    # get_charm_versions() only sets the "charm-versions" key when versions
    # were found, so use get() rather than risk a KeyError when there are
    # none.
    if CHARM_VERSIONS.get("charm-versions"):
        plugin_yaml.save_part(CHARM_VERSIONS, priority=1)