def required_images(self, task_vars):
        """Return the set of docker images this host is expected to need.

        The image base name is looked up in ``self.docker_image_base`` using
        the ``deployment_type`` variable. Images for ``openshift_release`` are
        listed by ``containerized_docker_images`` or ``rpm_docker_images``
        depending on ``openshift.common.is_containerized``; the images
        qualified with ``"v" + openshift_image_tag`` are added in both cases.
        """
        # FIXME(juanvallejo): an unexpected `deployment_type` value is not
        # handled here; it should produce a proper error message.
        base_name = self.docker_image_base[get_var(task_vars, "deployment_type")]

        release = get_var(task_vars, "openshift_release")
        # FIXME(juanvallejo): this variable is not required when the
        # installation is non-containerized, and the example inventories leave
        # it commented out. When it is required but not set we should fail
        # gracefully with a proper error message.
        image_tag = get_var(task_vars, "openshift_image_tag")

        containerized = get_var(task_vars, "openshift", "common", "is_containerized")
        list_release_images = (
            self.containerized_docker_images if containerized else self.rpm_docker_images
        )
        required = set(list_release_images(base_name, release))

        # Images with a fully qualified tag (v0.0.0.0) rather than the standard
        # release format tag (v0.0) are checked for containerized and
        # non-containerized installations alike.
        required.update(
            self.qualified_docker_images(self.image_from_base_name(base_name), "v" + image_tag)
        )

        return required
    def run(self, tmp, task_vars):
        """Check that enough disk space is available on this host.

        The minimum is the largest value ``recommended_disk_space_bytes``
        holds for the host's groups, unless ``openshift_check_min_host_disk_gb``
        is set to a non-zero number of GB, which then takes precedence.

        Returns: result as hash; empty on success, otherwise with ``failed``
        and ``msg`` set.
        """
        host_groups = get_var(task_vars, "group_names")
        mounts = get_var(task_vars, "ansible_mounts")
        available = self.openshift_available_disk(mounts)

        recommended = max(
            self.recommended_disk_space_bytes.get(group, 0) for group in host_groups
        )
        # the user-facing setting is expressed in GB, the comparison is in bytes
        configured_gb = get_var(task_vars, "openshift_check_min_host_disk_gb", default=0)
        configured = int(configured_gb) * 10**9
        # a configured value of zero falls back to the recommendation
        required = configured or recommended

        if available < required:
            template = (
                'Available disk space ({:.1f} GB) for the volume containing '
                '"/var" is below minimum recommended space ({:.1f} GB)'
            )
            message = template.format(float(available) / 10**9, float(required) / 10**9)
            return {'failed': True, 'msg': message}

        return {}
Ejemplo n.º 3
0
    def ensure_dependencies(self, task_vars):
        """
        Make sure the packages listed in self.dependencies are installed.
        Atomic hosts are skipped: they would not be able to install packages
        but should already have them.
        Returns: msg, failed, changed
        """
        on_atomic = get_var(task_vars, "openshift", "common", "is_atomic")
        if on_atomic:
            return "", False, False

        # NOTE: the "package" module would be the natural choice, but it is an
        # action plugin and it is not clear how to invoke one of those. Calling
        # the host's package manager module directly is about the same anyway.
        manager = get_var(task_vars, "ansible_pkg_mgr", default="yum")
        module_args = {"name": self.dependencies, "state": "present"}
        outcome = self.module_executor(manager, module_args, task_vars)

        message = outcome.get("msg", "")
        if outcome.get("failed"):
            if "No package matching" in message:
                message = "Ensure that all required dependencies can be installed via `yum`.\n"
            listing = ',\n    '.join(self.dependencies)
            message = (
                "Unable to install required packages on this host:\n"
                "    {deps}\n{msg}"
            ).format(deps=listing, msg=message)

        # a non-zero return code counts as a failure even without "failed" set
        has_failed = outcome.get("failed", False) or outcome.get("rc", 0) != 0
        return message, has_failed, outcome.get("changed", False)
    def required_images(self, task_vars):
        """Return the set of docker images this host is expected to need.

        Namespace and name of the images come from the entry that
        ``self.deployment_image_info`` holds for ``openshift_deployment_type``.
        The release defaults to "latest" when ``openshift_release`` is unset.
        """
        deployment_type = get_var(task_vars, "openshift_deployment_type")
        info = self.deployment_image_info[deployment_type]

        release = get_var(task_vars, "openshift_release", default="latest")
        image_tag = get_var(task_vars, "openshift_image_tag")
        containerized = get_var(task_vars, "openshift", "common", "is_containerized")

        namespace = info["namespace"]
        name = info["name"]
        # image names that are only included for enterprise deployments
        enterprise_only = ["registry-console"] if "enterprise" in deployment_type else []

        required = set(
            self.required_docker_images(namespace, name, enterprise_only, release, containerized)
        )

        # Images with a fully qualified tag (v0.0.0.0) rather than the standard
        # release format tag (v0.0) are checked for containerized and
        # non-containerized installations alike.
        required.update(self.required_qualified_docker_images(namespace, name, image_tag))

        return required
Ejemplo n.º 5
0
    def required_images(self, task_vars):
        """Return the set of docker images this host is expected to need.

        Release images are listed by ``containerized_docker_images`` or
        ``rpm_docker_images`` depending on the installation type; images tagged
        with ``"v" + openshift_image_tag`` are added in either case.
        """
        base_name = self.docker_image_base[get_var(task_vars, "deployment_type")]

        release = get_var(task_vars, "openshift_release")
        image_tag = get_var(task_vars, "openshift_image_tag")

        if get_var(task_vars, "openshift", "common", "is_containerized"):
            release_images = self.containerized_docker_images(base_name, release)
        else:
            release_images = self.rpm_docker_images(base_name, release)
        required = set(release_images)

        # Images with a fully qualified tag (v0.0.0.0) rather than the standard
        # release format tag (v0.0) are checked for containerized and
        # non-containerized installations alike.
        qualified_tag = "v" + image_tag
        required.update(
            self.qualified_docker_images(self.image_from_base_name(base_name), qualified_tag)
        )

        return required
Ejemplo n.º 6
0
 def run(self, tmp, task_vars):
     """Run the aos_version module and return its result.

     The module receives the requested release (empty string when
     openshift_release is unset), the deployment type and the RPM prefix.
     """
     requested_release = get_var(task_vars, "openshift_release", default='')
     deployment_type = get_var(task_vars, "openshift_deployment_type")
     rpm_prefix = get_var(task_vars, "openshift", "common", "service_type")

     module_args = {
         "requested_openshift_release": requested_release,
         "openshift_deployment_type": deployment_type,
         "rpm_prefix": rpm_prefix,
     }
     return self.execute_module("aos_version", module_args, tmp, task_vars)
    def required_images(self, task_vars):
        """Build the set of docker images this host is expected to need.

        The image namespace and name are read from the
        ``self.deployment_image_info`` entry for ``openshift_deployment_type``;
        ``openshift_release`` falls back to "latest" when it is not set.
        """
        deployment_type = get_var(task_vars, "openshift_deployment_type")
        image_info = self.deployment_image_info[deployment_type]

        release = get_var(task_vars, "openshift_release", default="latest")
        tag = get_var(task_vars, "openshift_image_tag")
        containerized = get_var(task_vars, "openshift", "common", "is_containerized")

        namespace, name = image_info["namespace"], image_info["name"]
        if "enterprise" in deployment_type:
            # include enterprise-only image names
            extra_names = ["registry-console"]
        else:
            extra_names = []

        release_images = self.required_docker_images(
            namespace, name, extra_names, release, containerized)
        required = set(release_images)

        # Images with a fully qualified tag (v0.0.0.0) rather than the standard
        # release format tag (v0.0) are checked for containerized and
        # non-containerized installations alike.
        qualified_images = self.required_qualified_docker_images(namespace, name, tag)
        required.update(qualified_images)

        return required
Ejemplo n.º 8
0
    def ensure_dependencies(self, task_vars):
        """
        Make sure the packages listed in self.dependencies are present.
        Nothing is attempted on atomic hosts, which would not be able to
        install packages but should already have them.
        Returns: msg, failed, changed
        """
        if get_var(task_vars, "openshift", "common", "is_atomic"):
            return "", False, False

        # NOTE: the "package" module would be preferable, but it is an action
        # plugin and it is not clear how to invoke one of those. Running the
        # module named after the host's package manager is about the same.
        pkg_manager = get_var(task_vars, "ansible_pkg_mgr", default="yum")
        install_args = {"name": self.dependencies, "state": "present"}
        outcome = self.execute_module(pkg_manager, install_args, task_vars=task_vars)

        text = outcome.get("msg", "")
        if outcome.get("failed"):
            if "No package matching" in text:
                text = "Ensure that all required dependencies can be installed via `yum`.\n"
            wanted = ',\n    '.join(self.dependencies)
            text = (
                "Unable to install required packages on this host:\n"
                "    {deps}\n{msg}"
            ).format(deps=wanted, msg=text)

        # a non-zero return code is treated as a failure as well
        did_fail = outcome.get("failed", False) or outcome.get("rc", 0) != 0
        did_change = outcome.get("changed", False)
        return text, did_fail, did_change
Ejemplo n.º 9
0
 def is_first_master(task_vars):
     """Run only on first master and only when logging is configured. Returns: bool"""
     deploy_logging = get_var(task_vars, "openshift_hosted_logging_deploy", default=True)
     # Note: membership in the oo_first_master group would be a nicer test, but
     # for now it seems best not to require that setup and simply check whether
     # this host is the first entry of the masters group.
     # The two fallbacks differ on purpose: a missing hostname becomes [None]
     # while a missing masters list yields None as its first entry, so the two
     # never compare equal.
     this_host = get_var(task_vars, "ansible_ssh_host") or [None]
     master_hosts = get_var(task_vars, "groups", "masters", default=None) or [None]
     if not deploy_logging:
         return deploy_logging
     return master_hosts[0] == this_host
Ejemplo n.º 10
0
 def is_active(cls, task_vars):
     """Only run on hosts that depend on Docker.

     A host qualifies when the installation is containerized or when it is
     in the "nodes" group, provided the parent class check is active too.
     """
     containerized = get_var(task_vars, "openshift", "common", "is_containerized")
     in_nodes_group = "nodes" in get_var(task_vars, "group_names", default=[])

     parent_active = super(DockerHostMixin, cls).is_active(task_vars)
     if not parent_active:
         return parent_active
     return containerized or in_nodes_group
Ejemplo n.º 11
0
 def is_first_master(task_vars):
     """Run only on first master and only when logging is configured. Returns: bool"""
     logging_enabled = get_var(task_vars, "openshift_hosted_logging_deploy", default=True)
     # Note: checking membership in the oo_first_master group would be nice,
     # however for now it seems best to avoid requiring that setup and just
     # compare this host with the first entry of the masters group.
     # A missing hostname falls back to [None], which never equals the None
     # that stands in for a missing first master.
     current_host = get_var(task_vars, "ansible_ssh_host") or [None]
     masters = get_var(task_vars, "groups", "masters", default=None) or [None]
     first_master = masters[0]
     return logging_enabled and first_master == current_host
Ejemplo n.º 12
0
    def run(self, tmp, task_vars):
        """Run the aos_version module with the RPM prefix and the release.

        Returns whatever the module executor returns.
        """
        prefix = get_var(task_vars, "openshift", "common", "service_type")
        release = get_var(task_vars, "openshift_release")

        module_args = dict(prefix=prefix, version=release)
        return self.module_executor("aos_version", module_args, tmp, task_vars)
Ejemplo n.º 13
0
    def is_active(cls, task_vars):
        """Skip hosts that do not have etcd in their group names.

        Hosts whose openshift.common.short_version is not one of
        3.4, 3.5, 1.4 or 1.5 are skipped as well.
        """
        in_etcd_group = "etcd" in get_var(task_vars, "group_names", default=[])

        short_version = get_var(task_vars, "openshift", "common", "short_version")
        affected_version = short_version in ("3.4", "3.5", "1.4", "1.5")

        parent_active = super(EtcdTraffic, cls).is_active(task_vars)
        if not parent_active:
            return parent_active
        return in_etcd_group and affected_version
Ejemplo n.º 14
0
 def run(self, tmp, task_vars):
     """Execute the aos_version module and hand back its result.

     Module arguments are the requested release ('' when openshift_release
     is not set), the deployment type and the RPM prefix.
     """
     release = get_var(task_vars, "openshift_release", default='')
     deployment_type = get_var(task_vars, "openshift_deployment_type")
     service_type = get_var(task_vars, "openshift", "common", "service_type")

     args = dict(
         requested_openshift_release=release,
         openshift_deployment_type=deployment_type,
         rpm_prefix=service_type,
     )
     return self.execute_module("aos_version", args, tmp, task_vars)
    def known_docker_registries(task_vars):
        """Return the list of docker registries known for this host.

        Starts from the additional registries in the openshift.docker facts
        and adds one registry that depends on the deployment type.
        """
        facts = get_var(task_vars, "openshift", "docker")
        registries = set(facts["additional_registries"])

        deployment_type = get_var(task_vars, "openshift_deployment_type")
        if deployment_type == "origin":
            registries.add("docker.io")
        elif "enterprise" in deployment_type:
            registries.add("registry.access.redhat.com")

        return list(registries)
    def required_images(task_vars):
        """
        Determine which images we expect to need for this host.
        Returns: a set of required images like 'openshift/origin:v3.6'

        The thorny issue of determining the image names from the variables is under consideration
        via https://github.com/openshift/openshift-ansible/issues/4415

        For now we operate as follows:
        * For containerized components (master, node, ...) we look at the deployment type and
          use openshift/origin or openshift3/ose as the base for those component images. The
          version is openshift_image_tag as determined by the openshift_version role.
        * For OpenShift-managed infrastructure (router, registry...) we use oreg_url if
          it is defined; otherwise we again use the base that depends on the deployment type.
        Registry is not included in constructed images. It may be in oreg_url or etcd image.

        An empty set is returned when DEPLOYMENT_IMAGE_INFO has no usable entry for the
        deployment type.
        """
        required = set()
        deployment_type = get_var(task_vars, "openshift_deployment_type")
        host_groups = get_var(task_vars, "group_names")
        # containerized etcd may not have openshift_image_tag (see bz 1466622),
        # so fall back to "latest" rather than failing on the missing variable
        image_tag = get_var(task_vars, "openshift_image_tag", default="latest")
        # look the entry up with .get() so that an unknown deployment type
        # reaches the guard below instead of raising KeyError on the lookup
        image_info = DEPLOYMENT_IMAGE_INFO.get(deployment_type)
        if not image_info:
            return required

        # template for images that run on top of OpenShift
        image_url = "{}/{}-{}:{}".format(
            image_info["namespace"], image_info["name"], "${component}", "${version}")
        # a non-empty oreg_url takes precedence over the constructed template
        image_url = get_var(task_vars, "oreg_url", default="") or image_url
        if 'nodes' in host_groups:
            for suffix in NODE_IMAGE_SUFFIXES:
                required.add(
                    image_url.replace("${component}", suffix).replace("${version}", image_tag))
            # The registry-console is for some reason not prefixed with ose- like the other components.
            # Nor is it versioned the same, so just look for latest.
            # Also a completely different name is used for Origin.
            required.add(image_info["registry_console_image"])

        # images for containerized components
        if get_var(task_vars, "openshift", "common", "is_containerized"):
            components = set()
            if 'nodes' in host_groups:
                components.update(["node", "openvswitch"])
            if 'masters' in host_groups:  # name is "origin" or "ose"
                components.add(image_info["name"])
            for component in components:
                required.add("{}/{}:{}".format(image_info["namespace"], component, image_tag))
            if 'etcd' in host_groups:  # special case, note it is the same for origin/enterprise
                required.add("registry.access.redhat.com/rhel7/etcd")  # and no image tag

        return required
    def known_docker_registries(task_vars):
        """Build a list of docker registries available according to inventory vars.

        The additional registries from the openshift.docker facts are always
        included; "docker.io" is added for origin deployments and
        "registry.access.redhat.com" for enterprise ones.
        """
        additional = get_var(task_vars, "openshift", "docker")["additional_registries"]
        known = set(additional)

        deployment_type = get_var(task_vars, "openshift_deployment_type")
        if deployment_type == "origin":
            known.add("docker.io")
        elif "enterprise" in deployment_type:
            known.add("registry.access.redhat.com")

        return list(known)
    def known_docker_registries(task_vars):
        """Build a list of docker registries available according to inventory vars."""
        docker_facts = get_var(task_vars, "openshift", "docker")
        registries = set(docker_facts["additional_registries"])

        # one more registry is implied by the deployment type
        deployment_type = get_var(task_vars, "openshift_deployment_type")
        if deployment_type == "origin":
            registries |= {"docker.io"}
        elif "enterprise" in deployment_type:
            registries |= {"registry.access.redhat.com"}

        return list(registries)
Ejemplo n.º 19
0
    def known_docker_registries(task_vars):
        """Build a list of docker registries available according to inventory vars."""
        facts = get_var(task_vars, "openshift", "docker")
        result = set(facts["additional_registries"])

        deployment_type = get_var(task_vars, "openshift_deployment_type")
        # origin pulls from docker.io, enterprise from the Red Hat registry;
        # any other deployment type adds nothing
        if deployment_type == "origin":
            result.add("docker.io")
        elif "enterprise" in deployment_type:
            result.add("registry.access.redhat.com")

        return list(result)
Ejemplo n.º 20
0
    def run(self, tmp, task_vars):
        """Run the check_yum_update module for the packages this host needs.

        Master packages are included for hosts in the "masters" group and node
        packages for hosts in the "nodes" group; the module receives them as a
        sorted list without duplicates.
        """
        prefix = get_var(task_vars, "openshift", "common", "service_type")
        host_groups = get_var(task_vars, "group_names", default=[])

        wanted = set()
        if "masters" in host_groups:
            wanted.update(self.master_packages(prefix))
        if "nodes" in host_groups:
            wanted.update(self.node_packages(prefix))

        module_args = {"packages": sorted(wanted)}
        return self.execute_module("check_yum_update", module_args, tmp, task_vars)
    def required_images(task_vars):
        """
        Determine which images we expect to need for this host.
        Returns: a set of required images like 'openshift/origin:v3.6'

        The thorny issue of determining the image names from the variables is under consideration
        via https://github.com/openshift/openshift-ansible/issues/4415

        For now we operate as follows:
        * For containerized components (master, node, ...) we look at the deployment type and
          use openshift/origin or openshift3/ose as the base for those component images. The
          version is openshift_image_tag as determined by the openshift_version role.
        * For OpenShift-managed infrastructure (router, registry...) we use oreg_url if
          it is defined; otherwise we again use the base that depends on the deployment type.
        Registry is not included in constructed images. It may be in oreg_url or etcd image.

        An empty set is returned when DEPLOYMENT_IMAGE_INFO has no usable entry for the
        deployment type.
        """
        required = set()
        deployment_type = get_var(task_vars, "openshift_deployment_type")
        host_groups = get_var(task_vars, "group_names")
        # containerized etcd may not have openshift_image_tag, see bz 1466622
        image_tag = get_var(task_vars, "openshift_image_tag", default="latest")
        # look the entry up with .get() so that an unknown deployment type
        # reaches the guard below instead of raising KeyError on the lookup
        image_info = DEPLOYMENT_IMAGE_INFO.get(deployment_type)
        if not image_info:
            return required

        # template for images that run on top of OpenShift
        image_url = "{}/{}-{}:{}".format(image_info["namespace"], image_info["name"], "${component}", "${version}")
        # a non-empty oreg_url takes precedence over the constructed template
        image_url = get_var(task_vars, "oreg_url", default="") or image_url
        if 'nodes' in host_groups:
            for suffix in NODE_IMAGE_SUFFIXES:
                required.add(image_url.replace("${component}", suffix).replace("${version}", image_tag))
            # The registry-console is for some reason not prefixed with ose- like the other components.
            # Nor is it versioned the same, so just look for latest.
            # Also a completely different name is used for Origin.
            required.add(image_info["registry_console_image"])

        # images for containerized components
        if get_var(task_vars, "openshift", "common", "is_containerized"):
            components = set()
            if 'nodes' in host_groups:
                components.update(["node", "openvswitch"])
            if 'masters' in host_groups:  # name is "origin" or "ose"
                components.add(image_info["name"])
            for component in components:
                required.add("{}/{}:{}".format(image_info["namespace"], component, image_tag))
            if 'etcd' in host_groups:  # special case, note it is the same for origin/enterprise
                required.add("registry.access.redhat.com/rhel7/etcd")  # and no image tag

        return required
Ejemplo n.º 22
0
    def run(self, tmp, task_vars):
        """Check free disk space for every path with a recommendation.

        For each path in ``recommended_disk_space_bytes`` the requirement is
        the largest value among the host's groups. A non-zero value from
        ``openshift_check_min_host_disk_gb`` (in GB) replaces the
        recommendation for that path.

        Returns: result as hash; empty when every path has enough space,
        otherwise ``failed`` and ``msg`` for the first path that falls short.
        """
        host_groups = get_var(task_vars, "group_names")
        mount_list = get_var(task_vars, "ansible_mounts")
        mounts_by_path = {entry['mount']: entry for entry in mount_list}

        user_config = get_var(task_vars, "openshift_check_min_host_disk_gb", default={})
        try:
            number = float(user_config)
        except TypeError:
            # If it is not a number, then it should be a nested dict.
            pass
        else:
            # For backwards-compatibility, a plain number overrides the
            # required config for '/var' in every group.
            user_config = {'/var': dict.fromkeys(('masters', 'nodes', 'etcd'), number)}

        # TODO: as suggested in
        # https://github.com/openshift/openshift-ansible/pull/4436#discussion_r122180021,
        # maybe we could support checking disk availability in paths that are
        # not part of the official recommendation but present in the user
        # configuration.
        for path, recommendation in self.recommended_disk_space_bytes.items():
            available = self.free_bytes(path, mounts_by_path)
            required = max(recommendation.get(group, 0) for group in host_groups)

            overrides = user_config.get(path, {})
            # NOTE: the user config is in GB, but we compare bytes, thus the
            # conversion.
            override_bytes = max(overrides.get(group, 0) for group in host_groups) * 10**9
            if override_bytes:
                required = override_bytes

            if available < required:
                template = (
                    'Available disk space in "{}" ({:.1f} GB) '
                    'is below minimum recommended ({:.1f} GB)'
                )
                return {
                    'failed': True,
                    'msg': template.format(
                        path, float(available) / 10**9, float(required) / 10**9),
                }

        return {}
    def is_active(cls, task_vars):
        """Skip hosts with unsupported deployment types.

        A deployment type is supported when DEPLOYMENT_IMAGE_INFO contains it.
        """
        deployment_type = get_var(task_vars, "openshift_deployment_type")
        supported = deployment_type in DEPLOYMENT_IMAGE_INFO

        parent_active = super(DockerImageAvailability, cls).is_active(task_vars)
        if not parent_active:
            return parent_active
        return supported
Ejemplo n.º 24
0
 def is_active(cls, task_vars):
     """Skip hosts that do not have recommended memory requirements.

     A host has a recommendation when at least one of its groups appears
     in cls.recommended_memory_bytes.
     """
     host_groups = set(get_var(task_vars, "group_names", default=[]))
     groups_with_recommendation = host_groups.intersection(cls.recommended_memory_bytes)
     has_recommendation = bool(groups_with_recommendation)

     parent_active = super(MemoryAvailability, cls).is_active(task_vars)
     if not parent_active:
         return parent_active
     return has_recommendation
Ejemplo n.º 25
0
    def exec_oc(execute_module=None, namespace="logging", cmd_str="", extra_args=None, task_vars=None):
        """
        Execute an 'oc' command in the remote host.

        The command is run through the "ocutil" module, using the admin
        kubeconfig located under the host's openshift.common.config_base.

        Returns: the "result" value reported by the module ("" if absent),
        or raises OpenShiftCheckException on error
        """
        config_base = get_var(task_vars, "openshift", "common", "config_base")
        args = {
            "namespace": namespace,
            "config_file": os.path.join(config_base, "master", "admin.kubeconfig"),
            "cmd": cmd_str,
            "extra_args": list(extra_args) if extra_args else [],
        }

        result = execute_module("ocutil", args, task_vars)
        if not result.get("failed"):
            return result.get("result", "")

        # A failed module run does not necessarily carry a "result" key; fall
        # back to "msg" so the underlying failure is reported instead of being
        # masked by a KeyError raised while building the error message.
        error = result.get("result", result.get("msg", ""))
        if error == '[Errno 2] No such file or directory':
            raise OpenShiftCheckException(
                "This host is supposed to be a master but does not have the `oc` command where expected.\n"
                "Has an installation been run on this host yet?"
            )

        raise OpenShiftCheckException(
            (
                'Unexpected error using `oc` to validate the logging stack components.\n'
                'Error executing `oc {cmd}`:\n'
                '{error}'
            ).format(cmd=args['cmd'], error=error)
        )
Ejemplo n.º 26
0
    def check_fluentd(self, pods, task_vars):
        """Verify fluentd is running everywhere. Returns: error string

        A failure to list the nodes or to filter the labeled ones is returned
        immediately; the findings of the remaining checks are joined with
        newlines. An empty string means no problem was found.
        """
        selector = get_var(task_vars, 'openshift_logging_fluentd_nodeselector',
                           default='logging-infra-fluentd=true')

        nodes_by_name, lookup_error = self.get_nodes_by_name(task_vars)
        if lookup_error:
            return lookup_error

        fluentd_nodes, filter_error = self._filter_fluentd_labeled_nodes(nodes_by_name, selector)
        if filter_error:
            return filter_error

        # run every check, then keep only those that reported something
        findings = [
            self._check_node_labeling(nodes_by_name, fluentd_nodes, selector, task_vars),
            self._check_nodes_have_fluentd(pods, fluentd_nodes),
            self._check_fluentd_pods_running(pods),
        ]
        problems = [finding for finding in findings if finding]

        # Make sure there are no extra fluentd pods
        if len(pods) > len(fluentd_nodes):
            problems.append(
                'There are more Fluentd pods running than nodes labeled.\n'
                'This may not cause problems with logging but it likely indicates something wrong.'
            )

        return '\n'.join(problems)
Ejemplo n.º 27
0
    def run(self, tmp, task_vars):
        """Check various things and gather errors. Returns: result as hash

        The logging namespace (default "logging") is stored on the instance
        before the fluentd pods are looked up and checked.
        """
        self.logging_namespace = get_var(task_vars, "openshift_logging_namespace", default="logging")

        pods, lookup_error = super(Fluentd, self).get_pods_for_component(
            self.execute_module, self.logging_namespace, "fluentd", task_vars)
        if lookup_error:
            return {"failed": True, "changed": False, "msg": lookup_error}

        problem = self.check_fluentd(pods, task_vars)
        if not problem:
            # TODO(lmeyer): run it all again for the ops cluster
            return {
                "failed": False,
                "changed": False,
                "msg": 'No problems found with Fluentd deployment.',
            }

        report = (
            "The following Fluentd deployment issue was found:"
            "\n-------\n"
            "{}"
        ).format(problem)
        return {"failed": True, "changed": False, "msg": report}
Ejemplo n.º 28
0
    def exec_oc(execute_module=None, namespace="logging", cmd_str="", extra_args=None, task_vars=None):
        """
        Execute an 'oc' command in the remote host.

        The command is run through the "ocutil" module, using the admin
        kubeconfig located under the host's openshift.common.config_base.

        Returns: the "result" value reported by the module ("" if absent),
        or raises OpenShiftCheckException on error
        """
        config_base = get_var(task_vars, "openshift", "common", "config_base")
        args = {
            "namespace": namespace,
            "config_file": os.path.join(config_base, "master", "admin.kubeconfig"),
            "cmd": cmd_str,
            "extra_args": list(extra_args) if extra_args else [],
        }

        result = execute_module("ocutil", args, None, task_vars)
        if not result.get("failed"):
            return result.get("result", "")

        # A failed module run does not necessarily carry a "result" key; fall
        # back to "msg" so the underlying failure is reported instead of being
        # masked by a KeyError raised while building the error message.
        error = result.get("result", result.get("msg", ""))
        if error == '[Errno 2] No such file or directory':
            raise OpenShiftCheckException(
                "This host is supposed to be a master but does not have the `oc` command where expected.\n"
                "Has an installation been run on this host yet?"
            )

        raise OpenShiftCheckException(
            (
                'Unexpected error using `oc` to validate the logging stack components.\n'
                'Error executing `oc {cmd}`:\n'
                '{error}'
            ).format(cmd=args['cmd'], error=error)
        )
Ejemplo n.º 29
0
    def run(self, tmp, task_vars):
        """Search the etcd unit's journal for signs of excessive etcd traffic.

        The search_journalctl module looks for etcd sync duration warnings
        logged by the "etcd_container" unit on containerized installations and
        by the "etcd" unit otherwise.

        Returns: result as hash; empty when nothing was matched and the module
        did not fail, otherwise with ``failed`` and ``msg`` set.
        """
        is_containerized = get_var(task_vars, "openshift", "common", "is_containerized")
        unit = "etcd_container" if is_containerized else "etcd"

        log_matchers = [{
            "start_regexp": r"Starting Etcd Server",
            "regexp": r"etcd: sync duration of [^,]+, expected less than 1s",
            "unit": unit
        }]

        match = self.execute_module("search_journalctl", {
            "log_matchers": log_matchers,
        }, task_vars)

        if match.get("matched"):
            msg = ("Higher than normal etcd traffic detected.\n"
                   "OpenShift 3.4 introduced an increase in etcd traffic.\n"
                   "Upgrading to OpenShift 3.6 is recommended in order to fix this issue.\n"
                   "Please refer to https://access.redhat.com/solutions/2916381 for more information.")
            return {"failed": True, "msg": msg}

        if match.get("failed"):
            # A failed run may come back without an "errors" list; joining None
            # would raise TypeError and hide the failure, so fall back to the
            # module's "msg" (or an empty message) in that case.
            errors = match.get("errors") or [match.get("msg", "")]
            return {"failed": True, "msg": "\n".join(errors)}

        return {}
Ejemplo n.º 30
0
    def run(self, tmp, task_vars):
        """Check various things and gather errors. Returns: result as hash

        The Kibana route is only checked when the pod check found no problem.
        """
        self.logging_namespace = get_var(task_vars, "openshift_logging_namespace", default="logging")

        pods, lookup_error = super(Kibana, self).get_pods_for_component(
            self.execute_module, self.logging_namespace, "kibana", task_vars)
        if lookup_error:
            return {"failed": True, "changed": False, "msg": lookup_error}

        problem = self.check_kibana(pods) or self._check_kibana_route(task_vars)
        if not problem:
            # TODO(lmeyer): run it all again for the ops cluster
            return {"failed": False, "changed": False, "msg": 'No problems found with Kibana deployment.'}

        report = (
            "The following Kibana deployment issue was found:"
            "\n-------\n"
            "{}"
        ).format(problem)
        return {"failed": True, "changed": False, "msg": report}
Ejemplo n.º 31
0
    def _check_dm_usage(self, driver_status, task_vars):
        """Check usage thresholds for the Docker devicemapper thin pool. Return: result dict.

        Backing assumptions: We expect devicemapper to be backed by an auto-expanding thin pool
        implemented as an LV in an LVM2 VG. This is how docker-storage-setup currently configures
        devicemapper storage. The LV is "thin" because it does not use all available storage
        from its VG, instead expanding as needed; so to determine available space, we gather
        current usage as the Docker API reports for the driver as well as space available for
        expansion in the pool's VG.
        Usage within the LV is divided into pools allocated to data and metadata, either of which
        could run out of space first; so we check both.

        The returned dict carries the gathered values, their "*_bytes" conversions, the
        thresholds and computed percentages, plus "msg" and, on a problem, "failed": True.
        """
        vals = dict(
            vg_free=self._get_vg_free(driver_status.get("Pool Name"), task_vars),
            data_used=driver_status.get("Data Space Used"),
            data_total=driver_status.get("Data Space Total"),
            metadata_used=driver_status.get("Metadata Space Used"),
            metadata_total=driver_status.get("Metadata Space Total"),
        )

        # convert all human-readable strings to bytes
        # (iterate over a copy because the loop adds keys to vals)
        for key, value in vals.copy().items():
            try:
                vals[key + "_bytes"] = self._convert_to_bytes(value)
            except ValueError as err:  # unlikely to hit this from API info, but just to be safe
                return {
                    "failed": True,
                    "values": vals,
                    "msg": "Could not interpret {} value '{}' as bytes: {}".format(key, value, str(err))
                }

        # determine the threshold percentages which usage should not exceed
        for name, default in [("data", self.max_thinpool_data_usage_percent),
                              ("metadata", self.max_thinpool_meta_usage_percent)]:
            percent = get_var(task_vars, "max_thinpool_" + name + "_usage_percent", default=default)
            try:
                vals[name + "_threshold"] = float(percent)
            except (TypeError, ValueError):  # TypeError covers non-string values such as None
                return {
                    "failed": True,
                    "msg": "Specified thinpool {} usage limit '{}' is not a percentage".format(name, percent)
                }

        # test whether the thresholds are exceeded
        messages = []
        for name in ["data", "metadata"]:
            capacity = vals[name + "_total_bytes"] + vals["vg_free_bytes"]
            if not capacity:
                # nothing allocated and nothing to expand into: a percentage is undefined
                return {
                    "failed": True,
                    "values": vals,
                    "msg": "Docker thinpool {} has no total or expandable space; "
                           "cannot compute usage percentage.".format(name)
                }
            # 100.0 forces true division even when the byte counts are ints on Python 2
            vals[name + "_pct_used"] = 100.0 * vals[name + "_used_bytes"] / capacity
            if vals[name + "_pct_used"] > vals[name + "_threshold"]:
                messages.append(
                    "Docker thinpool {name} usage percentage {pct:.1f} "
                    "is higher than threshold {thresh:.1f}.".format(
                        name=name,
                        pct=vals[name + "_pct_used"],
                        thresh=vals[name + "_threshold"],
                    ))
                vals["failed"] = True

        vals["msg"] = "\n".join(messages or ["Thinpool usage is within thresholds."])
        return vals
Ejemplo n.º 32
0
    def check_dm_usage(self, driver_status, task_vars):
        """Check usage thresholds for Docker dm storage driver. Return: result dict.

        Backing assumptions: We expect devicemapper to be backed by an auto-expanding thin pool
        implemented as an LV in an LVM2 VG. This is how docker-storage-setup currently configures
        devicemapper storage. The LV is "thin" because it does not use all available storage
        from its VG, instead expanding as needed; so to determine available space, we gather
        current usage as the Docker API reports for the driver as well as space available for
        expansion in the pool's VG.
        Usage within the LV is divided into pools allocated to data and metadata, either of which
        could run out of space first; so we check both.

        The returned dict carries the gathered values, their "*_bytes" conversions, the
        thresholds and computed percentages, plus "msg" and, on a problem, "failed": True.
        """
        vals = dict(
            vg_free=self.get_vg_free(driver_status.get("Pool Name"), task_vars),
            data_used=driver_status.get("Data Space Used"),
            data_total=driver_status.get("Data Space Total"),
            metadata_used=driver_status.get("Metadata Space Used"),
            metadata_total=driver_status.get("Metadata Space Total"),
        )

        # convert all human-readable strings to bytes
        # (iterate over a copy because the loop adds keys to vals)
        for key, value in vals.copy().items():
            try:
                vals[key + "_bytes"] = self.convert_to_bytes(value)
            except ValueError as err:  # unlikely to hit this from API info, but just to be safe
                return {
                    "failed": True,
                    "values": vals,
                    "msg": "Could not interpret {} value '{}' as bytes: {}".format(key, value, str(err))
                }

        # determine the threshold percentages which usage should not exceed
        for name, default in [("data", self.max_thinpool_data_usage_percent),
                              ("metadata", self.max_thinpool_meta_usage_percent)]:
            percent = get_var(task_vars, "max_thinpool_" + name + "_usage_percent", default=default)
            try:
                vals[name + "_threshold"] = float(percent)
            except (TypeError, ValueError):  # TypeError covers non-string values such as None
                return {
                    "failed": True,
                    "msg": "Specified thinpool {} usage limit '{}' is not a percentage".format(name, percent)
                }

        # test whether the thresholds are exceeded
        messages = []
        for name in ["data", "metadata"]:
            capacity = vals[name + "_total_bytes"] + vals["vg_free_bytes"]
            if not capacity:
                # nothing allocated and nothing to expand into: a percentage is undefined
                return {
                    "failed": True,
                    "values": vals,
                    "msg": "Docker thinpool {} has no total or expandable space; "
                           "cannot compute usage percentage.".format(name)
                }
            # 100.0 forces true division even when the byte counts are ints on Python 2
            vals[name + "_pct_used"] = 100.0 * vals[name + "_used_bytes"] / capacity
            if vals[name + "_pct_used"] > vals[name + "_threshold"]:
                messages.append(
                    "Docker thinpool {name} usage percentage {pct:.1f} "
                    "is higher than threshold {thresh:.1f}.".format(
                        name=name,
                        pct=vals[name + "_pct_used"],
                        thresh=vals[name + "_threshold"],
                    ))
                vals["failed"] = True

        vals["msg"] = "\n".join(messages or ["Thinpool usage is within thresholds."])
        return vals
Ejemplo n.º 33
0
 def is_active(cls, task_vars):
     """Skip hosts that do not have recommended disk space requirements.

     The check is active only when the parent class considers it active and
     at least one of the host's groups appears as a key in one of the
     recommended_disk_space_bytes entries.
     """
     host_groups = get_var(task_vars, "group_names", default=[])
     groups_with_recommendation = {
         group
         for by_group in cls.recommended_disk_space_bytes.values()
         for group in by_group.keys()
     }
     applies_to_host = bool(groups_with_recommendation.intersection(host_groups))
     return super(DiskAvailability, cls).is_active(task_vars) and applies_to_host
Ejemplo n.º 34
0
    def _check_elasticsearch_diskspace(self, pods_by_name, task_vars):
        """
        Run `df` in each ES pod and compare the inode and disk usage of
        /elasticsearch/persistent against the configured thresholds.
        Returns: list of errors
        """
        # expecting one header looking like 'IUse% Use%' and one body line
        body_re = r'\s*(\d+)%?\s+(\d+)%?\s*$'

        problems = []
        for pod in pods_by_name:
            disk_output = self._exec_oc(
                'exec {} -- df --output=ipcent,pcent /elasticsearch/persistent'.format(pod),
                [],
                task_vars,
            )
            lines = disk_output.splitlines()

            # only try to parse the body line when the overall shape is right
            parsed = None
            if len(lines) == 2 and len(lines[0].split()) == 2:
                parsed = re.match(body_re, lines[1])
            if not parsed:
                problems.append(
                    'Could not retrieve storage usage from logging ES pod "{pod}".\n'
                    'Response to `df` command was:\n{output}'.format(pod=pod, output=disk_output)
                )
                continue
            inode_pct, disk_pct = parsed.groups()

            inode_limit = get_var(task_vars, 'openshift_check_efk_es_inode_pct', default='90')
            if int(inode_pct) >= int(inode_limit):
                problems.append(
                    'Inode percent usage on the storage volume for logging ES pod "{pod}"\n'
                    '  is {pct}, greater than threshold {limit}.\n'
                    '  Note: threshold can be specified in inventory with {param}'.format(
                        pod=pod,
                        pct=str(inode_pct),
                        limit=str(inode_limit),
                        param='openshift_check_efk_es_inode_pct',
                    ))

            disk_limit = get_var(task_vars, 'openshift_check_efk_es_storage_pct', default='80')
            if int(disk_pct) >= int(disk_limit):
                problems.append(
                    'Disk percent usage on the storage volume for logging ES pod "{pod}"\n'
                    '  is {pct}, greater than threshold {limit}.\n'
                    '  Note: threshold can be specified in inventory with {param}'.format(
                        pod=pod,
                        pct=str(disk_pct),
                        limit=str(disk_limit),
                        param='openshift_check_efk_es_storage_pct',
                    ))

        return problems
Ejemplo n.º 35
0
    def run(self, tmp, task_vars):
        """Fail when free space on the volume containing "/var" is below the minimum.

        The minimum is `openshift_check_min_host_disk_gb` (in units of 10**9
        bytes) when set to a non-zero value, otherwise the largest
        recommendation among the host's groups.

        Returns: empty dict on success, or a dict with 'failed' and 'msg'.
        """
        group_names = get_var(task_vars, "group_names")
        ansible_mounts = get_var(task_vars, "ansible_mounts")
        free_bytes = self.openshift_available_disk(ansible_mounts)

        # max() raises ValueError on an empty sequence, so a host that belongs
        # to no groups falls back to "no recommendation" (0) instead of crashing.
        recommended_min = max(
            [self.recommended_disk_space_bytes.get(name, 0) for name in group_names] or [0])
        configured_min = int(get_var(task_vars, "openshift_check_min_host_disk_gb", default=0)) * 10**9
        min_free_bytes = configured_min or recommended_min

        if free_bytes < min_free_bytes:
            return {
                'failed': True,
                'msg': (
                    'Available disk space ({:.1f} GB) for the volume containing '
                    '"/var" is below minimum recommended space ({:.1f} GB)'
                ).format(float(free_bytes) / 10**9, float(min_free_bytes) / 10**9)
            }

        return {}
    def ensure_dependencies(self, task_vars):
        """Bring self.dependencies to the latest version through the yum module.

        Nothing is installed when the openshift.common.is_atomic value is truthy.

        Returns: tuple of (message, failed, changed).
        """
        on_atomic = get_var(task_vars, "openshift", "common", "is_atomic")
        if on_atomic:
            return "", False, False

        yum_args = {"name": self.dependencies, "state": "latest"}
        outcome = self.module_executor("yum", yum_args, task_vars)

        message = outcome.get("msg", "")
        # a non-zero return code counts as failure even without a "failed" flag
        failed = outcome.get("failed", False) or outcome.get("rc", 0) != 0
        changed = outcome.get("changed")
        return message, failed, changed
Ejemplo n.º 37
0
    def _etcd_mount_info(self, task_vars):
        """Return the ansible_mounts entry for the first supported etcd mount path.

        Paths are tried in the order given by self.supported_mount_paths.
        Raises OpenShiftCheckException when none of them is mounted.
        """
        mounts_by_path = dict(
            (entry.get("mount"), entry) for entry in get_var(task_vars, "ansible_mounts")
        )

        found = next(
            (mounts_by_path[candidate]
             for candidate in self.supported_mount_paths
             if candidate in mounts_by_path),
            None
        )
        if found is not None:
            return found

        mounted = ', '.join(sorted(mounts_by_path)) or 'none'
        raise OpenShiftCheckException(
            "Unable to find etcd storage mount point. Paths mounted: {}.".format(mounted)
        )
Ejemplo n.º 38
0
    def run(self, tmp, task_vars):
        """Fail when the host's total memory is below the recommendation for its groups.

        Total memory is `ansible_memtotal_mb` multiplied by 10**6; the minimum
        is the largest recommended_memory_bytes entry among the host's groups.

        Returns: empty dict on success, or a dict with 'failed' and 'msg'.
        """
        group_names = get_var(task_vars, "group_names")
        total_memory_bytes = get_var(task_vars, "ansible_memtotal_mb") * 10**6

        # max() raises ValueError on an empty sequence, so a host that belongs
        # to no groups falls back to "no recommendation" (0) instead of crashing.
        min_memory_bytes = max(
            [self.recommended_memory_bytes.get(name, 0) for name in group_names] or [0])

        if total_memory_bytes < min_memory_bytes:
            return {
                'failed':
                True,
                'msg':
                ('Available memory ({available:.1f} GB) '
                 'below recommended value ({recommended:.1f} GB)').format(
                     available=float(total_memory_bytes) / 10**9,
                     recommended=float(min_memory_bytes) / 10**9,
                 ),
            }

        return {}
Ejemplo n.º 39
0
    def _etcd_mount_info(self, task_vars):
        """Look up the mount record that holds etcd storage.

        Returns the ansible_mounts entry whose "mount" is the first match in
        self.supported_mount_paths; raises OpenShiftCheckException if there is
        no match.
        """
        mounts = {}
        for mnt in get_var(task_vars, "ansible_mounts"):
            mounts[mnt.get("mount")] = mnt

        supported = [path for path in self.supported_mount_paths if path in mounts]
        if supported:
            return mounts[supported[0]]

        # nothing matched: list what is mounted to help diagnose the problem
        mounted_paths = ', '.join(sorted(mounts)) or 'none'
        raise OpenShiftCheckException(
            "Unable to find etcd storage mount point. Paths mounted: {}.".format(mounted_paths))
Ejemplo n.º 40
0
    def run(self, tmp, task_vars):
        """Fail when the host's memory is too far below the required minimum.

        The minimum is `openshift_check_min_host_memory_gb` (scaled by GIB)
        when set to a non-zero value, otherwise the largest recommendation
        among the host's groups. self.memtotal_adjustment is added to the
        reported total before comparing.

        Returns: empty dict on success, or a dict with 'failed' and 'msg'.
        """
        group_names = get_var(task_vars, "group_names")
        total_memory_bytes = get_var(task_vars, "ansible_memtotal_mb") * MIB

        # max() raises ValueError on an empty sequence, so a host that belongs
        # to no groups falls back to "no recommendation" (0) instead of crashing.
        recommended_min = max(
            [self.recommended_memory_bytes.get(name, 0) for name in group_names] or [0])
        configured_min = float(get_var(task_vars, "openshift_check_min_host_memory_gb", default=0)) * GIB
        min_memory_bytes = configured_min or recommended_min

        if total_memory_bytes + self.memtotal_adjustment < min_memory_bytes:
            return {
                'failed': True,
                'msg': (
                    'Available memory ({available:.1f} GiB) is too far '
                    'below recommended value ({recommended:.1f} GiB)'
                ).format(
                    available=float(total_memory_bytes) / GIB,
                    recommended=float(min_memory_bytes) / GIB,
                ),
            }

        return {}
Ejemplo n.º 41
0
    def run(self, tmp, task_vars):
        """Run the aos_version module against the list of packages to verify.

        The list holds openvswitch and docker at their required versions, plus
        the base, "-master" and "-node" packages named after the service type
        at `openshift_release`. Multiple-minor-release checking is requested
        only for the 'openshift-enterprise' deployment type.

        Returns: whatever self.execute_module returns.
        """
        rpm_prefix = get_var(task_vars, "openshift", "common", "service_type")
        openshift_release = get_var(task_vars, "openshift_release", default='')
        deployment_type = get_var(task_vars, "openshift_deployment_type")
        check_multi_minor_release = deployment_type in ('openshift-enterprise',)

        package_list = [
            dict(name="openvswitch", version=self.get_required_ovs_version(task_vars), check_multi=False),
            dict(name="docker", version=self.get_required_docker_version(task_vars), check_multi=False),
        ]
        for name_template in ("{}", "{}-master", "{}-node"):
            package_list.append(dict(
                name=name_template.format(rpm_prefix),
                version=openshift_release,
                check_multi=check_multi_minor_release,
            ))

        return self.execute_module(
            "aos_version",
            {"package_list": package_list},
            tmp=tmp,
            task_vars=task_vars,
        )
Ejemplo n.º 42
0
 def _check_node_labeling(nodes_by_name, fluentd_nodes, node_selector, task_vars):
     """Note if nodes are not labeled as expected. Returns: error string

     The intended nodes come from `openshift_logging_fluentd_hosts`; an empty
     value or one containing '--all' means every node in nodes_by_name.
     Returns None when every intended node is a key of fluentd_nodes.
     """
     intended_nodes = get_var(task_vars, 'openshift_logging_fluentd_hosts', default=['--all'])
     if not intended_nodes or '--all' in intended_nodes:
         intended_nodes = nodes_by_name.keys()
     nodes_missing_labels = set(intended_nodes) - set(fluentd_nodes.keys())
     if nodes_missing_labels:
         # sort so the message is stable from run to run (set order is arbitrary)
         return (
             'The following nodes are supposed to be labeled with {label} but are not:\n'
             '  {nodes}\n'
             'Fluentd will not aggregate logs from these nodes.'
         ).format(label=node_selector, nodes=', '.join(sorted(nodes_missing_labels)))
     return None
Ejemplo n.º 43
0
    def run(self, tmp, task_vars):
        """Fail when free space on the volume containing "/var" is below the recommendation.

        The minimum is the largest recommended_disk_space_bytes entry among
        the host's groups.

        Returns: empty dict on success, or a dict with 'failed' and 'msg'.
        """
        group_names = get_var(task_vars, "group_names")
        ansible_mounts = get_var(task_vars, "ansible_mounts")

        # max() raises ValueError on an empty sequence, so a host that belongs
        # to no groups falls back to "no recommendation" (0) instead of crashing.
        min_free_bytes = max(
            [self.recommended_disk_space_bytes.get(name, 0) for name in group_names] or [0])
        free_bytes = self.openshift_available_disk(ansible_mounts)

        if free_bytes < min_free_bytes:
            return {
                'failed':
                True,
                'msg':
                ('Available disk space ({:.1f} GB) for the volume containing '
                 '"/var" is below minimum recommended space ({:.1f} GB)'
                 ).format(
                     float(free_bytes) / 10**9,
                     float(min_free_bytes) / 10**9)
            }

        return {}
Ejemplo n.º 44
0
    def run(self, tmp, task_vars):
        """Hand the list of packages to verify to the aos_version module.

        openvswitch and docker are checked at their required versions; the
        base, "-master" and "-node" packages named after the service type are
        checked at `openshift_release`, with multiple-minor-release checking
        requested only for the 'openshift-enterprise' deployment type.

        Returns: whatever self.execute_module returns.
        """
        rpm_prefix = get_var(task_vars, "openshift", "common", "service_type")
        openshift_release = get_var(task_vars, "openshift_release", default='')
        deployment_type = get_var(task_vars, "openshift_deployment_type")
        check_multi_minor_release = deployment_type in ('openshift-enterprise',)

        def package(name, version, check_multi):
            """Build one package_list entry."""
            return {"name": name, "version": version, "check_multi": check_multi}

        package_list = [
            package("openvswitch", self.get_required_ovs_version(task_vars), False),
            package("docker", self.get_required_docker_version(task_vars), False),
            package("{}".format(rpm_prefix), openshift_release, check_multi_minor_release),
            package("{}-master".format(rpm_prefix), openshift_release, check_multi_minor_release),
            package("{}-node".format(rpm_prefix), openshift_release, check_multi_minor_release),
        ]

        return self.execute_module("aos_version", {"package_list": package_list}, tmp, task_vars)
    def run(self, tmp, task_vars):
        """Fail when the host's memory is too far below the required minimum.

        The minimum is `openshift_check_min_host_memory_gb` (scaled by GIB)
        when set to a non-zero value, otherwise the largest recommendation
        among the host's groups. self.memtotal_adjustment is added to the
        reported total before comparing.

        Returns: empty dict on success, or a dict with 'failed' and 'msg'.
        """
        group_names = get_var(task_vars, "group_names")
        total_memory_bytes = get_var(task_vars, "ansible_memtotal_mb") * MIB

        # max() raises ValueError on an empty sequence, so a host that belongs
        # to no groups falls back to "no recommendation" (0) instead of crashing.
        recommended_min = max(
            [self.recommended_memory_bytes.get(name, 0) for name in group_names] or [0])
        configured_min = float(
            get_var(task_vars, "openshift_check_min_host_memory_gb",
                    default=0)) * GIB
        min_memory_bytes = configured_min or recommended_min

        if total_memory_bytes + self.memtotal_adjustment < min_memory_bytes:
            return {
                'failed':
                True,
                'msg':
                ('Available memory ({available:.1f} GiB) is too far '
                 'below recommended value ({recommended:.1f} GiB)').format(
                     available=float(total_memory_bytes) / GIB,
                     recommended=float(min_memory_bytes) / GIB,
                 ),
            }

        return {}
Ejemplo n.º 46
0
    def check_overlay_usage(self, docker_info, task_vars):
        """Check disk usage on OverlayFS backing store volume. Return: result dict.

        The path checked is "<DockerRootDir>/<Driver>" from docker_info. The
        result is an empty dict when usage is within `max_overlay_usage_percent`,
        otherwise a dict with "failed" and "msg".
        """
        docker_root = docker_info.get("DockerRootDir", "/var/lib/docker")
        path = docker_root + "/" + docker_info["Driver"]

        limit = get_var(task_vars, "max_overlay_usage_percent", default=self.max_overlay_usage_percent)
        try:
            limit = float(limit)
        except ValueError:
            return {
                "failed": True,
                "msg": "Specified 'max_overlay_usage_percent' is not a percentage: {}".format(limit),
            }

        mount = self.find_ansible_mount(path, get_var(task_vars, "ansible_mounts"))
        try:
            free_bytes = mount['size_available']
            total_bytes = mount['size_total']
            used_bytes = total_bytes - free_bytes
            usage = 100.0 * used_bytes / total_bytes
        except (KeyError, ZeroDivisionError):
            # a record without sizes, or with a zero total, cannot be evaluated
            invalid_msg = (
                "The ansible_mount found for path {} is invalid.\n"
                "This is likely to be an Ansible bug. The record was:\n"
                "{}"
            ).format(path, json.dumps(mount, indent=2))
            return {"failed": True, "msg": invalid_msg}

        if not usage > limit:
            return {}

        over_msg = (
            "For Docker OverlayFS mount point {path},\n"
            "usage percentage {pct:.1f} is higher than threshold {thresh:.1f}."
        ).format(path=mount["mount"], pct=usage, thresh=limit)
        return {"failed": True, "msg": over_msg}
Ejemplo n.º 47
0
    def run(self, tmp, task_vars):
        """Query each etcd host for the size of stored OpenShift image data.

        The size limit is `etcd_max_image_data_size_bytes`, defaulting to half
        of the space currently used on the etcd mount. Hosts are queried one
        by one through the "etcdkeysize" module; the first host that fails to
        report or exceeds the limit ends the check.

        Returns: {"changed": False} on success, otherwise a failure dict with "msg".
        """
        mount = self._get_etcd_mountpath(get_var(task_vars, "ansible_mounts"))
        avail_bytes = mount["size_available"]
        total_bytes = mount["size_total"]

        size_limit = get_var(
            task_vars,
            "etcd_max_image_data_size_bytes",
            default=int(0.5 * float(total_bytes - avail_bytes)),
        )

        use_ssl = get_var(task_vars, "openshift", "master", "etcd_use_ssl", default=False)
        port = get_var(task_vars, "openshift", "master", "etcd_port", default=2379)
        hosts = get_var(task_vars, "openshift", "master", "etcd_hosts")

        config_base = get_var(task_vars, "openshift", "common", "config_base")

        # client certificate material defaults to the master's etcd client files
        cert = task_vars.get("etcd_client_cert", config_base + "/master/master.etcd-client.crt")
        key = task_vars.get("etcd_client_key", config_base + "/master/master.etcd-client.key")
        ca_cert = task_vars.get("etcd_client_ca_cert", config_base + "/master/master.etcd-ca.crt")

        protocol = "https" if use_ssl else "http"

        for host in list(hosts):
            module_args = {
                "size_limit_bytes": size_limit,
                "paths": ["/openshift.io/images", "/openshift.io/imagestreams"],
                "host": host,
                "port": port,
                "protocol": protocol,
                "version_prefix": "/v2",
                "allow_redirect": True,
                "ca_cert": ca_cert,
                "cert": {"cert": cert, "key": key},
            }
            stats = self.module_executor("etcdkeysize", module_args, task_vars)

            if stats.get("rc", 0) != 0 or stats.get("failed"):
                # prefer the module's stderr over its message when there is any
                reason = stats.get("msg")
                if stats.get("module_stderr"):
                    reason = stats["module_stderr"]
                failure = 'Failed to retrieve stats for etcd host "{host}": {reason}'.format(
                    host=host, reason=reason)
                return {"failed": True, "changed": False, "msg": failure}

            if stats["size_limit_exceeded"]:
                template = ("The size of OpenShift image data stored in etcd host "
                            "\"{host}\" exceeds the maximum recommended limit of {limit:.2f} GB. "
                            "Use the `oadm prune images` command to cleanup unused Docker images.")
                return {
                    "failed": True,
                    "msg": template.format(host=host, limit=self._to_gigabytes(size_limit)),
                }

        return {"changed": False}
Ejemplo n.º 48
0
    def run(self, tmp, task_vars):
        """Fail when usage of the etcd storage mount exceeds the threshold percentage.

        The threshold is `etcd_device_usage_threshold_percent`, defaulting to
        self.default_threshold_percent. It is coerced to float so that a value
        given as a string in the inventory can be compared and formatted.

        Returns: {"changed": False} on success, otherwise a dict with "failed" and "msg".
        """
        mount_info = self._etcd_mount_info(task_vars)
        available = mount_info["size_available"]
        total = mount_info["size_total"]
        used = total - available

        threshold = get_var(
            task_vars,
            "etcd_device_usage_threshold_percent",
            default=self.default_threshold_percent
        )
        try:
            threshold = float(threshold)
        except (TypeError, ValueError):
            return {
                "failed": True,
                "msg": "Specified 'etcd_device_usage_threshold_percent' is not a percentage: {}".format(threshold),
            }

        device = mount_info.get("device", "unknown")
        mount = mount_info.get("mount", "unknown")

        if not total:
            # a zero-sized mount would otherwise raise ZeroDivisionError below
            msg = "etcd storage reports a total size of zero. Device: {}, mount: {}.".format(device, mount)
            return {"failed": True, "msg": msg}

        used_percent = 100.0 * used / total

        if used_percent > threshold:
            msg = "etcd storage usage ({:.1f}%) is above threshold ({:.1f}%). Device: {}, mount: {}.".format(
                used_percent, threshold, device, mount
            )
            return {"failed": True, "msg": msg}

        return {"changed": False}
    def is_active(cls, task_vars):
        """Skip hosts with unsupported deployment types.

        A deployment type is supported when it is a member of DEPLOYMENT_IMAGE_INFO.
        """
        supported = get_var(task_vars, "openshift_deployment_type") in DEPLOYMENT_IMAGE_INFO
        return super(DockerImageAvailability, cls).is_active(task_vars) and supported
Ejemplo n.º 50
0
 def is_containerized(task_vars):
     """Return the openshift.common.is_containerized value from task_vars."""
     containerized = get_var(task_vars, "openshift", "common", "is_containerized")
     return containerized
Ejemplo n.º 51
0
 def is_active(cls, task_vars):
     """Skip hosts whose openshift.common.is_containerized value is truthy."""
     containerized = get_var(task_vars, "openshift", "common", "is_containerized")
     parent_active = super(NotContainerizedMixin, cls).is_active(task_vars)
     if not parent_active:
         return parent_active
     return not containerized
Ejemplo n.º 52
0
def test_get_var_ok(task_vars, keys, expected):
    """get_var returns the expected value for the given key path."""
    found = get_var(task_vars, *keys)
    assert found == expected
Ejemplo n.º 53
0
def test_get_var_error(task_vars, missing_keys):
    """get_var raises OpenShiftCheckException when the key path is missing."""
    expect_failure = pytest.raises(OpenShiftCheckException)
    with expect_failure:
        get_var(task_vars, *missing_keys)
Ejemplo n.º 54
0
def test_get_var_default(task_vars, missing_keys):
    """get_var hands back the supplied default when the key path is missing."""
    sentinel = object()
    found = get_var(task_vars, *missing_keys, default=sentinel)
    assert found == sentinel