Example 1
    def generate_snmp_spec(self, spec):
        """
        Return spec content for snmp-destination

        Args:
            spec (Dict): snmp-destination service spec config

        Returns:
            service_spec (Str)

        Example::

            specs:
              - service_type: snmp-destination
                spec:
                  credentials:
                    snmp_v3_auth_username: myadmin
                    snmp_v3_auth_password: mypassword
        """
        template = self._get_template("snmp")
        destination_node = spec["spec"].pop("snmp_destination", None)
        if destination_node:
            # Resolve the destination node only when one is provided
            node = get_node_by_id(self.cluster, destination_node)
            spec["spec"]["snmp_destination"] = self.get_addr(node) + ":162"
        node_installer = get_node_by_id(self.cluster, "node1")
        cmd = "cephadm shell ceph fsid"
        out, err = node_installer.exec_command(sudo=True, cmd=cmd)
        LOG.info(f"fsid: {out}")
        # Strip whitespace before deriving the SNMP engine id from the fsid
        engine_id = out.strip().replace("-", "")
        if engine_id:
            spec["spec"]["engine_id"] = engine_id
        return template.render(spec=spec)
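
As a quick aside, a minimal standalone sketch of how the SNMP engine_id is derived from the cluster fsid above; the fsid value is hypothetical:

# Hypothetical output of "ceph fsid"; the engine_id is the fsid with
# whitespace stripped and dashes removed.
fsid = "d0a8a5a2-8f3b-4a2e-9c7d-1b2c3d4e5f60\n"
engine_id = fsid.strip().replace("-", "")
print(engine_id)  # d0a8a5a28f3b4a2e9c7d1b2c3d4e5f60
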
Example 2
    def generate_host_spec(self, spec):
        """
        Return hosts spec content based on host config

        Args:
            spec (Dict): hosts specification

        Returns:
            hosts_spec (Str)

        Example::

            spec:
              - service_type: host
                address: true
                labels: apply-all-labels
                nodes:
                    - node2
                    - node3
        """
        template = self._get_template("host")
        hosts = []
        address = spec.get("address")
        labels = spec.get("labels")
        for node_name in spec["nodes"]:
            host = dict()
            node = get_node_by_id(self.cluster, node_name)
            host["hostname"] = self.get_hostname(node)
            if address:
                host["address"] = self.get_addr(node)
            if labels:
                host["labels"] = self.get_labels(node)
            hosts.append(host)

        return template.render(hosts=hosts)
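
As a rough illustration, the data handed to the host template for the docstring example would look like the following; the hostnames, addresses, and labels here are hypothetical placeholders for whatever the cluster inventory returns:

# Hypothetical render input for nodes node2/node3 with "address: true" and
# "labels: apply-all-labels" set in the spec.
hosts = [
    {"hostname": "ceph-node2", "address": "10.0.0.12", "labels": ["mon", "osd"]},
    {"hostname": "ceph-node3", "address": "10.0.0.13", "labels": ["osd"]},
]
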
Example 3
def run(ceph_cluster, **kw):

    ansible_dir = "/usr/share/ceph-ansible"
    ceph_installer = ceph_cluster.get_ceph_object("installer")
    config = kw.get("config")
    build = config.get("build", config.get("rhbuild"))
    daemon_to_kill = config.get("daemon-to-kill")
    daemon = config.get("daemon")
    instance = config.get("instance")
    playbook = f"shrink-{daemon}.yml"
    short_names = []

    # For all daemons the node name is required, but for OSDs the osd.id is
    # required to shrink the daemon
    if daemon != "osd":
        for node in daemon_to_kill:
            short_name = get_node_by_id(ceph_cluster, node).shortname
            short_names.append(short_name)
        node_name = ",".join(short_names)
        log.info(
            f"Executing {playbook} playbook to shrink {node_name} daemons")
    else:
        daemons_to_kill = ",".join(daemon_to_kill)
        log.info(
            f"Executing {playbook} playbook to shrink {daemons_to_kill} daemons"
        )

    check_inventory = f"sudo cat {ansible_dir}/hosts"

    # Display inventory before shrinking
    outbuf, _ = ceph_installer.exec_command(cmd=check_inventory)
    log.info(f"Inventory {outbuf}")

    # Based on the RHCS version, use the appropriate playbook path
    if build.startswith("4"):
        playbook = f"infrastructure-playbooks/{playbook}"
    else:
        ceph_installer.exec_command(
            sudo=True,
            cmd=f"cd {ansible_dir}; cp -R {ansible_dir}/infrastructure-playbooks/{playbook} .",
        )

    cmd = f"cd {ansible_dir}; ansible-playbook -vvvv -e ireallymeanit=yes {playbook}"

    # adding extra vars to the shrink playbook
    if daemon == "osd":
        cmd += f" -e {daemon}_to_kill={daemons_to_kill} -i hosts"
    elif daemon == "rgw":
        cmd += f" -e {daemon}_to_kill={node_name}.rgw{instance} -i hosts"
    else:
        cmd += f" -e {daemon}_to_kill={node_name} -i hosts"

    # Execute the shrink playbook for the provided daemon
    rc = ceph_installer.exec_command(cmd=cmd, long_running=True)

    # If playbook execution fails, log the error
    if rc != 0:
        log.error(f"Failed during ansible playbook execution: {playbook}\n")
        return rc
    return 0
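
For orientation, the assembled ansible-playbook invocations look roughly like this; the OSD ids and node shortname below are hypothetical, and the flags follow the code above:

ansible_dir = "/usr/share/ceph-ansible"
# Shrinking OSDs: osd ids are passed via osd_to_kill
osd_cmd = (
    f"cd {ansible_dir}; ansible-playbook -vvvv -e ireallymeanit=yes "
    "shrink-osd.yml -e osd_to_kill=1,2 -i hosts"
)
# Shrinking a mon whose node shortname resolved to "ceph-node2"
mon_cmd = (
    f"cd {ansible_dir}; ansible-playbook -vvvv -e ireallymeanit=yes "
    "shrink-mon.yml -e mon_to_kill=ceph-node2 -i hosts"
)
print(osd_cmd)
print(mon_cmd)
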
Example 4
def translate_to_ip(clusters, cluster_name: str, string: str) -> str:
    """
    Return the string after replacing each {node_ip:<node>} pattern with the IP address of <node>.

    In this method, the pattern {node_ip:<cluster>#<node>} is replaced with the value of
    node.ip_address, with <node> looked up in the named cluster (or in cluster_name when
    no cluster prefix is given).

    Args:
        clusters:       Dict of Ceph cluster instances, keyed by cluster name
        cluster_name:   Name of the cluster under test.
        string:         String to be searched for node ID patterns

    Return:
        String with node IDs replaced with IP addresses
    """
    replaced_string = string
    node_list = re.findall("{node_ip:(.+?)}", string)

    for node in node_list:
        node_ = node
        if "#" in node:
            cluster_name, node = node.split("#")

        node_ip = get_node_by_id(clusters[cluster_name], node).ip_address
        replacement_pattern = "{node_ip:" + node_ + "}"
        replaced_string = re.sub(replacement_pattern, node_ip, replaced_string)

    return replaced_string
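
A minimal standalone sketch of the substitution mechanics, using a hard-coded name-to-IP mapping in place of get_node_by_id() and real cluster objects; all names and addresses are hypothetical:

import re

node_ips = {"node5": "10.0.0.15"}  # hypothetical inventory


def demo_translate_to_ip(string: str) -> str:
    # Same {node_ip:...} pattern as above; "#" separates an optional cluster name.
    for token in re.findall(r"{node_ip:(.+?)}", string):
        node = token.split("#")[-1]
        string = string.replace("{node_ip:" + token + "}", node_ips[node])
    return string


print(demo_translate_to_ip("endpoint=http://{node_ip:site-a#node5}:8080"))
# endpoint=http://10.0.0.15:8080
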
Example 5
def down_osd_with_umount(osd_id, ceph_cluster):
    """Bring an OSD down by un-mounting its OSD device path.

    - Find the node where the OSD resides.
    - Stop the OSD service: "sudo systemctl stop ceph-osd@{id}.service".
    - Disable the daemon: "sudo systemctl disable ceph-osd@{id}.service".
    - Find the mount path using the ceph OSD id.
    - Unmount the OSD device path.
    - Validate that the mount has been removed.

    Args:
        osd_id: OSD id
        ceph_cluster: ceph cluster object
    """
    mon_node = ceph_cluster.get_ceph_object("mon")
    osd_info = find_osd_by_id(osd_id, mon_node)

    # find OSD node
    osd_node = get_node_by_id(ceph_cluster, osd_info["host"])

    # Stop and disable OSD daemon
    systemctl(osd_node, "stop", f"ceph-osd@{osd_id}")
    systemctl(osd_node, "disable", f"ceph-osd@{osd_id}")

    # umount OSD device
    osd_node.exec_command(
        cmd=f"umount /var/lib/ceph/osd/ceph-{osd_id}",
        sudo=True,
    )
    # List remaining ceph mounts to confirm the OSD path is no longer mounted
    osd_node.exec_command(
        cmd="mount | grep ceph",
        sudo=True,
    )
Example 6
def run(ceph_cluster, **kwargs: Any) -> int:
    """
    Test module for setting up snmp destination node.

    Args:
        ceph_cluster:   The participating Ceph cluster object
        kwargs:         Supported key value pairs for the key config are
                        node            | node on which commands to be executed
                        cmd             | Set of commands to be executed
    Returns:
        0 - on success
        1 - on failure

    Raises:
        CommandError

    """
    config = kwargs["config"]
    node_name = config.get("node")
    node = get_node_by_id(ceph_cluster, node_name)
    LOG.info(f"node: {node.shortname}")
    try:
        configure_firewalld(node)
        configure_snmptrapd(node, ceph_cluster, config)
    except BaseException as be:
        LOG.error(be)
        return 1
    return 0
Example 7
    def set_address(self, config):
        """
        Set IP address to node
        - Attach address to existing nodes

        config:
            service: host
            command: set_address
            base_cmd_args:
                verbose: true
            args:
                node: node1

        Args:
            config
        """
        cmd = ["ceph", "orch"]
        if config.get("base_cmd_args"):
            cmd.append(config_dict_to_string(config["base_cmd_args"]))

        args = config["args"]
        node_name = args.pop("node")
        node = node_name

        if not isinstance(node, CephNode):
            node = get_node_by_id(self.cluster, node)

        if not node:
            raise ResourceNotFoundError(f"{node_name} node not found/provided")

        logger.info("Set Address on this node : %s" % node.ip_address)
        cmd.extend(["host", "set-addr", node.shortname, node.ip_address])
        self.shell(args=cmd)
        assert node.ip_address in self.get_addr_by_name(node.shortname)
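
The resulting orchestrator call for the docstring example is roughly the following; the shortname and IP address are hypothetical, and base_cmd_args flags are omitted:

# ceph orch host set-addr <shortname> <ip-address>
cmd = ["ceph", "orch", "host", "set-addr", "ceph-node1", "10.0.0.11"]
print(" ".join(cmd))  # ceph orch host set-addr ceph-node1 10.0.0.11
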
Example 8
    def label_add(self, config):
        """
        Add/Attach label to nodes

          - Attach labels to existing nodes
          - if nodes are empty, all cluster nodes are considered
          - roles defined for each node are used as labels (e.g., [mon, mgr])

        Args:
            config (Dict): label add configuration

        Example::

            config:
                service: host
                command: label_add
                base_cmd_args:
                    verbose: true
                args:
                    node: node1
                    labels:
                        - mon
                        - osd
        """
        cmd = ["ceph", "orch"]
        if config.get("base_cmd_args"):
            cmd.append(config_dict_to_string(config["base_cmd_args"]))

        args = config["args"]
        node_name = args.pop("node")
        node = node_name
        if not isinstance(node, CephNode):
            node = get_node_by_id(self.cluster, node)

        if not node:
            raise ResourceNotFoundError(f"{node_name} node not found/provided")

        _labels = args.get("labels")
        if isinstance(_labels, str) and _labels == "apply-all-labels":
            _labels = node.role.role_list

        if not _labels:
            raise ResourceNotFoundError("labels not found/provided")

        logger.info("Add label(s) %s on node %s" % (_labels, node.ip_address))
        for label in _labels:
            _cmd = deepcopy(cmd)
            _cmd.extend(["host", "label", "add", node.hostname, label])
            self.shell(args=_cmd)
            assert label in self.fetch_labels_by_hostname(node.hostname)

            if config.get("validate_admin_keyring") and label == "_admin":
                logger.info("Ceph keyring - default: %s" %
                            DEFAULT_KEYRING_PATH)
                if not monitoring_file_existence(node, DEFAULT_KEYRING_PATH):
                    raise HostOpFailure("Ceph keyring not found")
                if not monitoring_file_existence(node, DEFAULT_CEPH_CONF_PATH):
                    raise HostOpFailure("Ceph configuration file not found")
                logger.info(
                    "Ceph configuration and Keyring found on admin node...")
Example 9
    def label_remove(self, config):
        """
        Removes label from nodes

          - remove labels from existing nodes
          - if nodes are empty, all cluster nodes are considered

        Args:
            config (Dict): label remove configuration

        Example::

            config:
                service: host
                command: label_remove
                base_cmd_args:
                    verbose: true
                args:
                    node: node1
                    labels:
                        - mon
                        - osd
        """
        cmd = ["ceph", "orch"]
        if config.get("base_cmd_args"):
            cmd.append(config_dict_to_string(config["base_cmd_args"]))

        args = config["args"]
        node_name = args.pop("node")
        node = node_name

        if not isinstance(node, CephNode):
            node = get_node_by_id(self.cluster, node)

        if not node:
            raise ResourceNotFoundError(f"{node_name} node not found/provided")

        _labels = args.get("labels")

        if isinstance(_labels, str) and _labels == "apply-all-labels":
            _labels = node.role.role_list

        if not _labels:
            raise ResourceNotFoundError("labels not found/provided")

        logger.info("Remove label(s) %s on node %s" %
                    (_labels, node.ip_address))

        for label in _labels:
            _cmd = deepcopy(cmd)
            _cmd.extend(["host", "label", "rm", node.shortname, label])
            self.shell(args=_cmd)
Example 10
    def label_add(self, config):
        """
        Add/Attach label to nodes

        - Attach labels to existing nodes
        - if nodes are empty, all cluster nodes are considered
        - roles defined to each node used as labels( eg., [mon, mgr])

        config:
            service: host
            command: label_add
            base_cmd_args:
                verbose: true
            args:
                node: node1
                labels:
                    - mon
                    - osd
        Args:
            config
        """
        cmd = ["ceph", "orch"]
        if config.get("base_cmd_args"):
            cmd.append(config_dict_to_string(config["base_cmd_args"]))

        args = config["args"]
        node_name = args.pop("node")
        node = node_name
        if not isinstance(node, CephNode):
            node = get_node_by_id(self.cluster, node)

        if not node:
            raise ResourceNotFoundError(f"{node_name} node not found/provided")

        _labels = args.get("labels")
        if isinstance(_labels, str) and _labels == "apply-all-labels":
            _labels = node.role.role_list

        if not _labels:
            raise ResourceNotFoundError("labels not found/provided")

        logger.info("Add label(s) %s on node %s" % (_labels, node.ip_address))
        for label in _labels:
            _cmd = deepcopy(cmd)
            _cmd.extend(["host", "label", "add", node.shortname, label])
            self.shell(args=_cmd)
            assert label in self.fetch_labels_by_hostname(node.shortname)
Example 11
def remove(cls, config: Dict) -> None:
    """
    Remove the client using the provided configuration.

    Args:
        cls: cephadm object
        config: Key/value pairs provided by the test case to create the client.

    Example::

        config:
            command: remove
            id: client.0                # client Id
            node: "node8"               # client node
            remove_packages:
                - ceph-common           # Remove ceph common packages
            remove_admin_keyring: true  # Remove admin keyring from node
    """
    node = get_node_by_id(cls.cluster, config["node"])
    id_ = config["id"]

    cls.shell(args=["ceph", "auth", "del", id_])

    if config.get("remove_admin_keyring"):
        node.exec_command(
            cmd="rm -rf /etc/ceph/ceph.client.admin.keyring",
            sudo=True,
        )

    node.exec_command(sudo=True,
                      cmd=f"rm -rf /etc/ceph/ceph.{id_}.keyring",
                      check_ec=False)

    out, _ = node.exec_command(cmd="ls -ltrh /etc/ceph/", sudo=True)
    log.info(out.read().decode().strip())

    # Remove packages like ceph-common
    # Be careful: this may remove the entire /etc/ceph directory
    if config.get("remove_packages"):
        for pkg in config.get("remove_packages"):
            node.exec_command(
                cmd=f"yum remove -y {pkg}",
                sudo=True,
            )
Example 12
    def zap(self, config: Dict) -> None:
        """
        Zap particular device

        Args:
            config (Dict): Zap configs

        Returns:
            output (Str), error (Str)  returned by the command.

        Example::

            config:
                command: zap
                base_cmd_args:
                    verbose: true
                pos_args:
                    - "node1"
                    - "/dev/vdb"
                args:
                    force: true

        """
        base_cmd = ["ceph", "orch"]

        if config.get("base_cmd_args"):
            base_cmd_args_str = config_dict_to_string(
                config.get("base_cmd_args"))
            base_cmd.append(base_cmd_args_str)
        base_cmd.extend(["device", "zap"])

        pos_args = config["pos_args"]
        node_name = pos_args[0]
        node = get_node_by_id(self.cluster, node_name)
        host = node.shortname
        assert host
        base_cmd.append(host)
        base_cmd.extend(pos_args[1:])

        if config.get("args"):
            args = config.get("args")
            base_cmd.append(config_dict_to_string(args))
        return self.shell(args=base_cmd)
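
Assembled for the docstring example, the zap call roughly becomes the following; the shortname is hypothetical and the exact flag rendering depends on config_dict_to_string:

# ceph orch --verbose device zap <host> <device> --force
cmd = ["ceph", "orch", "--verbose", "device", "zap", "ceph-node1", "/dev/vdb", "--force"]
print(" ".join(cmd))
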
Example 13
    def add(self: DaemonProtocol, config: Dict):
        """
        Execute the add method using the object's service name.

        Args:
            config (Dict): Key/value pairs passed from the test suite.

        Example::

            config:
                service: osd
                command: add
                base_cmd_args:
                    verbose: true
                pos_args:
                    - node1
                    - /dev/vdb

        """
        service = config.pop("service")
        base_cmd = ["ceph", "orch"]
        base_cmd.extend(["daemon", "add", service])
        if config.get("base_cmd_args"):
            base_cmd.append(config_dict_to_string(config["base_cmd_args"]))

        pos_args = config["pos_args"]
        node_name = pos_args[0]
        node = get_node_by_id(self.cluster, node_name)
        host = node.shortname

        if service == "osd":
            base_cmd.extend([f"{host}:{','.join(pos_args[1:])}"])

        else:
            if pos_args:
                base_cmd += pos_args

            base_cmd.append(host)

        out, _ = self.shell(base_cmd)
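
For the docstring example (an OSD on node1 using /dev/vdb), the final command is roughly the following; the shortname is hypothetical and base_cmd_args flags are omitted:

# ceph orch daemon add osd <host>:<device>[,<device>...]
cmd = ["ceph", "orch", "daemon", "add", "osd", "ceph-node1:/dev/vdb"]
print(" ".join(cmd))
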
Example 14
    def remove(self, config):
        """
        Remove host from cluster

        Args:
            config (Dict): Remove host configuration

        Example::

            config:
                service: host
                command: remove
                base_cmd_args:
                  verbose: true                        # arguments to ceph orch
                args:
                  node: "node2"                         # node-name or object
        """
        cmd = ["ceph", "orch"]
        if config.get("base_cmd_args"):
            cmd.append(config_dict_to_string(config["base_cmd_args"]))

        args = config["args"]
        node_name = args.pop("node")
        node = node_name

        if not isinstance(node, CephNode):
            node = get_node_by_id(self.cluster, node)

        if not node:
            raise ResourceNotFoundError(f"{node_name} node not found/provided")

        # Skip removal of the installer node or of hosts not part of the cluster
        if (node.hostname == self.installer.node.hostname
                or node.hostname not in self.fetch_host_names()):
            return

        logger.info("Removing node %s" % node.ip_address)
        cmd.extend(["host", "rm", node.hostname])
        self.shell(args=cmd)
        assert node.hostname not in self.fetch_host_names()
Example 15
    def generate_snmp_dest_conf(self, spec):
        """
        Return conf content for snmp-gateway service

        Args:
            spec (Dict): snmp-gateway service config

        Returns:
            destination_conf (Str)

        Example::

            spec:
                - service_type: snmp-gateway
                  service_name: snmp-gateway
                  placement:
                    count: 1
                  spec:
                    credentials:
                      snmp_v3_auth_username: <user_name>
                      snmp_v3_auth_password: <password>
                    port: 9464
                    snmp_destination: node
                    snmp_version: V3

        """
        template = self._get_template("snmp_destination")
        node = get_node_by_id(self.cluster, "node1")
        cmd = "cephadm shell ceph fsid"
        out, err = node.exec_command(sudo=True, cmd=cmd)
        LOG.info(f"fsid: {out}")
        # SNMPv3 engine id: first 32 characters of the fsid with dashes removed
        fsid = out.strip().replace("-", "")
        engine_id = fsid[0:32]
        if engine_id:
            spec["spec"]["engine_id"] = engine_id
        LOG.info(f"engine_id: {engine_id}")

        return template.render(spec=spec)
Example 16
def translate_to_hostname(cluster, string: str) -> str:
    """
    Return the string with node ID replaced with shortname.

    In this method, the pattern {node:node1} would be replaced with the value of
    node.shortname.

    Args:
        cluster:    Ceph cluster instance
        string:    String to be searched for node ID pattern

    Return:
        String whose node IDs are replaced with shortnames
    """
    replaced_string = string
    node_list = re.findall("{node:(.+?)}", string)

    for node in node_list:
        node_name = get_node_by_id(cluster, node).shortname
        replacement_pattern = "{node:" + node + "}"
        replaced_string = re.sub(replacement_pattern, node_name,
                                 replaced_string)

    return replaced_string
Example 17
def add(cls, config: Dict) -> None:
    """configure client using the provided configuration.

    Args:
        cls: cephadm object
        config: Key/value pairs provided by the test case to create the client.

    Example::

        config:
            command: add
            id: client.1                    # client Id
            node: "node8"                   # client node
            copy_ceph_conf: true|false      # copy ceph conf to provided node
            store-keyring: true             # store keyring locally under /etc/ceph
            install_packages:
              - ceph_common                 # install ceph common packages
            copy_admin_keyring: true|false  # copy admin keyring
            caps:                           # authorize client capabilities
              mon: "allow r"
              osd: "allow rw pool=liverpool"
    """
    id_ = config["id"]
    client_file = f"/etc/ceph/ceph.{id_}.keyring"

    # Create the client keyring with the requested capabilities
    cmd = ["ceph", "auth", "get-or-create", f"{id_}"]
    for k, v in config.get("caps", {}).items():
        cmd.append(f"{k} '{v}'")
    cnt_key, err = cls.shell(args=cmd)

    def put_file(client, file_name, content, file_mode, sudo=True):
        file_ = client.remote_file(sudo=sudo,
                                   file_name=file_name,
                                   file_mode=file_mode)
        file_.write(content)
        file_.flush()

    if config.get("node"):
        node = get_node_by_id(cls.cluster, config["node"])

        # Copy the keyring to client
        node.exec_command(sudo=True, cmd="mkdir -p /etc/ceph")
        put_file(node, client_file, cnt_key, "w")

        if config.get("copy_ceph_conf", True):
            # Get minimal ceph.conf
            ceph_conf, err = cls.shell(
                args=["ceph", "config", "generate-minimal-conf"])
            # Copy the ceph.conf to client
            put_file(node, "/etc/ceph/ceph.conf", ceph_conf, "w")

        # Copy admin keyring to client node
        if config.get("copy_admin_keyring"):
            admin_keyring, _ = cls.shell(
                args=["ceph", "auth", "get", "client.admin"])
            put_file(node, "/etc/ceph/ceph.client.admin.keyring",
                     admin_keyring, "w")

        # Install ceph-common
        if config.get("install_packages"):
            for pkg in config.get("install_packages"):
                node.exec_command(cmd=f"yum install -y --nogpgcheck {pkg}",
                                  sudo=True)

        out, _ = node.exec_command(cmd="ls -ltrh /etc/ceph/", sudo=True)
        log.info(out.read().decode().strip())

    # Hold local copy of the client key-ring in the installer node
    if config.get("store-keyring"):
        put_file(cls.installer, client_file, cnt_key, "w")
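
For the docstring example, the client-creation call assembled above works out to roughly the following:

# ceph auth get-or-create client.1 mon 'allow r' osd 'allow rw pool=liverpool'
cmd = ["ceph", "auth", "get-or-create", "client.1"]
for k, v in {"mon": "allow r", "osd": "allow rw pool=liverpool"}.items():
    cmd.append(f"{k} '{v}'")
print(" ".join(cmd))
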
Example 18
def run(ceph_cluster, **kw):
    LOG.info("Running test")
    ceph_nodes = kw.get("ceph_nodes")
    LOG.info("Running ceph ansible test")
    config = kw.get("config")
    test_data = kw.get("test_data")
    prev_install_version = test_data["install_version"]
    skip_version_compare = config.get("skip_version_compare")
    limit_node = config.get("limit")
    containerized = config.get("ansi_config").get("containerized_deployment")
    build = config.get("build", config.get("rhbuild"))
    LOG.info("Build for upgrade: {build}".format(build=build))
    cluster_name = config.get("ansi_config").get("cluster")

    ubuntu_repo = config.get("ubuntu_repo")
    hotfix_repo = config.get("hotfix_repo")
    cloud_type = config.get("cloud-type", "openstack")
    base_url = config.get("base_url")
    installer_url = config.get("installer_url")
    config["ansi_config"]["public_network"] = get_public_network(ceph_nodes[0])

    ceph_cluster.ansible_config = config["ansi_config"]
    ceph_cluster.custom_config = test_data.get("custom-config")
    ceph_cluster.custom_config_file = test_data.get("custom-config-file")
    ceph_cluster.use_cdn = config.get("use_cdn")

    config["ansi_config"].update(
        set_container_info(ceph_cluster, config, ceph_cluster.use_cdn, containerized)
    )

    # Translate RGW node to ip address for Multisite
    rgw_pull_host = config["ansi_config"].get("rgw_pullhost")
    if rgw_pull_host:
        ceph_cluster.ansible_config["rgw_pullhost"] = translate_to_ip(
            kw["ceph_cluster_dict"], ceph_cluster.name, rgw_pull_host
        )

    ceph_installer = ceph_cluster.get_ceph_object("installer")
    ansible_dir = "/usr/share/ceph-ansible"

    if config.get("skip_setup") is True:
        LOG.info("Skipping setup of ceph cluster")
        return 0

    # set pre-upgrade install version
    test_data["install_version"] = build
    LOG.info("Previous install version: {}".format(prev_install_version))

    # retrieve pre-upgrade versions and initialize container counts
    pre_upgrade_versions = get_ceph_versions(ceph_cluster.get_nodes(), containerized)
    pre_upgrade_container_counts = {}

    # setup packages based on build
    ceph_cluster.setup_packages(
        base_url, hotfix_repo, installer_url, ubuntu_repo, build, cloud_type
    )

    # backup existing hosts file and ansible config
    ceph_installer.exec_command(cmd="cp {}/hosts /tmp/hosts".format(ansible_dir))
    ceph_installer.exec_command(
        cmd="cp {}/group_vars/all.yml /tmp/all.yml".format(ansible_dir)
    )

    # update ceph-ansible
    ceph_installer.install_ceph_ansible(build, upgrade=True)

    # restore hosts file
    ceph_installer.exec_command(
        sudo=True, cmd="cp /tmp/hosts {}/hosts".format(ansible_dir)
    )

    # If upgrading from version 2 update hosts file with mgrs
    if prev_install_version.startswith("2") and build.startswith("3"):
        collocate_mons_with_mgrs(ceph_cluster, ansible_dir)

    # configure fetch directory path
    if config.get("ansi_config").get("fetch_directory") is None:
        config["ansi_config"]["fetch_directory"] = "~/fetch/"

    # set the docker image tag if necessary
    if containerized and config.get("ansi_config").get("docker-insecure-registry"):
        config["ansi_config"]["ceph_docker_image_tag"] = get_latest_container_image_tag(
            build
        )
    LOG.info("gvar: {}".format(config.get("ansi_config")))
    gvar = yaml.dump(config.get("ansi_config"), default_flow_style=False)

    # create all.yml
    LOG.info("global vars {}".format(gvar))
    gvars_file = ceph_installer.remote_file(
        sudo=True, file_name="{}/group_vars/all.yml".format(ansible_dir), file_mode="w"
    )
    gvars_file.write(gvar)
    gvars_file.flush()

    # retrieve container count if containerized
    if containerized:
        pre_upgrade_container_counts = get_container_counts(ceph_cluster)

    # configure insecure registry if necessary
    if config.get("docker-insecure-registry"):
        ceph_cluster.setup_insecure_registry()

    # copy rolling update from infrastructure playbook
    jewel_minor_update = build.startswith("2")
    if build.startswith("4") or build.startswith("5"):
        cmd = (
            "cd {};"
            "ANSIBLE_STDOUT_CALLBACK=debug;"
            "ansible-playbook -e ireallymeanit=yes -vvvv -i "
            "hosts infrastructure-playbooks/rolling_update.yml".format(ansible_dir)
        )
    else:
        ceph_installer.exec_command(
            sudo=True,
            cmd="cd {} ; cp infrastructure-playbooks/rolling_update.yml .".format(
                ansible_dir
            ),
        )
        cmd = (
            "cd {};"
            "ANSIBLE_STDOUT_CALLBACK=debug;"
            "ansible-playbook -e ireallymeanit=yes -vvvv -i hosts rolling_update.yml".format(
                ansible_dir
            )
        )
    if jewel_minor_update:
        cmd += " -e jewel_minor_update=true"
        LOG.info("Upgrade is jewel_minor_update, cmd: {cmd}".format(cmd=cmd))

    if config.get("ansi_cli_args"):
        cmd += config_dict_to_string(config["ansi_cli_args"])

    short_names = []
    if limit_node:
        for node in limit_node:
            short_name = get_node_by_id(ceph_cluster, node).shortname
            short_names.append(short_name)
        matched_short_names = ",".join(short_names)
        cmd += f" --limit {matched_short_names}"

    out, rc = ceph_installer.exec_command(cmd=cmd, long_running=True)

    if rc != 0:
        LOG.error("Failed during upgrade (rc = {})".format(rc))
        return rc

    # set build to new version
    LOG.info("Setting install_version to {build}".format(build=build))
    test_data["install_version"] = build
    ceph_cluster.rhcs_version = build

    # check that all mons and osds are in the correct state
    num_osds = ceph_cluster.ceph_demon_stat["osd"]
    num_mons = ceph_cluster.ceph_demon_stat["mon"]
    test_data["ceph-ansible"] = {
        "num-osds": num_osds,
        "num-mons": num_mons,
        "rhbuild": build,
    }

    # compare pre and post upgrade versions
    if skip_version_compare:
        LOG.warning("Skipping version comparison.")
    else:
        if not jewel_minor_update:
            post_upgrade_versions = get_ceph_versions(ceph_nodes, containerized)
            version_compare_fail = compare_ceph_versions(
                pre_upgrade_versions, post_upgrade_versions
            )
            if version_compare_fail:
                return version_compare_fail

    # compare pre and post upgrade container counts
    if containerized:
        post_upgrade_container_counts = get_container_counts(ceph_cluster)
        container_count_fail = compare_container_counts(
            pre_upgrade_container_counts,
            post_upgrade_container_counts,
            prev_install_version,
        )
        if container_count_fail:
            return container_count_fail

    client = ceph_cluster.get_ceph_object("mon")

    if build.startswith("5"):

        cmd = (
            "cd {};"
            "ANSIBLE_STDOUT_CALLBACK=debug;"
            "ansible-playbook -e ireallymeanit=yes -vvvv -i "
            "hosts infrastructure-playbooks/cephadm-adopt.yml".format(ansible_dir)
        )
        out, rc = ceph_installer.exec_command(cmd=cmd, long_running=True)

        if rc != 0:
            LOG.error("Failed during cephadm adopt (rc = {})".format(rc))
            return rc

        client = ceph_cluster.get_nodes("mon")[0]

    return ceph_cluster.check_health(
        build,
        cluster_name=cluster_name,
        client=client,
        timeout=config.get("timeout", 300),
    )
Example 19
    def add(self, config):
        """
        Add host to cluster

        Args:
            config (Dict):  host addition configuration

        Example::

            config:
                service: host
                command: add
                base_cmd_args:                          # arguments to ceph orch
                    concise: true
                    block: true
                args:
                    node: "node2"                         # node-name or object
                    attach_ip_address: bool               # true or false
                    labels: [mon, osd] or apply-all-labels

            add_label: host is added with labels (assigned roles are considered)
            attach_address: host is added with its ip address

            labels are applied when a list of strings is provided; when the string
            "apply-all-labels" is given, all roles associated with the node are used

        """
        cmd = ["ceph", "orch"]
        if config.get("base_cmd_args"):
            cmd.append(config_dict_to_string(config["base_cmd_args"]))

        args = config["args"]
        node = args.pop("node")
        ceph_node = get_node_by_id(self.cluster, node_name=node)

        if not ceph_node:
            raise ResourceNotFoundError(f"No matching resource found: {node}")

        # Skipping client node, if only client label is attached
        if (len(ceph_node.role.role_list) == 1
                and ["client"] == ceph_node.role.role_list):
            return

        attach_address = args.get("attach_ip_address")
        _labels = args.get("labels")
        if isinstance(_labels, str) and _labels == "apply-all-labels":
            label_set = set(ceph_node.role.role_list)
            _labels = list(label_set)

        cmd.extend(["host", "add", ceph_node.hostname])

        if attach_address:
            cmd.append(ceph_node.ip_address)

        if _labels:
            # Fill the mandatory <address> argument when attach_address is False
            if not attach_address:
                cmd.append("''")

            cmd += _labels

        logger.info("Adding node %s, (attach_address: %s, labels: %s)" %
                    (ceph_node.ip_address, attach_address, _labels))
        # Add host
        self.shell(args=cmd)

        # validate host existence
        if ceph_node.hostname not in self.fetch_host_names():
            raise HostOpFailure(
                f"Hostname verify failure. Expected {ceph_node.hostname}")

        if attach_address:
            if ceph_node.ip_address != self.get_addr_by_name(
                    ceph_node.hostname):
                raise HostOpFailure(
                    f"IP address verify failed. Expected {ceph_node.ip_address}"
                )

        if _labels:
            assert sorted(self.fetch_labels_by_hostname(
                ceph_node.hostname)) == sorted(_labels)

            if config.get("validate_admin_keyring") and "_admin" in _labels:
                if not monitoring_file_existence(ceph_node,
                                                 DEFAULT_KEYRING_PATH):
                    raise HostOpFailure("Ceph keyring not found")
                if not monitoring_file_existence(ceph_node,
                                                 DEFAULT_CEPH_CONF_PATH):
                    raise HostOpFailure("Ceph configuration file not found")
                logger.info("Ceph configuration and Keyring found")
Example 20
def run(ceph_cluster, **kw):
    """
    Enables connectivity mode and deploys a stretch cluster with an arbiter mon node.

    Args:
        ceph_cluster (ceph.ceph.Ceph): ceph cluster
    """

    log.info("Deploying stretch cluster with arbiter mon node")
    log.info(run.__doc__)
    config = kw.get("config")
    cephadm = CephAdmin(cluster=ceph_cluster, **config)
    rados_obj = RadosOrchestrator(node=cephadm)
    mon_obj = MonElectionStrategies(rados_obj=rados_obj)
    client_node = ceph_cluster.get_nodes(role="client")[0]

    site1_name = config["site1"]["name"]
    site2_name = config["site2"]["name"]

    # disabling automatic crush update
    cmd = "ceph config set osd osd_crush_update_on_start false"
    cephadm.shell([cmd])

    # Sleeping for 2 seconds after map update.
    time.sleep(2)

    # Setting the election strategy to connectivity mode
    if not mon_obj.set_election_strategy(mode="connectivity"):
        log.error("could not set election strategy to connectivity mode")
        return 1

    # Sleeping for 2 seconds after strategy update.
    time.sleep(2)

    # Checking updated election strategy in mon map
    strategy = mon_obj.get_election_strategy()
    if strategy != 3:
        log.error(
            f"Election strategy is not connectivity (expected 3), got {strategy}"
        )
        return 1
    log.info("Enabled connectivity mode on the cluster")

    # Creating new datacenter crush objects and moving under root/default
    for name in [site1_name, site2_name]:
        cmd = f"ceph osd crush add-bucket {name} datacenter"
        rados_obj.run_ceph_command(cmd)
        time.sleep(2)
        move_crush_item(cephadm, crush_obj=name, name="root", value="default")
        time.sleep(2)

    # Moving all the OSD and Mon daemons into respective sites
    sites = ["site1", "site2", "site3"]
    for site in sites:
        mon_hosts = [
            host_obj.hostname
            for host_obj in ceph_cluster.get_nodes(role="mon")
        ]
        log.info(f"Mon hosts defined: {mon_hosts}")
        osd_hosts = [
            host_obj.hostname
            for host_obj in ceph_cluster.get_nodes(role="osd")
        ]
        log.info(f"OSD hosts defined: {osd_hosts}")
        # Collecting hosts from each site and setting locations accordingly
        site_details = config[site]
        crush_name = site_details["name"]
        host_nodes = cephadm.cluster.get_nodes()

        for item in site_details["hosts"]:
            host = [
                node for node in host_nodes if re.search(item, node.hostname)
            ][0]
            # Moving the mon daemons into site
            if host.hostname in mon_hosts:
                cmd = f"ceph mon set_location {host.hostname} datacenter={crush_name}"
                cephadm.shell([cmd])
                log.info(
                    f"Set location for mon {host.hostname} onto site {crush_name}\n"
                    "sleeping for 5 seconds")
                time.sleep(5)

            # Moving the osd daemons into site
            if host.hostname in osd_hosts:
                move_crush_item(
                    node=cephadm,
                    crush_obj=host.hostname,
                    name="datacenter",
                    value=crush_name,
                )
                log.info(
                    f"Set location for OSD {host.hostname} onto site {crush_name}\n"
                    "sleeping for 5 seconds")
                time.sleep(5)

    log.info("Moved all the hosts into respective sites")

    stretch_rule_name = config.get("stretch_rule_name", "stretch_rule")
    if not setup_crush_rule(
            node=client_node,
            rule_name=stretch_rule_name,
            site1=site1_name,
            site2=site2_name,
    ):
        log.error("Failed to Add crush rules in the crush map")
        return 1

    # Sleeping for 5 sec for the strategy to be active
    time.sleep(5)

    # Enabling the stretch cluster mode
    tiebreaker_node = get_node_by_id(cephadm.cluster,
                                     config["site3"]["hosts"][0])
    log.info(f"tiebreaker node provided: {tiebreaker_node.hostname}")
    cmd = f"ceph mon enable_stretch_mode {tiebreaker_node.hostname} {stretch_rule_name} datacenter"
    try:
        cephadm.shell([cmd])
    except Exception as err:
        log.error(
            f"Error while enabling stretch rule on the datacenter. Command : {cmd}"
        )
        log.error(err)
        return 1
    time.sleep(2)

    # wait for PGs to settle down with new crush rules after deployment of stretch mode
    wait_for_clean_pg_sets(rados_obj)

    # Checking if the pools have been updated with the new crush rules
    acting_set = rados_obj.get_pg_acting_set()
    if len(acting_set) != 4:
        log.error(
            f"There are {len(acting_set)} OSDs in the PG acting set: {acting_set}. Stretch mode requires 4"
        )
        return 1
    log.info(f"Acting set {acting_set} consists of 4 OSDs per PG")
    log.info("Stretch rule with arbiter monitor node set up successfully")
    return 0
Example 21
    def __maintenance(self: ServiceProtocol, config: Dict, op: str) -> None:
        """Perform host maintenance operations using orchestrator

        Args:
            config (Dict):  Key/value pairs passed from the test suite.
                            args['node'] carries the name of the node on which the
                            host maintenance operation is to be performed
                            (refer to the example below).
            op (str):       Operation to be performed: enter/exit

        Example::

            config:
                service: host
                command: enter
                verify: true
                args:
                    node: name of the node to be placed under maintenance

        """
        cmd = ["ceph", "orch"]
        if config.get("base_cmd_args"):
            cmd.append(config_dict_to_string(config["base_cmd_args"]))

        cmd.append("host")
        cmd.append("maintenance")
        cmd.append(op)

        verify = config.pop("verify", True)
        args = config["args"]
        nodename = args.pop("node")
        if not nodename:
            raise HostMaintenanceFailure(
                "Node on which maintenance mode to be configured is not provided"
            )

        node = get_node_by_id(self.cluster, nodename)

        if not node:
            raise ResourceNotFoundError(f"No matching resource found: {nodename}")

        if not self.get_host(node.hostname):
            raise HostMaintenanceFailure(
                "The node specified for maintenance is not deployed in the cluster"
            )

        cmd.append(node.hostname)

        if op == "enter":
            manager = Manager(cluster=self.cluster, **config)
            if not manager.switch_active(node):
                raise HostMaintenanceFailure(
                    "Unable to switch active mgr to a node other than the input node"
                )
            cmd.append("--force")

        self.shell(args=cmd)

        if verify and not self.check_maintenance_status(op, node):
            raise HostMaintenanceFailure(
                f"The host maintenance operation {op} was not successful on the host {node.hostname}"
            )
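
For the "enter" example above, the assembled orchestrator call is roughly the following; the hostname is hypothetical, and --force is appended for enter after the active mgr has been switched away:

# ceph orch host maintenance enter <hostname> --force
cmd = ["ceph", "orch", "host", "maintenance", "enter", "ceph-node3", "--force"]
print(" ".join(cmd))
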