def delete_disk(node_name, filesystem_name, disk_names, admin_ip=None):
    """
    This function performs "mmdeldisk".
    Args:
        node_name (str): Node for which disk needs to be deleted.
        filesystem_name (str): Filesystem name associated with the disks.
        disk_names (list): Disk names to be deleted.
                           Ex: ['gpfs1nsd', 'gpfs2nsd', 'gpfs3nsd']
        admin_ip (str): Optional admin node IP; if given, the command is
                        run through ssh on that node.
    """
    stdout = stderr = ""
    rc = RC_SUCCESS
    cmd = []
    mmcmd_idx = 1
    if admin_ip:
        cmd.extend(["ssh", admin_ip])
        mmcmd_idx = len(cmd) + 1
    disk_name_str = ";".join(disk_names)
    cmd.extend([
        os.path.join(GPFS_CMD_PATH, "mmdeldisk"),
        filesystem_name,
        disk_name_str,
        '-N', node_name
    ])
    try:
        stdout, stderr, rc = runCmd(cmd, sh=False)
    except Exception as e:
        raise SpectrumScaleException(str(e), cmd[0:mmcmd_idx],
                                     cmd[mmcmd_idx:], -1, stdout, stderr)
    if rc != RC_SUCCESS:
        raise SpectrumScaleException("Deleting disk(s) failed.",
                                     cmd[0:mmcmd_idx], cmd[mmcmd_idx:],
                                     rc, stdout, stderr)
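
# Hedged usage sketch (node, filesystem and NSD names are illustrative,
# not from this module): deleting two NSDs from filesystem "fs1" on
# node1.example.com via an assumed admin node at 10.0.0.1 would be:
#
#   delete_disk("node1.example.com", "fs1", ["gpfs1nsd", "gpfs2nsd"],
#               admin_ip="10.0.0.1")
#
# which executes (disk names joined with ";" as a single argument):
#   ssh 10.0.0.1 /usr/lpp/mmfs/bin/mmdeldisk fs1 "gpfs1nsd;gpfs2nsd" \
#       -N node1.example.com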
def remove_server_access_to_nsd(nsd_to_delete, node_to_delete,
                                nsd_attached_to_nodes, admin_ip=None):
    stdout = stderr = ""
    rc = RC_SUCCESS

    # mmchnsd "nsd1:node1.domain.com"
    server_access_list = ','.join(map(str, nsd_attached_to_nodes))
    server_access_list = nsd_to_delete + ":" + server_access_list

    cmd = []
    mmcmd_idx = 1
    if admin_ip:
        cmd.extend(["ssh", admin_ip])
        mmcmd_idx = len(cmd) + 1
    cmd.extend([os.path.join(GPFS_CMD_PATH, "mmchnsd"), server_access_list])

    try:
        stdout, stderr, rc = runCmd(cmd, sh=False)
    except Exception as e:
        e_msg = ("Exception encountered during execution of modifying NSD "
                 "server access list for NSD={0} on Node={1}. Exception "
                 "Message={2}".format(nsd_to_delete, node_to_delete, e))
        raise SpectrumScaleException(e_msg, cmd[0:mmcmd_idx],
                                     cmd[mmcmd_idx:], rc, stdout, stderr)

    if rc != RC_SUCCESS:
        e_msg = ("Failed to modify NSD server access list for NSD={0} on "
                 "Node={1}".format(nsd_to_delete, node_to_delete))
        raise SpectrumScaleException(e_msg, cmd[0:mmcmd_idx],
                                     cmd[mmcmd_idx:], rc, stdout, stderr)
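
# Hedged example (names illustrative): if "nsd1" is currently served by
# node1 (being removed) and should stay reachable through node2/node3,
#
#   remove_server_access_to_nsd("nsd1", "node1.example.com",
#                               ["node2.example.com", "node3.example.com"])
#
# builds the descriptor "nsd1:node2.example.com,node3.example.com" and runs
# /usr/lpp/mmfs/bin/mmchnsd with it; node1 loses access simply by being
# absent from nsd_attached_to_nodes.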
def apply_license(node_name, license, admin_ip=None):
    stdout = stderr = ""
    rc = RC_SUCCESS

    # node_name may be a single name (str) or a list of names
    if isinstance(node_name, str):
        node_name_str = node_name
    else:
        node_name_str = ' '.join(node_name)

    cmd = []
    mmcmd_idx = 1
    if admin_ip:
        cmd.extend(["ssh", admin_ip])
        mmcmd_idx = len(cmd) + 1
    cmd.extend([
        os.path.join(GPFS_CMD_PATH, "mmchlicense"),
        license,
        "--accept",
        "-N", node_name_str
    ])

    try:
        stdout, stderr, rc = runCmd(cmd, sh=False)
    except Exception as e:
        raise SpectrumScaleException(str(e), cmd[0:mmcmd_idx],
                                     cmd[mmcmd_idx:], -1, stdout, stderr)

    if rc != RC_SUCCESS:
        raise SpectrumScaleException("Changing license on node failed",
                                     cmd[0:mmcmd_idx], cmd[mmcmd_idx:],
                                     rc, stdout, stderr)
    return rc, stdout
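
# Hedged example (node name illustrative): applying a server license to a
# node ("server" and "client" are the usual mmchlicense classes):
#
#   apply_license("node1.example.com", "server")
#
# runs: /usr/lpp/mmfs/bin/mmchlicense server --accept -N node1.example.com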
def create_cluster(name, stanza_path, admin_ip=None):
    stdout = stderr = ""
    rc = RC_SUCCESS
    cmd = []
    mmcmd_idx = 1
    if admin_ip:
        cmd.extend(["ssh", admin_ip])
        mmcmd_idx = len(cmd) + 1
    cmd.extend([
        os.path.join(GPFS_CMD_PATH, "mmcrcluster"),
        "-N", stanza_path,
        "-C", name
    ])
    try:
        stdout, stderr, rc = runCmd(cmd, sh=False)
    except Exception as e:
        raise SpectrumScaleException(str(e), cmd[0:mmcmd_idx],
                                     cmd[mmcmd_idx:], -1, stdout, stderr)
    if rc != RC_SUCCESS:
        raise SpectrumScaleException("Creating cluster failed",
                                     cmd[0:mmcmd_idx], cmd[mmcmd_idx:],
                                     rc, stdout, stderr)
    return rc, stdout
def add_node(node_name, stanza_path, admin_ip=None):
    stdout = stderr = ""
    rc = RC_SUCCESS
    cmd = []
    mmcmd_idx = 1
    if admin_ip:
        cmd.extend(["ssh", admin_ip])
        mmcmd_idx = len(cmd) + 1
    # The node(s) to add are described by the stanza file passed to
    # "mmaddnode -N"; node_name is kept for interface symmetry with the
    # other helpers but is not used to build the command.
    cmd.extend([
        os.path.join(GPFS_CMD_PATH, "mmaddnode"),
        "-N", stanza_path,
        "--accept"
    ])
    try:
        stdout, stderr, rc = runCmd(cmd, sh=False)
    except Exception as e:
        raise SpectrumScaleException(str(e), cmd[0:mmcmd_idx],
                                     cmd[mmcmd_idx:], -1, stdout, stderr)
    if rc != RC_SUCCESS:
        raise SpectrumScaleException("Adding node to cluster failed",
                                     cmd[0:mmcmd_idx], cmd[mmcmd_idx:],
                                     rc, stdout, stderr)
    return rc, stdout, stderr
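
# Hedged sketch of a node descriptor (stanza) file as consumed by
# "mmaddnode -N <file>" and "mmcrcluster -N <file>"; host names and
# designations are illustrative:
#
#   node1.example.com:quorum-manager
#   node2.example.com:quorum
#   node3.example.com
#
# Each line is NodeName:NodeDesignations[:AdminNodeName], with the
# designation field optionally empty for a plain client node.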
def unmount_filesystems(node_name, wait=True, admin_ip=None):
    stdout = stderr = ""
    rc = RC_SUCCESS
    cmd = []
    mmcmd_idx = 1
    if admin_ip:
        cmd.extend(["ssh", admin_ip])
        mmcmd_idx = len(cmd) + 1
    cmd.extend(
        [os.path.join(GPFS_CMD_PATH, "mmumount"), "all", "-N", node_name])
    try:
        stdout, stderr, rc = runCmd(cmd, sh=False)
    except Exception as e:
        raise SpectrumScaleException(str(e), cmd[0:mmcmd_idx],
                                     cmd[mmcmd_idx:], -1, stdout, stderr)
    if rc != RC_SUCCESS:
        if 'mmumount: No file systems were found' in stdout or \
           'mmumount: No file systems were found' in stderr:
            # We can claim success on umount if there are no filesystems.
            # Return the same (rc, stdout) shape as the success path so
            # callers can unpack consistently.
            return RC_SUCCESS, stdout
        raise SpectrumScaleException(
            "Unmounting filesystems on node failed",
            cmd[0:mmcmd_idx], cmd[mmcmd_idx:], rc, stdout, stderr)
    return rc, stdout
def start_node(node_name, wait=True, admin_ip=None):
    stdout = stderr = ""
    rc = RC_SUCCESS
    cmd = []
    mmcmd_idx = 1
    if admin_ip:
        cmd.extend(["ssh", admin_ip])
        mmcmd_idx = len(cmd) + 1

    # node_name may be a single name (str) or a list of names
    if isinstance(node_name, str):
        node_name_str = node_name
        node_name_list = [node_name]
    else:
        node_name_str = ' '.join(node_name)
        node_name_list = node_name

    cmd.extend(
        [os.path.join(GPFS_CMD_PATH, "mmstartup"), "-N", node_name_str])
    try:
        stdout, stderr, rc = runCmd(cmd, sh=False)
    except Exception as e:
        raise SpectrumScaleException(str(e), cmd[0:mmcmd_idx],
                                     cmd[mmcmd_idx:], -1, stdout, stderr)
    if rc != RC_SUCCESS:
        raise SpectrumScaleException("Starting node failed",
                                     cmd[0:mmcmd_idx], cmd[mmcmd_idx:],
                                     rc, stdout, stderr)

    if wait:
        # Wait for a maximum of 36 * 5 = 180 seconds (3 minutes)
        MAX_RETRY = 36
        retry = 0
        done = False
        while not done and retry < MAX_RETRY:
            time.sleep(5)
            node_state = SpectrumScaleNode.get_state(node_name_list,
                                                     admin_ip)
            done = all("active" in state
                       for state in list(node_state.values()))
            retry = retry + 1
        if not done:
            raise SpectrumScaleException(
                "Starting node(s) timed out",
                cmd[0:mmcmd_idx], cmd[mmcmd_idx:], -1, "",
                "Node state is not \"active\" after retries")
    return rc, stdout
def check_roles_before_delete(logger, existing_node_list_to_del):
    logger.debug("Function Entry: check_roles_before_delete(). "
                 "Args: existing_node_list_to_del="
                 "{0}".format(existing_node_list_to_del))

    logger.info("Checking the designations for all nodes marked for removal")
    for node_to_del in existing_node_list_to_del:
        # Do not delete nodes that are designated as "quorum", "manager",
        # "gateway", "ces", "TCT", "SNMP"
        if (node_to_del.is_quorum_node() or
                node_to_del.is_manager_node() or
                node_to_del.is_gateway_node() or
                node_to_del.is_ces_node() or
                node_to_del.is_tct_node() or
                node_to_del.is_snmp_node()):
            exp_msg = ("Cannot remove node {0} since it is designated "
                       "as either a quorum, manager, gateway, CES, TCT "
                       "or SNMP node. Re-run the current command without "
                       "{1}".format(node_to_del.get_admin_node_name(),
                                    node_to_del.get_admin_node_name()))
            logger.error(exp_msg)
            raise SpectrumScaleException(exp_msg, "", [], -1, "", "")

    # TODO: Should we also check the Zimon Collector Nodes
    # zimon_col_nodes = get_zimon_collectors()
    logger.debug("Function Exit: check_roles_before_delete().")
def get_state(node_names=None, admin_ip=None):
    stdout = stderr = ""
    rc = RC_SUCCESS
    if node_names is None:
        node_names = []
    cmd = []
    mmcmd_idx = 1
    if admin_ip:
        cmd.extend(["ssh", admin_ip])
        mmcmd_idx = len(cmd) + 1
    cmd.extend([os.path.join(GPFS_CMD_PATH, "mmgetstate")])
    if len(node_names) == 0:
        cmd.append("-a")
    else:
        # If a set of node names has been provided, use that instead
        node_name_str = ','.join(node_names)
        cmd.append("-N")
        cmd.append(node_name_str)
    cmd.append("-Y")
    try:
        stdout, stderr, rc = runCmd(cmd, sh=False)
    except Exception as e:
        raise SpectrumScaleException(str(e), cmd[0:mmcmd_idx],
                                     cmd[mmcmd_idx:], -1, stdout, stderr)
    if rc != RC_SUCCESS:
        raise SpectrumScaleException("Retrieving the node state failed",
                                     cmd[0:mmcmd_idx], cmd[mmcmd_idx:],
                                     rc, stdout, stderr)

    node_state_dict = parse_unique_records(stdout)
    node_state_list = node_state_dict["mmgetstate"]

    node_state = {}
    for node in node_state_list:
        node_state[node["nodeName"]] = node["state"]
    return node_state
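
# Hedged example of the mapping returned by get_state() (node names and
# states are illustrative):
#
#   {"node1.example.com": "active",
#    "node2.example.com": "down"}
#
# Callers such as start_node() poll this mapping until every value
# contains "active".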
def get_df_info(filesystem_name, admin_ip=None):
    nsd_df_info_list = []
    stdout = stderr = ""
    rc = RC_SUCCESS
    cmd = []
    mmcmd_idx = 1
    if admin_ip:
        cmd.extend(["ssh", admin_ip])
        mmcmd_idx = len(cmd) + 1

    # TODO
    # The original code executed the command
    # "/usr/lpp/mmfs/bin/mmdf <fs_name> -d -Y" but this did not work if
    # there were multiple Pools with a separate System Pool. Therefore
    # the "-d" flag has been removed. Check to see why the "-d" flag was
    # used in the first place.
    cmd.extend([os.path.join(GPFS_CMD_PATH, "mmdf"), filesystem_name, "-Y"])

    try:
        stdout, stderr, rc = runCmd(cmd, sh=False)
    except Exception as e:
        raise SpectrumScaleException(str(e), cmd[0:mmcmd_idx],
                                     cmd[mmcmd_idx:], -1, stdout, stderr)
    if rc != RC_SUCCESS:
        raise SpectrumScaleException(
            "Retrieving filesystem disk space usage failed",
            cmd[0:mmcmd_idx], cmd[mmcmd_idx:], rc, stdout, stderr)

    df_dict = parse_aggregate_cmd_output(
        stdout, ["poolTotal", "data", "metadata", "fsTotal", "inode"])
    nsd_df_list = df_dict["nsd"]
    for nsd_df in nsd_df_list:
        nsd_df_instance = SpectrumScaleDf(nsd_df)
        nsd_df_info_list.append(nsd_df_instance)
    return nsd_df_info_list
def get_filesystems(admin_ip=None):
    filesystem_info_list = []
    stdout = stderr = ""
    rc = RC_SUCCESS
    cmd = []
    mmcmd_idx = 1
    if admin_ip:
        cmd.extend(["ssh", admin_ip])
        mmcmd_idx = len(cmd) + 1
    cmd.extend([os.path.join(GPFS_CMD_PATH, "mmlsfs"), "all", "-Y"])
    try:
        stdout, stderr, rc = runCmd(cmd, sh=False)
    except Exception as e:
        raise SpectrumScaleException(str(e), cmd[0:mmcmd_idx],
                                     cmd[mmcmd_idx:], -1, stdout, stderr)
    if rc != RC_SUCCESS:
        if 'mmlsfs: No file systems were found.' in stdout or \
           'mmlsfs: No file systems were found.' in stderr:
            return filesystem_info_list
        raise SpectrumScaleException(
            "Retrieving filesystem information failed",
            cmd[0:mmcmd_idx], cmd[mmcmd_idx:], rc, stdout, stderr)

    filesystem_dict = parse_simple_cmd_output(stdout, "deviceName",
                                              "properties", "filesystems")
    filesystem_list = filesystem_dict["filesystems"]
    for filesystem in filesystem_list:
        device_name = filesystem["deviceName"]
        fs_properties = filesystem["properties"]
        filesystem_instance = SpectrumScaleFS(device_name, fs_properties)
        filesystem_info_list.append(filesystem_instance)
    return filesystem_info_list
def __retrieve_cluster_info(self, admin_ip):
    stdout = stderr = ""
    rc = RC_SUCCESS
    cmd = []
    mmcmd_idx = 1
    if admin_ip:
        cmd.extend(["ssh", admin_ip])
        mmcmd_idx = len(cmd) + 1
    cmd.extend([os.path.join(GPFS_CMD_PATH, "mmlscluster"), "-Y"])
    try:
        stdout, stderr, rc = runCmd(cmd, sh=False)
    except Exception as e:
        raise SpectrumScaleException(str(e), cmd[0:mmcmd_idx],
                                     cmd[mmcmd_idx:], -1, stdout, stderr)
    if rc != RC_SUCCESS:
        raise SpectrumScaleException(
            "Retrieving the cluster information failed",
            cmd[0:mmcmd_idx], cmd[mmcmd_idx:], rc, stdout, stderr)

    return parse_aggregate_cmd_output(
        stdout, ["clusterSummary", "cnfsSummary", "cesSummary"])
def create_filesystem(name, stanza_path, block_size,
                      default_metadata_replicas, default_data_replicas,
                      num_nodes, automatic_mount_option,
                      default_mount_point, admin_ip=None):
    stdout = stderr = ""
    rc = RC_SUCCESS
    cmd = []
    mmcmd_idx = 1
    if admin_ip:
        cmd.extend(["ssh", admin_ip])
        mmcmd_idx = len(cmd) + 1
    cmd.extend([
        os.path.join(GPFS_CMD_PATH, "mmcrfs"),
        name,
        "-F", stanza_path,
        "-B", block_size,
        "-m", default_metadata_replicas,
        "-r", default_data_replicas,
        "-n", num_nodes,
        "-A", automatic_mount_option,
        "-T", default_mount_point
    ])

    # TODO: Make this idempotent
    try:
        stdout, stderr, rc = runCmd(cmd, sh=False)
    except Exception as e:
        raise SpectrumScaleException(str(e), cmd[0:mmcmd_idx],
                                     cmd[mmcmd_idx:], -1, stdout, stderr)
    if rc != RC_SUCCESS:
        raise SpectrumScaleException("Create filesystems on node failed",
                                     cmd[0:mmcmd_idx], cmd[mmcmd_idx:],
                                     rc, stdout, stderr)
    return rc, stdout
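
# Hedged example (all values illustrative): creating filesystem "fs1"
# from an NSD stanza file at an assumed path:
#
#   create_filesystem("fs1", "/tmp/nsd.stanza", "4M", "2", "2", "16",
#                     "yes", "/gpfs/fs1")
#
# runs: /usr/lpp/mmfs/bin/mmcrfs fs1 -F /tmp/nsd.stanza -B 4M -m 2 -r 2
#       -n 16 -A yes -T /gpfs/fs1
#
# Such a stanza file typically contains %nsd blocks, e.g.:
#   %nsd: device=/dev/sdb nsd=gpfs1nsd servers=node1.example.com
#         usage=dataAndMetadata failureGroup=1 pool=system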
def get_all_disk_info(fs_name, admin_ip=None):
    disk_info_list = []
    stdout = stderr = ""
    rc = RC_SUCCESS
    cmd = []
    mmcmd_idx = 1
    if admin_ip:
        cmd.extend(["ssh", admin_ip])
        mmcmd_idx = len(cmd) + 1
    cmd.extend([os.path.join(GPFS_CMD_PATH, "mmlsdisk"), fs_name, "-Y"])
    try:
        stdout, stderr, rc = runCmd(cmd, sh=False)
    except Exception as e:
        raise SpectrumScaleException(str(e), cmd[0:mmcmd_idx],
                                     cmd[mmcmd_idx:], -1, stdout, stderr)
    if rc != RC_SUCCESS:
        # TODO: Check the return codes, examine other possibilities and
        #       verify below
        if "No disks were found" in stderr:
            return disk_info_list
        else:
            raise SpectrumScaleException(
                "Retrieving disk information failed",
                cmd[0:mmcmd_idx], cmd[mmcmd_idx:], rc, stdout, stderr)

    disk_dict = parse_unique_records(stdout)
    disk_list = disk_dict["mmlsdisk"]
    for disk in disk_list:
        disk_instance = SpectrumScaleDisk(disk, fs_name)
        disk_info_list.append(disk_instance)
    return disk_info_list
def get_zimon_collectors(admin_ip=None):
    """
    This function returns the Zimon collector node IPs.
    """
    stdout = stderr = ""
    rc = RC_SUCCESS
    cmd = []
    mmcmd_idx = 1
    if admin_ip:
        cmd.extend(["ssh", admin_ip])
        mmcmd_idx = len(cmd) + 1
    cmd.extend([os.path.join(GPFS_CMD_PATH, "mmperfmon"), "config", "show"])
    try:
        stdout, stderr, rc = runCmd(cmd, sh=False)
    except Exception as e:
        raise SpectrumScaleException(str(e), cmd[0:mmcmd_idx],
                                     cmd[mmcmd_idx:], -1, stdout, stderr)
    if rc != RC_SUCCESS:
        raise SpectrumScaleException("Retrieving Zimon information failed",
                                     cmd[0:mmcmd_idx], cmd[mmcmd_idx:],
                                     rc, stdout, stderr)

    # Default to an empty list so the function returns cleanly even if
    # no "colCandidates" line is present in the command output
    collectors = []
    output = stdout.splitlines()
    col_regex = re.compile(r'colCandidates\s=\s(?P<collectors>.*)')
    for cmd_line in output:
        match = col_regex.match(cmd_line)
        if match:
            collectors = match.group('collectors')
            collectors = collectors.replace("\"", '').replace(" ", '')
            collectors = collectors.split(',')
    return collectors
def get_all_nsd_info(admin_ip=None):
    nsd_info_list = []
    stdout = stderr = ""
    rc = RC_SUCCESS
    cmd = []
    mmcmd_idx = 1
    if admin_ip:
        cmd.extend(["ssh", admin_ip])
        mmcmd_idx = len(cmd) + 1
    cmd.extend([os.path.join(GPFS_CMD_PATH, "mmlsnsd"), "-a", "-X", "-Y"])
    try:
        stdout, stderr, rc = runCmd(cmd, sh=False)
    except Exception as e:
        raise SpectrumScaleException(str(e), cmd[0:mmcmd_idx],
                                     cmd[mmcmd_idx:], -1, stdout, stderr)
    if rc != RC_SUCCESS:
        if "No disks were found" in stderr:
            return nsd_info_list
        else:
            raise SpectrumScaleException(
                "Retrieving NSD information failed",
                cmd[0:mmcmd_idx], cmd[mmcmd_idx:], rc, stdout, stderr)

    nsd_dict = parse_unique_records(stdout)
    nsd_list = nsd_dict["nsd"]
    for nsd in nsd_list:
        nsd_instance = SpectrumScaleNSD(nsd)
        nsd_info_list.append(nsd_instance)
    return nsd_info_list
def delete_nsd(nsd_list, admin_ip=None):
    nsd_names = ";".join(nsd_list)
    stdout = stderr = ""
    rc = RC_SUCCESS
    cmd = []
    mmcmd_idx = 1
    if admin_ip:
        cmd.extend(["ssh", admin_ip])
        mmcmd_idx = len(cmd) + 1
    cmd.extend([os.path.join(GPFS_CMD_PATH, "mmdelnsd"), nsd_names])
    try:
        stdout, stderr, rc = runCmd(cmd, sh=False)
    except Exception as e:
        raise SpectrumScaleException(str(e), cmd[0:mmcmd_idx],
                                     cmd[mmcmd_idx:], -1, stdout, stderr)
    if rc != RC_SUCCESS:
        raise SpectrumScaleException("Deleting NSD(s) failed",
                                     cmd[0:mmcmd_idx], cmd[mmcmd_idx:],
                                     rc, stdout, stderr)
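
# Hedged example (NSD names illustrative): removing two NSD definitions
# after their disks have been deleted from the filesystem:
#
#   delete_nsd(["gpfs1nsd", "gpfs2nsd"])
#
# runs: /usr/lpp/mmfs/bin/mmdelnsd "gpfs1nsd;gpfs2nsd"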
def check_disk_health(logger, fs_nsd_map):
    logger.debug("Function Entry: check_disk_health(). "
                 "Args fs_nsd_map={0}".format(fs_nsd_map))

    unhealthy_disks = []
    for fs_name, disk_list in list(fs_nsd_map.items()):
        for disk in disk_list:
            if "down" in disk.get_availability():
                unhealthy_disks.append(disk.get_nsd_name())

    if unhealthy_disks:
        unhealthy_disks_str = ' '.join(map(str, unhealthy_disks))
        error_msg = ("The following disks \"{0}\" are currently not "
                     "healthy. Ensure all disks in the cluster are healthy "
                     "before retrying the "
                     "operation.".format(unhealthy_disks_str))
        logger.error(error_msg)
        raise SpectrumScaleException(error_msg, "", [], -1, "", "")

    logger.debug("Function Exit: check_disk_health().")
def check_cluster_health(logger):
    logger.debug("Function Entry: check_cluster_health().")

    unhealthy_nodes = []
    all_nodes_state = SpectrumScaleNode.get_state()
    for node_name, state in list(all_nodes_state.items()):
        if ("down" in state or
                "arbitrating" in state or
                "unknown" in state):
            unhealthy_nodes.append(node_name)

    if unhealthy_nodes:
        unhealthy_nodes_str = ' '.join(map(str, unhealthy_nodes))
        error_msg = ("The following node(s) \"{0}\" are currently not up. "
                     "Ensure all nodes in the cluster are fully operational "
                     "before retrying the "
                     "operation.".format(unhealthy_nodes_str))
        logger.error(error_msg)
        raise SpectrumScaleException(error_msg, "", [], -1, "", "")

    logger.debug("Function Exit: check_cluster_health().")
def remove_nodes(logger, node_names_to_delete):
    logger.debug("Function Entry: remove_nodes(). "
                 "Args: node_list={0}".format(node_names_to_delete))

    rc = RC_SUCCESS
    msg = result_json = ""
    removed_node_list = []

    logger.info("Attempting to remove node(s) {0} from the "
                "cluster".format(' '.join(map(str, node_names_to_delete))))

    # TODO: The cluster health check should only fail if we are attempting
    #       to remove NSD servers while other NSD servers are down. The
    #       removal of compute nodes should be permitted even if NSD
    #       servers are down. For now disable the check until the correct
    #       algorithm can be implemented.
    # Ensure all nodes in the cluster are healthy
    # check_cluster_health(logger)

    # Check that the nodes to delete are actually part of the cluster.
    # If not, simply ignore them.
    nodes_to_delete = check_nodes_exist(logger, node_names_to_delete)

    if len(nodes_to_delete) == 0:
        msg = str("All node(s) marked for removal ({0}) are already not "
                  "part of the cluster".format(' '.join(
                      map(str, node_names_to_delete))))
        logger.info(msg)
        return rc, msg, result_json

    # Precheck nodes to make sure they do not have any roles that should
    # not be deleted
    check_roles_before_delete(logger, nodes_to_delete)

    # For each Filesystem, get the Filesystem to NSD (disk) mapping
    fs_nsd_map = get_filesystem_to_nsd_mapping(logger)

    # TODO: The disk health check should only fail if we are attempting
    #       to remove NSD servers when any disks are down. The removal
    #       of compute nodes should be permitted even if disks are down.
    #       For now disable the check until the correct algorithm can be
    #       implemented.
    # check_disk_health(logger, fs_nsd_map)

    # An NSD node can have access to a multi attach NSD (shared NSD) or
    # dedicated access to the NSD (FPO model) or a combination of both.
    # First modify the Shared NSDs and remove access to all NSD Nodes
    # that are to be deleted. Note: As long as these are Shared NSDs,
    # another NSD server will continue to have access to the NSD (and
    # therefore the data).
    remove_multi_attach_nsd(logger, nodes_to_delete)

    # Finally delete any dedicated NSDs (this will force the data to be
    # copied to another NSD in the same filesystem) and then delete the
    # node from the cluster.
    logger.debug("Identified all filesystem to disk mapping: "
                 "{0}".format(fs_nsd_map))

    for node_to_del_obj in nodes_to_delete:
        node_to_del = node_to_del_obj.get_admin_node_name()
        logger.debug("Operating on server: {0}".format(node_to_del))

        # For each node to be deleted, retrieve the NSDs (disks) on the node
        all_node_disks = get_all_nsds_of_node(logger, node_to_del)
        logger.debug("Identified disks for server ({0}): "
                     "{1}".format(node_to_del, all_node_disks))

        # The node does not have any disks on it (compute node). Delete
        # the node without any more processing.
        if len(all_node_disks) == 0:
            logger.info("Unmounting filesystem(s) on "
                        "{0}".format(node_to_del))
            SpectrumScaleFS.unmount_filesystems(node_to_del, wait=True)
            logger.info("Shutting down node {0}".format(node_to_del))
            SpectrumScaleNode.shutdown_node(node_to_del, wait=True)
            logger.info("Deleting compute node {0}".format(node_to_del))
            SpectrumScaleCluster.delete_node(node_to_del)
            removed_node_list.append(node_to_del)
            continue

        # Generate a list of NSDs (disks) on the host to be deleted for
        # each filesystem
        #
        # fs_disk_map{} contains the following:
        #     Filesystem Name -> NSDs on the host to be deleted
        fs_disk_map = {}
        for fs_name, disks in list(fs_nsd_map.items()):
            node_specific_disks = []
            for disk_instance in disks:
                if disk_instance.get_nsd_name() in all_node_disks:
                    node_specific_disks.append(
                        disk_instance.get_nsd_name())
            fs_disk_map[fs_name] = node_specific_disks

        logger.debug("Identified filesystem to disk map for server "
                     "({0}): {1}".format(node_to_del, fs_disk_map))

        for fs in fs_disk_map:
            disk_cap = gpfs_df_disk(logger, fs)
            logger.debug("Identified disk capacity for filesystem "
                         "({0}): {1}".format(fs, disk_cap))

            # Algorithm used for checking at least 20% free space while
            # mmdeldisk is in progress:
            #   - Identify the size of the data stored on the disks to
            #     be deleted.
            #   - Identify the free size of the filesystem
            #     (excluding the disks to be deleted).
            #   - Allow disk deletion only if the filesystem retains at
            #     least 20% free space after the data on the disks to be
            #     deleted has been moved.
            size_to_be_del = 0
            for disk in fs_disk_map[fs]:
                size_to_be_del += disk_cap[disk]['used_size']
            logger.debug("Identified data size going to be deleted from "
                         "filesystem ({0}): {1}".format(fs,
                                                        size_to_be_del))

            other_disks = []
            for disk_name in disk_cap:
                if disk_name not in fs_disk_map[fs]:
                    other_disks.append(disk_name)
            logger.debug("Identified other disks of the filesystem "
                         "({0}): {1}".format(fs, other_disks))

            if not other_disks:
                msg = str("No free disks available to restripe data "
                          "for the filesystem {0}".format(fs))
                logger.error(msg)
                raise SpectrumScaleException(msg=msg, mmcmd="", cmdargs=[],
                                             rc=-1, stdout="", stderr="")

            size_avail_after_migration, total_free = 0, 0
            for disk in other_disks:
                # Accumulate free size on all remaining disks
                total_free += disk_cap[disk]['free_size']
            logger.debug("Identified free size in other disks of the "
                         "filesystem ({0}): {1}".format(fs, total_free))

            size_avail_after_migration = total_free - size_to_be_del
            logger.debug("Expected size after restriping of the "
                         "filesystem ({0}): "
                         "{1}".format(fs, size_avail_after_migration))

            percent = int(size_avail_after_migration * 100 / total_free)
            logger.debug("Expected percentage of size left after "
                         "restriping of the filesystem ({0}): "
                         "{1}".format(fs, percent))

            if percent < 20:
                msg = ("Not enough space left for restriping data for "
                       "filesystem {0}".format(fs))
                logger.error(msg)
                raise SpectrumScaleException(msg=msg, mmcmd="", cmdargs=[],
                                             rc=-1, stdout="", stderr="")

            if fs_disk_map[fs]:
                # mmdeldisk is not invoked if there are no disks to delete
                logger.info("Deleting disk(s) {0} from node "
                            "{1}".format(' '.join(map(str,
                                                      fs_disk_map[fs])),
                                         node_to_del))
                SpectrumScaleDisk.delete_disk(node_to_del, fs,
                                              fs_disk_map[fs])

        if all_node_disks:
            # mmdelnsd is not invoked if there are no disks to delete
            logger.info("Deleting all NSD(s) {0} attached to node "
                        "{1}".format(' '.join(map(str, all_node_disks)),
                                     node_to_del))
            SpectrumScaleNSD.delete_nsd(all_node_disks)

        logger.info("Unmounting filesystem(s) on {0}".format(node_to_del))
        SpectrumScaleFS.unmount_filesystems(node_to_del, wait=True)

        logger.info("Shutting down node {0}".format(node_to_del))
        SpectrumScaleNode.shutdown_node(node_to_del, wait=True)

        logger.info("Deleting storage node {0}".format(node_to_del))
        SpectrumScaleCluster.delete_node(node_to_del)
        removed_node_list.append(node_to_del)

    msg = str("Successfully removed node(s) {0} from the "
              "cluster".format(' '.join(map(str, removed_node_list))))
    logger.info(msg)

    logger.debug("Function Exit: remove_nodes(). "
                 "Return Params: rc={0} msg={1}".format(rc, msg))
    return rc, msg, result_json