Ejemplo n.º 1
0
def check_cluster_health() -> None:
    """
    Verify that the cluster is running and that no resource is unhealthy.

    Runs PCS_CLUSTER_STATUS (without error checking) and treats a non-zero
    return code as "cluster not running". Then runs PCS_FAILCOUNT_STATUS and
    treats the presence of "INFINITY" in its output as an unhealthy resource.

    Exceptions:
        UpgradeError
    """
    # Step 1: the status command must succeed on this node
    _stdout, _stderr, status_rc = SimpleCommand().run_cmd(
        PCS_CLUSTER_STATUS, check_error=False)
    if status_rc != 0:
        raise UpgradeError("Cluster is not running on current node")

    # Step 2: the failcount report must not mention INFINITY
    failcount_report, _stderr, _rc = SimpleCommand().run_cmd(PCS_FAILCOUNT_STATUS)
    if "INFINITY" in failcount_report:
        raise UpgradeError(
            f"Cluster is not stable, some resource are not healthy. {failcount_report}")
Ejemplo n.º 2
0
def backup_consul(filename: str = "consul-kv-dump.json", dst: str = BACKUP_DEST_DIR_CONSUL) -> None:
    """
    Backup Consul KV.

    If a dump with the same name already exists in dst, all *.tar.gz files
    in dst are removed and the existing dump is archived next to itself as
    <dump>.<UTC timestamp>.tar.gz. Otherwise dst is created if missing.
    Finally a fresh "consul kv export" is written to the dump file.

    Parameters:
        filename: Consul dump file
        dst: Directory to back up the Consul dump to

    Return:
        None

    Exceptions:
        UpgradeError
    """
    consul_kv_dump = os.path.join(dst, filename)

    if os.path.exists(consul_kv_dump):
        # Drop older archives so only the one created below remains
        for archive in glob(f"{dst}/*.tar.gz"):
            os.remove(archive)

        # Save previous one
        timestamp = strftime("%Y%m%d%H%M%S", gmtime())
        # consul_kv_dump already includes dst; joining dst a second time
        # produced "dst/dst/..." whenever dst was a relative path.
        archive_name = f"{consul_kv_dump}.{timestamp}.tar.gz"
        Log.info(f"Backup existing {consul_kv_dump} to {archive_name}")
        with tarfile.open(archive_name, "w:gz") as tar:
            tar.add(consul_kv_dump)
    else:
        os.makedirs(dst, exist_ok=True)

    # The shell is needed for the output redirection; the path is quoted
    consul_export_cmd = "consul kv export > {}".format(shlex.quote(consul_kv_dump))
    cp = subprocess.run(consul_export_cmd, shell=True, stderr=subprocess.PIPE)
    if cp.returncode:
        raise UpgradeError("Consul export failed with error {}".format(cp.stderr.decode()))
Ejemplo n.º 3
0
def _yaml_to_dict(yaml_file=None):
    '''
       Convert yaml format key value info in the form of
       python dictionary key value.

       Parameters:
           yaml_file: Path of the yaml file to load

       Return:
           Whatever yaml.safe_load produces for the file content

       Exceptions:
           UpgradeError if yaml_file is None
    '''
    if yaml_file is None:
        # Adjacent literals instead of a backslash continuation inside the
        # string, which embedded the source indentation in the message.
        raise UpgradeError('yaml file path can not be None. Please provide the '
                           'HA yaml conf file path for conversion')
    with open(yaml_file, 'r') as conf_file:
        return yaml.safe_load(conf_file)
Ejemplo n.º 4
0
def _check_for_any_resource_presence() -> None:
    '''Check if any resources are already present in a cluster.
       If yes, pre-upgrade steps failed, hence raise.

       A resource counts as present when the CIB XML contains an
       lrm_resource element that carries an "id" attribute.

       Exceptions:
           UpgradeError
    '''

    Log.info('Check for any resource presence in a cluster')

    root = _get_cib_xml()
    # Only existence matters, so stop at the first matching element
    resource_present = any("id" in e.attrib
                           for e in root.findall(".//lrm_resource"))

    if resource_present:
        # Adjacent literals instead of a backslash continuation inside the
        # string, which embedded the source indentation in the message.
        raise UpgradeError('Some resources are already present in the cluster. '
                           'Perform Upgrade process again')
Ejemplo n.º 5
0
def _switch_cluster_mode(cluster_mode, retry_count=0) -> None:
    '''
       Perform cluster operation to change the mode such as standby or
       unstandby and also retries the operation.

       Parameters:
           cluster_mode: Command string; " --wait=<timeout>" is appended to it
           retry_count: Number of retries already consumed (at most 3 are made)

       Exceptions:
           UpgradeError when the command still fails after the last retry
    '''
    max_retries = 3
    cluster_switch_mode_command = cluster_mode + f' --wait={CLUSTER_STANDBY_UNSTANDBY_TIMEOUT}'

    # Iterative retry: the recursive version fell through to the raise
    # even when a retry had succeeded, and never stopped for
    # retry_count > 3.
    while True:
        try:
            SimpleCommand().run_cmd(cluster_switch_mode_command)
            return
        except Exception as err:
            if retry_count >= max_retries:
                raise UpgradeError('Failed to switch the mode of the cluster. '
                                   'Retry upgrade again') from err
            retry_count += 1
Ejemplo n.º 6
0
def cluster_standby_mode() -> None:
    """
    Put every cluster node into standby mode.

    Issues "pcs node standby --all" with a wait of 600 and converts any
    failure of that command into an UpgradeError.

    Note: this function may be replaced by Cluster Manager call.

    Exceptions:
        UpgradeError
    """
    Log.info("Set cluster to standby mode")
    try:
        SimpleCommand().run_cmd("pcs node standby --all --wait=600")
    except Exception as failure:
        raise UpgradeError("Cluster standby operation failed") from failure
Ejemplo n.º 7
0
def is_resource_deleted(timeout) -> None:
    """
    Wait until the resource list is empty, polling every 5 seconds.

    The loop runs while the remaining time budget is positive and stops
    early once no resources are reported. Afterwards the resource list is
    fetched once more, and any leftover resource is treated as a failure.

    Parameters:
        timeout: Time budget, decremented by the poll interval each round

    Exceptions:
        UpgradeError
    """
    poll_interval = 5
    time_left = timeout
    while time_left > 0:
        pending = _get_resource_list()
        Log.info(
            f"Waiting for {time_left!s} to delete resources {pending}.")
        if not len(pending):
            Log.info("All resource deleted successfully.")
            break
        time.sleep(poll_interval)
        time_left -= poll_interval

    # Final verification, independent of how the loop ended
    leftover = _get_resource_list()
    if len(leftover) == 0:
        return
    raise UpgradeError(
        f"Failed to delete resource. Remaining resources {leftover} ...")
Ejemplo n.º 8
0
def cluster_standby_mode() -> None:
    """
    Put cluster to standby mode.

    Runs PCS_CLUSTER_STANDBY with "--wait=1800" appended and converts any
    failure of that command into an UpgradeError.

    Note: this function may be replaced by Cluster Manager call.

    Exceptions:
        UpgradeError
    """
    Log.info("Set cluster to standby mode")
    Log.info("Please wait, standby can take max 20 to 30 min.")
    standby_cmd = f"{PCS_CLUSTER_STANDBY} --wait=1800"
    try:
        SimpleCommand().run_cmd(standby_cmd)
    except Exception as err:
        # Chain explicitly so the root cause is reported as the direct cause
        raise UpgradeError("Cluster standby operation failed") from err
Ejemplo n.º 9
0
def _load_config(ha_source_conf: str = SOURCE_CONFIG_FILE, \
                 ha_backup_conf: str = BACKUP_CONFIG_FILE) -> None:
    '''
       Load the new config at proper location after the
       RPM upgrade as part of post-upgrade process.

       The backed-up config is updated with the key-values of the new
       source config, written back to the backup file and then copied over
       the source config file.

       Parameters:
           ha_source_conf: Path of the new (post-upgrade) yaml config file
           ha_backup_conf: Path of the backed-up (pre-upgrade) yaml config file

       Exceptions:
           UpgradeError
    '''

    dest_dir = CONFIG_DIR
    new_src_dir = SOURCE_CONFIG_PATH

    # Convert yaml to dictionary.
    # An empty yaml file is parsed as None; treat it as an empty mapping so
    # the merge below does not fail with AttributeError.
    old_backup_conf_dict = _yaml_to_dict(ha_backup_conf) or {}
    new_conf_dict = _yaml_to_dict(ha_source_conf) or {}

    # Note: There are 3 scenarios for conf file upgrade
    # 1. New conf key-value pair can be introduced
    # 2. Already present conf key can be updated with new value
    # 3. A conf key-value can be deleted
    # Here, we are assuming that there will not be any update of existing
    # values. Upgrade means a new key value will be added. The 2nd scenario
    # needs to be handled separately.
    # If a key is deleted after upgrade and the conf is still loaded with
    # that key, it will not affect the functionality because that key will
    # not be in use.

    # Update the old dictionary with new one.
    # This update will also overwrite values that changed in the new
    # version. This is not considered right now, hence update can be safely
    # used. So, handling only the first scenario here.
    old_backup_conf_dict.update(new_conf_dict)

    # Finally, update the old config file with new changes
    with open(ha_backup_conf, 'w') as outfile:
        yaml.dump(old_backup_conf_dict, outfile, default_flow_style=False)

    try:
        # Copy the updated backup conf file to the source
        copyfile(ha_backup_conf, ha_source_conf)

        # NOTE(review): the original comment says the whole source directory
        # is copied to the desired location, but copystat (if this is
        # shutil.copystat) only copies metadata, not contents. Confirm
        # whether a directory copy was intended.
        if os.path.exists(new_src_dir) and os.listdir(new_src_dir):
            copystat(new_src_dir, dest_dir)
    except Exception as err:
        # Adjacent literals instead of a backslash continuation inside the
        # string, which embedded the source indentation in the message.
        raise UpgradeError('Failed to load the new config after '
                           'upgrading the RPM. Please retry Upgrade process again') \
                           from err
Ejemplo n.º 10
0
def delete_resources() -> None:
    """
    Remove every pacemaker resource found in the CIB.

    Collects the "id" attribute of each lrm_resource element of the CIB XML
    and runs "pcs resource delete" for each id. Any failure along the way
    is reported as an UpgradeError.

    Exceptions:
        UpgradeError
    """
    try:
        cib_root = _get_cib_xml()

        # Gather ids of all lrm_resource elements that have one
        resource_ids = []
        for element in cib_root.findall(".//lrm_resource"):
            if "id" in element.attrib:
                resource_ids.append(element.attrib["id"])
        Log.info(f"Going to delete following resources: {resource_ids}")

        for resource_id in resource_ids:
            Log.info(f"Deleting {resource_id}")
            SimpleCommand().run_cmd(f"pcs resource delete {resource_id}")
    except Exception as failure:
        raise UpgradeError("Resource deletion failed") from failure
Ejemplo n.º 11
0
def delete_resources() -> None:
    """
    Delete pacemaker resources.

    Deletes every resource returned by _get_resource_list() using
    PCS_DELETE_RESOURCE, runs PCS_CLEANUP, then waits up to 120 seconds
    for the resource list to become empty.

    Exceptions:
        UpgradeError
    """
    try:
        resources = _get_resource_list()
        Log.info(f"Going to delete following resources: {resources}")
        for r in resources:
            Log.info(f"Deleting resource {r}")
            SimpleCommand().run_cmd(
                PCS_DELETE_RESOURCE.replace("<resource>", r))
        SimpleCommand().run_cmd(PCS_CLEANUP)
        Log.info("Wait 2 min till all resource deleted.")
        is_resource_deleted(120)
    except Exception as err:
        # Chain explicitly so the root cause is reported as the direct cause
        raise UpgradeError("Resource deletion failed") from err
Ejemplo n.º 12
0
def backup_configuration(src: str = CONFIG_DIR, dst: str = BACKUP_DEST_DIR_CONF) -> None:
    """
    Backup HA configuration.

    Replaces dst with a fresh copy of the src directory tree. The source is
    validated first, so an existing backup is not removed when there is
    nothing to copy.

    Parameters:
        src: HA configs location
        dst: Directory with HA config backup

    Return:
        None

    Exceptions:
        UpgradeError
    """
    Log.info(f"Backup HA configuration from {src} to {dst}")
    try:
        # Check the source before touching dst: removing the old backup and
        # then failing in copytree would leave no backup at all.
        if not os.path.isdir(src):
            raise FileNotFoundError(f"HA config directory {src} does not exist")
        if os.path.exists(dst):
            rmtree(dst)
        copytree(src, dst)
    except Exception as err:
        raise UpgradeError("Failed to create backup of HA config") from err