def verify_image_versions(old_images, upgrade_version):
    """
    Check that the pods of every OCS component were upgraded.

    Every component is checked with ``verify_pods_upgraded`` through its
    label selector and, where one is given, the expected number of pods.

    Args:
        old_images (set): set with old images
        upgrade_version (packaging.version.Version): version of OCS

    """
    worker_count = len(get_typed_nodes())
    osds = get_osd_count()

    def check(selector, **extra):
        # Run one component check against the shared set of old images.
        verify_pods_upgraded(old_images, selector=selector, **extra)

    check(constants.OCS_OPERATOR_LABEL)
    check(constants.OPERATOR_LABEL)

    # From 4.3 the noobaa app selector matches noobaa-core-ID, noobaa-db-ID
    # and noobaa-operator-ID; 4.2 only has noobaa-core-ID and
    # noobaa-operator-ID.
    if upgrade_version < parse_version('4.3'):
        noobaa_count = 2
    else:
        noobaa_count = 3
    check(constants.NOOBAA_APP_LABEL, count=noobaa_count)

    check(constants.CSI_CEPHFSPLUGIN_LABEL, count=worker_count)
    check(constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL, count=2)
    check(constants.CSI_RBDPLUGIN_LABEL, count=worker_count)
    check(constants.CSI_RBDPLUGIN_PROVISIONER_LABEL, count=2)
    check(constants.MON_APP_LABEL, count=3)
    check(constants.MGR_APP_LABEL)

    # An OSD upgrade waits 10 minutes before a new attempt when the cluster
    # is not healthy (https://bugzilla.redhat.com/show_bug.cgi?id=1840729),
    # so allow 12.5 minutes per OSD.
    check(constants.OSD_APP_LABEL, count=osds, timeout=750 * osds)
    check(constants.MDS_APP_LABEL, count=2)

    platform = config.ENV_DATA.get('platform')
    if (
        platform in constants.ON_PREM_PLATFORMS
        or platform == constants.AZURE_PLATFORM
    ):
        # Workaround for https://bugzilla.redhat.com/show_bug.cgi?id=1857802:
        # RGW count is 1 post upgrade to OCS 4.5. Tracked with
        # https://github.com/red-hat-storage/ocs-ci/issues/2532
        # TODO: once resolved, derive the count from the OCS version again:
        #   2 if float(config.ENV_DATA['ocs_version']) >= 4.5 else 1
        expected_rgw = 1
        check(constants.RGW_APP_LABEL, count=expected_rgw)
def verify_image_versions(old_images, upgrade_version, version_before_upgrade):
    """
    Check that the pods of every OCS component were upgraded.

    Args:
        old_images (set): set with old images
        upgrade_version (packaging.version.Version): version of OCS
        version_before_upgrade (float): version of OCS before upgrade

    """
    node_count = len(get_nodes())
    osds = get_osd_count()

    for operator_label in (constants.OCS_OPERATOR_LABEL, constants.OPERATOR_LABEL):
        verify_pods_upgraded(old_images, selector=operator_label)

    # From 4.3 the noobaa app selector matches noobaa-core-ID, noobaa-db-ID
    # and noobaa-operator-ID; 4.2 only has noobaa-core-ID and
    # noobaa-operator-ID.
    if upgrade_version < parse_version("4.3"):
        noobaa_count = 2
    else:
        noobaa_count = 3
    verify_pods_upgraded(
        old_images, selector=constants.NOOBAA_APP_LABEL, count=noobaa_count
    )

    # Components that are checked with a plain expected pod count, in order.
    counted_components = (
        (constants.CSI_CEPHFSPLUGIN_LABEL, node_count),
        (constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL, 2),
        (constants.CSI_RBDPLUGIN_LABEL, node_count),
        (constants.CSI_RBDPLUGIN_PROVISIONER_LABEL, 2),
        (constants.MON_APP_LABEL, 3),
    )
    for selector, expected in counted_components:
        verify_pods_upgraded(old_images, selector=selector, count=expected)

    verify_pods_upgraded(old_images, selector=constants.MGR_APP_LABEL)

    # The per-OSD timeout is 600 seconds from 4.5 on and 750 seconds before.
    if upgrade_version >= parse_version("4.5"):
        per_osd_timeout = 600
    else:
        per_osd_timeout = 750
    verify_pods_upgraded(
        old_images,
        selector=constants.OSD_APP_LABEL,
        count=osds,
        timeout=per_osd_timeout * osds,
    )
    verify_pods_upgraded(old_images, selector=constants.MDS_APP_LABEL, count=2)

    # RGW pods are only checked on on-prem platforms.
    if config.ENV_DATA.get("platform") not in constants.ON_PREM_PLATFORMS:
        return
    expected_rgw = get_rgw_count(
        upgrade_version.base_version, True, version_before_upgrade
    )
    verify_pods_upgraded(
        old_images, selector=constants.RGW_APP_LABEL, count=expected_rgw
    )
def verify_image_versions(old_images, upgrade_version):
    """
    Check that the pods of every OCS component were upgraded.

    Args:
        old_images (set): set with old images
        upgrade_version (packaging.version.Version): version of OCS

    """
    worker_count = len(get_typed_nodes())
    osds = get_osd_count()

    # From 4.3 the noobaa app selector matches noobaa-core-ID, noobaa-db-ID
    # and noobaa-operator-ID; 4.2 only has noobaa-core-ID and
    # noobaa-operator-ID.
    noobaa_count = 3
    if upgrade_version < parse_version('4.3'):
        noobaa_count = 2

    # Keyword arguments of every check, in the order the checks must run.
    # An OSD upgrade waits 10 minutes before a new attempt when the cluster
    # is not healthy (https://bugzilla.redhat.com/show_bug.cgi?id=1840729),
    # hence the 12.5 minutes per OSD.
    checks = [
        {"selector": constants.OCS_OPERATOR_LABEL},
        {"selector": constants.OPERATOR_LABEL},
        {"selector": constants.NOOBAA_APP_LABEL, "count": noobaa_count},
        {"selector": constants.CSI_CEPHFSPLUGIN_LABEL, "count": worker_count},
        {"selector": constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL, "count": 2},
        {"selector": constants.CSI_RBDPLUGIN_LABEL, "count": worker_count},
        {"selector": constants.CSI_RBDPLUGIN_PROVISIONER_LABEL, "count": 2},
        {"selector": constants.MON_APP_LABEL, "count": 3},
        {"selector": constants.MGR_APP_LABEL},
        {
            "selector": constants.OSD_APP_LABEL,
            "count": osds,
            "timeout": 750 * osds,
        },
        {"selector": constants.MDS_APP_LABEL, "count": 2},
    ]
    for check_kwargs in checks:
        verify_pods_upgraded(old_images, **check_kwargs)

    # A single RGW pod is checked on vSphere only.
    on_vsphere = config.ENV_DATA.get('platform') == constants.VSPHERE_PLATFORM
    if on_vsphere:
        verify_pods_upgraded(
            old_images, selector=constants.RGW_APP_LABEL, count=1
        )
def get_images_post_upgrade(self, channel, pre_upgrade_images, upgrade_version):
    """
    Wait for the post-upgrade CSV to succeed and return the old images.

    Args:
        channel (str): OCS subscription channel
        pre_upgrade_images (dict): Contains all OCS cluster images
        upgrade_version (str): version to be upgraded

    Returns:
        set: Contains full path of OCS cluster old images

    """
    selector = get_selector_for_ocs_operator()
    manifest = PackageManifest(
        resource_name=OCS_OPERATOR_NAME,
        selector=selector,
        subscription_plan_approval=self.subscription_plan_approval,
    )
    new_csv_name = manifest.get_current_csv(channel)
    new_csv = CSV(resource_name=new_csv_name, namespace=self.namespace)
    log.info(f"Waiting for CSV {new_csv_name} to be in succeeded state")

    # Workaround for patching missing ceph-rook-tools pod after upgrade
    upgrading_42_to_43 = (
        self.version_before_upgrade == "4.2" and upgrade_version == "4.3"
    )
    if upgrading_42_to_43:
        log.info("Force creating Ceph toolbox after upgrade 4.2 -> 4.3")
        setup_ceph_toolbox(force_setup=True)
    # End of workaround

    # The wait scales with the OSD count, except for external mode and
    # MCG-only deployments, which use a flat 200 seconds.
    flat_timeout = config.DEPLOYMENT.get("external_mode") or config.ENV_DATA.get(
        "mcg_only_deployment"
    )
    wait_seconds = 200 if flat_timeout else 200 * get_osd_count()
    new_csv.wait_for_phase("Succeeded", timeout=wait_seconds)

    images_after = get_images(new_csv.get())
    stale_images, _, _ = get_upgrade_image_info(pre_upgrade_images, images_after)
    return stale_images
def verify_image_versions(old_images, upgrade_version, version_before_upgrade):
    """
    Verify if all the images of OCS objects got upgraded

    Args:
        old_images (set): set with old images
        upgrade_version (packaging.version.Version): version of OCS
        version_before_upgrade (float): version of OCS before upgrade

    Raises:
        TimeoutException: re-raised from the noobaa pod check when the
            upgrade version is lower than 4.7 (no retry is attempted there)

    """
    number_of_worker_nodes = len(get_nodes())
    verify_pods_upgraded(old_images, selector=constants.OCS_OPERATOR_LABEL)
    verify_pods_upgraded(old_images, selector=constants.OPERATOR_LABEL)

    # Pods matched by the noobaa app selector before endpoint counts are added.
    default_noobaa_pods = 3
    noobaa_pods = default_noobaa_pods
    if upgrade_version >= parse_version("4.7"):
        # From 4.7 the expected count also includes the endpoint count read
        # from the noobaa resource spec (constants are used as fallbacks).
        noobaa = OCP(kind="noobaa", namespace=config.ENV_DATA["cluster_namespace"])
        resource = noobaa.get()["items"][0]
        endpoints = resource.get("spec", {}).get("endpoints", {})
        max_endpoints = endpoints.get("maxCount", constants.MAX_NB_ENDPOINT_COUNT)
        min_endpoints = endpoints.get(
            "minCount", constants.MIN_NB_ENDPOINT_COUNT_POST_DEPLOYMENT
        )
        noobaa_pods = default_noobaa_pods + min_endpoints
    try:
        verify_pods_upgraded(
            old_images,
            selector=constants.NOOBAA_APP_LABEL,
            count=noobaa_pods,
        )
    except TimeoutException as ex:
        if upgrade_version >= parse_version("4.7"):
            # The first attempt assumed the minimum endpoint count; retry once
            # with the maximum before giving up.
            log.info(
                "Nooba pods didn't match. Trying once more with max noobaa endpoints! "
                f"Exception: {ex}"
            )
            noobaa_pods = default_noobaa_pods + max_endpoints
            verify_pods_upgraded(
                old_images,
                selector=constants.NOOBAA_APP_LABEL,
                count=noobaa_pods,
                timeout=60,
            )
        else:
            raise

    verify_pods_upgraded(
        old_images,
        selector=constants.CSI_CEPHFSPLUGIN_LABEL,
        count=number_of_worker_nodes,
    )
    verify_pods_upgraded(
        old_images, selector=constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL, count=2
    )
    verify_pods_upgraded(
        old_images,
        selector=constants.CSI_RBDPLUGIN_LABEL,
        count=number_of_worker_nodes,
    )
    verify_pods_upgraded(
        old_images, selector=constants.CSI_RBDPLUGIN_PROVISIONER_LABEL, count=2
    )

    # NOTE(review): the Ceph daemon checks below are taken to be nested under
    # the external-mode guard - confirm against the original indentation.
    if not config.DEPLOYMENT.get("external_mode"):
        verify_pods_upgraded(
            old_images,
            selector=constants.MON_APP_LABEL,
            count=3,
        )
        verify_pods_upgraded(old_images, selector=constants.MGR_APP_LABEL)
        # Per-OSD timeout in seconds: 600 from 4.5 on, 750 before.
        osd_timeout = 600 if upgrade_version >= parse_version("4.5") else 750
        osd_count = get_osd_count()
        verify_pods_upgraded(
            old_images,
            selector=constants.OSD_APP_LABEL,
            count=osd_count,
            timeout=osd_timeout * osd_count,
        )
        verify_pods_upgraded(old_images, selector=constants.MDS_APP_LABEL, count=2)
        if config.ENV_DATA.get("platform") in constants.ON_PREM_PLATFORMS:
            rgw_count = get_rgw_count(
                upgrade_version.base_version, True, version_before_upgrade
            )
            verify_pods_upgraded(
                old_images,
                selector=constants.RGW_APP_LABEL,
                count=rgw_count,
            )
def verify_image_versions(old_images, upgrade_version, version_before_upgrade):
    """
    Verify if all the images of OCS objects got upgraded

    Args:
        old_images (set): set with old images
        upgrade_version (packaging.version.Version): version of OCS
        version_before_upgrade (float): version of OCS before upgrade

    Raises:
        TimeoutException: re-raised from the noobaa pod check when the
            upgrade version is lower than 4.7 (no retry is attempted there)

    """
    number_of_worker_nodes = len(get_nodes())
    verify_pods_upgraded(old_images, selector=constants.OCS_OPERATOR_LABEL)
    verify_pods_upgraded(old_images, selector=constants.OPERATOR_LABEL)

    # Pods matched by the noobaa app selector before endpoint counts are added.
    default_noobaa_pods = 3
    noobaa_pods = default_noobaa_pods
    if upgrade_version >= parse_version("4.7"):
        # From 4.7 the expected count also includes the endpoint count read
        # from the noobaa resource spec (constants are used as fallbacks).
        noobaa = OCP(kind="noobaa", namespace=config.ENV_DATA["cluster_namespace"])
        resource = noobaa.get()["items"][0]
        endpoints = resource.get("spec", {}).get("endpoints", {})
        max_endpoints = endpoints.get("maxCount", constants.MAX_NB_ENDPOINT_COUNT)
        min_endpoints = endpoints.get(
            "minCount", constants.MIN_NB_ENDPOINT_COUNT_POST_DEPLOYMENT
        )
        noobaa_pods = default_noobaa_pods + min_endpoints
    try:
        verify_pods_upgraded(
            old_images,
            selector=constants.NOOBAA_APP_LABEL,
            count=noobaa_pods,
        )
    except TimeoutException as ex:
        if upgrade_version >= parse_version("4.7"):
            # The first attempt assumed the minimum endpoint count; retry once
            # with the maximum before giving up.
            log.info(
                "Nooba pods didn't match. Trying once more with max noobaa endpoints! "
                f"Exception: {ex}"
            )
            noobaa_pods = default_noobaa_pods + max_endpoints
            verify_pods_upgraded(
                old_images,
                selector=constants.NOOBAA_APP_LABEL,
                count=noobaa_pods,
                timeout=60,
            )
        else:
            raise

    verify_pods_upgraded(
        old_images,
        selector=constants.CSI_CEPHFSPLUGIN_LABEL,
        count=number_of_worker_nodes,
    )
    verify_pods_upgraded(
        old_images, selector=constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL, count=2
    )
    verify_pods_upgraded(
        old_images,
        selector=constants.CSI_RBDPLUGIN_LABEL,
        count=number_of_worker_nodes,
    )
    verify_pods_upgraded(
        old_images, selector=constants.CSI_RBDPLUGIN_PROVISIONER_LABEL, count=2
    )

    # NOTE(review): the Ceph daemon checks below are taken to be nested under
    # the external-mode guard - confirm against the original indentation.
    if not config.DEPLOYMENT.get("external_mode"):
        verify_pods_upgraded(
            old_images,
            selector=constants.MON_APP_LABEL,
            count=3,
        )
        verify_pods_upgraded(old_images, selector=constants.MGR_APP_LABEL)
        # Per-OSD timeout in seconds: 600 from 4.5 on, 750 before.
        osd_timeout = 600 if upgrade_version >= parse_version("4.5") else 750
        osd_count = get_osd_count()
        # In the debugging issue:
        # https://github.com/red-hat-storage/ocs-ci/issues/5031
        # Noticed that it's taking about 1 more minute from previous check till
        # actual OSD pods getting restarted.
        # Hence adding sleep here for 120 seconds to be sure, OSD pods upgrade
        # started.
        log.info("Waiting for 2 minutes before start checking OSD pods")
        time.sleep(120)
        verify_pods_upgraded(
            old_images,
            selector=constants.OSD_APP_LABEL,
            count=osd_count,
            timeout=osd_timeout * osd_count,
        )
        verify_pods_upgraded(old_images, selector=constants.MDS_APP_LABEL, count=2)
        if config.ENV_DATA.get("platform") in constants.ON_PREM_PLATFORMS:
            rgw_count = get_rgw_count(
                upgrade_version.base_version, True, version_before_upgrade
            )
            verify_pods_upgraded(
                old_images,
                selector=constants.RGW_APP_LABEL,
                count=rgw_count,
            )

    # The metrics exporter pod is only checked from 4.6 on.
    if upgrade_version >= parse_version("4.6"):
        verify_pods_upgraded(old_images, selector=constants.OCS_METRICS_EXPORTER)
def test_upgrade():
    """
    Upgrade OCS and verify the result.

    Steps: record the pre-upgrade CSV images, point the catalog source at the
    upgrade image (unless upgrading in the current source), patch the
    subscription, wait for a new CSV to reach the Succeeded phase, then verify
    pod images and the installation. The run is wrapped in
    ``CephHealthMonitor``.

    Raises:
        AssertionError: if the target version is lower than the current one
        TimeoutException: if no new CSV shows up after all attempts

    """
    ceph_cluster = CephCluster()
    # NOTE(review): the whole flow is taken to run inside the health monitor
    # context - confirm against the original indentation.
    with CephHealthMonitor(ceph_cluster):
        namespace = config.ENV_DATA['cluster_namespace']
        version_before_upgrade = config.ENV_DATA.get("ocs_version")
        upgrade_version = config.UPGRADE.get(
            "upgrade_ocs_version", version_before_upgrade
        )
        ocs_registry_image = config.UPGRADE.get('upgrade_ocs_registry_image')
        if ocs_registry_image:
            # An explicit registry image determines the target version.
            upgrade_version = get_ocs_version_from_image(ocs_registry_image)
        parsed_version_before_upgrade = parse_version(version_before_upgrade)
        parsed_upgrade_version = parse_version(upgrade_version)
        assert parsed_upgrade_version >= parsed_version_before_upgrade, (
            f"Version you would like to upgrade to: {upgrade_version} "
            f"is not higher or equal to the version you currently running: "
            f"{version_before_upgrade}"
        )
        operator_selector = get_selector_for_ocs_operator()
        package_manifest = PackageManifest(
            resource_name=OCS_OPERATOR_NAME,
            selector=operator_selector,
        )
        channel = config.DEPLOYMENT.get('ocs_csv_channel')
        csv_name_pre_upgrade = package_manifest.get_current_csv(channel)
        log.info(f"CSV name before upgrade is: {csv_name_pre_upgrade}")
        csv_pre_upgrade = CSV(
            resource_name=csv_name_pre_upgrade, namespace=namespace
        )
        pre_upgrade_images = get_images(csv_pre_upgrade.get())
        version_change = parsed_upgrade_version > parsed_version_before_upgrade
        if version_change:
            # Load the configuration file of the target OCS version.
            version_config_file = os.path.join(
                constants.CONF_DIR, 'ocs_version', f'ocs-{upgrade_version}.yaml'
            )
            load_config_file(version_config_file)
        ocs_catalog = CatalogSource(
            resource_name=constants.OPERATOR_CATALOG_SOURCE_NAME,
            namespace=constants.MARKETPLACE_NAMESPACE,
        )
        upgrade_in_current_source = config.UPGRADE.get(
            'upgrade_in_current_source', False
        )
        if not upgrade_in_current_source:
            if not ocs_catalog.is_exist():
                log.info("OCS catalog source doesn't exist. Creating new one.")
                create_catalog_source(ocs_registry_image, ignore_upgrade=True)
            image_url = ocs_catalog.get_image_url()
            image_tag = ocs_catalog.get_image_name()
            log.info(f"Current image is: {image_url}, tag: {image_tag}")
            if ocs_registry_image:
                # Split on the last colon only, so a registry host with a
                # port ("host:5000/repo/image:tag") still yields two parts.
                image_url, new_image_tag = ocs_registry_image.rsplit(':', 1)
            elif (
                config.UPGRADE.get('upgrade_to_latest', True) or version_change
            ):
                new_image_tag = get_latest_ds_olm_tag()
            else:
                new_image_tag = get_next_version_available_for_upgrade(
                    image_tag
                )
            cs_data = deepcopy(ocs_catalog.data)
            image_for_upgrade = ':'.join([image_url, new_image_tag])
            log.info(f"Image: {image_for_upgrade} will be used for upgrade.")
            cs_data['spec']['image'] = image_for_upgrade

            with NamedTemporaryFile() as cs_yaml:
                dump_data_to_temp_yaml(cs_data, cs_yaml.name)
                ocs_catalog.apply(cs_yaml.name)
        # Wait for the new package manifest for upgrade.
        operator_selector = get_selector_for_ocs_operator()
        package_manifest = PackageManifest(
            resource_name=OCS_OPERATOR_NAME,
            selector=operator_selector,
        )
        package_manifest.wait_for_resource()
        channel = config.DEPLOYMENT.get('ocs_csv_channel')
        if not channel:
            channel = package_manifest.get_default_channel()

        # update subscription
        subscription = OCP(
            resource_name=constants.OCS_SUBSCRIPTION,
            kind='subscription',
            namespace=config.ENV_DATA['cluster_namespace'],
        )
        current_ocs_source = subscription.data['spec']['source']
        log.info(f"Current OCS subscription source: {current_ocs_source}")
        ocs_source = current_ocs_source if upgrade_in_current_source else (
            constants.OPERATOR_CATALOG_SOURCE_NAME
        )
        patch_subscription_cmd = (
            f'oc patch subscription {constants.OCS_SUBSCRIPTION} '
            f'-n {namespace} --type merge -p \'{{"spec":{{"channel": '
            f'"{channel}", "source": "{ocs_source}"}}}}\''
        )
        run_cmd(patch_subscription_cmd)

        subscription_plan_approval = config.DEPLOYMENT.get(
            'subscription_plan_approval'
        )
        if subscription_plan_approval == 'Manual':
            wait_for_install_plan_and_approve(namespace)

        # Poll every 5 seconds until the current CSV name changes.
        attempts = 145
        for attempt in range(1, attempts + 1):
            log.info(f"Attempt {attempt}/{attempts} to check CSV upgraded.")
            csv_name_post_upgrade = package_manifest.get_current_csv(channel)
            if csv_name_post_upgrade != csv_name_pre_upgrade:
                log.info(f"CSV now upgraded to: {csv_name_post_upgrade}")
                break
            log.info(f"CSV is still: {csv_name_post_upgrade}")
            sleep(5)
        else:
            # Every attempt still reported the pre-upgrade CSV.
            raise TimeoutException("No new CSV found after upgrade!")

        csv_post_upgrade = CSV(
            resource_name=csv_name_post_upgrade, namespace=namespace
        )
        log.info(
            f"Waiting for CSV {csv_name_post_upgrade} to be in succeeded state"
        )
        if version_before_upgrade == '4.2' and upgrade_version == '4.3':
            log.info("Force creating Ceph toolbox after upgrade 4.2 -> 4.3")
            setup_ceph_toolbox(force_setup=True)
        # The wait for the Succeeded phase scales with the number of OSDs.
        osd_count = get_osd_count()
        csv_post_upgrade.wait_for_phase("Succeeded", timeout=200 * osd_count)
        post_upgrade_images = get_images(csv_post_upgrade.get())
        old_images, _, _ = get_upgrade_image_info(
            pre_upgrade_images, post_upgrade_images
        )
        verify_image_versions(old_images, parsed_upgrade_version)
        ocs_install_verification(
            timeout=600,
            skip_osd_distribution_check=True,
            ocs_registry_image=ocs_registry_image,
            post_upgrade_verification=True,
        )
def verify_image_versions(old_images, upgrade_version, version_before_upgrade):
    """
    Verify if all the images of OCS objects got upgraded

    Version checks go through ``parse_version`` instead of ``float`` so that
    versions such as "4.10" are ordered correctly (as a float it would be
    4.1, i.e. lower than 4.5).

    Args:
        old_images (set): set with old images
        upgrade_version (packaging.version.Version): version of OCS
        version_before_upgrade (float): version of OCS before upgrade

    """

    def as_version(value):
        # Accept both float and string version values.
        return parse_version(str(value))

    version_4_4 = parse_version("4.4")
    version_4_5 = parse_version("4.5")

    number_of_worker_nodes = len(get_nodes())
    osd_count = get_osd_count()
    verify_pods_upgraded(old_images, selector=constants.OCS_OPERATOR_LABEL)
    verify_pods_upgraded(old_images, selector=constants.OPERATOR_LABEL)
    # in 4.3 app selector nooba have those pods: noobaa-core-ID, noobaa-db-ID,
    # noobaa-operator-ID but in 4.2 only 2: noobaa-core-ID, noobaa-operator-ID
    nooba_pods = 2 if upgrade_version < parse_version("4.3") else 3
    verify_pods_upgraded(
        old_images, selector=constants.NOOBAA_APP_LABEL, count=nooba_pods
    )
    verify_pods_upgraded(
        old_images,
        selector=constants.CSI_CEPHFSPLUGIN_LABEL,
        count=number_of_worker_nodes,
    )
    verify_pods_upgraded(
        old_images, selector=constants.CSI_CEPHFSPLUGIN_PROVISIONER_LABEL, count=2
    )
    verify_pods_upgraded(
        old_images,
        selector=constants.CSI_RBDPLUGIN_LABEL,
        count=number_of_worker_nodes,
    )
    verify_pods_upgraded(
        old_images, selector=constants.CSI_RBDPLUGIN_PROVISIONER_LABEL, count=2
    )
    verify_pods_upgraded(
        old_images,
        selector=constants.MON_APP_LABEL,
        count=3,
    )
    verify_pods_upgraded(old_images, selector=constants.MGR_APP_LABEL)
    # Per-OSD timeout in seconds: 600 from 4.5 on, 750 before.
    osd_timeout = 600 if upgrade_version >= version_4_5 else 750
    verify_pods_upgraded(
        old_images,
        selector=constants.OSD_APP_LABEL,
        count=osd_count,
        timeout=osd_timeout * osd_count,
    )
    verify_pods_upgraded(old_images, selector=constants.MDS_APP_LABEL, count=2)

    platform = config.ENV_DATA.get("platform")
    if platform in constants.ON_PREM_PLATFORMS:
        # RGW count is 1 if the cluster was upgraded from <= 4.4
        # Related bug - https://bugzilla.redhat.com/show_bug.cgi?id=1857802
        rgw_count = 2 if as_version(version_before_upgrade) >= version_4_5 else 1
        verify_pods_upgraded(
            old_images,
            selector=constants.RGW_APP_LABEL,
            count=rgw_count,
        )

    # With 4.4 OCS cluster deployed over Azure, RGW is the default backingstore
    if platform == constants.AZURE_PLATFORM:
        current_version = as_version(config.ENV_DATA["ocs_version"])
        if current_version == version_4_4 or (
            current_version == version_4_5
            and as_version(version_before_upgrade) < version_4_5
        ):
            rgw_count = 1
            verify_pods_upgraded(
                old_images,
                selector=constants.RGW_APP_LABEL,
                count=rgw_count,
            )