def remove_arakoon_cluster(cluster_name, master_storagerouter_ip): """ Delete a whole arakoon cluster :param cluster_name: name of a existing arakoon cluster :type cluster_name: str :param master_storagerouter_ip: master ip address of a existing arakoon cluster :type master_storagerouter_ip: str """ ArakoonRemover.LOGGER.info( "Starting removing arakoon cluster with name `{0}`, master_ip `{1}`" .format(cluster_name, master_storagerouter_ip)) arakoon_installer = ArakoonInstaller(cluster_name) arakoon_installer.load() arakoon_installer.delete_cluster() ArakoonRemover.LOGGER.info( "Finished removing arakoon cluster with name `{0}`, master_ip `{1}`" .format(cluster_name, master_storagerouter_ip))
def shrink_vpool(cls, storagedriver_guid, offline_storage_router_guids=list()): """ Removes a StorageDriver (if its the last StorageDriver for a vPool, the vPool is removed as well) :param storagedriver_guid: Guid of the StorageDriver to remove :type storagedriver_guid: str :param offline_storage_router_guids: Guids of StorageRouters which are offline and will be removed from cluster. WHETHER VPOOL WILL BE DELETED DEPENDS ON THIS :type offline_storage_router_guids: list :return: None :rtype: NoneType """ # TODO: Add logging # TODO: Unit test individual pieces of code # Validations storagedriver = StorageDriver(storagedriver_guid) storagerouter = storagedriver.storagerouter cls._logger.info( 'StorageDriver {0} - Deleting StorageDriver {1}'.format( storagedriver.guid, storagedriver.name)) vp_installer = VPoolInstaller(name=storagedriver.vpool.name) vp_installer.validate(storagedriver=storagedriver) sd_installer = StorageDriverInstaller(vp_installer=vp_installer, storagedriver=storagedriver) cls._logger.info( 'StorageDriver {0} - Checking availability of related StorageRouters' .format(storagedriver.guid, storagedriver.name)) sr_client_map = SSHClient.get_clients(endpoints=[ sd.storagerouter for sd in vp_installer.vpool.storagedrivers ], user_names=['root']) sr_installer = StorageRouterInstaller(root_client=sr_client_map.get( storagerouter, {}).get('root'), storagerouter=storagerouter, vp_installer=vp_installer, sd_installer=sd_installer) offline_srs = sr_client_map.pop('offline') if sorted([sr.guid for sr in offline_srs ]) != sorted(offline_storage_router_guids): raise RuntimeError('Not all StorageRouters are reachable') if storagerouter not in offline_srs: mtpt_pids = sr_installer.root_client.run( "lsof -t +D '/mnt/{0}' || true".format( vp_installer.name.replace(r"'", r"'\''")), allow_insecure=True).splitlines() if len(mtpt_pids) > 0: raise RuntimeError( 'vPool cannot be deleted. Following processes keep the vPool mount point occupied: {0}' .format(', '.join(mtpt_pids))) # Retrieve reachable StorageDrivers reachable_storagedrivers = [] for sd in vp_installer.vpool.storagedrivers: if sd.storagerouter not in sr_client_map: # StorageRouter is offline continue sd_key = '/ovs/vpools/{0}/hosts/{1}/config'.format( vp_installer.vpool.guid, sd.storagedriver_id) if Configuration.exists(sd_key) is True: path = Configuration.get_configuration_path(sd_key) with remote(sd.storagerouter.ip, [LocalStorageRouterClient]) as rem: try: lsrc = rem.LocalStorageRouterClient(path) lsrc.server_revision( ) # 'Cheap' call to verify whether volumedriver is responsive cls._logger.info( 'StorageDriver {0} - Responsive StorageDriver {1} on node with IP {2}' .format(storagedriver.guid, sd.name, sd.storagerouter.ip)) reachable_storagedrivers.append(sd) except Exception as exception: if not is_connection_failure(exception): raise if len(reachable_storagedrivers) == 0: raise RuntimeError( 'Could not find any responsive node in the cluster') # Start removal if vp_installer.storagedriver_amount > 1: vp_installer.update_status(status=VPool.STATUSES.SHRINKING) else: vp_installer.update_status(status=VPool.STATUSES.DELETING) # Clean up stale vDisks cls._logger.info('StorageDriver {0} - Removing stale vDisks'.format( storagedriver.guid)) VDiskController.remove_stale_vdisks(vpool=vp_installer.vpool) # Reconfigure the MDSes cls._logger.info('StorageDriver {0} - Reconfiguring MDSes'.format( storagedriver.guid)) for vdisk_guid in storagerouter.vdisks_guids: try: MDSServiceController.ensure_safety( vdisk_guid=vdisk_guid, excluded_storagerouter_guids=[storagerouter.guid] + offline_storage_router_guids) except Exception: cls._logger.exception( 'StorageDriver {0} - vDisk {1} - Ensuring MDS safety failed' .format(storagedriver.guid, vdisk_guid)) # Validate that all MDSes on current StorageRouter have been moved away # Ensure safety does not always throw an error, that's why we perform this check here instead of in the Exception clause of above code vdisks = [] for mds in vp_installer.mds_services: for junction in mds.vdisks: vdisk = junction.vdisk if vdisk in vdisks: continue vdisks.append(vdisk) cls._logger.critical( 'StorageDriver {0} - vDisk {1} {2} - MDS Services have not been migrated away' .format(storagedriver.guid, vdisk.guid, vdisk.name)) if len(vdisks) > 0: # Put back in RUNNING, so it can be used again. Errors keep on displaying in GUI now anyway vp_installer.update_status(status=VPool.STATUSES.RUNNING) raise RuntimeError( 'Not all MDS Services have been successfully migrated away') # Start with actual removal errors_found = False if storagerouter not in offline_srs: errors_found &= sd_installer.stop_services() errors_found &= vp_installer.configure_cluster_registry( exclude=[storagedriver], apply_on=reachable_storagedrivers) errors_found &= vp_installer.update_node_distance_map() errors_found &= vp_installer.remove_mds_services() errors_found &= sd_installer.clean_config_management() errors_found &= sd_installer.clean_model() if storagerouter not in offline_srs: errors_found &= sd_installer.clean_directories( mountpoints=StorageRouterController.get_mountpoints( client=sr_installer.root_client)) try: DiskController.sync_with_reality( storagerouter_guid=storagerouter.guid) except Exception: cls._logger.exception( 'StorageDriver {0} - Synchronizing disks with reality failed' .format(storagedriver.guid)) errors_found = True if vp_installer.storagedriver_amount > 1: # Update the vPool metadata and run DTL checkup vp_installer.vpool.metadata['caching_info'].pop( sr_installer.storagerouter.guid, None) vp_installer.vpool.save() try: VDiskController.dtl_checkup(vpool_guid=vp_installer.vpool.guid, ensure_single_timeout=600) except Exception: cls._logger.exception( 'StorageDriver {0} - DTL checkup failed for vPool {1} with guid {2}' .format(storagedriver.guid, vp_installer.name, vp_installer.vpool.guid)) else: cls._logger.info( 'StorageDriver {0} - Removing vPool from model'.format( storagedriver.guid)) # Clean up model try: vp_installer.vpool.delete() except Exception: errors_found = True cls._logger.exception( 'StorageDriver {0} - Cleaning up vPool from the model failed' .format(storagedriver.guid)) Configuration.delete('/ovs/vpools/{0}'.format( vp_installer.vpool.guid)) cls._logger.info('StorageDriver {0} - Running MDS checkup'.format( storagedriver.guid)) try: MDSServiceController.mds_checkup() except Exception: cls._logger.exception( 'StorageDriver {0} - MDS checkup failed'.format( storagedriver.guid)) # Update vPool status if errors_found is True: if vp_installer.storagedriver_amount > 1: vp_installer.update_status(status=VPool.STATUSES.FAILURE) raise RuntimeError( '1 or more errors occurred while trying to remove the StorageDriver. Please check the logs for more information' ) if vp_installer.storagedriver_amount > 1: vp_installer.update_status(status=VPool.STATUSES.RUNNING) cls._logger.info( 'StorageDriver {0} - Deleted StorageDriver {1}'.format( storagedriver.guid, storagedriver.name)) if len(VPoolList.get_vpools()) == 0: cluster_name = ArakoonInstaller.get_cluster_name('voldrv') if ArakoonInstaller.get_arakoon_metadata_by_cluster_name( cluster_name=cluster_name)['internal'] is True: cls._logger.debug( 'StorageDriver {0} - Removing Arakoon cluster {1}'.format( storagedriver.guid, cluster_name)) try: installer = ArakoonInstaller(cluster_name=cluster_name) installer.load() installer.delete_cluster() except Exception: cls._logger.exception( 'StorageDriver {0} - Delete voldrv Arakoon cluster failed' .format(storagedriver.guid)) service_type = ServiceTypeList.get_by_name( ServiceType.SERVICE_TYPES.ARAKOON) service_name = ArakoonInstaller.get_service_name_for_cluster( cluster_name=cluster_name) for service in list(service_type.services): if service.name == service_name: service.delete() # Remove watcher volumedriver service if last StorageDriver on current StorageRouter if len( storagerouter.storagedrivers ) == 0 and storagerouter not in offline_srs: # ensure client is initialized for StorageRouter try: if cls._service_manager.has_service( ServiceFactory.SERVICE_WATCHER_VOLDRV, client=sr_installer.root_client): cls._service_manager.stop_service( ServiceFactory.SERVICE_WATCHER_VOLDRV, client=sr_installer.root_client) cls._service_manager.remove_service( ServiceFactory.SERVICE_WATCHER_VOLDRV, client=sr_installer.root_client) except Exception: cls._logger.exception( 'StorageDriver {0} - {1} service deletion failed'.format( storagedriver.guid, ServiceFactory.SERVICE_WATCHER_VOLDRV))