def _remove_vpool(self):
    """
    Clean up
    This is not actually a test of "Remove Vpool from OVS",
    so any failure here will be reported as a tearDown error and no cleanup will occur
    """
    self._debug('Removing vpool')
    vpool = VPoolList.get_vpool_by_name(OVSPluginTestCase.VPOOL_NAME)
    if vpool is None:
        self._debug('already removed')
        return
    for storagedriver_guid in vpool.storagedrivers_guids:
        self._debug('removing storagedriver {0}'.format(storagedriver_guid))
        StorageRouterController.remove_storagedriver(storagedriver_guid)
    attempt = 0
    while attempt < 10:
        vpool = VPoolList.get_vpool_by_name(OVSPluginTestCase.VPOOL_NAME)
        if vpool is None:
            self._debug('vpool {0} deleted'.format(OVSPluginTestCase.VPOOL_NAME))
            return
        attempt += 1
        time.sleep(2)
    raise RuntimeError('Vpool {0} was not removed correctly.'.format(OVSPluginTestCase.VPOOL_NAME))
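# --- Illustrative sketch (not part of the original test case) ---
# The retry loop above polls the model until the vPool disappears. A small helper like the one
# below could factor out that pattern; `wait_for` is a hypothetical name, not an OVS API.

import time


def wait_for(condition, attempts=10, delay=2):
    """Poll `condition` (a callable returning True/False) until it succeeds or attempts run out."""
    for _ in range(attempts):
        if condition():
            return True
        time.sleep(delay)
    return False

# Example usage against the same model call used in the helper above:
#     if not wait_for(lambda: VPoolList.get_vpool_by_name(OVSPluginTestCase.VPOOL_NAME) is None):
#         raise RuntimeError('Vpool was not removed correctly.')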
def _create_vpool(self):
    """
    Needed to actually run tests on
    This is not actually a test of "Add Vpool to OVS",
    so any failure here will be reported as a setUp error and no tests will run
    """
    pmachine = System.get_my_storagerouter().pmachine
    mgmt_center = MgmtCenter(data={'name': 'Openstack',
                                   'description': 'test',
                                   'username': OVSPluginTestCase.CINDER_USER,
                                   'password': OVSPluginTestCase.CINDER_PASS,
                                   'ip': OVSPluginTestCase.CINDER_CONTROLLER,
                                   'port': 80,
                                   'type': 'OPENSTACK',
                                   'metadata': {'integratemgmt': True}})
    mgmt_center.save()
    pmachine.mgmtcenter = mgmt_center
    pmachine.save()
    self._debug('Creating vpool')
    parameters = {'storagerouter_ip': OVSPluginTestCase.ip,
                  'vpool_name': OVSPluginTestCase.VPOOL_NAME,
                  'type': 'local',
                  'storage_ip': '127.0.0.1',  # KVM
                  'vrouter_port': OVSPluginTestCase.VPOOL_PORT,
                  'integrate_vpool': True,
                  'connection_host': OVSPluginTestCase.ip,
                  'connection_port': OVSPluginTestCase.VPOOL_PORT,
                  'connection_username': '',
                  'connection_password': '',
                  'connection_backend': {},
                  'readcache_size': 50,
                  'writecache_size': 50}
    StorageRouterController.add_vpool(parameters)
    attempt = 0
    while attempt < 10:
        vpool = VPoolList.get_vpool_by_name(OVSPluginTestCase.VPOOL_NAME)
        if vpool is not None:
            self._debug('vpool {0} created'.format(OVSPluginTestCase.VPOOL_NAME))
            try:
                os.listdir(OVSPluginTestCase.VPOOL_MOUNTPOINT)
                return vpool
            except Exception as ex:
                # either it doesn't exist, or we don't have permission
                self._debug('vpool not ready yet {0}'.format(str(ex)))
        attempt += 1
        time.sleep(2)
    raise RuntimeError('Vpool {0} was not modeled correctly or did not start.'.format(OVSPluginTestCase.VPOOL_NAME))
def _create_vpool(self):
    """
    Needed to actually run tests on
    This is not actually a test of "Add Vpool to OVS",
    so any failure here will be reported as a setUp error and no tests will run
    """
    pmachine = System.get_my_storagerouter().pmachine
    mgmt_center = MgmtCenter(data={'name': 'Openstack',
                                   'description': 'test',
                                   'username': CINDER_USER,
                                   'password': CINDER_PASS,
                                   'ip': CINDER_CONTROLLER,
                                   'port': 80,
                                   'type': 'OPENSTACK',
                                   'metadata': {'integratemgmt': True}})
    mgmt_center.save()
    pmachine.mgmtcenter = mgmt_center
    pmachine.save()
    self._debug('Creating vpool')
    backend_type = 'local'
    fields = ['storage_ip', 'vrouter_port']
    parameters = {'storagerouter_ip': IP,
                  'vpool_name': VPOOL_NAME,
                  'type': 'local',
                  'mountpoint_bfs': VPOOL_BFS,
                  'mountpoint_temp': VPOOL_TEMP,
                  'mountpoint_md': VPOOL_MD,
                  'mountpoint_readcaches': [VPOOL_READCACHE],
                  'mountpoint_writecaches': [VPOOL_WRITECACHE],
                  'mountpoint_foc': VPOOL_FOC,
                  'storage_ip': '127.0.0.1',  # KVM
                  'vrouter_port': VPOOL_PORT,
                  'integrate_vpool': True,
                  'connection_host': IP,
                  'connection_port': VPOOL_PORT,
                  'connection_username': '',
                  'connection_password': '',
                  'connection_backend': {}}
    StorageRouterController.add_vpool(parameters)
    attempt = 0
    while attempt < 10:
        vpool = VPoolList.get_vpool_by_name(VPOOL_NAME)
        if vpool is not None:
            self._debug('vpool %s created' % VPOOL_NAME)
            try:
                os.listdir(VPOOL_MOUNTPOINT)
                return vpool
            except Exception as ex:
                # either it doesn't exist, or we don't have permission
                self._debug('vpool not ready yet %s' % str(ex))
        attempt += 1
        time.sleep(2)
    raise RuntimeError('Vpool %s was not modeled correctly or did not start.' % VPOOL_NAME)
def _create_vpool(self):
    """
    Needed to actually run tests on
    This is not actually a test of "Add Vpool to OVS",
    so any failure here will be reported as a setUp error and no tests will run
    """
    pmachine = System.get_my_storagerouter().pmachine
    mgmt_center = MgmtCenter(data={"name": "Openstack",
                                   "description": "test",
                                   "username": CINDER_USER,
                                   "password": CINDER_PASS,
                                   "ip": CINDER_CONTROLLER,
                                   "port": 80,
                                   "type": "OPENSTACK",
                                   "metadata": {"integratemgmt": True}})
    mgmt_center.save()
    pmachine.mgmtcenter = mgmt_center
    pmachine.save()
    self._debug("Creating vpool")
    parameters = {"storagerouter_ip": IP,
                  "vpool_name": VPOOL_NAME,
                  "type": "local",
                  "storage_ip": "127.0.0.1",  # KVM
                  "vrouter_port": VPOOL_PORT,
                  "integrate_vpool": True,
                  "connection_host": IP,
                  "connection_port": VPOOL_PORT,
                  "connection_username": "",
                  "connection_password": "",
                  "connection_backend": {},
                  "readcache_size": 50,
                  "writecache_size": 50}
    StorageRouterController.add_vpool(parameters)
    attempt = 0
    while attempt < 10:
        vpool = VPoolList.get_vpool_by_name(VPOOL_NAME)
        if vpool is not None:
            self._debug("vpool {0} created".format(VPOOL_NAME))
            try:
                os.listdir(VPOOL_MOUNTPOINT)
                return vpool
            except Exception as ex:
                # either it doesn't exist, or we don't have permission
                self._debug("vpool not ready yet {0}".format(str(ex)))
        attempt += 1
        time.sleep(2)
    raise RuntimeError("Vpool {0} was not modeled correctly or did not start.".format(VPOOL_NAME))
def _create_vpool(self):
    """
    Needed to actually run tests on
    This is not actually a test of "Add Vpool to OVS",
    so any failure here will be reported as a setUp error and no tests will run
    """
    pmachine = System.get_my_storagerouter().pmachine
    mgmt_center = MgmtCenter(data={'name': 'Openstack',
                                   'description': 'test',
                                   'username': OVSPluginTestCase.CINDER_USER,
                                   'password': OVSPluginTestCase.CINDER_PASS,
                                   'ip': OVSPluginTestCase.CINDER_CONTROLLER,
                                   'port': 80,
                                   'type': 'OPENSTACK',
                                   'metadata': {'integratemgmt': True}})
    mgmt_center.save()
    pmachine.mgmtcenter = mgmt_center
    pmachine.save()
    self._debug('Creating vpool')
    parameters = {'storagerouter_ip': OVSPluginTestCase.ip,
                  'vpool_name': OVSPluginTestCase.VPOOL_NAME,
                  'type': 'local',
                  'storage_ip': '127.0.0.1',  # KVM
                  'vrouter_port': OVSPluginTestCase.VPOOL_PORT,
                  'integrate_vpool': True,
                  'connection_host': OVSPluginTestCase.ip,
                  'connection_port': OVSPluginTestCase.VPOOL_PORT,
                  'connection_username': '',
                  'connection_password': '',
                  'connection_backend': {},
                  'readcache_size': 50,
                  'writecache_size': 50}
    StorageRouterController.add_vpool(parameters)
    attempt = 0
    while attempt < 10:
        vpool = VPoolList.get_vpool_by_name(OVSPluginTestCase.VPOOL_NAME)
        if vpool is not None:
            self._debug('vpool {0} created'.format(OVSPluginTestCase.VPOOL_NAME))
            try:
                os.listdir(OVSPluginTestCase.VPOOL_MOUNTPOINT)
                return vpool
            except Exception as ex:
                # either it doesn't exist, or we don't have permission
                self._debug('vpool not ready yet {0}'.format(str(ex)))
        attempt += 1
        time.sleep(2)
    raise RuntimeError('Vpool {0} was not modeled correctly or did not start.'.format(OVSPluginTestCase.VPOOL_NAME))
def _create_vpool(self):
    """
    Needed to actually run tests on
    This is not actually a test of "Add Vpool to OVS",
    so any failure here will be reported as a setUp error and no tests will run
    """
    self._debug('Creating vpool')
    backend_type = 'local'
    fields = ['storage_ip', 'vrouter_port']
    parameters = {'storagerouter_ip': IP,
                  'vpool_name': VPOOL_NAME,
                  'type': 'LOCAL',
                  'mountpoint_bfs': VPOOL_BFS,
                  'mountpoint_temp': VPOOL_TEMP,
                  'mountpoint_md': VPOOL_MD,
                  'mountpoint_readcache1': VPOOL_READCACHE1,
                  'mountpoint_readcache2': VPOOL_READCACHE2,
                  'mountpoint_writecache': VPOOL_WRITECACHE,
                  'mountpoint_foc': VPOOL_FOC,
                  'storage_ip': '127.0.0.1',  # KVM
                  'vrouter_port': VPOOL_PORT}
    StorageRouterController.add_vpool(parameters)
    attempt = 0
    while attempt < 10:
        vpool = VPoolList.get_vpool_by_name(VPOOL_NAME)
        if vpool is not None:
            self._debug('vpool %s created' % VPOOL_NAME)
            try:
                self._get_shell_client()
                self.shell_client('chown %s %s' % (self.current_user_id, VPOOL_MOUNTPOINT))
                os.listdir(VPOOL_MOUNTPOINT)
                return vpool
            except Exception as ex:
                # either it doesn't exist, or we don't have permission
                self._debug('vpool not ready yet %s' % str(ex))
        attempt += 1
        time.sleep(1)
    raise RuntimeError('Vpool %s was not modeled correctly or did not start.' % VPOOL_NAME)
def _create_vpool(self):
    """
    Needed to actually run tests on
    This is not actually a test of "Add Vpool to OVS",
    so any failure here will be reported as a setUp error and no tests will run
    """
    self._debug('Creating vpool')
    backend_type = 'local'
    fields = ['storage_ip', 'vrouter_port']
    parameters = {'storagerouter_ip': IP,
                  'vpool_name': VPOOL_NAME,
                  'type': 'local',
                  'mountpoint_bfs': VPOOL_BFS,
                  'mountpoint_temp': VPOOL_TEMP,
                  'mountpoint_md': VPOOL_MD,
                  'mountpoint_readcache1': VPOOL_READCACHE1,
                  'mountpoint_readcache2': VPOOL_READCACHE2,
                  'mountpoint_writecache': VPOOL_WRITECACHE,
                  'mountpoint_foc': VPOOL_FOC,
                  'storage_ip': '127.0.0.1',  # KVM
                  'vrouter_port': VPOOL_PORT}
    StorageRouterController.add_vpool(parameters)
    attempt = 0
    while attempt < 10:
        vpool = VPoolList.get_vpool_by_name(VPOOL_NAME)
        if vpool is not None:
            self._debug('vpool %s created' % VPOOL_NAME)
            try:
                self._get_shell_client()
                self.shell_client('sudo chown %s %s' % (self.current_user_id, VPOOL_MOUNTPOINT))
                self.shell_client('sudo chmod 775 %s' % VPOOL_MOUNTPOINT)
                os.listdir(VPOOL_MOUNTPOINT)
                return vpool
            except Exception as ex:
                # either it doesn't exist, or we don't have permission
                self._debug('vpool not ready yet %s' % str(ex))
        attempt += 1
        time.sleep(2)
    raise RuntimeError('Vpool %s was not modeled correctly or did not start.' % VPOOL_NAME)
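# --- Illustrative sketch (not part of the original test case) ---
# The _create_vpool/_remove_vpool helpers above are typically wired into setUp/tearDown so a
# failure is reported as a setup/teardown error rather than a test failure. The class below is a
# hypothetical, minimal example of that wiring; it assumes the helpers above are mixed into the
# class and it is not the actual OVSPluginTestCase.

import unittest


class VPoolLifecycleExample(unittest.TestCase):
    def setUp(self):
        # Any failure here surfaces as a setUp error, so no tests run against a broken vPool.
        self.vpool = self._create_vpool()

    def tearDown(self):
        # Any failure here surfaces as a tearDown error; the vPool may need manual cleanup.
        self._remove_vpool()

    def test_vpool_is_modeled(self):
        self.assertIsNotNone(self.vpool)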
def _remove_vpool(self):
    """
    Clean up
    This is not actually a test of "Remove Vpool from OVS",
    so any failure here will be reported as a tearDown error and no cleanup will occur
    """
    self._debug('Removing vpool')
    vpool = VPoolList.get_vpool_by_name(VPOOL_NAME)
    if vpool is None:
        self._debug('already removed')
        return
    for storagedriver_guid in vpool.storagedrivers_guids:
        self._debug('removing storagedriver {0}'.format(storagedriver_guid))
        StorageRouterController.remove_storagedriver(storagedriver_guid)
    attempt = 0
    while attempt < 10:
        vpool = VPoolList.get_vpool_by_name(VPOOL_NAME)
        if vpool is None:
            self._debug('vpool {0} deleted'.format(VPOOL_NAME))
            return
        attempt += 1
        time.sleep(2)
    raise RuntimeError('Vpool {0} was not removed correctly.'.format(VPOOL_NAME))
def add_vpool(cls, parameters):
    """
    Add a vPool to the machine this task is running on
    :param parameters: Parameters for vPool creation
    :type parameters: dict
    :return: None
    :rtype: NoneType
    """
    # TODO: Add logging
    cls._logger.debug('Adding vpool. Parameters: {}'.format(parameters))

    # VALIDATIONS
    if not isinstance(parameters, dict):
        raise ValueError('Parameters passed to create a vPool should be of type dict')

    # Check StorageRouter existence
    storagerouter = StorageRouterList.get_by_ip(ip=parameters.get('storagerouter_ip'))
    if storagerouter is None:
        raise RuntimeError('Could not find StorageRouter')

    # Validate requested vPool configurations
    vp_installer = VPoolInstaller(name=parameters.get('vpool_name'))
    vp_installer.validate(storagerouter=storagerouter)

    # Validate requested StorageDriver configurations
    cls._logger.info('vPool {0}: Validating StorageDriver configurations'.format(vp_installer.name))
    sd_installer = StorageDriverInstaller(vp_installer=vp_installer,
                                          configurations={'storage_ip': parameters.get('storage_ip'),
                                                          'caching_info': parameters.get('caching_info'),
                                                          'backend_info': {'main': parameters.get('backend_info'),
                                                                           StorageDriverConfiguration.CACHE_BLOCK: parameters.get('backend_info_bc'),
                                                                           StorageDriverConfiguration.CACHE_FRAGMENT: parameters.get('backend_info_fc')},
                                                          'connection_info': {'main': parameters.get('connection_info'),
                                                                              StorageDriverConfiguration.CACHE_BLOCK: parameters.get('connection_info_bc'),
                                                                              StorageDriverConfiguration.CACHE_FRAGMENT: parameters.get('connection_info_fc')},
                                                          'sd_configuration': parameters.get('config_params')})

    partitions_mutex = volatile_mutex('add_vpool_partitions_{0}'.format(storagerouter.guid))
    try:
        # VPOOL CREATION
        # Create the vPool as soon as possible in the process to be displayed in the GUI (INSTALLING/EXTENDING state)
        if vp_installer.is_new is True:
            vp_installer.create(rdma_enabled=sd_installer.rdma_enabled)
            vp_installer.configure_mds(config=parameters.get('mds_config_params', {}))
        else:
            vp_installer.update_status(status=VPool.STATUSES.EXTENDING)

        # ADDITIONAL VALIDATIONS
        # Check StorageRouter connectivity
        cls._logger.info('vPool {0}: Validating StorageRouter connectivity'.format(vp_installer.name))
        linked_storagerouters = [storagerouter]
        if vp_installer.is_new is False:
            linked_storagerouters += [sd.storagerouter for sd in vp_installer.vpool.storagedrivers]

        sr_client_map = SSHClient.get_clients(endpoints=linked_storagerouters, user_names=['ovs', 'root'])
        offline_nodes = sr_client_map.pop('offline')
        if storagerouter in offline_nodes:
            raise RuntimeError('Node on which the vPool is being {0} is not reachable'.format('created' if vp_installer.is_new is True else 'extended'))

        sr_installer = StorageRouterInstaller(root_client=sr_client_map[storagerouter]['root'],
                                              sd_installer=sd_installer,
                                              vp_installer=vp_installer,
                                              storagerouter=storagerouter)

        # When 2 or more jobs simultaneously run on the same StorageRouter, we need to check and create the StorageDriver partitions in locked context
        partitions_mutex.acquire(wait=60)
        sr_installer.partition_info = StorageRouterController.get_partition_info(storagerouter_guid=storagerouter.guid)
        sr_installer.validate_vpool_extendable()
        sr_installer.validate_global_write_buffer(requested_size=parameters.get('writecache_size', 0))
        sr_installer.validate_local_cache_size(requested_proxies=parameters.get('parallelism', {}).get('proxies', 2))

        # MODEL STORAGEDRIVER AND PARTITION JUNCTIONS
        sd_installer.create()
        sd_installer.create_partitions()
        partitions_mutex.release()

        vp_installer.refresh_metadata()
    except Exception:
        cls._logger.exception('Something went wrong during the validation or modeling of vPool {0} on StorageRouter {1}'.format(vp_installer.name, storagerouter.name))
        partitions_mutex.release()
        vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
        raise

    # Arakoon setup
    counter = 0
    while counter < 300:
        try:
            if StorageDriverController.manual_voldrv_arakoon_checkup() is True:
                break
        except Exception:
            cls._logger.exception('Arakoon checkup for voldrv cluster failed')
            vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
            raise
        counter += 1
        time.sleep(1)
        if counter == 300:
            vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
            raise RuntimeError('Arakoon checkup for the StorageDriver cluster could not be started')

    # Cluster registry
    try:
        vp_installer.configure_cluster_registry(allow_raise=True)
    except Exception:
        if vp_installer.is_new is True:
            vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
        else:
            vp_installer.revert_vpool(status=VPool.STATUSES.FAILURE)
        raise

    try:
        sd_installer.setup_proxy_configs()
        sd_installer.configure_storagedriver_service()
        DiskController.sync_with_reality(storagerouter.guid)
        MDSServiceController.prepare_mds_service(storagerouter=storagerouter, vpool=vp_installer.vpool)

        # Update the MDS safety if changed via API (vpool.configuration will be available at this point also for the newly added StorageDriver)
        vp_installer.vpool.invalidate_dynamics('configuration')
        if vp_installer.mds_safety is not None and vp_installer.vpool.configuration['mds_config']['mds_safety'] != vp_installer.mds_safety:
            Configuration.set(key='/ovs/vpools/{0}/mds_config|mds_safety'.format(vp_installer.vpool.guid), value=vp_installer.mds_safety)

        sd_installer.start_services()  # Create and start watcher volumedriver, DTL, proxies and StorageDriver services

        # Post creation/extension checkups
        mds_config_set = MDSServiceController.get_mds_storagedriver_config_set(vpool=vp_installer.vpool, offline_nodes=offline_nodes)
        for sr, clients in sr_client_map.iteritems():
            for current_storagedriver in [sd for sd in sr.storagedrivers if sd.vpool_guid == vp_installer.vpool.guid]:
                storagedriver_config = StorageDriverConfiguration(vpool_guid=vp_installer.vpool.guid, storagedriver_id=current_storagedriver.storagedriver_id)
                if storagedriver_config.config_missing is False:
                    # Filesystem section in StorageDriver configuration are all parameters used for vDisks created directly on the filesystem
                    # So when a vDisk gets created on the filesystem, these MDSes will be assigned to them
                    storagedriver_config.configure_filesystem(fs_metadata_backend_mds_nodes=mds_config_set[sr.guid])
                    storagedriver_config.save(client=clients['ovs'])

        # Everything's reconfigured, refresh new cluster configuration
        for current_storagedriver in vp_installer.vpool.storagedrivers:
            if current_storagedriver.storagerouter not in sr_client_map:
                continue
            vp_installer.vpool.storagedriver_client.update_cluster_node_configs(str(current_storagedriver.storagedriver_id), req_timeout_secs=10)
    except Exception:
        cls._logger.exception('vPool {0}: Creation failed'.format(vp_installer.name))
        vp_installer.update_status(status=VPool.STATUSES.FAILURE)
        raise

    # When a node is offline, we can run into errors, but also when 1 or more volumes are not running
    # Scheduled tasks below, so don't really care whether they succeed or not
    try:
        VDiskController.dtl_checkup(vpool_guid=vp_installer.vpool.guid, ensure_single_timeout=600)
    except:
        pass

    for vdisk in vp_installer.vpool.vdisks:
        try:
            MDSServiceController.ensure_safety(vdisk_guid=vdisk.guid)
        except:
            pass

    vp_installer.update_status(status=VPool.STATUSES.RUNNING)
    cls._logger.info('Add vPool {0} ended successfully'.format(vp_installer.name))
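# --- Illustrative sketch (not part of the original module) ---
# add_vpool() only reads its input through parameters.get(...), so the dict below simply mirrors
# the keys referenced above. The concrete values are placeholders, and the exact set of required
# keys and their contents is enforced by the VPoolInstaller/StorageDriverInstaller validation,
# which is not shown here.

example_parameters = {
    'storagerouter_ip': '10.100.1.1',    # placeholder IP of an existing StorageRouter
    'vpool_name': 'myvpool',             # placeholder vPool name
    'storage_ip': '127.0.0.1',
    'caching_info': {},                  # cache settings, validated by the installers
    'backend_info': {},                  # main backend; backend_info_bc / backend_info_fc hold the cache backends
    'connection_info': {},               # connection_info_bc / connection_info_fc follow the same pattern
    'config_params': {},                 # StorageDriver configuration ('sd_configuration')
    'mds_config_params': {},
    'writecache_size': 10,               # used by validate_global_write_buffer()
    'parallelism': {'proxies': 2},       # used by validate_local_cache_size()
}
# StorageRouterController.add_vpool(example_parameters)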
def shrink_vpool(cls, storagedriver_guid, offline_storage_router_guids=list()):
    """
    Removes a StorageDriver (if its the last StorageDriver for a vPool, the vPool is removed as well)
    :param storagedriver_guid: Guid of the StorageDriver to remove
    :type storagedriver_guid: str
    :param offline_storage_router_guids: Guids of StorageRouters which are offline and will be removed from cluster.
                                         WHETHER VPOOL WILL BE DELETED DEPENDS ON THIS
    :type offline_storage_router_guids: list
    :return: None
    :rtype: NoneType
    """
    # TODO: Add logging
    # TODO: Unit test individual pieces of code
    # Validations
    storagedriver = StorageDriver(storagedriver_guid)
    storagerouter = storagedriver.storagerouter
    cls._logger.info('StorageDriver {0} - Deleting StorageDriver {1}'.format(storagedriver.guid, storagedriver.name))

    vp_installer = VPoolInstaller(name=storagedriver.vpool.name)
    vp_installer.validate(storagedriver=storagedriver)

    sd_installer = StorageDriverInstaller(vp_installer=vp_installer, storagedriver=storagedriver)

    cls._logger.info('StorageDriver {0} - Checking availability of related StorageRouters'.format(storagedriver.guid, storagedriver.name))
    sr_client_map = SSHClient.get_clients(endpoints=[sd.storagerouter for sd in vp_installer.vpool.storagedrivers], user_names=['root'])
    sr_installer = StorageRouterInstaller(root_client=sr_client_map.get(storagerouter, {}).get('root'),
                                          storagerouter=storagerouter,
                                          vp_installer=vp_installer,
                                          sd_installer=sd_installer)

    offline_srs = sr_client_map.pop('offline')
    if sorted([sr.guid for sr in offline_srs]) != sorted(offline_storage_router_guids):
        raise RuntimeError('Not all StorageRouters are reachable')

    if storagerouter not in offline_srs:
        mtpt_pids = sr_installer.root_client.run("lsof -t +D '/mnt/{0}' || true".format(vp_installer.name.replace(r"'", r"'\''")),
                                                 allow_insecure=True).splitlines()
        if len(mtpt_pids) > 0:
            raise RuntimeError('vPool cannot be deleted. Following processes keep the vPool mount point occupied: {0}'.format(', '.join(mtpt_pids)))

    # Retrieve reachable StorageDrivers
    reachable_storagedrivers = []
    for sd in vp_installer.vpool.storagedrivers:
        if sd.storagerouter not in sr_client_map:
            # StorageRouter is offline
            continue

        sd_key = '/ovs/vpools/{0}/hosts/{1}/config'.format(vp_installer.vpool.guid, sd.storagedriver_id)
        if Configuration.exists(sd_key) is True:
            path = Configuration.get_configuration_path(sd_key)
            with remote(sd.storagerouter.ip, [LocalStorageRouterClient]) as rem:
                try:
                    lsrc = rem.LocalStorageRouterClient(path)
                    lsrc.server_revision()  # 'Cheap' call to verify whether volumedriver is responsive
                    cls._logger.info('StorageDriver {0} - Responsive StorageDriver {1} on node with IP {2}'.format(storagedriver.guid, sd.name, sd.storagerouter.ip))
                    reachable_storagedrivers.append(sd)
                except Exception as exception:
                    if not is_connection_failure(exception):
                        raise

    if len(reachable_storagedrivers) == 0:
        raise RuntimeError('Could not find any responsive node in the cluster')

    # Start removal
    if vp_installer.storagedriver_amount > 1:
        vp_installer.update_status(status=VPool.STATUSES.SHRINKING)
    else:
        vp_installer.update_status(status=VPool.STATUSES.DELETING)

    # Clean up stale vDisks
    cls._logger.info('StorageDriver {0} - Removing stale vDisks'.format(storagedriver.guid))
    VDiskController.remove_stale_vdisks(vpool=vp_installer.vpool)

    # Reconfigure the MDSes
    cls._logger.info('StorageDriver {0} - Reconfiguring MDSes'.format(storagedriver.guid))
    for vdisk_guid in storagerouter.vdisks_guids:
        try:
            MDSServiceController.ensure_safety(vdisk_guid=vdisk_guid, excluded_storagerouter_guids=[storagerouter.guid] + offline_storage_router_guids)
        except Exception:
            cls._logger.exception('StorageDriver {0} - vDisk {1} - Ensuring MDS safety failed'.format(storagedriver.guid, vdisk_guid))

    # Validate that all MDSes on current StorageRouter have been moved away
    # Ensure safety does not always throw an error, that's why we perform this check here instead of in the Exception clause of above code
    vdisks = []
    for mds in vp_installer.mds_services:
        for junction in mds.vdisks:
            vdisk = junction.vdisk
            if vdisk in vdisks:
                continue
            vdisks.append(vdisk)
            cls._logger.critical('StorageDriver {0} - vDisk {1} {2} - MDS Services have not been migrated away'.format(storagedriver.guid, vdisk.guid, vdisk.name))
    if len(vdisks) > 0:
        # Put back in RUNNING, so it can be used again. Errors keep on displaying in GUI now anyway
        vp_installer.update_status(status=VPool.STATUSES.RUNNING)
        raise RuntimeError('Not all MDS Services have been successfully migrated away')

    # Start with actual removal
    errors_found = False
    if storagerouter not in offline_srs:
        errors_found &= sd_installer.stop_services()

    errors_found &= vp_installer.configure_cluster_registry(exclude=[storagedriver], apply_on=reachable_storagedrivers)
    errors_found &= vp_installer.update_node_distance_map()
    errors_found &= vp_installer.remove_mds_services()
    errors_found &= sd_installer.clean_config_management()
    errors_found &= sd_installer.clean_model()

    if storagerouter not in offline_srs:
        errors_found &= sd_installer.clean_directories(mountpoints=StorageRouterController.get_mountpoints(client=sr_installer.root_client))

        try:
            DiskController.sync_with_reality(storagerouter_guid=storagerouter.guid)
        except Exception:
            cls._logger.exception('StorageDriver {0} - Synchronizing disks with reality failed'.format(storagedriver.guid))
            errors_found = True

    if vp_installer.storagedriver_amount > 1:
        # Update the vPool metadata and run DTL checkup
        vp_installer.vpool.metadata['caching_info'].pop(sr_installer.storagerouter.guid, None)
        vp_installer.vpool.save()

        try:
            VDiskController.dtl_checkup(vpool_guid=vp_installer.vpool.guid, ensure_single_timeout=600)
        except Exception:
            cls._logger.exception('StorageDriver {0} - DTL checkup failed for vPool {1} with guid {2}'.format(storagedriver.guid, vp_installer.name, vp_installer.vpool.guid))
    else:
        cls._logger.info('StorageDriver {0} - Removing vPool from model'.format(storagedriver.guid))
        # Clean up model
        try:
            vp_installer.vpool.delete()
        except Exception:
            errors_found = True
            cls._logger.exception('StorageDriver {0} - Cleaning up vPool from the model failed'.format(storagedriver.guid))
        Configuration.delete('/ovs/vpools/{0}'.format(vp_installer.vpool.guid))

    cls._logger.info('StorageDriver {0} - Running MDS checkup'.format(storagedriver.guid))
    try:
        MDSServiceController.mds_checkup()
    except Exception:
        cls._logger.exception('StorageDriver {0} - MDS checkup failed'.format(storagedriver.guid))

    # Update vPool status
    if errors_found is True:
        if vp_installer.storagedriver_amount > 1:
            vp_installer.update_status(status=VPool.STATUSES.FAILURE)
        raise RuntimeError('1 or more errors occurred while trying to remove the StorageDriver. Please check the logs for more information')

    if vp_installer.storagedriver_amount > 1:
        vp_installer.update_status(status=VPool.STATUSES.RUNNING)
    cls._logger.info('StorageDriver {0} - Deleted StorageDriver {1}'.format(storagedriver.guid, storagedriver.name))

    if len(VPoolList.get_vpools()) == 0:
        cluster_name = ArakoonInstaller.get_cluster_name('voldrv')
        if ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)['internal'] is True:
            cls._logger.debug('StorageDriver {0} - Removing Arakoon cluster {1}'.format(storagedriver.guid, cluster_name))
            try:
                installer = ArakoonInstaller(cluster_name=cluster_name)
                installer.load()
                installer.delete_cluster()
            except Exception:
                cls._logger.exception('StorageDriver {0} - Delete voldrv Arakoon cluster failed'.format(storagedriver.guid))
            service_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
            service_name = ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name)
            for service in list(service_type.services):
                if service.name == service_name:
                    service.delete()

    # Remove watcher volumedriver service if last StorageDriver on current StorageRouter
    if len(storagerouter.storagedrivers) == 0 and storagerouter not in offline_srs:  # ensure client is initialized for StorageRouter
        try:
            if cls._service_manager.has_service(ServiceFactory.SERVICE_WATCHER_VOLDRV, client=sr_installer.root_client):
                cls._service_manager.stop_service(ServiceFactory.SERVICE_WATCHER_VOLDRV, client=sr_installer.root_client)
                cls._service_manager.remove_service(ServiceFactory.SERVICE_WATCHER_VOLDRV, client=sr_installer.root_client)
        except Exception:
            cls._logger.exception('StorageDriver {0} - {1} service deletion failed'.format(storagedriver.guid, ServiceFactory.SERVICE_WATCHER_VOLDRV))
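# --- Illustrative sketch (not part of the original module) ---
# Removing every StorageDriver of a vPool through shrink_vpool() also removes the vPool itself
# once the last StorageDriver is gone. 'myvpool' is a placeholder name, the helper name is
# hypothetical, and the snippet assumes shrink_vpool() above is exposed on
# StorageRouterController and that the OVS DAL (VPoolList) is importable.

def remove_whole_vpool_example(vpool_name='myvpool'):
    vpool = VPoolList.get_vpool_by_name(vpool_name)
    if vpool is None:
        return
    for storagedriver in list(vpool.storagedrivers):
        StorageRouterController.shrink_vpool(storagedriver_guid=storagedriver.guid,
                                             offline_storage_router_guids=[])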
def remove_node(node_ip, silent=None):
    """
    Remove the node with specified IP from the cluster
    :param node_ip: IP of the node to remove
    :type node_ip: str
    :param silent: If silent == '--force-yes' no question will be asked to confirm the removal
    :type silent: str
    :return: None
    """
    from ovs.lib.storagedriver import StorageDriverController
    from ovs.lib.storagerouter import StorageRouterController
    from ovs.dal.lists.storagerouterlist import StorageRouterList

    Toolbox.log(logger=NodeRemovalController._logger, messages="Remove node", boxed=True)
    Toolbox.log(logger=NodeRemovalController._logger,
                messages="WARNING: Some of these steps may take a very long time, please check the logs for more information\n\n")

    ###############
    # VALIDATIONS #
    ###############
    try:
        node_ip = node_ip.strip()
        if not isinstance(node_ip, str):
            raise ValueError("Node IP must be a string")
        if not re.match(SSHClient.IP_REGEX, node_ip):
            raise ValueError("Invalid IP {0} specified".format(node_ip))

        storage_router_all = StorageRouterList.get_storagerouters()
        storage_router_masters = StorageRouterList.get_masters()
        storage_router_all_ips = set([storage_router.ip for storage_router in storage_router_all])
        storage_router_master_ips = set([storage_router.ip for storage_router in storage_router_masters])
        storage_router_to_remove = StorageRouterList.get_by_ip(node_ip)

        if node_ip not in storage_router_all_ips:
            raise ValueError("Unknown IP specified\nKnown in model:\n - {0}\nSpecified for removal:\n - {1}".format("\n - ".join(storage_router_all_ips), node_ip))
        if len(storage_router_all_ips) == 1:
            raise RuntimeError("Removing the only node is not possible")
        if node_ip in storage_router_master_ips and len(storage_router_master_ips) == 1:
            raise RuntimeError("Removing the only master node is not possible")
        if System.get_my_storagerouter() == storage_router_to_remove:
            raise RuntimeError("The node to be removed cannot be identical to the node on which the removal is initiated")

        Toolbox.log(logger=NodeRemovalController._logger, messages="Creating SSH connections to remaining master nodes")
        master_ip = None
        ip_client_map = {}
        storage_routers_offline = []
        storage_router_to_remove_online = True
        for storage_router in storage_router_all:
            try:
                client = SSHClient(storage_router, username="******")
                if client.run(["pwd"]):
                    Toolbox.log(logger=NodeRemovalController._logger,
                                messages=" Node with IP {0:<15} successfully connected to".format(storage_router.ip))
                    ip_client_map[storage_router.ip] = client
                    if storage_router != storage_router_to_remove and storage_router.node_type == "MASTER":
                        master_ip = storage_router.ip
            except UnableToConnectException:
                Toolbox.log(logger=NodeRemovalController._logger,
                            messages=" Node with IP {0:<15} is unreachable".format(storage_router.ip))
                storage_routers_offline.append(storage_router)
                if storage_router == storage_router_to_remove:
                    storage_router_to_remove_online = False

        if len(ip_client_map) == 0 or master_ip is None:
            raise RuntimeError("Could not connect to any master node in the cluster")

        storage_router_to_remove.invalidate_dynamics("vdisks_guids")
        if len(storage_router_to_remove.vdisks_guids) > 0:  # vDisks are supposed to be moved away manually before removing a node
            raise RuntimeError("Still vDisks attached to Storage Router {0}".format(storage_router_to_remove.name))

        internal_memcached = Toolbox.is_service_internally_managed(service="memcached")
        internal_rabbit_mq = Toolbox.is_service_internally_managed(service="rabbitmq")
        memcached_endpoints = Configuration.get(key="/ovs/framework/memcache|endpoints")
        rabbit_mq_endpoints = Configuration.get(key="/ovs/framework/messagequeue|endpoints")
        copy_memcached_endpoints = list(memcached_endpoints)
        copy_rabbit_mq_endpoints = list(rabbit_mq_endpoints)
        for endpoint in memcached_endpoints:
            if endpoint.startswith(storage_router_to_remove.ip):
                copy_memcached_endpoints.remove(endpoint)
        for endpoint in rabbit_mq_endpoints:
            if endpoint.startswith(storage_router_to_remove.ip):
                copy_rabbit_mq_endpoints.remove(endpoint)
        if len(copy_memcached_endpoints) == 0 and internal_memcached is True:
            raise RuntimeError("Removal of provided nodes will result in a complete removal of the memcached service")
        if len(copy_rabbit_mq_endpoints) == 0 and internal_rabbit_mq is True:
            raise RuntimeError("Removal of provided nodes will result in a complete removal of the messagequeue service")
    except Exception as exception:
        Toolbox.log(logger=NodeRemovalController._logger, messages=[str(exception)], boxed=True, loglevel="exception")
        sys.exit(1)

    #################
    # CONFIRMATIONS #
    #################
    interactive = silent != "--force-yes"
    remove_asd_manager = not interactive  # Remove ASD manager if non-interactive else ask
    if interactive is True:
        proceed = Interactive.ask_yesno(message="Are you sure you want to remove node {0}?".format(storage_router_to_remove.name),
                                        default_value=False)
        if proceed is False:
            Toolbox.log(logger=NodeRemovalController._logger, messages="Abort removal", title=True)
            sys.exit(1)

        if storage_router_to_remove_online is True:
            client = SSHClient(endpoint=storage_router_to_remove, username="******")
            if ServiceManager.has_service(name="asd-manager", client=client):
                remove_asd_manager = Interactive.ask_yesno(message="Do you also want to remove the ASD manager and related ASDs?",
                                                           default_value=False)

    if remove_asd_manager is True or storage_router_to_remove_online is False:
        for function in Toolbox.fetch_hooks("setup", "validate_asd_removal"):
            validation_output = function(storage_router_to_remove.ip)
            if validation_output["confirm"] is True:
                if Interactive.ask_yesno(message=validation_output["question"], default_value=False) is False:
                    remove_asd_manager = False
                    break

    ###########
    # REMOVAL #
    ###########
    try:
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages="Starting removal of node {0} - {1}".format(storage_router_to_remove.name, storage_router_to_remove.ip))
        if storage_router_to_remove_online is False:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages=" Marking all Storage Drivers served by Storage Router {0} as offline".format(storage_router_to_remove.ip))
            StorageDriverController.mark_offline(storagerouter_guid=storage_router_to_remove.guid)

        # Remove vPools
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages=" Removing vPools from node".format(storage_router_to_remove.ip))
        storage_routers_offline_guids = [sr.guid for sr in storage_routers_offline if sr.guid != storage_router_to_remove.guid]
        for storage_driver in storage_router_to_remove.storagedrivers:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages=" Removing vPool {0} from node".format(storage_driver.vpool.name))
            StorageRouterController.remove_storagedriver(storagedriver_guid=storage_driver.guid,
                                                         offline_storage_router_guids=storage_routers_offline_guids)

        # Demote if MASTER
        if storage_router_to_remove.node_type == "MASTER":
            NodeTypeController.demote_node(cluster_ip=storage_router_to_remove.ip,
                                           master_ip=master_ip,
                                           ip_client_map=ip_client_map,
                                           unique_id=storage_router_to_remove.machine_id,
                                           unconfigure_memcached=internal_memcached,
                                           unconfigure_rabbitmq=internal_rabbit_mq,
                                           offline_nodes=storage_routers_offline)

        # Stop / remove services
        Toolbox.log(logger=NodeRemovalController._logger, messages="Stopping and removing services")
        config_store = Configuration.get_store()
        if storage_router_to_remove_online is True:
            client = SSHClient(endpoint=storage_router_to_remove, username="******")
            NodeRemovalController.remove_services(client=client,
                                                  node_type=storage_router_to_remove.node_type.lower(),
                                                  logger=NodeRemovalController._logger)
            service = "watcher-config"
            if ServiceManager.has_service(service, client=client):
                Toolbox.log(logger=NodeRemovalController._logger, messages="Removing service {0}".format(service))
                ServiceManager.stop_service(service, client=client)
                ServiceManager.remove_service(service, client=client)

            if config_store == "etcd":
                from ovs.extensions.db.etcd.installer import EtcdInstaller

                if Configuration.get(key="/ovs/framework/external_config") is None:
                    Toolbox.log(logger=NodeRemovalController._logger, messages=" Removing Etcd cluster")
                    try:
                        EtcdInstaller.stop("config", client)
                        EtcdInstaller.remove("config", client)
                    except Exception as ex:
                        Toolbox.log(logger=NodeRemovalController._logger,
                                    messages=["\nFailed to unconfigure Etcd", ex],
                                    loglevel="exception")

                Toolbox.log(logger=NodeRemovalController._logger, messages="Removing Etcd proxy")
                EtcdInstaller.remove_proxy("config", client.ip)

        Toolbox.run_hooks(component="noderemoval",
                          sub_component="remove",
                          logger=NodeRemovalController._logger,
                          cluster_ip=storage_router_to_remove.ip,
                          complete_removal=remove_asd_manager)

        # Clean up model
        Toolbox.log(logger=NodeRemovalController._logger, messages="Removing node from model")
        for service in storage_router_to_remove.services:
            service.delete()
        for disk in storage_router_to_remove.disks:
            for partition in disk.partitions:
                partition.delete()
            disk.delete()
        for j_domain in storage_router_to_remove.domains:
            j_domain.delete()
        Configuration.delete("/ovs/framework/hosts/{0}".format(storage_router_to_remove.machine_id))

        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map,
                                                                   offline_node_ips=[node.ip for node in storage_routers_offline],
                                                                   logger=NodeRemovalController._logger)

        if storage_router_to_remove_online is True:
            client = SSHClient(endpoint=storage_router_to_remove, username="******")
            if config_store == "arakoon":
                client.file_delete(filenames=[ArakoonConfiguration.CACC_LOCATION])
            client.file_delete(filenames=[Configuration.BOOTSTRAP_CONFIG_LOCATION])
        storage_router_to_remove.delete()
        Toolbox.log(logger=NodeRemovalController._logger, messages="Successfully removed node\n")
    except Exception as exception:
        Toolbox.log(logger=NodeRemovalController._logger, messages="\n")
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages=["An unexpected error occurred:", str(exception)],
                    boxed=True,
                    loglevel="exception")
        sys.exit(1)
    except KeyboardInterrupt:
        Toolbox.log(logger=NodeRemovalController._logger, messages="\n")
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages="This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.",
                    boxed=True,
                    loglevel="error")
        sys.exit(1)

    if remove_asd_manager is True:
        Toolbox.log(logger=NodeRemovalController._logger, messages="\nRemoving ASD Manager")
        with remote(storage_router_to_remove.ip, [os]) as rem:
            rem.os.system("asd-manager remove --force-yes")
    Toolbox.log(logger=NodeRemovalController._logger, messages="Remove nodes finished", title=True)
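# --- Illustrative usage note (not part of the original module) ---
# remove_node() is interactive by default; passing the literal string '--force-yes' as `silent`
# skips the confirmation questions (and then also removes the ASD manager, per the
# `remove_asd_manager = not interactive` logic above). '10.100.1.2' is a placeholder IP.
#
# Interactive removal (questions are asked on the console):
#     NodeRemovalController.remove_node(node_ip='10.100.1.2')
# Non-interactive removal:
#     NodeRemovalController.remove_node(node_ip='10.100.1.2', silent='--force-yes')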
def _create_vpool(self):
    """
    Needed to actually run tests on
    This is not actually a test of "Add Vpool to OVS",
    so any failure here will be reported as a setUp error and no tests will run
    """
    pmachine = System.get_my_storagerouter().pmachine
    mgmt_center = MgmtCenter(data={'name': 'Openstack',
                                   'description': 'test',
                                   'username': CINDER_USER,
                                   'password': CINDER_PASS,
                                   'ip': CINDER_CONTROLLER,
                                   'port': 80,
                                   'type': 'OPENSTACK',
                                   'metadata': {'integratemgmt': True}})
    mgmt_center.save()
    pmachine.mgmtcenter = mgmt_center
    pmachine.save()
    self._debug('Creating vpool')
    backend_type = 'local'
    fields = ['storage_ip', 'vrouter_port']
    parameters = {'storagerouter_ip': IP,
                  'vpool_name': VPOOL_NAME,
                  'type': 'local',
                  'mountpoint_bfs': VPOOL_BFS,
                  'mountpoint_temp': VPOOL_TEMP,
                  'mountpoint_md': VPOOL_MD,
                  'mountpoint_readcaches': [VPOOL_READCACHE],
                  'mountpoint_writecaches': [VPOOL_WRITECACHE],
                  'mountpoint_foc': VPOOL_FOC,
                  'storage_ip': '127.0.0.1',  # KVM
                  'vrouter_port': VPOOL_PORT,
                  'integrate_vpool': True,
                  'connection_host': IP,
                  'connection_port': VPOOL_PORT,
                  'connection_username': '',
                  'connection_password': '',
                  'connection_backend': {}}
    StorageRouterController.add_vpool(parameters)
    attempt = 0
    while attempt < 10:
        vpool = VPoolList.get_vpool_by_name(VPOOL_NAME)
        if vpool is not None:
            self._debug('vpool %s created' % VPOOL_NAME)
            try:
                os.listdir(VPOOL_MOUNTPOINT)
                return vpool
            except Exception as ex:
                # either it doesn't exist, or we don't have permission
                self._debug('vpool not ready yet %s' % str(ex))
        attempt += 1
        time.sleep(2)
    raise RuntimeError('Vpool %s was not modeled correctly or did not start.' % VPOOL_NAME)
def validate_vpool_extendable(self):
    """
    Perform some validations on the specified StorageRouter to verify whether a vPool can be created or extended on it
    :return: None
    :rtype: NoneType
    """
    if self.partition_info is None:
        raise RuntimeError('Partition information has not been retrieved yet')

    # Validate RDMA capabilities
    if self.sd_installer.rdma_enabled is True and self.storagerouter.rdma_capable is False:
        raise RuntimeError('DTL transport over RDMA is not supported by StorageRouter with IP {0}'.format(self.storagerouter.ip))

    # Validate block cache is allowed to be used
    if self.storagerouter.features is None:
        raise RuntimeError('Could not load available features')
    self.block_cache_supported = 'block-cache' in self.storagerouter.features.get('alba', {}).get('features', [])
    if self.block_cache_supported is False and (self.sd_installer.block_cache_on_read is True or self.sd_installer.block_cache_on_write is True):
        raise RuntimeError('Block cache is not a supported feature')

    # Validate mount point for the vPool to be created does not exist yet
    if StorageRouterController.mountpoint_exists(name=self.vp_installer.name, storagerouter_guid=self.storagerouter.guid):
        raise RuntimeError('The mount point for vPool {0} already exists'.format(self.vp_installer.name))

    # Validate SCRUB role available on any StorageRouter
    if StorageRouterController.check_scrub_partition_present() is False:
        raise RuntimeError('At least 1 StorageRouter must have a partition with a {0} role'.format(DiskPartition.ROLES.SCRUB))

    # Validate required roles present
    for required_role in [DiskPartition.ROLES.DB, DiskPartition.ROLES.DTL, DiskPartition.ROLES.WRITE]:
        if required_role not in self.partition_info:
            raise RuntimeError('Missing required partition with a {0} role'.format(required_role))
        elif len(self.partition_info[required_role]) == 0:
            raise RuntimeError('At least 1 partition with a {0} role is required per StorageRouter'.format(required_role))
        elif required_role in [DiskPartition.ROLES.DB, DiskPartition.ROLES.DTL]:
            if len(self.partition_info[required_role]) > 1:
                raise RuntimeError('Only 1 partition with a {0} role is allowed per StorageRouter'.format(required_role))
        else:
            # Sum the available space across all partitions carrying this role
            total_available = sum(part['available'] for part in self.partition_info[required_role])
            if total_available == 0:
                raise RuntimeError('Not enough available space for {0}'.format(required_role))

    # Validate mount points are mounted
    for role, part_info in self.partition_info.iteritems():
        if role not in [DiskPartition.ROLES.DB, DiskPartition.ROLES.DTL, DiskPartition.ROLES.WRITE, DiskPartition.ROLES.SCRUB]:
            continue

        for part in part_info:
            mount_point = part['mountpoint']
            if mount_point == DiskPartition.VIRTUAL_STORAGE_LOCATION:
                continue
            if self.root_client.is_mounted(path=mount_point) is False:
                raise RuntimeError('Mount point {0} is not mounted'.format(mount_point))
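# --- Illustrative sketch (not part of the original module) ---
# validate_vpool_extendable() walks self.partition_info, which maps partition roles to lists of
# partition dicts. The structure below shows the minimal shape the checks above rely on
# ('available' free space and 'mountpoint'). The plain-string role keys and all values are
# placeholders; the real keys are the DiskPartition.ROLES.* constants and the real dicts come
# from StorageRouterController.get_partition_info().

example_partition_info = {
    'DB':    [{'mountpoint': '/mnt/storage1', 'available': 10 * 1024 ** 3}],
    'DTL':   [{'mountpoint': '/mnt/storage1', 'available': 10 * 1024 ** 3}],
    'WRITE': [{'mountpoint': '/mnt/ssd1', 'available': 100 * 1024 ** 3},
              {'mountpoint': '/mnt/ssd2', 'available': 100 * 1024 ** 3}],
    'SCRUB': [{'mountpoint': '/mnt/hdd1', 'available': 500 * 1024 ** 3}],
}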
def remove_node(node_ip, silent=None):
    """
    Remove the node with specified IP from the cluster
    :param node_ip: IP of the node to remove
    :type node_ip: str
    :param silent: If silent == '--force-yes' no question will be asked to confirm the removal
    :type silent: str
    :return: None
    """
    LogHandler.get('extensions', name='ovs_extensions')  # Initiate extensions logger
    from ovs.lib.storagedriver import StorageDriverController
    from ovs.lib.storagerouter import StorageRouterController
    from ovs.dal.lists.storagerouterlist import StorageRouterList

    Toolbox.log(logger=NodeRemovalController._logger, messages='Remove node', boxed=True)
    Toolbox.log(logger=NodeRemovalController._logger,
                messages='WARNING: Some of these steps may take a very long time, please check the logs for more information\n\n')
    service_manager = ServiceFactory.get_manager()

    ###############
    # VALIDATIONS #
    ###############
    try:
        node_ip = node_ip.strip()
        if not isinstance(node_ip, str):
            raise ValueError('Node IP must be a string')
        if not re.match(SSHClient.IP_REGEX, node_ip):
            raise ValueError('Invalid IP {0} specified'.format(node_ip))

        storage_router_all = sorted(StorageRouterList.get_storagerouters(), key=lambda k: k.name)
        storage_router_masters = StorageRouterList.get_masters()
        storage_router_all_ips = set([storage_router.ip for storage_router in storage_router_all])
        storage_router_master_ips = set([storage_router.ip for storage_router in storage_router_masters])
        storage_router_to_remove = StorageRouterList.get_by_ip(node_ip)
        offline_reasons = {}

        if node_ip not in storage_router_all_ips:
            raise ValueError('Unknown IP specified\nKnown in model:\n - {0}\nSpecified for removal:\n - {1}'.format('\n - '.join(storage_router_all_ips), node_ip))
        if len(storage_router_all_ips) == 1:
            raise RuntimeError("Removing the only node is not possible")
        if node_ip in storage_router_master_ips and len(storage_router_master_ips) == 1:
            raise RuntimeError("Removing the only master node is not possible")
        if System.get_my_storagerouter() == storage_router_to_remove:
            raise RuntimeError('The node to be removed cannot be identical to the node on which the removal is initiated')

        Toolbox.log(logger=NodeRemovalController._logger, messages='Creating SSH connections to remaining master nodes')
        master_ip = None
        ip_client_map = {}
        storage_routers_offline = []
        storage_router_to_remove_online = True
        for storage_router in storage_router_all:
            try:
                client = SSHClient(storage_router, username='******', timeout=10)
            except (UnableToConnectException, NotAuthenticatedException, TimeOutException) as ex:
                if isinstance(ex, UnableToConnectException):
                    msg = 'Unable to connect'
                elif isinstance(ex, NotAuthenticatedException):
                    msg = 'Could not authenticate'
                elif isinstance(ex, TimeOutException):
                    msg = 'Connection timed out'
                Toolbox.log(logger=NodeRemovalController._logger,
                            messages=' * Node with IP {0:<15}- {1}'.format(storage_router.ip, msg))
                offline_reasons[storage_router.ip] = msg
                storage_routers_offline.append(storage_router)
                if storage_router == storage_router_to_remove:
                    storage_router_to_remove_online = False
                continue

            Toolbox.log(logger=NodeRemovalController._logger,
                        messages=' * Node with IP {0:<15}- Successfully connected'.format(storage_router.ip))
            ip_client_map[storage_router.ip] = client
            if storage_router != storage_router_to_remove and storage_router.node_type == 'MASTER':
                master_ip = storage_router.ip

        if len(ip_client_map) == 0 or master_ip is None:
            raise RuntimeError('Could not connect to any master node in the cluster')

        storage_router_to_remove.invalidate_dynamics('vdisks_guids')
        if len(storage_router_to_remove.vdisks_guids) > 0:  # vDisks are supposed to be moved away manually before removing a node
            raise RuntimeError("Still vDisks attached to Storage Router {0}".format(storage_router_to_remove.name))

        internal_memcached = Toolbox.is_service_internally_managed(service='memcached')
        internal_rabbit_mq = Toolbox.is_service_internally_managed(service='rabbitmq')
        memcached_endpoints = Configuration.get(key='/ovs/framework/memcache|endpoints')
        rabbit_mq_endpoints = Configuration.get(key='/ovs/framework/messagequeue|endpoints')
        copy_memcached_endpoints = list(memcached_endpoints)
        copy_rabbit_mq_endpoints = list(rabbit_mq_endpoints)
        for endpoint in memcached_endpoints:
            if endpoint.startswith(storage_router_to_remove.ip):
                copy_memcached_endpoints.remove(endpoint)
        for endpoint in rabbit_mq_endpoints:
            if endpoint.startswith(storage_router_to_remove.ip):
                copy_rabbit_mq_endpoints.remove(endpoint)
        if len(copy_memcached_endpoints) == 0 and internal_memcached is True:
            raise RuntimeError('Removal of provided nodes will result in a complete removal of the memcached service')
        if len(copy_rabbit_mq_endpoints) == 0 and internal_rabbit_mq is True:
            raise RuntimeError('Removal of provided nodes will result in a complete removal of the messagequeue service')

        Toolbox.run_hooks(component='noderemoval',
                          sub_component='validate_removal',
                          logger=NodeRemovalController._logger,
                          cluster_ip=storage_router_to_remove.ip)
    except KeyboardInterrupt:
        Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages='Removal has been aborted during the validation step. No changes have been applied.',
                    boxed=True,
                    loglevel='warning')
        sys.exit(1)
    except Exception as exception:
        Toolbox.log(logger=NodeRemovalController._logger, messages=[str(exception)], boxed=True, loglevel='exception')
        sys.exit(1)

    #################
    # CONFIRMATIONS #
    #################
    try:
        interactive = silent != '--force-yes'
        remove_asd_manager = not interactive  # Remove ASD manager if non-interactive else ask
        if interactive is True:
            if len(storage_routers_offline) > 0:
                Toolbox.log(logger=NodeRemovalController._logger,
                            messages='Certain nodes appear to be offline. These will not be fully removed and will cause issues if they are not really offline.')
                Toolbox.log(logger=NodeRemovalController._logger,
                            messages='Offline nodes: {0}'.format(''.join(('\n * {0:<15}- {1}.'.format(ip, message) for ip, message in offline_reasons.iteritems()))))
                valid_node_info = Interactive.ask_yesno(message='Continue the removal with these being presumably offline?',
                                                        default_value=False)
                if valid_node_info is False:
                    Toolbox.log(logger=NodeRemovalController._logger,
                                messages='Please validate the state of the nodes before removing.',
                                title=True)
                    sys.exit(1)

            proceed = Interactive.ask_yesno(message='Are you sure you want to remove node {0}?'.format(storage_router_to_remove.name),
                                            default_value=False)
            if proceed is False:
                Toolbox.log(logger=NodeRemovalController._logger, messages='Abort removal', title=True)
                sys.exit(1)

            remove_asd_manager = True
            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove, username='******')
                if service_manager.has_service(name='asd-manager', client=client):
                    remove_asd_manager = Interactive.ask_yesno(message='Do you also want to remove the ASD manager and related ASDs?',
                                                               default_value=False)

        if remove_asd_manager is True or storage_router_to_remove_online is False:
            for fct in Toolbox.fetch_hooks('noderemoval', 'validate_asd_removal'):
                validation_output = fct(storage_router_to_remove.ip)
                if validation_output['confirm'] is True:
                    if Interactive.ask_yesno(message=validation_output['question'], default_value=False) is False:
                        remove_asd_manager = False
                        break
    except KeyboardInterrupt:
        Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages='Removal has been aborted during the confirmation step. No changes have been applied.',
                    boxed=True,
                    loglevel='warning')
        sys.exit(1)
    except Exception as exception:
        Toolbox.log(logger=NodeRemovalController._logger, messages=[str(exception)], boxed=True, loglevel='exception')
        sys.exit(1)

    ###########
    # REMOVAL #
    ###########
    try:
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages='Starting removal of node {0} - {1}'.format(storage_router_to_remove.name, storage_router_to_remove.ip))
        if storage_router_to_remove_online is False:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages=' Marking all Storage Drivers served by Storage Router {0} as offline'.format(storage_router_to_remove.ip))
            StorageDriverController.mark_offline(storagerouter_guid=storage_router_to_remove.guid)

        # Remove vPools
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages=' Removing vPools from node'.format(storage_router_to_remove.ip))
        storage_routers_offline_guids = [sr.guid for sr in storage_routers_offline if sr.guid != storage_router_to_remove.guid]
        for storage_driver in storage_router_to_remove.storagedrivers:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages=' Removing vPool {0} from node'.format(storage_driver.vpool.name))
            StorageRouterController.remove_storagedriver(storagedriver_guid=storage_driver.guid,
                                                         offline_storage_router_guids=storage_routers_offline_guids)

        # Demote if MASTER
        if storage_router_to_remove.node_type == 'MASTER':
            NodeTypeController.demote_node(cluster_ip=storage_router_to_remove.ip,
                                           master_ip=master_ip,
                                           ip_client_map=ip_client_map,
                                           unique_id=storage_router_to_remove.machine_id,
                                           unconfigure_memcached=internal_memcached,
                                           unconfigure_rabbitmq=internal_rabbit_mq,
                                           offline_nodes=storage_routers_offline)

        # Stop / remove services
        Toolbox.log(logger=NodeRemovalController._logger, messages='Stopping and removing services')
        if storage_router_to_remove_online is True:
            client = SSHClient(endpoint=storage_router_to_remove, username='******')
            NodeRemovalController.remove_services(client=client,
                                                  node_type=storage_router_to_remove.node_type.lower(),
                                                  logger=NodeRemovalController._logger)
            service = 'watcher-config'
            if service_manager.has_service(service, client=client):
                Toolbox.log(logger=NodeRemovalController._logger, messages='Removing service {0}'.format(service))
                service_manager.stop_service(service, client=client)
                service_manager.remove_service(service, client=client)

        Toolbox.run_hooks(component='noderemoval',
                          sub_component='remove',
                          logger=NodeRemovalController._logger,
                          cluster_ip=storage_router_to_remove.ip,
                          complete_removal=remove_asd_manager)

        # Clean up model
        Toolbox.log(logger=NodeRemovalController._logger, messages='Removing node from model')
        for service in storage_router_to_remove.services:
            service.delete()
        for disk in storage_router_to_remove.disks:
            for partition in disk.partitions:
                partition.delete()
            disk.delete()
        for j_domain in storage_router_to_remove.domains:
            j_domain.delete()
        Configuration.delete('/ovs/framework/hosts/{0}'.format(storage_router_to_remove.machine_id))

        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map,
                                                                   offline_node_ips=[node.ip for node in storage_routers_offline],
                                                                   logger=NodeRemovalController._logger)

        if storage_router_to_remove_online is True:
            client = SSHClient(endpoint=storage_router_to_remove, username='******')
            client.file_delete(filenames=[Configuration.CACC_LOCATION])
            client.file_delete(filenames=[Configuration.BOOTSTRAP_CONFIG_LOCATION])
        storage_router_to_remove.delete()
        Toolbox.log(logger=NodeRemovalController._logger, messages='Successfully removed node\n')
    except Exception as exception:
        Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages=['An unexpected error occurred:', str(exception)],
                    boxed=True,
                    loglevel='exception')
        sys.exit(1)
    except KeyboardInterrupt:
        Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages='This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.',
                    boxed=True,
                    loglevel='error')
        sys.exit(1)

    if remove_asd_manager is True and storage_router_to_remove_online is True:
        Toolbox.log(logger=NodeRemovalController._logger, messages='\nRemoving ASD Manager')
        with remote(storage_router_to_remove.ip, [os]) as rem:
            rem.os.system('asd-manager remove --force-yes')
    Toolbox.log(logger=NodeRemovalController._logger, messages='Remove nodes finished', title=True)