def test_load_calculation(self): """ Validates whether the load calculation works """ vpools, storagerouters, storagedrivers, services, mds_services, _ = self._build_service_structure( {'vpools': [1], 'storagerouters': [1], 'storagedrivers': [(1, 1, 1)], # (<id>, <vpool_id>, <sr_id>) 'mds_services': [(1, 1)]} # (<id>, <sd_id>) ) mds_service = mds_services[1] self._create_vdisks_for_mds_service(2, 1, mds_service=mds_service) load, load_plus = MDSServiceController.get_mds_load(mds_service) self.assertEqual(load, 20, 'There should be a 20% load. {0}'.format(load)) self.assertEqual(load_plus, 30, 'There should be a 30% plus load. {0}'.format(load_plus)) self._create_vdisks_for_mds_service(3, 3, mds_service=mds_service) load, load_plus = MDSServiceController.get_mds_load(mds_service) self.assertEqual(load, 50, 'There should be a 50% load. {0}'.format(load)) self.assertEqual(load_plus, 60, 'There should be a 60% plus load. {0}'.format(load_plus)) self._create_vdisks_for_mds_service(5, 6, mds_service=mds_service) load, load_plus = MDSServiceController.get_mds_load(mds_service) self.assertEqual(load, 100, 'There should be a 100% load. {0}'.format(load)) self.assertEqual(load_plus, 110, 'There should be a 110% plus load. {0}'.format(load_plus)) mds_service.capacity = -1 mds_service.save() load, load_plus = MDSServiceController.get_mds_load(mds_service) self.assertEqual(load, 50, 'There should be a 50% load. {0}'.format(load)) self.assertEqual(load_plus, 50, 'There should be a 50% plus load. {0}'.format(load_plus)) mds_service.capacity = 0 mds_service.save() load, load_plus = MDSServiceController.get_mds_load(mds_service) self.assertEqual(load, float('inf'), 'There should be infinite load. {0}'.format(load)) self.assertEqual(load_plus, float('inf'), 'There should be infinite plus load. {0}'.format(load_plus))
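# The assertions above pin down the load formula without stating it. Below is a
# minimal sketch of what MDSServiceController.get_mds_load appears to compute,
# derived from the expected values in this test (an assumption, not the
# controller's actual implementation):
def _sketch_get_mds_load(mds_service):
    capacity = float(mds_service.capacity)
    if capacity < 0:  # -1 means unlimited capacity: report a fixed 50% load
        return 50.0, 50.0
    if capacity == 0:  # no capacity at all: any usage is an infinite load
        return float('inf'), float('inf')
    usage = len(mds_service.vdisks)  # vDisk junctions served by this MDS
    # 'load' reflects current usage; 'load_plus' the load after adding one vDisk
    return usage / capacity * 100, (usage + 1) / capacity * 100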
def resize_from_voldrv(volumename, volumesize, volumepath, storagedriver_id): """ Resize a disk Triggered by volumedriver messages on the queue @param volumename: volume id of the disk @param volumesize: size of the volume @param volumepath: path on hypervisor to the volume @param storagedriver_id: ID of the storagedriver serving the volume """ pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id) storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id) hypervisor = Factory.get(pmachine) volumepath = hypervisor.clean_backing_disk_filename(volumepath) mutex = VolatileMutex('{}_{}'.format(volumename, volumepath)) try: mutex.acquire(wait=30) disk = VDiskList.get_vdisk_by_volume_id(volumename) if disk is None: disk = VDiskList.get_by_devicename_and_vpool(volumepath, storagedriver.vpool) if disk is None: disk = VDisk() finally: mutex.release() disk.devicename = volumepath disk.volume_id = volumename disk.size = volumesize disk.vpool = storagedriver.vpool disk.save() VDiskController.sync_with_mgmtcenter(disk, pmachine, storagedriver) MDSServiceController.ensure_safety(disk)
def _test_scenario(scenario, _vdisks, _mds_services): """ Executes a test run for a given scenario """ _generate_backend_config(scenario, _vdisks, _mds_services) for vdisk_id in _vdisks: MDSServiceController.sync_vdisk_to_reality(_vdisks[vdisk_id]) _validate_scenario(scenario, _vdisks, _mds_services)
def clone(diskguid, snapshotid, devicename, pmachineguid, machinename, machineguid=None): """ Clone a disk :param diskguid: Guid of the disk to clone :param snapshotid: ID of the snapshot to clone from :param devicename: Device file name for the new disk :param pmachineguid: Guid of the pmachine hosting the clone :param machinename: Name of the machine the clone belongs to :param machineguid: Guid of the machine to assign the clone to """ pmachine = PMachine(pmachineguid) hypervisor = Factory.get(pmachine) description = '{} {}'.format(machinename, devicename) properties_to_clone = ['description', 'size', 'type', 'retentionpolicyguid', 'snapshotpolicyguid', 'autobackup'] vdisk = VDisk(diskguid) location = hypervisor.get_backing_disk_path(machinename, devicename) new_vdisk = VDisk() new_vdisk.copy(vdisk, include=properties_to_clone) new_vdisk.parent_vdisk = vdisk new_vdisk.name = '{0}-clone'.format(vdisk.name) new_vdisk.description = description new_vdisk.devicename = hypervisor.clean_backing_disk_filename(location) new_vdisk.parentsnapshot = snapshotid new_vdisk.vmachine = VMachine(machineguid) if machineguid else vdisk.vmachine new_vdisk.vpool = vdisk.vpool new_vdisk.save() try: storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id) if storagedriver is None: raise RuntimeError('Could not find StorageDriver with id {0}'.format(vdisk.storagedriver_id)) mds_service = MDSServiceController.get_preferred_mds(storagedriver.storagerouter, vdisk.vpool) if mds_service is None: raise RuntimeError('Could not find an MDS service') logger.info('Clone snapshot {} of disk {} to location {}'.format(snapshotid, vdisk.name, location)) volume_id = vdisk.storagedriver_client.create_clone( target_path=location, metadata_backend_config=MDSMetaDataBackendConfig([MDSNodeConfig(address=str(mds_service.service.storagerouter.ip), port=mds_service.service.ports[0])]), parent_volume_id=str(vdisk.volume_id), parent_snapshot_id=str(snapshotid), node_id=str(vdisk.storagedriver_id) ) except Exception as ex: logger.error('Caught exception during clone, trying to delete the volume. {0}'.format(ex)) new_vdisk.delete() VDiskController.delete_volume(location) raise new_vdisk.volume_id = volume_id new_vdisk.save() try: MDSServiceController.ensure_safety(new_vdisk) except Exception as ex: logger.error('Caught exception during "ensure_safety": {0}'.format(ex)) return {'diskguid': new_vdisk.guid, 'name': new_vdisk.name, 'backingdevice': location}
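# Hypothetical usage of the clone call above (all guids and names are invented
# for illustration, and we assume clone is exposed on VDiskController, as the
# other calls in this file suggest). Note the two-phase design: the model object
# is saved first, and a failure in the volumedriver call rolls back both the
# model entry and the backing volume.
result = VDiskController.clone(diskguid='2f8a...', snapshotid='7c1d...',
                               devicename='vm01-clone', pmachineguid='04be...',
                               machinename='vm01')
# result == {'diskguid': <guid of the new vDisk>, 'name': '<source name>-clone',
#            'backingdevice': <hypervisor path of the clone>}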
def volumedriver_error(code, volumename): """ Handles error messages/events from the volumedriver :param code: Volumedriver error code :param volumename: Name of the volume throwing the error """ if code == VolumeDriverEvents.MDSFailover: disk = VDiskList.get_vdisk_by_volume_id(volumename) if disk is not None: MDSServiceController.ensure_safety(disk)
def _execute_scrub_work(scrub_location, vdisk_guids): def _verify_mds_config(current_vdisk): current_vdisk.invalidate_dynamics(['info']) vdisk_configs = current_vdisk.info['metadata_backend_config'] if len(vdisk_configs) == 0: raise RuntimeError('Could not load MDS configuration') return vdisk_configs ScheduledTaskController._logger.info('Execute Scrub - Started') ScheduledTaskController._logger.info('Execute Scrub - Scrub location - {0}'.format(scrub_location)) total = len(vdisk_guids) skipped = 0 storagedrivers = {} failures = [] for vdisk_guid in vdisk_guids: vdisk = VDisk(vdisk_guid) try: # Load the vDisk's StorageDriver ScheduledTaskController._logger.info('Execute Scrub - Virtual disk {0} - {1} - Started'.format(vdisk.guid, vdisk.name)) vdisk.invalidate_dynamics(['storagedriver_id']) if vdisk.storagedriver_id not in storagedrivers: storagedrivers[vdisk.storagedriver_id] = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id) storagedriver = storagedrivers[vdisk.storagedriver_id] # Load the vDisk's MDS configuration configs = _verify_mds_config(current_vdisk=vdisk) # Check MDS master is local. Trigger MDS handover if necessary if configs[0].get('ip') != storagedriver.storagerouter.ip: ScheduledTaskController._logger.debug('Execute Scrub - Virtual disk {0} - {1} - MDS master is not local, trigger handover'.format(vdisk.guid, vdisk.name)) MDSServiceController.ensure_safety(vdisk) configs = _verify_mds_config(current_vdisk=vdisk) if configs[0].get('ip') != storagedriver.storagerouter.ip: skipped += 1 ScheduledTaskController._logger.info('Execute Scrub - Virtual disk {0} - {1} - Skipping because master MDS still not local'.format(vdisk.guid, vdisk.name)) continue with vdisk.storagedriver_client.make_locked_client(str(vdisk.volume_id)) as locked_client: ScheduledTaskController._logger.info('Execute Scrub - Virtual disk {0} - {1} - Retrieve and apply scrub work'.format(vdisk.guid, vdisk.name)) work_units = locked_client.get_scrubbing_workunits() for work_unit in work_units: scrubbing_result = locked_client.scrub(work_unit, scrub_location, log_sinks=[SCRUBBER_LOGFILE_LOCATION]) locked_client.apply_scrubbing_result(scrubbing_result) if work_units: ScheduledTaskController._logger.info('Execute Scrub - Virtual disk {0} - {1} - Scrub successfully applied'.format(vdisk.guid, vdisk.name)) else: ScheduledTaskController._logger.info('Execute Scrub - Virtual disk {0} - {1} - No scrubbing required'.format(vdisk.guid, vdisk.name)) except Exception as ex: failures.append('Failed scrubbing work unit for volume {0} with guid {1}: {2}'.format(vdisk.name, vdisk.guid, ex)) failed = len(failures) ScheduledTaskController._logger.info('Execute Scrub - Finished - Success: {0} - Failed: {1} - Skipped: {2}'.format((total - failed - skipped), failed, skipped)) if failed > 0: raise Exception('\n - '.join(failures)) return vdisk_guids
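# A hypothetical driver for the helper above, spreading all vDisks round-robin
# over a set of scrub locations (the locations and the even split are
# assumptions for illustration, not how the scheduled task necessarily
# partitions its work):
scrub_locations = ['/mnt/scrub1', '/mnt/scrub2']
all_guids = [vdisk.guid for vdisk in VDiskList.get_vdisks()]
chunks = [all_guids[i::len(scrub_locations)] for i in range(len(scrub_locations))]
for location, guid_chunk in zip(scrub_locations, chunks):
    _execute_scrub_work(scrub_location=location, vdisk_guids=guid_chunk)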
def _set_mds_safety(vpool, safety=None, checkup=False, logger=LOGGER): if safety is None: safety = len(StoragerouterHelper.get_storagerouters()) if safety <= 0: raise ValueError('Safety should be at least 1.') logger.debug('Setting the safety to {0}; a checkup {1} be triggered'.format(safety, 'will' if checkup is True else 'will not')) storagedriver_config = Configuration.get('/ovs/vpools/{0}/mds_config'.format(vpool.guid)) current_safety = storagedriver_config current_safety['mds_safety'] = safety Configuration.set('/ovs/vpools/{0}/mds_config'.format(vpool.guid), current_safety) if checkup is True: MDSServiceController.mds_checkup()
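# Hypothetical usage: raise the MDS safety for a vPool to 3 and immediately
# rebalance via an MDS checkup (vpool is assumed to be a VPool model object):
_set_mds_safety(vpool, safety=3, checkup=True)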
def volumedriver_error(code, volumename, storagedriver_id): """ Handles error messages/events from the volumedriver :param code: Volumedriver error code :param volumename: Name of the volume throwing the error :param storagedriver_id: ID of the storagedriver hosting the volume """ _ = storagedriver_id # Required for the @log decorator if code == VolumeDriverEvents.MDSFailover: disk = VDiskList.get_vdisk_by_volume_id(volumename) if disk is not None: MDSServiceController.ensure_safety(disk)
def _execute_scrub_work(scrub_location, vdisk_guids): def verify_mds_config(current_vdisk): current_vdisk.invalidate_dynamics(["info"]) vdisk_configs = current_vdisk.info["metadata_backend_config"] if len(vdisk_configs) == 0: raise RuntimeError("Could not load MDS configuration") return vdisk_configs logger.info("Scrub location: {0}".format(scrub_location)) total = len(vdisk_guids) skipped = 0 storagedrivers = {} failures = [] for vdisk_guid in vdisk_guids: vdisk = VDisk(vdisk_guid) try: # Load the vDisk's StorageDriver logger.info("Scrubbing virtual disk {0} with guid {1}".format(vdisk.name, vdisk.guid)) vdisk.invalidate_dynamics(["storagedriver_id"]) if vdisk.storagedriver_id not in storagedrivers: storagedrivers[vdisk.storagedriver_id] = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id) storagedriver = storagedrivers[vdisk.storagedriver_id] # Load the vDisk's MDS configuration configs = verify_mds_config(current_vdisk=vdisk) # Check MDS master is local. Trigger MDS handover if necessary if configs[0].get("ip") != storagedriver.storagerouter.ip: logger.debug("MDS for volume {0} is not local. Trigger handover".format(vdisk.volume_id)) MDSServiceController.ensure_safety(vdisk) configs = verify_mds_config(current_vdisk=vdisk) if configs[0].get("ip") != storagedriver.storagerouter.ip: skipped += 1 logger.info("Skipping scrubbing work unit for volume {0}: MDS master is not local".format(vdisk.volume_id)) continue with vdisk.storagedriver_client.make_locked_client(str(vdisk.volume_id)) as locked_client: work_units = locked_client.get_scrubbing_workunits() for work_unit in work_units: scrubbing_result = locked_client.scrub(work_unit, scrub_location) locked_client.apply_scrubbing_result(scrubbing_result) if work_units: logger.info("Scrubbing successfully applied") except Exception as ex: failures.append("Failed scrubbing work unit for volume {0} with guid {1}: {2}".format(vdisk.name, vdisk.guid, ex)) failed = len(failures) logger.info("Scrubbing finished. Success: {0} - Failed: {1} - Skipped: {2}".format(total - failed - skipped, failed, skipped)) if failed > 0: raise Exception("\n - ".join(failures))
def volumedriver_error(code, volume_id): """ Handles error messages/events from the volumedriver :param code: Volumedriver error code :type code: int :param volume_id: Name of the volume throwing the error :type volume_id: str :return: None """ if code == VolumeDriverEvents.MDSFailover: disk = VDiskList.get_vdisk_by_volume_id(volume_id) if disk is not None: MDSServiceController.ensure_safety(disk)
def get_stats_mds(cls): """ Retrieve how many vDisks each MDS service is serving, whether as master or slave """ if cls._config is None: cls.validate_and_retrieve_config() stats = [] environment = cls._config['environment'] service_type = ServiceTypeList.get_by_name('MetadataServer') if service_type is None: raise RuntimeError('MetadataServer service not found in the model') for service in service_type.services: slaves = 0 masters = 0 mds_service = service.mds_service for junction in mds_service.vdisks: if junction.is_master is True: masters += 1 else: slaves += 1 stats.append({'tags': {'vpool_name': mds_service.vpool.name, 'mds_number': mds_service.number, 'environment': environment, 'storagerouter_name': service.storagerouter.name}, 'fields': {'load': MDSServiceController.get_mds_load(mds_service)[0], 'capacity': mds_service.capacity if mds_service.capacity != -1 else 'infinite', 'masters': masters, 'slaves': slaves}, 'measurement': 'mds'}) return False, stats
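# An illustrative example of one point emitted by the loop above, following the
# InfluxDB-style tags/fields/measurement layout the code builds (all values are
# invented):
point = {'tags': {'vpool_name': 'vpool1',
                  'mds_number': 0,
                  'environment': 'demo-env',
                  'storagerouter_name': 'node1'},
         'fields': {'load': 30.0,    # first element returned by get_mds_load()
                    'capacity': 10,  # a capacity of -1 is reported as 'infinite'
                    'masters': 2,
                    'slaves': 1},
         'measurement': 'mds'}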
def migrate_from_voldrv(volume_id, new_owner_id): """ Triggered by volumedriver messages when a volume has changed owner (clean migration, or stolen due to another reason) :param volume_id: Volume ID of the disk :type volume_id: unicode :param new_owner_id: ID of the storage driver the volume migrated to :type new_owner_id: unicode :returns: None """ sd = StorageDriverList.get_by_storagedriver_id(storagedriver_id=new_owner_id) vdisk = VDiskList.get_vdisk_by_volume_id(volume_id=volume_id) if vdisk is not None: logger.info('Migration - Guid {0} - ID {1} - Detected migration for virtual disk {2}'.format(vdisk.guid, vdisk.volume_id, vdisk.name)) if sd is not None: logger.info('Migration - Guid {0} - ID {1} - Storage Router {2} is the new owner of virtual disk {3}'.format(vdisk.guid, vdisk.volume_id, sd.storagerouter.name, vdisk.name)) MDSServiceController.mds_checkup() VDiskController.dtl_checkup(vdisk_guid=vdisk.guid)
def remove_mds_services(self): """ Remove the MDS services related to the StorageDriver being deleted :return: A boolean indicating whether something went wrong :rtype: bool """ # Removing MDS services self._logger.info('Removing MDS services') errors_found = False for mds_service in self.mds_services: try: self._logger.info( 'Remove MDS service (number {0}) for StorageRouter with IP {1}' .format(mds_service.number, self.sr_installer.storagerouter.ip)) MDSServiceController.remove_mds_service( mds_service=mds_service, reconfigure=False, allow_offline=self.sr_installer.root_client is None) # No root_client means the StorageRouter is offline except Exception: self._logger.exception('Removing MDS service failed') errors_found = True return errors_found
def _generate_mds_service_load_repr(_mds_service): """ Generates a load representation for a given mds_service """ masters, slaves = 0, 0 for _junction in _mds_service.vdisks: if _junction.is_master: masters += 1 else: slaves += 1 capacity = _mds_service.capacity if capacity == -1: capacity = 'infinite' _load, _ = MDSServiceController.get_mds_load(_mds_service) if _load == float('inf'): _load = 'infinite' else: _load = round(_load, 2) return [_mds_service.service.storagerouter.ip, _mds_service.service.ports[0], masters, slaves, capacity, _load]
def update_vmachine_config(vmachine, vm_object, pmachine=None): """ Update a vMachine model object with a given hypervisor vMachine configuration """ try: vdisks_synced = 0 if vmachine.name is None: MessageController.fire(MessageController.Type.EVENT, {'type': 'vmachine_created', 'metadata': {'name': vm_object['name']}}) elif vmachine.name != vm_object['name']: MessageController.fire(MessageController.Type.EVENT, {'type': 'vmachine_renamed', 'metadata': {'old_name': vmachine.name, 'new_name': vm_object['name']}}) if pmachine is not None: vmachine.pmachine = pmachine vmachine.name = vm_object['name'] vmachine.hypervisor_id = vm_object['id'] vmachine.devicename = vm_object['backing']['filename'] vmachine.save() # Updating and linking disks storagedrivers = StorageDriverList.get_storagedrivers() datastores = dict([('{}:{}'.format(storagedriver.storage_ip, storagedriver.mountpoint), storagedriver) for storagedriver in storagedrivers]) vdisk_guids = [] for disk in vm_object['disks']: if disk['datastore'] in vm_object['datastores']: datastore = vm_object['datastores'][disk['datastore']] if datastore in datastores: vdisk = VDiskList.get_by_devicename_and_vpool(disk['filename'], datastores[datastore].vpool) if vdisk is None: # The disk couldn't be located, but is in our datastore. We might be in a recovery scenario vdisk = VDisk() vdisk.vpool = datastores[datastore].vpool vdisk.reload_client() vdisk.devicename = disk['filename'] vdisk.volume_id = vdisk.storagedriver_client.get_volume_id(str(disk['backingfilename'])) vdisk.size = vdisk.info['volume_size'] MDSServiceController.ensure_safety(vdisk) # Update the disk with information from the hypervisor if vdisk.vmachine is None: MessageController.fire(MessageController.Type.EVENT, {'type': 'vdisk_attached', 'metadata': {'vmachine_name': vmachine.name, 'vdisk_name': disk['name']}}) vdisk.vmachine = vmachine vdisk.name = disk['name'] vdisk.order = disk['order'] vdisk.save() vdisk_guids.append(vdisk.guid) vdisks_synced += 1 for vdisk in vmachine.vdisks: if vdisk.guid not in vdisk_guids: MessageController.fire(MessageController.Type.EVENT, {'type': 'vdisk_detached', 'metadata': {'vmachine_name': vmachine.name, 'vdisk_name': vdisk.name}}) vdisk.vmachine = None vdisk.save() logger.info('Updating vMachine finished (name {}, {} vdisks (re)linked)'.format(vmachine.name, vdisks_synced)) except Exception as ex: logger.error('Error during vMachine update: {0}'.format(str(ex))) raise
def test_ensure_safety(self): """ Validates whether the ensure_safety call works as expected """ def _generate_mds_service_load_repr(_mds_service): """ Generates a load representation for a given mds_service """ masters, slaves = 0, 0 for _junction in _mds_service.vdisks: if _junction.is_master: masters += 1 else: slaves += 1 capacity = _mds_service.capacity if capacity == -1: capacity = 'infinite' _load, _ = MDSServiceController.get_mds_load(_mds_service) if _load == float('inf'): _load = 'infinite' else: _load = round(_load, 2) return [_mds_service.service.storagerouter.ip, _mds_service.service.ports[0], masters, slaves, capacity, _load] def _check_reality(_configs, _loads, _vdisks, _mds_services, test=True, display=False): """ Validates 'reality' with an expected config/load """ reality_configs = [] for _vdisk_id in _vdisks: reality_configs.append(_vdisks[_vdisk_id].info['metadata_backend_config']) if display is True: for c in reality_configs: print c if test is True: self.assertListEqual(reality_configs, _configs) reality_loads = [] for mds_id in _mds_services: reality_loads.append(_generate_mds_service_load_repr(_mds_services[mds_id])) if display is True: for l in reality_loads: print l if test is True: self.assertListEqual(reality_loads, _loads) PersistentFactory.get_client().set('ovs.storagedriver.mds.safety', 3) PersistentFactory.get_client().set('ovs.storagedriver.mds.maxload', 75) PersistentFactory.get_client().set('ovs.storagedriver.mds.tlogs', 100) vpools, storagerouters, storagedrivers, _, mds_services, service_type = self._build_service_structure( {'vpools': [1], 'storagerouters': [1, 2, 3, 4], 'storagedrivers': [(1, 1, 1), (2, 1, 2), (3, 1, 3), (4, 1, 4)], # (<id>, <vpool_id>, <sr_id>) 'mds_services': [(1, 1), (2, 2), (3, 3), (4, 4)]} # (<id>, <sd_id>) ) vdisks = {} start_id = 1 for mds_service in mds_services.itervalues(): vdisks.update(self._create_vdisks_for_mds_service(2, start_id, mds_service=mds_service)) start_id += 2 # Validate the start configuration, which is simple: each disk has only its default local master configs = [[{'ip': '10.0.0.1', 'port': 1}], [{'ip': '10.0.0.1', 'port': 1}], [{'ip': '10.0.0.2', 'port': 2}], [{'ip': '10.0.0.2', 'port': 2}], [{'ip': '10.0.0.3', 'port': 3}], [{'ip': '10.0.0.3', 'port': 3}], [{'ip': '10.0.0.4', 'port': 4}], [{'ip': '10.0.0.4', 'port': 4}]] loads = [['10.0.0.1', 1, 2, 0, 10, 20.0], ['10.0.0.2', 2, 2, 0, 10, 20.0], ['10.0.0.3', 3, 2, 0, 10, 20.0], ['10.0.0.4', 4, 2, 0, 10, 20.0]] _check_reality(configs, loads, vdisks, mds_services) # Validate first run. Each disk should now have sufficient nodes, since there are plenty of MDS services available configs = [[{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.3', 'port': 3}], [{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.2', 'port': 2}], [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}], [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}], [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.2', 'port': 2}], [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.1', 'port': 1}], [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.2', 'port': 2}], [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}]] loads = [['10.0.0.1', 1, 2, 5, 10, 70.0], ['10.0.0.2', 2, 2, 4, 10, 60.0], ['10.0.0.3', 3, 2, 4, 10, 60.0], ['10.0.0.4', 4, 2, 3, 10, 50.0]] for vdisk_id in sorted(vdisks.keys()): MDSServiceController.ensure_safety(vdisks[vdisk_id]) _check_reality(configs, loads, vdisks, mds_services) # Validate whether this extra (unnecessary) run doesn't change anything, preventing reconfiguring over and over again for vdisk_id in sorted(vdisks.keys()): MDSServiceController.ensure_safety(vdisks[vdisk_id]) _check_reality(configs, loads, vdisks, mds_services) # Validating whether an overloaded node is correctly rebalanced mds_services[2].capacity = 2 mds_services[2].save() configs = [[{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}], [{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}], [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}], [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.1', 'port': 1}], [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}], [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.1', 'port': 1}], [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}], [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}]] loads = [['10.0.0.1', 1, 2, 5, 10, 70.0], ['10.0.0.2', 2, 2, 0, 2, 100.0], ['10.0.0.3', 3, 2, 5, 10, 70.0], ['10.0.0.4', 4, 2, 5, 10, 70.0]] for vdisk_id in sorted(vdisks.keys()): MDSServiceController.ensure_safety(vdisks[vdisk_id]) _check_reality(configs, loads, vdisks, mds_services) # Validate whether the overloaded services are still handled. In this case, causing a re-order of the slaves as ordered in the model configs = [[{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}], [{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}], [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}], [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}], [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}], [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.1', 'port': 1}], [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}], [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}]] loads = [['10.0.0.1', 1, 2, 5, 10, 70.0], ['10.0.0.2', 2, 2, 0, 2, 100.0], ['10.0.0.3', 3, 2, 5, 10, 70.0], ['10.0.0.4', 4, 2, 5, 10, 70.0]] for vdisk_id in sorted(vdisks.keys()): MDSServiceController.ensure_safety(vdisks[vdisk_id]) _check_reality(configs, loads, vdisks, mds_services) # Again, validating whether a subsequent run doesn't give unexpected changes for vdisk_id in sorted(vdisks.keys()): MDSServiceController.ensure_safety(vdisks[vdisk_id]) _check_reality(configs, loads, vdisks, mds_services) # An MDS service will be added (next to the overloaded service), which should cause the expected rebalancing s_id = '{0}-5'.format(storagerouters[2].name) service = Service() service.name = s_id service.storagerouter = storagerouters[2] service.ports = [5] service.type = service_type service.save() mds_service = MDSService() mds_service.service = service mds_service.number = 0 mds_service.capacity = 10 mds_service.vpool = vpools[1] mds_service.save() mds_services[5] = mds_service configs = [[{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}], [{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}], [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.2', 'port': 5}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}], [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.2', 'port': 5}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}], [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}], [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.1', 'port': 1}], [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.2', 'port': 5}], [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}]] loads = [['10.0.0.1', 1, 2, 5, 10, 70.0], ['10.0.0.2', 2, 2, 0, 2, 100.0], ['10.0.0.3', 3, 2, 5, 10, 70.0], ['10.0.0.4', 4, 2, 5, 10, 70.0], ['10.0.0.2', 5, 0, 3, 10, 30.0]] for vdisk_id in sorted(vdisks.keys()): MDSServiceController.ensure_safety(vdisks[vdisk_id]) _check_reality(configs, loads, vdisks, mds_services) # If the tlogs are not caught up, nothing should be changed for vdisk_id in [3, 4]: StorageDriverClient.catch_up[vdisks[vdisk_id].volume_id] = 1000 for vdisk_id in sorted(vdisks.keys()): MDSServiceController.ensure_safety(vdisks[vdisk_id]) _check_reality(configs, loads, vdisks, mds_services) # The next run, after tlogs are caught up, a master switch should be executed for vdisk_id in [3, 4]: StorageDriverClient.catch_up[vdisks[vdisk_id].volume_id] = 50 configs = [[{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}], [{'ip': '10.0.0.1', 'port':
1}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}], [{'ip': '10.0.0.2', 'port': 5}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}], [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}], [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}], [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.1', 'port': 1}], [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.2', 'port': 5}], [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}]] loads = [['10.0.0.1', 1, 2, 5, 10, 70.0], ['10.0.0.2', 2, 1, 0, 2, 50.0], ['10.0.0.3', 3, 2, 5, 10, 70.0], ['10.0.0.4', 4, 2, 5, 10, 70.0], ['10.0.0.2', 5, 1, 1, 10, 20.0]] for vdisk_id in sorted(vdisks.keys()): MDSServiceController.ensure_safety(vdisks[vdisk_id]) _check_reality(configs, loads, vdisks, mds_services) # Validate whether a volume migration makes the master follow StorageDriverClient.vrouter_id[vdisks[1].volume_id] = storagedrivers[3].storagedriver_id configs = [[{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.2', 'port': 5}], [{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}], [{'ip': '10.0.0.2', 'port': 5}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}], [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}], [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}], [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.1', 'port': 1}], [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.2', 'port': 5}], [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}]] loads = [['10.0.0.1', 1, 1, 6, 10, 70.0], ['10.0.0.2', 2, 1, 0, 2, 50.0], ['10.0.0.3', 3, 3, 4, 10, 70.0], ['10.0.0.4', 4, 2, 4, 10, 60.0], ['10.0.0.2', 5, 1, 2, 10, 30.0]] for vdisk_id in sorted(vdisks.keys()): MDSServiceController.ensure_safety(vdisks[vdisk_id]) _check_reality(configs, loads, vdisks, mds_services) # Validates if a second run doesn't change anything for vdisk_id in sorted(vdisks.keys()): MDSServiceController.ensure_safety(vdisks[vdisk_id]) _check_reality(configs, loads, vdisks, mds_services)
def deletescrubsnapshots(timestamp=None): """ Delete snapshots & scrubbing policy

Implemented delete snapshot policy:
< 1d | 1d bucket | 1 | best of bucket   | 1d
< 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w
< 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m
> 1m | delete
""" logger.info('Delete snapshots started') day = 60 * 60 * 24 week = day * 7 # Calculate bucket structure if timestamp is None: timestamp = time.time() offset = int(mktime(datetime.fromtimestamp(timestamp).date().timetuple())) - day buckets = [] # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[ for i in xrange(0, 7): buckets.append({'start': offset - (day * i), 'end': offset - (day * (i + 1)), 'type': '1d', 'snapshots': []}) # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[ for i in xrange(1, 4): buckets.append({'start': offset - (week * i), 'end': offset - (week * (i + 1)), 'type': '1w', 'snapshots': []}) buckets.append({'start': offset - (week * 4), 'end': 0, 'type': 'rest', 'snapshots': []}) # Place all snapshots in bucket_chains bucket_chains = [] for vmachine in VMachineList.get_customer_vmachines(): if any(vd.info['object_type'] in ['BASE'] for vd in vmachine.vdisks): bucket_chain = copy.deepcopy(buckets) for snapshot in vmachine.snapshots: timestamp = int(snapshot['timestamp']) for bucket in bucket_chain: if bucket['start'] >= timestamp > bucket['end']: for diskguid, snapshotguid in snapshot['snapshots'].iteritems(): bucket['snapshots'].append({'timestamp': timestamp, 'snapshotid': snapshotguid, 'diskguid': diskguid, 'is_consistent': snapshot['is_consistent']}) bucket_chains.append(bucket_chain) for vdisk in VDiskList.get_without_vmachine(): if vdisk.info['object_type'] in ['BASE']: bucket_chain = copy.deepcopy(buckets) for snapshot in vdisk.snapshots: timestamp = int(snapshot['timestamp']) for bucket in bucket_chain: if bucket['start'] >= timestamp > bucket['end']: bucket['snapshots'].append({'timestamp': timestamp, 'snapshotid': snapshot['guid'], 'diskguid': vdisk.guid, 'is_consistent': snapshot['is_consistent']}) bucket_chains.append(bucket_chain) # Clean out the snapshot bucket_chains: remove from each bucket the snapshot we want to keep, then delete all snapshots that remain in the buckets for bucket_chain in bucket_chains: first = True for bucket in bucket_chain: if first is True: best = None for snapshot in bucket['snapshots']: if best is None: best = snapshot # Consistent is better than inconsistent elif snapshot['is_consistent'] and not best['is_consistent']: best = snapshot # Newer (larger timestamp) is better than older snapshots elif snapshot['is_consistent'] == best['is_consistent'] and snapshot['timestamp'] > best['timestamp']: best = snapshot bucket['snapshots'] = [s for s in bucket['snapshots'] if s['timestamp'] != best['timestamp']] first = False elif bucket['end'] > 0: oldest = None for snapshot in bucket['snapshots']: if oldest is None: oldest = snapshot # Older (smaller timestamp) is the one we want to keep elif snapshot['timestamp'] < oldest['timestamp']: oldest = snapshot bucket['snapshots'] = [s for s in bucket['snapshots'] if s['timestamp'] != oldest['timestamp']] # Delete obsolete snapshots for bucket_chain in bucket_chains: for bucket in bucket_chain: for snapshot in bucket['snapshots']: VDiskController.delete_snapshot(diskguid=snapshot['diskguid'], snapshotid=snapshot['snapshotid']) logger.info('Delete snapshots finished') logger.info('Scrubbing started') vdisks = [] for vmachine in VMachineList.get_customer_vmachines(): for vdisk in vmachine.vdisks: if vdisk.info['object_type'] in ['BASE'] and len(vdisk.child_vdisks) == 0: vdisks.append(vdisk) for vdisk in VDiskList.get_without_vmachine(): if vdisk.info['object_type'] in ['BASE'] and len(vdisk.child_vdisks) == 0: vdisks.append(vdisk) total = 0 failed = 0 skipped = 0 storagedrivers = {} for vdisk in vdisks: try: total += 1 # Load the vDisk's StorageDriver vdisk.invalidate_dynamics(['info', 'storagedriver_id']) if vdisk.storagedriver_id not in storagedrivers: storagedrivers[vdisk.storagedriver_id] = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id) storagedriver = storagedrivers[vdisk.storagedriver_id] # Load the vDisk's MDS configuration vdisk.invalidate_dynamics(['info']) configs = vdisk.info['metadata_backend_config'] if len(configs) == 0: raise RuntimeError('Could not load MDS configuration') if configs[0]['ip'] != storagedriver.storagerouter.ip: # The MDS master is not local. Trigger an MDS handover and try again logger.debug('MDS for volume {0} is not local. Trigger handover'.format(vdisk.volume_id)) MDSServiceController.ensure_safety(vdisk) vdisk.invalidate_dynamics(['info']) configs = vdisk.info['metadata_backend_config'] if len(configs) == 0: raise RuntimeError('Could not load MDS configuration') if configs[0]['ip'] != storagedriver.storagerouter.ip: skipped += 1 logger.info('Skipping scrubbing work unit for volume {0}: MDS master is not local'.format(vdisk.volume_id)) continue work_units = vdisk.storagedriver_client.get_scrubbing_workunits(str(vdisk.volume_id)) for work_unit in work_units: scrubbing_result = _storagedriver_scrubber.scrub(work_unit, str(storagedriver.mountpoint_temp)) vdisk.storagedriver_client.apply_scrubbing_result(scrubbing_result) except Exception as ex: failed += 1 logger.info('Failed scrubbing work unit for volume {0}: {1}'.format(vdisk.volume_id, ex)) logger.info('Scrubbing finished (total: {0}, failed: {1}, skipped: {2})'.format(total, failed, skipped))
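# Worked example of the bucket layout built above, for an assumed run at
# 2015-06-15 10:30 (the date is invented for illustration). 'offset' lands on
# yesterday's midnight (2015-06-14 00:00); the seven 1d buckets then step back a
# day at a time, the three 1w buckets a week at a time, and the 'rest' bucket
# catches everything older than four weeks, which all gets deleted.
from datetime import datetime
from time import mktime

day, week = 86400, 7 * 86400
timestamp = mktime(datetime(2015, 6, 15, 10, 30).timetuple())
offset = int(mktime(datetime.fromtimestamp(timestamp).date().timetuple())) - day
day_buckets = [(offset - day * i, offset - day * (i + 1)) for i in xrange(0, 7)]     # [0-1[ ... [6-7[ days
week_buckets = [(offset - week * i, offset - week * (i + 1)) for i in xrange(1, 4)]  # [7-14[, [14-21[, [21-28[
rest_bucket = (offset - week * 4, 0)  # a snapshot falls in a bucket when start >= timestamp > end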
def test_storagedriver_config_set(self): """ Validates whether storagedriver configuration is generated as expected """ PersistentFactory.get_client().set('ovs.storagedriver.mds.safety', 3) vpools, storagerouters, storagedrivers, services, mds_services, _ = self._build_service_structure( {'vpools': [1, 2], 'storagerouters': [1, 2, 3, 4, 5, 6], 'storagedrivers': [(1, 1, 1), (2, 1, 2), (3, 1, 3), (4, 1, 4), (5, 2, 4), (6, 2, 5), (7, 2, 6)], # (<id>, <vpool_id>, <sr_id>) 'mds_services': [(1, 1), (2, 1), (3, 2), (4, 3), (5, 4), (6, 5), (7, 6), (8, 7), (9, 7)]} # (<id>, <sd_id>) ) vdisks = {} start_id = 1 for mds_service in mds_services.itervalues(): vdisks.update(self._create_vdisks_for_mds_service(10, start_id, mds_service=mds_service)) start_id += 10 mds_services[1].capacity = 11 # on 1, vpool 1 mds_services[1].save() mds_services[2].capacity = 20 # on 1, vpool 1 mds_services[2].save() mds_services[3].capacity = 12 # on 2, vpool 1 mds_services[3].save() mds_services[4].capacity = 14 # on 3, vpool 1 mds_services[4].save() mds_services[5].capacity = 16 # on 4, vpool 1 mds_services[5].save() mds_services[6].capacity = 11 # on 4, vpool 2 mds_services[6].save() mds_services[7].capacity = 13 # on 5, vpool 2 mds_services[7].save() mds_services[8].capacity = 19 # on 6, vpool 2 mds_services[8].save() mds_services[9].capacity = 15 # on 6, vpool 2 mds_services[9].save() config = MDSServiceController.get_mds_storagedriver_config_set(vpools[1]) expected = {storagerouters[1].guid: [{'host': '10.0.0.1', 'port': 2}, {'host': '10.0.0.4', 'port': 5}, {'host': '10.0.0.3', 'port': 4}], storagerouters[2].guid: [{'host': '10.0.0.2', 'port': 3}, {'host': '10.0.0.1', 'port': 2}, {'host': '10.0.0.4', 'port': 5}], storagerouters[3].guid: [{'host': '10.0.0.3', 'port': 4}, {'host': '10.0.0.1', 'port': 2}, {'host': '10.0.0.4', 'port': 5}], storagerouters[4].guid: [{'host': '10.0.0.4', 'port': 5}, {'host': '10.0.0.1', 'port': 2}, {'host': '10.0.0.3', 'port': 4}]} self.assertDictEqual(config, expected, 'Test 1. Got:\n{0}'.format(json.dumps(config, indent=2))) mds_services[2].capacity = 10 # on 1, vpool 1 mds_services[2].save() config = MDSServiceController.get_mds_storagedriver_config_set(vpools[1]) expected = {storagerouters[1].guid: [{'host': '10.0.0.1', 'port': 1}, {'host': '10.0.0.4', 'port': 5}, {'host': '10.0.0.3', 'port': 4}], storagerouters[2].guid: [{'host': '10.0.0.2', 'port': 3}, {'host': '10.0.0.4', 'port': 5}, {'host': '10.0.0.3', 'port': 4}], storagerouters[3].guid: [{'host': '10.0.0.3', 'port': 4}, {'host': '10.0.0.4', 'port': 5}, {'host': '10.0.0.2', 'port': 3}], storagerouters[4].guid: [{'host': '10.0.0.4', 'port': 5}, {'host': '10.0.0.3', 'port': 4}, {'host': '10.0.0.2', 'port': 3}]} self.assertDictEqual(config, expected, 'Test 2. Got:\n{0}'.format(json.dumps(config, indent=2)))
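# The expected values above imply a simple selection rule. A sketch of that rule
# (derived from the assertions, not the controller's actual implementation, and
# assuming the vPool exposes its MDS services as vpool.mds_services): per
# StorageRouter take its highest-capacity local MDS first, then fill up to the
# configured safety (3 here) with the highest-capacity MDS of every other
# StorageRouter, ordered by capacity.
def _sketch_config_set(vpool, safety=3):
    best = {}  # StorageRouter guid -> (capacity, {'host': ip, 'port': port})
    for mds_service in vpool.mds_services:
        service = mds_service.service
        sr = service.storagerouter
        if sr.guid not in best or mds_service.capacity > best[sr.guid][0]:
            best[sr.guid] = (mds_service.capacity, {'host': sr.ip, 'port': service.ports[0]})
    config_set = {}
    for sr_guid in best:
        local_entry = best[sr_guid][1]
        others = sorted((best[guid] for guid in best if guid != sr_guid),
                        key=lambda item: item[0], reverse=True)
        config_set[sr_guid] = [local_entry] + [entry for _, entry in others[:safety - 1]]
    return config_set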
def create_from_template(diskguid, devicename, pmachineguid, machinename='', machineguid=None): """ Create a disk from a template :param diskguid: Guid of the disk :param machinename: Name of the machine :param devicename: Device file name for the disk (eg: my_disk-flat.vmdk) :param pmachineguid: Guid of pmachine to create new vdisk on :param machineguid: Guid of the machine to assign disk to :return: Guid, name and backing device of the new disk """ pmachine = PMachine(pmachineguid) hypervisor = Factory.get(pmachine) if machineguid is not None: new_vdisk_vmachine = VMachine(machineguid) machinename = new_vdisk_vmachine.name disk_path = hypervisor.get_disk_path(machinename, devicename) description = '{0} {1}'.format(machinename, devicename) properties_to_clone = [ 'description', 'size', 'type', 'retentionpolicyid', 'snapshotpolicyid', 'vmachine', 'vpool'] vdisk = VDisk(diskguid) if vdisk.vmachine and not vdisk.vmachine.is_vtemplate: # Disk might not be attached to a vmachine, but still be a template raise RuntimeError('The given vdisk does not belong to a template') if not vdisk.is_vtemplate: raise RuntimeError('The given vdisk is not a template') storagedriver = None for sd in vdisk.vpool.storagedrivers: if sd.storagerouter_guid in pmachine.storagerouters_guids: storagedriver = sd break if storagedriver is None: raise RuntimeError('Could not find Storage Driver') new_vdisk = VDisk() new_vdisk.copy(vdisk, include=properties_to_clone) new_vdisk.vpool = vdisk.vpool new_vdisk.devicename = hypervisor.clean_backing_disk_filename(disk_path) new_vdisk.parent_vdisk = vdisk new_vdisk.name = '{0}-clone'.format(vdisk.name) new_vdisk.description = description new_vdisk.vmachine = new_vdisk_vmachine if machineguid else vdisk.vmachine new_vdisk.save() mds_service = MDSServiceController.get_preferred_mds(storagedriver.storagerouter, new_vdisk.vpool) if mds_service is None: raise RuntimeError('Could not find an MDS service') logger.info('Create disk from template {0} to new disk {1} to location {2}'.format(vdisk.name, new_vdisk.name, disk_path)) try: backend_config = MDSNodeConfig(address=str(mds_service.service.storagerouter.ip), port=mds_service.service.ports[0]) volume_id = vdisk.storagedriver_client.create_clone_from_template(target_path=disk_path, metadata_backend_config=MDSMetaDataBackendConfig([backend_config]), parent_volume_id=str(vdisk.volume_id), node_id=str(storagedriver.storagedriver_id)) new_vdisk.volume_id = volume_id new_vdisk.save() MDSServiceController.ensure_safety(new_vdisk) VDiskController.dtl_checkup.delay(vdisk_guid=new_vdisk.guid) except Exception as ex: logger.error('Clone disk on volumedriver level failed with exception: {0}'.format(str(ex))) try: VDiskController.clean_bad_disk(new_vdisk.guid) except Exception as ex2: logger.exception('Exception during exception handling of "create_clone_from_template" : {0}'.format(str(ex2))) raise ex return {'diskguid': new_vdisk.guid, 'name': new_vdisk.name, 'backingdevice': disk_path}
def shrink_vpool(cls, storagedriver_guid, offline_storage_router_guids=list()):
    """
    Removes a StorageDriver (if it's the last StorageDriver for a vPool, the vPool is removed as well)
    :param storagedriver_guid: Guid of the StorageDriver to remove
    :type storagedriver_guid: str
    :param offline_storage_router_guids: Guids of StorageRouters which are offline and will be removed from the cluster.
                                         Whether the vPool will be deleted depends on this
    :type offline_storage_router_guids: list
    :return: None
    :rtype: NoneType
    """
    # TODO: Add logging
    # TODO: Unit test individual pieces of code
    # Validations
    storagedriver = StorageDriver(storagedriver_guid)
    storagerouter = storagedriver.storagerouter
    cls._logger.info('StorageDriver {0} - Deleting StorageDriver {1}'.format(storagedriver.guid, storagedriver.name))

    vp_installer = VPoolInstaller(name=storagedriver.vpool.name)
    vp_installer.validate(storagedriver=storagedriver)

    sd_installer = StorageDriverInstaller(vp_installer=vp_installer,
                                          storagedriver=storagedriver)

    cls._logger.info('StorageDriver {0} - Checking availability of related StorageRouters'.format(storagedriver.guid))
    sr_client_map = SSHClient.get_clients(endpoints=[sd.storagerouter for sd in vp_installer.vpool.storagedrivers],
                                          user_names=['root'])
    sr_installer = StorageRouterInstaller(root_client=sr_client_map.get(storagerouter, {}).get('root'),
                                          storagerouter=storagerouter,
                                          vp_installer=vp_installer,
                                          sd_installer=sd_installer)

    offline_srs = sr_client_map.pop('offline')
    if sorted([sr.guid for sr in offline_srs]) != sorted(offline_storage_router_guids):
        raise RuntimeError('Not all StorageRouters are reachable')

    if storagerouter not in offline_srs:
        mtpt_pids = sr_installer.root_client.run("lsof -t +D '/mnt/{0}' || true".format(vp_installer.name.replace(r"'", r"'\''")),
                                                 allow_insecure=True).splitlines()
        if len(mtpt_pids) > 0:
            raise RuntimeError('vPool cannot be deleted. Following processes keep the vPool mount point occupied: {0}'.format(', '.join(mtpt_pids)))

    # Retrieve reachable StorageDrivers
    reachable_storagedrivers = []
    for sd in vp_installer.vpool.storagedrivers:
        if sd.storagerouter not in sr_client_map:
            # StorageRouter is offline
            continue

        sd_key = '/ovs/vpools/{0}/hosts/{1}/config'.format(vp_installer.vpool.guid, sd.storagedriver_id)
        if Configuration.exists(sd_key) is True:
            path = Configuration.get_configuration_path(sd_key)
            with remote(sd.storagerouter.ip, [LocalStorageRouterClient]) as rem:
                try:
                    lsrc = rem.LocalStorageRouterClient(path)
                    lsrc.server_revision()  # 'Cheap' call to verify whether volumedriver is responsive
                    cls._logger.info('StorageDriver {0} - Responsive StorageDriver {1} on node with IP {2}'.format(storagedriver.guid, sd.name, sd.storagerouter.ip))
                    reachable_storagedrivers.append(sd)
                except Exception as exception:
                    if not is_connection_failure(exception):
                        raise

    if len(reachable_storagedrivers) == 0:
        raise RuntimeError('Could not find any responsive node in the cluster')

    # Start removal
    if vp_installer.storagedriver_amount > 1:
        vp_installer.update_status(status=VPool.STATUSES.SHRINKING)
    else:
        vp_installer.update_status(status=VPool.STATUSES.DELETING)

    # Clean up stale vDisks
    cls._logger.info('StorageDriver {0} - Removing stale vDisks'.format(storagedriver.guid))
    VDiskController.remove_stale_vdisks(vpool=vp_installer.vpool)

    # Reconfigure the MDSes
    cls._logger.info('StorageDriver {0} - Reconfiguring MDSes'.format(storagedriver.guid))
    for vdisk_guid in storagerouter.vdisks_guids:
        try:
            MDSServiceController.ensure_safety(vdisk_guid=vdisk_guid,
                                               excluded_storagerouter_guids=[storagerouter.guid] + offline_storage_router_guids)
        except Exception:
            cls._logger.exception('StorageDriver {0} - vDisk {1} - Ensuring MDS safety failed'.format(storagedriver.guid, vdisk_guid))

    # Validate that all MDSes on the current StorageRouter have been moved away
    # Ensure safety does not always throw an error, that's why we perform this check here instead of in the Exception clause of the code above
    vdisks = []
    for mds in vp_installer.mds_services:
        for junction in mds.vdisks:
            vdisk = junction.vdisk
            if vdisk in vdisks:
                continue
            vdisks.append(vdisk)
            cls._logger.critical('StorageDriver {0} - vDisk {1} {2} - MDS Services have not been migrated away'.format(storagedriver.guid, vdisk.guid, vdisk.name))
    if len(vdisks) > 0:
        # Put back in RUNNING, so it can be used again. Errors keep on displaying in GUI now anyway
        vp_installer.update_status(status=VPool.STATUSES.RUNNING)
        raise RuntimeError('Not all MDS Services have been successfully migrated away')

    # Start with actual removal
    errors_found = False
    if storagerouter not in offline_srs:
        errors_found |= sd_installer.stop_services()  # Accumulate error flags with |=, so a single failing step marks the removal as failed

    errors_found |= vp_installer.configure_cluster_registry(exclude=[storagedriver], apply_on=reachable_storagedrivers)
    errors_found |= vp_installer.update_node_distance_map()
    errors_found |= vp_installer.remove_mds_services()
    errors_found |= sd_installer.clean_config_management()
    errors_found |= sd_installer.clean_model()

    if storagerouter not in offline_srs:
        errors_found |= sd_installer.clean_directories(mountpoints=StorageRouterController.get_mountpoints(client=sr_installer.root_client))
        try:
            DiskController.sync_with_reality(storagerouter_guid=storagerouter.guid)
        except Exception:
            cls._logger.exception('StorageDriver {0} - Synchronizing disks with reality failed'.format(storagedriver.guid))
            errors_found = True

    if vp_installer.storagedriver_amount > 1:
        # Update the vPool metadata and run DTL checkup
        vp_installer.vpool.metadata['caching_info'].pop(sr_installer.storagerouter.guid, None)
        vp_installer.vpool.save()
        try:
            VDiskController.dtl_checkup(vpool_guid=vp_installer.vpool.guid, ensure_single_timeout=600)
        except Exception:
            cls._logger.exception('StorageDriver {0} - DTL checkup failed for vPool {1} with guid {2}'.format(storagedriver.guid, vp_installer.name, vp_installer.vpool.guid))
    else:
        cls._logger.info('StorageDriver {0} - Removing vPool from model'.format(storagedriver.guid))
        # Clean up model
        try:
            vp_installer.vpool.delete()
        except Exception:
            errors_found = True
            cls._logger.exception('StorageDriver {0} - Cleaning up vPool from the model failed'.format(storagedriver.guid))
        Configuration.delete('/ovs/vpools/{0}'.format(vp_installer.vpool.guid))

    cls._logger.info('StorageDriver {0} - Running MDS checkup'.format(storagedriver.guid))
    try:
        MDSServiceController.mds_checkup()
    except Exception:
        cls._logger.exception('StorageDriver {0} - MDS checkup failed'.format(storagedriver.guid))

    # Update vPool status
    if errors_found is True:
        if vp_installer.storagedriver_amount > 1:
            vp_installer.update_status(status=VPool.STATUSES.FAILURE)
        raise RuntimeError('1 or more errors occurred while trying to remove the StorageDriver. Please check the logs for more information')

    if vp_installer.storagedriver_amount > 1:
        vp_installer.update_status(status=VPool.STATUSES.RUNNING)
    cls._logger.info('StorageDriver {0} - Deleted StorageDriver {1}'.format(storagedriver.guid, storagedriver.name))

    if len(VPoolList.get_vpools()) == 0:
        cluster_name = ArakoonInstaller.get_cluster_name('voldrv')
        if ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)['internal'] is True:
            cls._logger.debug('StorageDriver {0} - Removing Arakoon cluster {1}'.format(storagedriver.guid, cluster_name))
            try:
                installer = ArakoonInstaller(cluster_name=cluster_name)
                installer.load()
                installer.delete_cluster()
            except Exception:
                cls._logger.exception('StorageDriver {0} - Delete voldrv Arakoon cluster failed'.format(storagedriver.guid))
            service_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
            service_name = ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name)
            for service in list(service_type.services):
                if service.name == service_name:
                    service.delete()

    # Remove watcher volumedriver service if this was the last StorageDriver on the current StorageRouter
    if len(storagerouter.storagedrivers) == 0 and storagerouter not in offline_srs:  # ensure client is initialized for StorageRouter
        try:
            if cls._service_manager.has_service(ServiceFactory.SERVICE_WATCHER_VOLDRV, client=sr_installer.root_client):
                cls._service_manager.stop_service(ServiceFactory.SERVICE_WATCHER_VOLDRV, client=sr_installer.root_client)
                cls._service_manager.remove_service(ServiceFactory.SERVICE_WATCHER_VOLDRV, client=sr_installer.root_client)
        except Exception:
            cls._logger.exception('StorageDriver {0} - {1} service deletion failed'.format(storagedriver.guid, ServiceFactory.SERVICE_WATCHER_VOLDRV))
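# A hedged usage sketch for shrink_vpool above; the guids are placeholders, and exposing
# the call on StorageRouterController is an assumption (the body already references
# StorageRouterController.get_mountpoints and cls). Unreachable StorageRouters must be
# listed explicitly, and the call raises when that list does not match reality.
try:
    StorageRouterController.shrink_vpool(storagedriver_guid='00000000-0000-0000-0000-0000000000aa',
                                         offline_storage_router_guids=['00000000-0000-0000-0000-0000000000bb'])
except RuntimeError as ex:
    # E.g. 'Not all StorageRouters are reachable' or a busy vPool mount point
    print('shrink_vpool failed: {0}'.format(ex))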
def update_vmachine_config(vmachine, vm_object, pmachine=None): """ Update a vMachine configuration with a given vMachine configuration """ try: vdisks_synced = 0 if vmachine.name is None: MessageController.fire( MessageController.Type.EVENT, { 'type': 'vmachine_created', 'metadata': { 'name': vm_object['name'] } }) elif vmachine.name != vm_object['name']: MessageController.fire( MessageController.Type.EVENT, { 'type': 'vmachine_renamed', 'metadata': { 'old_name': vmachine.name, 'new_name': vm_object['name'] } }) if pmachine is not None: vmachine.pmachine = pmachine vmachine.name = vm_object['name'] vmachine.hypervisor_id = vm_object['id'] vmachine.devicename = vm_object['backing']['filename'] vmachine.save() # Updating and linking disks storagedrivers = StorageDriverList.get_storagedrivers() datastores = dict([('{}:{}'.format(storagedriver.storage_ip, storagedriver.mountpoint), storagedriver) for storagedriver in storagedrivers]) vdisk_guids = [] for disk in vm_object['disks']: if disk['datastore'] in vm_object['datastores']: datastore = vm_object['datastores'][disk['datastore']] if datastore in datastores: vdisk = VDiskList.get_by_devicename_and_vpool( disk['filename'], datastores[datastore].vpool) if vdisk is None: # The disk couldn't be located, but is in our datastore. We might be in a recovery scenario vdisk = VDisk() vdisk.vpool = datastores[datastore].vpool vdisk.reload_client() vdisk.devicename = disk['filename'] vdisk.volume_id = vdisk.storagedriver_client.get_volume_id( str(disk['backingfilename'])) vdisk.size = vdisk.info['volume_size'] MDSServiceController.ensure_safety(vdisk) # Update the disk with information from the hypervisor if vdisk.vmachine is None: MessageController.fire( MessageController.Type.EVENT, { 'type': 'vdisk_attached', 'metadata': { 'vmachine_name': vmachine.name, 'vdisk_name': disk['name'] } }) vdisk.vmachine = vmachine vdisk.name = disk['name'] vdisk.order = disk['order'] vdisk.save() vdisk_guids.append(vdisk.guid) vdisks_synced += 1 for vdisk in vmachine.vdisks: if vdisk.guid not in vdisk_guids: MessageController.fire( MessageController.Type.EVENT, { 'type': 'vdisk_detached', 'metadata': { 'vmachine_name': vmachine.name, 'vdisk_name': vdisk.name } }) vdisk.vmachine = None vdisk.save() logger.info( 'Updating vMachine finished (name {}, {} vdisks (re)linked)'. format(vmachine.name, vdisks_synced)) except Exception as ex: logger.info('Error during vMachine update: {0}'.format(str(ex))) raise
def clone(diskguid, snapshotid, devicename, pmachineguid, machinename=None, machineguid=None, detached=False): """ Clone a disk """ pmachine = PMachine(pmachineguid) hypervisor = Factory.get(pmachine) if machinename is None: description = devicename else: description = '{0} {1}'.format(machinename, devicename) properties_to_clone = ['description', 'size', 'type', 'retentionpolicyguid', 'snapshotpolicyguid', 'autobackup'] vdisk = VDisk(diskguid) location = hypervisor.get_backing_disk_path(machinename, devicename) if machineguid is not None and detached is True: raise ValueError('A vMachine GUID was specified while detached is True') if snapshotid is None: # Create a new snapshot timestamp = str(int(time.time())) metadata = {'label': '', 'is_consistent': False, 'timestamp': timestamp, 'machineguid': machineguid, 'is_automatic': True} VDiskController.create_snapshot(diskguid, metadata) tries = 25 # About 5 minutes while snapshotid is None and tries > 0: tries -= 1 time.sleep(25 - tries) vdisk.invalidate_dynamics(['snapshots']) snapshots = [snapshot for snapshot in vdisk.snapshots if snapshot['in_backend'] is True and snapshot['timestamp'] == timestamp] if len(snapshots) == 1: snapshotid = snapshots[0]['guid'] if snapshotid is None: raise RuntimeError('Could not find created snapshot in time') new_vdisk = VDisk() new_vdisk.copy(vdisk, include=properties_to_clone) new_vdisk.parent_vdisk = vdisk new_vdisk.name = '{0}-clone'.format(vdisk.name) new_vdisk.description = description new_vdisk.devicename = hypervisor.clean_backing_disk_filename(location) new_vdisk.parentsnapshot = snapshotid if detached is False: new_vdisk.vmachine = VMachine(machineguid) if machineguid else vdisk.vmachine new_vdisk.vpool = vdisk.vpool new_vdisk.save() try: storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id) if storagedriver is None: raise RuntimeError('Could not find StorageDriver with id {0}'.format(vdisk.storagedriver_id)) mds_service = MDSServiceController.get_preferred_mds(storagedriver.storagerouter, vdisk.vpool) if mds_service is None: raise RuntimeError('Could not find a MDS service') logger.info('Clone snapshot {0} of disk {1} to location {2}'.format(snapshotid, vdisk.name, location)) volume_id = vdisk.storagedriver_client.create_clone( target_path=location, metadata_backend_config=MDSMetaDataBackendConfig([MDSNodeConfig(address=str(mds_service.service.storagerouter.ip), port=mds_service.service.ports[0])]), parent_volume_id=str(vdisk.volume_id), parent_snapshot_id=str(snapshotid), node_id=str(vdisk.storagedriver_id) ) except Exception as ex: logger.error('Caught exception during clone, trying to delete the volume. {0}'.format(ex)) new_vdisk.delete() VDiskController.delete_volume(location) raise new_vdisk.volume_id = volume_id new_vdisk.save() try: MDSServiceController.ensure_safety(new_vdisk) except Exception as ex: logger.error('Caught exception during "ensure_safety" {0}'.format(ex)) return {'diskguid': new_vdisk.guid, 'name': new_vdisk.name, 'backingdevice': location}
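# A quick check of the back-off arithmetic in the snapshot wait loop above: each pass
# decrements tries and then sleeps 25 - tries seconds, so the delays grow 1, 2, ..., 25
# and the total wait before giving up is 25 * 26 / 2 = 325 seconds (the 'About 5 minutes').
delays = [25 - tries for tries in xrange(24, -1, -1)]
assert delays == list(xrange(1, 26))
assert sum(delays) == 325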
def _execute_scrub(queue, vpool, scrub_info, scrub_dir, error_messages): def _verify_mds_config(current_vdisk): current_vdisk.invalidate_dynamics('info') vdisk_configs = current_vdisk.info['metadata_backend_config'] if len(vdisk_configs) == 0: raise RuntimeError('Could not load MDS configuration') return vdisk_configs storagerouter = scrub_info['storage_router'] partition_guid = scrub_info['partition_guid'] volatile_client = VolatileFactory.get_client() backend_config_key = 'ovs/vpools/{0}/proxies/scrub/backend_config_{1}'.format( vpool.guid, partition_guid) try: # Empty the queue with vDisks to scrub with remote(storagerouter.ip, [VDisk]) as rem: while True: vdisk = None vdisk_guid = queue.get( False ) # Raises Empty Exception when queue is empty, so breaking the while True loop volatile_key = 'ovs_scrubbing_vdisk_{0}'.format(vdisk_guid) try: # Check MDS master is local. Trigger MDS handover if necessary vdisk = rem.VDisk(vdisk_guid) GenericController._logger.info( 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Started scrubbing at location {3}' .format(vpool.name, storagerouter.name, vdisk.name, scrub_dir)) configs = _verify_mds_config(current_vdisk=vdisk) storagedriver = StorageDriverList.get_by_storagedriver_id( vdisk.storagedriver_id) if configs[0].get( 'ip') != storagedriver.storagerouter.ip: GenericController._logger.info( 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - MDS master is not local, trigger handover' .format(vpool.name, storagerouter.name, vdisk.name)) MDSServiceController.ensure_safety( VDisk(vdisk_guid) ) # Do not use a remote VDisk instance here configs = _verify_mds_config(current_vdisk=vdisk) if configs[0].get( 'ip') != storagedriver.storagerouter.ip: GenericController._logger.warning( 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Skipping because master MDS still not local' .format(vpool.name, storagerouter.name, vdisk.name)) continue # Check if vDisk is already being scrubbed if volatile_client.add(key=volatile_key, value=volatile_key, time=24 * 60 * 60) is False: GenericController._logger.warning( 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Skipping because vDisk is already being scrubbed' .format(vpool.name, storagerouter.name, vdisk.name)) continue # Do the actual scrubbing with vdisk.storagedriver_client.make_locked_client( str(vdisk.volume_id)) as locked_client: GenericController._logger.info( 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Retrieve and apply scrub work' .format(vpool.name, storagerouter.name, vdisk.name)) work_units = locked_client.get_scrubbing_workunits( ) for work_unit in work_units: res = locked_client.scrub( work_unit=work_unit, scratch_dir=scrub_dir, log_sinks=[ LogHandler.get_sink_path( 'scrubber_{0}'.format(vpool.name), allow_override=True, forced_target_type='file') ], backend_config=Configuration. 
get_configuration_path(backend_config_key)) locked_client.apply_scrubbing_result( scrubbing_work_result=res) if work_units: GenericController._logger.info( 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - {3} work units successfully applied' .format(vpool.name, storagerouter.name, vdisk.name, len(work_units))) else: GenericController._logger.info( 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - No scrubbing required' .format(vpool.name, storagerouter.name, vdisk.name)) except Exception: if vdisk is None: message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk with guid {2} could not be found'.format( vpool.name, storagerouter.name, vdisk_guid) else: message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Scrubbing failed'.format( vpool.name, storagerouter.name, vdisk.name) error_messages.append(message) GenericController._logger.exception(message) finally: # Remove vDisk from volatile memory volatile_client.delete(volatile_key) except Empty: # Raised when all items have been fetched from the queue GenericController._logger.info( 'Scrubber - vPool {0} - StorageRouter {1} - Queue completely processed' .format(vpool.name, storagerouter.name)) except Exception: message = 'Scrubber - vPool {0} - StorageRouter {1} - Scrubbing failed'.format( vpool.name, storagerouter.name) error_messages.append(message) GenericController._logger.exception(message)
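# A hedged driver sketch for _execute_scrub above: it drains a Queue.Queue of vDisk guids
# until Queue.Empty is raised, and appends human-readable failures to error_messages.
# `vpool` and `storagerouter` are assumed to be fetched from the model already; the guids,
# the scrub directory and reaching _execute_scrub as a static helper on GenericController
# are assumptions for illustration.
from Queue import Queue  # Python 2 stdlib module; the Empty caught above comes from here

vdisk_queue = Queue()
for guid in ['00000000-0000-0000-0000-000000000001',
             '00000000-0000-0000-0000-000000000002']:
    vdisk_queue.put(guid)
error_messages = []
scrub_info = {'storage_router': storagerouter,                            # StorageRouter doing the work
              'partition_guid': '00000000-0000-0000-0000-0000000000aa'}  # scrub partition (placeholder)
GenericController._execute_scrub(vdisk_queue, vpool, scrub_info, '/mnt/scrub_work', error_messages)
if error_messages:
    raise Exception('Errors occurred while scrubbing:\n - {0}'.format('\n - '.join(error_messages)))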
def execute_scrub_work(queue, vpool, scrub_info, error_messages): """ Executes scrub work for a given vDisk queue and vPool, based on scrub_info :param queue: a Queue with vDisk guids that need to be scrubbed (they should only be member of a single vPool) :type queue: Queue :param vpool: the vPool object of the vDisks :type vpool: VPool :param scrub_info: A dict containing scrub information: `scrub_path` with the path where to scrub and `storage_router` with the StorageRouter that needs to do the work :type scrub_info: dict :param error_messages: A list of error messages to be filled :type error_messages: list :return: a list of error messages :rtype: list """ def _verify_mds_config(current_vdisk): current_vdisk.invalidate_dynamics('info') vdisk_configs = current_vdisk.info['metadata_backend_config'] if len(vdisk_configs) == 0: raise RuntimeError('Could not load MDS configuration') return vdisk_configs client = None lock_time = 5 * 60 storagerouter = scrub_info['storage_router'] scrub_directory = '{0}/scrub_work_{1}_{2}'.format(scrub_info['scrub_path'], vpool.name, storagerouter.name) scrub_config_key = 'ovs/vpools/{0}/proxies/scrub/scrub_config_{1}'.format(vpool.guid, storagerouter.guid) backend_config_key = 'ovs/vpools/{0}/proxies/scrub/backend_config_{1}'.format(vpool.guid, storagerouter.guid) alba_proxy_service = 'ovs-albaproxy_{0}_{1}_scrub'.format(vpool.name, storagerouter.name) # Deploy a proxy try: with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time): ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service)) client = SSHClient(storagerouter, 'root') client.dir_create(scrub_directory) client.dir_chmod(scrub_directory, 0777) # Celery task executed by 'ovs' user and should be able to write in it if ServiceManager.has_service(name=alba_proxy_service, client=client) is True and ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True: ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Re-using existing proxy service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service)) scrub_config = Configuration.get(scrub_config_key) else: machine_id = System.get_my_machine_id(client) port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|storagedriver'.format(machine_id)) port = System.get_free_ports(selected_range=port_range, nr=1, client=client)[0] # Scrub config # {u'albamgr_cfg_url': u'arakoon://config/ovs/vpools/71e2f717-f270-4a41-bbb0-d4c8c084d43e/proxies/64759516-3471-4321-b912-fb424568fc5b/config/abm?ini=%2Fopt%2FOpenvStorage%2Fconfig%2Farakoon_cacc.ini', # u'fragment_cache': [u'none'], # u'ips': [u'127.0.0.1'], # u'log_level': u'info', # u'manifest_cache_size': 17179869184, # u'port': 0, # u'transport': u'tcp'} # Backend config # {u'alba_connection_host': u'10.100.193.155', # u'alba_connection_port': 26204, # u'alba_connection_preset': u'preset', # u'alba_connection_timeout': 15, # u'alba_connection_transport': u'TCP', # u'backend_interface_retries_on_error': 5, # u'backend_interface_retry_backoff_multiplier': 2.0, # u'backend_interface_retry_interval_secs': 1, # u'backend_type': u'ALBA'} scrub_config = Configuration.get('ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid)) scrub_config['port'] = port scrub_config['transport'] = 'tcp' Configuration.set(scrub_config_key, json.dumps(scrub_config, indent=4), raw=True) params = {'VPOOL_NAME': vpool.name, 'LOG_SINK': 
LogHandler.get_sink_path('alba_proxy'),
                      'CONFIG_PATH': Configuration.get_configuration_path(scrub_config_key)}
            ServiceManager.add_service(name='ovs-albaproxy', params=params, client=client, target_name=alba_proxy_service)
            ServiceManager.start_service(name=alba_proxy_service, client=client)
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deployed ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))

            backend_config = Configuration.get('ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, vpool.storagedrivers[0].storagedriver_id))['backend_connection_manager']
            backend_config['alba_connection_host'] = '127.0.0.1'
            backend_config['alba_connection_port'] = scrub_config['port']
            Configuration.set(backend_config_key, json.dumps({"backend_connection_manager": backend_config}, indent=4), raw=True)
    except Exception:
        message = 'Scrubber - vPool {0} - StorageRouter {1} - An error occurred deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service)
        error_messages.append(message)
        ScheduledTaskController._logger.exception(message)
        if client is not None and ServiceManager.has_service(name=alba_proxy_service, client=client) is True:
            if ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True:
                ServiceManager.stop_service(name=alba_proxy_service, client=client)
            ServiceManager.remove_service(name=alba_proxy_service, client=client)
        if Configuration.exists(scrub_config_key):
            Configuration.delete(scrub_config_key)

    try:
        # Empty the queue with vDisks to scrub
        with remote(storagerouter.ip, [VDisk]) as rem:
            while True:
                vdisk = None
                vdisk_guid = queue.get(False)
                try:
                    # Check MDS master is local. Trigger MDS handover if necessary
                    vdisk = rem.VDisk(vdisk_guid)
                    ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Started scrubbing at location {3}'.format(vpool.name, storagerouter.name, vdisk.name, scrub_directory))
                    configs = _verify_mds_config(current_vdisk=vdisk)
                    storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
                    if configs[0].get('ip') != storagedriver.storagerouter.ip:
                        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - MDS master is not local, trigger handover'.format(vpool.name, storagerouter.name, vdisk.name))
                        MDSServiceController.ensure_safety(VDisk(vdisk_guid))  # Do not use a remote VDisk instance here
                        configs = _verify_mds_config(current_vdisk=vdisk)
                        if configs[0].get('ip') != storagedriver.storagerouter.ip:
                            ScheduledTaskController._logger.warning('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Skipping because master MDS still not local'.format(vpool.name, storagerouter.name, vdisk.name))
                            continue

                    # Do the actual scrubbing
                    with vdisk.storagedriver_client.make_locked_client(str(vdisk.volume_id)) as locked_client:
                        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Retrieve and apply scrub work'.format(vpool.name, storagerouter.name, vdisk.name))
                        work_units = locked_client.get_scrubbing_workunits()
                        for work_unit in work_units:
                            res = locked_client.scrub(work_unit=work_unit,
                                                      scratch_dir=scrub_directory,
                                                      log_sinks=[LogHandler.get_sink_path('scrubber', allow_override=True)],
                                                      backend_config=Configuration.get_configuration_path(backend_config_key))
                            locked_client.apply_scrubbing_result(scrubbing_work_result=res)
                        if work_units:
                            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - {3} work units successfully applied'.format(vpool.name, storagerouter.name, vdisk.name, len(work_units)))
                        else:
                            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - No scrubbing required'.format(vpool.name, storagerouter.name, vdisk.name))
                except Exception:
                    if vdisk is None:
                        message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk with guid {2} could not be found'.format(vpool.name, storagerouter.name, vdisk_guid)
                    else:
                        message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Scrubbing failed'.format(vpool.name, storagerouter.name, vdisk.name)
                    error_messages.append(message)
                    ScheduledTaskController._logger.exception(message)
    except Empty:  # Raised when all items have been fetched from the queue
        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Queue completely processed'.format(vpool.name, storagerouter.name))
    except Exception:
        message = 'Scrubber - vPool {0} - StorageRouter {1} - Scrubbing failed'.format(vpool.name, storagerouter.name)
        error_messages.append(message)
        ScheduledTaskController._logger.exception(message)

    # Delete the proxy again
    try:
        with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removing service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
            client = SSHClient(storagerouter, 'root')
            client.dir_delete(scrub_directory)
            if ServiceManager.has_service(alba_proxy_service, client=client):
                ServiceManager.stop_service(alba_proxy_service, client=client)
                ServiceManager.remove_service(alba_proxy_service, client=client)
            if Configuration.exists(scrub_config_key):
                Configuration.delete(scrub_config_key)
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removed service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
    except Exception:
        message = 'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2} failed'.format(vpool.name, storagerouter.name, alba_proxy_service)
        error_messages.append(message)
        ScheduledTaskController._logger.exception(message)
def _execute_scrub_work(scrub_location, vdisk_guids): def verify_mds_config(current_vdisk): """ Retrieve the metadata backend configuration for vDisk :param current_vdisk: vDisk to retrieve configuration for :type current_vdisk: vDisk :return: MDS configuration for vDisk """ current_vdisk.invalidate_dynamics(['info']) vdisk_configs = current_vdisk.info['metadata_backend_config'] if len(vdisk_configs) == 0: raise RuntimeError('Could not load MDS configuration') return vdisk_configs logger.info('Execute Scrub - Started') logger.info( 'Execute Scrub - Scrub location - {0}'.format(scrub_location)) total = len(vdisk_guids) skipped = 0 storagedrivers = {} failures = [] for vdisk_guid in vdisk_guids: vdisk = VDisk(vdisk_guid) try: # Load the vDisk's StorageDriver logger.info( 'Execute Scrub - Virtual disk {0} - {1} - Started'.format( vdisk.guid, vdisk.name)) vdisk.invalidate_dynamics(['storagedriver_id']) if vdisk.storagedriver_id not in storagedrivers: storagedrivers[ vdisk. storagedriver_id] = StorageDriverList.get_by_storagedriver_id( vdisk.storagedriver_id) storagedriver = storagedrivers[vdisk.storagedriver_id] # Load the vDisk's MDS configuration configs = verify_mds_config(current_vdisk=vdisk) # Check MDS master is local. Trigger MDS handover if necessary if configs[0].get('ip') != storagedriver.storagerouter.ip: logger.debug( 'Execute Scrub - Virtual disk {0} - {1} - MDS master is not local, trigger handover' .format(vdisk.guid, vdisk.name)) MDSServiceController.ensure_safety(vdisk) configs = verify_mds_config(current_vdisk=vdisk) if configs[0].get('ip') != storagedriver.storagerouter.ip: skipped += 1 logger.info( 'Execute Scrub - Virtual disk {0} - {1} - Skipping because master MDS still not local' .format(vdisk.guid, vdisk.name)) continue with vdisk.storagedriver_client.make_locked_client( str(vdisk.volume_id)) as locked_client: logger.info( 'Execute Scrub - Virtual disk {0} - {1} - Retrieve and apply scrub work' .format(vdisk.guid, vdisk.name)) work_units = locked_client.get_scrubbing_workunits() for work_unit in work_units: scrubbing_result = locked_client.scrub( work_unit, scrub_location) locked_client.apply_scrubbing_result(scrubbing_result) if work_units: logger.info( 'Execute Scrub - Virtual disk {0} - {1} - Scrub successfully applied' .format(vdisk.guid, vdisk.name)) else: logger.info( 'Execute Scrub - Virtual disk {0} - {1} - No scrubbing required' .format(vdisk.guid, vdisk.name)) except Exception as ex: failures.append( 'Failed scrubbing work unit for volume {0} with guid {1}: {2}' .format(vdisk.name, vdisk.guid, ex)) failed = len(failures) logger.info( 'Execute Scrub - Finished - Success: {0} - Failed: {1} - Skipped: {2}' .format((total - failed - skipped), failed, skipped)) if failed > 0: raise Exception('\n - '.join(failures)) return vdisk_guids
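# A hedged usage sketch for _execute_scrub_work above: on success it returns the guid
# list, and on any per-disk failure it raises with the joined failure messages. The
# path and guid are placeholders.
try:
    scrubbed = _execute_scrub_work(scrub_location='/mnt/hdd1/scrub_work',
                                   vdisk_guids=['00000000-0000-0000-0000-000000000001'])
    logger.info('Scrubbed {0} vDisk(s)'.format(len(scrubbed)))
except Exception as ex:
    logger.error('Scrub run reported failures: {0}'.format(ex))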
def update_vmachine_config(vmachine, vm_object, pmachine=None): """ Update a vMachine configuration with a given vMachine configuration :param vmachine: Virtual Machine to update :param vm_object: New virtual machine info :param pmachine: Physical machine of the virtual machine """ try: vdisks_synced = 0 if vmachine.name is None: MessageController.fire(MessageController.Type.EVENT, {'type': 'vmachine_created', 'metadata': {'name': vm_object['name']}}) elif vmachine.name != vm_object['name']: MessageController.fire(MessageController.Type.EVENT, {'type': 'vmachine_renamed', 'metadata': {'old_name': vmachine.name, 'new_name': vm_object['name']}}) if pmachine is not None: vmachine.pmachine = pmachine vmachine.name = vm_object['name'] vmachine.hypervisor_id = vm_object['id'] vmachine.devicename = vm_object['backing']['filename'] vmachine.save() # Updating and linking disks storagedrivers = StorageDriverList.get_storagedrivers() datastores = dict([('{0}:{1}'.format(storagedriver.storage_ip, storagedriver.mountpoint), storagedriver) for storagedriver in storagedrivers]) vdisk_guids = [] mutex = volatile_mutex('{0}_{1}'.format(vmachine.name, vmachine.devicename)) for disk in vm_object['disks']: ensure_safety = False if disk['datastore'] in vm_object['datastores']: datastore = vm_object['datastores'][disk['datastore']] if datastore in datastores: try: mutex.acquire(wait=10) vdisk = VDiskList.get_by_devicename_and_vpool(disk['filename'], datastores[datastore].vpool) if vdisk is None: # The disk couldn't be located, but is in our datastore. We might be in a recovery scenario vdisk = VDisk() vdisk.vpool = datastores[datastore].vpool vdisk.reload_client() vdisk.devicename = disk['filename'] vdisk.volume_id = vdisk.storagedriver_client.get_volume_id(str(disk['backingfilename'])) vdisk.size = vdisk.info['volume_size'] vdisk.metadata = {'lba_size': vdisk.info['lba_size'], 'cluster_multiplier': vdisk.info['cluster_multiplier']} # Create the disk in a locked context, but don't execute long running-task in same context vdisk.save() ensure_safety = True finally: mutex.release() if ensure_safety: MDSServiceController.ensure_safety(vdisk) VDiskController.dtl_checkup(vdisk_guid=vdisk.guid) # Update the disk with information from the hypervisor if vdisk.vmachine is None: MessageController.fire(MessageController.Type.EVENT, {'type': 'vdisk_attached', 'metadata': {'vmachine_name': vmachine.name, 'vdisk_name': disk['name']}}) vdisk.vmachine = vmachine vdisk.name = disk['name'] vdisk.order = disk['order'] vdisk.save() vdisk_guids.append(vdisk.guid) vdisks_synced += 1 for vdisk in vmachine.vdisks: if vdisk.guid not in vdisk_guids: MessageController.fire(MessageController.Type.EVENT, {'type': 'vdisk_detached', 'metadata': {'vmachine_name': vmachine.name, 'vdisk_name': vdisk.name}}) vdisk.vmachine = None vdisk.save() VMachineController._logger.info('Updating vMachine finished (name {0}, {1} vdisks (re)linked)'.format( vmachine.name, vdisks_synced )) except Exception as ex: VMachineController._logger.info('Error during vMachine update: {0}'.format(str(ex))) raise
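# The function above deliberately models the vDisk inside a volatile_mutex but runs the
# long ensure_safety/dtl_checkup calls outside of it. A minimal sketch of that pattern;
# do_model and do_slow_work are hypothetical stand-ins:
mutex = volatile_mutex('some_unique_key')
needs_followup = False
try:
    mutex.acquire(wait=10)
    do_model()  # short, under the lock: create or update the DataObject
    needs_followup = True
finally:
    mutex.release()
if needs_followup:
    do_slow_work()  # long-running, outside the lock: MDS safety, DTL checkup, ...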
def add_vpool(cls, parameters): """ Add a vPool to the machine this task is running on :param parameters: Parameters for vPool creation :type parameters: dict :return: None :rtype: NoneType """ # TODO: Add logging cls._logger.debug('Adding vpool. Parameters: {}'.format(parameters)) # VALIDATIONS if not isinstance(parameters, dict): raise ValueError( 'Parameters passed to create a vPool should be of type dict') # Check StorageRouter existence storagerouter = StorageRouterList.get_by_ip( ip=parameters.get('storagerouter_ip')) if storagerouter is None: raise RuntimeError('Could not find StorageRouter') # Validate requested vPool configurations vp_installer = VPoolInstaller(name=parameters.get('vpool_name')) vp_installer.validate(storagerouter=storagerouter) # Validate requested StorageDriver configurations cls._logger.info( 'vPool {0}: Validating StorageDriver configurations'.format( vp_installer.name)) sd_installer = StorageDriverInstaller( vp_installer=vp_installer, configurations={ 'storage_ip': parameters.get('storage_ip'), 'caching_info': parameters.get('caching_info'), 'backend_info': { 'main': parameters.get('backend_info'), StorageDriverConfiguration.CACHE_BLOCK: parameters.get('backend_info_bc'), StorageDriverConfiguration.CACHE_FRAGMENT: parameters.get('backend_info_fc') }, 'connection_info': { 'main': parameters.get('connection_info'), StorageDriverConfiguration.CACHE_BLOCK: parameters.get('connection_info_bc'), StorageDriverConfiguration.CACHE_FRAGMENT: parameters.get('connection_info_fc') }, 'sd_configuration': parameters.get('config_params') }) partitions_mutex = volatile_mutex('add_vpool_partitions_{0}'.format( storagerouter.guid)) try: # VPOOL CREATION # Create the vPool as soon as possible in the process to be displayed in the GUI (INSTALLING/EXTENDING state) if vp_installer.is_new is True: vp_installer.create(rdma_enabled=sd_installer.rdma_enabled) vp_installer.configure_mds( config=parameters.get('mds_config_params', {})) else: vp_installer.update_status(status=VPool.STATUSES.EXTENDING) # ADDITIONAL VALIDATIONS # Check StorageRouter connectivity cls._logger.info( 'vPool {0}: Validating StorageRouter connectivity'.format( vp_installer.name)) linked_storagerouters = [storagerouter] if vp_installer.is_new is False: linked_storagerouters += [ sd.storagerouter for sd in vp_installer.vpool.storagedrivers ] sr_client_map = SSHClient.get_clients( endpoints=linked_storagerouters, user_names=['ovs', 'root']) offline_nodes = sr_client_map.pop('offline') if storagerouter in offline_nodes: raise RuntimeError( 'Node on which the vPool is being {0} is not reachable'. 
format('created' if vp_installer.is_new is True else 'extended')) sr_installer = StorageRouterInstaller( root_client=sr_client_map[storagerouter]['root'], sd_installer=sd_installer, vp_installer=vp_installer, storagerouter=storagerouter) # When 2 or more jobs simultaneously run on the same StorageRouter, we need to check and create the StorageDriver partitions in locked context partitions_mutex.acquire(wait=60) sr_installer.partition_info = StorageRouterController.get_partition_info( storagerouter_guid=storagerouter.guid) sr_installer.validate_vpool_extendable() sr_installer.validate_global_write_buffer( requested_size=parameters.get('writecache_size', 0)) sr_installer.validate_local_cache_size( requested_proxies=parameters.get('parallelism', {}).get( 'proxies', 2)) # MODEL STORAGEDRIVER AND PARTITION JUNCTIONS sd_installer.create() sd_installer.create_partitions() partitions_mutex.release() vp_installer.refresh_metadata() except Exception: cls._logger.exception( 'Something went wrong during the validation or modeling of vPool {0} on StorageRouter {1}' .format(vp_installer.name, storagerouter.name)) partitions_mutex.release() vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING) raise # Arakoon setup counter = 0 while counter < 300: try: if StorageDriverController.manual_voldrv_arakoon_checkup( ) is True: break except Exception: cls._logger.exception( 'Arakoon checkup for voldrv cluster failed') vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING) raise counter += 1 time.sleep(1) if counter == 300: vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING) raise RuntimeError( 'Arakoon checkup for the StorageDriver cluster could not be started' ) # Cluster registry try: vp_installer.configure_cluster_registry(allow_raise=True) except Exception: if vp_installer.is_new is True: vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING) else: vp_installer.revert_vpool(status=VPool.STATUSES.FAILURE) raise try: sd_installer.setup_proxy_configs() sd_installer.configure_storagedriver_service() DiskController.sync_with_reality(storagerouter.guid) MDSServiceController.prepare_mds_service( storagerouter=storagerouter, vpool=vp_installer.vpool) # Update the MDS safety if changed via API (vpool.configuration will be available at this point also for the newly added StorageDriver) vp_installer.vpool.invalidate_dynamics('configuration') if vp_installer.mds_safety is not None and vp_installer.vpool.configuration[ 'mds_config']['mds_safety'] != vp_installer.mds_safety: Configuration.set( key='/ovs/vpools/{0}/mds_config|mds_safety'.format( vp_installer.vpool.guid), value=vp_installer.mds_safety) sd_installer.start_services( ) # Create and start watcher volumedriver, DTL, proxies and StorageDriver services # Post creation/extension checkups mds_config_set = MDSServiceController.get_mds_storagedriver_config_set( vpool=vp_installer.vpool, offline_nodes=offline_nodes) for sr, clients in sr_client_map.iteritems(): for current_storagedriver in [ sd for sd in sr.storagedrivers if sd.vpool_guid == vp_installer.vpool.guid ]: storagedriver_config = StorageDriverConfiguration( vpool_guid=vp_installer.vpool.guid, storagedriver_id=current_storagedriver.storagedriver_id ) if storagedriver_config.config_missing is False: # Filesystem section in StorageDriver configuration are all parameters used for vDisks created directly on the filesystem # So when a vDisk gets created on the filesystem, these MDSes will be assigned to them storagedriver_config.configure_filesystem( fs_metadata_backend_mds_nodes=mds_config_set[ 
sr.guid]) storagedriver_config.save(client=clients['ovs']) # Everything's reconfigured, refresh new cluster configuration for current_storagedriver in vp_installer.vpool.storagedrivers: if current_storagedriver.storagerouter not in sr_client_map: continue vp_installer.vpool.storagedriver_client.update_cluster_node_configs( str(current_storagedriver.storagedriver_id), req_timeout_secs=10) except Exception: cls._logger.exception('vPool {0}: Creation failed'.format( vp_installer.name)) vp_installer.update_status(status=VPool.STATUSES.FAILURE) raise # When a node is offline, we can run into errors, but also when 1 or more volumes are not running # Scheduled tasks below, so don't really care whether they succeed or not try: VDiskController.dtl_checkup(vpool_guid=vp_installer.vpool.guid, ensure_single_timeout=600) except: pass for vdisk in vp_installer.vpool.vdisks: try: MDSServiceController.ensure_safety(vdisk_guid=vdisk.guid) except: pass vp_installer.update_status(status=VPool.STATUSES.RUNNING) cls._logger.info('Add vPool {0} ended successfully'.format( vp_installer.name))
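# A hedged sketch of the parameters dict add_vpool expects, based only on the keys read
# above; the values (IPs, names, nested backend/connection details) are placeholders and
# the nested dicts are abbreviated. The writecache_size unit is an assumption.
parameters = {'storagerouter_ip': '10.0.0.1',
              'vpool_name': 'vpool1',
              'storage_ip': '10.0.0.1',
              'caching_info': {},             # cache settings; structure not shown in this snippet
              'backend_info': {},             # main backend info
              'backend_info_bc': None,        # block cache backend (optional)
              'backend_info_fc': None,        # fragment cache backend (optional)
              'connection_info': {},
              'connection_info_bc': None,
              'connection_info_fc': None,
              'config_params': {},            # StorageDriver configuration
              'mds_config_params': {},        # optional MDS configuration
              'writecache_size': 10,          # requested global write buffer size (unit assumed)
              'parallelism': {'proxies': 2}}
StorageRouterController.add_vpool(parameters)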
def deletescrubsnapshots(timestamp=None):
    """
    Delete snapshots & scrubbing policy

    Implemented delete snapshot policy:
    < 1d | 1d bucket | 1 | best of bucket   | 1d
    < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w
    < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m
    > 1m | delete
    """
    logger.info('Delete snapshots started')

    day = 60 * 60 * 24
    week = day * 7

    # Calculate bucket structure
    if timestamp is None:
        timestamp = time.time()
    offset = int(mktime(datetime.fromtimestamp(timestamp).date().timetuple())) - day
    buckets = []
    # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[
    for i in xrange(0, 7):
        buckets.append({'start': offset - (day * i),
                        'end': offset - (day * (i + 1)),
                        'type': '1d',
                        'snapshots': []})
    # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[
    for i in xrange(1, 4):
        buckets.append({'start': offset - (week * i),
                        'end': offset - (week * (i + 1)),
                        'type': '1w',
                        'snapshots': []})
    buckets.append({'start': offset - (week * 4),
                    'end': 0,
                    'type': 'rest',
                    'snapshots': []})

    # Place all snapshots in bucket_chains
    bucket_chains = []
    for vmachine in VMachineList.get_customer_vmachines():
        if any(vd.info['object_type'] in ['BASE'] for vd in vmachine.vdisks):
            bucket_chain = copy.deepcopy(buckets)
            for snapshot in vmachine.snapshots:
                timestamp = int(snapshot['timestamp'])
                for bucket in bucket_chain:
                    if bucket['start'] >= timestamp > bucket['end']:
                        for diskguid, snapshotguid in snapshot['snapshots'].iteritems():
                            bucket['snapshots'].append({'timestamp': timestamp,
                                                        'snapshotid': snapshotguid,
                                                        'diskguid': diskguid,
                                                        'is_consistent': snapshot['is_consistent']})
            bucket_chains.append(bucket_chain)

    for vdisk in VDiskList.get_without_vmachine():
        if vdisk.info['object_type'] in ['BASE']:
            bucket_chain = copy.deepcopy(buckets)
            for snapshot in vdisk.snapshots:
                timestamp = int(snapshot['timestamp'])
                for bucket in bucket_chain:
                    if bucket['start'] >= timestamp > bucket['end']:
                        bucket['snapshots'].append({'timestamp': timestamp,
                                                    'snapshotid': snapshot['guid'],
                                                    'diskguid': vdisk.guid,
                                                    'is_consistent': snapshot['is_consistent']})
            bucket_chains.append(bucket_chain)

    # Clean out the snapshot bucket_chains: the snapshots we want to keep are removed from
    # the buckets, and every snapshot that remains in the buckets is deleted afterwards
    for bucket_chain in bucket_chains:
        first = True
        for bucket in bucket_chain:
            if first is True:
                best = None
                for snapshot in bucket['snapshots']:
                    if best is None:
                        best = snapshot
                    # Consistent is better than inconsistent
                    elif snapshot['is_consistent'] and not best['is_consistent']:
                        best = snapshot
                    # Newer (larger timestamp) is better than older snapshots
                    elif snapshot['is_consistent'] == best['is_consistent'] and \
                            snapshot['timestamp'] > best['timestamp']:
                        best = snapshot
                bucket['snapshots'] = [s for s in bucket['snapshots'] if s['timestamp'] != best['timestamp']]
                first = False
            elif bucket['end'] > 0:
                oldest = None
                for snapshot in bucket['snapshots']:
                    if oldest is None:
                        oldest = snapshot
                    # Older (smaller timestamp) is the one we want to keep
                    elif snapshot['timestamp'] < oldest['timestamp']:
                        oldest = snapshot
                bucket['snapshots'] = [s for s in bucket['snapshots'] if s['timestamp'] != oldest['timestamp']]

    # Delete obsolete snapshots
    for bucket_chain in bucket_chains:
        for bucket in bucket_chain:
            for snapshot in bucket['snapshots']:
                VDiskController.delete_snapshot(diskguid=snapshot['diskguid'],
                                                snapshotid=snapshot['snapshotid'])
    logger.info('Delete snapshots finished')

    logger.info('Scrubbing started')
    vdisks = []
    for vmachine in VMachineList.get_customer_vmachines():
        for vdisk in vmachine.vdisks:
            if vdisk.info['object_type'] in ['BASE'] and len(vdisk.child_vdisks) == 0:
                vdisks.append(vdisk)
    for vdisk in VDiskList.get_without_vmachine():
        if vdisk.info['object_type'] in ['BASE'] and len(vdisk.child_vdisks) == 0:
            vdisks.append(vdisk)

    total = 0
    failed = 0
    skipped = 0
    storagedrivers = {}
    for vdisk in vdisks:
        try:
            total += 1
            # Load the vDisk's StorageDriver
            vdisk.invalidate_dynamics(['info', 'storagedriver_id'])
            if vdisk.storagedriver_id not in storagedrivers:
                storagedrivers[vdisk.storagedriver_id] = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
            storagedriver = storagedrivers[vdisk.storagedriver_id]
            # Load the vDisk's MDS configuration
            vdisk.invalidate_dynamics(['info'])
            configs = vdisk.info['metadata_backend_config']
            if len(configs) == 0:
                raise RuntimeError('Could not load MDS configuration')
            if configs[0]['ip'] != storagedriver.storagerouter.ip:
                # The MDS master is not local. Trigger an MDS handover and try again
                logger.debug('MDS for volume {0} is not local. Trigger handover'.format(vdisk.volume_id))
                MDSServiceController.ensure_safety(vdisk)
                vdisk.invalidate_dynamics(['info'])
                configs = vdisk.info['metadata_backend_config']
                if len(configs) == 0:
                    raise RuntimeError('Could not load MDS configuration')
                if configs[0]['ip'] != storagedriver.storagerouter.ip:
                    skipped += 1
                    logger.info('Skipping scrubbing work unit for volume {0}: MDS master is not local'.format(vdisk.volume_id))
                    continue
            work_units = vdisk.storagedriver_client.get_scrubbing_workunits(str(vdisk.volume_id))
            for work_unit in work_units:
                scrubbing_result = _storagedriver_scrubber.scrub(work_unit, str(storagedriver.mountpoint_temp))
                vdisk.storagedriver_client.apply_scrubbing_result(scrubbing_result)
        except Exception as ex:
            failed += 1
            logger.info('Failed scrubbing work unit for volume {0}: {1}'.format(vdisk.volume_id, ex))
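# A small, self-contained illustration of the bucket layout built above: offset is
# midnight of the given day minus one day, followed by seven 1-day buckets, three
# 1-week buckets and a catch-all 'rest' bucket. Membership uses start >= ts > end.
import time
from datetime import datetime
from time import mktime

day = 60 * 60 * 24
week = 7 * day
timestamp = time.time()
offset = int(mktime(datetime.fromtimestamp(timestamp).date().timetuple())) - day
for i in xrange(0, 7):
    print('1d bucket: ]{0} - {1}]'.format(offset - day * (i + 1), offset - day * i))
for i in xrange(1, 4):
    print('1w bucket: ]{0} - {1}]'.format(offset - week * (i + 1), offset - week * i))
print('rest bucket: ]0 - {0}]'.format(offset - week * 4))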
def create_from_template(diskguid, machinename, devicename, pmachineguid, machineguid=None, storagedriver_guid=None): """ Create a disk from a template @param devicename: device file name for the disk (eg: mydisk-flat.vmdk) @param machineguid: guid of the machine to assign disk to @return diskguid: guid of new disk """ pmachine = PMachine(pmachineguid) hypervisor = Factory.get(pmachine) disk_path = hypervisor.get_disk_path(machinename, devicename) description = '{} {}'.format(machinename, devicename) properties_to_clone = [ 'description', 'size', 'type', 'retentionpolicyid', 'snapshotpolicyid', 'vmachine', 'vpool' ] vdisk = VDisk(diskguid) if vdisk.vmachine and not vdisk.vmachine.is_vtemplate: # Disk might not be attached to a vmachine, but still be a template raise RuntimeError('The given vdisk does not belong to a template') if storagedriver_guid is not None: storagedriver_id = StorageDriver( storagedriver_guid).storagedriver_id else: storagedriver_id = vdisk.storagedriver_id storagedriver = StorageDriverList.get_by_storagedriver_id( storagedriver_id) if storagedriver is None: raise RuntimeError( 'Could not find StorageDriver with id {0}'.format( storagedriver_id)) new_vdisk = VDisk() new_vdisk.copy(vdisk, include=properties_to_clone) new_vdisk.vpool = vdisk.vpool new_vdisk.devicename = hypervisor.clean_backing_disk_filename( disk_path) new_vdisk.parent_vdisk = vdisk new_vdisk.name = '{}-clone'.format(vdisk.name) new_vdisk.description = description new_vdisk.vmachine = VMachine( machineguid) if machineguid else vdisk.vmachine new_vdisk.save() mds_service = MDSServiceController.get_preferred_mds( storagedriver.storagerouter, vdisk.vpool) if mds_service is None: raise RuntimeError('Could not find a MDS service') logger.info( 'Create disk from template {} to new disk {} to location {}'. format(vdisk.name, new_vdisk.name, disk_path)) try: volume_id = vdisk.storagedriver_client.create_clone_from_template( target_path=disk_path, metadata_backend_config=MDSMetaDataBackendConfig([ MDSNodeConfig(address=str( mds_service.service.storagerouter.ip), port=mds_service.service.ports[0]) ]), parent_volume_id=str(vdisk.volume_id), node_id=str(storagedriver_id)) new_vdisk.volume_id = volume_id new_vdisk.save() MDSServiceController.ensure_safety(new_vdisk) except Exception as ex: logger.error( 'Clone disk on volumedriver level failed with exception: {0}'. format(str(ex))) new_vdisk.delete() raise return { 'diskguid': new_vdisk.guid, 'name': new_vdisk.name, 'backingdevice': disk_path }
def clone(diskguid, snapshotid, devicename, pmachineguid, machinename=None, machineguid=None, detached=False):
    """
    Clone a disk
    :param diskguid: Guid of the disk to clone
    :param snapshotid: ID of the snapshot to clone from
    :param devicename: Name of the device to use in clone's description
    :param pmachineguid: Guid of the physical machine
    :param machinename: Name of the machine the disk is attached to
    :param machineguid: Guid of the machine
    :param detached: Boolean indicating the disk is attached to a machine or not
    """
    # 1. Validations
    name_regex = "^[0-9a-zA-Z][-_a-zA-Z0-9]{1,48}[a-zA-Z0-9]$"
    if not re.match(name_regex, devicename):
        raise RuntimeError("Invalid name for virtual disk clone")

    if VDiskList.get_vdisk_by_name(vdiskname=devicename) is not None:
        raise RuntimeError("A virtual disk with this name already exists")

    vdisk = VDisk(diskguid)
    storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
    if storagedriver is None:
        raise RuntimeError('Could not find StorageDriver with ID {0}'.format(vdisk.storagedriver_id))

    if machineguid is not None and detached is True:
        raise ValueError('A vMachine GUID was specified while detached is True')

    # 2. Create new snapshot if required
    if snapshotid is None:
        timestamp = str(int(time.time()))
        metadata = {'label': '',
                    'is_consistent': False,
                    'timestamp': timestamp,
                    'machineguid': machineguid,
                    'is_automatic': True}
        sd_snapshot_id = VDiskController.create_snapshot(diskguid, metadata)
        tries = 25  # 5 minutes
        while snapshotid is None and tries > 0:
            time.sleep(25 - tries)
            tries -= 1
            vdisk.invalidate_dynamics(['snapshots'])
            for snapshot in vdisk.snapshots:
                if snapshot['guid'] != sd_snapshot_id:
                    continue
                if snapshot['in_backend'] is True:
                    snapshotid = snapshot['guid']
        if snapshotid is None:
            try:
                VDiskController.delete_snapshot(diskguid=diskguid,
                                                snapshotid=sd_snapshot_id)
            except:
                pass
            raise RuntimeError('Could not find created snapshot in time')

    # 3. Model new cloned virtual disk
    hypervisor = Factory.get(PMachine(pmachineguid))
    location = hypervisor.get_disk_path(machinename, devicename)
    new_vdisk = VDisk()
    new_vdisk.copy(vdisk, include=['description', 'size', 'type', 'retentionpolicyguid', 'snapshotpolicyguid', 'autobackup'])
    new_vdisk.parent_vdisk = vdisk
    new_vdisk.name = devicename
    new_vdisk.description = devicename if machinename is None else '{0} {1}'.format(machinename, devicename)
    new_vdisk.devicename = hypervisor.clean_backing_disk_filename(location)
    new_vdisk.parentsnapshot = snapshotid
    if detached is False:
        new_vdisk.vmachine = VMachine(machineguid) if machineguid else vdisk.vmachine
    new_vdisk.vpool = vdisk.vpool
    new_vdisk.save()

    # 4. Configure Storage Driver
    try:
        mds_service = MDSServiceController.get_preferred_mds(storagedriver.storagerouter, vdisk.vpool)
        if mds_service is None:
            raise RuntimeError('Could not find a MDS service')

        logger.info('Clone snapshot {0} of disk {1} to location {2}'.format(snapshotid, vdisk.name, location))
        backend_config = MDSMetaDataBackendConfig([MDSNodeConfig(address=str(mds_service.service.storagerouter.ip),
                                                                 port=mds_service.service.ports[0])])
        volume_id = vdisk.storagedriver_client.create_clone(target_path=location,
                                                            metadata_backend_config=backend_config,
                                                            parent_volume_id=str(vdisk.volume_id),
                                                            parent_snapshot_id=str(snapshotid),
                                                            node_id=str(vdisk.storagedriver_id))
    except Exception as ex:
        logger.error('Caught exception during clone, trying to delete the volume. {0}'.format(ex))
        try:
            VDiskController.clean_bad_disk(new_vdisk.guid)
        except Exception as ex2:
            logger.exception('Exception during exception handling of "clone": {0}'.format(str(ex2)))
        raise

    new_vdisk.volume_id = volume_id
    new_vdisk.save()

    # 5. Check MDS & DTL for new clone
    try:
        MDSServiceController.ensure_safety(new_vdisk)
    except Exception as ex:
        logger.error('Caught exception during "ensure_safety" {0}'.format(ex))
    VDiskController.dtl_checkup.delay(vdisk_guid=new_vdisk.guid)

    return {'diskguid': new_vdisk.guid, 'name': new_vdisk.name, 'backingdevice': location}
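# A quick illustration of the devicename validation regex used above: names must start
# and end alphanumerically, use only [-_a-zA-Z0-9] in between, and be 3 to 50 characters
# long overall (1 + {1,48} + 1).
import re
name_regex = "^[0-9a-zA-Z][-_a-zA-Z0-9]{1,48}[a-zA-Z0-9]$"
for candidate in ['clone_01', 'a-b', 'ab', '-bad-start', 'bad-end-']:
    print('{0!r}: {1}'.format(candidate, bool(re.match(name_regex, candidate))))
# 'clone_01' and 'a-b' pass; 'ab' is too short, '-bad-start' and 'bad-end-' fail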
def clone(diskguid, snapshotid, devicename, pmachineguid, machinename, machineguid=None): """ Clone a disk """ pmachine = PMachine(pmachineguid) hypervisor = Factory.get(pmachine) description = '{} {}'.format(machinename, devicename) properties_to_clone = [ 'description', 'size', 'type', 'retentionpolicyguid', 'snapshotpolicyguid', 'autobackup' ] vdisk = VDisk(diskguid) location = hypervisor.get_backing_disk_path(machinename, devicename) new_vdisk = VDisk() new_vdisk.copy(vdisk, include=properties_to_clone) new_vdisk.parent_vdisk = vdisk new_vdisk.name = '{0}-clone'.format(vdisk.name) new_vdisk.description = description new_vdisk.devicename = hypervisor.clean_backing_disk_filename(location) new_vdisk.parentsnapshot = snapshotid new_vdisk.vmachine = VMachine( machineguid) if machineguid else vdisk.vmachine new_vdisk.vpool = vdisk.vpool new_vdisk.save() try: storagedriver = StorageDriverList.get_by_storagedriver_id( vdisk.storagedriver_id) if storagedriver is None: raise RuntimeError( 'Could not find StorageDriver with id {0}'.format( vdisk.storagedriver_id)) mds_service = MDSServiceController.get_preferred_mds( storagedriver.storagerouter, vdisk.vpool) if mds_service is None: raise RuntimeError('Could not find a MDS service') logger.info('Clone snapshot {} of disk {} to location {}'.format( snapshotid, vdisk.name, location)) volume_id = vdisk.storagedriver_client.create_clone( target_path=location, metadata_backend_config=MDSMetaDataBackendConfig([ MDSNodeConfig(address=str( mds_service.service.storagerouter.ip), port=mds_service.service.ports[0]) ]), parent_volume_id=str(vdisk.volume_id), parent_snapshot_id=str(snapshotid), node_id=str(vdisk.storagedriver_id)) except Exception as ex: logger.error( 'Caught exception during clone, trying to delete the volume. {0}' .format(ex)) new_vdisk.delete() VDiskController.delete_volume(location) raise new_vdisk.volume_id = volume_id new_vdisk.save() try: MDSServiceController.ensure_safety(new_vdisk) except Exception as ex: logger.error( 'Caught exception during "ensure_safety" {0}'.format(ex)) return { 'diskguid': new_vdisk.guid, 'name': new_vdisk.name, 'backingdevice': location }
def create_from_template(diskguid, machinename, devicename, pmachineguid, machineguid=None, storagedriver_guid=None):
    """
    Create a disk from a template
    @param diskguid: guid of the template disk
    @param machinename: name of the machine the new disk belongs to (eg: myVM)
    @param devicename: device file name for the disk (eg: mydisk-flat.vmdk)
    @param pmachineguid: guid of the pmachine to create the new disk on
    @param machineguid: guid of the machine to assign disk to
    @param storagedriver_guid: guid of the storagedriver to create the new disk on
    @return diskguid: guid of new disk
    """
    pmachine = PMachine(pmachineguid)
    hypervisor = Factory.get(pmachine)
    disk_path = hypervisor.get_disk_path(machinename, devicename)

    description = "{} {}".format(machinename, devicename)
    properties_to_clone = [
        "description",
        "size",
        "type",
        "retentionpolicyid",
        "snapshotpolicyid",
        "vmachine",
        "vpool",
    ]

    vdisk = VDisk(diskguid)
    if vdisk.vmachine and not vdisk.vmachine.is_vtemplate:
        # Disk might not be attached to a vmachine, but still be a template
        raise RuntimeError("The given vdisk does not belong to a template")

    if storagedriver_guid is not None:
        storagedriver_id = StorageDriver(storagedriver_guid).storagedriver_id
    else:
        storagedriver_id = vdisk.storagedriver_id
    storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    if storagedriver is None:
        raise RuntimeError("Could not find StorageDriver with id {0}".format(storagedriver_id))

    new_vdisk = VDisk()
    new_vdisk.copy(vdisk, include=properties_to_clone)
    new_vdisk.vpool = vdisk.vpool
    new_vdisk.devicename = hypervisor.clean_backing_disk_filename(disk_path)
    new_vdisk.parent_vdisk = vdisk
    new_vdisk.name = "{}-clone".format(vdisk.name)
    new_vdisk.description = description
    new_vdisk.vmachine = VMachine(machineguid) if machineguid else vdisk.vmachine
    new_vdisk.save()

    mds_service = MDSServiceController.get_preferred_mds(storagedriver.storagerouter, vdisk.vpool)
    if mds_service is None:
        raise RuntimeError("Could not find a MDS service")

    logger.info(
        "Create disk from template {} to new disk {} to location {}".format(vdisk.name, new_vdisk.name, disk_path)
    )
    try:
        volume_id = vdisk.storagedriver_client.create_clone_from_template(
            target_path=disk_path,
            metadata_backend_config=MDSMetaDataBackendConfig(
                [
                    MDSNodeConfig(
                        address=str(mds_service.service.storagerouter.ip), port=mds_service.service.ports[0]
                    )
                ]
            ),
            parent_volume_id=str(vdisk.volume_id),
            node_id=str(storagedriver_id),
        )
        new_vdisk.volume_id = volume_id
        new_vdisk.save()
        MDSServiceController.ensure_safety(new_vdisk)
    except Exception as ex:
        logger.error("Clone disk on volumedriver level failed with exception: {0}".format(str(ex)))
        new_vdisk.delete()
        raise

    # Allow "regular" users to use this volume.
    # Only `run` as the ovs user works here: `run` as any other user blocks asking for the
    # root password and `run_local` lacks the required permissions, so this method only
    # works when called by root or ovs.
    storagerouter = StorageRouter(new_vdisk.storagerouter_guid)
    mountpoint = storagedriver.mountpoint
    location = "{0}{1}".format(mountpoint, disk_path)
    client = SSHClient.load(storagerouter.pmachine.ip)
    print(client.run('chmod 664 "{0}"'.format(location)))
    print(client.run('chown ovs:ovs "{0}"'.format(location)))

    return {"diskguid": new_vdisk.guid, "name": new_vdisk.name, "backingdevice": disk_path}
def test_storagedriver_config_set(self):
    """
    Validates whether the storagedriver configuration is generated as expected
    """
    PersistentFactory.get_client().set('ovs.storagedriver.mds.safety', 3)
    vpools, storagerouters, storagedrivers, services, mds_services, _ = self._build_service_structure(
        {'vpools': [1, 2],
         'storagerouters': [1, 2, 3, 4, 5, 6],
         'storagedrivers': [(1, 1, 1), (2, 1, 2), (3, 1, 3), (4, 1, 4), (5, 2, 4), (6, 2, 5), (7, 2, 6)],  # (<id>, <vpool_id>, <sr_id>)
         'mds_services': [(1, 1), (2, 1), (3, 2), (4, 3), (5, 4), (6, 5), (7, 6), (8, 7), (9, 7)]}  # (<id>, <sd_id>)
    )
    vdisks = {}
    start_id = 1
    for mds_service in mds_services.itervalues():
        vdisks.update(self._create_vdisks_for_mds_service(10, start_id, mds_service=mds_service))
        start_id += 10
    mds_services[1].capacity = 11  # on 1, vpool 1
    mds_services[1].save()
    mds_services[2].capacity = 20  # on 1, vpool 1
    mds_services[2].save()
    mds_services[3].capacity = 12  # on 2, vpool 1
    mds_services[3].save()
    mds_services[4].capacity = 14  # on 3, vpool 1
    mds_services[4].save()
    mds_services[5].capacity = 16  # on 4, vpool 1
    mds_services[5].save()
    mds_services[6].capacity = 11  # on 4, vpool 2
    mds_services[6].save()
    mds_services[7].capacity = 13  # on 5, vpool 2
    mds_services[7].save()
    mds_services[8].capacity = 19  # on 6, vpool 2
    mds_services[8].save()
    mds_services[9].capacity = 15  # on 6, vpool 2
    mds_services[9].save()
    config = MDSServiceController.get_mds_storagedriver_config_set(vpools[1])
    expected = {storagerouters[1].guid: [{'host': '10.0.0.1', 'port': 2}, {'host': '10.0.0.4', 'port': 5}, {'host': '10.0.0.3', 'port': 4}],
                storagerouters[2].guid: [{'host': '10.0.0.2', 'port': 3}, {'host': '10.0.0.1', 'port': 2}, {'host': '10.0.0.4', 'port': 5}],
                storagerouters[3].guid: [{'host': '10.0.0.3', 'port': 4}, {'host': '10.0.0.1', 'port': 2}, {'host': '10.0.0.4', 'port': 5}],
                storagerouters[4].guid: [{'host': '10.0.0.4', 'port': 5}, {'host': '10.0.0.1', 'port': 2}, {'host': '10.0.0.3', 'port': 4}]}
    self.assertDictEqual(config, expected, 'Test 1. Got:\n{0}'.format(json.dumps(config, indent=2)))
    mds_services[2].capacity = 10  # on 1, vpool 1
    mds_services[2].save()
    config = MDSServiceController.get_mds_storagedriver_config_set(vpools[1])
    expected = {storagerouters[1].guid: [{'host': '10.0.0.1', 'port': 1}, {'host': '10.0.0.4', 'port': 5}, {'host': '10.0.0.3', 'port': 4}],
                storagerouters[2].guid: [{'host': '10.0.0.2', 'port': 3}, {'host': '10.0.0.4', 'port': 5}, {'host': '10.0.0.3', 'port': 4}],
                storagerouters[3].guid: [{'host': '10.0.0.3', 'port': 4}, {'host': '10.0.0.4', 'port': 5}, {'host': '10.0.0.2', 'port': 3}],
                storagerouters[4].guid: [{'host': '10.0.0.4', 'port': 5}, {'host': '10.0.0.3', 'port': 4}, {'host': '10.0.0.2', 'port': 3}]}
    self.assertDictEqual(config, expected, 'Test 2. Got:\n{0}'.format(json.dumps(config, indent=2)))
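# --- Illustration (not from the source) ---
# The expected orderings above follow from per-service load: roughly the number of hosted
# vDisks divided by the configured capacity (each service hosts 10 vDisks in this test).
# A toy version of the selection rule, assuming load-sorted picking with safety=3 and
# ignoring tie-breaking; the real logic lives in
# MDSServiceController.get_mds_storagedriver_config_set:
def pick_mds_set(local_services, remote_services, safety=3):
    def load(service):
        return float(service['vdisks']) / service['capacity']
    ordered = [min(local_services, key=load)]                   # least-loaded local service becomes master
    ordered += sorted(remote_services, key=load)[:safety - 1]   # fill up with the least-loaded remotes
    return [{'host': s['host'], 'port': s['port']} for s in ordered]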
def execute_scrub_work(queue, vpool, scrub_info, error_messages):
    """
    Executes scrub work for a given vDisk queue and vPool, based on scrub_info
    :param queue: a Queue with vDisk guids that need to be scrubbed (they should only be members of a single vPool)
    :type queue: Queue
    :param vpool: the vPool object of the vDisks
    :type vpool: VPool
    :param scrub_info: a dict containing scrub information: `scrub_path` with the path to scrub and `storage_router` with the StorageRouter that needs to do the work
    :type scrub_info: dict
    :param error_messages: a list of error messages to be filled (errors are appended to this list)
    :type error_messages: list
    :return: None
    """
    def _verify_mds_config(current_vdisk):
        current_vdisk.invalidate_dynamics('info')
        vdisk_configs = current_vdisk.info['metadata_backend_config']
        if len(vdisk_configs) == 0:
            raise RuntimeError('Could not load MDS configuration')
        return vdisk_configs

    client = None
    lock_time = 5 * 60
    storagerouter = scrub_info['storage_router']
    scrub_directory = '{0}/scrub_work_{1}_{2}'.format(scrub_info['scrub_path'], vpool.name, storagerouter.name)
    scrub_config_key = 'ovs/vpools/{0}/proxies/scrub/scrub_config_{1}'.format(vpool.guid, storagerouter.guid)
    backend_config_key = 'ovs/vpools/{0}/proxies/scrub/backend_config_{1}'.format(vpool.guid, storagerouter.guid)
    alba_proxy_service = 'ovs-albaproxy_{0}_{1}_scrub'.format(vpool.name, storagerouter.name)

    # Deploy a proxy
    try:
        with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
            client = SSHClient(storagerouter, 'root')
            client.dir_create(scrub_directory)
            client.dir_chmod(scrub_directory, 0777)  # The Celery task is executed by the 'ovs' user, which must be able to write in it
            if ServiceManager.has_service(name=alba_proxy_service, client=client) is True and ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True:
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Re-using existing proxy service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                scrub_config = Configuration.get(scrub_config_key)
            else:
                machine_id = System.get_my_machine_id(client)
                port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|storagedriver'.format(machine_id))
                port = System.get_free_ports(selected_range=port_range, nr=1, client=client)[0]
                # Example scrub config:
                #   {u'albamgr_cfg_url': u'arakoon://config/ovs/vpools/71e2f717-f270-4a41-bbb0-d4c8c084d43e/proxies/64759516-3471-4321-b912-fb424568fc5b/config/abm?ini=%2Fopt%2FOpenvStorage%2Fconfig%2Farakoon_cacc.ini',
                #    u'fragment_cache': [u'none'], u'ips': [u'127.0.0.1'], u'log_level': u'info',
                #    u'manifest_cache_size': 17179869184, u'port': 0, u'transport': u'tcp'}
                # Example backend config:
                #   {u'alba_connection_host': u'10.100.193.155', u'alba_connection_port': 26204,
                #    u'alba_connection_preset': u'preset', u'alba_connection_timeout': 15,
                #    u'alba_connection_transport': u'TCP', u'backend_interface_retries_on_error': 5,
                #    u'backend_interface_retry_backoff_multiplier': 2.0,
                #    u'backend_interface_retry_interval_secs': 1, u'backend_type': u'ALBA'}
                scrub_config = Configuration.get('ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid))
                scrub_config['port'] = port
                scrub_config['transport'] = 'tcp'
                Configuration.set(scrub_config_key, json.dumps(scrub_config, indent=4), raw=True)

                params = {'VPOOL_NAME': vpool.name,
                          'LOG_SINK': LogHandler.get_sink_path('alba_proxy'),
                          'CONFIG_PATH': Configuration.get_configuration_path(scrub_config_key)}
                ServiceManager.add_service(name='ovs-albaproxy', params=params, client=client, target_name=alba_proxy_service)
                ServiceManager.start_service(name=alba_proxy_service, client=client)
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deployed ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))

            backend_config = Configuration.get('ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, vpool.storagedrivers[0].storagedriver_id))['backend_connection_manager']
            backend_config['alba_connection_host'] = '127.0.0.1'
            backend_config['alba_connection_port'] = scrub_config['port']
            Configuration.set(backend_config_key, json.dumps({'backend_connection_manager': backend_config}, indent=4), raw=True)
    except Exception:
        message = 'Scrubber - vPool {0} - StorageRouter {1} - An error occurred deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service)
        error_messages.append(message)
        ScheduledTaskController._logger.exception(message)
        if client is not None and ServiceManager.has_service(name=alba_proxy_service, client=client) is True:
            if ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True:
                ServiceManager.stop_service(name=alba_proxy_service, client=client)
            ServiceManager.remove_service(name=alba_proxy_service, client=client)
        if Configuration.exists(scrub_config_key):
            Configuration.delete(scrub_config_key)

    try:
        # Empty the queue of vDisks to scrub
        with remote(storagerouter.ip, [VDisk]) as rem:
            while True:
                vdisk = None
                vdisk_guid = queue.get(False)
                try:
                    # Check whether the MDS master is local; trigger an MDS handover if necessary
                    vdisk = rem.VDisk(vdisk_guid)
                    ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Started scrubbing at location {3}'.format(vpool.name, storagerouter.name, vdisk.name, scrub_directory))
                    configs = _verify_mds_config(current_vdisk=vdisk)
                    storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
                    if configs[0].get('ip') != storagedriver.storagerouter.ip:
                        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - MDS master is not local, trigger handover'.format(vpool.name, storagerouter.name, vdisk.name))
                        MDSServiceController.ensure_safety(VDisk(vdisk_guid))  # Do not use a remote VDisk instance here
                        configs = _verify_mds_config(current_vdisk=vdisk)
                        if configs[0].get('ip') != storagedriver.storagerouter.ip:
                            ScheduledTaskController._logger.warning('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Skipping because the MDS master is still not local'.format(vpool.name, storagerouter.name, vdisk.name))
                            continue

                    # Do the actual scrubbing
                    with vdisk.storagedriver_client.make_locked_client(str(vdisk.volume_id)) as locked_client:
                        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Retrieve and apply scrub work'.format(vpool.name, storagerouter.name, vdisk.name))
                        work_units = locked_client.get_scrubbing_workunits()
                        for work_unit in work_units:
                            res = locked_client.scrub(work_unit=work_unit,
                                                      scratch_dir=scrub_directory,
                                                      log_sinks=[LogHandler.get_sink_path('scrubber', allow_override=True)],
                                                      backend_config=Configuration.get_configuration_path(backend_config_key))
                            locked_client.apply_scrubbing_result(scrubbing_work_result=res)
                        if work_units:
                            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - {3} work units successfully applied'.format(vpool.name, storagerouter.name, vdisk.name, len(work_units)))
                        else:
                            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - No scrubbing required'.format(vpool.name, storagerouter.name, vdisk.name))
                except Exception:
                    if vdisk is None:
                        message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk with guid {2} could not be found'.format(vpool.name, storagerouter.name, vdisk_guid)
                    else:
                        message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Scrubbing failed'.format(vpool.name, storagerouter.name, vdisk.name)
                    error_messages.append(message)
                    ScheduledTaskController._logger.exception(message)
    except Empty:  # Raised when all items have been fetched from the queue
        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Queue completely processed'.format(vpool.name, storagerouter.name))
    except Exception:
        message = 'Scrubber - vPool {0} - StorageRouter {1} - Scrubbing failed'.format(vpool.name, storagerouter.name)
        error_messages.append(message)
        ScheduledTaskController._logger.exception(message)

    # Delete the proxy again
    try:
        with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removing service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
            client = SSHClient(storagerouter, 'root')
            client.dir_delete(scrub_directory)
            if ServiceManager.has_service(alba_proxy_service, client=client):
                ServiceManager.stop_service(alba_proxy_service, client=client)
                ServiceManager.remove_service(alba_proxy_service, client=client)
            if Configuration.exists(scrub_config_key):
                Configuration.delete(scrub_config_key)
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removed service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
    except Exception:
        message = 'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2} failed'.format(vpool.name, storagerouter.name, alba_proxy_service)
        error_messages.append(message)
        ScheduledTaskController._logger.exception(message)
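# --- Driver sketch (not from the source) ---
# One way a caller could fan work out to execute_scrub_work: a shared queue of vDisk guids
# per vPool and one worker thread per StorageRouter. The wiring below is an assumption for
# illustration (including the vpool.vdisks_guids accessor), not the framework's actual
# scheduler code.
from Queue import Queue  # Python 2; use 'queue' on Python 3
from threading import Thread

def scrub_vpool(vpool, scrub_infos):
    error_messages = []
    vdisk_queue = Queue()
    for vdisk_guid in vpool.vdisks_guids:  # assumed accessor for the vPool's vDisk guids
        vdisk_queue.put(vdisk_guid)
    threads = []
    for scrub_info in scrub_infos:  # one dict per StorageRouter: {'scrub_path': ..., 'storage_router': ...}
        thread = Thread(target=ScheduledTaskController.execute_scrub_work,
                        args=(vdisk_queue, vpool, scrub_info, error_messages))
        thread.start()
        threads.append(thread)
    for thread in threads:
        thread.join()  # execute_scrub_work swallows Empty, so each worker exits once the queue drains
    return error_messages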