def sync_with_hypervisor(vmachineguid, storagedriver_id=None):
    """
    Updates a given vmachine with data retrieved from a given pmachine.
    Tries three sources in order and stops at the first that yields a VM object:
    management center, hypervisor by id, hypervisor by devicename.
    :param vmachineguid: Guid of the virtual machine
    :param storagedriver_id: Storage Driver hosting the vmachine
    :raises RuntimeError: when no source could provide a VM object
    """
    try:
        vmachine = VMachine(vmachineguid)
    except Exception as ex:
        VMachineController._logger.info('Cannot get VMachine object: {0}'.format(str(ex)))
        raise
    vm_object = None
    # Strategy 1: query the management center (requires a storagedriver and devicename)
    if vmachine.pmachine.mgmtcenter and storagedriver_id is not None and vmachine.devicename is not None:
        try:
            mgmt_center = Factory.get_mgmtcenter(vmachine.pmachine)
            storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
            VMachineController._logger.info('Syncing vMachine (name {0}) with Management center {1}'.format(vmachine.name, vmachine.pmachine.mgmtcenter.name))
            vm_object = mgmt_center.get_vm_agnostic_object(devicename=vmachine.devicename,
                                                           ip=storagedriver.storage_ip,
                                                           mountpoint=storagedriver.mountpoint)
        except Exception as ex:
            # Best-effort: fall through to the next strategy
            VMachineController._logger.info('Error while fetching vMachine info from management center: {0}'.format(str(ex)))
    # Strategy 2: no storagedriver given — sync directly from the hypervisor by id
    if vm_object is None and storagedriver_id is None and vmachine.hypervisor_id is not None and vmachine.pmachine is not None:
        try:
            # Only the vmachine was received, so base the sync on hypervisor id and pmachine
            hypervisor = Factory.get(vmachine.pmachine)
            VMachineController._logger.info('Syncing vMachine (name {0})'.format(vmachine.name))
            vm_object = hypervisor.get_vm_agnostic_object(vmid=vmachine.hypervisor_id)
        except Exception as ex:
            VMachineController._logger.info('Error while fetching vMachine info from hypervisor: {0}'.format(str(ex)))
    # Strategy 3: storagedriver given — look the VM up on the hypervisor by devicename
    if vm_object is None and storagedriver_id is not None and vmachine.devicename is not None:
        try:
            # Storage Driver id was given, using the devicename instead (to allow hypervisor id updates
            # which can be caused by re-adding a vm to the inventory)
            pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
            storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
            hypervisor = Factory.get(pmachine)
            if not hypervisor.file_exists(storagedriver, hypervisor.clean_vmachine_filename(vmachine.devicename)):
                # VM config file no longer on the volume: nothing to sync
                return
            # Re-link the vmachine to the pmachine of the reporting storagedriver before syncing
            vmachine.pmachine = pmachine
            vmachine.save()
            VMachineController._logger.info('Syncing vMachine (device {0}, ip {1}, mountpoint {2})'.format(vmachine.devicename,
                                                                                                           storagedriver.storage_ip,
                                                                                                           storagedriver.mountpoint))
            vm_object = hypervisor.get_vm_object_by_devicename(devicename=vmachine.devicename,
                                                               ip=storagedriver.storage_ip,
                                                               mountpoint=storagedriver.mountpoint)
        except Exception as ex:
            VMachineController._logger.info('Error while fetching vMachine info from hypervisor using devicename: {0}'.format(str(ex)))
    if vm_object is None:
        message = 'Not enough information to sync vmachine'
        VMachineController._logger.info('Error: {0}'.format(message))
        raise RuntimeError(message)
    VMachineController.update_vmachine_config(vmachine, vm_object)
def resize_from_voldrv(volumename, volumesize, volumepath, storagedriver_id):
    """
    Resize a disk
    Triggered by volumedriver messages on the queue
    @param volumepath: path on hypervisor to the volume
    @param volumename: volume id of the disk
    @param volumesize: size of the volume
    @param storagedriver_id: ID of the storagedriver serving the volume
    """
    pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
    storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    hypervisor = Factory.get(pmachine)
    volumepath = hypervisor.clean_backing_disk_filename(volumepath)
    # Serialize concurrent create/resize events for the same volume
    mutex = VolatileMutex('{}_{}'.format(volumename, volumepath))
    try:
        mutex.acquire(wait=30)
        # Resolve the disk: by volume id first, then by devicename, else create a new one
        disk = VDiskList.get_vdisk_by_volume_id(volumename)
        if disk is None:
            disk = VDiskList.get_by_devicename_and_vpool(volumepath, storagedriver.vpool)
            if disk is None:
                disk = VDisk()
    finally:
        mutex.release()
    disk.devicename = volumepath
    disk.volume_id = volumename
    disk.size = volumesize
    disk.vpool = storagedriver.vpool
    disk.save()
    # Propagate to the management center and re-validate MDS safety after the resize
    VDiskController.sync_with_mgmtcenter(disk, pmachine, storagedriver)
    MDSServiceController.ensure_safety(disk)
def delete_from_voldrv(name, storagedriver_id):
    """
    This method will delete a vmachine based on the name of the vmx given
    """
    host_machine = PMachineList.get_by_storagedriver_id(storagedriver_id)
    if host_machine.hvtype not in ['VMWARE', 'KVM']:
        # Only VMware and KVM machines are handled here
        return
    cleaned_name = Factory.get(host_machine).clean_vmachine_filename(name)
    if host_machine.hvtype == 'VMWARE':
        vpool = StorageDriverList.get_by_storagedriver_id(storagedriver_id).vpool
    else:
        vpool = None
    machine = VMachineList.get_by_devicename_and_vpool(cleaned_name, vpool)
    if machine is None:
        return
    # Notify listeners, then remove the machine while keeping its vdisks around
    MessageController.fire(MessageController.Type.EVENT,
                           {'type': 'vmachine_deleted',
                            'metadata': {'name': machine.name}})
    machine.delete(abandon=['vdisks'])
def resize_from_voldrv(volumename, volumesize, volumepath, storagedriver_id):
    """
    Resize a disk
    Triggered by volumedriver messages on the queue
    @param volumepath: path on hypervisor to the volume
    @param volumename: volume id of the disk
    @param volumesize: size of the volume
    @param storagedriver_id: ID of the storagedriver serving the volume
    """
    pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
    storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    hypervisor = Factory.get(pmachine)
    volumepath = hypervisor.clean_backing_disk_filename(volumepath)
    # Serialize concurrent create/resize events for the same volume
    mutex = VolatileMutex('{}_{}'.format(volumename, volumepath))
    try:
        mutex.acquire(wait=30)
        # Resolve the disk: by volume id first, then by devicename, else create a new one
        disk = VDiskList.get_vdisk_by_volume_id(volumename)
        if disk is None:
            disk = VDiskList.get_by_devicename_and_vpool(volumepath, storagedriver.vpool)
            if disk is None:
                disk = VDisk()
    finally:
        mutex.release()
    disk.devicename = volumepath
    disk.volume_id = volumename
    disk.size = volumesize
    disk.vpool = storagedriver.vpool
    disk.save()
def update_status(storagedriver_id):
    """
    Sets Storage Driver offline in case hypervisor management Center
    reports the hypervisor pmachine related to this Storage Driver
    as unavailable.
    :param storagedriver_id: ID of the storagedriver to update its status
    """
    pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
    storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    storagerouter = storagedriver.storagerouter
    if pmachine.mgmtcenter:
        # Update status
        pmachine.invalidate_dynamics(['host_status'])
    else:
        # No management Center, cannot update status via api
        logger.info('Updating status of pmachine {0} using SSHClient'.format(pmachine.name))
        host_status = 'RUNNING'
        try:
            # Probe both SSH and the local storagerouter client; any failure means the host is down
            client = SSHClient(storagerouter, username='******')
            configuration_dir = EtcdConfiguration.get('/ovs/framework/paths|cfgdir')
            logger.info('SSHClient connected successfully to {0} at {1}'.format(pmachine.name, client.ip))
            with Remote(client.ip, [LocalStorageRouterClient]) as remote:
                lsrc = remote.LocalStorageRouterClient('{0}/storagedriver/storagedriver/{1}.json'.format(configuration_dir, storagedriver.vpool.name))
                lsrc.server_revision()
                logger.info('LocalStorageRouterClient connected successfully to {0} at {1}'.format(pmachine.name, client.ip))
        except Exception as ex:
            logger.error('Connectivity check failed, assuming host {0} is halted. {1}'.format(pmachine.name, ex))
            host_status = 'HALTED'
        if host_status != 'RUNNING':
            # Host is stopped
            storagedriver_client = StorageDriverClient.load(storagedriver.vpool)
            storagedriver_client.mark_node_offline(str(storagedriver.storagedriver_id))
def create_volume(self, target_path, metadata_backend_config, volume_size, node_id, req_timeout_secs=None):
    """
    Create a mocked volume
    """
    _ = req_timeout_secs
    from ovs.dal.lists.storagedriverlist import StorageDriverList
    new_volume_id = str(uuid.uuid4())
    if StorageDriverList.get_by_storagedriver_id(node_id) is None:
        raise ValueError('Failed to retrieve storagedriver with ID {0}'.format(node_id))
    # Register the new volume in the mocked client's class-level bookkeeping
    pool = self.vpool_guid
    StorageRouterClient.vrouter_id[pool][new_volume_id] = node_id
    StorageRouterClient._metadata_backend_config[pool][new_volume_id] = metadata_backend_config
    StorageRouterClient.volumes[pool][new_volume_id] = {'volume_id': new_volume_id,
                                                        'volume_size': volume_size,
                                                        'target_path': target_path}
    return new_volume_id
def clone(diskguid, snapshotid, devicename, pmachineguid, machinename, machineguid=None):
    """
    Clone a disk
    :param diskguid: Guid of the disk to clone
    :param snapshotid: Snapshot to clone from
    :param devicename: Device name for the new disk
    :param pmachineguid: Guid of the pmachine hosting the clone
    :param machinename: Name of the machine owning the clone
    :param machineguid: Optional guid of the owning vmachine; defaults to the source disk's vmachine
    :return: dict with diskguid, name and backingdevice of the clone
    """
    pmachine = PMachine(pmachineguid)
    hypervisor = Factory.get(pmachine)
    description = '{} {}'.format(machinename, devicename)
    properties_to_clone = ['description', 'size', 'type', 'retentionpolicyguid', 'snapshotpolicyguid', 'autobackup']
    vdisk = VDisk(diskguid)
    location = hypervisor.get_backing_disk_path(machinename, devicename)
    # Create the model object for the clone before touching the volumedriver
    new_vdisk = VDisk()
    new_vdisk.copy(vdisk, include=properties_to_clone)
    new_vdisk.parent_vdisk = vdisk
    new_vdisk.name = '{0}-clone'.format(vdisk.name)
    new_vdisk.description = description
    new_vdisk.devicename = hypervisor.clean_backing_disk_filename(location)
    new_vdisk.parentsnapshot = snapshotid
    new_vdisk.vmachine = VMachine(machineguid) if machineguid else vdisk.vmachine
    new_vdisk.vpool = vdisk.vpool
    new_vdisk.save()
    try:
        storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
        if storagedriver is None:
            raise RuntimeError('Could not find StorageDriver with id {0}'.format(vdisk.storagedriver_id))
        mds_service = MDSServiceController.get_preferred_mds(storagedriver.storagerouter, vdisk.vpool)
        if mds_service is None:
            raise RuntimeError('Could not find a MDS service')
        logger.info('Clone snapshot {} of disk {} to location {}'.format(snapshotid, vdisk.name, location))
        volume_id = vdisk.storagedriver_client.create_clone(
            target_path=location,
            metadata_backend_config=MDSMetaDataBackendConfig([MDSNodeConfig(address=str(mds_service.service.storagerouter.ip),
                                                                            port=mds_service.service.ports[0])]),
            parent_volume_id=str(vdisk.volume_id),
            parent_snapshot_id=str(snapshotid),
            node_id=str(vdisk.storagedriver_id)
        )
    except Exception as ex:
        # Roll back: remove the model object and any (partially) created volume, then re-raise
        logger.error('Caught exception during clone, trying to delete the volume. {0}'.format(ex))
        new_vdisk.delete()
        VDiskController.delete_volume(location)
        raise
    new_vdisk.volume_id = volume_id
    new_vdisk.save()
    try:
        # Best-effort: the clone exists even if MDS safety could not be ensured
        MDSServiceController.ensure_safety(new_vdisk)
    except Exception as ex:
        logger.error('Caught exception during "ensure_safety" {0}'.format(ex))
    return {'diskguid': new_vdisk.guid, 'name': new_vdisk.name, 'backingdevice': location}
def _log(task, kwargs, storagedriver_id):
    """
    Persist a volumedriver event as a Log record.
    """
    entry = Log()
    entry.time = time.time()
    entry.source = 'VOLUMEDRIVER_EVENT'
    # Record which task fired and with which arguments
    task_class = task.__class__
    entry.module = task_class.__module__
    entry.method = task_class.__name__
    entry.method_kwargs = kwargs
    entry.storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    entry.save()
def _log(task, kwargs, storagedriver_id):
    """
    Store a volumedriver event in the Log model.
    """
    record = Log()
    record.source = 'VOLUMEDRIVER_EVENT'
    record.time = time.time()
    # Identify the originating task by its class
    record.module = task.__class__.__module__
    record.method = task.__class__.__name__
    record.method_kwargs = kwargs
    record.storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    record.save()
def up_and_running(storagedriver_id):
    """
    Volumedriver informs us that the service is completely started. Post-start events can be executed
    :param storagedriver_id: ID of the storagedriver
    """
    driver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    if driver is None:
        raise RuntimeError('A Storage Driver with id {0} could not be found.'.format(storagedriver_id))
    # Count this (re)start of the volumedriver service
    driver.startup_counter = driver.startup_counter + 1
    driver.save()
def get_storagedriver_by_id(storagedriver_id):
    """
    Fetches the storagedriver with its storagedriver_id
    :param storagedriver_id: id of the storagedriver
    :type storagedriver_id: str
    :return: The storagedriver DAL object
    :rtype: ovs.dal.hybrids.storagedriver.STORAGEDRIVER
    """
    # Thin convenience wrapper around the DAL list lookup
    storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    return storagedriver
def _execute_scrub_work(scrub_location, vdisk_guids):
    """
    Executes scrub work for the given vDisks at the given scrub location.
    :param scrub_location: Directory where scrub work is staged
    :param vdisk_guids: Guids of the vDisks to scrub
    :return: the processed vdisk_guids
    :raises Exception: aggregated failures when one or more vDisks could not be scrubbed
    """
    def _verify_mds_config(current_vdisk):
        # Reload and return the vDisk's MDS backend config; fail when it cannot be loaded
        current_vdisk.invalidate_dynamics(['info'])
        vdisk_configs = current_vdisk.info['metadata_backend_config']
        if len(vdisk_configs) == 0:
            raise RuntimeError('Could not load MDS configuration')
        return vdisk_configs

    ScheduledTaskController._logger.info('Execute Scrub - Started')
    ScheduledTaskController._logger.info('Execute Scrub - Scrub location - {0}'.format(scrub_location))
    total = len(vdisk_guids)
    skipped = 0
    storagedrivers = {}  # Cache: storagedriver_id -> StorageDriver, to avoid repeated lookups
    failures = []
    for vdisk_guid in vdisk_guids:
        vdisk = VDisk(vdisk_guid)
        try:
            # Load the vDisk's StorageDriver
            ScheduledTaskController._logger.info('Execute Scrub - Virtual disk {0} - {1} - Started'.format(vdisk.guid, vdisk.name))
            vdisk.invalidate_dynamics(['storagedriver_id'])
            if vdisk.storagedriver_id not in storagedrivers:
                storagedrivers[vdisk.storagedriver_id] = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
            storagedriver = storagedrivers[vdisk.storagedriver_id]
            # Load the vDisk's MDS configuration
            configs = _verify_mds_config(current_vdisk=vdisk)
            # Check MDS master is local. Trigger MDS handover if necessary
            if configs[0].get('ip') != storagedriver.storagerouter.ip:
                ScheduledTaskController._logger.debug('Execute Scrub - Virtual disk {0} - {1} - MDS master is not local, trigger handover'.format(vdisk.guid, vdisk.name))
                MDSServiceController.ensure_safety(vdisk)
                configs = _verify_mds_config(current_vdisk=vdisk)
                if configs[0].get('ip') != storagedriver.storagerouter.ip:
                    # Handover did not make the master local: skip this disk rather than fail
                    skipped += 1
                    ScheduledTaskController._logger.info('Execute Scrub - Virtual disk {0} - {1} - Skipping because master MDS still not local'.format(vdisk.guid, vdisk.name))
                    continue
            with vdisk.storagedriver_client.make_locked_client(str(vdisk.volume_id)) as locked_client:
                ScheduledTaskController._logger.info('Execute Scrub - Virtual disk {0} - {1} - Retrieve and apply scrub work'.format(vdisk.guid, vdisk.name))
                work_units = locked_client.get_scrubbing_workunits()
                for work_unit in work_units:
                    scrubbing_result = locked_client.scrub(work_unit, scrub_location, log_sinks=[SCRUBBER_LOGFILE_LOCATION])
                    locked_client.apply_scrubbing_result(scrubbing_result)
                if work_units:
                    ScheduledTaskController._logger.info('Execute Scrub - Virtual disk {0} - {1} - Scrub successfully applied'.format(vdisk.guid, vdisk.name))
                else:
                    ScheduledTaskController._logger.info('Execute Scrub - Virtual disk {0} - {1} - No scrubbing required'.format(vdisk.guid, vdisk.name))
        except Exception as ex:
            # Collect failures so the remaining disks are still processed
            failures.append('Failed scrubbing work unit for volume {0} with guid {1}: {2}'.format(vdisk.name, vdisk.guid, ex))
    failed = len(failures)
    ScheduledTaskController._logger.info('Execute Scrub - Finished - Success: {0} - Failed: {1} - Skipped: {2}'.format((total - failed - skipped), failed, skipped))
    if failed > 0:
        raise Exception('\n - '.join(failures))
    return vdisk_guids
def migrate(self, volume_id, node_id, force_restart, req_timeout_secs=None):
    """
    Dummy migrate method
    """
    _ = force_restart, req_timeout_secs
    from ovs.dal.lists.storagedriverlist import StorageDriverList
    # Validate the target node before moving the mocked volume
    if StorageDriverList.get_by_storagedriver_id(node_id) is None:
        raise ValueError("Failed to retrieve storagedriver with ID {0}".format(node_id))
    StorageRouterClient.vrouter_id[self.vpool_guid][volume_id] = node_id
def delete(diskguid):
    """
    Delete a vdisk through API
    @param diskguid: GUID of the vdisk to delete
    """
    disk = VDisk(diskguid)
    driver = StorageDriverList.get_by_storagedriver_id(disk.storagedriver_id)
    # The backing file lives under the driver's mountpoint
    path = os.path.join(driver.mountpoint, disk.devicename)
    logger.info('Deleting disk {0} on location {1}'.format(disk.name, path))
    VDiskController.delete_volume(location=path)
    logger.info('Deleted disk {0}'.format(path))
def migrate(self, volume_id, node_id, force_restart, req_timeout_secs=None):
    """
    Dummy migrate method
    """
    _ = force_restart, req_timeout_secs
    from ovs.dal.lists.storagedriverlist import StorageDriverList
    target = StorageDriverList.get_by_storagedriver_id(node_id)
    if target is None:
        raise ValueError('Failed to retrieve storagedriver with ID {0}'.format(node_id))
    # Re-point the mocked volume to its new owning node
    StorageRouterClient.vrouter_id[self.vpool_guid][volume_id] = node_id
def mountpoint_available_from_voldrv(mountpoint, storagedriver_id):
    """
    Hook for (re)exporting the NFS mountpoint
    """
    driver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    if driver is None:
        raise RuntimeError('A Storage Driver with id {0} could not be found.'.format(storagedriver_id))
    if driver.storagerouter.pmachine.hvtype != 'VMWARE':
        # Only VMware hosts consume the NFS export
        return
    exports = Nfsexports()
    exports.unexport(mountpoint)
    exports.export(mountpoint)
    exports.trigger_rpc_mountd()
def _log(task, kwargs, storagedriver_id):
    """
    Log an event
    """
    # Resolve the storagedriver guid for the event metadata
    driver_guid = StorageDriverList.get_by_storagedriver_id(storagedriver_id).guid
    event_logger = LogHandler.get('log', name='volumedriver_event')
    event_logger.info('[{0}.{1}] - {2} - {3}'.format(task.__class__.__module__,
                                                     task.__class__.__name__,
                                                     json.dumps(kwargs),
                                                     json.dumps({'storagedriver': driver_guid})))
def _execute_scrub_work(scrub_location, vdisk_guids):
    """
    Executes scrub work for the given vDisks at the given scrub location.
    :param scrub_location: Directory where scrub work is staged
    :param vdisk_guids: Guids of the vDisks to scrub
    """
    def verify_mds_config(current_vdisk):
        # Reload and return the vDisk's MDS backend config; fail when it cannot be loaded
        current_vdisk.invalidate_dynamics(["info"])
        vdisk_configs = current_vdisk.info["metadata_backend_config"]
        if len(vdisk_configs) == 0:
            raise RuntimeError("Could not load MDS configuration")
        return vdisk_configs

    logger.info("Scrub location: {0}".format(scrub_location))
    total = len(vdisk_guids)
    skipped = 0
    storagedrivers = {}  # Cache: storagedriver_id -> StorageDriver, to avoid repeated lookups
    failures = []
    for vdisk_guid in vdisk_guids:
        vdisk = VDisk(vdisk_guid)
        try:
            # Load the vDisk's StorageDriver
            logger.info("Scrubbing virtual disk {0} with guid {1}".format(vdisk.name, vdisk.guid))
            vdisk.invalidate_dynamics(["storagedriver_id"])
            if vdisk.storagedriver_id not in storagedrivers:
                storagedrivers[vdisk.storagedriver_id] = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
            storagedriver = storagedrivers[vdisk.storagedriver_id]
            # Load the vDisk's MDS configuration
            configs = verify_mds_config(current_vdisk=vdisk)
            # Check MDS master is local. Trigger MDS handover if necessary
            if configs[0].get("ip") != storagedriver.storagerouter.ip:
                logger.debug("MDS for volume {0} is not local. Trigger handover".format(vdisk.volume_id))
                MDSServiceController.ensure_safety(vdisk)
                configs = verify_mds_config(current_vdisk=vdisk)
                if configs[0].get("ip") != storagedriver.storagerouter.ip:
                    # Handover did not make the master local: skip this disk rather than fail
                    skipped += 1
                    logger.info("Skipping scrubbing work unit for volume {0}: MDS master is not local".format(vdisk.volume_id))
                    continue
            with vdisk.storagedriver_client.make_locked_client(str(vdisk.volume_id)) as locked_client:
                work_units = locked_client.get_scrubbing_workunits()
                for work_unit in work_units:
                    scrubbing_result = locked_client.scrub(work_unit, scrub_location)
                    locked_client.apply_scrubbing_result(scrubbing_result)
                if work_units:
                    logger.info("Scrubbing successfully applied")
        # Fixed: 'except Exception, ex' is Python-2-only syntax and inconsistent
        # with the 'except ... as ...' form used everywhere else in this file
        except Exception as ex:
            failures.append(
                "Failed scrubbing work unit for volume {0} with guid {1}: {2}".format(vdisk.name, vdisk.guid, ex)
            )
def _log(task, kwargs, storagedriver_id):
    """
    Log an event
    """
    event_logger = LogHandler.get('log', name='volumedriver_event')
    # Event metadata carries the guid of the reporting storagedriver
    metadata = {'storagedriver': StorageDriverList.get_by_storagedriver_id(storagedriver_id).guid}
    message = '[{0}.{1}] - {2} - {3}'.format(task.__class__.__module__,
                                             task.__class__.__name__,
                                             json.dumps(kwargs),
                                             json.dumps(metadata))
    event_logger.info(message)
def sync_with_hypervisor(vmachineguid, storagedriver_id=None):
    """
    Updates a given vmachine with data retreived from a given pmachine
    :param vmachineguid: Guid of the virtual machine
    :param storagedriver_id: Optional Storage Driver hosting the vmachine; determines the sync strategy
    :raises RuntimeError: when there is not enough information to sync, or no VM object could be retrieved
    """
    try:
        vmachine = VMachine(vmachineguid)
        if storagedriver_id is None and vmachine.hypervisor_id is not None and vmachine.pmachine is not None:
            # Only the vmachine was received, so base the sync on hypervisorid and pmachine
            hypervisor = Factory.get(vmachine.pmachine)
            logger.info('Syncing vMachine (name {})'.format(vmachine.name))
            vm_object = hypervisor.get_vm_agnostic_object(vmid=vmachine.hypervisor_id)
        elif storagedriver_id is not None and vmachine.devicename is not None:
            # Storage Driver id was given, using the devicename instead (to allow hypervisorid updates
            # which can be caused by re-adding a vm to the inventory)
            pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
            storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
            hypervisor = Factory.get(pmachine)
            if not hypervisor.file_exists(vmachine.vpool, hypervisor.clean_vmachine_filename(vmachine.devicename)):
                # VM config file no longer on the vpool: nothing to sync
                return
            # Re-link the vmachine to the pmachine of the reporting storagedriver before syncing
            vmachine.pmachine = pmachine
            vmachine.save()
            logger.info('Syncing vMachine (device {}, ip {}, mtpt {})'.format(vmachine.devicename,
                                                                              storagedriver.storage_ip,
                                                                              storagedriver.mountpoint))
            vm_object = hypervisor.get_vm_object_by_devicename(devicename=vmachine.devicename,
                                                               ip=storagedriver.storage_ip,
                                                               mountpoint=storagedriver.mountpoint)
        else:
            message = 'Not enough information to sync vmachine'
            logger.info('Error: {0}'.format(message))
            raise RuntimeError(message)
    except Exception as ex:
        logger.info('Error while fetching vMachine info: {0}'.format(str(ex)))
        raise
    if vm_object is None:
        message = 'Could not retreive hypervisor vmachine object'
        logger.info('Error: {0}'.format(message))
        raise RuntimeError(message)
    else:
        VMachineController.update_vmachine_config(vmachine, vm_object)
def get_by_storagedriver_id(storagedriver_id):
    """
    Get pMachine that hosts a given storagedriver_id
    """
    # Walk the chain storagedriver -> storagerouter -> pmachine, failing loudly at every gap
    driver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    if driver is None:
        raise RuntimeError('StorageDriver {0} could not be found'.format(storagedriver_id))
    router = driver.storagerouter
    if router is None:
        raise RuntimeError('StorageDriver {0} not linked to a StorageRouter'.format(driver.name))
    host = router.pmachine
    if host is None:
        raise RuntimeError('StorageRouter {0} not linked to a pMachine'.format(router.name))
    return host
def mountpoint_available_from_voldrv(mountpoint, storagedriver_id):
    """
    Hook for (re)exporting the NFS mountpoint
    """
    storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    if storagedriver is None:
        raise RuntimeError('A Storage Driver with id {0} could not be found.'.format(storagedriver_id))
    # NFS (re)export is only relevant on VMware hosts
    if storagedriver.storagerouter.pmachine.hvtype == 'VMWARE':
        nfs_exports = Nfsexports()
        nfs_exports.unexport(mountpoint)
        nfs_exports.export(mountpoint)
        nfs_exports.trigger_rpc_mountd()
def up_and_running(mountpoint, storagedriver_id):
    """
    Volumedriver informs us that the service is completely started. Post-start events can be executed
    """
    driver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    if driver is None:
        raise RuntimeError('A Storage Driver with id {0} could not be found.'.format(storagedriver_id))
    # Count this (re)start of the volumedriver service
    driver.startup_counter += 1
    driver.save()
    if driver.storagerouter.pmachine.hvtype != 'VMWARE':
        return
    client = SSHClient(driver.storagerouter)
    # Classic VMware mode serves the vPool over NFS, so refresh the export
    if client.config_read('ovs.storagedriver.vmware_mode') == 'classic':
        exports = Nfsexports()
        exports.unexport(mountpoint)
        exports.export(mountpoint)
        exports.trigger_rpc_mountd()
def create_volume(self, target_path, metadata_backend_config, volume_size, node_id):
    """
    Create a mocked volume
    """
    from ovs.dal.lists.storagedriverlist import StorageDriverList
    if StorageDriverList.get_by_storagedriver_id(node_id) is None:
        raise ValueError('Failed to retrieve storagedriver with ID {0}'.format(node_id))
    # Register the new volume in the mocked client's class-level bookkeeping
    fresh_id = str(uuid.uuid4())
    pool = self.vpool_guid
    StorageRouterClient.vrouter_id[pool][fresh_id] = node_id
    StorageRouterClient._metadata_backend_config[pool][fresh_id] = metadata_backend_config
    StorageRouterClient.volumes[pool][fresh_id] = {'volume_id': fresh_id,
                                                   'volume_size': volume_size,
                                                   'target_path': target_path}
    return fresh_id
def rename_from_voldrv(old_name, new_name, storagedriver_id):
    """
    This machine will handle the rename of a vmx file
    :param old_name: Old name of vmx
    :param new_name: New name for the vmx
    :param storagedriver_id: Storage Driver hosting the vmachine
    """
    pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
    if pmachine.hvtype not in ['VMWARE', 'KVM']:
        # Only VMware and KVM machines are handled here
        return
    hypervisor = Factory.get(pmachine)
    if pmachine.hvtype == 'VMWARE':
        storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
        vpool = storagedriver.vpool
    else:
        vpool = None
    old_name = hypervisor.clean_vmachine_filename(old_name)
    new_name = hypervisor.clean_vmachine_filename(new_name)
    scenario = hypervisor.get_rename_scenario(old_name, new_name)
    if scenario == 'RENAME':
        # Most likely a change from path. Updating path
        vm = VMachineList.get_by_devicename_and_vpool(old_name, vpool)
        if vm is not None:
            vm.devicename = new_name
            vm.save()
    elif scenario == 'UPDATE':
        vm = VMachineList.get_by_devicename_and_vpool(new_name, vpool)
        if vm is None:
            # The vMachine doesn't seem to exist, so it's likely the create didn't came trough
            # Let's create it anyway
            VMachineController.update_from_voldrv(new_name, storagedriver_id=storagedriver_id)
            vm = VMachineList.get_by_devicename_and_vpool(new_name, vpool)
            if vm is None:
                raise RuntimeError('Could not create vMachine on rename. Aborting.')
        try:
            VMachineController.sync_with_hypervisor(vm.guid, storagedriver_id=storagedriver_id)
            vm.status = 'SYNC'
        # Fixed: bare 'except:' also swallowed SystemExit/KeyboardInterrupt;
        # narrow to Exception while keeping the best-effort SYNC_NOK marking
        except Exception:
            vm.status = 'SYNC_NOK'
        vm.save()
def dtl_state_transition(volume_name, old_state, new_state, storagedriver_id):
    """
    Triggered by volumedriver when DTL state changes
    :param volume_name: ID of the volume
    :param old_state: Previous DTL status
    :param new_state: New DTL status
    :param storagedriver_id: ID of the storagedriver hosting the volume
    :return: None
    """
    # Only a transition into Degraded (not coming from Standalone) needs a checkup
    if new_state != VolumeDriverEvents_pb2.Degraded or old_state == VolumeDriverEvents_pb2.Standalone:
        return
    vdisk = VDiskList.get_vdisk_by_volume_id(volume_name)
    if not vdisk:
        return
    logger.info('Degraded DTL detected for volume {0} with guid {1}'.format(vdisk.name, vdisk.guid))
    reporting_driver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    VDiskController.dtl_checkup(vdisk_guid=vdisk.guid,
                                storagerouters_to_exclude=[reporting_driver.storagerouter.guid],
                                chain_timeout=600)
def update_status(storagedriver_id):
    """
    Sets Storage Driver offline in case hypervisor management Center
    reports the hypervisor pmachine related to this Storage Driver
    as unavailable.
    :param storagedriver_id: ID of the storagedriver to update its status
    """
    pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
    storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    storagerouter = storagedriver.storagerouter
    if pmachine.mgmtcenter:
        # Update status
        pmachine.invalidate_dynamics(['host_status'])
    else:
        # No management Center, cannot update status via api
        logger.info('Updating status of pmachine {0} using SSHClient'.format(pmachine.name))
        host_status = 'RUNNING'
        try:
            # Probe both SSH and the local storagerouter client; any failure means the host is down
            client = SSHClient(storagerouter, username='******')
            configuration_dir = EtcdConfiguration.get('/ovs/framework/paths|cfgdir')
            logger.info('SSHClient connected successfully to {0} at {1}'.format(pmachine.name, client.ip))
            with Remote(client.ip, [LocalStorageRouterClient]) as remote:
                lsrc = remote.LocalStorageRouterClient('{0}/storagedriver/storagedriver/{1}.json'.format(configuration_dir, storagedriver.vpool.name))
                lsrc.server_revision()
                logger.info('LocalStorageRouterClient connected successfully to {0} at {1}'.format(pmachine.name, client.ip))
        except Exception as ex:
            logger.error('Connectivity check failed, assuming host {0} is halted. {1}'.format(pmachine.name, ex))
            host_status = 'HALTED'
        if host_status != 'RUNNING':
            # Host is stopped
            storagedriver_client = StorageDriverClient.load(storagedriver.vpool)
            storagedriver_client.mark_node_offline(str(storagedriver.storagedriver_id))
def clone(diskguid, snapshotid, devicename, pmachineguid, machinename, machineguid=None):
    """
    Clone a disk
    :param diskguid: Guid of the disk to clone
    :param snapshotid: Snapshot to clone from
    :param devicename: Device name for the new disk
    :param pmachineguid: Guid of the pmachine hosting the clone
    :param machinename: Name of the machine owning the clone
    :param machineguid: Optional guid of the owning vmachine; defaults to the source disk's vmachine
    :return: dict with diskguid, name and backingdevice of the clone
    """
    pmachine = PMachine(pmachineguid)
    hypervisor = Factory.get(pmachine)
    description = "{} {}".format(machinename, devicename)
    properties_to_clone = ["description", "size", "type", "retentionpolicyguid", "snapshotpolicyguid", "autobackup"]
    vdisk = VDisk(diskguid)
    location = hypervisor.get_backing_disk_path(machinename, devicename)
    # Create the model object for the clone before touching the volumedriver
    new_vdisk = VDisk()
    new_vdisk.copy(vdisk, include=properties_to_clone)
    new_vdisk.parent_vdisk = vdisk
    new_vdisk.name = "{0}-clone".format(vdisk.name)
    new_vdisk.description = description
    new_vdisk.devicename = hypervisor.clean_backing_disk_filename(location)
    new_vdisk.parentsnapshot = snapshotid
    new_vdisk.vmachine = VMachine(machineguid) if machineguid else vdisk.vmachine
    new_vdisk.vpool = vdisk.vpool
    new_vdisk.save()
    # NOTE(review): unlike the other clone implementation in this file, a failure below
    # leaves the freshly saved new_vdisk model object behind without rollback — confirm intended
    storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
    if storagedriver is None:
        raise RuntimeError("Could not find StorageDriver with id {0}".format(vdisk.storagedriver_id))
    mds_service = MDSServiceController.get_preferred_mds(storagedriver.storagerouter, vdisk.vpool)
    if mds_service is None:
        raise RuntimeError("Could not find a MDS service")
    logger.info("Clone snapshot {} of disk {} to location {}".format(snapshotid, vdisk.name, location))
    volume_id = vdisk.storagedriver_client.create_clone(
        target_path=location,
        metadata_backend_config=MDSMetaDataBackendConfig(
            [MDSNodeConfig(address=str(mds_service.service.storagerouter.ip), port=mds_service.service.ports[0])]
        ),
        parent_volume_id=str(vdisk.volume_id),
        parent_snapshot_id=str(snapshotid),
        node_id=str(vdisk.storagedriver_id),
    )
    new_vdisk.volume_id = volume_id
    new_vdisk.save()
    MDSServiceController.ensure_safety(new_vdisk)
    return {"diskguid": new_vdisk.guid, "name": new_vdisk.name, "backingdevice": location}
def up_and_running(mountpoint, storagedriver_id):
    """
    Volumedriver informs us that the service is completely started. Post-start events can be executed
    :param mountpoint: Mountpoint to check
    :param storagedriver_id: ID of the storagedriver
    """
    driver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    if driver is None:
        raise RuntimeError('A Storage Driver with id {0} could not be found.'.format(storagedriver_id))
    # Count this (re)start of the volumedriver service
    driver.startup_counter += 1
    driver.save()
    if driver.storagerouter.pmachine.hvtype != 'VMWARE':
        return
    client = SSHClient(driver.storagerouter)
    machine_id = System.get_my_machine_id(client)
    vmware_mode = EtcdConfiguration.get('/ovs/framework/hosts/{0}/storagedriver|vmware_mode'.format(machine_id))
    # Classic VMware mode serves the vPool over NFS, so refresh the export
    if vmware_mode == 'classic':
        exports = Nfsexports()
        exports.unexport(mountpoint)
        exports.export(mountpoint)
        exports.trigger_rpc_mountd()
def create_volume(self, target_path, metadata_backend_config, volume_size, node_id, req_timeout_secs=None):
    """
    Create a mocked volume
    """
    _ = req_timeout_secs
    from ovs.dal.lists.storagedriverlist import StorageDriverList
    if StorageDriverList.get_by_storagedriver_id(node_id) is None:
        raise ValueError("Failed to retrieve storagedriver with ID {0}".format(node_id))
    generated_id = str(uuid.uuid4())
    # Register the new volume in the mocked client's class-level bookkeeping
    pool = self.vpool_guid
    StorageRouterClient.vrouter_id[pool][generated_id] = node_id
    StorageRouterClient._metadata_backend_config[pool][generated_id] = metadata_backend_config
    StorageRouterClient.volumes[pool][generated_id] = {"volume_id": generated_id,
                                                       "volume_size": volume_size,
                                                       "target_path": target_path}
    return generated_id
def delete_from_voldrv(volumename, storagedriver_id):
    """
    Delete a disk
    Triggered by volumedriver messages on the queue
    @param volumename: volume id of the disk
    @param storagedriver_id: ID of the storagedriver reporting the delete
    """
    _ = storagedriver_id  # NOTE(review): comment said "for logging purposes" but it IS used below — confirm
    disk = VDiskList.get_vdisk_by_volume_id(volumename)
    if disk is not None:
        # Serialize against concurrent events on the same volume
        mutex = VolatileMutex('{}_{}'.format(volumename, disk.devicename))
        try:
            mutex.acquire(wait=20)
            pmachine = None
            try:
                pmachine = PMachineList.get_by_storagedriver_id(disk.storagedriver_id)
            except RuntimeError as ex:
                if 'could not be found' not in str(ex):
                    raise
                # else: pmachine can't be loaded, because the volumedriver doesn't know about it anymore
            if pmachine is not None:
                # Poll up to ~5s: if the backing file still exists, this delete event is stale
                limit = 5
                storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
                hypervisor = Factory.get(pmachine)
                exists = hypervisor.file_exists(storagedriver, disk.devicename)
                while limit > 0 and exists is True:
                    time.sleep(1)
                    exists = hypervisor.file_exists(storagedriver, disk.devicename)
                    limit -= 1
                if exists is True:
                    logger.info('Disk {0} still exists, ignoring delete'.format(disk.devicename))
                    return
            logger.info('Delete disk {}'.format(disk.name))
            # Remove dependent MDS services before deleting the disk itself
            for mds_service in disk.mds_services:
                mds_service.delete()
            disk.delete()
        finally:
            mutex.release()
def sync_with_hypervisor(vmachineguid, storagedriver_id=None):
    """
    Updates a given vmachine with data retreived from a given pmachine
    :param vmachineguid: Guid of the virtual machine
    :param storagedriver_id: Optional Storage Driver hosting the vmachine; determines the sync strategy
    :raises RuntimeError: when there is not enough information to sync, or no VM object could be retrieved
    """
    try:
        vmachine = VMachine(vmachineguid)
        if storagedriver_id is None and vmachine.hypervisor_id is not None and vmachine.pmachine is not None:
            # Only the vmachine was received, so base the sync on hypervisorid and pmachine
            hypervisor = Factory.get(vmachine.pmachine)
            logger.info('Syncing vMachine (name {})'.format(vmachine.name))
            vm_object = hypervisor.get_vm_agnostic_object(vmid=vmachine.hypervisor_id)
        elif storagedriver_id is not None and vmachine.devicename is not None:
            # Storage Driver id was given, using the devicename instead (to allow hypervisorid updates
            # which can be caused by re-adding a vm to the inventory)
            pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
            storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
            hypervisor = Factory.get(pmachine)
            if not hypervisor.file_exists(vmachine.vpool, hypervisor.clean_vmachine_filename(vmachine.devicename)):
                # VM config file no longer on the vpool: nothing to sync
                return
            # Re-link the vmachine to the pmachine of the reporting storagedriver before syncing
            vmachine.pmachine = pmachine
            vmachine.save()
            logger.info('Syncing vMachine (device {}, ip {}, mtpt {})'.format(vmachine.devicename, storagedriver.storage_ip, storagedriver.mountpoint))
            vm_object = hypervisor.get_vm_object_by_devicename(devicename=vmachine.devicename,
                                                               ip=storagedriver.storage_ip,
                                                               mountpoint=storagedriver.mountpoint)
        else:
            message = 'Not enough information to sync vmachine'
            logger.info('Error: {0}'.format(message))
            raise RuntimeError(message)
    except Exception as ex:
        logger.info('Error while fetching vMachine info: {0}'.format(str(ex)))
        raise
    if vm_object is None:
        message = 'Could not retreive hypervisor vmachine object'
        logger.info('Error: {0}'.format(message))
        raise RuntimeError(message)
    else:
        VMachineController.update_vmachine_config(vmachine, vm_object)
def rename_from_voldrv(old_name, new_name, storagedriver_id):
    """
    This machine will handle the rename of a vmx file
    :param old_name: Old name of vmx
    :param new_name: New name for the vmx
    :param storagedriver_id: Storage Driver hosting the vmachine
    """
    pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
    if pmachine.hvtype not in ['VMWARE', 'KVM']:
        # Only VMware and KVM renames are supported
        return
    hypervisor = Factory.get(pmachine)
    if pmachine.hvtype == 'VMWARE':
        storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
        vpool = storagedriver.vpool
    else:
        vpool = None
    old_name = hypervisor.clean_vmachine_filename(old_name)
    new_name = hypervisor.clean_vmachine_filename(new_name)
    scenario = hypervisor.get_rename_scenario(old_name, new_name)
    if scenario == 'RENAME':
        # Most likely a change from path. Updating path
        vm = VMachineList.get_by_devicename_and_vpool(old_name, vpool)
        if vm is not None:
            vm.devicename = new_name
            vm.save()
    elif scenario == 'UPDATE':
        vm = VMachineList.get_by_devicename_and_vpool(new_name, vpool)
        if vm is None:
            # The vMachine doesn't seem to exist, so it's likely the create didn't came trough
            # Let's create it anyway
            VMachineController.update_from_voldrv(new_name, storagedriver_id=storagedriver_id)
            vm = VMachineList.get_by_devicename_and_vpool(new_name, vpool)
            if vm is None:
                raise RuntimeError('Could not create vMachine on rename. Aborting.')
        try:
            VMachineController.sync_with_hypervisor(vm.guid, storagedriver_id=storagedriver_id)
            vm.status = 'SYNC'
        except Exception:
            # Fix: narrowed from a bare `except:` which would also swallow
            # SystemExit/KeyboardInterrupt; mark the vMachine out-of-sync instead
            vm.status = 'SYNC_NOK'
        vm.save()
def new_function(*args, **kwargs):
    """
    Wrapped function
    """
    # Build the log metadata before delegating to the wrapped callable
    metadata = {}
    if event_type == 'VOLUMEDRIVER_TASK':
        storagedriver = StorageDriverList.get_by_storagedriver_id(kwargs['storagedriver_id'])
        metadata['storagedriver'] = storagedriver.guid
    _logger = LogHandler.get('log', name=event_type.lower())
    message = '[{0}.{1}] - {2} - {3} - {4}'.format(f.__module__,
                                                   f.__name__,
                                                   json.dumps(list(args)),
                                                   json.dumps(kwargs),
                                                   json.dumps(metadata))
    _logger.info(message)
    # Delegate to the wrapped callable
    return f(*args, **kwargs)
def delete_from_voldrv(name, storagedriver_id):
    """
    This method will delete a vmachine based on the name of the vmx given
    """
    pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
    if pmachine.hvtype not in ['VMWARE', 'KVM']:
        # Unsupported hypervisor type; nothing to do
        return
    hypervisor = Factory.get(pmachine)
    devicename = hypervisor.clean_vmachine_filename(name)
    # Only VMware machines are scoped to a vPool
    vpool = None
    if pmachine.hvtype == 'VMWARE':
        vpool = StorageDriverList.get_by_storagedriver_id(storagedriver_id).vpool
    vm = VMachineList.get_by_devicename_and_vpool(devicename, vpool)
    if vm is None:
        return
    MessageController.fire(MessageController.Type.EVENT,
                           {'type': 'vmachine_deleted',
                            'metadata': {'name': vm.name}})
    vm.delete(abandon=['vdisks'])
def update_status(storagedriver_id):
    """
    Sets Storage Driver offline in case hypervisor management Center
    reports the hypervisor pmachine related to this Storage Driver
    as unavailable.
    """
    pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
    if not pmachine.mgmtcenter:
        # No management Center, cannot update status via api
        #TODO: should we try manually (ping, ssh)?
        return
    # Refresh the cached host state before reading it
    pmachine.invalidate_dynamics(['host_status'])
    if pmachine.host_status != 'RUNNING':
        # Host is stopped
        storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
        storagedriver_client = StorageDriverClient().load(storagedriver.vpool)
        storagedriver_client.mark_node_offline(str(storagedriver.storagedriver_id))
def new_function(*args, **kwargs):
    """
    Wrapped function
    """
    # Log the call
    metadata = ({'storagedriver': StorageDriverList.get_by_storagedriver_id(kwargs['storagedriver_id']).guid}
                if event_type == 'VOLUMEDRIVER_TASK' else {})
    _logger = LogHandler.get('log', name=event_type.lower())
    _logger.info('[{0}.{1}] - {2} - {3} - {4}'.format(f.__module__,
                                                      f.__name__,
                                                      json.dumps(list(args)),
                                                      json.dumps(kwargs),
                                                      json.dumps(metadata)))
    # Call the function
    return f(*args, **kwargs)
def migrate_from_voldrv(volume_id, new_owner_id):
    """
    Triggered when volume has changed owner (Clean migration or stolen due to other reason)
    Triggered by volumedriver messages
    :param volume_id: Volume ID of the disk
    :type volume_id: unicode
    :param new_owner_id: ID of the storage driver the volume migrated to
    :type new_owner_id: unicode
    :returns: None
    """
    vdisk = VDiskList.get_vdisk_by_volume_id(volume_id=volume_id)
    if vdisk is None:
        # Unknown volume; nothing to update
        return
    sd = StorageDriverList.get_by_storagedriver_id(storagedriver_id=new_owner_id)
    logger.info('Migration - Guid {0} - ID {1} - Detected migration for virtual disk {2}'.format(vdisk.guid, vdisk.volume_id, vdisk.name))
    if sd is None:
        return
    logger.info('Migration - Guid {0} - ID {1} - Storage Router {2} is the new owner of virtual disk {3}'.format(vdisk.guid, vdisk.volume_id, sd.storagerouter.name, vdisk.name))
    # Re-balance MDS services and DTL for the migrated disk
    MDSServiceController.mds_checkup()
    VDiskController.dtl_checkup(vdisk_guid=vdisk.guid)
def clean_bad_disk(vdiskguid):
    """
    Cleanup bad vdisk:
    - in case create_from_template failed
    - remove mds_services so the vdisk can be properly cleaned up
    :param vdiskguid: guid of vdisk
    :return: None
    """
    vdisk = VDisk(vdiskguid)
    logger.info('Cleanup vdisk {0}'.format(vdisk.name))
    # Detach the MDS services first so the model delete can succeed
    for mds_service in vdisk.mds_services:
        mds_service.delete()
    storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
    if storagedriver is not None and vdisk.devicename is not None:
        logger.debug('Removing volume from filesystem')
        devicepath = '{0}/{1}'.format(storagedriver.mountpoint, vdisk.devicename)
        VDiskController.delete_volume(devicepath)
    logger.debug('Deleting vdisk {0} from model'.format(vdisk.name))
    vdisk.delete()
def new_function(*args, **kwargs):
    """
    Wrapped function
    :param args: Arguments without default values
    :param kwargs: Arguments with default values
    """
    # Log the call
    metadata = {}
    if event_type == 'VOLUMEDRIVER_TASK' and 'storagedriver_id' in kwargs:
        sd_guid = StorageDriverList.get_by_storagedriver_id(kwargs['storagedriver_id']).guid
        metadata = {'storagedriver': sd_guid}
    _logger = Logger(event_type.lower())
    _logger.info('[{0}.{1}] - {2} - {3} - {4}'.format(f.__module__,
                                                      f.__name__,
                                                      json.dumps(list(args)),
                                                      json.dumps(kwargs),
                                                      json.dumps(metadata)))
    # Call the function
    return f(*args, **kwargs)
def new_function(*args, **kwargs):
    """
    Wrapped function
    """
    # Record the call in the model before invoking the wrapped callable
    log_entry = Log()
    for attribute, value in (('source', event_type),
                             ('module', f.__module__),
                             ('method', f.__name__),
                             ('method_args', list(args)),
                             ('method_kwargs', kwargs),
                             ('time', time.time())):
        setattr(log_entry, attribute, value)
    if event_type != 'VOLUMEDRIVER_TASK':
        log_entry.save()
    else:
        try:
            log_entry.storagedriver = StorageDriverList.get_by_storagedriver_id(kwargs['storagedriver_id'])
            log_entry.save()
        except ObjectNotFoundException:
            # Storagedriver is gone; skip persisting this log entry
            pass
    # Call the function
    return f(*args, **kwargs)
def update_status(storagedriver_id):
    """
    Sets Storage Driver offline in case hypervisor management Center reports
    the hypervisor pmachine related to this Storage Driver as unavailable.
    """
    pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
    if not pmachine.mgmtcenter:
        # No management Center, cannot update status via api
        # @TODO: should we try manually (ping, ssh)?
        return
    # Update status
    pmachine.invalidate_dynamics(['host_status'])
    if pmachine.host_status != 'RUNNING':
        # Host is stopped
        storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
        client = StorageDriverClient.load(storagedriver.vpool)
        client.mark_node_offline(str(storagedriver.storagedriver_id))
def delete_from_voldrv(volumename, storagedriver_id):
    """
    Delete a disk
    Triggered by volumedriver messages on the queue
    @param volumename: volume id of the disk
    """
    _ = storagedriver_id  # For logging purposes
    disk = VDiskList.get_vdisk_by_volume_id(volumename)
    if disk is None:
        return
    mutex = VolatileMutex('{}_{}'.format(volumename, disk.devicename))
    try:
        mutex.acquire(wait=20)
        pmachine = None
        try:
            pmachine = PMachineList.get_by_storagedriver_id(disk.storagedriver_id)
        except RuntimeError as ex:
            if 'could not be found' not in str(ex):
                raise
            # else: pmachine can't be loaded, because the volumedriver doesn't know about it anymore
        if pmachine is not None:
            storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
            hypervisor = Factory.get(pmachine)
            # Wait (max 5s) for the backing file to disappear before deleting the model
            retries_left = 5
            exists = hypervisor.file_exists(storagedriver, disk.devicename)
            while exists is True and retries_left > 0:
                time.sleep(1)
                retries_left -= 1
                exists = hypervisor.file_exists(storagedriver, disk.devicename)
            if exists is True:
                logger.info('Disk {0} still exists, ignoring delete'.format(disk.devicename))
                return
        logger.info('Delete disk {}'.format(disk.name))
        for mds_service in disk.mds_services:
            mds_service.delete()
        disk.delete()
    finally:
        mutex.release()
def _execute_scrub(queue, vpool, scrub_info, scrub_dir, error_messages):
    """
    Scrub all vDisks whose guids are on the queue, for the given vPool.
    :param queue: queue of vDisk guids to process; drained until Empty is raised
    :param vpool: vPool the vDisks belong to
    :param scrub_info: dict with 'storage_router' (StorageRouter to scrub on) and 'partition_guid'
    :param scrub_dir: scratch directory used as scrub location
    :param error_messages: shared list; failure messages are appended to it
    """
    def _verify_mds_config(current_vdisk):
        # Re-read the vDisk's MDS backend configuration; fail if it's empty
        current_vdisk.invalidate_dynamics('info')
        vdisk_configs = current_vdisk.info['metadata_backend_config']
        if len(vdisk_configs) == 0:
            raise RuntimeError('Could not load MDS configuration')
        return vdisk_configs

    storagerouter = scrub_info['storage_router']
    partition_guid = scrub_info['partition_guid']
    volatile_client = VolatileFactory.get_client()
    backend_config_key = 'ovs/vpools/{0}/proxies/scrub/backend_config_{1}'.format(vpool.guid, partition_guid)
    try:
        # Empty the queue with vDisks to scrub
        with remote(storagerouter.ip, [VDisk]) as rem:
            while True:
                vdisk = None
                vdisk_guid = queue.get(False)  # Raises Empty Exception when queue is empty, so breaking the while True loop
                volatile_key = 'ovs_scrubbing_vdisk_{0}'.format(vdisk_guid)
                try:
                    # Check MDS master is local. Trigger MDS handover if necessary
                    vdisk = rem.VDisk(vdisk_guid)
                    GenericController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Started scrubbing at location {3}'.format(vpool.name, storagerouter.name, vdisk.name, scrub_dir))
                    configs = _verify_mds_config(current_vdisk=vdisk)
                    storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
                    if configs[0].get('ip') != storagedriver.storagerouter.ip:
                        GenericController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - MDS master is not local, trigger handover'.format(vpool.name, storagerouter.name, vdisk.name))
                        MDSServiceController.ensure_safety(VDisk(vdisk_guid))  # Do not use a remote VDisk instance here
                        configs = _verify_mds_config(current_vdisk=vdisk)
                        if configs[0].get('ip') != storagedriver.storagerouter.ip:
                            # Handover did not make the master local; skip this vDisk
                            GenericController._logger.warning('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Skipping because master MDS still not local'.format(vpool.name, storagerouter.name, vdisk.name))
                            continue

                    # Check if vDisk is already being scrubbed
                    # volatile add() acts as a 24h lock: it fails when the key already exists
                    if volatile_client.add(key=volatile_key, value=volatile_key, time=24 * 60 * 60) is False:
                        GenericController._logger.warning('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Skipping because vDisk is already being scrubbed'.format(vpool.name, storagerouter.name, vdisk.name))
                        continue

                    # Do the actual scrubbing
                    with vdisk.storagedriver_client.make_locked_client(str(vdisk.volume_id)) as locked_client:
                        GenericController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Retrieve and apply scrub work'.format(vpool.name, storagerouter.name, vdisk.name))
                        work_units = locked_client.get_scrubbing_workunits()
                        for work_unit in work_units:
                            res = locked_client.scrub(work_unit=work_unit,
                                                      scratch_dir=scrub_dir,
                                                      log_sinks=[LogHandler.get_sink_path('scrubber_{0}'.format(vpool.name), allow_override=True, forced_target_type='file')],
                                                      backend_config=Configuration.get_configuration_path(backend_config_key))
                            locked_client.apply_scrubbing_result(scrubbing_work_result=res)
                        if work_units:
                            GenericController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - {3} work units successfully applied'.format(vpool.name, storagerouter.name, vdisk.name, len(work_units)))
                        else:
                            GenericController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - No scrubbing required'.format(vpool.name, storagerouter.name, vdisk.name))
                except Exception:
                    # Record the failure but keep draining the queue
                    if vdisk is None:
                        message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk with guid {2} could not be found'.format(vpool.name, storagerouter.name, vdisk_guid)
                    else:
                        message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Scrubbing failed'.format(vpool.name, storagerouter.name, vdisk.name)
                    error_messages.append(message)
                    GenericController._logger.exception(message)
                finally:
                    # Remove vDisk from volatile memory
                    volatile_client.delete(volatile_key)
    except Empty:  # Raised when all items have been fetched from the queue
        GenericController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Queue completely processed'.format(vpool.name, storagerouter.name))
    except Exception:
        message = 'Scrubber - vPool {0} - StorageRouter {1} - Scrubbing failed'.format(vpool.name, storagerouter.name)
        error_messages.append(message)
        GenericController._logger.exception(message)
def create_from_template(diskguid, machinename, devicename, pmachineguid, machineguid=None, storagedriver_guid=None):
    """
    Create a disk from a template
    @param diskguid: guid of the template disk to clone from
    @param machinename: name of the machine, used to build the disk path and description
    @param devicename: device file name for the disk (eg: mydisk-flat.vmdk)
    @param pmachineguid: guid of the pmachine whose hypervisor determines the disk path
    @param machineguid: guid of the machine to assign disk to
    @param storagedriver_guid: optional StorageDriver to create the clone on; defaults to the template's own
    @return diskguid: guid of new disk
    """
    pmachine = PMachine(pmachineguid)
    hypervisor = Factory.get(pmachine)
    disk_path = hypervisor.get_disk_path(machinename, devicename)
    description = '{} {}'.format(machinename, devicename)
    properties_to_clone = [
        'description', 'size', 'type', 'retentionpolicyid',
        'snapshotpolicyid', 'vmachine', 'vpool']
    vdisk = VDisk(diskguid)
    if vdisk.vmachine and not vdisk.vmachine.is_vtemplate:
        # Disk might not be attached to a vmachine, but still be a template
        raise RuntimeError('The given vdisk does not belong to a template')
    # Resolve the StorageDriver the new volume will live on
    if storagedriver_guid is not None:
        storagedriver_id = StorageDriver(storagedriver_guid).storagedriver_id
    else:
        storagedriver_id = vdisk.storagedriver_id
    storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    if storagedriver is None:
        raise RuntimeError('Could not find StorageDriver with id {0}'.format(storagedriver_id))
    # Model the new disk before creating the volume, so failures can roll back the model
    new_vdisk = VDisk()
    new_vdisk.copy(vdisk, include=properties_to_clone)
    new_vdisk.vpool = vdisk.vpool
    new_vdisk.devicename = hypervisor.clean_backing_disk_filename(disk_path)
    new_vdisk.parent_vdisk = vdisk
    new_vdisk.name = '{}-clone'.format(vdisk.name)
    new_vdisk.description = description
    new_vdisk.vmachine = VMachine(machineguid) if machineguid else vdisk.vmachine
    new_vdisk.save()
    mds_service = MDSServiceController.get_preferred_mds(storagedriver.storagerouter, vdisk.vpool)
    if mds_service is None:
        raise RuntimeError('Could not find a MDS service')
    logger.info('Create disk from template {} to new disk {} to location {}'.format(
        vdisk.name, new_vdisk.name, disk_path
    ))
    try:
        volume_id = vdisk.storagedriver_client.create_clone_from_template(
            target_path=disk_path,
            metadata_backend_config=MDSMetaDataBackendConfig([MDSNodeConfig(address=str(mds_service.service.storagerouter.ip), port=mds_service.service.ports[0])]),
            parent_volume_id=str(vdisk.volume_id),
            node_id=str(storagedriver_id)
        )
        new_vdisk.volume_id = volume_id
        new_vdisk.save()
        MDSServiceController.ensure_safety(new_vdisk)
    except Exception as ex:
        # Roll back the modeled disk when the volumedriver clone fails
        logger.error('Clone disk on volumedriver level failed with exception: {0}'.format(str(ex)))
        new_vdisk.delete()
        raise
    return {'diskguid': new_vdisk.guid, 'name': new_vdisk.name, 'backingdevice': disk_path}
def clone(diskguid, snapshotid, devicename, pmachineguid, machinename, machineguid=None):
    """
    Clone a disk
    @param diskguid: guid of the disk to clone
    @param snapshotid: snapshot of the parent disk to clone from
    @param devicename: device file name for the clone
    @param pmachineguid: guid of the pmachine whose hypervisor determines the backing path
    @param machinename: name of the machine, used to build the backing path and description
    @param machineguid: optional guid of the machine to assign the clone to
    """
    pmachine = PMachine(pmachineguid)
    hypervisor = Factory.get(pmachine)
    description = '{} {}'.format(machinename, devicename)
    properties_to_clone = [
        'description', 'size', 'type', 'retentionpolicyguid',
        'snapshotpolicyguid', 'autobackup'
    ]
    vdisk = VDisk(diskguid)
    location = hypervisor.get_backing_disk_path(machinename, devicename)
    # Model the clone before asking the volumedriver to create it
    new_vdisk = VDisk()
    new_vdisk.copy(vdisk, include=properties_to_clone)
    new_vdisk.parent_vdisk = vdisk
    new_vdisk.name = '{0}-clone'.format(vdisk.name)
    new_vdisk.description = description
    new_vdisk.devicename = hypervisor.clean_backing_disk_filename(location)
    new_vdisk.parentsnapshot = snapshotid
    new_vdisk.vmachine = VMachine(machineguid) if machineguid else vdisk.vmachine
    new_vdisk.vpool = vdisk.vpool
    new_vdisk.save()
    try:
        storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
        if storagedriver is None:
            raise RuntimeError('Could not find StorageDriver with id {0}'.format(vdisk.storagedriver_id))
        mds_service = MDSServiceController.get_preferred_mds(storagedriver.storagerouter, vdisk.vpool)
        if mds_service is None:
            raise RuntimeError('Could not find a MDS service')
        logger.info('Clone snapshot {} of disk {} to location {}'.format(snapshotid, vdisk.name, location))
        volume_id = vdisk.storagedriver_client.create_clone(
            target_path=location,
            metadata_backend_config=MDSMetaDataBackendConfig([
                MDSNodeConfig(address=str(mds_service.service.storagerouter.ip),
                              port=mds_service.service.ports[0])
            ]),
            parent_volume_id=str(vdisk.volume_id),
            parent_snapshot_id=str(snapshotid),
            node_id=str(vdisk.storagedriver_id))
    except Exception as ex:
        # Roll back both the model object and the created volume (if any)
        logger.error('Caught exception during clone, trying to delete the volume. {0}'.format(ex))
        new_vdisk.delete()
        VDiskController.delete_volume(location)
        raise
    new_vdisk.volume_id = volume_id
    new_vdisk.save()
    try:
        MDSServiceController.ensure_safety(new_vdisk)
    except Exception as ex:
        # ensure_safety failure is logged but does not fail the clone
        logger.error('Caught exception during "ensure_safety" {0}'.format(ex))
    return {
        'diskguid': new_vdisk.guid,
        'name': new_vdisk.name,
        'backingdevice': location
    }
def _execute_scrub_work(scrub_location, vdisk_guids):
    """
    Scrub the given vDisks at the given scratch location.
    :param scrub_location: directory used as scrub scratch space
    :param vdisk_guids: guids of the vDisks to scrub
    :return: the processed vdisk_guids
    :raises Exception: when one or more vDisks failed to scrub (messages joined)
    """
    def verify_mds_config(current_vdisk):
        """
        Retrieve the metadata backend configuration for vDisk
        :param current_vdisk: vDisk to retrieve configuration for
        :type current_vdisk: vDisk
        :return: MDS configuration for vDisk
        """
        current_vdisk.invalidate_dynamics(['info'])
        vdisk_configs = current_vdisk.info['metadata_backend_config']
        if len(vdisk_configs) == 0:
            raise RuntimeError('Could not load MDS configuration')
        return vdisk_configs

    logger.info('Execute Scrub - Started')
    logger.info('Execute Scrub - Scrub location - {0}'.format(scrub_location))
    total = len(vdisk_guids)
    skipped = 0
    storagedrivers = {}  # Cache: storagedriver_id -> StorageDriver
    failures = []
    for vdisk_guid in vdisk_guids:
        vdisk = VDisk(vdisk_guid)
        try:
            # Load the vDisk's StorageDriver
            logger.info('Execute Scrub - Virtual disk {0} - {1} - Started'.format(vdisk.guid, vdisk.name))
            vdisk.invalidate_dynamics(['storagedriver_id'])
            if vdisk.storagedriver_id not in storagedrivers:
                storagedrivers[vdisk.storagedriver_id] = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
            storagedriver = storagedrivers[vdisk.storagedriver_id]
            # Load the vDisk's MDS configuration
            configs = verify_mds_config(current_vdisk=vdisk)
            # Check MDS master is local. Trigger MDS handover if necessary
            if configs[0].get('ip') != storagedriver.storagerouter.ip:
                logger.debug('Execute Scrub - Virtual disk {0} - {1} - MDS master is not local, trigger handover'.format(vdisk.guid, vdisk.name))
                MDSServiceController.ensure_safety(vdisk)
                configs = verify_mds_config(current_vdisk=vdisk)
                if configs[0].get('ip') != storagedriver.storagerouter.ip:
                    # Handover did not help; count as skipped and move on
                    skipped += 1
                    logger.info('Execute Scrub - Virtual disk {0} - {1} - Skipping because master MDS still not local'.format(vdisk.guid, vdisk.name))
                    continue
            with vdisk.storagedriver_client.make_locked_client(str(vdisk.volume_id)) as locked_client:
                logger.info('Execute Scrub - Virtual disk {0} - {1} - Retrieve and apply scrub work'.format(vdisk.guid, vdisk.name))
                work_units = locked_client.get_scrubbing_workunits()
                for work_unit in work_units:
                    scrubbing_result = locked_client.scrub(work_unit, scrub_location)
                    locked_client.apply_scrubbing_result(scrubbing_result)
                if work_units:
                    logger.info('Execute Scrub - Virtual disk {0} - {1} - Scrub successfully applied'.format(vdisk.guid, vdisk.name))
                else:
                    logger.info('Execute Scrub - Virtual disk {0} - {1} - No scrubbing required'.format(vdisk.guid, vdisk.name))
        except Exception as ex:
            # Collect the failure and continue with the remaining vDisks
            failures.append('Failed scrubbing work unit for volume {0} with guid {1}: {2}'.format(vdisk.name, vdisk.guid, ex))
    failed = len(failures)
    logger.info('Execute Scrub - Finished - Success: {0} - Failed: {1} - Skipped: {2}'.format((total - failed - skipped), failed, skipped))
    if failed > 0:
        raise Exception('\n - '.join(failures))
    return vdisk_guids
def clone(machineguid, timestamp, name):
    """
    Clone a vmachine using the disk snapshot based on a snapshot timestamp

    @param machineguid: guid of the machine to clone
    @param timestamp: timestamp of the disk snapshots to use for the clone
    @param name: name for the new machine
    @return: guid of the new machine
    """
    machine = VMachine(machineguid)
    timestamp = str(timestamp)
    if timestamp not in (snap['timestamp'] for snap in machine.snapshots):
        raise RuntimeError('Invalid timestamp provided, not a valid snapshot of this vmachine.')
    vpool = None
    storagerouter = None
    # For VMware, take the vPool from the first disk that has one
    if machine.pmachine is not None and machine.pmachine.hvtype == 'VMWARE':
        for vdisk in machine.vdisks:
            if vdisk.vpool is not None:
                vpool = vdisk.vpool
                break
    # Take the StorageRouter from the first disk that references one
    for vdisk in machine.vdisks:
        if vdisk.storagerouter_guid:
            storagerouter = StorageRouter(vdisk.storagerouter_guid)
            break
    hv = Factory.get(machine.pmachine)
    vm_path = hv.get_vmachine_path(name, storagerouter.machine_id if storagerouter is not None else '')
    # mutex in sync_with_hypervisor uses "None" for KVM hvtype
    mutex = volatile_mutex('{0}_{1}'.format(hv.clean_vmachine_filename(vm_path), vpool.guid if vpool is not None else 'none'))
    # Map each disk guid to the snapshot guid taken at the requested timestamp
    disks = {}
    for snapshot in machine.snapshots:
        if snapshot['timestamp'] == timestamp:
            for diskguid, snapshotguid in snapshot['snapshots'].iteritems():
                disks[diskguid] = snapshotguid
    try:
        mutex.acquire(wait=120)
        new_machine = VMachine()
        new_machine.copy(machine)
        new_machine.name = name
        new_machine.devicename = hv.clean_vmachine_filename(vm_path)
        new_machine.pmachine = machine.pmachine
        new_machine.save()
    finally:
        mutex.release()
    new_disk_guids = []
    vm_disks = []
    mountpoint = None
    disks_by_order = sorted(machine.vdisks, key=lambda x: x.order)
    try:
        # Clone every disk, preserving the original disk order
        for currentDisk in disks_by_order:
            if machine.is_vtemplate and currentDisk.templatesnapshot:
                snapshotid = currentDisk.templatesnapshot
            else:
                snapshotid = disks[currentDisk.guid]
            prefix = '%s-clone' % currentDisk.name
            result = VDiskController.clone(diskguid=currentDisk.guid,
                                           snapshotid=snapshotid,
                                           devicename=prefix,
                                           pmachineguid=new_machine.pmachine_guid,
                                           machinename=new_machine.name,
                                           machineguid=new_machine.guid)
            new_disk_guids.append(result['diskguid'])
            mountpoint = StorageDriverList.get_by_storagedriver_id(currentDisk.storagedriver_id).mountpoint
            vm_disks.append(result)
    except Exception as ex:
        # Roll back the partially cloned machine
        VMachineController._logger.error('Failed to clone disks. {0}'.format(ex))
        VMachineController.delete(machineguid=new_machine.guid)
        raise
    try:
        result = hv.clone_vm(machine.hypervisor_id, name, vm_disks, mountpoint)
    except Exception as ex:
        VMachineController._logger.error('Failed to clone vm. {0}'.format(ex))
        VMachineController.delete(machineguid=new_machine.guid)
        raise
    try:
        mutex.acquire(wait=120)
        new_machine.hypervisor_id = result
        new_machine.save()
    finally:
        mutex.release()
    return new_machine.guid
def process(queue, body, mapping):
    """
    Processes the actual received body
    :param queue: name of the queue the message was received on; selects the handler
    :param body: raw message body (protobuf for volumedriver events, JSON for notifications)
    :param mapping: dict mapping protobuf event extensions to task descriptors
    :raises NotImplementedError: when the queue is not recognised
    """
    if queue == Configuration.get('ovs.core.broker.queues.storagedriver'):
        cache = VolatileFactory.get_client()
        all_extensions = None
        message = FileSystemEvents.EventMessage()
        message.ParseFromString(body)
        # Possible special tags used as `arguments` key:
        # - [NODE_ID]: Replaced by the storagedriver_id as reported by the event
        # - [CLUSTER_ID]: Replaced by the clusterid as reported by the event
        # Possible deduping key tags:
        # - [EVENT_NAME]: The name of the eventmessage type
        # - [TASK_NAME]: Task method name
        # - [<argument value>]: Any value of the `arguments` dictionary.
        logger.info('Got event, processing...')
        event = None
        for extension in mapping.keys():
            if not message.event.HasExtension(extension):
                continue
            event = message.event.Extensions[extension]
            node_id = message.node_id
            cluster_id = message.cluster_id
            for current_map in mapping[extension]:
                task = current_map['task']
                kwargs = {}
                delay = 0
                routing_key = 'generic'
                # Translate the mapping's argument spec into task kwargs
                for field, target in current_map['arguments'].iteritems():
                    if field == '[NODE_ID]':
                        kwargs[target] = node_id
                    elif field == '[CLUSTER_ID]':
                        kwargs[target] = cluster_id
                    else:
                        kwargs[target] = getattr(event, field)
                if 'options' in current_map:
                    options = current_map['options']
                    if options.get('execonstoragerouter', False):
                        # Route the task to the StorageRouter owning the reporting storagedriver
                        storagedriver = StorageDriverList.get_by_storagedriver_id(node_id)
                        if storagedriver is not None:
                            routing_key = 'sr.{0}'.format(storagedriver.storagerouter.machine_id)
                    delay = options.get('delay', 0)
                    dedupe = options.get('dedupe', False)
                    dedupe_key = options.get('dedupe_key', None)
                    if dedupe is True and dedupe_key is not None:  # We can't dedupe without a key
                        # Expand the tags in the dedupe key to build the cache key
                        key = dedupe_key
                        key = key.replace('[EVENT_NAME]', extension.full_name)
                        key = key.replace('[TASK_NAME]', task.__class__.__name__)
                        for kwarg_key in kwargs:
                            key = key.replace('[{0}]'.format(kwarg_key), kwargs[kwarg_key])
                        key = key.replace(' ', '_')
                        task_id = cache.get(key)
                        if task_id:
                            # Key exists, task was already scheduled
                            # If task is already running, the revoke message will
                            # be ignored
                            revoke(task_id)
                        _log(task, kwargs, node_id)
                        async_result = task.s(**kwargs).apply_async(countdown=delay, routing_key=routing_key)
                        cache.set(key, async_result.id, 600)  # Store the task id
                        new_task_id = async_result.id
                    else:
                        _log(task, kwargs, node_id)
                        async_result = task.s(**kwargs).apply_async(countdown=delay, routing_key=routing_key)
                        new_task_id = async_result.id
                else:
                    async_result = task.delay(**kwargs)
                    new_task_id = async_result.id
                logger.info('[{0}] {1}({2}) started on {3} with taskid {4}. Delay: {5}s'.format(queue, task.__name__, json.dumps(kwargs), routing_key, new_task_id, delay))
        if event is None:
            # No mapped extension matched; try to name the event type for the log
            message_type = 'unknown'
            if all_extensions is None:
                all_extensions = _load_extensions()
            for extension in all_extensions:
                if message.event.HasExtension(extension):
                    message_type = extension.full_name
            logger.info('A message with type {0} was received. Skipped.'.format(message_type))
    elif queue == 'notifications.info':
        logger.info('Received notification from openstack...')
        try:
            body = json.loads(body)
            print(body)
            event_type = body['event_type']
            logger.info('Processing notification for event {0}'.format(event_type))
            if event_type == 'compute.instance.update':
                old_display_name = body['payload'].get('old_display_name')
                instance_id = body['payload']['instance_id']
                display_name = body['payload'].get('display_name')
                if old_display_name and old_display_name != display_name:
                    logger.info('Caught instance rename event')
                    VMachineController.update_vmachine_name.apply_async(
                        kwargs={
                            'old_name': old_display_name,
                            'new_name': display_name,
                            'instance_id': instance_id
                        })
            elif event_type == 'volume.update.start':
                # Remember the current name so the matching .end event can detect a rename
                volume_id = body['payload']['volume_id']
                display_name = body['payload']['display_name']
                CINDER_VOLUME_UPDATE_CACHE[volume_id] = display_name
            elif event_type == 'volume.update.end':
                volume_id = body['payload']['volume_id']
                display_name = body['payload']['display_name']
                old_display_name = CINDER_VOLUME_UPDATE_CACHE.get(volume_id)
                if old_display_name and old_display_name != display_name:
                    logger.info('Caught volume rename event')
                    VDiskController.update_vdisk_name.apply_async(
                        kwargs={
                            'volume_id': volume_id,
                            'old_name': old_display_name,
                            'new_name': display_name
                        })
                    del CINDER_VOLUME_UPDATE_CACHE[volume_id]
        except Exception as ex:
            logger.error('Processing notification failed {0}'.format(ex))
        logger.info('Processed notification from openstack.')
    else:
        raise NotImplementedError('Queue {} is not yet implemented'.format(queue))
def create_from_template(diskguid, machinename, devicename, pmachineguid, machineguid=None, storagedriver_guid=None):
    """
    Create a disk from a template
    @param diskguid: guid of the template disk to clone from
    @param machinename: name of the machine, used to build the disk path and description
    @param devicename: device file name for the disk (eg: mydisk-flat.vmdk)
    @param pmachineguid: guid of the pmachine whose hypervisor determines the disk path
    @param machineguid: guid of the machine to assign disk to
    @param storagedriver_guid: optional StorageDriver to create the clone on; defaults to the template's own
    @return diskguid: guid of new disk
    """
    pmachine = PMachine(pmachineguid)
    hypervisor = Factory.get(pmachine)
    disk_path = hypervisor.get_disk_path(machinename, devicename)
    description = '{} {}'.format(machinename, devicename)
    properties_to_clone = [
        'description', 'size', 'type', 'retentionpolicyid',
        'snapshotpolicyid', 'vmachine', 'vpool'
    ]
    vdisk = VDisk(diskguid)
    if vdisk.vmachine and not vdisk.vmachine.is_vtemplate:
        # Disk might not be attached to a vmachine, but still be a template
        raise RuntimeError('The given vdisk does not belong to a template')
    # Resolve the StorageDriver the new volume will live on
    if storagedriver_guid is not None:
        storagedriver_id = StorageDriver(storagedriver_guid).storagedriver_id
    else:
        storagedriver_id = vdisk.storagedriver_id
    storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    if storagedriver is None:
        raise RuntimeError('Could not find StorageDriver with id {0}'.format(storagedriver_id))
    # Model the new disk before creating the volume, so failures can roll back the model
    new_vdisk = VDisk()
    new_vdisk.copy(vdisk, include=properties_to_clone)
    new_vdisk.vpool = vdisk.vpool
    new_vdisk.devicename = hypervisor.clean_backing_disk_filename(disk_path)
    new_vdisk.parent_vdisk = vdisk
    new_vdisk.name = '{}-clone'.format(vdisk.name)
    new_vdisk.description = description
    new_vdisk.vmachine = VMachine(machineguid) if machineguid else vdisk.vmachine
    new_vdisk.save()
    mds_service = MDSServiceController.get_preferred_mds(storagedriver.storagerouter, vdisk.vpool)
    if mds_service is None:
        raise RuntimeError('Could not find a MDS service')
    logger.info('Create disk from template {} to new disk {} to location {}'.format(vdisk.name, new_vdisk.name, disk_path))
    try:
        volume_id = vdisk.storagedriver_client.create_clone_from_template(
            target_path=disk_path,
            metadata_backend_config=MDSMetaDataBackendConfig([
                MDSNodeConfig(address=str(mds_service.service.storagerouter.ip),
                              port=mds_service.service.ports[0])
            ]),
            parent_volume_id=str(vdisk.volume_id),
            node_id=str(storagedriver_id))
        new_vdisk.volume_id = volume_id
        new_vdisk.save()
        MDSServiceController.ensure_safety(new_vdisk)
    except Exception as ex:
        # Roll back the modeled disk when the volumedriver clone fails
        logger.error('Clone disk on volumedriver level failed with exception: {0}'.format(str(ex)))
        new_vdisk.delete()
        raise
    return {
        'diskguid': new_vdisk.guid,
        'name': new_vdisk.name,
        'backingdevice': disk_path
    }
def clone(diskguid, snapshotid, devicename, pmachineguid, machinename=None, machineguid=None, detached=False):
    """
    Clone a disk, optionally creating the snapshot to clone from first.

    :param diskguid: Guid of the disk to clone
    :param snapshotid: ID of the snapshot to clone from (None = create a new snapshot and wait for it)
    :param devicename: Name of the device to use in clone's description
    :param pmachineguid: Guid of the physical machine
    :param machinename: Name of the machine the disk is attached to
    :param machineguid: Guid of the machine
    :param detached: Boolean indicating the disk is attached to a machine or not
    :raises RuntimeError: on invalid/duplicate name, missing StorageDriver,
                          or when the snapshot does not land in the backend in time
    :raises ValueError: when a machineguid is combined with detached=True
    :return: dict with 'diskguid', 'name' and 'backingdevice' of the new clone
    """
    # 1. Validations
    name_regex = "^[0-9a-zA-Z][-_a-zA-Z0-9]{1,48}[a-zA-Z0-9]$"
    if not re.match(name_regex, devicename):
        raise RuntimeError("Invalid name for virtual disk clone")
    if VDiskList.get_vdisk_by_name(vdiskname=devicename) is not None:
        raise RuntimeError("A virtual disk with this name already exists")
    vdisk = VDisk(diskguid)
    storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
    if storagedriver is None:
        raise RuntimeError('Could not find StorageDriver with ID {0}'.format(vdisk.storagedriver_id))
    if machineguid is not None and detached is True:
        raise ValueError('A vMachine GUID was specified while detached is True')

    # 2. Create new snapshot if required
    if snapshotid is None:
        timestamp = str(int(time.time()))
        metadata = {'label': '',
                    'is_consistent': False,
                    'timestamp': timestamp,
                    'machineguid': machineguid,
                    'is_automatic': True}
        sd_snapshot_id = VDiskController.create_snapshot(diskguid, metadata)
        # Poll with an increasing back-off (0 + 1 + ... + 24 seconds ~= 5 minutes total)
        # until the snapshot is reported present in the backend
        tries = 25  # 5 minutes
        while snapshotid is None and tries > 0:
            time.sleep(25 - tries)
            tries -= 1
            vdisk.invalidate_dynamics(['snapshots'])
            for snapshot in vdisk.snapshots:
                if snapshot['guid'] != sd_snapshot_id:
                    continue
                if snapshot['in_backend'] is True:
                    snapshotid = snapshot['guid']
        if snapshotid is None:
            # Best effort cleanup of the snapshot we created; the timeout error
            # below is the failure we want callers to see
            try:
                VDiskController.delete_snapshot(diskguid=diskguid, snapshotid=sd_snapshot_id)
            except Exception:
                pass
            raise RuntimeError('Could not find created snapshot in time')

    # 3. Model new cloned virtual disk
    hypervisor = Factory.get(PMachine(pmachineguid))
    location = hypervisor.get_disk_path(machinename, devicename)
    new_vdisk = VDisk()
    new_vdisk.copy(vdisk, include=['description', 'size', 'type', 'retentionpolicyguid', 'snapshotpolicyguid', 'autobackup'])
    new_vdisk.parent_vdisk = vdisk
    new_vdisk.name = devicename
    new_vdisk.description = devicename if machinename is None else '{0} {1}'.format(machinename, devicename)
    new_vdisk.devicename = hypervisor.clean_backing_disk_filename(location)
    new_vdisk.parentsnapshot = snapshotid
    if detached is False:
        new_vdisk.vmachine = VMachine(machineguid) if machineguid else vdisk.vmachine
    new_vdisk.vpool = vdisk.vpool
    new_vdisk.save()

    # 4. Configure Storage Driver
    try:
        mds_service = MDSServiceController.get_preferred_mds(storagedriver.storagerouter, vdisk.vpool)
        if mds_service is None:
            raise RuntimeError('Could not find a MDS service')
        logger.info('Clone snapshot {0} of disk {1} to location {2}'.format(snapshotid, vdisk.name, location))
        backend_config = MDSMetaDataBackendConfig([MDSNodeConfig(address=str(mds_service.service.storagerouter.ip),
                                                                 port=mds_service.service.ports[0])])
        volume_id = vdisk.storagedriver_client.create_clone(target_path=location,
                                                            metadata_backend_config=backend_config,
                                                            parent_volume_id=str(vdisk.volume_id),
                                                            parent_snapshot_id=str(snapshotid),
                                                            node_id=str(vdisk.storagedriver_id))
    except Exception as ex:
        logger.error('Caught exception during clone, trying to delete the volume. {0}'.format(ex))
        try:
            VDiskController.clean_bad_disk(new_vdisk.guid)
        except Exception as ex2:
            logger.exception('Exception during exception handling of "create_clone_from_template" : {0}'.format(str(ex2)))
        raise
    new_vdisk.volume_id = volume_id
    new_vdisk.save()

    # 5. Check MDS & DTL for new clone
    try:
        MDSServiceController.ensure_safety(new_vdisk)
    except Exception as ex:
        # Safety check failure is logged but does not fail the clone
        logger.error('Caught exception during "ensure_safety" {0}'.format(ex))
    VDiskController.dtl_checkup.delay(vdisk_guid=new_vdisk.guid)
    return {'diskguid': new_vdisk.guid,
            'name': new_vdisk.name,
            'backingdevice': location}
def deletescrubsnapshots(timestamp=None):
    """
    Delete snapshots & scrubbing policy

    Implemented delete snapshot policy:
    < 1d | 1d bucket | 1 | best of bucket   | 1d
    < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w
    < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m
    > 1m | delete

    :param timestamp: Epoch timestamp to base the bucket structure on
                      (defaults to the current time)
    """
    logger.info('Delete snapshots started')
    day = 60 * 60 * 24
    week = day * 7
    # Calculate bucket structure
    if timestamp is None:
        timestamp = time.time()
    # Anchor buckets on local midnight of the given day, shifted one day back
    offset = int(mktime(datetime.fromtimestamp(timestamp).date().timetuple())) - day
    buckets = []
    # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[
    for i in xrange(0, 7):
        buckets.append({'start': offset - (day * i),
                        'end': offset - (day * (i + 1)),
                        'type': '1d',
                        'snapshots': []})
    # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[
    for i in xrange(1, 4):
        buckets.append({'start': offset - (week * i),
                        'end': offset - (week * (i + 1)),
                        'type': '1w',
                        'snapshots': []})
    # Everything older than 4 weeks
    buckets.append({'start': offset - (week * 4),
                    'end': 0,
                    'type': 'rest',
                    'snapshots': []})
    # Place all snapshots in bucket_chains: one chain per vmachine (grouping its
    # disks' snapshots) and one chain per machine-less vdisk
    bucket_chains = []
    for vmachine in VMachineList.get_customer_vmachines():
        if any(vd.info['object_type'] in ['BASE'] for vd in vmachine.vdisks):
            bucket_chain = copy.deepcopy(buckets)
            for snapshot in vmachine.snapshots:
                timestamp = int(snapshot['timestamp'])
                for bucket in bucket_chain:
                    if bucket['start'] >= timestamp > bucket['end']:
                        for diskguid, snapshotguid in snapshot['snapshots'].iteritems():
                            bucket['snapshots'].append({'timestamp': timestamp,
                                                        'snapshotid': snapshotguid,
                                                        'diskguid': diskguid,
                                                        'is_consistent': snapshot['is_consistent']})
            bucket_chains.append(bucket_chain)
    for vdisk in VDiskList.get_without_vmachine():
        if vdisk.info['object_type'] in ['BASE']:
            bucket_chain = copy.deepcopy(buckets)
            for snapshot in vdisk.snapshots:
                timestamp = int(snapshot['timestamp'])
                for bucket in bucket_chain:
                    if bucket['start'] >= timestamp > bucket['end']:
                        bucket['snapshots'].append({'timestamp': timestamp,
                                                    'snapshotid': snapshot['guid'],
                                                    'diskguid': vdisk.guid,
                                                    'is_consistent': snapshot['is_consistent']})
            bucket_chains.append(bucket_chain)
    # Clean out the snapshot bucket_chains, we delete the snapshots we want to keep
    # And we'll remove all snapshots that remain in the buckets
    for bucket_chain in bucket_chains:
        first = True
        for bucket in bucket_chain:
            if first is True:
                # Most recent bucket: keep the "best" snapshot
                best = None
                for snapshot in bucket['snapshots']:
                    if best is None:
                        best = snapshot
                    # Consistent is better than inconsistent
                    elif snapshot['is_consistent'] and not best['is_consistent']:
                        best = snapshot
                    # Newer (larger timestamp) is better than older snapshots
                    elif snapshot['is_consistent'] == best['is_consistent'] and \
                            snapshot['timestamp'] > best['timestamp']:
                        best = snapshot
                bucket['snapshots'] = [s for s in bucket['snapshots']
                                       if s['timestamp'] != best['timestamp']]
                first = False
            elif bucket['end'] > 0:
                # Older buckets (except the catch-all 'rest'): keep the oldest snapshot
                oldest = None
                for snapshot in bucket['snapshots']:
                    if oldest is None:
                        oldest = snapshot
                    # Older (smaller timestamp) is the one we want to keep
                    elif snapshot['timestamp'] < oldest['timestamp']:
                        oldest = snapshot
                bucket['snapshots'] = [s for s in bucket['snapshots']
                                       if s['timestamp'] != oldest['timestamp']]
    # Delete obsolete snapshots (whatever is still left in the buckets)
    for bucket_chain in bucket_chains:
        for bucket in bucket_chain:
            for snapshot in bucket['snapshots']:
                VDiskController.delete_snapshot(diskguid=snapshot['diskguid'],
                                                snapshotid=snapshot['snapshotid'])
    logger.info('Delete snapshots finished')
    logger.info('Scrubbing started')
    # Collect scrub candidates: BASE disks without children (clones)
    vdisks = []
    for vmachine in VMachineList.get_customer_vmachines():
        for vdisk in vmachine.vdisks:
            if vdisk.info['object_type'] in ['BASE'] and len(vdisk.child_vdisks) == 0:
                vdisks.append(vdisk)
    for vdisk in VDiskList.get_without_vmachine():
        if vdisk.info['object_type'] in ['BASE'] and len(vdisk.child_vdisks) == 0:
            vdisks.append(vdisk)
    total = 0
    failed = 0
    skipped = 0
    storagedrivers = {}
    for vdisk in vdisks:
        try:
            total += 1
            # Load the vDisk's StorageDriver (cached per storagedriver_id)
            vdisk.invalidate_dynamics(['info', 'storagedriver_id'])
            if vdisk.storagedriver_id not in storagedrivers:
                storagedrivers[vdisk.storagedriver_id] = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
            storagedriver = storagedrivers[vdisk.storagedriver_id]
            # Load the vDisk's MDS configuration
            vdisk.invalidate_dynamics(['info'])
            configs = vdisk.info['metadata_backend_config']
            if len(configs) == 0:
                raise RuntimeError('Could not load MDS configuration')
            if configs[0]['ip'] != storagedriver.storagerouter.ip:
                # The MDS master is not local. Trigger an MDS handover and try again
                logger.debug('MDS for volume {0} is not local. Trigger handover'.format(vdisk.volume_id))
                MDSServiceController.ensure_safety(vdisk)
                vdisk.invalidate_dynamics(['info'])
                configs = vdisk.info['metadata_backend_config']
                if len(configs) == 0:
                    raise RuntimeError('Could not load MDS configuration')
                if configs[0]['ip'] != storagedriver.storagerouter.ip:
                    skipped += 1
                    logger.info('Skipping scrubbing work unit for volume {0}: MDS master is not local'.format(vdisk.volume_id))
                    continue
            work_units = vdisk.storagedriver_client.get_scrubbing_workunits(str(vdisk.volume_id))
            for work_unit in work_units:
                scrubbing_result = _storagedriver_scrubber.scrub(work_unit, str(storagedriver.mountpoint_temp))
                vdisk.storagedriver_client.apply_scrubbing_result(scrubbing_result)
        except Exception as ex:  # was `except Exception, ex:` - aligned with file style
            failed += 1
            logger.info('Failed scrubbing work unit for volume {0}: {1}'.format(vdisk.volume_id, ex))
def process(queue, body, mapping):
    """
    Processes the actual received body

    :param queue: Name of the queue the message arrived on; only the
                  volumerouter queue is implemented
    :param body: Serialized EventMessage protobuf payload
    :param mapping: Event-type -> list of task mappings driving dispatch
    :raises NotImplementedError: for any queue other than the volumerouter queue
    """
    if queue == Configuration.get('ovs.core.broker.volumerouter.queue'):
        import json
        import volumedriver.storagerouter.EventMessages_pb2 as EventMessages
        cache = VolatileFactory.get_client()
        data = EventMessages.EventMessage().FromString(body)
        # Possible special tags used as `arguments` key:
        # - [NODE_ID]: Replaced by the storagedriver_id as reported by the event
        # - [CLUSTER_ID]: Replaced by the clusterid as reported by the event
        # Possible deduping key tags:
        # - [EVENT_NAME]: The name of the eventmessage type
        # - [TASK_NAME]: Task method name
        # - [<argument value>]: Any value of the `arguments` dictionary.
        if data.type in mapping:
            for current_map in mapping[data.type]:
                task = current_map['task']
                data_container = getattr(data, current_map['property'])
                kwargs = {}
                delay = 0
                routing_key = 'generic'
                # Build the task kwargs from the event payload
                for field, target in current_map['arguments'].iteritems():
                    if field == '[NODE_ID]':
                        kwargs[target] = data.node_id
                    elif field == '[CLUSTER_ID]':
                        kwargs[target] = data.cluster_id
                    else:
                        kwargs[target] = getattr(data_container, field)
                if 'options' in current_map:
                    options = current_map['options']
                    if options.get('execonstoragerouter', False):
                        # Route the task to the storagerouter that raised the event
                        storagedriver = StorageDriverList.get_by_storagedriver_id(data.node_id)
                        if storagedriver is not None:
                            routing_key = 'sr.{0}'.format(storagedriver.storagerouter.machine_id)
                    delay = options.get('delay', 0)
                    dedupe = options.get('dedupe', False)
                    dedupe_key = options.get('dedupe_key', None)
                    if dedupe is True and dedupe_key is not None:  # We can't dedupe without a key
                        key = dedupe_key
                        key = key.replace('[EVENT_NAME]', data.type.__class__.__name__)
                        key = key.replace('[TASK_NAME]', task.__class__.__name__)
                        for kwarg_key in kwargs:
                            # str(): argument values may be non-string (e.g. protobuf ints)
                            key = key.replace('[{0}]'.format(kwarg_key), str(kwargs[kwarg_key]))
                        key = key.replace(' ', '_')
                        task_id = cache.get(key)
                        if task_id:
                            # Key exists, task was already scheduled
                            # If task is already running, the revoke message will be ignored
                            revoke(task_id)
                        _log(task, kwargs, data.node_id)
                        async_result = task.s(**kwargs).apply_async(countdown=delay, routing_key=routing_key)
                        cache.set(key, async_result.id, 600)  # Store the task id
                        new_task_id = async_result.id
                    else:
                        _log(task, kwargs, data.node_id)
                        async_result = task.s(**kwargs).apply_async(countdown=delay, routing_key=routing_key)
                        new_task_id = async_result.id
                else:
                    # No options configured: fire the task immediately
                    async_result = task.delay(**kwargs)
                    new_task_id = async_result.id
                logger.info('[{0}] {1}({2}) started on {3} with taskid {4}. Delay: {5}s'.format(queue, task.__name__, json.dumps(kwargs), routing_key, new_task_id, delay))
        else:
            logger.info('Message type {0} was received. Skipped.'.format(str(data.type)))
    else:
        raise NotImplementedError('Queue {} is not yet implemented'.format(queue))
def update_from_voldrv(name, storagedriver_id):
    """
    This method will update/create a vmachine based on a given vmx/xml file

    :param name: Devicename (vmx/xml path) as reported by the volumedriver
    :param storagedriver_id: ID of the Storage Driver that reported the file
    """
    pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
    if pmachine.hvtype not in ['VMWARE', 'KVM']:
        return
    hypervisor = Factory.get(pmachine)
    name = hypervisor.clean_vmachine_filename(name)
    storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    vpool = storagedriver.vpool
    # NOTE: the original comprehension reused the name `storagedriver` as loop
    # variable, which leaks in Python 2 and clobbered the StorageDriver fetched
    # above for the KVM path; use a distinct name so it stays intact
    machine_ids = [sd.storagerouter.machine_id for sd in vpool.storagedrivers]
    if hypervisor.should_process(name, machine_ids=machine_ids):
        # Only VMWARE machines are tracked per vpool; KVM machines are vpool-less
        if pmachine.hvtype != 'VMWARE':
            vpool = None
        mutex = VolatileMutex('{}_{}'.format(name, vpool.guid if vpool is not None else 'none'))
        try:
            mutex.acquire(wait=120)
            # Give the file a few seconds to appear before concluding it is gone
            limit = 5
            exists = hypervisor.file_exists(storagedriver, name)
            while limit > 0 and exists is False:
                time.sleep(1)
                exists = hypervisor.file_exists(storagedriver, name)
                limit -= 1
            if exists is False:
                # Guard vpool.name: vpool is None on the KVM path
                logger.info('Could not locate vmachine with name {0} on vpool {1}'.format(
                    name, vpool.name if vpool is not None else 'none'))
                vmachine = VMachineList.get_by_devicename_and_vpool(name, vpool)
                if vmachine is not None:
                    VMachineController.delete_from_voldrv(name, storagedriver_id=storagedriver_id)
                return
        finally:
            mutex.release()
        try:
            mutex.acquire(wait=5)
            vmachine = VMachineList.get_by_devicename_and_vpool(name, vpool)
            if not vmachine:
                vmachine = VMachine()
                vmachine.vpool = vpool
                vmachine.pmachine = pmachine
                vmachine.status = 'CREATED'
            vmachine.devicename = name
            vmachine.save()
        finally:
            mutex.release()
        if pmachine.hvtype == 'KVM':
            try:
                VMachineController.sync_with_hypervisor(vmachine.guid, storagedriver_id=storagedriver_id)
                vmachine.status = 'SYNC'
            except Exception:  # was a bare `except:`, which also swallowed SystemExit/KeyboardInterrupt
                vmachine.status = 'SYNC_NOK'
            vmachine.save()
    else:
        logger.info('Ignored invalid file {0}'.format(name))