def invalidate_vmachine_status(self, name):
    if not name.endswith('.xml'):
        return
    devicename = '{0}/{1}'.format(System.get_my_machine_id(), name)
    vm = VMachineList().get_by_devicename_and_vpool(devicename, None)
    if vm:
        vm.invalidate_dynamics()
        logger.debug('Hypervisor status invalidated for: {0}'.format(name))
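# A minimal sketch (hypothetical values) of the device-name key used by the lookups in these
# snippets, assuming the '{machine_id}/{file_name}' convention shown above:
machine_id = '1a2b3c'        # hypothetical System.get_my_machine_id() result
file_name = 'demo01.xml'     # hypothetical vmachine config file
devicename = '{0}/{1}'.format(machine_id, file_name)
assert devicename == '1a2b3c/demo01.xml'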
def can_be_deleted(self, storagedriver):
    """
    Checks whether a Storage Driver can be deleted
    """
    result = True
    storagerouter = storagedriver.storagerouter
    storagedrivers_left = len([sd for sd in storagerouter.storagedrivers if sd.guid != storagedriver.guid])
    pmachine = storagerouter.pmachine
    vmachines = VMachineList.get_customer_vmachines()
    vpools_guids = [vmachine.vpool_guid for vmachine in vmachines if vmachine.vpool_guid is not None]
    pmachine_guids = [vmachine.pmachine_guid for vmachine in vmachines]
    vpool = storagedriver.vpool
    if storagedrivers_left == 0 and pmachine.guid in pmachine_guids and vpool.guid in vpools_guids:
        result = False
    if any(vdisk for vdisk in vpool.vdisks if vdisk.storagedriver_id == storagedriver.storagedriver_id):
        result = False
    return result
def get_vmachine_by_name(name):
    """
    Retrieve the DAL vMachine object based on its name
    :param name: Name of the virtual machine
    :return: vMachine DAL object
    """
    return VMachineList.get_vmachine_by_name(vmname=name)
def delete_from_voldrv(name, storagedriver_id):
    """
    This method will delete a vmachine based on the name of the vmx given
    """
    pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
    if pmachine.hvtype not in ['VMWARE', 'KVM']:
        return
    hypervisor = Factory.get(pmachine)
    name = hypervisor.clean_vmachine_filename(name)
    if pmachine.hvtype == 'VMWARE':
        storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
        vpool = storagedriver.vpool
    else:
        vpool = None
    vm = VMachineList.get_by_devicename_and_vpool(name, vpool)
    if vm is not None:
        MessageController.fire(MessageController.Type.EVENT,
                               {'type': 'vmachine_deleted',
                                'metadata': {'name': vm.name}})
        vm.delete(abandon=['vdisks'])
def rename_from_voldrv(old_name, new_name, storagedriver_id):
    """
    This method handles the rename of a vmx file
    :param old_name: Old name of vmx
    :param new_name: New name for the vmx
    :param storagedriver_id: Storage Driver hosting the vmachine
    """
    pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
    if pmachine.hvtype not in ['VMWARE', 'KVM']:
        return
    hypervisor = Factory.get(pmachine)
    if pmachine.hvtype == 'VMWARE':
        storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
        vpool = storagedriver.vpool
    else:
        vpool = None
    old_name = hypervisor.clean_vmachine_filename(old_name)
    new_name = hypervisor.clean_vmachine_filename(new_name)
    scenario = hypervisor.get_rename_scenario(old_name, new_name)
    if scenario == 'RENAME':
        # Most likely a path change; updating the path
        vm = VMachineList.get_by_devicename_and_vpool(old_name, vpool)
        if vm is not None:
            vm.devicename = new_name
            vm.save()
    elif scenario == 'UPDATE':
        vm = VMachineList.get_by_devicename_and_vpool(new_name, vpool)
        if vm is None:
            # The vMachine doesn't seem to exist, so the create event likely didn't come through.
            # Let's create it anyway
            VMachineController.update_from_voldrv(new_name, storagedriver_id=storagedriver_id)
            vm = VMachineList.get_by_devicename_and_vpool(new_name, vpool)
            if vm is None:
                raise RuntimeError('Could not create vMachine on rename. Aborting.')
        try:
            VMachineController.sync_with_hypervisor(vm.guid, storagedriver_id=storagedriver_id)
            vm.status = 'SYNC'
        except:
            vm.status = 'SYNC_NOK'
        vm.save()
def update_vmachine_name(instance_id, old_name, new_name):
    """
    Update a vMachine name: find vmachine by management center instance id, set new name
    :param instance_id: ID for the virtual machine known by management center
    :param old_name: Old name of the virtual machine
    :param new_name: New name for the virtual machine
    """
    vmachine = None
    for mgmt_center in MgmtCenterList.get_mgmtcenters():
        mgmt = Factory.get_mgmtcenter(mgmt_center=mgmt_center)
        try:
            machine_info = mgmt.get_vmachine_device_info(instance_id)
            file_name = machine_info['file_name']
            host_name = machine_info['host_name']
            vpool_name = machine_info['vpool_name']
            storage_router = StorageRouterList.get_by_name(host_name)
            machine_id = storage_router.machine_id
            device_name = '{0}/{1}'.format(machine_id, file_name)
            vp = VPoolList.get_vpool_by_name(vpool_name)
            vmachine = VMachineList.get_by_devicename_and_vpool(device_name, vp)
            if vmachine:
                break
            vmachine = VMachineList.get_by_devicename_and_vpool(device_name, None)
            if vmachine:
                break
        except Exception as ex:
            logger.info('Trying to get mgmt center failed for vmachine {0}. {1}'.format(old_name, ex))
    if not vmachine:
        logger.error('No vmachine found for name {0}'.format(old_name))
        return
    vpool = vmachine.vpool
    mutex = VolatileMutex('{0}_{1}'.format(old_name, vpool.guid if vpool is not None else 'none'))
    try:
        mutex.acquire(wait=5)
        vmachine.name = new_name
        vmachine.save()
    finally:
        mutex.release()
def update_vmachine_name(instance_id, old_name, new_name): """ Update a vMachine name: find vmachine by management center instance id, set new name :param instance_id: ID for the virtual machine known by management center :param old_name: Old name of the virtual machine :param new_name: New name for the virtual machine """ vmachine = None for mgmt_center in MgmtCenterList.get_mgmtcenters(): mgmt = Factory.get_mgmtcenter(mgmt_center = mgmt_center) try: machine_info = mgmt.get_vmachine_device_info(instance_id) file_name = machine_info['file_name'] host_name = machine_info['host_name'] vpool_name = machine_info['vpool_name'] storage_router = StorageRouterList.get_by_name(host_name) machine_id = storage_router.machine_id device_name = '{0}/{1}'.format(machine_id, file_name) vp = VPoolList.get_vpool_by_name(vpool_name) vmachine = VMachineList.get_by_devicename_and_vpool(device_name, vp) if vmachine: break vmachine = VMachineList.get_by_devicename_and_vpool(device_name, None) if vmachine: break except Exception as ex: VMachineController._logger.info('Trying to get mgmt center failed for vmachine {0}. {1}'.format(old_name, ex)) if not vmachine: VMachineController._logger.error('No vmachine found for name {0}'.format(old_name)) return vpool = vmachine.vpool mutex = volatile_mutex('{0}_{1}'.format(old_name, vpool.guid if vpool is not None else 'none')) try: mutex.acquire(wait=5) vmachine.name = new_name vmachine.save() finally: mutex.release()
def sync_with_hypervisor(vpool_guid):
    """
    Syncs all vMachines of a given vPool with the hypervisor
    """
    vpool = VPool(vpool_guid)
    for storagedriver in vpool.storagedrivers:
        pmachine = storagedriver.storagerouter.pmachine
        hypervisor = Factory.get(pmachine)
        for vm_object in hypervisor.get_vms_by_nfs_mountinfo(storagedriver.storage_ip, storagedriver.mountpoint):
            search_vpool = None if pmachine.hvtype == 'KVM' else vpool
            vmachine = VMachineList.get_by_devicename_and_vpool(devicename=vm_object['backing']['filename'],
                                                                vpool=search_vpool)
            VMachineController.update_vmachine_config(vmachine, vm_object, pmachine)
def snapshot_all_vms():
    """
    Snapshots all VMachines
    """
    logger.info("[SSA] started")
    success = []
    fail = []
    machines = VMachineList.get_customer_vmachines()
    for machine in machines:
        try:
            VMachineController.snapshot(machineguid=machine.guid, label="", is_consistent=False, is_automatic=True)
            success.append(machine.guid)
        except:
            fail.append(machine.guid)
    logger.info("[SSA] Snapshot has been taken for {0} vMachines, {1} failed.".format(len(success), len(fail)))
def sync_with_hypervisor(vpool_guid):
    """
    Syncs all vMachines of a given vPool with the hypervisor
    :param vpool_guid: Guid of the vPool to synchronize
    """
    vpool = VPool(vpool_guid)
    if vpool.status != VPool.STATUSES.RUNNING:
        raise ValueError('Synchronizing with hypervisor is only allowed if your vPool is in {0} status'.format(VPool.STATUSES.RUNNING))
    for storagedriver in vpool.storagedrivers:
        pmachine = storagedriver.storagerouter.pmachine
        hypervisor = Factory.get(pmachine)
        for vm_object in hypervisor.get_vms_by_nfs_mountinfo(storagedriver.storage_ip, storagedriver.mountpoint):
            search_vpool = None if pmachine.hvtype == 'KVM' else vpool
            vmachine = VMachineList.get_by_devicename_and_vpool(devicename=vm_object['backing']['filename'],
                                                                vpool=search_vpool)
            VMachineController.update_vmachine_config(vmachine, vm_object, pmachine)
def can_be_deleted(self, storagedriver):
    """
    Checks whether a Storage Driver can be deleted
    """
    result = True
    storagerouter = storagedriver.storagerouter
    pmachine = storagerouter.pmachine
    vmachines = VMachineList.get_customer_vmachines()
    vpools_guids = [vmachine.vpool_guid for vmachine in vmachines if vmachine.vpool_guid is not None]
    pmachine_guids = [vmachine.pmachine_guid for vmachine in vmachines]
    vpool = storagedriver.vpool
    if pmachine.guid in pmachine_guids and vpool.guid in vpools_guids:
        result = False
    if any(vdisk for vdisk in vpool.vdisks if vdisk.storagedriver_id == storagedriver.storagedriver_id):
        result = False
    return Response(result, status=status.HTTP_200_OK)
def list(self, vpoolguid=None, query=None):
    """
    Overview of all machines
    """
    if vpoolguid is not None:
        vpool = VPool(vpoolguid)
        vmachine_guids = []
        vmachines = []
        for vdisk in vpool.vdisks:
            if vdisk.vmachine_guid is not None and vdisk.vmachine_guid not in vmachine_guids:
                vmachine_guids.append(vdisk.vmachine.guid)
                if vdisk.vmachine.is_vtemplate is False:
                    vmachines.append(vdisk.vmachine)
    elif query is not None:
        query = json.loads(query)
        vmachines = DataList(VMachine, query)
    else:
        vmachines = VMachineList.get_vmachines()
    return vmachines
def snapshot_all_vms():
    """
    Snapshots all VMachines
    """
    logger.info('[SSA] started')
    success = []
    fail = []
    machines = VMachineList.get_customer_vmachines()
    for machine in machines:
        try:
            VMachineController.snapshot(machineguid=machine.guid, label='', is_consistent=False, is_automatic=True)
            success.append(machine.guid)
        except:
            fail.append(machine.guid)
    logger.info('[SSA] {0} vMachines were snapshotted, {1} failed.'.format(len(success), len(fail)))
def snapshot_all_vms():
    """
    Snapshots all VMachines
    """
    ScheduledTaskController._logger.info('[SSA] started')
    success = []
    fail = []
    machines = VMachineList.get_customer_vmachines()
    for machine in machines:
        try:
            VMachineController.snapshot(machineguid=machine.guid,
                                        label='',
                                        is_consistent=False,
                                        is_automatic=True,
                                        is_sticky=False)
            success.append(machine.guid)
        except:
            fail.append(machine.guid)
    ScheduledTaskController._logger.info('[SSA] Snapshot has been taken for {0} vMachines, {1} failed.'.format(len(success), len(fail)))
def list(self, vpoolguid=None, query=None):
    """
    Overview of all machines
    """
    if vpoolguid is not None:
        vpool = VPool(vpoolguid)
        vmachine_guids = []
        vmachines = []
        for vdisk in vpool.vdisks:
            if vdisk.vmachine_guid is not None and vdisk.vmachine_guid not in vmachine_guids:
                vmachine_guids.append(vdisk.vmachine.guid)
                if vdisk.vmachine.is_vtemplate is False:
                    vmachines.append(vdisk.vmachine)
    elif query is not None:
        query = json.loads(query)
        query_result = DataList({'object': VMachine,
                                 'data': DataList.select.GUIDS,
                                 'query': query}).data
        vmachines = DataObjectList(query_result, VMachine)
    else:
        vmachines = VMachineList.get_vmachines()
    return vmachines
def update_from_voldrv(name, storagedriver_id): """ This method will update/create a vmachine based on a given vmx/xml file """ pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id) if pmachine.hvtype not in ['VMWARE', 'KVM']: return hypervisor = Factory.get(pmachine) name = hypervisor.clean_vmachine_filename(name) storagedriver = StorageDriverList.get_by_storagedriver_id( storagedriver_id) vpool = storagedriver.vpool machine_ids = [ storagedriver.storagerouter.machine_id for storagedriver in vpool.storagedrivers ] if hypervisor.should_process(name, machine_ids=machine_ids): if pmachine.hvtype == 'VMWARE': storagedriver = StorageDriverList.get_by_storagedriver_id( storagedriver_id) vpool = storagedriver.vpool else: vpool = None pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id) mutex = VolatileMutex('{}_{}'.format( name, vpool.guid if vpool is not None else 'none')) try: mutex.acquire(wait=120) limit = 5 exists = hypervisor.file_exists(storagedriver, name) while limit > 0 and exists is False: time.sleep(1) exists = hypervisor.file_exists(storagedriver, name) limit -= 1 if exists is False: logger.info( 'Could not locate vmachine with name {0} on vpool {1}'. format(name, vpool.name)) vmachine = VMachineList.get_by_devicename_and_vpool( name, vpool) if vmachine is not None: VMachineController.delete_from_voldrv( name, storagedriver_id=storagedriver_id) return finally: mutex.release() try: mutex.acquire(wait=5) vmachine = VMachineList.get_by_devicename_and_vpool( name, vpool) if not vmachine: vmachine = VMachine() vmachine.vpool = vpool vmachine.pmachine = pmachine vmachine.status = 'CREATED' vmachine.devicename = name vmachine.save() finally: mutex.release() if pmachine.hvtype == 'KVM': try: VMachineController.sync_with_hypervisor( vmachine.guid, storagedriver_id=storagedriver_id) vmachine.status = 'SYNC' except: vmachine.status = 'SYNC_NOK' vmachine.save() else: logger.info('Ignored invalid file {0}'.format(name))
def gather_scrub_work(): logger.info("Divide scrubbing work among allowed Storage Routers") scrub_locations = {} for storage_driver in StorageDriverList.get_storagedrivers(): for partition in storage_driver.partitions: if DiskPartition.ROLES.SCRUB == partition.role: logger.info( "Scrub partition found on Storage Router {0}: {1}".format(storage_driver.name, partition.folder) ) if storage_driver.storagerouter not in scrub_locations: try: _ = SSHClient(storage_driver.storagerouter.ip) scrub_locations[storage_driver.storagerouter] = str(partition.path) except UnableToConnectException: logger.warning("StorageRouter {0} is not reachable".format(storage_driver.storagerouter.ip)) if len(scrub_locations) == 0: raise RuntimeError("No scrub locations found") vdisk_guids = set() for vmachine in VMachineList.get_customer_vmachines(): for vdisk in vmachine.vdisks: if vdisk.info["object_type"] in ["BASE"] and len(vdisk.child_vdisks) == 0: vdisk_guids.add(vdisk.guid) for vdisk in VDiskList.get_without_vmachine(): if vdisk.info["object_type"] in ["BASE"] and len(vdisk.child_vdisks) == 0: vdisk_guids.add(vdisk.guid) logger.info("Found {0} virtual disks which need to be check for scrub work".format(len(vdisk_guids))) local_machineid = System.get_my_machine_id() local_scrub_location = None local_vdisks_to_scrub = [] result_set = ResultSet([]) storage_router_list = [] for index, scrub_info in enumerate(scrub_locations.items()): start_index = index * len(vdisk_guids) / len(scrub_locations) end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations) storage_router = scrub_info[0] vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index] local = storage_router.machine_id == local_machineid logger.info( "Executing scrub work on {0} Storage Router {1} for {2} virtual disks".format( "local" if local is True else "remote", storage_router.name, len(vdisk_guids_to_scrub) ) ) if local is True: local_scrub_location = scrub_info[1] local_vdisks_to_scrub = vdisk_guids_to_scrub else: result_set.add( ScheduledTaskController._execute_scrub_work.s( scrub_location=scrub_info[1], vdisk_guids=vdisk_guids_to_scrub ).apply_async(routing_key="sr.{0}".format(storage_router.machine_id)) ) storage_router_list.append(storage_router) logger.info("Launched scrub task on Storage Router {0}".format(storage_router.name)) # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0: ScheduledTaskController._execute_scrub_work( scrub_location=local_scrub_location, vdisk_guids=local_vdisks_to_scrub ) all_results = result_set.join( propagate=False ) # Propagate False makes sure all jobs are waited for even when 1 or more jobs fail for index, result in enumerate(all_results): if result is not None: logger.error( "Scrubbing failed on Storage Router {0} with error {1}".format( storage_router_list[index].name, result ) )
def _children(vmt):
    children = 0
    disks = [vd.guid for vd in vmt.vdisks]
    for vdisk in [vdisk.parent_vdisk_guid for item in [vm.vdisks for vm in VMachineList.get_vmachines() if not vm.is_vtemplate] for vdisk in item]:
        for disk in disks:
            if vdisk == disk:
                children += 1
    return children
def _bootstrap_dal_models(self): """ Load/hook dal models as snmp oids """ _guids = set() enabled_key = "{0}_config_dal_enabled".format(STORAGE_PREFIX) self.instance_oid = 0 try: enabled = self.persistent.get(enabled_key) except KeyNotFoundException: enabled = True # Enabled by default, can be disabled by setting the key if enabled: from ovs.dal.lists.vdisklist import VDiskList from ovs.dal.lists.storagerouterlist import StorageRouterList from ovs.dal.lists.pmachinelist import PMachineList from ovs.dal.lists.vmachinelist import VMachineList from ovs.dal.lists.vpoollist import VPoolList from ovs.dal.lists.storagedriverlist import StorageDriverList for storagerouter in StorageRouterList.get_storagerouters(): _guids.add(storagerouter.guid) if not self._check_added(storagerouter): self._register_dal_model(10, storagerouter, 'guid', "0") self._register_dal_model(10, storagerouter, 'name', "1") self._register_dal_model(10, storagerouter, 'pmachine', "3", key = 'host_status') self._register_dal_model(10, storagerouter, 'description', "4") self._register_dal_model(10, storagerouter, 'devicename', "5") self._register_dal_model(10, storagerouter, 'dtl_mode', "6") self._register_dal_model(10, storagerouter, 'ip', "8") self._register_dal_model(10, storagerouter, 'machineid', "9") self._register_dal_model(10, storagerouter, 'status', "10") self._register_dal_model(10, storagerouter, '#vdisks', "11", func = lambda storagerouter: len([vdisk for vpool_vdisks in [storagedriver.vpool.vdisks for storagedriver in storagerouter.storagedrivers] for vdisk in vpool_vdisks if vdisk.storagedriver_id == storagedriver.storagedriver_id]), atype = int) self._register_dal_model(10, storagerouter, '#vmachines', "12", func = lambda storagerouter: len(set([vdisk.vmachine.guid for vpool_vdisks in [storagedriver.vpool.vdisks for storagedriver in storagerouter.storagedrivers] for vdisk in vpool_vdisks if vdisk.storagedriver_id == storagedriver.storagedriver_id])), atype = int) self._register_dal_model(10, storagerouter, '#stored_data', "13", func = lambda storagerouter: sum([vdisk.vmachine.stored_data for vpool_vdisks in [storagedriver.vpool.vdisks for storagedriver in storagerouter.storagedrivers] for vdisk in vpool_vdisks if vdisk.storagedriver_id == storagedriver.storagedriver_id]), atype = int) self.instance_oid += 1 for vm in VMachineList.get_vmachines(): _guids.add(vm.guid) if not self._check_added(vm): if vm.is_vtemplate: self._register_dal_model(11, vm, 'guid', "0") self._register_dal_model(11, vm, 'name', "1") def _children(vmt): children = 0 disks = [vd.guid for vd in vmt.vdisks] for vdisk in [vdisk.parent_vdisk_guid for item in [vm.vdisks for vm in VMachineList.get_vmachines() if not vm.is_vtemplate] for vdisk in item]: for disk in disks: if vdisk == disk: children += 1 return children self._register_dal_model(11, vm, '#children', 2, func = _children, atype = int) self.instance_oid += 1 for vm in VMachineList.get_vmachines(): _guids.add(vm.guid) if not self._check_added(vm): if not vm.is_vtemplate: self._register_dal_model(0, vm, 'guid', "0") self._register_dal_model(0, vm, 'name', "1") self._register_dal_model(0, vm, 'statistics', "2.0", key = "operations", atype = int) self._register_dal_model(0, vm, 'statistics', "2.1", key = "cluster_cache_misses_ps", atype = int) self._register_dal_model(0, vm, 'statistics', "2.2", key = "data_read", atype = int) self._register_dal_model(0, vm, 'statistics', "2.3", key = "sco_cache_misses", atype = int) self._register_dal_model(0, vm, 'statistics', "2.4", key = 
"sco_cache_hits_ps", atype = int) self._register_dal_model(0, vm, 'statistics', "2.5", key = "sco_cache_hits", atype = int) self._register_dal_model(0, vm, 'statistics', "2.6", key = "write_operations", atype = int) self._register_dal_model(0, vm, 'statistics', "2.7", key = "cluster_cache_misses", atype = int) self._register_dal_model(0, vm, 'statistics', "2.8", key = "read_operations_ps", atype = int) self._register_dal_model(0, vm, 'statistics', "2.9", key = "sco_cache_misses_ps", atype = int) self._register_dal_model(0, vm, 'statistics', "2.10", key = "backend_write_operations", atype = int) self._register_dal_model(0, vm, 'statistics', "2.11", key = "backend_data_read", atype = int) self._register_dal_model(0, vm, 'statistics', "2.12", key = "cache_hits", atype = int) self._register_dal_model(0, vm, 'statistics', "2.13", key = "backend_write_operations_ps", atype = int) self._register_dal_model(0, vm, 'statistics', "2.14", key = "metadata_store_hits_ps", atype = int) self._register_dal_model(0, vm, 'statistics', "2.15", key = "metadata_store_misses", atype = int) self._register_dal_model(0, vm, 'statistics', "2.16", key = "backend_data_written", atype = int) self._register_dal_model(0, vm, 'statistics', "2.17", key = "data_read_ps", atype = int) self._register_dal_model(0, vm, 'statistics', "2.18", key = "read_operations", atype = int) self._register_dal_model(0, vm, 'statistics', "2.19", key = "cluster_cache_hits", atype = int) self._register_dal_model(0, vm, 'statistics', "2.20", key = "data_written_ps", atype = int) self._register_dal_model(0, vm, 'statistics', "2.21", key = "cluster_cache_hits_ps", atype = int) self._register_dal_model(0, vm, 'statistics', "2.22", key = "cache_hits_ps", atype = int) self._register_dal_model(0, vm, 'statistics', "2.23", key = "timestamp", atype = int) self._register_dal_model(0, vm, 'statistics', "2.24", key = "metadata_store_misses_ps", atype = int) self._register_dal_model(0, vm, 'statistics', "2.25", key = "backend_data_written_ps", atype = int) self._register_dal_model(0, vm, 'statistics', "2.26", key = "backend_read_operations", atype = int) self._register_dal_model(0, vm, 'statistics', "2.27", key = "data_written", atype = int) self._register_dal_model(0, vm, 'statistics', "2.28", key = "metadata_store_hits", atype = int) self._register_dal_model(0, vm, 'statistics', "2.29", key = "backend_data_read_ps", atype = int) self._register_dal_model(0, vm, 'statistics', "2.30", key = "operations_ps", atype = int) self._register_dal_model(0, vm, 'statistics', "2.31", key = "backend_read_operations_ps", atype = int) self._register_dal_model(0, vm, 'statistics', "2.32", key = "data_transferred_ps", atype = int) self._register_dal_model(0, vm, 'statistics', "2.33", key = "write_operations_ps", atype = int) self._register_dal_model(0, vm, 'statistics', "2.34", key = "data_transferred", atype = int) self._register_dal_model(0, vm, 'stored_data', "3", atype = int) self._register_dal_model(0, vm, 'description', "4") self._register_dal_model(0, vm, 'devicename', "5") self._register_dal_model(0, vm, 'dtl_mode', "6") self._register_dal_model(0, vm, 'hypervisorid', "7") self._register_dal_model(0, vm, 'ip', "8") self._register_dal_model(0, vm, 'status', "10") self._register_dal_model(0, vm, 'stored_data', "10", atype = int) self._register_dal_model(0, vm, 'snapshots', "11", atype = int) self._register_dal_model(0, vm, 'vdisks', "12", atype = int) self._register_dal_model(0, vm, 'DTL', '13', func = lambda vm: 'DEGRADED' if all(item == 'DEGRADED' for item in 
[vd.info['failover_mode'] for vd in vm.vdisks]) else 'OK') self.instance_oid += 1 for vd in VDiskList.get_vdisks(): _guids.add(vd.guid) if not self._check_added(vd): self._register_dal_model(1, vd, 'guid', "0") self._register_dal_model(1, vd, 'name', "1") self._register_dal_model(1, vd, 'statistics', "2.0", key = "operations", atype = int) self._register_dal_model(1, vd, 'statistics', "2.1", key = "data_written_ps", atype = int) self._register_dal_model(1, vd, 'statistics', "2.2", key = "data_read", atype = int) self._register_dal_model(1, vd, 'statistics', "2.3", key = "sco_cache_misses", atype = int) self._register_dal_model(1, vd, 'statistics', "2.4", key = "sco_cache_hits_ps", atype = int) self._register_dal_model(1, vd, 'statistics', "2.5", key = "sco_cache_hits", atype = int) self._register_dal_model(1, vd, 'statistics', "2.6", key = "write_operations", atype = int) self._register_dal_model(1, vd, 'statistics', "2.7", key = "cluster_cache_misses", atype = int) self._register_dal_model(1, vd, 'statistics', "2.8", key = "read_operations_ps", atype = int) self._register_dal_model(1, vd, 'statistics', "2.9", key = "sco_cache_misses_ps", atype = int) self._register_dal_model(1, vd, 'statistics', "2.10", key = "backend_write_operations", atype = int) self._register_dal_model(1, vd, 'statistics', "2.11", key = "backend_data_read", atype = int) self._register_dal_model(1, vd, 'statistics', "2.12", key = "cache_hits", atype = int) self._register_dal_model(1, vd, 'statistics', "2.13", key = "backend_write_operations_ps", atype = int) self._register_dal_model(1, vd, 'statistics', "2.14", key = "metadata_store_hits_ps", atype = int) self._register_dal_model(1, vd, 'statistics', "2.15", key = "metadata_store_misses", atype = int) self._register_dal_model(1, vd, 'statistics', "2.16", key = "backend_data_written", atype = int) self._register_dal_model(1, vd, 'statistics', "2.17", key = "data_read_ps", atype = int) self._register_dal_model(1, vd, 'statistics', "2.18", key = "read_operations", atype = int) self._register_dal_model(1, vd, 'statistics', "2.19", key = "cluster_cache_hits", atype = int) self._register_dal_model(1, vd, 'statistics', "2.20", key = "cluster_cache_misses_ps", atype = int) self._register_dal_model(1, vd, 'statistics', "2.21", key = "cluster_cache_hits_ps", atype = int) self._register_dal_model(1, vd, 'statistics', "2.22", key = "cache_hits_ps", atype = int) self._register_dal_model(1, vd, 'statistics', "2.23", key = "timestamp", atype = int) self._register_dal_model(1, vd, 'statistics', "2.24", key = "metadata_store_misses_ps", atype = int) self._register_dal_model(1, vd, 'statistics', "2.25", key = "backend_data_written_ps", atype = int) self._register_dal_model(1, vd, 'statistics', "2.26", key = "backend_read_operations", atype = int) self._register_dal_model(1, vd, 'statistics', "2.27", key = "data_written", atype = int) self._register_dal_model(1, vd, 'statistics', "2.28", key = "metadata_store_hits", atype = int) self._register_dal_model(1, vd, 'statistics', "2.29", key = "backend_data_read_ps", atype = int) self._register_dal_model(1, vd, 'statistics', "2.30", key = "operations_ps", atype = int) self._register_dal_model(1, vd, 'statistics', "2.31", key = "backend_read_operations_ps", atype = int) self._register_dal_model(1, vd, 'statistics', "2.32", key = "data_transferred_ps", atype = int) self._register_dal_model(1, vd, 'statistics', "2.33", key = "write_operations_ps", atype = int) self._register_dal_model(1, vd, 'statistics', "2.34", key = "data_transferred", atype 
= int) self._register_dal_model(1, vd, 'info', "3", key = 'stored', atype = int) self._register_dal_model(1, vd, 'info', "4", key = 'failover_mode', atype = int) self._register_dal_model(1, vd, 'snapshots', "5", atype = int) self.instance_oid += 1 for pm in PMachineList.get_pmachines(): _guids.add(pm.guid) if not self._check_added(pm): self._register_dal_model(2, pm, 'guid', "0") self._register_dal_model(2, pm, 'name', "1") self._register_dal_model(2, pm, 'host_status', "2") self.instance_oid += 1 for vp in VPoolList.get_vpools(): _guids.add(vp.guid) if not self._check_added(vp): self._register_dal_model(3, vp, 'guid', "0") self._register_dal_model(3, vp, 'name', "1") self._register_dal_model(3, vp, 'statistics', "2.0", key = "operations", atype = int) self._register_dal_model(3, vp, 'statistics', "2.1", key = "cluster_cache_misses_ps", atype = int) self._register_dal_model(3, vp, 'statistics', "2.2", key = "data_read", atype = int) self._register_dal_model(3, vp, 'statistics', "2.3", key = "sco_cache_misses", atype = int) self._register_dal_model(3, vp, 'statistics', "2.4", key = "sco_cache_hits_ps", atype = int) self._register_dal_model(3, vp, 'statistics', "2.5", key = "sco_cache_hits", atype = int) self._register_dal_model(3, vp, 'statistics', "2.6", key = "write_operations", atype = int) self._register_dal_model(3, vp, 'statistics', "2.7", key = "cluster_cache_misses", atype = int) self._register_dal_model(3, vp, 'statistics', "2.8", key = "read_operations_ps", atype = int) self._register_dal_model(3, vp, 'statistics', "2.9", key = "sco_cache_misses_ps", atype = int) self._register_dal_model(3, vp, 'statistics', "2.10", key = "backend_write_operations", atype = int) self._register_dal_model(3, vp, 'statistics', "2.11", key = "backend_data_read", atype = int) self._register_dal_model(3, vp, 'statistics', "2.12", key = "cache_hits", atype = int) self._register_dal_model(3, vp, 'statistics', "2.13", key = "backend_write_operations_ps", atype = int) self._register_dal_model(3, vp, 'statistics', "2.14", key = "metadata_store_hits_ps", atype = int) self._register_dal_model(3, vp, 'statistics', "2.15", key = "metadata_store_misses", atype = int) self._register_dal_model(3, vp, 'statistics', "2.16", key = "backend_data_written", atype = int) self._register_dal_model(3, vp, 'statistics', "2.17", key = "data_read_ps", atype = int) self._register_dal_model(3, vp, 'statistics', "2.18", key = "read_operations", atype = int) self._register_dal_model(3, vp, 'statistics', "2.19", key = "cluster_cache_hits", atype = int) self._register_dal_model(3, vp, 'statistics', "2.20", key = "data_written_ps", atype = int) self._register_dal_model(3, vp, 'statistics', "2.21", key = "cluster_cache_hits_ps", atype = int) self._register_dal_model(3, vp, 'statistics', "2.22", key = "cache_hits_ps", atype = int) self._register_dal_model(3, vp, 'statistics', "2.23", key = "timestamp", atype = int) self._register_dal_model(3, vp, 'statistics', "2.24", key = "metadata_store_misses_ps", atype = int) self._register_dal_model(3, vp, 'statistics', "2.25", key = "backend_data_written_ps", atype = int) self._register_dal_model(3, vp, 'statistics', "2.26", key = "backend_read_operations", atype = int) self._register_dal_model(3, vp, 'statistics', "2.27", key = "data_written", atype = int) self._register_dal_model(3, vp, 'statistics', "2.28", key = "metadata_store_hits", atype = int) self._register_dal_model(3, vp, 'statistics', "2.29", key = "backend_data_read_ps", atype = int) self._register_dal_model(3, vp, 'statistics', 
"2.30", key = "operations_ps", atype = int) self._register_dal_model(3, vp, 'statistics', "2.31", key = "backend_read_operations_ps", atype = int) self._register_dal_model(3, vp, 'statistics', "2.32", key = "data_transferred_ps", atype = int) self._register_dal_model(3, vp, 'statistics', "2.33", key = "write_operations_ps", atype = int) self._register_dal_model(3, vp, 'statistics', "2.34", key = "data_transferred", atype = int) self._register_dal_model(3, vp, 'status', "3") self._register_dal_model(3, vp, 'description', "4") self._register_dal_model(3, vp, 'vdisks', "5", atype = int) self._register_dal_model(3, vp, '#vmachines', "6", func = lambda vp: len(set([vd.vmachine.guid for vd in vp.vdisks])), atype = int) self.instance_oid += 1 for storagedriver in StorageDriverList.get_storagedrivers(): _guids.add(storagedriver.guid) if not self._check_added(storagedriver): self._register_dal_model(4, storagedriver, 'guid', "0") self._register_dal_model(4, storagedriver, 'name', "1") self._register_dal_model(4, storagedriver, 'stored_data', "2", atype = int) self.instance_oid += 1 try: # try to load OVS Backends from ovs.dal.lists.albabackendlist import AlbaBackendList for backend in AlbaBackendList.get_albabackends(): _guids.add(backend.guid) if not self._check_added(backend): self._register_dal_model(5, backend, 'guid', 0) self._register_dal_model(5, backend, 'name', 1) for disk_id in range(len((backend.all_disks))): self._register_dal_model(5, backend, 'all_disks', '2.{0}.0'.format(disk_id), key = "name", index=disk_id) self._register_dal_model(5, backend, 'all_disks', '2.{0}.1'.format(disk_id), key = "usage.size", atype = long, index=disk_id) self._register_dal_model(5, backend, 'all_disks', '2.{0}.2'.format(disk_id), key = "usage.used", atype = long, index=disk_id) self._register_dal_model(5, backend, 'all_disks', '2.{0}.3'.format(disk_id), key = "usage.available", atype = long, index=disk_id) self._register_dal_model(5, backend, 'all_disks', '2.{0}.4'.format(disk_id), key = "state.state", index=disk_id) self._register_dal_model(5, backend, 'all_disks', '2.{0}.5'.format(disk_id), key = "node_id", index=disk_id) self.instance_oid += 1 except ImportError: print('OVS Backend not present') pass reload = False for object_guid in list(self.model_oids): if object_guid not in _guids: self.model_oids.remove(object_guid) reload = True if reload: self._reload_snmp()
def delete_snapshots(timestamp=None): """ Delete snapshots & scrubbing policy Implemented delete snapshot policy: < 1d | 1d bucket | 1 | best of bucket | 1d < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m > 1m | delete :param timestamp: Timestamp to determine whether snapshots should be kept or not, if none provided, current time will be used """ logger.info('Delete snapshots started') day = timedelta(1) week = day * 7 def make_timestamp(offset): """ Create an integer based timestamp :param offset: Offset in days :return: Timestamp """ return int(mktime((base - offset).timetuple())) # Calculate bucket structure if timestamp is None: timestamp = time.time() base = datetime.fromtimestamp(timestamp).date() - day buckets = [] # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[ for i in xrange(0, 7): buckets.append({'start': make_timestamp(day * i), 'end': make_timestamp(day * (i + 1)), 'type': '1d', 'snapshots': []}) # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[ for i in xrange(1, 4): buckets.append({'start': make_timestamp(week * i), 'end': make_timestamp(week * (i + 1)), 'type': '1w', 'snapshots': []}) buckets.append({'start': make_timestamp(week * 4), 'end': 0, 'type': 'rest', 'snapshots': []}) # Place all snapshots in bucket_chains bucket_chains = [] for vmachine in VMachineList.get_customer_vmachines(): if any(vd.info['object_type'] in ['BASE'] for vd in vmachine.vdisks): bucket_chain = copy.deepcopy(buckets) for snapshot in vmachine.snapshots: if snapshot.get('is_sticky') is True: continue timestamp = int(snapshot['timestamp']) for bucket in bucket_chain: if bucket['start'] >= timestamp > bucket['end']: for diskguid, snapshotguid in snapshot['snapshots'].iteritems(): bucket['snapshots'].append({'timestamp': timestamp, 'snapshotid': snapshotguid, 'diskguid': diskguid, 'is_consistent': snapshot['is_consistent']}) bucket_chains.append(bucket_chain) for vdisk in VDiskList.get_without_vmachine(): if vdisk.info['object_type'] in ['BASE']: bucket_chain = copy.deepcopy(buckets) for snapshot in vdisk.snapshots: if snapshot.get('is_sticky') is True: continue timestamp = int(snapshot['timestamp']) for bucket in bucket_chain: if bucket['start'] >= timestamp > bucket['end']: bucket['snapshots'].append({'timestamp': timestamp, 'snapshotid': snapshot['guid'], 'diskguid': vdisk.guid, 'is_consistent': snapshot['is_consistent']}) bucket_chains.append(bucket_chain) # Clean out the snapshot bucket_chains, we delete the snapshots we want to keep # And we'll remove all snapshots that remain in the buckets for bucket_chain in bucket_chains: first = True for bucket in bucket_chain: if first is True: best = None for snapshot in bucket['snapshots']: if best is None: best = snapshot # Consistent is better than inconsistent elif snapshot['is_consistent'] and not best['is_consistent']: best = snapshot # Newer (larger timestamp) is better than older snapshots elif snapshot['is_consistent'] == best['is_consistent'] and \ snapshot['timestamp'] > best['timestamp']: best = snapshot bucket['snapshots'] = [s for s in bucket['snapshots'] if s['timestamp'] != best['timestamp']] first = False elif bucket['end'] > 0: oldest = None for snapshot in bucket['snapshots']: if oldest is None: oldest = snapshot # Older (smaller timestamp) is the one we want to keep elif snapshot['timestamp'] < oldest['timestamp']: oldest = snapshot bucket['snapshots'] = [s for s in bucket['snapshots'] if s['timestamp'] != oldest['timestamp']] # Delete obsolete snapshots 
for bucket_chain in bucket_chains: for bucket in bucket_chain: for snapshot in bucket['snapshots']: VDiskController.delete_snapshot(diskguid=snapshot['diskguid'], snapshotid=snapshot['snapshotid']) logger.info('Delete snapshots finished')
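# A minimal, self-contained sketch of the bucket layout built by delete_snapshots above, assuming
# the same make_timestamp() convention (base = snapshot day minus one day). The sample timestamp
# below is hypothetical and only used to print the resulting bucket ranges.
from datetime import datetime, timedelta
from time import mktime

def _example_buckets(timestamp):
    day = timedelta(1)
    week = day * 7
    base = datetime.fromtimestamp(timestamp).date() - day

    def make_timestamp(offset):
        return int(mktime((base - offset).timetuple()))

    buckets = []
    for i in range(0, 7):   # 7 one-day buckets: [0-1[, ..., [6-7[
        buckets.append({'start': make_timestamp(day * i), 'end': make_timestamp(day * (i + 1)), 'type': '1d'})
    for i in range(1, 4):   # 3 one-week buckets: [7-14[, [14-21[, [21-28[
        buckets.append({'start': make_timestamp(week * i), 'end': make_timestamp(week * (i + 1)), 'type': '1w'})
    buckets.append({'start': make_timestamp(week * 4), 'end': 0, 'type': 'rest'})  # older than 4 weeks: delete
    return buckets

# A snapshot lands in the bucket where bucket['start'] >= snapshot_timestamp > bucket['end'];
# only the 'best' snapshot (first day bucket) or the 'oldest' one (other dated buckets) is kept.
for bucket in _example_buckets(mktime(datetime(2015, 6, 1).timetuple())):
    start = datetime.fromtimestamp(bucket['start'])
    end = datetime.fromtimestamp(bucket['end']) if bucket['end'] > 0 else 'epoch'
    print('{0}: ]{1}, {2}]'.format(bucket['type'], end, start))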
def gather_scrub_work(): """ Retrieve and execute scrub work :return: None """ logger.info('Gather Scrub - Started') scrub_locations = {} for storage_driver in StorageDriverList.get_storagedrivers(): for partition in storage_driver.partitions: if DiskPartition.ROLES.SCRUB == partition.role: logger.info('Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}'.format(storage_driver.storagerouter.ip, partition.path)) if storage_driver.storagerouter not in scrub_locations: try: _ = SSHClient(storage_driver.storagerouter) scrub_locations[storage_driver.storagerouter] = str(partition.path) except UnableToConnectException: logger.warning('Gather Scrub - Storage Router {0:<15} is not reachable'.format(storage_driver.storagerouter.ip)) if len(scrub_locations) == 0: raise RuntimeError('No scrub locations found') vdisk_guids = set() for vmachine in VMachineList.get_customer_vmachines(): for vdisk in vmachine.vdisks: if vdisk.info['object_type'] == 'BASE': vdisk_guids.add(vdisk.guid) for vdisk in VDiskList.get_without_vmachine(): if vdisk.info['object_type'] == 'BASE': vdisk_guids.add(vdisk.guid) logger.info('Gather Scrub - Checking {0} volumes for scrub work'.format(len(vdisk_guids))) local_machineid = System.get_my_machine_id() local_storage_router = None local_scrub_location = None local_vdisks_to_scrub = [] result_set = ResultSet([]) storage_router_list = [] for index, scrub_info in enumerate(scrub_locations.items()): start_index = index * len(vdisk_guids) / len(scrub_locations) end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations) storage_router = scrub_info[0] vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index] local = storage_router.machine_id == local_machineid logger.info('Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks'.format(storage_router.ip, 'local' if local is True else 'remote', len(vdisk_guids_to_scrub))) if local is True: local_storage_router = storage_router local_scrub_location = scrub_info[1] local_vdisks_to_scrub = vdisk_guids_to_scrub else: result_set.add(ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_info[1], vdisk_guids=vdisk_guids_to_scrub).apply_async( routing_key='sr.{0}'.format(storage_router.machine_id) )) storage_router_list.append(storage_router) # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish processed_guids = [] if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0: try: processed_guids = ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location, vdisk_guids=local_vdisks_to_scrub) except Exception as ex: logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(local_storage_router.ip, ex)) all_results = result_set.join(propagate=False) # Propagate False makes sure all jobs are waited for even when 1 or more jobs fail for index, result in enumerate(all_results): if isinstance(result, list): processed_guids.extend(result) else: logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(storage_router_list[index].ip, result)) if len(processed_guids) != len(vdisk_guids) or set(processed_guids).difference(vdisk_guids): raise RuntimeError('Scrubbing failed for 1 or more storagerouters') logger.info('Gather Scrub - Finished')
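# A small sketch of how gather_scrub_work spreads the vDisk guids over the available scrub
# locations, assuming the same proportional slicing; '//' is used so the example also runs on
# Python 3 (the original relies on Python 2 integer division). The guid values are hypothetical.
def _partition(guids, location_count):
    guids = list(guids)
    chunks = []
    for index in range(location_count):
        start_index = index * len(guids) // location_count
        end_index = (index + 1) * len(guids) // location_count
        chunks.append(guids[start_index:end_index])
    return chunks

# 7 guids over 3 scrub locations -> chunks of 2, 2 and 3 entries, nothing dropped
print(_partition(['guid{0}'.format(i) for i in range(7)], 3))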
def delete_snapshots(timestamp=None): """ Delete snapshots & scrubbing policy Implemented delete snapshot policy: < 1d | 1d bucket | 1 | best of bucket | 1d < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m > 1m | delete :param timestamp: Timestamp to determine whether snapshots should be kept or not, if none provided, current time will be used """ logger.info("Delete snapshots started") day = timedelta(1) week = day * 7 def make_timestamp(offset): return int(mktime((base - offset).timetuple())) # Calculate bucket structure if timestamp is None: timestamp = time.time() base = datetime.fromtimestamp(timestamp).date() - day buckets = [] # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[ for i in xrange(0, 7): buckets.append( {"start": make_timestamp(day * i), "end": make_timestamp(day * (i + 1)), "type": "1d", "snapshots": []} ) # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[ for i in xrange(1, 4): buckets.append( { "start": make_timestamp(week * i), "end": make_timestamp(week * (i + 1)), "type": "1w", "snapshots": [], } ) buckets.append({"start": make_timestamp(week * 4), "end": 0, "type": "rest", "snapshots": []}) # Place all snapshots in bucket_chains bucket_chains = [] for vmachine in VMachineList.get_customer_vmachines(): if any(vd.info["object_type"] in ["BASE"] for vd in vmachine.vdisks): bucket_chain = copy.deepcopy(buckets) for snapshot in vmachine.snapshots: timestamp = int(snapshot["timestamp"]) for bucket in bucket_chain: if bucket["start"] >= timestamp > bucket["end"]: for diskguid, snapshotguid in snapshot["snapshots"].iteritems(): bucket["snapshots"].append( { "timestamp": timestamp, "snapshotid": snapshotguid, "diskguid": diskguid, "is_consistent": snapshot["is_consistent"], } ) bucket_chains.append(bucket_chain) for vdisk in VDiskList.get_without_vmachine(): if vdisk.info["object_type"] in ["BASE"]: bucket_chain = copy.deepcopy(buckets) for snapshot in vdisk.snapshots: timestamp = int(snapshot["timestamp"]) for bucket in bucket_chain: if bucket["start"] >= timestamp > bucket["end"]: bucket["snapshots"].append( { "timestamp": timestamp, "snapshotid": snapshot["guid"], "diskguid": vdisk.guid, "is_consistent": snapshot["is_consistent"], } ) bucket_chains.append(bucket_chain) # Clean out the snapshot bucket_chains, we delete the snapshots we want to keep # And we'll remove all snapshots that remain in the buckets for bucket_chain in bucket_chains: first = True for bucket in bucket_chain: if first is True: best = None for snapshot in bucket["snapshots"]: if best is None: best = snapshot # Consistent is better than inconsistent elif snapshot["is_consistent"] and not best["is_consistent"]: best = snapshot # Newer (larger timestamp) is better than older snapshots elif ( snapshot["is_consistent"] == best["is_consistent"] and snapshot["timestamp"] > best["timestamp"] ): best = snapshot bucket["snapshots"] = [s for s in bucket["snapshots"] if s["timestamp"] != best["timestamp"]] first = False elif bucket["end"] > 0: oldest = None for snapshot in bucket["snapshots"]: if oldest is None: oldest = snapshot # Older (smaller timestamp) is the one we want to keep elif snapshot["timestamp"] < oldest["timestamp"]: oldest = snapshot bucket["snapshots"] = [s for s in bucket["snapshots"] if s["timestamp"] != oldest["timestamp"]] # Delete obsolete snapshots for bucket_chain in bucket_chains: for bucket in bucket_chain: for snapshot in bucket["snapshots"]: VDiskController.delete_snapshot(diskguid=snapshot["diskguid"], 
snapshotid=snapshot["snapshotid"]) logger.info("Delete snapshots finished")
def deletescrubsnapshots(timestamp=None): """ Delete snapshots & scrubbing policy Implemented delete snapshot policy: < 1d | 1d bucket | 1 | best of bucket | 1d < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m > 1m | delete """ logger.info('Delete snapshots started') day = 60 * 60 * 24 week = day * 7 # Calculate bucket structure if timestamp is None: timestamp = time.time() offset = int( mktime(datetime.fromtimestamp(timestamp).date().timetuple())) - day buckets = [] # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[ for i in xrange(0, 7): buckets.append({ 'start': offset - (day * i), 'end': offset - (day * (i + 1)), 'type': '1d', 'snapshots': [] }) # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[ for i in xrange(1, 4): buckets.append({ 'start': offset - (week * i), 'end': offset - (week * (i + 1)), 'type': '1w', 'snapshots': [] }) buckets.append({ 'start': offset - (week * 4), 'end': 0, 'type': 'rest', 'snapshots': [] }) # Place all snapshots in bucket_chains bucket_chains = [] for vmachine in VMachineList.get_customer_vmachines(): if any(vd.info['object_type'] in ['BASE'] for vd in vmachine.vdisks): bucket_chain = copy.deepcopy(buckets) for snapshot in vmachine.snapshots: timestamp = int(snapshot['timestamp']) for bucket in bucket_chain: if bucket['start'] >= timestamp > bucket['end']: for diskguid, snapshotguid in snapshot[ 'snapshots'].iteritems(): bucket['snapshots'].append({ 'timestamp': timestamp, 'snapshotid': snapshotguid, 'diskguid': diskguid, 'is_consistent': snapshot['is_consistent'] }) bucket_chains.append(bucket_chain) for vdisk in VDiskList.get_without_vmachine(): if vdisk.info['object_type'] in ['BASE']: bucket_chain = copy.deepcopy(buckets) for snapshot in vdisk.snapshots: timestamp = int(snapshot['timestamp']) for bucket in bucket_chain: if bucket['start'] >= timestamp > bucket['end']: bucket['snapshots'].append({ 'timestamp': timestamp, 'snapshotid': snapshot['guid'], 'diskguid': vdisk.guid, 'is_consistent': snapshot['is_consistent'] }) bucket_chains.append(bucket_chain) # Clean out the snapshot bucket_chains, we delete the snapshots we want to keep # And we'll remove all snapshots that remain in the buckets for bucket_chain in bucket_chains: first = True for bucket in bucket_chain: if first is True: best = None for snapshot in bucket['snapshots']: if best is None: best = snapshot # Consistent is better than inconsistent elif snapshot[ 'is_consistent'] and not best['is_consistent']: best = snapshot # Newer (larger timestamp) is better than older snapshots elif snapshot['is_consistent'] == best['is_consistent'] and \ snapshot['timestamp'] > best['timestamp']: best = snapshot bucket['snapshots'] = [ s for s in bucket['snapshots'] if s['timestamp'] != best['timestamp'] ] first = False elif bucket['end'] > 0: oldest = None for snapshot in bucket['snapshots']: if oldest is None: oldest = snapshot # Older (smaller timestamp) is the one we want to keep elif snapshot['timestamp'] < oldest['timestamp']: oldest = snapshot bucket['snapshots'] = [ s for s in bucket['snapshots'] if s['timestamp'] != oldest['timestamp'] ] # Delete obsolete snapshots for bucket_chain in bucket_chains: for bucket in bucket_chain: for snapshot in bucket['snapshots']: VDiskController.delete_snapshot( diskguid=snapshot['diskguid'], snapshotid=snapshot['snapshotid']) logger.info('Delete snapshots finished') logger.info('Scrubbing started') vdisks = [] for vmachine in VMachineList.get_customer_vmachines(): for vdisk in 
vmachine.vdisks: if vdisk.info['object_type'] in ['BASE'] and len( vdisk.child_vdisks) == 0: vdisks.append(vdisk) for vdisk in VDiskList.get_without_vmachine(): if vdisk.info['object_type'] in ['BASE'] and len( vdisk.child_vdisks) == 0: vdisks.append(vdisk) total = 0 failed = 0 skipped = 0 storagedrivers = {} for vdisk in vdisks: try: total += 1 # Load the vDisk's StorageDriver vdisk.invalidate_dynamics(['info', 'storagedriver_id']) if vdisk.storagedriver_id not in storagedrivers: storagedrivers[ vdisk. storagedriver_id] = StorageDriverList.get_by_storagedriver_id( vdisk.storagedriver_id) storagedriver = storagedrivers[vdisk.storagedriver_id] # Load the vDisk's MDS configuration vdisk.invalidate_dynamics(['info']) configs = vdisk.info['metadata_backend_config'] if len(configs) == 0: raise RuntimeError('Could not load MDS configuration') if configs[0]['ip'] != storagedriver.storagerouter.ip: # The MDS master is not local. Trigger an MDS handover and try again logger.debug( 'MDS for volume {0} is not local. Trigger handover'. format(vdisk.volume_id)) MDSServiceController.ensure_safety(vdisk) vdisk.invalidate_dynamics(['info']) configs = vdisk.info['metadata_backend_config'] if len(configs) == 0: raise RuntimeError('Could not load MDS configuration') if configs[0]['ip'] != storagedriver.storagerouter.ip: skipped += 1 logger.info( 'Skipping scrubbing work unit for volume {0}: MDS master is not local' .format(vdisk.volume_id)) continue work_units = vdisk.storagedriver_client.get_scrubbing_workunits( str(vdisk.volume_id)) for work_unit in work_units: scrubbing_result = _storagedriver_scrubber.scrub( work_unit, str(storagedriver.mountpoint_temp)) vdisk.storagedriver_client.apply_scrubbing_result( scrubbing_result) except Exception, ex: failed += 1 logger.info( 'Failed scrubbing work unit for volume {0}: {1}'.format( vdisk.volume_id, ex))
def create_from_template(name, machineguid, pmachineguid, description=None): """ Create a new vmachine using an existing vmachine template :param machineguid: guid of the template vmachine :param name: name of new vmachine :param pmachineguid: guid of hypervisor to create new vmachine on :param description: Description for the machine :return: guid of the newly created vmachine | False on any failure """ template_vm = VMachine(machineguid) if not template_vm.is_vtemplate: return False target_pm = PMachine(pmachineguid) target_hypervisor = Factory.get(target_pm) storagerouters = [sr for sr in StorageRouterList.get_storagerouters() if sr.pmachine_guid == target_pm.guid] if len(storagerouters) == 1: target_storagerouter = storagerouters[0] else: raise ValueError('Pmachine {0} has no StorageRouter assigned to it'.format(pmachineguid)) routing_key = "sr.{0}".format(target_storagerouter.machine_id) vpool = None vpool_guids = set() if template_vm.vpool is not None: vpool = template_vm.vpool vpool_guids.add(vpool.guid) for disk in template_vm.vdisks: vpool = disk.vpool vpool_guids.add(vpool.guid) if len(vpool_guids) != 1: raise RuntimeError('Only 1 vpool supported on template disk(s) - {0} found!'.format(len(vpool_guids))) if not template_vm.pmachine.hvtype == target_pm.hvtype: raise RuntimeError('Source and target hypervisor not identical') # Currently, only one vPool is supported, so we can just use whatever the `vpool` variable above # was set to as 'the' vPool for the code below. This obviously will have to change once vPool mixes # are supported. target_storagedriver = None source_storagedriver = None for vpool_storagedriver in vpool.storagedrivers: if vpool_storagedriver.storagerouter.pmachine_guid == target_pm.guid: target_storagedriver = vpool_storagedriver if vpool_storagedriver.storagerouter.pmachine_guid == template_vm.pmachine_guid: source_storagedriver = vpool_storagedriver if target_storagedriver is None: raise RuntimeError('Volume not served on target hypervisor') source_hv = Factory.get(template_vm.pmachine) target_hv = Factory.get(target_pm) if not source_hv.is_datastore_available(source_storagedriver.storage_ip, source_storagedriver.mountpoint): raise RuntimeError('Datastore unavailable on source hypervisor') if not target_hv.is_datastore_available(target_storagedriver.storage_ip, target_storagedriver.mountpoint): raise RuntimeError('Datastore unavailable on target hypervisor') source_vm = source_hv.get_vm_object(template_vm.hypervisor_id) if not source_vm: raise RuntimeError('VM with key reference {0} not found'.format(template_vm.hypervisor_id)) name_duplicates = VMachineList.get_vmachine_by_name(name) if name_duplicates is not None and len(name_duplicates) > 0: raise RuntimeError('A vMachine with name {0} already exists'.format(name)) vm_path = target_hypervisor.get_vmachine_path(name, target_storagedriver.storagerouter.machine_id) new_vm = VMachine() new_vm.copy(template_vm) new_vm.hypervisor_id = '' new_vm.vpool = template_vm.vpool new_vm.pmachine = target_pm new_vm.name = name new_vm.description = description new_vm.is_vtemplate = False new_vm.devicename = target_hypervisor.clean_vmachine_filename(vm_path) new_vm.status = 'CREATED' new_vm.save() storagedrivers = [storagedriver for storagedriver in vpool.storagedrivers if storagedriver.storagerouter.pmachine_guid == new_vm.pmachine_guid] if len(storagedrivers) == 0: raise RuntimeError('Cannot find Storage Driver serving {0} on {1}'.format(vpool.name, new_vm.pmachine.name)) disks = [] disks_by_order = 
sorted(template_vm.vdisks, key=lambda x: x.order) for disk in disks_by_order: try: prefix = '{0}-clone'.format(disk.name) result = VDiskController.create_from_template( diskguid=disk.guid, devicename=prefix, pmachineguid=target_pm.guid, machinename=new_vm.name, machineguid=new_vm.guid ) disks.append(result) VMachineController._logger.debug('Disk appended: {0}'.format(result)) except Exception as exception: VMachineController._logger.error('Creation of disk {0} failed: {1}'.format(disk.name, str(exception)), print_msg=True) VMachineController.delete.s(machineguid=new_vm.guid).apply_async(routing_key = routing_key) raise try: result = target_hv.create_vm_from_template( name, source_vm, disks, target_storagedriver.storage_ip, target_storagedriver.mountpoint, wait=True ) except Exception as exception: VMachineController._logger.error('Creation of vm {0} on hypervisor failed: {1}'.format(new_vm.name, str(exception)), print_msg=True) VMachineController.delete.s(machineguid=new_vm.guid).apply_async(routing_key = routing_key) raise new_vm.hypervisor_id = result new_vm.status = 'SYNC' new_vm.save() return new_vm.guid
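Both copies of create_from_template in this file insist that the template and all of its disks live on exactly one vPool before cloning. A reduced sketch of that guard, using stand-in classes (`_Disk`, `_Template`) instead of the DAL objects:

# Sketch: only the attributes the guard actually reads are modelled here.
class _Disk(object):
    def __init__(self, vpool_guid):
        self.vpool_guid = vpool_guid

class _Template(object):
    def __init__(self, vpool_guid, disks):
        self.vpool_guid = vpool_guid
        self.vdisks = disks

def single_vpool_guid(template):
    """Return the one vPool guid shared by the template and its disks, or raise."""
    vpool_guids = set()
    if template.vpool_guid is not None:
        vpool_guids.add(template.vpool_guid)
    for disk in template.vdisks:
        vpool_guids.add(disk.vpool_guid)
    if len(vpool_guids) != 1:
        raise RuntimeError('Only 1 vpool supported on template disk(s) - {0} found!'.format(len(vpool_guids)))
    return vpool_guids.pop()

print(single_vpool_guid(_Template('vp-1', [_Disk('vp-1'), _Disk('vp-1')])))  # 'vp-1'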
def update_from_voldrv(name, storagedriver_id): """ This method will update/create a vmachine based on a given vmx/xml file :param name: Name of the vmx :param storagedriver_id: Storage Driver hosting the vmachine """ pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id) if pmachine.hvtype not in ['VMWARE', 'KVM']: return hypervisor = Factory.get(pmachine) name = hypervisor.clean_vmachine_filename(name) storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id) vpool = storagedriver.vpool machine_ids = [sd.storagerouter.machine_id for sd in vpool.storagedrivers] if hypervisor.should_process(name, machine_ids=machine_ids): if pmachine.hvtype == 'VMWARE': storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id) vpool = storagedriver.vpool else: vpool = None pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id) mutex = volatile_mutex('{0}_{1}'.format(name, vpool.guid if vpool is not None else 'none')) try: mutex.acquire(wait=120) limit = 5 exists = hypervisor.file_exists(storagedriver, name) while limit > 0 and exists is False: time.sleep(1) exists = hypervisor.file_exists(storagedriver, name) limit -= 1 if exists is False: VMachineController._logger.info('Could not locate vmachine with name {0} on vpool {1}'.format(name, vpool.name if vpool is not None else 'None')) vmachine = VMachineList.get_by_devicename_and_vpool(name, vpool) if vmachine is not None: VMachineController.delete_from_voldrv(name, storagedriver_id=storagedriver_id) return finally: mutex.release() try: mutex.acquire(wait=5) vmachine = VMachineList.get_by_devicename_and_vpool(name, vpool) if not vmachine: vmachines = VMachineList.get_vmachine_by_name(name) if vmachines is not None: vmachine = vmachines[0] if not vmachine: vmachine = VMachine() vmachine.vpool = vpool vmachine.pmachine = pmachine vmachine.status = 'CREATED' vmachine.devicename = name vmachine.save() finally: mutex.release() if pmachine.hvtype == 'KVM': try: mutex.acquire(wait=120) VMachineController.sync_with_hypervisor(vmachine.guid, storagedriver_id=storagedriver_id) vmachine.status = 'SYNC' except: vmachine.status = 'SYNC_NOK' finally: mutex.release() vmachine.save() else: VMachineController._logger.info('Ignored invalid file {0}'.format(name))
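Before deciding that a machine was removed, update_from_voldrv briefly polls the volumedriver for the vmx/xml file. A small, generic sketch of that bounded-polling pattern (the check callable and the timings here are placeholders; in the controller it is hypervisor.file_exists with five one-second retries):

import time

def wait_for(check, attempts=5, delay=1):
    """Poll check() once, then retry up to `attempts` times with `delay` seconds in between."""
    result = check()
    while attempts > 0 and result is False:
        time.sleep(delay)
        result = check()
        attempts -= 1
    return result

# Hypothetical check that never succeeds
print(wait_for(lambda: False, attempts=2, delay=0))  # False; the lambda was polled 3 times (1 try + 2 retries)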
def get_vmachines(): """ Retrieve all Virtual Machines :return: Virtual Machine data-object list """ return VMachineList.get_vmachines()
def deletescrubsnapshots(timestamp=None): """ Delete snapshots & scrubbing policy Implemented delete snapshot policy: < 1d | 1d bucket | 1 | best of bucket | 1d < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m > 1m | delete """ logger.info('Delete snapshots started') day = 60 * 60 * 24 week = day * 7 # Calculate bucket structure if timestamp is None: timestamp = time.time() offset = int(mktime(datetime.fromtimestamp(timestamp).date().timetuple())) - day buckets = [] # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[ for i in xrange(0, 7): buckets.append({'start': offset - (day * i), 'end': offset - (day * (i + 1)), 'type': '1d', 'snapshots': []}) # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[ for i in xrange(1, 4): buckets.append({'start': offset - (week * i), 'end': offset - (week * (i + 1)), 'type': '1w', 'snapshots': []}) buckets.append({'start': offset - (week * 4), 'end': 0, 'type': 'rest', 'snapshots': []}) # Place all snapshots in bucket_chains bucket_chains = [] for vmachine in VMachineList.get_customer_vmachines(): if any(vd.info['object_type'] in ['BASE'] for vd in vmachine.vdisks): bucket_chain = copy.deepcopy(buckets) for snapshot in vmachine.snapshots: timestamp = int(snapshot['timestamp']) for bucket in bucket_chain: if bucket['start'] >= timestamp > bucket['end']: for diskguid, snapshotguid in snapshot['snapshots'].iteritems(): bucket['snapshots'].append({'timestamp': timestamp, 'snapshotid': snapshotguid, 'diskguid': diskguid, 'is_consistent': snapshot['is_consistent']}) bucket_chains.append(bucket_chain) for vdisk in VDiskList.get_without_vmachine(): if vdisk.info['object_type'] in ['BASE']: bucket_chain = copy.deepcopy(buckets) for snapshot in vdisk.snapshots: timestamp = int(snapshot['timestamp']) for bucket in bucket_chain: if bucket['start'] >= timestamp > bucket['end']: bucket['snapshots'].append({'timestamp': timestamp, 'snapshotid': snapshot['guid'], 'diskguid': vdisk.guid, 'is_consistent': snapshot['is_consistent']}) bucket_chains.append(bucket_chain) # Clean out the snapshot bucket_chains, we delete the snapshots we want to keep # And we'll remove all snapshots that remain in the buckets for bucket_chain in bucket_chains: first = True for bucket in bucket_chain: if first is True: best = None for snapshot in bucket['snapshots']: if best is None: best = snapshot # Consistent is better than inconsistent elif snapshot['is_consistent'] and not best['is_consistent']: best = snapshot # Newer (larger timestamp) is better than older snapshots elif snapshot['is_consistent'] == best['is_consistent'] and \ snapshot['timestamp'] > best['timestamp']: best = snapshot bucket['snapshots'] = [s for s in bucket['snapshots'] if s['timestamp'] != best['timestamp']] first = False elif bucket['end'] > 0: oldest = None for snapshot in bucket['snapshots']: if oldest is None: oldest = snapshot # Older (smaller timestamp) is the one we want to keep elif snapshot['timestamp'] < oldest['timestamp']: oldest = snapshot bucket['snapshots'] = [s for s in bucket['snapshots'] if s['timestamp'] != oldest['timestamp']] # Delete obsolete snapshots for bucket_chain in bucket_chains: for bucket in bucket_chain: for snapshot in bucket['snapshots']: VDiskController.delete_snapshot(diskguid=snapshot['diskguid'], snapshotid=snapshot['snapshotid']) logger.info('Delete snapshots finished') logger.info('Scrubbing started') vdisks = [] for vmachine in VMachineList.get_customer_vmachines(): for vdisk in vmachine.vdisks: if 
vdisk.info['object_type'] in ['BASE'] and len(vdisk.child_vdisks) == 0: vdisks.append(vdisk) for vdisk in VDiskList.get_without_vmachine(): if vdisk.info['object_type'] in ['BASE'] and len(vdisk.child_vdisks) == 0: vdisks.append(vdisk) total = 0 failed = 0 skipped = 0 storagedrivers = {} for vdisk in vdisks: try: total += 1 # Load the vDisk's StorageDriver vdisk.invalidate_dynamics(['info', 'storagedriver_id']) if vdisk.storagedriver_id not in storagedrivers: storagedrivers[vdisk.storagedriver_id] = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id) storagedriver = storagedrivers[vdisk.storagedriver_id] # Load the vDisk's MDS configuration vdisk.invalidate_dynamics(['info']) configs = vdisk.info['metadata_backend_config'] if len(configs) == 0: raise RuntimeError('Could not load MDS configuration') if configs[0]['ip'] != storagedriver.storagerouter.ip: # The MDS master is not local. Trigger an MDS handover and try again logger.debug('MDS for volume {0} is not local. Trigger handover'.format(vdisk.volume_id)) MDSServiceController.ensure_safety(vdisk) vdisk.invalidate_dynamics(['info']) configs = vdisk.info['metadata_backend_config'] if len(configs) == 0: raise RuntimeError('Could not load MDS configuration') if configs[0]['ip'] != storagedriver.storagerouter.ip: skipped += 1 logger.info('Skipping scrubbing work unit for volume {0}: MDS master is not local'.format(vdisk.volume_id)) continue work_units = vdisk.storagedriver_client.get_scrubbing_workunits(str(vdisk.volume_id)) for work_unit in work_units: scrubbing_result = _storagedriver_scrubber.scrub(work_unit, str(storagedriver.mountpoint_temp)) vdisk.storagedriver_client.apply_scrubbing_result(scrubbing_result) except Exception as ex: failed += 1 logger.info('Failed scrubbing work unit for volume {0}: {1}'.format(vdisk.volume_id, ex))
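The retention policy in deletescrubsnapshots builds one-day buckets for the first week and one-week buckets for the following three weeks, all ending at midnight of the day before the reference timestamp. A standalone sketch of that bucket construction and of mapping a snapshot timestamp onto a bucket:

import time
from time import mktime
from datetime import datetime

def build_buckets(timestamp=None):
    """Rebuild the day/week bucket layout used by the snapshot policy above."""
    day = 60 * 60 * 24
    week = day * 7
    if timestamp is None:
        timestamp = time.time()
    # Midnight at the start of the day before `timestamp`
    offset = int(mktime(datetime.fromtimestamp(timestamp).date().timetuple())) - day
    buckets = []
    for i in range(0, 7):   # seven 1-day buckets
        buckets.append({'start': offset - (day * i), 'end': offset - (day * (i + 1)), 'type': '1d', 'snapshots': []})
    for i in range(1, 4):   # three 1-week buckets
        buckets.append({'start': offset - (week * i), 'end': offset - (week * (i + 1)), 'type': '1w', 'snapshots': []})
    buckets.append({'start': offset - (week * 4), 'end': 0, 'type': 'rest', 'snapshots': []})
    return buckets

def bucket_for(buckets, snapshot_timestamp):
    """Return the bucket whose half-open interval (end, start] contains the timestamp."""
    for bucket in buckets:
        if bucket['start'] >= snapshot_timestamp > bucket['end']:
            return bucket
    return None

buckets = build_buckets()
two_days_ago = int(time.time()) - 2 * 86400
print(bucket_for(buckets, two_days_ago)['type'])  # '1d'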
def gather_scrub_work(): """ Retrieve and execute scrub work :return: None """ logger.info('Gather Scrub - Started') scrub_locations = {} for storage_driver in StorageDriverList.get_storagedrivers(): for partition in storage_driver.partitions: if DiskPartition.ROLES.SCRUB == partition.role: logger.info( 'Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}' .format(storage_driver.storagerouter.ip, partition.path)) if storage_driver.storagerouter not in scrub_locations: try: _ = SSHClient(storage_driver.storagerouter) scrub_locations[ storage_driver.storagerouter] = str( partition.path) except UnableToConnectException: logger.warning( 'Gather Scrub - Storage Router {0:<15} is not reachable' .format(storage_driver.storagerouter.ip)) if len(scrub_locations) == 0: raise RuntimeError('No scrub locations found') vdisk_guids = set() for vmachine in VMachineList.get_customer_vmachines(): for vdisk in vmachine.vdisks: if vdisk.info['object_type'] == 'BASE': vdisk_guids.add(vdisk.guid) for vdisk in VDiskList.get_without_vmachine(): if vdisk.info['object_type'] == 'BASE': vdisk_guids.add(vdisk.guid) logger.info( 'Gather Scrub - Checking {0} volumes for scrub work'.format( len(vdisk_guids))) local_machineid = System.get_my_machine_id() local_storage_router = None local_scrub_location = None local_vdisks_to_scrub = [] result_set = ResultSet([]) storage_router_list = [] for index, scrub_info in enumerate(scrub_locations.items()): start_index = index * len(vdisk_guids) / len(scrub_locations) end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations) storage_router = scrub_info[0] vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index] local = storage_router.machine_id == local_machineid logger.info( 'Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks' .format(storage_router.ip, 'local' if local is True else 'remote', len(vdisk_guids_to_scrub))) if local is True: local_storage_router = storage_router local_scrub_location = scrub_info[1] local_vdisks_to_scrub = vdisk_guids_to_scrub else: result_set.add( ScheduledTaskController._execute_scrub_work.s( scrub_location=scrub_info[1], vdisk_guids=vdisk_guids_to_scrub).apply_async( routing_key='sr.{0}'.format( storage_router.machine_id))) storage_router_list.append(storage_router) # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish processed_guids = [] if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0: try: processed_guids = ScheduledTaskController._execute_scrub_work( scrub_location=local_scrub_location, vdisk_guids=local_vdisks_to_scrub) except Exception as ex: logger.error( 'Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}' .format(local_storage_router.ip, ex)) all_results = result_set.join( propagate=False ) # Propagate False makes sure all jobs are waited for even when 1 or more jobs fail for index, result in enumerate(all_results): if isinstance(result, list): processed_guids.extend(result) else: logger.error( 'Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}' .format(storage_router_list[index].ip, result)) if len(processed_guids) != len(vdisk_guids) or set( processed_guids).difference(vdisk_guids): raise RuntimeError('Scrubbing failed for 1 or more storagerouters') logger.info('Gather Scrub - Finished')
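gather_scrub_work splits the candidate vDisks roughly evenly over the available scrub locations using simple integer slicing. A standalone sketch of that partitioning, with made-up guids and StorageRouter names:

def partition(vdisk_guids, scrub_locations):
    """Divide vdisk_guids over scrub_locations the same way the controller's index arithmetic does."""
    vdisk_guids = list(vdisk_guids)
    assignments = {}
    for index, location in enumerate(scrub_locations):
        start_index = index * len(vdisk_guids) // len(scrub_locations)
        end_index = (index + 1) * len(vdisk_guids) // len(scrub_locations)
        assignments[location] = vdisk_guids[start_index:end_index]
    return assignments

# Hypothetical data: 7 disks over 3 StorageRouters
guids = ['vd-{0}'.format(i) for i in range(7)]
print(partition(guids, ['sr-1', 'sr-2', 'sr-3'])['sr-3'])  # ['vd-4', 'vd-5', 'vd-6']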
def create_from_template(name, machineguid, pmachineguid, description=None): """ Create a new vmachine using an existing vmachine template @param machineguid: guid of the template vmachine @param name: name of new vmachine @param pmachineguid: guid of hypervisor to create new vmachine on @return: guid of the newly created vmachine | False on any failure """ template_vm = VMachine(machineguid) if not template_vm.is_vtemplate: return False target_pm = PMachine(pmachineguid) target_hypervisor = Factory.get(target_pm) storagerouters = [ sr for sr in StorageRouterList.get_storagerouters() if sr.pmachine_guid == target_pm.guid ] if len(storagerouters) == 1: target_storagerouter = storagerouters[0] else: raise ValueError( 'Pmachine {} has no StorageRouter assigned to it'.format( pmachineguid)) routing_key = "sr.{0}".format(target_storagerouter.machine_id) vpool = None vpool_guids = set() if template_vm.vpool is not None: vpool = template_vm.vpool vpool_guids.add(vpool.guid) for disk in template_vm.vdisks: vpool = disk.vpool vpool_guids.add(vpool.guid) if len(vpool_guids) != 1: raise RuntimeError( 'Only 1 vpool supported on template disk(s) - {0} found!'. format(len(vpool_guids))) if not template_vm.pmachine.hvtype == target_pm.hvtype: raise RuntimeError('Source and target hypervisor not identical') # Currently, only one vPool is supported, so we can just use whatever the `vpool` variable above # was set to as 'the' vPool for the code below. This obviously will have to change once vPool mixes # are supported. target_storagedriver = None source_storagedriver = None for vpool_storagedriver in vpool.storagedrivers: if vpool_storagedriver.storagerouter.pmachine_guid == target_pm.guid: target_storagedriver = vpool_storagedriver if vpool_storagedriver.storagerouter.pmachine_guid == template_vm.pmachine_guid: source_storagedriver = vpool_storagedriver if target_storagedriver is None: raise RuntimeError('Volume not served on target hypervisor') source_hv = Factory.get(template_vm.pmachine) target_hv = Factory.get(target_pm) if not source_hv.is_datastore_available( source_storagedriver.storage_ip, source_storagedriver.mountpoint): raise RuntimeError('Datastore unavailable on source hypervisor') if not target_hv.is_datastore_available( target_storagedriver.storage_ip, target_storagedriver.mountpoint): raise RuntimeError('Datastore unavailable on target hypervisor') source_vm = source_hv.get_vm_object(template_vm.hypervisor_id) if not source_vm: raise RuntimeError('VM with key reference {0} not found'.format( template_vm.hypervisor_id)) name_duplicates = VMachineList.get_vmachine_by_name(name) if name_duplicates is not None and len(name_duplicates) > 0: raise RuntimeError( 'A vMachine with name {0} already exists'.format(name)) vm_path = target_hypervisor.get_vmachine_path( name, target_storagedriver.storagerouter.machine_id) new_vm = VMachine() new_vm.copy(template_vm) new_vm.hypervisor_id = '' new_vm.vpool = template_vm.vpool new_vm.pmachine = target_pm new_vm.name = name new_vm.description = description new_vm.is_vtemplate = False new_vm.devicename = target_hypervisor.clean_vmachine_filename(vm_path) new_vm.status = 'CREATED' new_vm.save() storagedrivers = [ storagedriver for storagedriver in vpool.storagedrivers if storagedriver.storagerouter.pmachine_guid == new_vm.pmachine_guid ] if len(storagedrivers) == 0: raise RuntimeError( 'Cannot find Storage Driver serving {0} on {1}'.format( vpool.name, new_vm.pmachine.name)) storagedriverguid = storagedrivers[0].guid disks = [] disks_by_order = 
sorted(template_vm.vdisks, key=lambda x: x.order) try: for disk in disks_by_order: prefix = '{0}-clone'.format(disk.name) result = VDiskController.create_from_template( diskguid=disk.guid, devicename=prefix, pmachineguid=target_pm.guid, machinename=new_vm.name, machineguid=new_vm.guid, storagedriver_guid=storagedriverguid) disks.append(result) logger.debug('Disk appended: {0}'.format(result)) except Exception as exception: logger.error('Creation of disk {0} failed: {1}'.format( disk.name, str(exception)), print_msg=True) VMachineController.delete.s(machineguid=new_vm.guid).apply_async( routing_key=routing_key) raise try: result = target_hv.create_vm_from_template( name, source_vm, disks, target_storagedriver.storage_ip, target_storagedriver.mountpoint, wait=True) except Exception as exception: logger.error('Creation of vm {0} on hypervisor failed: {1}'.format( new_vm.name, str(exception)), print_msg=True) VMachineController.delete.s(machineguid=new_vm.guid).apply_async( routing_key=routing_key) raise new_vm.hypervisor_id = result new_vm.status = 'SYNC' new_vm.save() return new_vm.guid
def gather_scrub_work(): """ Retrieve and execute scrub work :return: None """ ScheduledTaskController._logger.info('Gather Scrub - Started') scrub_locations = {} for storage_driver in StorageDriverList.get_storagedrivers(): for partition in storage_driver.partitions: if DiskPartition.ROLES.SCRUB == partition.role: ScheduledTaskController._logger.info('Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}'.format(storage_driver.storagerouter.ip, partition.path)) if storage_driver.storagerouter not in scrub_locations: try: sshclient = SSHClient(storage_driver.storagerouter) # Use ServiceManager(sshclient) to make sure ovs-workers are actually running if ServiceManager.get_service_status('workers', sshclient) is False: ScheduledTaskController._logger.warning('Gather Scrub - Storage Router {0:<15} - workers are not running'.format(storage_driver.storagerouter.ip)) else: scrub_locations[storage_driver.storagerouter] = str(partition.path) except UnableToConnectException: ScheduledTaskController._logger.warning('Gather Scrub - Storage Router {0:<15} is not reachable'.format(storage_driver.storagerouter.ip)) if len(scrub_locations) == 0: raise RuntimeError('No scrub locations found') vdisk_guids = set() for vmachine in VMachineList.get_customer_vmachines(): for vdisk in vmachine.vdisks: if vdisk.info['object_type'] == 'BASE': vdisk_guids.add(vdisk.guid) for vdisk in VDiskList.get_without_vmachine(): if vdisk.info['object_type'] == 'BASE': vdisk_guids.add(vdisk.guid) if len(vdisk_guids) == 0: ScheduledTaskController._logger.info('Gather Scrub - No scrub work needed'.format(len(vdisk_guids))) return ScheduledTaskController._logger.info('Gather Scrub - Checking {0} volumes for scrub work'.format(len(vdisk_guids))) local_machineid = System.get_my_machine_id() local_storage_router = None local_scrub_location = None local_vdisks_to_scrub = [] result_set = {} storage_router_list = [] scrub_map = {} for index, scrub_info in enumerate(scrub_locations.items()): start_index = index * len(vdisk_guids) / len(scrub_locations) end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations) storage_router = scrub_info[0] vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index] local = storage_router.machine_id == local_machineid ScheduledTaskController._logger.info('Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks'.format(storage_router.ip, 'local' if local is True else 'remote', len(vdisk_guids_to_scrub))) if local is True: local_storage_router = storage_router local_scrub_location = scrub_info[1] local_vdisks_to_scrub = vdisk_guids_to_scrub else: result_set[storage_router.ip] = ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_info[1], vdisk_guids=vdisk_guids_to_scrub).apply_async(routing_key='sr.{0}'.format(storage_router.machine_id)) storage_router_list.append(storage_router) scrub_map[storage_router.ip] = vdisk_guids_to_scrub # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish processed_guids = [] if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0: try: processed_guids = ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location, vdisk_guids=local_vdisks_to_scrub) except Exception as ex: ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(local_storage_router.ip, ex)) all_results, failed_nodes = CeleryToolbox.manage_running_tasks(result_set, timesleep=60) # Check every 60 seconds if 
tasks are still running for ip, result in all_results.iteritems(): if isinstance(result, list): processed_guids.extend(result) else: ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(ip, result)) result_set = {} for failed_node in failed_nodes: ScheduledTaskController._logger.warning('Scrubbing failed on node {0}. Will reschedule on another node.'.format(failed_node)) vdisk_guids_to_scrub = scrub_map[failed_node] rescheduled_work = False for storage_router, scrub_location in scrub_locations.items(): if storage_router.ip not in failed_nodes: if storage_router.machine_id != local_machineid: ScheduledTaskController._logger.info('Rescheduled scrub work from node {0} to node {1}.'.format(failed_node, storage_router.ip)) result_set[storage_router.ip] = ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_location, vdisk_guids=vdisk_guids_to_scrub).apply_async( routing_key='sr.{0}'.format(storage_router.machine_id)) storage_router_list.append(storage_router) rescheduled_work = True break if rescheduled_work is False: if local_scrub_location is not None: try: processed_guids.extend(ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location, vdisk_guids=vdisk_guids_to_scrub)) except Exception as ex: ScheduledTaskController._logger.error( 'Gather Scrub - Storage Router Local - Scrubbing failed with error:\n - {0}'.format(ex)) else: ScheduledTaskController._logger.warning('No nodes left to reschedule work from node {0}'.format(failed_node)) if len(result_set) > 0: all_results2, failed_nodes = CeleryToolbox.manage_running_tasks(result_set, timesleep=60) # Check every 60 seconds if tasks are still running for ip, result in all_results2.iteritems(): if isinstance(result, list): processed_guids.extend(result) else: ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(ip, result)) if len(set(processed_guids)) != len(vdisk_guids) or set(processed_guids).difference(vdisk_guids): raise RuntimeError('Scrubbing failed for 1 or more storagerouters') ScheduledTaskController._logger.info('Gather Scrub - Finished')
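When a remote worker dies mid-scrub, this newer gather_scrub_work reassigns that node's vDisk list to another reachable node, falling back to running it locally when no remote node is left. A reduced, Celery-independent sketch of the reschedule decision; the dict shapes here are simplified stand-ins for the StorageRouter objects:

def reschedule(failed_node_ip, scrub_map, scrub_locations, local_machine_id):
    """Return (target_ip, vdisk_guids) to retry remotely, or None so the caller falls back to local work.

    scrub_map maps node ip -> vdisk guids originally assigned to it;
    scrub_locations maps (ip, machine_id) -> scrub location path."""
    vdisk_guids = scrub_map[failed_node_ip]
    for (ip, machine_id), location in scrub_locations.items():
        if ip == failed_node_ip:
            continue  # do not hand the work back to the node that just failed
        if machine_id == local_machine_id:
            continue  # the local node already ran its own share synchronously
        return ip, vdisk_guids
    return None

# Hypothetical topology: one failed node, one local node, nothing else
locations = {('10.0.0.1', 'id-1'): '/mnt/scrub1', ('10.0.0.2', 'id-2'): '/mnt/scrub2'}
print(reschedule('10.0.0.1', {'10.0.0.1': ['vd-1']}, locations, 'id-2'))  # None: only the local node remains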
def deletescrubsnapshots(timestamp=None): """ Delete snapshots & scrubbing policy Implemented delete snapshot policy: < 1d | 1d bucket | 1 | best of bucket | 1d < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m > 1m | delete """ logger.info('Delete snapshots started') day = 60 * 60 * 24 week = day * 7 # Calculate bucket structure if timestamp is None: timestamp = time.time() offset = int(mktime(datetime.fromtimestamp(timestamp).date().timetuple())) - day buckets = [] # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[ for i in xrange(0, 7): buckets.append({'start': offset - (day * i), 'end': offset - (day * (i + 1)), 'type': '1d', 'snapshots': []}) # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[ for i in xrange(1, 4): buckets.append({'start': offset - (week * i), 'end': offset - (week * (i + 1)), 'type': '1w', 'snapshots': []}) buckets.append({'start': offset - (week * 4), 'end': 0, 'type': 'rest', 'snapshots': []}) # Place all snapshots in bucket_chains bucket_chains = [] for vmachine in VMachineList.get_customer_vmachines(): if any(vd.info['object_type'] in ['BASE'] for vd in vmachine.vdisks): bucket_chain = copy.deepcopy(buckets) for snapshot in vmachine.snapshots: timestamp = int(snapshot['timestamp']) for bucket in bucket_chain: if bucket['start'] >= timestamp > bucket['end']: for diskguid, snapshotguid in snapshot['snapshots'].iteritems(): bucket['snapshots'].append({'timestamp': timestamp, 'snapshotid': snapshotguid, 'diskguid': diskguid, 'is_consistent': snapshot['is_consistent']}) bucket_chains.append(bucket_chain) for vdisk in VDiskList.get_without_vmachine(): if vdisk.info['object_type'] in ['BASE']: bucket_chain = copy.deepcopy(buckets) for snapshot in vdisk.snapshots: timestamp = int(snapshot['timestamp']) for bucket in bucket_chain: if bucket['start'] >= timestamp > bucket['end']: bucket['snapshots'].append({'timestamp': timestamp, 'snapshotid': snapshot['guid'], 'diskguid': vdisk.guid, 'is_consistent': snapshot['is_consistent']}) bucket_chains.append(bucket_chain) # Clean out the snapshot bucket_chains, we delete the snapshots we want to keep # And we'll remove all snapshots that remain in the buckets for bucket_chain in bucket_chains: first = True for bucket in bucket_chain: if first is True: best = None for snapshot in bucket['snapshots']: if best is None: best = snapshot # Consistent is better than inconsistent elif snapshot['is_consistent'] and not best['is_consistent']: best = snapshot # Newer (larger timestamp) is better than older snapshots elif snapshot['is_consistent'] == best['is_consistent'] and \ snapshot['timestamp'] > best['timestamp']: best = snapshot bucket['snapshots'] = [s for s in bucket['snapshots'] if s['timestamp'] != best['timestamp']] first = False elif bucket['end'] > 0: oldest = None for snapshot in bucket['snapshots']: if oldest is None: oldest = snapshot # Older (smaller timestamp) is the one we want to keep elif snapshot['timestamp'] < oldest['timestamp']: oldest = snapshot bucket['snapshots'] = [s for s in bucket['snapshots'] if s['timestamp'] != oldest['timestamp']] # Delete obsolete snapshots for bucket_chain in bucket_chains: for bucket in bucket_chain: for snapshot in bucket['snapshots']: VDiskController.delete_snapshot(diskguid=snapshot['diskguid'], snapshotid=snapshot['snapshotid']) logger.info('Delete snapshots finished') logger.info('Scrubbing started') vdisks = [] for vmachine in VMachineList.get_customer_vmachines(): for vdisk in vmachine.vdisks: if 
vdisk.info['object_type'] in ['BASE']: vdisks.append(vdisk) for vdisk in VDiskList.get_without_vmachine(): if vdisk.info['object_type'] in ['BASE']: vdisks.append(vdisk) total = 0 failed = 0 for vdisk in vdisks: work_units = vdisk.storagedriver_client.get_scrubbing_workunits(str(vdisk.volume_id)) for work_unit in work_units: try: total += 1 scrubbing_result = _storagedriver_scrubber.scrub(work_unit, vdisk.vpool.mountpoint_temp) vdisk.storagedriver_client.apply_scrubbing_result(scrubbing_result) except: failed += 1 logger.info('Failed scrubbing work unit for volume {}'.format( vdisk.volume_id )) logger.info('Scrubbing finished. {} out of {} items failed.'.format( failed, total ))
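Within the most recent bucket the policy keeps the "best" snapshot (a consistent one beats an inconsistent one, then the newer timestamp wins), while older buckets keep only their oldest snapshot. A small sketch of the best-of-bucket rule on plain dicts:

def pick_best(snapshots):
    """Prefer consistent snapshots; among equally consistent ones, prefer the newest timestamp."""
    best = None
    for snapshot in snapshots:
        if best is None:
            best = snapshot
        elif snapshot['is_consistent'] and not best['is_consistent']:
            best = snapshot
        elif snapshot['is_consistent'] == best['is_consistent'] and snapshot['timestamp'] > best['timestamp']:
            best = snapshot
    return best

snapshots = [{'timestamp': 100, 'is_consistent': False},
             {'timestamp': 90, 'is_consistent': True},
             {'timestamp': 95, 'is_consistent': True}]
print(pick_best(snapshots)['timestamp'])  # 95: the newest consistent snapshot wins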
def remove_storagedriver(storagedriver_guid): """ Removes a Storage Driver (and, if it was the last Storage Driver for a vPool, the vPool is removed as well) """ # Get objects & Make some checks storagedriver = StorageDriver(storagedriver_guid) storagerouter = storagedriver.storagerouter ip = storagerouter.ip pmachine = storagerouter.pmachine vmachines = VMachineList.get_customer_vmachines() pmachine_guids = [vm.pmachine_guid for vm in vmachines] vpools_guids = [vm.vpool_guid for vm in vmachines if vm.vpool_guid is not None] vpool = storagedriver.vpool if pmachine.guid in pmachine_guids and vpool.guid in vpools_guids: raise RuntimeError('There are still vMachines served from the given Storage Driver') if any(vdisk for vdisk in vpool.vdisks if vdisk.storagedriver_id == storagedriver.storagedriver_id): raise RuntimeError('There are still vDisks served from the given Storage Driver') services = ['volumedriver_{0}'.format(vpool.name), 'failovercache_{0}'.format(vpool.name)] storagedrivers_left = False # Stop services for current_storagedriver in vpool.storagedrivers: if current_storagedriver.guid != storagedriver_guid: storagedrivers_left = True client = SSHClient.load(current_storagedriver.storagerouter.ip) for service in services: System.exec_remote_python(client, """ from ovs.plugin.provider.service import Service if Service.has_service('{0}'): Service.disable_service('{0}') """.format(service)) System.exec_remote_python(client, """ from ovs.plugin.provider.service import Service if Service.has_service('{0}'): Service.stop_service('{0}') """.format(service)) # Unconfigure Cinder ovsdb = PersistentFactory.get_client() key = str('ovs_openstack_cinder_%s' % storagedriver.vpool_guid) if ovsdb.exists(key): cinder_password, cinder_user, tenant_name, controller_ip, _ = ovsdb.get(key) client = SSHClient.load(ip) System.exec_remote_python(client, """ from ovs.extensions.openstack.cinder import OpenStackCinder osc = OpenStackCinder(cinder_password = '******', cinder_user = '******', tenant_name = '{2}', controller_ip = '{3}') osc.unconfigure_vpool('{4}', '{5}', {6}) """.format(cinder_password, cinder_user, tenant_name, controller_ip, vpool.name, storagedriver.mountpoint, not storagedrivers_left)) if not storagedrivers_left: ovsdb.delete(key) # KVM pool client = SSHClient.load(ip) if pmachine.hvtype == 'KVM': if vpool.name in client.run('virsh pool-list'): client.run('virsh pool-destroy {0}'.format(vpool.name)) try: client.run('virsh pool-undefine {0}'.format(vpool.name)) except: pass # Ignore undefine errors, since that can happen on re-entrance # Remove services client = SSHClient.load(ip) for service in services: System.exec_remote_python(client, """ from ovs.plugin.provider.service import Service if Service.has_service('{0}'): Service.remove_service(domain='openvstorage', name='{0}') """.format(service)) configuration_dir = System.read_remote_config(client, 'ovs.core.cfgdir') voldrv_arakoon_cluster_id = str(System.read_remote_config(client, 'volumedriver.arakoon.clusterid')) voldrv_arakoon_cluster = ArakoonManagementEx().getCluster(voldrv_arakoon_cluster_id) voldrv_arakoon_client_config = voldrv_arakoon_cluster.getClientConfig() arakoon_node_configs = [] for arakoon_node in voldrv_arakoon_client_config.keys(): arakoon_node_configs.append(ArakoonNodeConfig(arakoon_node, voldrv_arakoon_client_config[arakoon_node][0][0], voldrv_arakoon_client_config[arakoon_node][1])) vrouter_clusterregistry = ClusterRegistry(str(vpool.guid), voldrv_arakoon_cluster_id, arakoon_node_configs) # Reconfigure volumedriver 
if storagedrivers_left: node_configs = [] for current_storagedriver in vpool.storagedrivers: if current_storagedriver.guid != storagedriver_guid: node_configs.append(ClusterNodeConfig(str(current_storagedriver.storagedriver_id), str(current_storagedriver.cluster_ip), current_storagedriver.ports[0], current_storagedriver.ports[1], current_storagedriver.ports[2])) vrouter_clusterregistry.set_node_configs(node_configs) else: try: storagedriver_client = LocalStorageRouterClient('{0}/voldrv_vpools/{1}.json'.format(configuration_dir, vpool.name)) storagedriver_client.destroy_filesystem() vrouter_clusterregistry.erase_node_configs() except RuntimeError as ex: print('Could not destroy filesystem or erase node configs due to error: {}'.format(ex)) # Cleanup directories client = SSHClient.load(ip) client.run('rm -rf {}/read1_{}'.format(storagedriver.mountpoint_readcache1, vpool.name)) if storagedriver.mountpoint_readcache2: client.run('rm -rf {}/read2_{}'.format(storagedriver.mountpoint_readcache2, vpool.name)) client.run('rm -rf {}/sco_{}'.format(storagedriver.mountpoint_writecache, vpool.name)) client.run('rm -rf {}/foc_{}'.format(storagedriver.mountpoint_foc, vpool.name)) client.run('rm -rf {}/fd_{}'.format(storagedriver.mountpoint_writecache, vpool.name)) client.run('rm -rf {}/metadata_{}'.format(storagedriver.mountpoint_md, vpool.name)) client.run('rm -rf {}/tlogs_{}'.format(storagedriver.mountpoint_md, vpool.name)) client.run('rm -rf /var/rsp/{}'.format(vpool.name)) # Remove files client.run('rm -f {0}/voldrv_vpools/{1}.json'.format(configuration_dir, vpool.name)) # Remove top directories client.run('if [ -d {0} ] && [ ! "$(ls -A {0})" ]; then rmdir {0}; fi'.format(storagedriver.mountpoint_readcache1)) if storagedriver.mountpoint_readcache2: client.run('if [ -d {0} ] && [ ! "$(ls -A {0})" ]; then rmdir {0}; fi'.format(storagedriver.mountpoint_readcache2)) client.run('if [ -d {0} ] && [ ! "$(ls -A {0})" ]; then rmdir {0}; fi'.format(storagedriver.mountpoint_writecache)) client.run('if [ -d {0} ] && [ ! "$(ls -A {0})" ]; then rmdir {0}; fi'.format(storagedriver.mountpoint_foc)) client.run('if [ -d {0} ] && [ ! "$(ls -A {0})" ]; then rmdir {0}; fi'.format(storagedriver.mountpoint_md)) client.run('if [ -d {0} ] && [ ! "$(ls -A {0})" ]; then rmdir {0}; fi'.format(storagedriver.mountpoint)) # First model cleanup storagedriver.delete(abandon=True) # Detach from the log entries if storagedrivers_left: # Restart leftover services for current_storagedriver in vpool.storagedrivers: if current_storagedriver.guid != storagedriver_guid: client = SSHClient.load(current_storagedriver.storagerouter.ip) for service in services: System.exec_remote_python(client, """ from ovs.plugin.provider.service import Service if Service.has_service('{0}'): Service.enable_service('{0}') """.format(service)) System.exec_remote_python(client, """ from ovs.plugin.provider.service import Service if Service.has_service('{0}'): Service.start_service('{0}') """.format(service)) else: # Final model cleanup vpool.delete()
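The cleanup phase of remove_storagedriver only removes a mount point directory when it exists and is empty, via the shell guard if [ -d {0} ] && [ ! "$(ls -A {0})" ]; then rmdir {0}; fi. A Python equivalent of that guard, shown purely for illustration of what the one-liner does:

import os
import tempfile

def remove_dir_if_empty(path):
    """Remove `path` only when it is an existing, empty directory (mirrors the shell guard above)."""
    if os.path.isdir(path) and not os.listdir(path):
        os.rmdir(path)
        return True
    return False

# Hypothetical example
empty_dir = tempfile.mkdtemp()
print(remove_dir_if_empty(empty_dir))           # True: directory was empty and is now gone
print(remove_dir_if_empty('/nonexistent/dir'))  # False: nothing to do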
def gather_scrub_work(): """ Retrieve and execute scrub work :return: None """ ScheduledTaskController._logger.info('Gather Scrub - Started') scrub_locations = {} for storage_driver in StorageDriverList.get_storagedrivers(): for partition in storage_driver.partitions: if DiskPartition.ROLES.SCRUB == partition.role: ScheduledTaskController._logger.info( 'Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}' .format(storage_driver.storagerouter.ip, partition.path)) if storage_driver.storagerouter not in scrub_locations: try: sshclient = SSHClient(storage_driver.storagerouter) # Use ServiceManager(sshclient) to make sure ovs-workers are actually running if ServiceManager.get_service_status( 'workers', sshclient) is False: ScheduledTaskController._logger.warning( 'Gather Scrub - Storage Router {0:<15} - workers are not running' .format(storage_driver.storagerouter.ip)) else: scrub_locations[ storage_driver.storagerouter] = str( partition.path) except UnableToConnectException: ScheduledTaskController._logger.warning( 'Gather Scrub - Storage Router {0:<15} is not reachable' .format(storage_driver.storagerouter.ip)) if len(scrub_locations) == 0: raise RuntimeError('No scrub locations found') vdisk_guids = set() for vmachine in VMachineList.get_customer_vmachines(): for vdisk in vmachine.vdisks: if vdisk.info['object_type'] == 'BASE': vdisk_guids.add(vdisk.guid) for vdisk in VDiskList.get_without_vmachine(): if vdisk.info['object_type'] == 'BASE': vdisk_guids.add(vdisk.guid) if len(vdisk_guids) == 0: ScheduledTaskController._logger.info( 'Gather Scrub - No scrub work needed'.format(len(vdisk_guids))) return ScheduledTaskController._logger.info( 'Gather Scrub - Checking {0} volumes for scrub work'.format( len(vdisk_guids))) local_machineid = System.get_my_machine_id() local_storage_router = None local_scrub_location = None local_vdisks_to_scrub = [] result_set = {} storage_router_list = [] scrub_map = {} for index, scrub_info in enumerate(scrub_locations.items()): start_index = index * len(vdisk_guids) / len(scrub_locations) end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations) storage_router = scrub_info[0] vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index] local = storage_router.machine_id == local_machineid ScheduledTaskController._logger.info( 'Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks' .format(storage_router.ip, 'local' if local is True else 'remote', len(vdisk_guids_to_scrub))) if local is True: local_storage_router = storage_router local_scrub_location = scrub_info[1] local_vdisks_to_scrub = vdisk_guids_to_scrub else: result_set[storage_router. 
ip] = ScheduledTaskController._execute_scrub_work.s( scrub_location=scrub_info[1], vdisk_guids=vdisk_guids_to_scrub).apply_async( routing_key='sr.{0}'.format( storage_router.machine_id)) storage_router_list.append(storage_router) scrub_map[storage_router.ip] = vdisk_guids_to_scrub # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish processed_guids = [] if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0: try: processed_guids = ScheduledTaskController._execute_scrub_work( scrub_location=local_scrub_location, vdisk_guids=local_vdisks_to_scrub) except Exception as ex: ScheduledTaskController._logger.error( 'Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}' .format(local_storage_router.ip, ex)) all_results, failed_nodes = CeleryToolbox.manage_running_tasks( result_set, timesleep=60) # Check every 60 seconds if tasks are still running for ip, result in all_results.iteritems(): if isinstance(result, list): processed_guids.extend(result) else: ScheduledTaskController._logger.error( 'Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}' .format(ip, result)) result_set = {} for failed_node in failed_nodes: ScheduledTaskController._logger.warning( 'Scrubbing failed on node {0}. Will reschedule on another node.' .format(failed_node)) vdisk_guids_to_scrub = scrub_map[failed_node] rescheduled_work = False for storage_router, scrub_location in scrub_locations.items(): if storage_router.ip not in failed_nodes: if storage_router.machine_id != local_machineid: ScheduledTaskController._logger.info( 'Rescheduled scrub work from node {0} to node {1}.' .format(failed_node, storage_router.ip)) result_set[ storage_router. ip] = ScheduledTaskController._execute_scrub_work.s( scrub_location=scrub_location, vdisk_guids=vdisk_guids_to_scrub).apply_async( routing_key='sr.{0}'.format( storage_router.machine_id)) storage_router_list.append(storage_router) rescheduled_work = True break if rescheduled_work is False: if local_scrub_location is not None: try: processed_guids.extend( ScheduledTaskController._execute_scrub_work( scrub_location=local_scrub_location, vdisk_guids=vdisk_guids_to_scrub)) except Exception as ex: ScheduledTaskController._logger.error( 'Gather Scrub - Storage Router Local - Scrubbing failed with error:\n - {0}' .format(ex)) else: ScheduledTaskController._logger.warning( 'No nodes left to reschedule work from node {0}'. format(failed_node)) if len(result_set) > 0: all_results2, failed_nodes = CeleryToolbox.manage_running_tasks( result_set, timesleep=60 ) # Check every 60 seconds if tasks are still running for ip, result in all_results2.iteritems(): if isinstance(result, list): processed_guids.extend(result) else: ScheduledTaskController._logger.error( 'Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}' .format(ip, result)) if len(set(processed_guids)) != len(vdisk_guids) or set( processed_guids).difference(vdisk_guids): raise RuntimeError('Scrubbing failed for 1 or more storagerouters') ScheduledTaskController._logger.info('Gather Scrub - Finished')
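After the local, remote and rescheduled runs complete, the controller verifies that every candidate vDisk was actually processed before declaring success. A compact sketch of that set-based check on plain guid collections:

def all_scrubbed(expected_guids, processed_guids):
    """True only when every expected guid was processed and nothing unexpected slipped in."""
    expected = set(expected_guids)
    processed = set(processed_guids)
    return len(processed) == len(expected) and not processed.difference(expected)

expected = {'vd-1', 'vd-2', 'vd-3'}
print(all_scrubbed(expected, ['vd-1', 'vd-2', 'vd-3', 'vd-2']))  # True: duplicates do not matter
print(all_scrubbed(expected, ['vd-1', 'vd-2']))                  # False: vd-3 was never scrubbed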