Example #1
    def invalidate_vmachine_status(self, name):
        if not name.endswith('.xml'):
            return
        devicename = '{0}/{1}'.format(System.get_my_machine_id(), name)
        vm = VMachineList.get_by_devicename_and_vpool(devicename, None)
        if vm:
            vm.invalidate_dynamics()
            logger.debug('Hypervisor status invalidated for: {0}'.format(name))
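Throughout these examples a vMachine's devicename follows the '<machine_id>/<file name>' convention built above. A tiny illustration (the machine id and file name are made-up values):

    # Illustrative values only -- machine ids are opaque strings in practice.
    machine_id = '05e9aa22'
    devicename = '{0}/{1}'.format(machine_id, 'vm01.xml')  # -> '05e9aa22/vm01.xml'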
Example #2
    def can_be_deleted(self, storagedriver):
        """
        Checks whether a Storage Driver can be deleted
        """
        result = True
        storagerouter = storagedriver.storagerouter
        storagedrivers_left = len([
            sd for sd in storagerouter.storagedrivers
            if sd.guid != storagedriver.guid
        ])
        pmachine = storagerouter.pmachine
        vmachines = VMachineList.get_customer_vmachines()
        vpools_guids = [
            vmachine.vpool_guid for vmachine in vmachines
            if vmachine.vpool_guid is not None
        ]
        pmachine_guids = [vmachine.pmachine_guid for vmachine in vmachines]
        vpool = storagedriver.vpool

        if storagedrivers_left == 0 and pmachine.guid in pmachine_guids and vpool.guid in vpools_guids:
            result = False
        if any(vdisk for vdisk in vpool.vdisks
               if vdisk.storagedriver_id == storagedriver.storagedriver_id):
            result = False
        return result
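A minimal usage sketch of the check above; the 'api' owner object and 'sd' Storage Driver below are stand-ins for illustration, not part of the original code:

    # Hypothetical caller: refuse deletion while the Storage Driver is in use.
    if not api.can_be_deleted(sd):
        raise RuntimeError('Storage Driver {0} is still in use'.format(sd.guid))
    # ... safe to proceed with the actual removal here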
Example #3
    def get_vmachine_by_name(name):
        """
        Retrieve the DAL vMachine object based on its name
        :param name: Name of the virtual machine
        :return: vMachine DAL object
        """
        return VMachineList.get_vmachine_by_name(vmname=name)
Example #4
    def delete_from_voldrv(name, storagedriver_id):
        """
        This method will delete a vMachine based on the name of the given vmx/xml file
        """
        pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
        if pmachine.hvtype not in ['VMWARE', 'KVM']:
            return

        hypervisor = Factory.get(pmachine)
        name = hypervisor.clean_vmachine_filename(name)
        if pmachine.hvtype == 'VMWARE':
            storagedriver = StorageDriverList.get_by_storagedriver_id(
                storagedriver_id)
            vpool = storagedriver.vpool
        else:
            vpool = None
        vm = VMachineList.get_by_devicename_and_vpool(name, vpool)
        if vm is not None:
            MessageController.fire(MessageController.Type.EVENT, {
                'type': 'vmachine_deleted',
                'metadata': {
                    'name': vm.name
                }
            })
            vm.delete(abandon=['vdisks'])
Example #5
    def rename_from_voldrv(old_name, new_name, storagedriver_id):
        """
        This method will handle the rename of a vmx file
        :param old_name: Old name of vmx
        :param new_name: New name for the vmx
        :param storagedriver_id: Storage Driver hosting the vmachine
        """
        pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
        if pmachine.hvtype not in ['VMWARE', 'KVM']:
            return

        hypervisor = Factory.get(pmachine)
        if pmachine.hvtype == 'VMWARE':
            storagedriver = StorageDriverList.get_by_storagedriver_id(
                storagedriver_id)
            vpool = storagedriver.vpool
        else:
            vpool = None

        old_name = hypervisor.clean_vmachine_filename(old_name)
        new_name = hypervisor.clean_vmachine_filename(new_name)
        scenario = hypervisor.get_rename_scenario(old_name, new_name)
        if scenario == 'RENAME':
            # Most likely a change from path. Updating path
            vm = VMachineList.get_by_devicename_and_vpool(old_name, vpool)
            if vm is not None:
                vm.devicename = new_name
                vm.save()
        elif scenario == 'UPDATE':
            vm = VMachineList.get_by_devicename_and_vpool(new_name, vpool)
            if vm is None:
                # The vMachine doesn't seem to exist, so it's likely the create didn't come through
                # Let's create it anyway
                VMachineController.update_from_voldrv(
                    new_name, storagedriver_id=storagedriver_id)
            vm = VMachineList.get_by_devicename_and_vpool(new_name, vpool)
            if vm is None:
                raise RuntimeError(
                    'Could not create vMachine on rename. Aborting.')
            try:
                VMachineController.sync_with_hypervisor(
                    vm.guid, storagedriver_id=storagedriver_id)
                vm.status = 'SYNC'
            except Exception:
                vm.status = 'SYNC_NOK'
            vm.save()
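For orientation, a sketch of how the two scenarios handled above might be classified; the real get_rename_scenario implementations live in the hypervisor classes returned by Factory.get, so the rules below are an assumption for illustration only:

    def get_rename_scenario(old_name, new_name):
        # Sketch, not the real implementation: a rename between two machine
        # files is a path change ('RENAME'); some other file being promoted
        # to a machine file is treated as a content update ('UPDATE').
        machine_extensions = ('.vmx', '.xml')
        if old_name.endswith(machine_extensions) and new_name.endswith(machine_extensions):
            return 'RENAME'
        if new_name.endswith(machine_extensions):
            return 'UPDATE'
        return 'UNHANDLED'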
Example #6
    def update_vmachine_name(instance_id, old_name, new_name):
        """
        Update a vMachine name: find vmachine by management center instance id, set new name
        :param instance_id: ID for the virtual machine known by management center
        :param old_name: Old name of the virtual machine
        :param new_name: New name for the virtual machine
        """
        vmachine = None
        for mgmt_center in MgmtCenterList.get_mgmtcenters():
            mgmt = Factory.get_mgmtcenter(mgmt_center=mgmt_center)
            try:
                machine_info = mgmt.get_vmachine_device_info(instance_id)
                file_name = machine_info['file_name']
                host_name = machine_info['host_name']
                vpool_name = machine_info['vpool_name']
                storage_router = StorageRouterList.get_by_name(host_name)
                machine_id = storage_router.machine_id
                device_name = '{0}/{1}'.format(machine_id, file_name)
                vp = VPoolList.get_vpool_by_name(vpool_name)
                vmachine = VMachineList.get_by_devicename_and_vpool(
                    device_name, vp)
                if vmachine:
                    break
                vmachine = VMachineList.get_by_devicename_and_vpool(
                    device_name, None)
                if vmachine:
                    break
            except Exception as ex:
                logger.info('Querying the management center failed for vmachine {0}: {1}'.format(old_name, ex))
        if not vmachine:
            logger.error('No vmachine found for name {0}'.format(old_name))
            return

        vpool = vmachine.vpool
        mutex = VolatileMutex('{0}_{1}'.format(
            old_name, vpool.guid if vpool is not None else 'none'))
        try:
            mutex.acquire(wait=5)
            vmachine.name = new_name
            vmachine.save()
        finally:
            mutex.release()
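The acquire/try/finally/release dance around VolatileMutex recurs in several of these examples; a small context-manager sketch (assuming only the acquire(wait=...) and release() calls shown above) makes the pattern exception-safe by construction:

    from contextlib import contextmanager

    @contextmanager
    def locked(name, wait=5):
        # Sketch: wraps the VolatileMutex pattern used above.
        mutex = VolatileMutex(name)  # from the surrounding codebase
        mutex.acquire(wait=wait)
        try:
            yield
        finally:
            mutex.release()

    # Usage sketch:
    # with locked('{0}_{1}'.format(old_name, vpool.guid if vpool else 'none')):
    #     vmachine.name = new_name
    #     vmachine.save()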
Example #7
    def rename_from_voldrv(old_name, new_name, storagedriver_id):
        """
        This method will handle the rename of a vmx file
        :param old_name: Old name of vmx
        :param new_name: New name for the vmx
        :param storagedriver_id: Storage Driver hosting the vmachine
        """
        pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
        if pmachine.hvtype not in ['VMWARE', 'KVM']:
            return

        hypervisor = Factory.get(pmachine)
        if pmachine.hvtype == 'VMWARE':
            storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
            vpool = storagedriver.vpool
        else:
            vpool = None

        old_name = hypervisor.clean_vmachine_filename(old_name)
        new_name = hypervisor.clean_vmachine_filename(new_name)
        scenario = hypervisor.get_rename_scenario(old_name, new_name)
        if scenario == 'RENAME':
            # Most likely a change from path. Updating path
            vm = VMachineList.get_by_devicename_and_vpool(old_name, vpool)
            if vm is not None:
                vm.devicename = new_name
                vm.save()
        elif scenario == 'UPDATE':
            vm = VMachineList.get_by_devicename_and_vpool(new_name, vpool)
            if vm is None:
                # The vMachine doesn't seem to exist, so it's likely the create didn't come through
                # Let's create it anyway
                VMachineController.update_from_voldrv(new_name, storagedriver_id=storagedriver_id)
            vm = VMachineList.get_by_devicename_and_vpool(new_name, vpool)
            if vm is None:
                raise RuntimeError('Could not create vMachine on rename. Aborting.')
            try:
                VMachineController.sync_with_hypervisor(vm.guid, storagedriver_id=storagedriver_id)
                vm.status = 'SYNC'
            except Exception:
                vm.status = 'SYNC_NOK'
            vm.save()
Example #8
    def update_vmachine_name(instance_id, old_name, new_name):
        """
        Update a vMachine name: find vmachine by management center instance id, set new name
        :param instance_id: ID for the virtual machine known by management center
        :param old_name: Old name of the virtual machine
        :param new_name: New name for the virtual machine
        """
        vmachine = None
        for mgmt_center in MgmtCenterList.get_mgmtcenters():
            mgmt = Factory.get_mgmtcenter(mgmt_center=mgmt_center)
            try:
                machine_info = mgmt.get_vmachine_device_info(instance_id)
                file_name = machine_info['file_name']
                host_name = machine_info['host_name']
                vpool_name = machine_info['vpool_name']
                storage_router = StorageRouterList.get_by_name(host_name)
                machine_id = storage_router.machine_id
                device_name = '{0}/{1}'.format(machine_id, file_name)
                vp = VPoolList.get_vpool_by_name(vpool_name)
                vmachine = VMachineList.get_by_devicename_and_vpool(device_name, vp)
                if vmachine:
                    break
                vmachine = VMachineList.get_by_devicename_and_vpool(device_name, None)
                if vmachine:
                    break
            except Exception as ex:
                VMachineController._logger.info('Querying the management center failed for vmachine {0}: {1}'.format(old_name, ex))
        if not vmachine:
            VMachineController._logger.error('No vmachine found for name {0}'.format(old_name))
            return

        vpool = vmachine.vpool
        mutex = volatile_mutex('{0}_{1}'.format(old_name, vpool.guid if vpool is not None else 'none'))
        try:
            mutex.acquire(wait=5)
            vmachine.name = new_name
            vmachine.save()
        finally:
            mutex.release()
Example #9
    def sync_with_hypervisor(vpool_guid):
        """
        Syncs all vMachines of a given vPool with the hypervisor
        """
        vpool = VPool(vpool_guid)
        for storagedriver in vpool.storagedrivers:
            pmachine = storagedriver.storagerouter.pmachine
            hypervisor = Factory.get(pmachine)
            for vm_object in hypervisor.get_vms_by_nfs_mountinfo(storagedriver.storage_ip, storagedriver.mountpoint):
                search_vpool = None if pmachine.hvtype == 'KVM' else vpool
                vmachine = VMachineList.get_by_devicename_and_vpool(
                    devicename=vm_object['backing']['filename'],
                    vpool=search_vpool
                )
                VMachineController.update_vmachine_config(vmachine, vm_object, pmachine)
Example #10
    def snapshot_all_vms():
        """
        Snapshots all VMachines
        """
        logger.info("[SSA] started")
        success = []
        fail = []
        machines = VMachineList.get_customer_vmachines()
        for machine in machines:
            try:
                VMachineController.snapshot(machineguid=machine.guid, label="", is_consistent=False, is_automatic=True)
                success.append(machine.guid)
            except Exception:
                fail.append(machine.guid)
        logger.info("[SSA] Snapshot has been taken for {0} vMachines, {1} failed.".format(len(success), len(fail)))
Example #11
    def sync_with_hypervisor(vpool_guid):
        """
        Syncs all vMachines of a given vPool with the hypervisor
        :param vpool_guid: Guid of the vPool to synchronize
        """
        vpool = VPool(vpool_guid)
        if vpool.status != VPool.STATUSES.RUNNING:
            raise ValueError('Synchronizing with hypervisor is only allowed if your vPool is in {0} status'.format(VPool.STATUSES.RUNNING))
        for storagedriver in vpool.storagedrivers:
            pmachine = storagedriver.storagerouter.pmachine
            hypervisor = Factory.get(pmachine)
            for vm_object in hypervisor.get_vms_by_nfs_mountinfo(storagedriver.storage_ip, storagedriver.mountpoint):
                search_vpool = None if pmachine.hvtype == 'KVM' else vpool
                vmachine = VMachineList.get_by_devicename_and_vpool(devicename=vm_object['backing']['filename'],
                                                                    vpool=search_vpool)
                VMachineController.update_vmachine_config(vmachine, vm_object, pmachine)
Example #12
    def can_be_deleted(self, storagedriver):
        """
        Checks whether a Storage Driver can be deleted
        """
        result = True
        storagerouter = storagedriver.storagerouter
        pmachine = storagerouter.pmachine
        vmachines = VMachineList.get_customer_vmachines()
        vpools_guids = [vmachine.vpool_guid for vmachine in vmachines if vmachine.vpool_guid is not None]
        pmachine_guids = [vmachine.pmachine_guid for vmachine in vmachines]
        vpool = storagedriver.vpool

        if pmachine.guid in pmachine_guids and vpool.guid in vpools_guids:
            result = False
        if any(vdisk for vdisk in vpool.vdisks if vdisk.storagedriver_id == storagedriver.storagedriver_id):
            result = False
        return Response(result, status=status.HTTP_200_OK)
Example #13
    def can_be_deleted(self, storagedriver):
        """
        Checks whether a Storage Driver can be deleted
        """
        result = True
        storagerouter = storagedriver.storagerouter
        storagedrivers_left = len([sd for sd in storagerouter.storagedrivers if sd.guid != storagedriver.guid])
        pmachine = storagerouter.pmachine
        vmachines = VMachineList.get_customer_vmachines()
        vpools_guids = [vmachine.vpool_guid for vmachine in vmachines if vmachine.vpool_guid is not None]
        pmachine_guids = [vmachine.pmachine_guid for vmachine in vmachines]
        vpool = storagedriver.vpool

        if storagedrivers_left == 0 and pmachine.guid in pmachine_guids and vpool.guid in vpools_guids:
            result = False
        if any(vdisk for vdisk in vpool.vdisks if vdisk.storagedriver_id == storagedriver.storagedriver_id):
            result = False
        return result
Example #14
    def list(self, vpoolguid=None, query=None):
        """
        Overview of all machines
        """
        if vpoolguid is not None:
            vpool = VPool(vpoolguid)
            vmachine_guids = []
            vmachines = []
            for vdisk in vpool.vdisks:
                if vdisk.vmachine_guid is not None and vdisk.vmachine_guid not in vmachine_guids:
                    vmachine_guids.append(vdisk.vmachine.guid)
                    if vdisk.vmachine.is_vtemplate is False:
                        vmachines.append(vdisk.vmachine)
        elif query is not None:
            query = json.loads(query)
            vmachines = DataList(VMachine, query)
        else:
            vmachines = VMachineList.get_vmachines()
        return vmachines
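A hedged example of what the decoded query payload might look like; the exact filter grammar is defined by the OVS DataList, so the field name and operator below are illustrative assumptions:

    import json

    # Illustrative only -- consult the DataList query grammar for real filters.
    query = json.dumps({'type': 'AND',
                        'items': [['is_vtemplate', 'EQUALS', False]]})
    # list() decodes this with json.loads and hands it to DataList(VMachine, ...).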
Example #15
    def snapshot_all_vms():
        """
        Snapshots all VMachines
        """
        logger.info('[SSA] started')
        success = []
        fail = []
        machines = VMachineList.get_customer_vmachines()
        for machine in machines:
            try:
                VMachineController.snapshot(machineguid=machine.guid,
                                            label='',
                                            is_consistent=False,
                                            is_automatic=True)
                success.append(machine.guid)
            except Exception:
                fail.append(machine.guid)
        logger.info('[SSA] {0} vMachines were snapshotted, {1} failed.'.format(
            len(success), len(fail)))
Example #16
    def list(self, vpoolguid=None, query=None):
        """
        Overview of all machines
        """
        if vpoolguid is not None:
            vpool = VPool(vpoolguid)
            vmachine_guids = []
            vmachines = []
            for vdisk in vpool.vdisks:
                if vdisk.vmachine_guid is not None and vdisk.vmachine_guid not in vmachine_guids:
                    vmachine_guids.append(vdisk.vmachine.guid)
                    if vdisk.vmachine.is_vtemplate is False:
                        vmachines.append(vdisk.vmachine)
        elif query is not None:
            query = json.loads(query)
            vmachines = DataList(VMachine, query)
        else:
            vmachines = VMachineList.get_vmachines()
        return vmachines
Example #17
    def delete_from_voldrv(name, storagedriver_id):
        """
        This method will delete a vMachine based on the name of the given vmx/xml file
        """
        pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
        if pmachine.hvtype not in ['VMWARE', 'KVM']:
            return

        hypervisor = Factory.get(pmachine)
        name = hypervisor.clean_vmachine_filename(name)
        if pmachine.hvtype == 'VMWARE':
            storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
            vpool = storagedriver.vpool
        else:
            vpool = None
        vm = VMachineList.get_by_devicename_and_vpool(name, vpool)
        if vm is not None:
            MessageController.fire(MessageController.Type.EVENT, {'type': 'vmachine_deleted',
                                                                  'metadata': {'name': vm.name}})
            vm.delete(abandon=['vdisks'])
Example #18
    def snapshot_all_vms():
        """
        Snapshots all VMachines
        """
        ScheduledTaskController._logger.info('[SSA] started')
        success = []
        fail = []
        machines = VMachineList.get_customer_vmachines()
        for machine in machines:
            try:
                VMachineController.snapshot(machineguid=machine.guid,
                                            label='',
                                            is_consistent=False,
                                            is_automatic=True,
                                            is_sticky=False)
                success.append(machine.guid)
            except Exception:
                fail.append(machine.guid)
        ScheduledTaskController._logger.info(
            '[SSA] Snapshot has been taken for {0} vMachines, {1} failed.'.format(len(success), len(fail)))
Example #19
    def sync_with_hypervisor(vpool_guid):
        """
        Syncs all vMachines of a given vPool with the hypervisor
        :param vpool_guid: Guid of the vPool to synchronize
        """
        vpool = VPool(vpool_guid)
        if vpool.status != VPool.STATUSES.RUNNING:
            raise ValueError(
                'Synchronizing with hypervisor is only allowed if your vPool is in {0} status'.format(VPool.STATUSES.RUNNING))
        for storagedriver in vpool.storagedrivers:
            pmachine = storagedriver.storagerouter.pmachine
            hypervisor = Factory.get(pmachine)
            for vm_object in hypervisor.get_vms_by_nfs_mountinfo(
                    storagedriver.storage_ip, storagedriver.mountpoint):
                search_vpool = None if pmachine.hvtype == 'KVM' else vpool
                vmachine = VMachineList.get_by_devicename_and_vpool(
                    devicename=vm_object['backing']['filename'],
                    vpool=search_vpool)
                VMachineController.update_vmachine_config(
                    vmachine, vm_object, pmachine)
Example #20
    def list(self, vpoolguid=None, query=None):
        """
        Overview of all machines
        """
        if vpoolguid is not None:
            vpool = VPool(vpoolguid)
            vmachine_guids = []
            vmachines = []
            for vdisk in vpool.vdisks:
                if vdisk.vmachine_guid is not None and vdisk.vmachine_guid not in vmachine_guids:
                    vmachine_guids.append(vdisk.vmachine.guid)
                    if vdisk.vmachine.is_vtemplate is False:
                        vmachines.append(vdisk.vmachine)
        elif query is not None:
            query = json.loads(query)
            query_result = DataList({'object': VMachine,
                                     'data': DataList.select.GUIDS,
                                     'query': query}).data
            vmachines = DataObjectList(query_result, VMachine)
        else:
            vmachines = VMachineList.get_vmachines()
        return vmachines
Example #21
    def list(self, vpoolguid=None, query=None):
        """
        Overview of all machines
        """
        if vpoolguid is not None:
            vpool = VPool(vpoolguid)
            vmachine_guids = []
            vmachines = []
            for vdisk in vpool.vdisks:
                if vdisk.vmachine_guid is not None and vdisk.vmachine_guid not in vmachine_guids:
                    vmachine_guids.append(vdisk.vmachine.guid)
                    if vdisk.vmachine.is_vtemplate is False:
                        vmachines.append(vdisk.vmachine)
        elif query is not None:
            query = json.loads(query)
            query_result = DataList({'object': VMachine,
                                     'data': DataList.select.GUIDS,
                                     'query': query}).data
            vmachines = DataObjectList(query_result, VMachine)
        else:
            vmachines = VMachineList.get_vmachines()
        return vmachines
Example #22
    def update_from_voldrv(name, storagedriver_id):
        """
        This method will update/create a vmachine based on a given vmx/xml file
        """

        pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
        if pmachine.hvtype not in ['VMWARE', 'KVM']:
            return

        hypervisor = Factory.get(pmachine)
        name = hypervisor.clean_vmachine_filename(name)
        storagedriver = StorageDriverList.get_by_storagedriver_id(
            storagedriver_id)
        vpool = storagedriver.vpool
        machine_ids = [
            storagedriver.storagerouter.machine_id
            for storagedriver in vpool.storagedrivers
        ]

        if hypervisor.should_process(name, machine_ids=machine_ids):
            if pmachine.hvtype == 'VMWARE':
                storagedriver = StorageDriverList.get_by_storagedriver_id(
                    storagedriver_id)
                vpool = storagedriver.vpool
            else:
                vpool = None
            pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
            mutex = VolatileMutex('{}_{}'.format(
                name, vpool.guid if vpool is not None else 'none'))
            try:
                mutex.acquire(wait=120)
                limit = 5
                exists = hypervisor.file_exists(storagedriver, name)
                while limit > 0 and exists is False:
                    time.sleep(1)
                    exists = hypervisor.file_exists(storagedriver, name)
                    limit -= 1
                if exists is False:
                    logger.info('Could not locate vmachine with name {0} on vpool {1}'.format(
                        name, vpool.name if vpool is not None else 'none'))
                    vmachine = VMachineList.get_by_devicename_and_vpool(
                        name, vpool)
                    if vmachine is not None:
                        VMachineController.delete_from_voldrv(
                            name, storagedriver_id=storagedriver_id)
                    return
            finally:
                mutex.release()
            try:
                mutex.acquire(wait=5)
                vmachine = VMachineList.get_by_devicename_and_vpool(
                    name, vpool)
                if not vmachine:
                    vmachine = VMachine()
                    vmachine.vpool = vpool
                    vmachine.pmachine = pmachine
                    vmachine.status = 'CREATED'
                vmachine.devicename = name
                vmachine.save()
            finally:
                mutex.release()

            if pmachine.hvtype == 'KVM':
                try:
                    VMachineController.sync_with_hypervisor(
                        vmachine.guid, storagedriver_id=storagedriver_id)
                    vmachine.status = 'SYNC'
                except Exception:
                    vmachine.status = 'SYNC_NOK'
                vmachine.save()
        else:
            logger.info('Ignored invalid file {0}'.format(name))
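The bounded polling loop above (re-check file_exists up to five times, one second apart) guards against the volume-driver event arriving before the file is visible on disk. The same idea as a generic, standalone sketch:

    import time

    def wait_for(predicate, attempts=5, interval=1):
        # Sketch of the retry loop above: True as soon as predicate() holds,
        # False once the attempts are exhausted.
        if predicate():
            return True
        for _ in range(attempts):
            time.sleep(interval)
            if predicate():
                return True
        return False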
Example #23
    def gather_scrub_work():
        logger.info("Divide scrubbing work among allowed Storage Routers")

        scrub_locations = {}
        for storage_driver in StorageDriverList.get_storagedrivers():
            for partition in storage_driver.partitions:
                if DiskPartition.ROLES.SCRUB == partition.role:
                    logger.info(
                        "Scrub partition found on Storage Router {0}: {1}".format(storage_driver.name, partition.folder)
                    )
                    if storage_driver.storagerouter not in scrub_locations:
                        try:
                            _ = SSHClient(storage_driver.storagerouter.ip)
                            scrub_locations[storage_driver.storagerouter] = str(partition.path)
                        except UnableToConnectException:
                            logger.warning("StorageRouter {0} is not reachable".format(storage_driver.storagerouter.ip))

        if len(scrub_locations) == 0:
            raise RuntimeError("No scrub locations found")

        vdisk_guids = set()
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info["object_type"] in ["BASE"] and len(vdisk.child_vdisks) == 0:
                    vdisk_guids.add(vdisk.guid)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info["object_type"] in ["BASE"] and len(vdisk.child_vdisks) == 0:
                vdisk_guids.add(vdisk.guid)

        logger.info("Found {0} virtual disks which need to be check for scrub work".format(len(vdisk_guids)))
        local_machineid = System.get_my_machine_id()
        local_scrub_location = None
        local_vdisks_to_scrub = []
        result_set = ResultSet([])
        storage_router_list = []

        for index, scrub_info in enumerate(scrub_locations.items()):
            start_index = index * len(vdisk_guids) / len(scrub_locations)
            end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations)
            storage_router = scrub_info[0]
            vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index]
            local = storage_router.machine_id == local_machineid
            logger.info(
                "Executing scrub work on {0} Storage Router {1} for {2} virtual disks".format(
                    "local" if local is True else "remote", storage_router.name, len(vdisk_guids_to_scrub)
                )
            )

            if local is True:
                local_scrub_location = scrub_info[1]
                local_vdisks_to_scrub = vdisk_guids_to_scrub
            else:
                result_set.add(
                    ScheduledTaskController._execute_scrub_work.s(
                        scrub_location=scrub_info[1], vdisk_guids=vdisk_guids_to_scrub
                    ).apply_async(routing_key="sr.{0}".format(storage_router.machine_id))
                )
                storage_router_list.append(storage_router)
                logger.info("Launched scrub task on Storage Router {0}".format(storage_router.name))

        # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
        if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
            ScheduledTaskController._execute_scrub_work(
                scrub_location=local_scrub_location, vdisk_guids=local_vdisks_to_scrub
            )
        all_results = result_set.join(
            propagate=False
        )  # Propagate False makes sure all jobs are waited for even when 1 or more jobs fail
        for index, result in enumerate(all_results):
            if result is not None:
                logger.error(
                    "Scrubbing failed on Storage Router {0} with error {1}".format(
                        storage_router_list[index].name, result
                    )
                )
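The start/end arithmetic above splits the vDisk guids evenly across the scrub locations (note the Python 2 integer division). The same partitioning as a standalone sketch:

    def partition(items, n_chunks):
        # Sketch of the slicing above: with 10 items and 3 chunks the slices
        # are [0:3], [3:6] and [6:10].
        items = list(items)
        return [items[i * len(items) // n_chunks:(i + 1) * len(items) // n_chunks]
                for i in range(n_chunks)]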
Example #24
    def _children(vmt):
        children = 0
        disks = [vd.guid for vd in vmt.vdisks]
        parent_guids = [vdisk.parent_vdisk_guid
                        for item in [vm.vdisks for vm in VMachineList.get_vmachines() if not vm.is_vtemplate]
                        for vdisk in item]
        for vdisk in parent_guids:
            for disk in disks:
                if vdisk == disk:
                    children += 1
        return children
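_children counts how many vDisks of non-template vMachines have one of the template's own vDisks as parent. An equivalent formulation without the nested loops (a sketch; it should return the same count):

    def _children(vmt):
        # Guids of the template's own disks.
        template_disks = set(vd.guid for vd in vmt.vdisks)
        # Parent guids of every disk attached to a non-template vMachine.
        parents = [vdisk.parent_vdisk_guid
                   for vm in VMachineList.get_vmachines() if not vm.is_vtemplate
                   for vdisk in vm.vdisks]
        return sum(1 for guid in parents if guid in template_disks)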
Example #25
    def _bootstrap_dal_models(self):
        """
        Load/hook dal models as snmp oids
        """
        _guids = set()

        enabled_key = "{0}_config_dal_enabled".format(STORAGE_PREFIX)
        self.instance_oid = 0
        try:
            enabled = self.persistent.get(enabled_key)
        except KeyNotFoundException:
            enabled = True  # Enabled by default, can be disabled by setting the key
        if enabled:
            from ovs.dal.lists.vdisklist import VDiskList
            from ovs.dal.lists.storagerouterlist import StorageRouterList
            from ovs.dal.lists.pmachinelist import PMachineList
            from ovs.dal.lists.vmachinelist import VMachineList
            from ovs.dal.lists.vpoollist import VPoolList
            from ovs.dal.lists.storagedriverlist import StorageDriverList

            for storagerouter in StorageRouterList.get_storagerouters():
                _guids.add(storagerouter.guid)
                if not self._check_added(storagerouter):
                    self._register_dal_model(10, storagerouter, 'guid', "0")
                    self._register_dal_model(10, storagerouter, 'name', "1")
                    self._register_dal_model(10, storagerouter, 'pmachine', "3", key = 'host_status')
                    self._register_dal_model(10, storagerouter, 'description', "4")
                    self._register_dal_model(10, storagerouter, 'devicename', "5")
                    self._register_dal_model(10, storagerouter, 'dtl_mode', "6")
                    self._register_dal_model(10, storagerouter, 'ip', "8")
                    self._register_dal_model(10, storagerouter, 'machineid', "9")
                    self._register_dal_model(10, storagerouter, 'status', "10")
                    # Per-router counts: only objects served by one of this router's own Storage Drivers.
                    self._register_dal_model(10, storagerouter, '#vdisks', "11",
                                             func = lambda storagerouter: len([vdisk for sd in storagerouter.storagedrivers
                                                                               for vdisk in sd.vpool.vdisks
                                                                               if vdisk.storagedriver_id == sd.storagedriver_id]),
                                             atype = int)
                    self._register_dal_model(10, storagerouter, '#vmachines', "12",
                                             func = lambda storagerouter: len(set(vdisk.vmachine.guid for sd in storagerouter.storagedrivers
                                                                                  for vdisk in sd.vpool.vdisks
                                                                                  if vdisk.storagedriver_id == sd.storagedriver_id)),
                                             atype = int)
                    self._register_dal_model(10, storagerouter, '#stored_data', "13",
                                             func = lambda storagerouter: sum(vdisk.vmachine.stored_data for sd in storagerouter.storagedrivers
                                                                              for vdisk in sd.vpool.vdisks
                                                                              if vdisk.storagedriver_id == sd.storagedriver_id),
                                             atype = int)
                    self.instance_oid += 1

            for vm in VMachineList.get_vmachines():
                _guids.add(vm.guid)
                if not self._check_added(vm):
                    if vm.is_vtemplate:
                        self._register_dal_model(11, vm, 'guid', "0")
                        self._register_dal_model(11, vm, 'name', "1")

                        def _children(vmt):
                            children = 0
                            disks = [vd.guid for vd in vmt.vdisks]
                            for vdisk in [vdisk.parent_vdisk_guid for item in [vm.vdisks for vm in VMachineList.get_vmachines() if not vm.is_vtemplate] for vdisk in item]:
                                for disk in disks:
                                    if vdisk == disk:
                                        children += 1
                            return children
                        self._register_dal_model(11, vm, '#children', 2, func = _children, atype = int)
                        self.instance_oid += 1

            for vm in VMachineList.get_vmachines():
                _guids.add(vm.guid)
                if not self._check_added(vm):
                    if not vm.is_vtemplate:
                        self._register_dal_model(0, vm, 'guid', "0")
                        self._register_dal_model(0, vm, 'name', "1")
                        self._register_dal_model(0, vm, 'statistics', "2.0", key = "operations", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.1", key = "cluster_cache_misses_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.2", key = "data_read", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.3", key = "sco_cache_misses", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.4", key = "sco_cache_hits_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.5", key = "sco_cache_hits", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.6", key = "write_operations", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.7", key = "cluster_cache_misses", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.8", key = "read_operations_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.9", key = "sco_cache_misses_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.10", key = "backend_write_operations", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.11", key = "backend_data_read", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.12", key = "cache_hits", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.13", key = "backend_write_operations_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.14", key = "metadata_store_hits_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.15", key = "metadata_store_misses", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.16", key = "backend_data_written", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.17", key = "data_read_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.18", key = "read_operations", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.19", key = "cluster_cache_hits", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.20", key = "data_written_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.21", key = "cluster_cache_hits_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.22", key = "cache_hits_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.23", key = "timestamp", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.24", key = "metadata_store_misses_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.25", key = "backend_data_written_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.26", key = "backend_read_operations", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.27", key = "data_written", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.28", key = "metadata_store_hits", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.29", key = "backend_data_read_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.30", key = "operations_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.31", key = "backend_read_operations_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.32", key = "data_transferred_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.33", key = "write_operations_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.34", key = "data_transferred", atype = int)
                        self._register_dal_model(0, vm, 'stored_data', "3", atype = int)
                        self._register_dal_model(0, vm, 'description', "4")
                        self._register_dal_model(0, vm, 'devicename', "5")
                        self._register_dal_model(0, vm, 'dtl_mode', "6")
                        self._register_dal_model(0, vm, 'hypervisorid', "7")
                        self._register_dal_model(0, vm, 'ip', "8")
                        self._register_dal_model(0, vm, 'status', "10")
                        self._register_dal_model(0, vm, 'stored_data', "10", atype = int)
                        self._register_dal_model(0, vm, 'snapshots', "11", atype = int)
                        self._register_dal_model(0, vm, 'vdisks', "12", atype = int)
                        self._register_dal_model(0, vm, 'DTL', '13',
                                                 func = lambda vm: 'DEGRADED' if all(item == 'DEGRADED' for item in [vd.info['failover_mode'] for vd in vm.vdisks]) else 'OK')
                    self.instance_oid += 1

            for vd in VDiskList.get_vdisks():
                _guids.add(vd.guid)
                if not self._check_added(vd):
                    self._register_dal_model(1, vd, 'guid', "0")
                    self._register_dal_model(1, vd, 'name', "1")
                    self._register_dal_model(1, vd, 'statistics', "2.0", key = "operations", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.1", key = "data_written_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.2", key = "data_read", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.3", key = "sco_cache_misses", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.4", key = "sco_cache_hits_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.5", key = "sco_cache_hits", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.6", key = "write_operations", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.7", key = "cluster_cache_misses", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.8", key = "read_operations_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.9", key = "sco_cache_misses_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.10", key = "backend_write_operations", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.11", key = "backend_data_read", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.12", key = "cache_hits", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.13", key = "backend_write_operations_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.14", key = "metadata_store_hits_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.15", key = "metadata_store_misses", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.16", key = "backend_data_written", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.17", key = "data_read_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.18", key = "read_operations", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.19", key = "cluster_cache_hits", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.20", key = "cluster_cache_misses_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.21", key = "cluster_cache_hits_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.22", key = "cache_hits_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.23", key = "timestamp", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.24", key = "metadata_store_misses_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.25", key = "backend_data_written_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.26", key = "backend_read_operations", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.27", key = "data_written", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.28", key = "metadata_store_hits", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.29", key = "backend_data_read_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.30", key = "operations_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.31", key = "backend_read_operations_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.32", key = "data_transferred_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.33", key = "write_operations_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.34", key = "data_transferred", atype = int)
                    self._register_dal_model(1, vd, 'info', "3", key = 'stored', atype = int)
                    self._register_dal_model(1, vd, 'info', "4", key = 'failover_mode', atype = int)
                    self._register_dal_model(1, vd, 'snapshots', "5", atype = int)
                    self.instance_oid += 1

            for pm in PMachineList.get_pmachines():
                _guids.add(pm.guid)
                if not self._check_added(pm):
                    self._register_dal_model(2, pm, 'guid', "0")
                    self._register_dal_model(2, pm, 'name', "1")
                    self._register_dal_model(2, pm, 'host_status', "2")
                    self.instance_oid += 1

            for vp in VPoolList.get_vpools():
                _guids.add(vp.guid)
                if not self._check_added(vp):
                    self._register_dal_model(3, vp, 'guid', "0")
                    self._register_dal_model(3, vp, 'name', "1")
                    self._register_dal_model(3, vp, 'statistics', "2.0", key = "operations", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.1", key = "cluster_cache_misses_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.2", key = "data_read", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.3", key = "sco_cache_misses", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.4", key = "sco_cache_hits_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.5", key = "sco_cache_hits", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.6", key = "write_operations", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.7", key = "cluster_cache_misses", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.8", key = "read_operations_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.9", key = "sco_cache_misses_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.10", key = "backend_write_operations", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.11", key = "backend_data_read", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.12", key = "cache_hits", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.13", key = "backend_write_operations_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.14", key = "metadata_store_hits_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.15", key = "metadata_store_misses", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.16", key = "backend_data_written", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.17", key = "data_read_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.18", key = "read_operations", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.19", key = "cluster_cache_hits", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.20", key = "data_written_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.21", key = "cluster_cache_hits_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.22", key = "cache_hits_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.23", key = "timestamp", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.24", key = "metadata_store_misses_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.25", key = "backend_data_written_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.26", key = "backend_read_operations", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.27", key = "data_written", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.28", key = "metadata_store_hits", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.29", key = "backend_data_read_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.30", key = "operations_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.31", key = "backend_read_operations_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.32", key = "data_transferred_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.33", key = "write_operations_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.34", key = "data_transferred", atype = int)
                    self._register_dal_model(3, vp, 'status', "3")
                    self._register_dal_model(3, vp, 'description', "4")
                    self._register_dal_model(3, vp, 'vdisks', "5", atype = int)
                    self._register_dal_model(3, vp, '#vmachines', "6",
                                             func = lambda vp: len(set([vd.vmachine.guid for vd in vp.vdisks])),
                                             atype = int)
                    self.instance_oid += 1

            for storagedriver in StorageDriverList.get_storagedrivers():
                _guids.add(storagedriver.guid)
                if not self._check_added(storagedriver):
                    self._register_dal_model(4, storagedriver, 'guid', "0")
                    self._register_dal_model(4, storagedriver, 'name', "1")
                    self._register_dal_model(4, storagedriver, 'stored_data', "2", atype = int)
                    self.instance_oid += 1

            try:
                # try to load OVS Backends
                from ovs.dal.lists.albabackendlist import AlbaBackendList
                for backend in AlbaBackendList.get_albabackends():
                    _guids.add(backend.guid)
                    if not self._check_added(backend):
                        self._register_dal_model(5, backend, 'guid', 0)
                        self._register_dal_model(5, backend, 'name', 1)
                        for disk_id in range(len(backend.all_disks)):
                            self._register_dal_model(5, backend, 'all_disks', '2.{0}.0'.format(disk_id), key = "name", index=disk_id)
                            self._register_dal_model(5, backend, 'all_disks', '2.{0}.1'.format(disk_id), key = "usage.size", atype = long, index=disk_id)
                            self._register_dal_model(5, backend, 'all_disks', '2.{0}.2'.format(disk_id), key = "usage.used", atype = long, index=disk_id)
                            self._register_dal_model(5, backend, 'all_disks', '2.{0}.3'.format(disk_id), key = "usage.available", atype = long, index=disk_id)
                            self._register_dal_model(5, backend, 'all_disks', '2.{0}.4'.format(disk_id), key = "state.state", index=disk_id)
                            self._register_dal_model(5, backend, 'all_disks', '2.{0}.5'.format(disk_id), key = "node_id", index=disk_id)

                        self.instance_oid += 1
            except ImportError:
                print('OVS Backend not present')
            reload = False
            for object_guid in list(self.model_oids):
                if object_guid not in _guids:
                    self.model_oids.remove(object_guid)
                    reload = True
            if reload:
                self._reload_snmp()
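The 35 per-key statistics registrations repeated above for vMachines, vDisks and vPools differ only in the sub-OID and the key name; a sketch of generating them in a loop, to be read as living inside _bootstrap_dal_models (key list abridged):

    # Abridged -- the full 35-key list appears in the registrations above.
    stats_keys = ['operations', 'cluster_cache_misses_ps', 'data_read',
                  'sco_cache_misses', 'sco_cache_hits_ps', 'sco_cache_hits']
    for sub_oid, stats_key in enumerate(stats_keys):
        self._register_dal_model(0, vm, 'statistics', '2.{0}'.format(sub_oid),
                                 key=stats_key, atype=int)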
Example #26
    def delete_snapshots(timestamp=None):
        """
        Delete snapshots & scrubbing policy

        Implemented delete snapshot policy:
        age  | bucket size | # buckets | which snapshot to keep | total span
        < 1d | 1d bucket   | 1         | best of bucket         | 1d
        < 1w | 1d bucket   | 6         | oldest of bucket       | 7d = 1w
        < 1m | 1w bucket   | 3         | oldest of bucket       | 4w = 1m
        > 1m | delete

        :param timestamp: Timestamp to determine whether snapshots should be kept or not, if none provided, current time will be used
        """

        logger.info('Delete snapshots started')

        day = timedelta(1)
        week = day * 7

        def make_timestamp(offset):
            """
            Create an integer based timestamp
            :param offset: Offset in days
            :return: Timestamp
            """
            return int(mktime((base - offset).timetuple()))

        # Calculate bucket structure
        if timestamp is None:
            timestamp = time.time()
        base = datetime.fromtimestamp(timestamp).date() - day
        buckets = []
        # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[
        for i in xrange(0, 7):
            buckets.append({'start': make_timestamp(day * i),
                            'end': make_timestamp(day * (i + 1)),
                            'type': '1d',
                            'snapshots': []})
        # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[
        for i in xrange(1, 4):
            buckets.append({'start': make_timestamp(week * i),
                            'end': make_timestamp(week * (i + 1)),
                            'type': '1w',
                            'snapshots': []})
        buckets.append({'start': make_timestamp(week * 4),
                        'end': 0,
                        'type': 'rest',
                        'snapshots': []})

        # Place all snapshots in bucket_chains
        bucket_chains = []
        for vmachine in VMachineList.get_customer_vmachines():
            if any(vd.info['object_type'] in ['BASE'] for vd in vmachine.vdisks):
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vmachine.snapshots:
                    if snapshot.get('is_sticky') is True:
                        continue
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            for diskguid, snapshotguid in snapshot['snapshots'].iteritems():
                                bucket['snapshots'].append({'timestamp': timestamp,
                                                            'snapshotid': snapshotguid,
                                                            'diskguid': diskguid,
                                                            'is_consistent': snapshot['is_consistent']})
                bucket_chains.append(bucket_chain)

        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] in ['BASE']:
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vdisk.snapshots:
                    if snapshot.get('is_sticky') is True:
                        continue
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            bucket['snapshots'].append({'timestamp': timestamp,
                                                        'snapshotid': snapshot['guid'],
                                                        'diskguid': vdisk.guid,
                                                        'is_consistent': snapshot['is_consistent']})
                bucket_chains.append(bucket_chain)

        # Clean out the snapshot bucket_chains: remove from the chains the snapshots
        # we want to keep; everything still left in the buckets afterwards gets deleted
        for bucket_chain in bucket_chains:
            first = True
            for bucket in bucket_chain:
                if first is True:
                    best = None
                    for snapshot in bucket['snapshots']:
                        if best is None:
                            best = snapshot
                        # Consistent is better than inconsistent
                        elif snapshot['is_consistent'] and not best['is_consistent']:
                            best = snapshot
                        # Newer (larger timestamp) is better than older snapshots
                        elif snapshot['is_consistent'] == best['is_consistent'] and \
                                snapshot['timestamp'] > best['timestamp']:
                            best = snapshot
                    bucket['snapshots'] = [s for s in bucket['snapshots'] if
                                           s['timestamp'] != best['timestamp']]
                    first = False
                elif bucket['end'] > 0:
                    oldest = None
                    for snapshot in bucket['snapshots']:
                        if oldest is None:
                            oldest = snapshot
                        # Older (smaller timestamp) is the one we want to keep
                        elif snapshot['timestamp'] < oldest['timestamp']:
                            oldest = snapshot
                    bucket['snapshots'] = [s for s in bucket['snapshots'] if
                                           s['timestamp'] != oldest['timestamp']]

        # Delete obsolete snapshots
        for bucket_chain in bucket_chains:
            for bucket in bucket_chain:
                for snapshot in bucket['snapshots']:
                    VDiskController.delete_snapshot(diskguid=snapshot['diskguid'],
                                                    snapshotid=snapshot['snapshotid'])
        logger.info('Delete snapshots finished')
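To make the bucket boundaries concrete: if the timestamp falls on 2015-06-15, base becomes 2015-06-14, and the first day bucket then holds snapshots taken during 2015-06-13, because 'start' is the newer boundary in the start >= timestamp > end test. A quick check of the math (dates illustrative):

    from datetime import datetime, timedelta
    from time import mktime

    day = timedelta(1)
    base = datetime(2015, 6, 15).date() - day            # 2015-06-14

    def make_timestamp(offset):
        return int(mktime((base - offset).timetuple()))

    start, end = make_timestamp(day * 0), make_timestamp(day * 1)
    # start == midnight 2015-06-14, end == midnight 2015-06-13, so a snapshot
    # taken 2015-06-13 12:00 satisfies start >= timestamp > end.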
Example #27
    def gather_scrub_work():
        """
        Retrieve and execute scrub work
        :return: None
        """
        logger.info('Gather Scrub - Started')

        scrub_locations = {}
        for storage_driver in StorageDriverList.get_storagedrivers():
            for partition in storage_driver.partitions:
                if DiskPartition.ROLES.SCRUB == partition.role:
                    logger.info('Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}'.format(storage_driver.storagerouter.ip, partition.path))
                    if storage_driver.storagerouter not in scrub_locations:
                        try:
                            _ = SSHClient(storage_driver.storagerouter)
                            scrub_locations[storage_driver.storagerouter] = str(partition.path)
                        except UnableToConnectException:
                            logger.warning('Gather Scrub - Storage Router {0:<15} is not reachable'.format(storage_driver.storagerouter.ip))

        if len(scrub_locations) == 0:
            raise RuntimeError('No scrub locations found')

        vdisk_guids = set()
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info['object_type'] == 'BASE':
                    vdisk_guids.add(vdisk.guid)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] == 'BASE':
                vdisk_guids.add(vdisk.guid)

        logger.info('Gather Scrub - Checking {0} volumes for scrub work'.format(len(vdisk_guids)))
        local_machineid = System.get_my_machine_id()
        local_storage_router = None
        local_scrub_location = None
        local_vdisks_to_scrub = []
        result_set = ResultSet([])
        storage_router_list = []

        for index, scrub_info in enumerate(scrub_locations.items()):
            start_index = index * len(vdisk_guids) / len(scrub_locations)
            end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations)
            storage_router = scrub_info[0]
            vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index]
            local = storage_router.machine_id == local_machineid
            logger.info('Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks'.format(storage_router.ip, 'local' if local is True else 'remote', len(vdisk_guids_to_scrub)))

            if local is True:
                local_storage_router = storage_router
                local_scrub_location = scrub_info[1]
                local_vdisks_to_scrub = vdisk_guids_to_scrub
            else:
                result_set.add(ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_info[1],
                                                                             vdisk_guids=vdisk_guids_to_scrub).apply_async(
                    routing_key='sr.{0}'.format(storage_router.machine_id)
                ))
                storage_router_list.append(storage_router)

        # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
        processed_guids = []
        if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
            try:
                processed_guids = ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location,
                                                                              vdisk_guids=local_vdisks_to_scrub)
            except Exception as ex:
                logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(local_storage_router.ip, ex))
        all_results = result_set.join(propagate=False)  # propagate=False ensures we wait for all jobs, even when one or more fail
        for index, result in enumerate(all_results):
            if isinstance(result, list):
                processed_guids.extend(result)
            else:
                logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(storage_router_list[index].ip, result))
        if len(processed_guids) != len(vdisk_guids) or set(processed_guids).difference(vdisk_guids):
            raise RuntimeError('Scrubbing failed for 1 or more storagerouters')
        logger.info('Gather Scrub - Finished')
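The start/end arithmetic above slices the guid list so that every scrub location gets a contiguous, roughly equal share (this is Python 2, where '/' on ints already truncates). A standalone restatement of the partitioning, with illustrative inputs:

vdisk_guids = list(range(10))   # stand-in guids
locations = ['sr1', 'sr2', 'sr3']

slices = {}
for index, location in enumerate(locations):
    start = index * len(vdisk_guids) // len(locations)        # '//' mirrors Python 2 integer '/'
    end = (index + 1) * len(vdisk_guids) // len(locations)
    slices[location] = vdisk_guids[start:end]

assert slices['sr1'] == [0, 1, 2] and slices['sr3'] == [6, 7, 8, 9]  # the remainder lands in the last slice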
Example #28
    def delete_snapshots(timestamp=None):
        """
        Delete snapshots & scrubbing policy

        Implemented delete snapshot policy:
        < 1d | 1d bucket | 1 | best of bucket   | 1d
        < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w
        < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m
        > 1m | delete

        :param timestamp: Timestamp to determine whether snapshots should be kept or not, if none provided, current time will be used
        """

        logger.info('Delete snapshots started')

        day = timedelta(1)
        week = day * 7

        def make_timestamp(offset):
            """
            Create an integer based timestamp
            :param offset: Offset in days
            :return: Timestamp
            """
            return int(mktime((base - offset).timetuple()))

        # Calculate bucket structure
        if timestamp is None:
            timestamp = time.time()
        base = datetime.fromtimestamp(timestamp).date() - day
        buckets = []
        # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[
        for i in xrange(0, 7):
            buckets.append({
                'start': make_timestamp(day * i),
                'end': make_timestamp(day * (i + 1)),
                'type': '1d',
                'snapshots': []
            })
        # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[
        for i in xrange(1, 4):
            buckets.append({
                'start': make_timestamp(week * i),
                'end': make_timestamp(week * (i + 1)),
                'type': '1w',
                'snapshots': []
            })
        buckets.append({
            'start': make_timestamp(week * 4),
            'end': 0,
            'type': 'rest',
            'snapshots': []
        })

        # Place all snapshots in bucket_chains
        bucket_chains = []
        for vmachine in VMachineList.get_customer_vmachines():
            if any(vd.info['object_type'] in ['BASE']
                   for vd in vmachine.vdisks):
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vmachine.snapshots:
                    if snapshot.get('is_sticky') is True:
                        continue
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            for diskguid, snapshotguid in snapshot['snapshots'].iteritems():
                                bucket['snapshots'].append({'timestamp': timestamp,
                                                            'snapshotid': snapshotguid,
                                                            'diskguid': diskguid,
                                                            'is_consistent': snapshot['is_consistent']})
                bucket_chains.append(bucket_chain)

        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] in ['BASE']:
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vdisk.snapshots:
                    if snapshot.get('is_sticky') is True:
                        continue
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            bucket['snapshots'].append({'timestamp': timestamp,
                                                        'snapshotid': snapshot['guid'],
                                                        'diskguid': vdisk.guid,
                                                        'is_consistent': snapshot['is_consistent']})
                bucket_chains.append(bucket_chain)

        # Clean out the bucket chains: drop from each bucket the snapshots we want to keep,
        # then physically delete every snapshot that still remains in the buckets
        for bucket_chain in bucket_chains:
            first = True
            for bucket in bucket_chain:
                if first is True:
                    best = None
                    for snapshot in bucket['snapshots']:
                        if best is None:
                            best = snapshot
                        # Consistent is better than inconsistent
                        elif snapshot['is_consistent'] and not best['is_consistent']:
                            best = snapshot
                        # Newer (larger timestamp) is better than older snapshots
                        elif snapshot['is_consistent'] == best['is_consistent'] and \
                                snapshot['timestamp'] > best['timestamp']:
                            best = snapshot
                    bucket['snapshots'] = [
                        s for s in bucket['snapshots']
                        if s['timestamp'] != best['timestamp']
                    ]
                    first = False
                elif bucket['end'] > 0:
                    oldest = None
                    for snapshot in bucket['snapshots']:
                        if oldest is None:
                            oldest = snapshot
                        # Older (smaller timestamp) is the one we want to keep
                        elif snapshot['timestamp'] < oldest['timestamp']:
                            oldest = snapshot
                    bucket['snapshots'] = [
                        s for s in bucket['snapshots']
                        if s['timestamp'] != oldest['timestamp']
                    ]

        # Delete obsolete snapshots
        for bucket_chain in bucket_chains:
            for bucket in bucket_chain:
                for snapshot in bucket['snapshots']:
                    VDiskController.delete_snapshot(
                        diskguid=snapshot['diskguid'],
                        snapshotid=snapshot['snapshotid'])
        logger.info('Delete snapshots finished')
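The 'best of bucket' selection above prefers a consistent snapshot over an inconsistent one, and the newest among equals. A compact restatement under the same ordering (illustrative data; max() with a tuple key picks the same snapshot the explicit loop keeps):

snapshots = [{'timestamp': 100, 'is_consistent': False},
             {'timestamp': 200, 'is_consistent': True},
             {'timestamp': 300, 'is_consistent': False}]

# Consistency dominates, timestamp breaks ties
best = max(snapshots, key=lambda s: (s['is_consistent'], s['timestamp']))
assert best['timestamp'] == 200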
Example #29
    def delete_snapshots(timestamp=None):
        """
        Delete snapshots & scrubbing policy

        Implemented delete snapshot policy:
        < 1d | 1d bucket | 1 | best of bucket   | 1d
        < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w
        < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m
        > 1m | delete

        :param timestamp: Timestamp to determine whether snapshots should be kept or not, if none provided, current time will be used
        """

        logger.info("Delete snapshots started")

        day = timedelta(1)
        week = day * 7

        def make_timestamp(offset):
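            """
            Create an integer based timestamp
            :param offset: Offset in days
            :return: Timestamp
            """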
            return int(mktime((base - offset).timetuple()))

        # Calculate bucket structure
        if timestamp is None:
            timestamp = time.time()
        base = datetime.fromtimestamp(timestamp).date() - day
        buckets = []
        # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[
        for i in xrange(0, 7):
            buckets.append(
                {"start": make_timestamp(day * i), "end": make_timestamp(day * (i + 1)), "type": "1d", "snapshots": []}
            )
        # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[
        for i in xrange(1, 4):
            buckets.append(
                {
                    "start": make_timestamp(week * i),
                    "end": make_timestamp(week * (i + 1)),
                    "type": "1w",
                    "snapshots": [],
                }
            )
        buckets.append({"start": make_timestamp(week * 4), "end": 0, "type": "rest", "snapshots": []})

        # Place all snapshots in bucket_chains
        bucket_chains = []
        for vmachine in VMachineList.get_customer_vmachines():
            if any(vd.info["object_type"] in ["BASE"] for vd in vmachine.vdisks):
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vmachine.snapshots:
                    timestamp = int(snapshot["timestamp"])
                    for bucket in bucket_chain:
                        if bucket["start"] >= timestamp > bucket["end"]:
                            for diskguid, snapshotguid in snapshot["snapshots"].iteritems():
                                bucket["snapshots"].append(
                                    {
                                        "timestamp": timestamp,
                                        "snapshotid": snapshotguid,
                                        "diskguid": diskguid,
                                        "is_consistent": snapshot["is_consistent"],
                                    }
                                )
                bucket_chains.append(bucket_chain)

        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info["object_type"] in ["BASE"]:
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vdisk.snapshots:
                    timestamp = int(snapshot["timestamp"])
                    for bucket in bucket_chain:
                        if bucket["start"] >= timestamp > bucket["end"]:
                            bucket["snapshots"].append(
                                {
                                    "timestamp": timestamp,
                                    "snapshotid": snapshot["guid"],
                                    "diskguid": vdisk.guid,
                                    "is_consistent": snapshot["is_consistent"],
                                }
                            )
                bucket_chains.append(bucket_chain)

        # Clean out the bucket chains: drop from each bucket the snapshots we want to keep,
        # then physically delete every snapshot that still remains in the buckets
        for bucket_chain in bucket_chains:
            first = True
            for bucket in bucket_chain:
                if first is True:
                    best = None
                    for snapshot in bucket["snapshots"]:
                        if best is None:
                            best = snapshot
                        # Consistent is better than inconsistent
                        elif snapshot["is_consistent"] and not best["is_consistent"]:
                            best = snapshot
                        # Newer (larger timestamp) is better than older snapshots
                        elif (
                            snapshot["is_consistent"] == best["is_consistent"]
                            and snapshot["timestamp"] > best["timestamp"]
                        ):
                            best = snapshot
                    bucket["snapshots"] = [s for s in bucket["snapshots"] if s["timestamp"] != best["timestamp"]]
                    first = False
                elif bucket["end"] > 0:
                    oldest = None
                    for snapshot in bucket["snapshots"]:
                        if oldest is None:
                            oldest = snapshot
                        # Older (smaller timestamp) is the one we want to keep
                        elif snapshot["timestamp"] < oldest["timestamp"]:
                            oldest = snapshot
                    bucket["snapshots"] = [s for s in bucket["snapshots"] if s["timestamp"] != oldest["timestamp"]]

        # Delete obsolete snapshots
        for bucket_chain in bucket_chains:
            for bucket in bucket_chain:
                for snapshot in bucket["snapshots"]:
                    VDiskController.delete_snapshot(diskguid=snapshot["diskguid"], snapshotid=snapshot["snapshotid"])
        logger.info("Delete snapshots finished")
    def deletescrubsnapshots(timestamp=None):
        """
        Delete snapshots & scrubbing policy

        Implemented delete snapshot policy:
        < 1d | 1d bucket | 1 | best of bucket   | 1d
        < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w
        < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m
        > 1m | delete
        """

        logger.info('Delete snapshots started')

        day = 60 * 60 * 24
        week = day * 7

        # Calculate bucket structure
        if timestamp is None:
            timestamp = time.time()
        offset = int(mktime(datetime.fromtimestamp(timestamp).date().timetuple())) - day
        buckets = []
        # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[
        for i in xrange(0, 7):
            buckets.append({
                'start': offset - (day * i),
                'end': offset - (day * (i + 1)),
                'type': '1d',
                'snapshots': []
            })
        # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[
        for i in xrange(1, 4):
            buckets.append({
                'start': offset - (week * i),
                'end': offset - (week * (i + 1)),
                'type': '1w',
                'snapshots': []
            })
        buckets.append({
            'start': offset - (week * 4),
            'end': 0,
            'type': 'rest',
            'snapshots': []
        })

        # Place all snapshots in bucket_chains
        bucket_chains = []
        for vmachine in VMachineList.get_customer_vmachines():
            if any(vd.info['object_type'] in ['BASE']
                   for vd in vmachine.vdisks):
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vmachine.snapshots:
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            for diskguid, snapshotguid in snapshot['snapshots'].iteritems():
                                bucket['snapshots'].append({'timestamp': timestamp,
                                                            'snapshotid': snapshotguid,
                                                            'diskguid': diskguid,
                                                            'is_consistent': snapshot['is_consistent']})
                bucket_chains.append(bucket_chain)

        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] in ['BASE']:
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vdisk.snapshots:
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            bucket['snapshots'].append({'timestamp': timestamp,
                                                        'snapshotid': snapshot['guid'],
                                                        'diskguid': vdisk.guid,
                                                        'is_consistent': snapshot['is_consistent']})
                bucket_chains.append(bucket_chain)

        # Clean out the bucket chains: drop from each bucket the snapshots we want to keep,
        # then physically delete every snapshot that still remains in the buckets
        for bucket_chain in bucket_chains:
            first = True
            for bucket in bucket_chain:
                if first is True:
                    best = None
                    for snapshot in bucket['snapshots']:
                        if best is None:
                            best = snapshot
                        # Consistent is better than inconsistent
                        elif snapshot['is_consistent'] and not best['is_consistent']:
                            best = snapshot
                        # Newer (larger timestamp) is better than older snapshots
                        elif snapshot['is_consistent'] == best['is_consistent'] and \
                                snapshot['timestamp'] > best['timestamp']:
                            best = snapshot
                    bucket['snapshots'] = [
                        s for s in bucket['snapshots']
                        if s['timestamp'] != best['timestamp']
                    ]
                    first = False
                elif bucket['end'] > 0:
                    oldest = None
                    for snapshot in bucket['snapshots']:
                        if oldest is None:
                            oldest = snapshot
                        # Older (smaller timestamp) is the one we want to keep
                        elif snapshot['timestamp'] < oldest['timestamp']:
                            oldest = snapshot
                    bucket['snapshots'] = [
                        s for s in bucket['snapshots']
                        if s['timestamp'] != oldest['timestamp']
                    ]

        # Delete obsolete snapshots
        for bucket_chain in bucket_chains:
            for bucket in bucket_chain:
                for snapshot in bucket['snapshots']:
                    VDiskController.delete_snapshot(
                        diskguid=snapshot['diskguid'],
                        snapshotid=snapshot['snapshotid'])

        logger.info('Delete snapshots finished')
        logger.info('Scrubbing started')

        vdisks = []
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info['object_type'] in ['BASE'] and len(vdisk.child_vdisks) == 0:
                    vdisks.append(vdisk)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] in ['BASE'] and len(vdisk.child_vdisks) == 0:
                vdisks.append(vdisk)

        total = 0
        failed = 0
        skipped = 0
        storagedrivers = {}
        for vdisk in vdisks:
            try:
                total += 1
                # Load the vDisk's StorageDriver
                vdisk.invalidate_dynamics(['info', 'storagedriver_id'])
                if vdisk.storagedriver_id not in storagedrivers:
                    storagedrivers[vdisk.storagedriver_id] = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
                storagedriver = storagedrivers[vdisk.storagedriver_id]
                # Load the vDisk's MDS configuration
                vdisk.invalidate_dynamics(['info'])
                configs = vdisk.info['metadata_backend_config']
                if len(configs) == 0:
                    raise RuntimeError('Could not load MDS configuration')
                if configs[0]['ip'] != storagedriver.storagerouter.ip:
                    # The MDS master is not local. Trigger an MDS handover and try again
                    logger.debug('MDS for volume {0} is not local. Trigger handover'.format(vdisk.volume_id))
                    MDSServiceController.ensure_safety(vdisk)
                    vdisk.invalidate_dynamics(['info'])
                    configs = vdisk.info['metadata_backend_config']
                    if len(configs) == 0:
                        raise RuntimeError('Could not load MDS configuration')
                    if configs[0]['ip'] != storagedriver.storagerouter.ip:
                        skipped += 1
                        logger.info('Skipping scrubbing work unit for volume {0}: MDS master is not local'.format(vdisk.volume_id))
                        continue
                work_units = vdisk.storagedriver_client.get_scrubbing_workunits(str(vdisk.volume_id))
                for work_unit in work_units:
                    scrubbing_result = _storagedriver_scrubber.scrub(work_unit, str(storagedriver.mountpoint_temp))
                    vdisk.storagedriver_client.apply_scrubbing_result(scrubbing_result)
            except Exception as ex:
                failed += 1
                logger.info('Failed scrubbing work unit for volume {0}: {1}'.format(vdisk.volume_id, ex))
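The MDS handling above follows a 'retry once, then skip' pattern: if the metadata master is not local, trigger a handover, re-read the configuration, and only skip the volume when the master is still remote. A generic sketch of that pattern (the callables are placeholders, not the OVS API):

def ensure_local_master(read_master_ip, trigger_handover, local_ip):
    """Return True when the metadata master is local, allowing one handover attempt."""
    if read_master_ip() == local_ip:
        return True
    trigger_handover()                    # ensure_safety() plays this role in the example
    return read_master_ip() == local_ip  # still remote -> the caller skips this volume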
Example #31
    def create_from_template(name, machineguid, pmachineguid, description=None):
        """
        Create a new vmachine using an existing vmachine template

        :param machineguid: guid of the template vmachine
        :param name: name of new vmachine
        :param pmachineguid: guid of hypervisor to create new vmachine on
        :param description: Description for the machine
        :return: guid of the newly created vmachine | False on any failure
        """

        template_vm = VMachine(machineguid)
        if not template_vm.is_vtemplate:
            return False

        target_pm = PMachine(pmachineguid)
        target_hypervisor = Factory.get(target_pm)

        storagerouters = [sr for sr in StorageRouterList.get_storagerouters() if sr.pmachine_guid == target_pm.guid]
        if len(storagerouters) == 1:
            target_storagerouter = storagerouters[0]
        else:
            raise ValueError('Pmachine {0} has no StorageRouter assigned to it'.format(pmachineguid))
        routing_key = "sr.{0}".format(target_storagerouter.machine_id)

        vpool = None
        vpool_guids = set()
        if template_vm.vpool is not None:
            vpool = template_vm.vpool
            vpool_guids.add(vpool.guid)
        for disk in template_vm.vdisks:
            vpool = disk.vpool
            vpool_guids.add(vpool.guid)
        if len(vpool_guids) != 1:
            raise RuntimeError('Only 1 vpool supported on template disk(s) - {0} found!'.format(len(vpool_guids)))

        if not template_vm.pmachine.hvtype == target_pm.hvtype:
            raise RuntimeError('Source and target hypervisor not identical')

        # Currently, only one vPool is supported, so we can just use whatever the `vpool` variable above
        # was set to as 'the' vPool for the code below. This obviously will have to change once vPool mixes
        # are supported.

        target_storagedriver = None
        source_storagedriver = None
        for vpool_storagedriver in vpool.storagedrivers:
            if vpool_storagedriver.storagerouter.pmachine_guid == target_pm.guid:
                target_storagedriver = vpool_storagedriver
            if vpool_storagedriver.storagerouter.pmachine_guid == template_vm.pmachine_guid:
                source_storagedriver = vpool_storagedriver
        if target_storagedriver is None:
            raise RuntimeError('Volume not served on target hypervisor')

        source_hv = Factory.get(template_vm.pmachine)
        target_hv = Factory.get(target_pm)
        if not source_hv.is_datastore_available(source_storagedriver.storage_ip, source_storagedriver.mountpoint):
            raise RuntimeError('Datastore unavailable on source hypervisor')
        if not target_hv.is_datastore_available(target_storagedriver.storage_ip, target_storagedriver.mountpoint):
            raise RuntimeError('Datastore unavailable on target hypervisor')

        source_vm = source_hv.get_vm_object(template_vm.hypervisor_id)
        if not source_vm:
            raise RuntimeError('VM with key reference {0} not found'.format(template_vm.hypervisor_id))

        name_duplicates = VMachineList.get_vmachine_by_name(name)
        if name_duplicates is not None and len(name_duplicates) > 0:
            raise RuntimeError('A vMachine with name {0} already exists'.format(name))

        vm_path = target_hypervisor.get_vmachine_path(name, target_storagedriver.storagerouter.machine_id)

        new_vm = VMachine()
        new_vm.copy(template_vm)
        new_vm.hypervisor_id = ''
        new_vm.vpool = template_vm.vpool
        new_vm.pmachine = target_pm
        new_vm.name = name
        new_vm.description = description
        new_vm.is_vtemplate = False
        new_vm.devicename = target_hypervisor.clean_vmachine_filename(vm_path)
        new_vm.status = 'CREATED'
        new_vm.save()

        storagedrivers = [storagedriver for storagedriver in vpool.storagedrivers if storagedriver.storagerouter.pmachine_guid == new_vm.pmachine_guid]
        if len(storagedrivers) == 0:
            raise RuntimeError('Cannot find Storage Driver serving {0} on {1}'.format(vpool.name, new_vm.pmachine.name))

        disks = []
        disks_by_order = sorted(template_vm.vdisks, key=lambda x: x.order)
        for disk in disks_by_order:
            try:
                prefix = '{0}-clone'.format(disk.name)
                result = VDiskController.create_from_template(
                    diskguid=disk.guid,
                    devicename=prefix,
                    pmachineguid=target_pm.guid,
                    machinename=new_vm.name,
                    machineguid=new_vm.guid
                )
                disks.append(result)
                VMachineController._logger.debug('Disk appended: {0}'.format(result))
            except Exception as exception:
                VMachineController._logger.error('Creation of disk {0} failed: {1}'.format(disk.name, str(exception)), print_msg=True)
                VMachineController.delete.s(machineguid=new_vm.guid).apply_async(routing_key=routing_key)
                raise

        try:
            result = target_hv.create_vm_from_template(
                name, source_vm, disks, target_storagedriver.storage_ip, target_storagedriver.mountpoint, wait=True
            )
        except Exception as exception:
            VMachineController._logger.error('Creation of vm {0} on hypervisor failed: {1}'.format(new_vm.name, str(exception)), print_msg=True)
            VMachineController.delete.s(machineguid=new_vm.guid).apply_async(routing_key=routing_key)
            raise

        new_vm.hypervisor_id = result
        new_vm.status = 'SYNC'
        new_vm.save()
        return new_vm.guid
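Both failure paths above compensate by firing an asynchronous delete of the half-created vMachine on the owning StorageRouter, then re-raising. A generic sketch of that compensate-on-failure shape (callables are illustrative):

def with_cleanup(action, compensate):
    """Run action(); on any failure fire the compensating action, then re-raise."""
    try:
        return action()
    except Exception:
        compensate()  # e.g. a delete task dispatched with the proper routing key
        raise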
Example #32
    def update_from_voldrv(name, storagedriver_id):
        """
        This method will update/create a vmachine based on a given vmx/xml file
        :param name: Name of the vmx
        :param storagedriver_id: Storage Driver hosting the vmachine
        """

        pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
        if pmachine.hvtype not in ['VMWARE', 'KVM']:
            return

        hypervisor = Factory.get(pmachine)
        name = hypervisor.clean_vmachine_filename(name)
        storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
        vpool = storagedriver.vpool
        machine_ids = [storagedriver.storagerouter.machine_id for storagedriver in vpool.storagedrivers]

        if hypervisor.should_process(name, machine_ids=machine_ids):
            if pmachine.hvtype == 'VMWARE':
                storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
                vpool = storagedriver.vpool
            else:
                vpool = None
            pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
            mutex = volatile_mutex('{0}_{1}'.format(name, vpool.guid if vpool is not None else 'none'))
            try:
                mutex.acquire(wait=120)
                limit = 5
                exists = hypervisor.file_exists(storagedriver, name)
                while limit > 0 and exists is False:
                    time.sleep(1)
                    exists = hypervisor.file_exists(storagedriver, name)
                    limit -= 1
                if exists is False:
                    VMachineController._logger.info('Could not locate vmachine with name {0} on vpool {1}'.format(name, vpool.name))
                    vmachine = VMachineList.get_by_devicename_and_vpool(name, vpool)
                    if vmachine is not None:
                        VMachineController.delete_from_voldrv(name, storagedriver_id=storagedriver_id)
                    return
            finally:
                mutex.release()
            try:
                mutex.acquire(wait=5)
                vmachine = VMachineList.get_by_devicename_and_vpool(name, vpool)
                if not vmachine:
                    vmachines = VMachineList.get_vmachine_by_name(name)
                    if vmachines is not None:
                        vmachine = vmachines[0]
                if not vmachine:
                    vmachine = VMachine()
                    vmachine.vpool = vpool
                    vmachine.pmachine = pmachine
                    vmachine.status = 'CREATED'
                vmachine.devicename = name
                vmachine.save()
            finally:
                mutex.release()

            if pmachine.hvtype == 'KVM':
                try:
                    mutex.acquire(wait=120)
                    VMachineController.sync_with_hypervisor(vmachine.guid, storagedriver_id=storagedriver_id)
                    vmachine.status = 'SYNC'
                except:
                    vmachine.status = 'SYNC_NOK'
                finally:
                    mutex.release()
                vmachine.save()
        else:
            VMachineController._logger.info('Ignored invalid file {0}'.format(name))
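The existence check above polls under a mutex: one initial check plus up to five retries, one second apart, before concluding the vmx/xml file is gone. The same bounded poll, factored out (a sketch; the predicate is a stand-in for hypervisor.file_exists):

import time

def wait_for(predicate, attempts=5, interval=1.0):
    """Poll predicate() a bounded number of times; return True as soon as it holds."""
    exists = predicate()
    while attempts > 0 and exists is False:
        time.sleep(interval)
        exists = predicate()
        attempts -= 1
    return exists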
    def get_vmachines():
        """
        Retrieve all Virtual Machines
        :return: Virtual Machine data-object list
        """
        return VMachineList.get_vmachines()
Example #34
    def deletescrubsnapshots(timestamp=None):
        """
        Delete snapshots & scrubbing policy

        Implemented delete snapshot policy:
        < 1d | 1d bucket | 1 | best of bucket   | 1d
        < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w
        < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m
        > 1m | delete
        """

        logger.info('Delete snapshots started')

        day = 60 * 60 * 24
        week = day * 7

        # Calculate bucket structure
        if timestamp is None:
            timestamp = time.time()
        offset = int(mktime(datetime.fromtimestamp(timestamp).date().timetuple())) - day
        buckets = []
        # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[
        for i in xrange(0, 7):
            buckets.append({'start': offset - (day * i),
                            'end': offset - (day * (i + 1)),
                            'type': '1d',
                            'snapshots': []})
        # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[
        for i in xrange(1, 4):
            buckets.append({'start': offset - (week * i),
                            'end': offset - (week * (i + 1)),
                            'type': '1w',
                            'snapshots': []})
        buckets.append({'start': offset - (week * 4),
                        'end': 0,
                        'type': 'rest',
                        'snapshots': []})

        # Place all snapshots in bucket_chains
        bucket_chains = []
        for vmachine in VMachineList.get_customer_vmachines():
            if any(vd.info['object_type'] in ['BASE'] for vd in vmachine.vdisks):
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vmachine.snapshots:
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            for diskguid, snapshotguid in snapshot['snapshots'].iteritems():
                                bucket['snapshots'].append({'timestamp': timestamp,
                                                            'snapshotid': snapshotguid,
                                                            'diskguid': diskguid,
                                                            'is_consistent': snapshot['is_consistent']})
                bucket_chains.append(bucket_chain)

        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] in ['BASE']:
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vdisk.snapshots:
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            bucket['snapshots'].append({'timestamp': timestamp,
                                                        'snapshotid': snapshot['guid'],
                                                        'diskguid': vdisk.guid,
                                                        'is_consistent': snapshot['is_consistent']})
                bucket_chains.append(bucket_chain)

        # Clean out the bucket chains: drop from each bucket the snapshots we want to keep,
        # then physically delete every snapshot that still remains in the buckets
        for bucket_chain in bucket_chains:
            first = True
            for bucket in bucket_chain:
                if first is True:
                    best = None
                    for snapshot in bucket['snapshots']:
                        if best is None:
                            best = snapshot
                        # Consistent is better than inconsistent
                        elif snapshot['is_consistent'] and not best['is_consistent']:
                            best = snapshot
                        # Newer (larger timestamp) is better than older snapshots
                        elif snapshot['is_consistent'] == best['is_consistent'] and \
                                snapshot['timestamp'] > best['timestamp']:
                            best = snapshot
                    bucket['snapshots'] = [s for s in bucket['snapshots'] if
                                           s['timestamp'] != best['timestamp']]
                    first = False
                elif bucket['end'] > 0:
                    oldest = None
                    for snapshot in bucket['snapshots']:
                        if oldest is None:
                            oldest = snapshot
                        # Older (smaller timestamp) is the one we want to keep
                        elif snapshot['timestamp'] < oldest['timestamp']:
                            oldest = snapshot
                    bucket['snapshots'] = [s for s in bucket['snapshots'] if
                                           s['timestamp'] != oldest['timestamp']]

        # Delete obsolete snapshots
        for bucket_chain in bucket_chains:
            for bucket in bucket_chain:
                for snapshot in bucket['snapshots']:
                    VDiskController.delete_snapshot(diskguid=snapshot['diskguid'],
                                                    snapshotid=snapshot['snapshotid'])

        logger.info('Delete snapshots finished')
        logger.info('Scrubbing started')

        vdisks = []
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info['object_type'] in ['BASE'] and len(vdisk.child_vdisks) == 0:
                    vdisks.append(vdisk)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] in ['BASE'] and len(vdisk.child_vdisks) == 0:
                vdisks.append(vdisk)

        total = 0
        failed = 0
        skipped = 0
        storagedrivers = {}
        for vdisk in vdisks:
            try:
                total += 1
                # Load the vDisk's StorageDriver
                vdisk.invalidate_dynamics(['info', 'storagedriver_id'])
                if vdisk.storagedriver_id not in storagedrivers:
                    storagedrivers[vdisk.storagedriver_id] = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
                storagedriver = storagedrivers[vdisk.storagedriver_id]
                # Load the vDisk's MDS configuration
                vdisk.invalidate_dynamics(['info'])
                configs = vdisk.info['metadata_backend_config']
                if len(configs) == 0:
                    raise RuntimeError('Could not load MDS configuration')
                if configs[0]['ip'] != storagedriver.storagerouter.ip:
                    # The MDS master is not local. Trigger an MDS handover and try again
                    logger.debug('MDS for volume {0} is not local. Trigger handover'.format(vdisk.volume_id))
                    MDSServiceController.ensure_safety(vdisk)
                    vdisk.invalidate_dynamics(['info'])
                    configs = vdisk.info['metadata_backend_config']
                    if len(configs) == 0:
                        raise RuntimeError('Could not load MDS configuration')
                    if configs[0]['ip'] != storagedriver.storagerouter.ip:
                        skipped += 1
                        logger.info('Skipping scrubbing work unit for volume {0}: MDS master is not local'.format(
                            vdisk.volume_id
                        ))
                        continue
                work_units = vdisk.storagedriver_client.get_scrubbing_workunits(str(vdisk.volume_id))
                for work_unit in work_units:
                    scrubbing_result = _storagedriver_scrubber.scrub(work_unit, str(storagedriver.mountpoint_temp))
                    vdisk.storagedriver_client.apply_scrubbing_result(scrubbing_result)
            except Exception as ex:
                failed += 1
                logger.info('Failed scrubbing work unit for volume {0}: {1}'.format(vdisk.volume_id, ex))
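Each volume's scrub above is a three-step pipeline: fetch the scrubbing work units from the storage driver, run each unit against the temporary mountpoint, and apply the result back. A schematic restatement of that loop (arguments are placeholders):

def scrub_volume(client, scrubber, volume_id, temp_mountpoint):
    """Schematic of the per-volume scrub loop: fetch, scrub, apply."""
    for work_unit in client.get_scrubbing_workunits(str(volume_id)):
        result = scrubber.scrub(work_unit, str(temp_mountpoint))
        client.apply_scrubbing_result(result)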
Example #35
    def gather_scrub_work():
        """
        Retrieve and execute scrub work
        :return: None
        """
        logger.info('Gather Scrub - Started')

        scrub_locations = {}
        for storage_driver in StorageDriverList.get_storagedrivers():
            for partition in storage_driver.partitions:
                if DiskPartition.ROLES.SCRUB == partition.role:
                    logger.info('Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}'.format(storage_driver.storagerouter.ip, partition.path))
                    if storage_driver.storagerouter not in scrub_locations:
                        try:
                            _ = SSHClient(storage_driver.storagerouter)
                            scrub_locations[storage_driver.storagerouter] = str(partition.path)
                        except UnableToConnectException:
                            logger.warning('Gather Scrub - Storage Router {0:<15} is not reachable'.format(storage_driver.storagerouter.ip))

        if len(scrub_locations) == 0:
            raise RuntimeError('No scrub locations found')

        vdisk_guids = set()
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info['object_type'] == 'BASE':
                    vdisk_guids.add(vdisk.guid)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] == 'BASE':
                vdisk_guids.add(vdisk.guid)

        logger.info('Gather Scrub - Checking {0} volumes for scrub work'.format(len(vdisk_guids)))
        local_machineid = System.get_my_machine_id()
        local_storage_router = None
        local_scrub_location = None
        local_vdisks_to_scrub = []
        result_set = ResultSet([])
        storage_router_list = []

        for index, scrub_info in enumerate(scrub_locations.items()):
            start_index = index * len(vdisk_guids) / len(scrub_locations)
            end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations)
            storage_router = scrub_info[0]
            vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index]
            local = storage_router.machine_id == local_machineid
            logger.info('Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks'.format(storage_router.ip, 'local' if local is True else 'remote', len(vdisk_guids_to_scrub)))

            if local is True:
                local_storage_router = storage_router
                local_scrub_location = scrub_info[1]
                local_vdisks_to_scrub = vdisk_guids_to_scrub
            else:
                result_set.add(ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_info[1],
                                                                             vdisk_guids=vdisk_guids_to_scrub).apply_async(
                    routing_key='sr.{0}'.format(storage_router.machine_id)
                ))
                storage_router_list.append(storage_router)

        # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
        processed_guids = []
        if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
            try:
                processed_guids = ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location,
                                                                              vdisk_guids=local_vdisks_to_scrub)
            except Exception as ex:
                logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(local_storage_router.ip, ex))
        all_results = result_set.join(propagate=False)  # propagate=False ensures we wait for all jobs, even when one or more fail
        for index, result in enumerate(all_results):
            if isinstance(result, list):
                processed_guids.extend(result)
            else:
                logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(storage_router_list[index].ip, result))
        if len(processed_guids) != len(vdisk_guids) or set(processed_guids).difference(vdisk_guids):
            raise RuntimeError('Scrubbing failed for 1 or more storagerouters')
        logger.info('Gather Scrub - Finished')
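The final check above only passes when exactly the submitted guids come back: the length comparison catches omissions and duplicates, the set difference catches strays. A minimal restatement with illustrative values:

submitted = set(['guid-a', 'guid-b', 'guid-c'])
processed = ['guid-a', 'guid-b', 'guid-c']

# Any missing, extra or duplicated guid flips this to True and would raise in the example
failed = len(processed) != len(submitted) or bool(set(processed).difference(submitted))
assert failed is False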
Example #36
    def create_from_template(name,
                             machineguid,
                             pmachineguid,
                             description=None):
        """
        Create a new vmachine using an existing vmachine template

        :param machineguid: guid of the template vmachine
        :param name: name of new vmachine
        :param pmachineguid: guid of hypervisor to create new vmachine on
        :param description: Description for the machine
        :return: guid of the newly created vmachine | False on any failure
        """

        template_vm = VMachine(machineguid)
        if not template_vm.is_vtemplate:
            return False

        target_pm = PMachine(pmachineguid)
        target_hypervisor = Factory.get(target_pm)

        storagerouters = [sr for sr in StorageRouterList.get_storagerouters() if sr.pmachine_guid == target_pm.guid]
        if len(storagerouters) == 1:
            target_storagerouter = storagerouters[0]
        else:
            raise ValueError('Pmachine {0} has no StorageRouter assigned to it'.format(pmachineguid))
        routing_key = "sr.{0}".format(target_storagerouter.machine_id)

        vpool = None
        vpool_guids = set()
        if template_vm.vpool is not None:
            vpool = template_vm.vpool
            vpool_guids.add(vpool.guid)
        for disk in template_vm.vdisks:
            vpool = disk.vpool
            vpool_guids.add(vpool.guid)
        if len(vpool_guids) != 1:
            raise RuntimeError('Only 1 vpool supported on template disk(s) - {0} found!'.format(len(vpool_guids)))

        if not template_vm.pmachine.hvtype == target_pm.hvtype:
            raise RuntimeError('Source and target hypervisor not identical')

        # Currently, only one vPool is supported, so we can just use whatever the `vpool` variable above
        # was set to as 'the' vPool for the code below. This obviously will have to change once vPool mixes
        # are supported.

        target_storagedriver = None
        source_storagedriver = None
        for vpool_storagedriver in vpool.storagedrivers:
            if vpool_storagedriver.storagerouter.pmachine_guid == target_pm.guid:
                target_storagedriver = vpool_storagedriver
            if vpool_storagedriver.storagerouter.pmachine_guid == template_vm.pmachine_guid:
                source_storagedriver = vpool_storagedriver
        if target_storagedriver is None:
            raise RuntimeError('Volume not served on target hypervisor')

        source_hv = Factory.get(template_vm.pmachine)
        target_hv = Factory.get(target_pm)
        if not source_hv.is_datastore_available(
                source_storagedriver.storage_ip,
                source_storagedriver.mountpoint):
            raise RuntimeError('Datastore unavailable on source hypervisor')
        if not target_hv.is_datastore_available(
                target_storagedriver.storage_ip,
                target_storagedriver.mountpoint):
            raise RuntimeError('Datastore unavailable on target hypervisor')

        source_vm = source_hv.get_vm_object(template_vm.hypervisor_id)
        if not source_vm:
            raise RuntimeError('VM with key reference {0} not found'.format(
                template_vm.hypervisor_id))

        name_duplicates = VMachineList.get_vmachine_by_name(name)
        if name_duplicates is not None and len(name_duplicates) > 0:
            raise RuntimeError(
                'A vMachine with name {0} already exists'.format(name))

        vm_path = target_hypervisor.get_vmachine_path(
            name, target_storagedriver.storagerouter.machine_id)

        new_vm = VMachine()
        new_vm.copy(template_vm)
        new_vm.hypervisor_id = ''
        new_vm.vpool = template_vm.vpool
        new_vm.pmachine = target_pm
        new_vm.name = name
        new_vm.description = description
        new_vm.is_vtemplate = False
        new_vm.devicename = target_hypervisor.clean_vmachine_filename(vm_path)
        new_vm.status = 'CREATED'
        new_vm.save()

        storagedrivers = [
            storagedriver for storagedriver in vpool.storagedrivers if
            storagedriver.storagerouter.pmachine_guid == new_vm.pmachine_guid
        ]
        if len(storagedrivers) == 0:
            raise RuntimeError(
                'Cannot find Storage Driver serving {0} on {1}'.format(
                    vpool.name, new_vm.pmachine.name))
        storagedriverguid = storagedrivers[0].guid

        disks = []
        disks_by_order = sorted(template_vm.vdisks, key=lambda x: x.order)
        try:
            for disk in disks_by_order:
                prefix = '{0}-clone'.format(disk.name)
                result = VDiskController.create_from_template(
                    diskguid=disk.guid,
                    devicename=prefix,
                    pmachineguid=target_pm.guid,
                    machinename=new_vm.name,
                    machineguid=new_vm.guid,
                    storagedriver_guid=storagedriverguid)
                disks.append(result)
                logger.debug('Disk appended: {0}'.format(result))
        except Exception as exception:
            logger.error('Creation of disk {0} failed: {1}'.format(disk.name, str(exception)), print_msg=True)
            VMachineController.delete.s(machineguid=new_vm.guid).apply_async(
                routing_key=routing_key)
            raise

        try:
            result = target_hv.create_vm_from_template(
                name,
                source_vm,
                disks,
                target_storagedriver.storage_ip,
                target_storagedriver.mountpoint,
                wait=True)
        except Exception as exception:
            logger.error('Creation of vm {0} on hypervisor failed: {1}'.format(new_vm.name, str(exception)), print_msg=True)
            VMachineController.delete.s(machineguid=new_vm.guid).apply_async(
                routing_key=routing_key)
            raise

        new_vm.hypervisor_id = result
        new_vm.status = 'SYNC'
        new_vm.save()
        return new_vm.guid
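The vPool validation above folds every disk's vPool guid into a set and requires exactly one element, i.e. all template disks must live on the same vPool. The check in isolation, with illustrative data:

template_disks = [{'vpool_guid': 'vpool-1'}, {'vpool_guid': 'vpool-1'}]  # stand-in for template_vm.vdisks
vpool_guids = set(disk['vpool_guid'] for disk in template_disks)
if len(vpool_guids) != 1:
    raise RuntimeError('Only 1 vpool supported on template disk(s) - {0} found!'.format(len(vpool_guids)))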
Example #37
    def gather_scrub_work():
        """
        Retrieve and execute scrub work
        :return: None
        """
        ScheduledTaskController._logger.info('Gather Scrub - Started')

        scrub_locations = {}
        for storage_driver in StorageDriverList.get_storagedrivers():
            for partition in storage_driver.partitions:
                if DiskPartition.ROLES.SCRUB == partition.role:
                    ScheduledTaskController._logger.info('Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}'.format(storage_driver.storagerouter.ip, partition.path))
                    if storage_driver.storagerouter not in scrub_locations:
                        try:
                            sshclient = SSHClient(storage_driver.storagerouter)
                            # Use ServiceManager to check whether the ovs-workers service is actually running on this node
                            if ServiceManager.get_service_status('workers', sshclient) is False:
                                ScheduledTaskController._logger.warning('Gather Scrub - Storage Router {0:<15} - workers are not running'.format(storage_driver.storagerouter.ip))
                            else:
                                scrub_locations[storage_driver.storagerouter] = str(partition.path)
                        except UnableToConnectException:
                            ScheduledTaskController._logger.warning('Gather Scrub - Storage Router {0:<15} is not reachable'.format(storage_driver.storagerouter.ip))

        if len(scrub_locations) == 0:
            raise RuntimeError('No scrub locations found')

        vdisk_guids = set()
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info['object_type'] == 'BASE':
                    vdisk_guids.add(vdisk.guid)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] == 'BASE':
                vdisk_guids.add(vdisk.guid)

        if len(vdisk_guids) == 0:
            ScheduledTaskController._logger.info('Gather Scrub - No scrub work needed')
            return

        ScheduledTaskController._logger.info('Gather Scrub - Checking {0} volumes for scrub work'.format(len(vdisk_guids)))
        local_machineid = System.get_my_machine_id()
        local_storage_router = None
        local_scrub_location = None
        local_vdisks_to_scrub = []
        result_set = {}
        storage_router_list = []
        scrub_map = {}

        for index, scrub_info in enumerate(scrub_locations.items()):
            start_index = index * len(vdisk_guids) / len(scrub_locations)
            end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations)
            storage_router = scrub_info[0]
            vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index]
            local = storage_router.machine_id == local_machineid
            ScheduledTaskController._logger.info('Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks'.format(storage_router.ip, 'local' if local is True else 'remote', len(vdisk_guids_to_scrub)))

            if local is True:
                local_storage_router = storage_router
                local_scrub_location = scrub_info[1]
                local_vdisks_to_scrub = vdisk_guids_to_scrub
            else:
                result_set[storage_router.ip] = ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_info[1],
                                                                                              vdisk_guids=vdisk_guids_to_scrub).apply_async(routing_key='sr.{0}'.format(storage_router.machine_id))
                storage_router_list.append(storage_router)
                scrub_map[storage_router.ip] = vdisk_guids_to_scrub

        # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
        processed_guids = []
        if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
            try:
                processed_guids = ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location,
                                                                              vdisk_guids=local_vdisks_to_scrub)
            except Exception as ex:
                ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(local_storage_router.ip, ex))

        all_results, failed_nodes = CeleryToolbox.manage_running_tasks(result_set,
                                                                       timesleep=60)  # Check every 60 seconds if tasks are still running

        for ip, result in all_results.iteritems():
            if isinstance(result, list):
                processed_guids.extend(result)
            else:
                ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(ip, result))

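        # Reschedule the work of every failed node on a healthy remote node;
        # if no remote node is available, fall back to scrubbing locally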
        result_set = {}
        for failed_node in failed_nodes:
            ScheduledTaskController._logger.warning('Scrubbing failed on node {0}. Will reschedule on another node.'.format(failed_node))
            vdisk_guids_to_scrub = scrub_map[failed_node]
            rescheduled_work = False
            for storage_router, scrub_location in scrub_locations.items():
                if storage_router.ip not in failed_nodes:
                    if storage_router.machine_id != local_machineid:
                        ScheduledTaskController._logger.info('Rescheduled scrub work from node {0} to node {1}.'.format(failed_node, storage_router.ip))
                        result_set[storage_router.ip] = ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_location,
                                                                                                      vdisk_guids=vdisk_guids_to_scrub).apply_async(
                                                                                                      routing_key='sr.{0}'.format(storage_router.machine_id))
                        storage_router_list.append(storage_router)
                        rescheduled_work = True
                        break
            if rescheduled_work is False:
                if local_scrub_location is not None:
                    try:
                        processed_guids.extend(ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location,
                                                                                           vdisk_guids=vdisk_guids_to_scrub))
                    except Exception as ex:
                        ScheduledTaskController._logger.error(
                            'Gather Scrub - Storage Router Local - Scrubbing failed with error:\n - {0}'.format(ex))
                else:
                    ScheduledTaskController._logger.warning('No nodes left to reschedule work from node {0}'.format(failed_node))

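        # Wait for the rescheduled remote tasks and collect their results as well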
        if len(result_set) > 0:
            all_results2, failed_nodes = CeleryToolbox.manage_running_tasks(result_set,
                                                                            timesleep=60)  # Check every 60 seconds if tasks are still running

            for ip, result in all_results2.iteritems():
                if isinstance(result, list):
                    processed_guids.extend(result)
                else:
                    ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(ip, result))

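        # The run only succeeds if exactly the original set of vdisk guids was processed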
        if len(set(processed_guids)) != len(vdisk_guids) or set(processed_guids).difference(vdisk_guids):
            raise RuntimeError('Scrubbing failed for 1 or more storagerouters')
        ScheduledTaskController._logger.info('Gather Scrub - Finished')

    def deletescrubsnapshots(timestamp=None):
        """
        Delete snapshots & scrubbing policy

        Implemented delete snapshot policy:
        < 1d | 1d bucket | 1 | best of bucket   | 1d
        < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w
        < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m
        > 1m | delete
        """

        logger.info('Delete snapshots started')

        day = 60 * 60 * 24
        week = day * 7

        # Calculate bucket structure
        if timestamp is None:
            timestamp = time.time()
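        # 'offset' is midnight at the start of the day before 'timestamp';
        # all buckets are laid out counting backwards from that point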
        offset = int(mktime(datetime.fromtimestamp(timestamp).date().timetuple())) - day
        buckets = []
        # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[
        for i in xrange(0, 7):
            buckets.append({'start': offset - (day * i),
                            'end': offset - (day * (i + 1)),
                            'type': '1d',
                            'snapshots': []})
        # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[
        for i in xrange(1, 4):
            buckets.append({'start': offset - (week * i),
                            'end': offset - (week * (i + 1)),
                            'type': '1w',
                            'snapshots': []})
        buckets.append({'start': offset - (week * 4),
                        'end': 0,
                        'type': 'rest',
                        'snapshots': []})

        # Place all snapshots in bucket_chains
        bucket_chains = []
        for vmachine in VMachineList.get_customer_vmachines():
            if any(vd.info['object_type'] in ['BASE'] for vd in vmachine.vdisks):
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vmachine.snapshots:
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            for diskguid, snapshotguid in snapshot['snapshots'].iteritems():
                                bucket['snapshots'].append({'timestamp': timestamp,
                                                            'snapshotid': snapshotguid,
                                                            'diskguid': diskguid,
                                                            'is_consistent': snapshot['is_consistent']})
                bucket_chains.append(bucket_chain)

        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] in ['BASE']:
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vdisk.snapshots:
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            bucket['snapshots'].append({'timestamp': timestamp,
                                                        'snapshotid': snapshot['guid'],
                                                        'diskguid': vdisk.guid,
                                                        'is_consistent': snapshot['is_consistent']})
                bucket_chains.append(bucket_chain)

        # Clean out the bucket chains: remove the snapshots we want to keep from each
        # bucket, so that everything still left in a bucket afterwards gets deleted
        for bucket_chain in bucket_chains:
            first = True
            for bucket in bucket_chain:
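                # The most recent bucket keeps its best snapshot (consistent wins, then
                # newest), older bounded buckets keep their oldest snapshot, and the
                # unbounded 'rest' bucket keeps nothing, so its snapshots are all deleted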
                if first is True:
                    best = None
                    for snapshot in bucket['snapshots']:
                        if best is None:
                            best = snapshot
                        # Consistent is better than inconsistent
                        elif snapshot['is_consistent'] and not best['is_consistent']:
                            best = snapshot
                        # Newer (larger timestamp) is better than older snapshots
                        elif snapshot['is_consistent'] == best['is_consistent'] and \
                                snapshot['timestamp'] > best['timestamp']:
                            best = snapshot
                    bucket['snapshots'] = [s for s in bucket['snapshots'] if
                                           s['timestamp'] != best['timestamp']]
                    first = False
                elif bucket['end'] > 0:
                    oldest = None
                    for snapshot in bucket['snapshots']:
                        if oldest is None:
                            oldest = snapshot
                        # Older (smaller timestamp) is the one we want to keep
                        elif snapshot['timestamp'] < oldest['timestamp']:
                            oldest = snapshot
                    bucket['snapshots'] = [s for s in bucket['snapshots'] if
                                           s['timestamp'] != oldest['timestamp']]

        # Delete obsolete snapshots
        for bucket_chain in bucket_chains:
            for bucket in bucket_chain:
                for snapshot in bucket['snapshots']:
                    VDiskController.delete_snapshot(diskguid=snapshot['diskguid'],
                                                    snapshotid=snapshot['snapshotid'])

        logger.info('Delete snapshots finished')
        logger.info('Scrubbing started')

        vdisks = []
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info['object_type'] in ['BASE']:
                    vdisks.append(vdisk)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] in ['BASE']:
                vdisks.append(vdisk)

        total = 0
        failed = 0
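        # Ask the volumedriver for the scrub work units of each volume and apply
        # the results; failures are counted but do not abort the run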
        for vdisk in vdisks:
            work_units = vdisk.storagedriver_client.get_scrubbing_workunits(str(vdisk.volume_id))
            for work_unit in work_units:
                try:
                    total += 1
                    scrubbing_result = _storagedriver_scrubber.scrub(work_unit, vdisk.vpool.mountpoint_temp)
                    vdisk.storagedriver_client.apply_scrubbing_result(scrubbing_result)
                except Exception:
                    failed += 1
                    logger.info('Failed scrubbing work unit for volume {0}'.format(
                        vdisk.volume_id
                    ))

        logger.info('Scrubbing finished. {} out of {} items failed.'.format(
            failed, total
        ))
    def remove_storagedriver(storagedriver_guid):
        """
        Removes a Storage Driver (and, if it was the last Storage Driver for a vPool, the vPool is removed as well)
        """
        # Get objects & Make some checks
        storagedriver = StorageDriver(storagedriver_guid)
        storagerouter = storagedriver.storagerouter
        ip = storagerouter.ip
        pmachine = storagerouter.pmachine
        vmachines = VMachineList.get_customer_vmachines()
        pmachine_guids = [vm.pmachine_guid for vm in vmachines]
        vpools_guids = [vm.vpool_guid for vm in vmachines if vm.vpool_guid is not None]

        vpool = storagedriver.vpool
        if pmachine.guid in pmachine_guids and vpool.guid in vpools_guids:
            raise RuntimeError('There are still vMachines served from the given Storage Driver')
        if any(vdisk for vdisk in vpool.vdisks if vdisk.storagedriver_id == storagedriver.storagedriver_id):
            raise RuntimeError('There are still vDisks served from the given Storage Driver')

        services = ['volumedriver_{0}'.format(vpool.name),
                    'failovercache_{0}'.format(vpool.name)]
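        # Tracks whether any other Storage Driver still serves this vPool; this decides
        # between reconfiguring the cluster registry and tearing the vPool down completely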
        storagedrivers_left = False

        # Stop services
        for current_storagedriver in vpool.storagedrivers:
            if current_storagedriver.guid != storagedriver_guid:
                storagedrivers_left = True
            client = SSHClient.load(current_storagedriver.storagerouter.ip)
            for service in services:
                System.exec_remote_python(client, """
from ovs.plugin.provider.service import Service
if Service.has_service('{0}'):
    Service.disable_service('{0}')
""".format(service))
                System.exec_remote_python(client, """
from ovs.plugin.provider.service import Service
if Service.has_service('{0}'):
    Service.stop_service('{0}')
""".format(service))

        # Unconfigure Cinder
        ovsdb = PersistentFactory.get_client()
        key = str('ovs_openstack_cinder_%s' % storagedriver.vpool_guid)
        if ovsdb.exists(key):
            cinder_password, cinder_user, tenant_name, controller_ip, _ = ovsdb.get(key)
            client = SSHClient.load(ip)
            System.exec_remote_python(client, """
from ovs.extensions.openstack.cinder import OpenStackCinder
osc = OpenStackCinder(cinder_password = '{0}', cinder_user = '{1}', tenant_name = '{2}', controller_ip = '{3}')
osc.unconfigure_vpool('{4}', '{5}', {6})
""".format(cinder_password, cinder_user, tenant_name, controller_ip, vpool.name, storagedriver.mountpoint, not storagedrivers_left))
            if not storagedrivers_left:
                ovsdb.delete(key)

        # KVM pool
        client = SSHClient.load(ip)
        if pmachine.hvtype == 'KVM':
            if vpool.name in client.run('virsh pool-list'):
                client.run('virsh pool-destroy {0}'.format(vpool.name))
            try:
                client.run('virsh pool-undefine {0}'.format(vpool.name))
            except Exception:
                pass  # Ignore undefine errors, since that can happen on re-entrance

        # Remove services
        client = SSHClient.load(ip)
        for service in services:
            System.exec_remote_python(client, """
from ovs.plugin.provider.service import Service
if Service.has_service('{0}'):
    Service.remove_service(domain='openvstorage', name='{0}')
""".format(service))
        configuration_dir = System.read_remote_config(client, 'ovs.core.cfgdir')

        voldrv_arakoon_cluster_id = str(System.read_remote_config(client, 'volumedriver.arakoon.clusterid'))
        voldrv_arakoon_cluster = ArakoonManagementEx().getCluster(voldrv_arakoon_cluster_id)
        voldrv_arakoon_client_config = voldrv_arakoon_cluster.getClientConfig()
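        # Each client config entry appears to map a node id to ([ip, ...], port);
        # the loop below uses the first IP and the port of every node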
        arakoon_node_configs = []
        for arakoon_node in voldrv_arakoon_client_config.keys():
            arakoon_node_configs.append(ArakoonNodeConfig(arakoon_node,
                                                          voldrv_arakoon_client_config[arakoon_node][0][0],
                                                          voldrv_arakoon_client_config[arakoon_node][1]))
        vrouter_clusterregistry = ClusterRegistry(str(vpool.guid), voldrv_arakoon_cluster_id, arakoon_node_configs)
        # Reconfigure volumedriver
        if storagedrivers_left:
            node_configs = []
            for current_storagedriver in vpool.storagedrivers:
                if current_storagedriver.guid != storagedriver_guid:
                    node_configs.append(ClusterNodeConfig(str(current_storagedriver.storagedriver_id),
                                                          str(current_storagedriver.cluster_ip),
                                                          current_storagedriver.ports[0],
                                                          current_storagedriver.ports[1],
                                                          current_storagedriver.ports[2]))
            vrouter_clusterregistry.set_node_configs(node_configs)
        else:
            try:
                storagedriver_client = LocalStorageRouterClient('{0}/voldrv_vpools/{1}.json'.format(configuration_dir, vpool.name))
                storagedriver_client.destroy_filesystem()
                vrouter_clusterregistry.erase_node_configs()
            except RuntimeError as ex:
                print('Could not destroy filesystem or erase node configs due to error: {}'.format(ex))

        # Cleanup directories
        client = SSHClient.load(ip)
        client.run('rm -rf {}/read1_{}'.format(storagedriver.mountpoint_readcache1, vpool.name))
        if storagedriver.mountpoint_readcache2:
            client.run('rm -rf {}/read2_{}'.format(storagedriver.mountpoint_readcache2, vpool.name))
        client.run('rm -rf {}/sco_{}'.format(storagedriver.mountpoint_writecache, vpool.name))
        client.run('rm -rf {}/foc_{}'.format(storagedriver.mountpoint_foc, vpool.name))
        client.run('rm -rf {}/fd_{}'.format(storagedriver.mountpoint_writecache, vpool.name))
        client.run('rm -rf {}/metadata_{}'.format(storagedriver.mountpoint_md, vpool.name))
        client.run('rm -rf {}/tlogs_{}'.format(storagedriver.mountpoint_md, vpool.name))
        client.run('rm -rf /var/rsp/{}'.format(vpool.name))

        # Remove files
        client.run('rm -f {0}/voldrv_vpools/{1}.json'.format(configuration_dir, vpool.name))

        # Remove top directories
        client.run('if [ -d {0} ] && [ ! "$(ls -A {0})" ]; then rmdir {0}; fi'.format(storagedriver.mountpoint_readcache1))
        if storagedriver.mountpoint_readcache2:
            client.run('if [ -d {0} ] && [ ! "$(ls -A {0})" ]; then rmdir {0}; fi'.format(storagedriver.mountpoint_readcache2))
        client.run('if [ -d {0} ] && [ ! "$(ls -A {0})" ]; then rmdir {0}; fi'.format(storagedriver.mountpoint_writecache))
        client.run('if [ -d {0} ] && [ ! "$(ls -A {0})" ]; then rmdir {0}; fi'.format(storagedriver.mountpoint_foc))
        client.run('if [ -d {0} ] && [ ! "$(ls -A {0})" ]; then rmdir {0}; fi'.format(storagedriver.mountpoint_md))
        client.run('if [ -d {0} ] && [ ! "$(ls -A {0})" ]; then rmdir {0}; fi'.format(storagedriver.mountpoint))

        # First model cleanup
        storagedriver.delete(abandon=True)  # Detach from the log entries

        if storagedrivers_left:
            # Restart leftover services
            for current_storagedriver in vpool.storagedrivers:
                if current_storagedriver.guid != storagedriver_guid:
                    client = SSHClient.load(current_storagedriver.storagerouter.ip)
                    for service in services:
                        System.exec_remote_python(client, """
from ovs.plugin.provider.service import Service
if Service.has_service('{0}'):
    Service.enable_service('{0}')
""".format(service))
                        System.exec_remote_python(client, """
from ovs.plugin.provider.service import Service
if Service.has_service('{0}'):
    Service.start_service('{0}')
""".format(service))
        else:
            # Final model cleanup
            vpool.delete()