Example #1
    def resize_from_voldrv(volumename, volumesize, volumepath,
                           storagedriver_id):
        """
        Resize a disk
        Triggered by volumedriver messages on the queue

        @param volumepath: path on hypervisor to the volume
        @param volumename: volume id of the disk
        @param volumesize: size of the volume
        @param storagedriver_id: ID of the storagedriver serving the volume
        """
        pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
        storagedriver = StorageDriverList.get_by_storagedriver_id(
            storagedriver_id)
        hypervisor = Factory.get(pmachine)
        volumepath = hypervisor.clean_backing_disk_filename(volumepath)
        mutex = VolatileMutex('{}_{}'.format(volumename, volumepath))
        try:
            mutex.acquire(wait=30)
            disk = VDiskList.get_vdisk_by_volume_id(volumename)
            if disk is None:
                disk = VDiskList.get_by_devicename_and_vpool(
                    volumepath, storagedriver.vpool)
                if disk is None:
                    disk = VDisk()
        finally:
            mutex.release()
        disk.devicename = volumepath
        disk.volume_id = volumename
        disk.size = volumesize
        disk.vpool = storagedriver.vpool
        disk.save()
        VDiskController.sync_with_mgmtcenter(disk, pmachine, storagedriver)
        MDSServiceController.ensure_safety(disk)
 def test_load_calculation(self):
     """
     Validates whether the load calculation works
     """
     vpools, storagerouters, storagedrivers, services, mds_services, _ = self._build_service_structure(
         {'vpools': [1],
          'storagerouters': [1],
          'storagedrivers': [(1, 1, 1)],  # (<id>, <vpool_id>, <sr_id>)
          'mds_services': [(1, 1)]}  # (<id>, <sd_id>)
     )
     mds_service = mds_services[1]
     self._create_vdisks_for_mds_service(2, 1, mds_service=mds_service)
     load, load_plus = MDSServiceController.get_mds_load(mds_service)
     self.assertEqual(load, 20, 'There should be a 20% load. {0}'.format(load))
     self.assertEqual(load_plus, 30, 'There should be a 30% plus load. {0}'.format(load_plus))
     self._create_vdisks_for_mds_service(3, 3, mds_service=mds_service)
     load, load_plus = MDSServiceController.get_mds_load(mds_service)
     self.assertEqual(load, 50, 'There should be a 50% load. {0}'.format(load))
     self.assertEqual(load_plus, 60, 'There should be a 60% plus load. {0}'.format(load_plus))
     self._create_vdisks_for_mds_service(5, 6, mds_service=mds_service)
     load, load_plus = MDSServiceController.get_mds_load(mds_service)
     self.assertEqual(load, 100, 'There should be a 100% load. {0}'.format(load))
     self.assertEqual(load_plus, 110, 'There should be a 110% plus load. {0}'.format(load_plus))
     mds_service.capacity = -1
     mds_service.save()
     load, load_plus = MDSServiceController.get_mds_load(mds_service)
     self.assertEqual(load, 50, 'There should be a 50% load. {0}'.format(load))
     self.assertEqual(load_plus, 50, 'There should be a 50% plus load. {0}'.format(load_plus))
     mds_service.capacity = 0
     mds_service.save()
     load, load_plus = MDSServiceController.get_mds_load(mds_service)
     self.assertEqual(load, float('inf'), 'There should be infinite load. {0}'.format(load))
     self.assertEqual(load_plus, float('inf'), 'There should be infinite plus load. {0}'.format(load_plus))
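The percentages asserted above follow from a simple ratio of vDisks to capacity. Below is a minimal sketch of that calculation, assuming get_mds_load divides the vDisk count on the service by its capacity; the helper name and the special-casing are reconstructions from the test data, not the framework's actual implementation.

    def _approximate_mds_load(vdisk_count, capacity):
        """
        Hypothetical helper mirroring the numbers asserted above: load is the
        percentage of capacity in use, load_plus the percentage if one more
        vDisk were added.
        """
        if capacity == 0:
            return float('inf'), float('inf')
        if capacity < 0:  # negative capacity reads as 'infinite'; the test expects a flat 50/50
            return 50, 50
        return (vdisk_count / float(capacity) * 100,
                (vdisk_count + 1) / float(capacity) * 100)

    # 2 vDisks on a capacity-10 service -> (20.0, 30.0), matching the first assertions
    print(_approximate_mds_load(2, 10))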
Example #3
    def resize_from_voldrv(volumename, volumesize, volumepath, storagedriver_id):
        """
        Resize a disk
        Triggered by volumedriver messages on the queue

        @param volumepath: path on hypervisor to the volume
        @param volumename: volume id of the disk
        @param volumesize: size of the volume
        @param storagedriver_id: ID of the storagedriver serving the volume
        """
        pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
        storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
        hypervisor = Factory.get(pmachine)
        volumepath = hypervisor.clean_backing_disk_filename(volumepath)
        mutex = VolatileMutex('{}_{}'.format(volumename, volumepath))
        try:
            mutex.acquire(wait=30)
            disk = VDiskList.get_vdisk_by_volume_id(volumename)
            if disk is None:
                disk = VDiskList.get_by_devicename_and_vpool(volumepath, storagedriver.vpool)
                if disk is None:
                    disk = VDisk()
        finally:
            mutex.release()
        disk.devicename = volumepath
        disk.volume_id = volumename
        disk.size = volumesize
        disk.vpool = storagedriver.vpool
        disk.save()
        VDiskController.sync_with_mgmtcenter(disk, pmachine, storagedriver)
        MDSServiceController.ensure_safety(disk)
 def _test_scenario(scenario, _vdisks, _mds_services):
     """
     Executes a test run for a given scenario
     """
     _generate_backend_config(scenario, _vdisks, _mds_services)
     for vdisk_id in _vdisks:
         MDSServiceController.sync_vdisk_to_reality(_vdisks[vdisk_id])
     _validate_scenario(scenario, _vdisks, _mds_services)
 def volumedriver_error(code, volumename):
     """
     Handles error messages/events from the volumedriver
     """
     if code == VolumeDriverEvents.MDSFailover:
         disk = VDiskList.get_vdisk_by_volume_id(volumename)
         if disk is not None:
             MDSServiceController.ensure_safety(disk)
Example #7
    def clone(diskguid, snapshotid, devicename, pmachineguid, machinename, machineguid=None):
        """
        Clone a disk
        """
        pmachine = PMachine(pmachineguid)
        hypervisor = Factory.get(pmachine)
        description = '{} {}'.format(machinename, devicename)
        properties_to_clone = ['description', 'size', 'type', 'retentionpolicyguid',
                               'snapshotpolicyguid', 'autobackup']
        vdisk = VDisk(diskguid)
        location = hypervisor.get_backing_disk_path(machinename, devicename)

        new_vdisk = VDisk()
        new_vdisk.copy(vdisk, include=properties_to_clone)
        new_vdisk.parent_vdisk = vdisk
        new_vdisk.name = '{0}-clone'.format(vdisk.name)
        new_vdisk.description = description
        new_vdisk.devicename = hypervisor.clean_backing_disk_filename(location)
        new_vdisk.parentsnapshot = snapshotid
        new_vdisk.vmachine = VMachine(machineguid) if machineguid else vdisk.vmachine
        new_vdisk.vpool = vdisk.vpool
        new_vdisk.save()

        try:
            storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
            if storagedriver is None:
                raise RuntimeError('Could not find StorageDriver with id {0}'.format(vdisk.storagedriver_id))

            mds_service = MDSServiceController.get_preferred_mds(storagedriver.storagerouter, vdisk.vpool)
            if mds_service is None:
                raise RuntimeError('Could not find an MDS service')

            logger.info('Clone snapshot {} of disk {} to location {}'.format(snapshotid, vdisk.name, location))
            volume_id = vdisk.storagedriver_client.create_clone(
                target_path=location,
                metadata_backend_config=MDSMetaDataBackendConfig([MDSNodeConfig(address=str(mds_service.service.storagerouter.ip),
                                                                                port=mds_service.service.ports[0])]),
                parent_volume_id=str(vdisk.volume_id),
                parent_snapshot_id=str(snapshotid),
                node_id=str(vdisk.storagedriver_id)
            )
        except Exception as ex:
            logger.error('Caught exception during clone, trying to delete the volume. {0}'.format(ex))
            new_vdisk.delete()
            VDiskController.delete_volume(location)
            raise

        new_vdisk.volume_id = volume_id
        new_vdisk.save()

        try:
            MDSServiceController.ensure_safety(new_vdisk)
        except Exception as ex:
            logger.error('Caught exception during "ensure_safety" {0}'.format(ex))

        return {'diskguid': new_vdisk.guid,
                'name': new_vdisk.name,
                'backingdevice': location}
Example #8
 def volumedriver_error(code, volumename, storagedriver_id):
     """
     Handles error messages/events from the volumedriver
     """
     _ = storagedriver_id  # Required for the @log decorator
     if code == VolumeDriverEvents.MDSFailover:
         disk = VDiskList.get_vdisk_by_volume_id(volumename)
         if disk is not None:
             MDSServiceController.ensure_safety(disk)
Example #10
 def volumedriver_error(code, volumename):
     """
     Handles error messages/events from the volumedriver
     :param code: Volumedriver error code
     :param volumename: Name of the volume throwing the error
     """
     if code == VolumeDriverEvents.MDSFailover:
         disk = VDiskList.get_vdisk_by_volume_id(volumename)
         if disk is not None:
             MDSServiceController.ensure_safety(disk)
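Handlers like this are typically registered against the volumedriver event queue; the framework invokes them when a matching message arrives. A minimal dispatch sketch, where consume_event and the payload keys are assumptions for illustration rather than the framework's actual consumer API:

    def consume_event(event):
        # Hypothetical queue callback: route volumedriver error events to the handler above
        if event.get('type') == 'volumedriver_error':
            volumedriver_error(code=event['code'], volumename=event['volumename'])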
Example #11
    def _execute_scrub_work(scrub_location, vdisk_guids):
        def _verify_mds_config(current_vdisk):
            current_vdisk.invalidate_dynamics(['info'])
            vdisk_configs = current_vdisk.info['metadata_backend_config']
            if len(vdisk_configs) == 0:
                raise RuntimeError('Could not load MDS configuration')
            return vdisk_configs

        ScheduledTaskController._logger.info('Execute Scrub - Started')
        ScheduledTaskController._logger.info('Execute Scrub - Scrub location - {0}'.format(scrub_location))
        total = len(vdisk_guids)
        skipped = 0
        storagedrivers = {}
        failures = []
        for vdisk_guid in vdisk_guids:
            vdisk = VDisk(vdisk_guid)
            try:
                # Load the vDisk's StorageDriver
                ScheduledTaskController._logger.info('Execute Scrub - Virtual disk {0} - {1} - Started'.format(vdisk.guid, vdisk.name))
                vdisk.invalidate_dynamics(['storagedriver_id'])
                if vdisk.storagedriver_id not in storagedrivers:
                    storagedrivers[vdisk.storagedriver_id] = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
                storagedriver = storagedrivers[vdisk.storagedriver_id]

                # Load the vDisk's MDS configuration
                configs = _verify_mds_config(current_vdisk=vdisk)

                # Check MDS master is local. Trigger MDS handover if necessary
                if configs[0].get('ip') != storagedriver.storagerouter.ip:
                    ScheduledTaskController._logger.debug('Execute Scrub - Virtual disk {0} - {1} - MDS master is not local, trigger handover'.format(vdisk.guid, vdisk.name))
                    MDSServiceController.ensure_safety(vdisk)
                    configs = _verify_mds_config(current_vdisk=vdisk)
                    if configs[0].get('ip') != storagedriver.storagerouter.ip:
                        skipped += 1
                        ScheduledTaskController._logger.info('Execute Scrub - Virtual disk {0} - {1} - Skipping because master MDS still not local'.format(vdisk.guid, vdisk.name))
                        continue
                with vdisk.storagedriver_client.make_locked_client(str(vdisk.volume_id)) as locked_client:
                    ScheduledTaskController._logger.info('Execute Scrub - Virtual disk {0} - {1} - Retrieve and apply scrub work'.format(vdisk.guid, vdisk.name))
                    work_units = locked_client.get_scrubbing_workunits()
                    for work_unit in work_units:
                        scrubbing_result = locked_client.scrub(work_unit, scrub_location, log_sinks=[SCRUBBER_LOGFILE_LOCATION])
                        locked_client.apply_scrubbing_result(scrubbing_result)
                    if work_units:
                        ScheduledTaskController._logger.info('Execute Scrub - Virtual disk {0} - {1} - Scrub successfully applied'.format(vdisk.guid, vdisk.name))
                    else:
                        ScheduledTaskController._logger.info('Execute Scrub - Virtual disk {0} - {1} - No scrubbing required'.format(vdisk.guid, vdisk.name))
            except Exception as ex:
                failures.append('Failed scrubbing work unit for volume {0} with guid {1}: {2}'.format(vdisk.name, vdisk.guid, ex))

        failed = len(failures)
        ScheduledTaskController._logger.info('Execute Scrub - Finished - Success: {0} - Failed: {1} - Skipped: {2}'.format((total - failed - skipped), failed, skipped))
        if failed > 0:
            raise Exception('\n - '.join(failures))
        return vdisk_guids
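A hedged invocation sketch for this task; the scrub location is a placeholder and the guid-collection query is an assumption:

    # Scrub every modelled vDisk into a local scratch directory (illustrative only)
    guids = [vdisk.guid for vdisk in VDiskList.get_vdisks()]  # assuming such a list query exists
    ScheduledTaskController._execute_scrub_work(scrub_location='/mnt/scrub', vdisk_guids=guids)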
Example #12
 def _set_mds_safety(vpool, safety=None, checkup=False, logger=LOGGER):
     if safety is None:
         safety = len(StoragerouterHelper.get_storagerouters())
     if safety <= 0:
         raise ValueError('Safety should be at least 1.')
     logger.debug('Setting the safety to {0}; a checkup {1} be run'.format(safety, 'will' if checkup is True else 'will not'))
     mds_config = Configuration.get('/ovs/vpools/{0}/mds_config'.format(vpool.guid))
     mds_config['mds_safety'] = safety
     Configuration.set('/ovs/vpools/{0}/mds_config'.format(vpool.guid), mds_config)
     if checkup is True:
         MDSServiceController.mds_checkup()
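Usage is straightforward; a short example, where vpool is any vPool DAL object:

    # Raise the MDS safety for this vPool to 3 and immediately rebalance via a checkup
    _set_mds_safety(vpool, safety=3, checkup=True)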
Example #13
 def volumedriver_error(code, volumename, storagedriver_id):
     """
     Handles error messages/events from the volumedriver
     :param code: Volumedriver error code
     :param volumename: Name of the volume throwing the error
     :param storagedriver_id: ID of the storagedriver hosting the volume
     """
     _ = storagedriver_id  # Required for the @log decorator
     if code == VolumeDriverEvents.MDSFailover:
         disk = VDiskList.get_vdisk_by_volume_id(volumename)
         if disk is not None:
             MDSServiceController.ensure_safety(disk)
Example #14
    def _execute_scrub_work(scrub_location, vdisk_guids):
        def verify_mds_config(current_vdisk):
            current_vdisk.invalidate_dynamics(["info"])
            vdisk_configs = current_vdisk.info["metadata_backend_config"]
            if len(vdisk_configs) == 0:
                raise RuntimeError("Could not load MDS configuration")
            return vdisk_configs

        logger.info("Scrub location: {0}".format(scrub_location))
        total = len(vdisk_guids)
        skipped = 0
        storagedrivers = {}
        failures = []
        for vdisk_guid in vdisk_guids:
            vdisk = VDisk(vdisk_guid)
            try:
                # Load the vDisk's StorageDriver
                logger.info("Scrubbing virtual disk {0} with guid {1}".format(vdisk.name, vdisk.guid))
                vdisk.invalidate_dynamics(["storagedriver_id"])
                if vdisk.storagedriver_id not in storagedrivers:
                    storagedrivers[vdisk.storagedriver_id] = StorageDriverList.get_by_storagedriver_id(
                        vdisk.storagedriver_id
                    )
                storagedriver = storagedrivers[vdisk.storagedriver_id]

                # Load the vDisk's MDS configuration
                configs = verify_mds_config(current_vdisk=vdisk)

                # Check MDS master is local. Trigger MDS handover if necessary
                if configs[0].get("ip") != storagedriver.storagerouter.ip:
                    logger.debug("MDS for volume {0} is not local. Trigger handover".format(vdisk.volume_id))
                    MDSServiceController.ensure_safety(vdisk)
                    configs = verify_mds_config(current_vdisk=vdisk)
                    if configs[0].get("ip") != storagedriver.storagerouter.ip:
                        skipped += 1
                        logger.info(
                            "Skipping scrubbing work unit for volume {0}: MDS master is not local".format(
                                vdisk.volume_id
                            )
                        )
                        continue
                with vdisk.storagedriver_client.make_locked_client(str(vdisk.volume_id)) as locked_client:
                    work_units = locked_client.get_scrubbing_workunits()
                    for work_unit in work_units:
                        scrubbing_result = locked_client.scrub(work_unit, scrub_location)
                        locked_client.apply_scrubbing_result(scrubbing_result)
                    if work_units:
                        logger.info("Scrubbing successfully applied")
            except Exception as ex:
                failures.append(
                    "Failed scrubbing work unit for volume {0} with guid {1}: {2}".format(vdisk.name, vdisk.guid, ex)
                )
Example #15
 def volumedriver_error(code, volume_id):
     """
     Handles error messages/events from the volumedriver
     :param code: Volumedriver error code
     :type code: int
     :param volume_id: Name of the volume throwing the error
     :type volume_id: str
     :return: None
     """
     if code == VolumeDriverEvents.MDSFailover:
         disk = VDiskList.get_vdisk_by_volume_id(volume_id)
         if disk is not None:
             MDSServiceController.ensure_safety(disk)
Example #16
 def volumedriver_error(code, volume_id):
     """
     Handles error messages/events from the volumedriver
     :param code: Volumedriver error code
     :type code: int
     :param volume_id: Name of the volume throwing the error
     :type volume_id: str
     :return: None
     """
     if code == VolumeDriverEvents_pb2.MDSFailover:
         disk = VDiskList.get_vdisk_by_volume_id(volume_id)
         if disk is not None:
             MDSServiceController.ensure_safety(disk)
Example #17
 def test_load_calculation(self):
     """
     Validates whether the load calculation works
     """
     vpools, storagerouters, storagedrivers, services, mds_services, _ = self._build_service_structure(
         {
             'vpools': [1],
             'storagerouters': [1],
             'storagedrivers': [(1, 1, 1)],  # (<id>, <vpool_id>, <sr_id>)
             'mds_services': [(1, 1)]
         }  # (<id>, <sd_id>)
     )
     mds_service = mds_services[1]
     self._create_vdisks_for_mds_service(2, 1, mds_service=mds_service)
     load, load_plus = MDSServiceController.get_mds_load(mds_service)
     self.assertEqual(load, 20,
                      'There should be a 20% load. {0}'.format(load))
     self.assertEqual(
         load_plus, 30,
         'There should be a 30% plus load. {0}'.format(load_plus))
     self._create_vdisks_for_mds_service(3, 3, mds_service=mds_service)
     load, load_plus = MDSServiceController.get_mds_load(mds_service)
     self.assertEqual(load, 50,
                      'There should be a 50% load. {0}'.format(load))
     self.assertEqual(
         load_plus, 60,
         'There should be a 60% plus load. {0}'.format(load_plus))
     self._create_vdisks_for_mds_service(5, 6, mds_service=mds_service)
     load, load_plus = MDSServiceController.get_mds_load(mds_service)
     self.assertEqual(load, 100,
                      'There should be a 100% load. {0}'.format(load))
     self.assertEqual(
         load_plus, 110,
         'There should be a 110% plus load. {0}'.format(load_plus))
     mds_service.capacity = -1
     mds_service.save()
     load, load_plus = MDSServiceController.get_mds_load(mds_service)
     self.assertEqual(load, 50,
                      'There should be a 50% load. {0}'.format(load))
     self.assertEqual(
         load_plus, 50,
         'There should be a 50% plus load. {0}'.format(load_plus))
     mds_service.capacity = 0
     mds_service.save()
     load, load_plus = MDSServiceController.get_mds_load(mds_service)
     self.assertEqual(load, float('inf'),
                      'There should be infinite load. {0}'.format(load))
     self.assertEqual(
         load_plus, float('inf'),
         'There should be infinite plus load. {0}'.format(load_plus))
Example #18
    def get_stats_mds(cls):
        """
        Retrieve how many vDisks each MDS service is serving, whether as master or slave
        """
        if cls._config is None:
            cls.validate_and_retrieve_config()

        stats = []
        environment = cls._config['environment']
        service_type = ServiceTypeList.get_by_name('MetadataServer')
        if service_type is None:
            raise RuntimeError('MetadataServer service not found in the model')

        for service in service_type.services:
            slaves = 0
            masters = 0
            mds_service = service.mds_service
            for junction in mds_service.vdisks:
                if junction.is_master is True:
                    masters += 1
                else:
                    slaves += 1
            stats.append({'tags': {'vpool_name': mds_service.vpool.name,
                                   'mds_number': mds_service.number,
                                   'environment': environment,
                                   'storagerouter_name': service.storagerouter.name},
                          'fields': {'load': MDSServiceController.get_mds_load(mds_service)[0],
                                     'capacity': mds_service.capacity if mds_service.capacity != -1 else 'infinite',
                                     'masters': masters,
                                     'slaves': slaves},
                          'measurement': 'mds'})
        return False, stats
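Each entry in stats is shaped for a time-series sink (tags, fields, measurement). A representative element, with purely illustrative values:

    {'tags': {'vpool_name': 'vpool1',
              'mds_number': 0,
              'environment': 'prod',
              'storagerouter_name': 'node1'},
     'fields': {'load': 20.0, 'capacity': 10, 'masters': 2, 'slaves': 0},
     'measurement': 'mds'}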
Example #19
    def clone(diskguid, snapshotid, devicename, pmachineguid, machinename, machineguid=None):
        """
        Clone a disk
        """
        pmachine = PMachine(pmachineguid)
        hypervisor = Factory.get(pmachine)
        description = "{} {}".format(machinename, devicename)
        properties_to_clone = ["description", "size", "type", "retentionpolicyguid", "snapshotpolicyguid", "autobackup"]
        vdisk = VDisk(diskguid)
        location = hypervisor.get_backing_disk_path(machinename, devicename)

        new_vdisk = VDisk()
        new_vdisk.copy(vdisk, include=properties_to_clone)
        new_vdisk.parent_vdisk = vdisk
        new_vdisk.name = "{0}-clone".format(vdisk.name)
        new_vdisk.description = description
        new_vdisk.devicename = hypervisor.clean_backing_disk_filename(location)
        new_vdisk.parentsnapshot = snapshotid
        new_vdisk.vmachine = VMachine(machineguid) if machineguid else vdisk.vmachine
        new_vdisk.vpool = vdisk.vpool
        new_vdisk.save()

        storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
        if storagedriver is None:
            raise RuntimeError("Could not find StorageDriver with id {0}".format(vdisk.storagedriver_id))

        mds_service = MDSServiceController.get_preferred_mds(storagedriver.storagerouter, vdisk.vpool)
        if mds_service is None:
            raise RuntimeError("Could not find a MDS service")

        logger.info("Clone snapshot {} of disk {} to location {}".format(snapshotid, vdisk.name, location))
        volume_id = vdisk.storagedriver_client.create_clone(
            target_path=location,
            metadata_backend_config=MDSMetaDataBackendConfig(
                [MDSNodeConfig(address=str(mds_service.service.storagerouter.ip), port=mds_service.service.ports[0])]
            ),
            parent_volume_id=str(vdisk.volume_id),
            parent_snapshot_id=str(snapshotid),
            node_id=str(vdisk.storagedriver_id),
        )
        new_vdisk.volume_id = volume_id
        new_vdisk.save()
        MDSServiceController.ensure_safety(new_vdisk)

        return {"diskguid": new_vdisk.guid, "name": new_vdisk.name, "backingdevice": location}
Example #20
    def migrate_from_voldrv(volume_id, new_owner_id):
        """
        Triggered when volume has changed owner (Clean migration or stolen due to other reason)
        Triggered by volumedriver messages

        :param volume_id:    Volume ID of the disk
        :type volume_id:     unicode

        :param new_owner_id: ID of the storage driver the volume migrated to
        :type new_owner_id:  unicode

        :returns:            None
        """
        sd = StorageDriverList.get_by_storagedriver_id(storagedriver_id=new_owner_id)
        vdisk = VDiskList.get_vdisk_by_volume_id(volume_id=volume_id)
        if vdisk is not None:
            logger.info('Migration - Guid {0} - ID {1} - Detected migration for virtual disk {2}'.format(vdisk.guid, vdisk.volume_id, vdisk.name))
            if sd is not None:
                logger.info('Migration - Guid {0} - ID {1} - Storage Router {2} is the new owner of virtual disk {3}'.format(vdisk.guid, vdisk.volume_id, sd.storagerouter.name, vdisk.name))
            MDSServiceController.mds_checkup()
            VDiskController.dtl_checkup(vdisk_guid=vdisk.guid)
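Like the other voldrv hooks, this is meant to be fired from the event queue; an illustrative call with placeholder IDs:

    VDiskController.migrate_from_voldrv(volume_id=u'volume-123', new_owner_id=u'storagedriver-2')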
Example #21
 def remove_mds_services(self):
     """
     Remove the MDS services related to the StorageDriver being deleted
     :return: A boolean indicating whether something went wrong
     :rtype: bool
     """
     # Removing MDS services
     self._logger.info('Removing MDS services')
     errors_found = False
     for mds_service in self.mds_services:
         try:
             self._logger.info('Remove MDS service (number {0}) for StorageRouter with IP {1}'.format(
                 mds_service.number, self.sr_installer.storagerouter.ip))
             # A missing root_client means the StorageRouter is offline
             MDSServiceController.remove_mds_service(mds_service=mds_service,
                                                     reconfigure=False,
                                                     allow_offline=self.sr_installer.root_client is None)
         except Exception:
             self._logger.exception('Removing MDS service failed')
             errors_found = True
     return errors_found
 def _generate_mds_service_load_repr(_mds_service):
     """
      Generates a load representation for a given mds_service
     """
     masters, slaves = 0, 0
     for _junction in _mds_service.vdisks:
         if _junction.is_master:
             masters += 1
         else:
             slaves += 1
     capacity = _mds_service.capacity
     if capacity == -1:
         capacity = 'infinite'
     _load, _ = MDSServiceController.get_mds_load(_mds_service)
     if _load == float('inf'):
         _load = 'infinite'
     else:
         _load = round(_load, 2)
     return [_mds_service.service.storagerouter.ip, _mds_service.service.ports[0], masters, slaves, capacity, _load]
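The returned list compactly encodes one MDS service's state as [ip, port, masters, slaves, capacity, load]. For example, a service at 10.0.0.1:1 serving 2 masters and 0 slaves with capacity 10 yields ['10.0.0.1', 1, 2, 0, 10, 20.0], the exact row layout the ensure_safety test below asserts against.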
Example #23
 def _generate_mds_service_load_repr(_mds_service):
     """
      Generates a load representation for a given mds_service
     """
     masters, slaves = 0, 0
     for _junction in _mds_service.vdisks:
         if _junction.is_master:
             masters += 1
         else:
             slaves += 1
     capacity = _mds_service.capacity
     if capacity == -1:
         capacity = 'infinite'
     _load, _ = MDSServiceController.get_mds_load(_mds_service)
     if _load == float('inf'):
         _load = 'infinite'
     else:
         _load = round(_load, 2)
     return [
         _mds_service.service.storagerouter.ip,
         _mds_service.service.ports[0], masters, slaves, capacity, _load
     ]
Example #24
    def update_vmachine_config(vmachine, vm_object, pmachine=None):
        """
        Update a vMachine in the model with a given hypervisor configuration (vm_object)
        """
        try:
            vdisks_synced = 0
            if vmachine.name is None:
                MessageController.fire(MessageController.Type.EVENT,
                                       {'type': 'vmachine_created',
                                        'metadata': {'name': vm_object['name']}})
            elif vmachine.name != vm_object['name']:
                MessageController.fire(MessageController.Type.EVENT,
                                       {'type': 'vmachine_renamed',
                                        'metadata': {'old_name': vmachine.name,
                                                     'new_name': vm_object['name']}})
            if pmachine is not None:
                vmachine.pmachine = pmachine
            vmachine.name = vm_object['name']
            vmachine.hypervisor_id = vm_object['id']
            vmachine.devicename = vm_object['backing']['filename']
            vmachine.save()
            # Updating and linking disks
            storagedrivers = StorageDriverList.get_storagedrivers()
            datastores = dict([('{}:{}'.format(storagedriver.storage_ip, storagedriver.mountpoint), storagedriver) for storagedriver in storagedrivers])
            vdisk_guids = []
            for disk in vm_object['disks']:
                if disk['datastore'] in vm_object['datastores']:
                    datastore = vm_object['datastores'][disk['datastore']]
                    if datastore in datastores:
                        vdisk = VDiskList.get_by_devicename_and_vpool(disk['filename'], datastores[datastore].vpool)
                        if vdisk is None:
                            # The disk couldn't be located, but is in our datastore. We might be in a recovery scenario
                            vdisk = VDisk()
                            vdisk.vpool = datastores[datastore].vpool
                            vdisk.reload_client()
                            vdisk.devicename = disk['filename']
                            vdisk.volume_id = vdisk.storagedriver_client.get_volume_id(str(disk['backingfilename']))
                            vdisk.size = vdisk.info['volume_size']
                            MDSServiceController.ensure_safety(vdisk)
                        # Update the disk with information from the hypervisor
                        if vdisk.vmachine is None:
                            MessageController.fire(MessageController.Type.EVENT,
                                                   {'type': 'vdisk_attached',
                                                    'metadata': {'vmachine_name': vmachine.name,
                                                                 'vdisk_name': disk['name']}})
                        vdisk.vmachine = vmachine
                        vdisk.name = disk['name']
                        vdisk.order = disk['order']
                        vdisk.save()
                        vdisk_guids.append(vdisk.guid)
                        vdisks_synced += 1

            for vdisk in vmachine.vdisks:
                if vdisk.guid not in vdisk_guids:
                    MessageController.fire(MessageController.Type.EVENT,
                                           {'type': 'vdisk_detached',
                                            'metadata': {'vmachine_name': vmachine.name,
                                                         'vdisk_name': vdisk.name}})
                    vdisk.vmachine = None
                    vdisk.save()

            logger.info('Updating vMachine finished (name {}, {} vdisks (re)linked)'.format(
                vmachine.name, vdisks_synced
            ))
        except Exception as ex:
            logger.error('Error during vMachine update: {0}'.format(str(ex)))
            raise
    def test_ensure_safety(self):
        """
        Validates whether the ensure_safety call works as expected
        """
        def _generate_mds_service_load_repr(_mds_service):
            """
            Generates a load representation for a given mds_service
            """
            masters, slaves = 0, 0
            for _junction in _mds_service.vdisks:
                if _junction.is_master:
                    masters += 1
                else:
                    slaves += 1
            capacity = _mds_service.capacity
            if capacity == -1:
                capacity = 'infinite'
            _load, _ = MDSServiceController.get_mds_load(_mds_service)
            if _load == float('inf'):
                _load = 'infinite'
            else:
                _load = round(_load, 2)
            return [_mds_service.service.storagerouter.ip, _mds_service.service.ports[0], masters, slaves, capacity, _load]

        def _check_reality(_configs, _loads, _vdisks, _mds_services, test=True, display=False):
            """
            Validates 'reality' with an expected config/load
            """
            reality_configs = []
            for _vdisk_id in _vdisks:
                reality_configs.append(_vdisks[_vdisk_id].info['metadata_backend_config'])
            if display is True:
                for c in reality_configs:
                    print(c)
            if test is True:
                self.assertListEqual(reality_configs, _configs)
            reality_loads = []
            for mds_id in _mds_services:
                reality_loads.append(_generate_mds_service_load_repr(_mds_services[mds_id]))
            if display is True:
                for l in reality_loads:
                    print(l)
            if test is True:
                self.assertListEqual(reality_loads, _loads)

        PersistentFactory.get_client().set('ovs.storagedriver.mds.safety', 3)
        PersistentFactory.get_client().set('ovs.storagedriver.mds.maxload', 75)
        PersistentFactory.get_client().set('ovs.storagedriver.mds.tlogs', 100)
        vpools, storagerouters, storagedrivers, _, mds_services, service_type = self._build_service_structure(
            {'vpools': [1],
             'storagerouters': [1, 2, 3, 4],
             'storagedrivers': [(1, 1, 1), (2, 1, 2), (3, 1, 3), (4, 1, 4)],  # (<id>, <vpool_id>, <sr_id>)
             'mds_services': [(1, 1), (2, 2), (3, 3), (4, 4)]}  # (<id>, <sd_id>)
        )
        vdisks = {}
        start_id = 1
        for mds_service in mds_services.itervalues():
            vdisks.update(self._create_vdisks_for_mds_service(2, start_id, mds_service=mds_service))
            start_id += 2

        # Validate the start configuration which is simple, each disk has only its default local master
        configs = [[{'ip': '10.0.0.1', 'port': 1}],
                   [{'ip': '10.0.0.1', 'port': 1}],
                   [{'ip': '10.0.0.2', 'port': 2}],
                   [{'ip': '10.0.0.2', 'port': 2}],
                   [{'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.4', 'port': 4}]]
        loads = [['10.0.0.1', 1, 2, 0, 10, 20.0],
                 ['10.0.0.2', 2, 2, 0, 10, 20.0],
                 ['10.0.0.3', 3, 2, 0, 10, 20.0],
                 ['10.0.0.4', 4, 2, 0, 10, 20.0]]
        _check_reality(configs, loads, vdisks, mds_services)

        # Validate first run. Each disk should now have sufficient nodes, since there are plenty of MDS services available
        configs = [[{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.2', 'port': 2}],
                   [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.2', 'port': 2}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.1', 'port': 1}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.2', 'port': 2}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}]]
        loads = [['10.0.0.1', 1, 2, 5, 10, 70.0],
                 ['10.0.0.2', 2, 2, 4, 10, 60.0],
                 ['10.0.0.3', 3, 2, 4, 10, 60.0],
                 ['10.0.0.4', 4, 2, 3, 10, 50.0]]
        for vdisk_id in sorted(vdisks.keys()):
            MDSServiceController.ensure_safety(vdisks[vdisk_id])
        _check_reality(configs, loads, vdisks, mds_services)

        # Validate whether this extra (unnecessary) run doesn't change anything, preventing reconfiguring over and
        # over again
        for vdisk_id in sorted(vdisks.keys()):
            MDSServiceController.ensure_safety(vdisks[vdisk_id])
        _check_reality(configs, loads, vdisks, mds_services)

        # Validating whether an overloaded node is correctly rebalanced
        mds_services[2].capacity = 2
        mds_services[2].save()
        configs = [[{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.1', 'port': 1}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.1', 'port': 1}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}]]
        loads = [['10.0.0.1', 1, 2, 5, 10, 70.0],
                 ['10.0.0.2', 2, 2, 0, 2, 100.0],
                 ['10.0.0.3', 3, 2, 5, 10, 70.0],
                 ['10.0.0.4', 4, 2, 5, 10, 70.0]]
        for vdisk_id in sorted(vdisks.keys()):
            MDSServiceController.ensure_safety(vdisks[vdisk_id])
        _check_reality(configs, loads, vdisks, mds_services)

        # Validate whether the overloaded services are still handled. In this case, causing a re-order of the slaves as
        # ordered in the model
        configs = [[{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.1', 'port': 1}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}]]
        loads = [['10.0.0.1', 1, 2, 5, 10, 70.0],
                 ['10.0.0.2', 2, 2, 0, 2, 100.0],
                 ['10.0.0.3', 3, 2, 5, 10, 70.0],
                 ['10.0.0.4', 4, 2, 5, 10, 70.0]]
        for vdisk_id in sorted(vdisks.keys()):
            MDSServiceController.ensure_safety(vdisks[vdisk_id])
        _check_reality(configs, loads, vdisks, mds_services)

        # Again, validating whether a subsequent run doesn't give unexpected changes
        for vdisk_id in sorted(vdisks.keys()):
            MDSServiceController.ensure_safety(vdisks[vdisk_id])
        _check_reality(configs, loads, vdisks, mds_services)

        # An MDS service is added (next to the overloaded service); this should rebalance the expected configurations
        s_id = '{0}-5'.format(storagerouters[2].name)
        service = Service()
        service.name = s_id
        service.storagerouter = storagerouters[2]
        service.ports = [5]
        service.type = service_type
        service.save()
        mds_service = MDSService()
        mds_service.service = service
        mds_service.number = 0
        mds_service.capacity = 10
        mds_service.vpool = vpools[1]
        mds_service.save()
        mds_services[5] = mds_service
        configs = [[{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.2', 'port': 5}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.2', 'port': 5}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.1', 'port': 1}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.2', 'port': 5}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}]]
        loads = [['10.0.0.1', 1, 2, 5, 10, 70.0],
                 ['10.0.0.2', 2, 2, 0, 2, 100.0],
                 ['10.0.0.3', 3, 2, 5, 10, 70.0],
                 ['10.0.0.4', 4, 2, 5, 10, 70.0],
                 ['10.0.0.2', 5, 0, 3, 10, 30.0]]
        for vdisk_id in sorted(vdisks.keys()):
            MDSServiceController.ensure_safety(vdisks[vdisk_id])
        _check_reality(configs, loads, vdisks, mds_services)

        # If the tlogs are not caught up, nothing should be changed
        for vdisk_id in [3, 4]:
            StorageDriverClient.catch_up[vdisks[vdisk_id].volume_id] = 1000
        for vdisk_id in sorted(vdisks.keys()):
            MDSServiceController.ensure_safety(vdisks[vdisk_id])
        _check_reality(configs, loads, vdisks, mds_services)

        # The next run, after tlogs are caught up, a master switch should be executed
        for vdisk_id in [3, 4]:
            StorageDriverClient.catch_up[vdisks[vdisk_id].volume_id] = 50
        configs = [[{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.2', 'port': 5}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.1', 'port': 1}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.2', 'port': 5}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}]]
        loads = [['10.0.0.1', 1, 2, 5, 10, 70.0],
                 ['10.0.0.2', 2, 1, 0, 2, 50.0],
                 ['10.0.0.3', 3, 2, 5, 10, 70.0],
                 ['10.0.0.4', 4, 2, 5, 10, 70.0],
                 ['10.0.0.2', 5, 1, 1, 10, 20.0]]
        for vdisk_id in sorted(vdisks.keys()):
            MDSServiceController.ensure_safety(vdisks[vdisk_id])
        _check_reality(configs, loads, vdisks, mds_services)

        # Validate whether a volume migration makes the master follow
        StorageDriverClient.vrouter_id[vdisks[1].volume_id] = storagedrivers[3].storagedriver_id
        configs = [[{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.2', 'port': 5}],
                   [{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.2', 'port': 5}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.1', 'port': 1}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.2', 'port': 5}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}]]
        loads = [['10.0.0.1', 1, 1, 6, 10, 70.0],
                 ['10.0.0.2', 2, 1, 0, 2, 50.0],
                 ['10.0.0.3', 3, 3, 4, 10, 70.0],
                 ['10.0.0.4', 4, 2, 4, 10, 60.0],
                 ['10.0.0.2', 5, 1, 2, 10, 30.0]]
        for vdisk_id in sorted(vdisks.keys()):
            MDSServiceController.ensure_safety(vdisks[vdisk_id])
        _check_reality(configs, loads, vdisks, mds_services)

        # Validates if a second run doesn't change anything
        for vdisk_id in sorted(vdisks.keys()):
            MDSServiceController.ensure_safety(vdisks[vdisk_id])
        _check_reality(configs, loads, vdisks, mds_services)
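Throughout the scenario above, each row in configs is one vDisk's metadata backend config (master MDS first, then up to mds_safety - 1 slaves), and each row in loads follows the [ip, port, masters, slaves, capacity, load] layout produced by _generate_mds_service_load_repr.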
Example #26
    def deletescrubsnapshots(timestamp=None):
        """
        Delete snapshots & scrubbing policy

        Implemented delete snapshot policy:
        age  | bucket size | # buckets | kept per bucket  | horizon
        < 1d | 1d bucket   | 1         | best of bucket   | 1d
        < 1w | 1d bucket   | 6         | oldest of bucket | 7d = 1w
        < 1m | 1w bucket   | 3         | oldest of bucket | 4w = 1m
        > 1m | delete
        """

        logger.info('Delete snapshots started')

        day = 60 * 60 * 24
        week = day * 7

        # Calculate bucket structure
        if timestamp is None:
            timestamp = time.time()
        offset = int(mktime(datetime.fromtimestamp(timestamp).date().timetuple())) - day
        buckets = []
        # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[
        for i in xrange(0, 7):
            buckets.append({'start': offset - (day * i),
                            'end': offset - (day * (i + 1)),
                            'type': '1d',
                            'snapshots': []})
        # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[
        for i in xrange(1, 4):
            buckets.append({'start': offset - (week * i),
                            'end': offset - (week * (i + 1)),
                            'type': '1w',
                            'snapshots': []})
        buckets.append({'start': offset - (week * 4),
                        'end': 0,
                        'type': 'rest',
                        'snapshots': []})

        # Place all snapshots in bucket_chains
        bucket_chains = []
        for vmachine in VMachineList.get_customer_vmachines():
            if any(vd.info['object_type'] in ['BASE'] for vd in vmachine.vdisks):
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vmachine.snapshots:
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            for diskguid, snapshotguid in snapshot['snapshots'].iteritems():
                                bucket['snapshots'].append({'timestamp': timestamp,
                                                            'snapshotid': snapshotguid,
                                                            'diskguid': diskguid,
                                                            'is_consistent': snapshot['is_consistent']})
                bucket_chains.append(bucket_chain)

        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] in ['BASE']:
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vdisk.snapshots:
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            bucket['snapshots'].append({'timestamp': timestamp,
                                                        'snapshotid': snapshot['guid'],
                                                        'diskguid': vdisk.guid,
                                                        'is_consistent': snapshot['is_consistent']})
                bucket_chains.append(bucket_chain)

        # Clean out the snapshot bucket_chains, we delete the snapshots we want to keep
        # And we'll remove all snapshots that remain in the buckets
        for bucket_chain in bucket_chains:
            first = True
            for bucket in bucket_chain:
                if first is True:
                    best = None
                    for snapshot in bucket['snapshots']:
                        if best is None:
                            best = snapshot
                        # Consistent is better than inconsistent
                        elif snapshot['is_consistent'] and not best['is_consistent']:
                            best = snapshot
                        # Newer (larger timestamp) is better than older snapshots
                        elif snapshot['is_consistent'] == best['is_consistent'] and \
                                snapshot['timestamp'] > best['timestamp']:
                            best = snapshot
                    bucket['snapshots'] = [s for s in bucket['snapshots'] if
                                           s['timestamp'] != best['timestamp']]
                    first = False
                elif bucket['end'] > 0:
                    oldest = None
                    for snapshot in bucket['snapshots']:
                        if oldest is None:
                            oldest = snapshot
                        # Older (smaller timestamp) is the one we want to keep
                        elif snapshot['timestamp'] < oldest['timestamp']:
                            oldest = snapshot
                    bucket['snapshots'] = [s for s in bucket['snapshots'] if
                                           s['timestamp'] != oldest['timestamp']]

        # Delete obsolete snapshots
        for bucket_chain in bucket_chains:
            for bucket in bucket_chain:
                for snapshot in bucket['snapshots']:
                    VDiskController.delete_snapshot(diskguid=snapshot['diskguid'],
                                                    snapshotid=snapshot['snapshotid'])

        logger.info('Delete snapshots finished')
        logger.info('Scrubbing started')

        vdisks = []
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info['object_type'] in ['BASE'] and len(vdisk.child_vdisks) == 0:
                    vdisks.append(vdisk)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] in ['BASE'] and len(vdisk.child_vdisks) == 0:
                vdisks.append(vdisk)

        total = 0
        failed = 0
        skipped = 0
        storagedrivers = {}
        for vdisk in vdisks:
            try:
                total += 1
                # Load the vDisk's StorageDriver
                vdisk.invalidate_dynamics(['info', 'storagedriver_id'])
                if vdisk.storagedriver_id not in storagedrivers:
                    storagedrivers[vdisk.storagedriver_id] = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
                storagedriver = storagedrivers[vdisk.storagedriver_id]
                # Load the vDisk's MDS configuration
                vdisk.invalidate_dynamics(['info'])
                configs = vdisk.info['metadata_backend_config']
                if len(configs) == 0:
                    raise RuntimeError('Could not load MDS configuration')
                if configs[0]['ip'] != storagedriver.storagerouter.ip:
                    # The MDS master is not local. Trigger an MDS handover and try again
                    logger.debug('MDS for volume {0} is not local. Trigger handover'.format(vdisk.volume_id))
                    MDSServiceController.ensure_safety(vdisk)
                    vdisk.invalidate_dynamics(['info'])
                    configs = vdisk.info['metadata_backend_config']
                    if len(configs) == 0:
                        raise RuntimeError('Could not load MDS configuration')
                    if configs[0]['ip'] != storagedriver.storagerouter.ip:
                        skipped += 1
                        logger.info('Skipping scrubbing work unit for volume {0}: MDS master is not local'.format(
                            vdisk.volume_id
                        ))
                        continue
                work_units = vdisk.storagedriver_client.get_scrubbing_workunits(str(vdisk.volume_id))
                for work_unit in work_units:
                    scrubbing_result = _storagedriver_scrubber.scrub(work_unit, str(storagedriver.mountpoint_temp))
                    vdisk.storagedriver_client.apply_scrubbing_result(scrubbing_result)
            except Exception as ex:
                failed += 1
                logger.info('Failed scrubbing work unit for volume {0}: {1}'.format(
                    vdisk.volume_id, ex
                ))
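To make the bucket arithmetic concrete, here is a small self-contained sketch computing the first day-bucket's boundaries for a fixed timestamp, reusing the same offset formula as the code above (the chosen date is arbitrary):

    from datetime import datetime
    from time import mktime

    day = 60 * 60 * 24
    timestamp = mktime(datetime(2015, 6, 15, 14, 30).timetuple())
    # offset = midnight of the previous day
    offset = int(mktime(datetime.fromtimestamp(timestamp).date().timetuple())) - day
    # The first 1d bucket matches snapshots with start >= ts > end,
    # i.e. those taken during 2015-06-13 (up to and including midnight 2015-06-14)
    print(offset, offset - day)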
 def test_storagedriver_config_set(self):
     """
     Validates whether storagedriver configuration is generated as expected
     """
     PersistentFactory.get_client().set('ovs.storagedriver.mds.safety', 3)
     vpools, storagerouters, storagedrivers, services, mds_services, _ = self._build_service_structure(
         {'vpools': [1, 2],
          'storagerouters': [1, 2, 3, 4, 5, 6],
          'storagedrivers': [(1, 1, 1), (2, 1, 2), (3, 1, 3), (4, 1, 4), (5, 2, 4), (6, 2, 5), (7, 2, 6)],  # (<id>, <vpool_id>, <sr_id>)
          'mds_services': [(1, 1), (2, 1), (3, 2), (4, 3), (5, 4), (6, 5), (7, 6), (8, 7), (9, 7)]}  # (<id>, <sd_id>)
     )
     vdisks = {}
     start_id = 1
     for mds_service in mds_services.itervalues():
         vdisks.update(self._create_vdisks_for_mds_service(10, start_id, mds_service=mds_service))
         start_id += 10
     mds_services[1].capacity = 11  # on 1, vpool 1
     mds_services[1].save()
     mds_services[2].capacity = 20  # on 1, vpool 1
     mds_services[2].save()
     mds_services[3].capacity = 12  # on 2, vpool 1
     mds_services[3].save()
     mds_services[4].capacity = 14  # on 3, vpool 1
     mds_services[4].save()
     mds_services[5].capacity = 16  # on 4, vpool 1
     mds_services[5].save()
     mds_services[6].capacity = 11  # on 4, vpool 2
     mds_services[6].save()
     mds_services[7].capacity = 13  # on 5, vpool 2
     mds_services[7].save()
     mds_services[8].capacity = 19  # on 6, vpool 2
     mds_services[8].save()
     mds_services[9].capacity = 15  # on 6, vpool 2
     mds_services[9].save()
     config = MDSServiceController.get_mds_storagedriver_config_set(vpools[1])
     expected = {storagerouters[1].guid: [{'host': '10.0.0.1', 'port': 2},
                                          {'host': '10.0.0.4', 'port': 5},
                                          {'host': '10.0.0.3', 'port': 4}],
                 storagerouters[2].guid: [{'host': '10.0.0.2', 'port': 3},
                                          {'host': '10.0.0.1', 'port': 2},
                                          {'host': '10.0.0.4', 'port': 5}],
                 storagerouters[3].guid: [{'host': '10.0.0.3', 'port': 4},
                                          {'host': '10.0.0.1', 'port': 2},
                                          {'host': '10.0.0.4', 'port': 5}],
                 storagerouters[4].guid: [{'host': '10.0.0.4', 'port': 5},
                                          {'host': '10.0.0.1', 'port': 2},
                                          {'host': '10.0.0.3', 'port': 4}]}
     self.assertDictEqual(config, expected, 'Test 1. Got:\n{0}'.format(json.dumps(config, indent=2)))
     mds_services[2].capacity = 10  # on 1, vpool 1
     mds_services[2].save()
     config = MDSServiceController.get_mds_storagedriver_config_set(vpools[1])
     expected = {storagerouters[1].guid: [{'host': '10.0.0.1', 'port': 1},
                                          {'host': '10.0.0.4', 'port': 5},
                                          {'host': '10.0.0.3', 'port': 4}],
                 storagerouters[2].guid: [{'host': '10.0.0.2', 'port': 3},
                                          {'host': '10.0.0.4', 'port': 5},
                                          {'host': '10.0.0.3', 'port': 4}],
                 storagerouters[3].guid: [{'host': '10.0.0.3', 'port': 4},
                                          {'host': '10.0.0.4', 'port': 5},
                                          {'host': '10.0.0.2', 'port': 3}],
                 storagerouters[4].guid: [{'host': '10.0.0.4', 'port': 5},
                                          {'host': '10.0.0.3', 'port': 4},
                                          {'host': '10.0.0.2', 'port': 3}]}
     self.assertDictEqual(config, expected, 'Test 2. Got:\n{0}'.format(json.dumps(config, indent=2)))
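
Per the expectations above, get_mds_storagedriver_config_set returns a dict mapping each StorageRouter guid to an ordered list of MDS endpoints: the StorageRouter's own least-loaded service first, followed by the best remote candidates, capped at the configured safety of 3. A hypothetical shape check for that structure (test-only helper, not part of the controller):

def assert_config_set_shape(config_set, safety=3):
    for sr_guid, endpoints in config_set.iteritems():
        # At most `safety` endpoints per StorageRouter ...
        assert len(endpoints) <= safety
        for endpoint in endpoints:
            # ... each endpoint being a host/port dict
            assert set(endpoint.keys()) == set(['host', 'port'])
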
Example #28
    def test_ensure_safety(self):
        """
        Validates whether the ensure_safety call works as expected
        """
        def _generate_mds_service_load_repr(_mds_service):
            """
            Generates a load representation for a given mds_service
            """
            masters, slaves = 0, 0
            for _junction in _mds_service.vdisks:
                if _junction.is_master:
                    masters += 1
                else:
                    slaves += 1
            capacity = _mds_service.capacity
            if capacity == -1:
                capacity = 'infinite'
            _load, _ = MDSServiceController.get_mds_load(_mds_service)
            if _load == float('inf'):
                _load = 'infinite'
            else:
                _load = round(_load, 2)
            return [
                _mds_service.service.storagerouter.ip,
                _mds_service.service.ports[0], masters, slaves, capacity, _load
            ]

        def _check_reality(_configs, _loads, _vdisks, _mds_services, test=True, display=False):
            """
            Validates 'reality' with an expected config/load
            """
            reality_configs = []
            for _vdisk_id in _vdisks:
                reality_configs.append(
                    _vdisks[_vdisk_id].info['metadata_backend_config'])
            if display is True:
                for c in reality_configs:
                    print c
            if test is True:
                self.assertListEqual(reality_configs, _configs)
            reality_loads = []
            for mds_id in _mds_services:
                reality_loads.append(
                    _generate_mds_service_load_repr(_mds_services[mds_id]))
            if display is True:
                for l in reality_loads:
                    print l
            if test is True:
                self.assertListEqual(reality_loads, _loads)

        PersistentFactory.get_client().set('ovs.storagedriver.mds.safety', 3)
        PersistentFactory.get_client().set('ovs.storagedriver.mds.maxload', 75)
        PersistentFactory.get_client().set('ovs.storagedriver.mds.tlogs', 100)
        vpools, storagerouters, storagedrivers, _, mds_services, service_type = self._build_service_structure(
            {'vpools': [1],
             'storagerouters': [1, 2, 3, 4],
             'storagedrivers': [(1, 1, 1), (2, 1, 2), (3, 1, 3), (4, 1, 4)],  # (<id>, <vpool_id>, <sr_id>)
             'mds_services': [(1, 1), (2, 2), (3, 3), (4, 4)]}  # (<id>, <sd_id>)
        )
        vdisks = {}
        start_id = 1
        for mds_service in mds_services.itervalues():
            vdisks.update(self._create_vdisks_for_mds_service(2, start_id, mds_service=mds_service))
            start_id += 2

        # Validate the start configuration which is simple, each disk has only its default local master
        configs = [[{'ip': '10.0.0.1', 'port': 1}],
                   [{'ip': '10.0.0.1', 'port': 1}],
                   [{'ip': '10.0.0.2', 'port': 2}],
                   [{'ip': '10.0.0.2', 'port': 2}],
                   [{'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.4', 'port': 4}]]
        loads = [['10.0.0.1', 1, 2, 0, 10, 20.0],
                 ['10.0.0.2', 2, 2, 0, 10, 20.0],
                 ['10.0.0.3', 3, 2, 0, 10, 20.0],
                 ['10.0.0.4', 4, 2, 0, 10, 20.0]]
        _check_reality(configs, loads, vdisks, mds_services)

        # Validate first run. Each disk should now have sufficient nodes, since there are plenty of MDS services available
        configs = [[{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.2', 'port': 2}],
                   [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.2', 'port': 2}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.1', 'port': 1}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.2', 'port': 2}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}]]
        loads = [['10.0.0.1', 1, 2, 5, 10, 70.0],
                 ['10.0.0.2', 2, 2, 4, 10, 60.0],
                 ['10.0.0.3', 3, 2, 4, 10, 60.0],
                 ['10.0.0.4', 4, 2, 3, 10, 50.0]]
        for vdisk_id in sorted(vdisks.keys()):
            MDSServiceController.ensure_safety(vdisks[vdisk_id])
        _check_reality(configs, loads, vdisks, mds_services)

        # Validate whether this extra (unnecessary) run doesn't change anything, preventing reconfiguring over and
        # over again
        for vdisk_id in sorted(vdisks.keys()):
            MDSServiceController.ensure_safety(vdisks[vdisk_id])
        _check_reality(configs, loads, vdisks, mds_services)

        # Validate whether an overloaded node causes correct rebalancing
        mds_services[2].capacity = 2
        mds_services[2].save()
        configs = [[{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.1', 'port': 1}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.1', 'port': 1}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}]]
        loads = [['10.0.0.1', 1, 2, 5, 10, 70.0],
                 ['10.0.0.2', 2, 2, 0, 2, 100.0],
                 ['10.0.0.3', 3, 2, 5, 10, 70.0],
                 ['10.0.0.4', 4, 2, 5, 10, 70.0]]
        for vdisk_id in sorted(vdisks.keys()):
            MDSServiceController.ensure_safety(vdisks[vdisk_id])
        _check_reality(configs, loads, vdisks, mds_services)

        # Validate whether the overloaded services are still handled. In this case, causing a reorder of the slaves
        # as ordered in the model
        configs = [[{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.1', 'port': 1}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}]]
        loads = [['10.0.0.1', 1, 2, 5, 10, 70.0],
                 ['10.0.0.2', 2, 2, 0, 2, 100.0],
                 ['10.0.0.3', 3, 2, 5, 10, 70.0],
                 ['10.0.0.4', 4, 2, 5, 10, 70.0]]
        for vdisk_id in sorted(vdisks.keys()):
            MDSServiceController.ensure_safety(vdisks[vdisk_id])
        _check_reality(configs, loads, vdisks, mds_services)

        # Again, validating whether a subsequent run doesn't give unexpected changes
        for vdisk_id in sorted(vdisks.keys()):
            MDSServiceController.ensure_safety(vdisks[vdisk_id])
        _check_reality(configs, loads, vdisks, mds_services)

        # An MDS service is added (next to the overloaded service), which should cause the expected rebalancing
        s_id = '{0}-5'.format(storagerouters[2].name)
        service = Service()
        service.name = s_id
        service.storagerouter = storagerouters[2]
        service.ports = [5]
        service.type = service_type
        service.save()
        mds_service = MDSService()
        mds_service.service = service
        mds_service.number = 0
        mds_service.capacity = 10
        mds_service.vpool = vpools[1]
        mds_service.save()
        mds_services[5] = mds_service
        configs = [[{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.2', 'port': 5}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.2', 'port': 5}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.1', 'port': 1}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.2', 'port': 5}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}]]
        loads = [['10.0.0.1', 1, 2, 5, 10, 70.0],
                 ['10.0.0.2', 2, 2, 0, 2, 100.0],
                 ['10.0.0.3', 3, 2, 5, 10, 70.0],
                 ['10.0.0.4', 4, 2, 5, 10, 70.0],
                 ['10.0.0.2', 5, 0, 3, 10, 30.0]]
        for vdisk_id in sorted(vdisks.keys()):
            MDSServiceController.ensure_safety(vdisks[vdisk_id])
        _check_reality(configs, loads, vdisks, mds_services)

        # If the tlogs are not caught up, nothing should be changed
        for vdisk_id in [3, 4]:
            StorageDriverClient.catch_up[vdisks[vdisk_id].volume_id] = 1000
        for vdisk_id in sorted(vdisks.keys()):
            MDSServiceController.ensure_safety(vdisks[vdisk_id])
        _check_reality(configs, loads, vdisks, mds_services)

        # On the next run, after the tlogs are caught up, a master switch should be executed
        for vdisk_id in [3, 4]:
            StorageDriverClient.catch_up[vdisks[vdisk_id].volume_id] = 50
        configs = [[{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.2', 'port': 5}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.1', 'port': 1}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.2', 'port': 5}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}]]
        loads = [['10.0.0.1', 1, 2, 5, 10, 70.0],
                 ['10.0.0.2', 2, 1, 0, 2, 50.0],
                 ['10.0.0.3', 3, 2, 5, 10, 70.0],
                 ['10.0.0.4', 4, 2, 5, 10, 70.0],
                 ['10.0.0.2', 5, 1, 1, 10, 20.0]]
        for vdisk_id in sorted(vdisks.keys()):
            MDSServiceController.ensure_safety(vdisks[vdisk_id])
        _check_reality(configs, loads, vdisks, mds_services)

        # Validate whether a volume migration makes the master follow
        StorageDriverClient.vrouter_id[vdisks[1].volume_id] = storagedrivers[3].storagedriver_id
        configs = [[{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.2', 'port': 5}],
                   [{'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.2', 'port': 5}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.3', 'port': 3}],
                   [{'ip': '10.0.0.2', 'port': 2}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}, {'ip': '10.0.0.4', 'port': 4}],
                   [{'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.1', 'port': 1}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.2', 'port': 5}],
                   [{'ip': '10.0.0.4', 'port': 4}, {'ip': '10.0.0.3', 'port': 3}, {'ip': '10.0.0.1', 'port': 1}]]
        loads = [['10.0.0.1', 1, 1, 6, 10, 70.0],
                 ['10.0.0.2', 2, 1, 0, 2, 50.0],
                 ['10.0.0.3', 3, 3, 4, 10, 70.0],
                 ['10.0.0.4', 4, 2, 4, 10, 60.0],
                 ['10.0.0.2', 5, 1, 2, 10, 30.0]]
        for vdisk_id in sorted(vdisks.keys()):
            MDSServiceController.ensure_safety(vdisks[vdisk_id])
        _check_reality(configs, loads, vdisks, mds_services)

        # Validate that a second run doesn't change anything
        for vdisk_id in sorted(vdisks.keys()):
            MDSServiceController.ensure_safety(vdisks[vdisk_id])
        _check_reality(configs, loads, vdisks, mds_services)
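
Each load tuple above reads [ip, port, masters, slaves, capacity, load]; in every expectation the load works out to occupied slots over capacity, e.g. 2 masters + 5 slaves on a capacity-10 service gives 70.0. A small sketch of that implied formula (our helper, written for illustration, not the controller's API):

def expected_load(masters, slaves, capacity):
    # Load percentage implied by the tuples above: (masters + slaves) / capacity
    if capacity == 0:
        return float('inf')
    return round((masters + slaves) * 100.0 / capacity, 2)

assert expected_load(2, 5, 10) == 70.0
assert expected_load(2, 0, 2) == 100.0
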
Example #29
    def create_from_template(diskguid, devicename, pmachineguid, machinename='', machineguid=None):
        """
        Create a disk from a template

        :param diskguid: Guid of the disk
        :param machinename: Name of the machine
        :param devicename: Device file name for the disk (eg: my_disk-flat.vmdk)
        :param pmachineguid: Guid of pmachine to create new vdisk on
        :param machineguid: Guid of the machine to assign disk to
        :return diskguid: Guid of new disk
        """
        pmachine = PMachine(pmachineguid)
        hypervisor = Factory.get(pmachine)
        if machineguid is not None:
            new_vdisk_vmachine = VMachine(machineguid)
            machinename = new_vdisk_vmachine.name
        disk_path = hypervisor.get_disk_path(machinename, devicename)

        description = '{0} {1}'.format(machinename, devicename)
        properties_to_clone = [
            'description', 'size', 'type', 'retentionpolicyid',
            'snapshotpolicyid', 'vmachine', 'vpool']

        vdisk = VDisk(diskguid)
        if vdisk.vmachine and not vdisk.vmachine.is_vtemplate:
            # The vDisk is attached to a vMachine which is not a template
            raise RuntimeError('The given vdisk does not belong to a template')
        if not vdisk.is_vtemplate:
            # A vDisk might not be attached to a vMachine, yet still needs to be a template itself
            raise RuntimeError('The given vdisk is not a template')

        storagedriver = None
        for sd in vdisk.vpool.storagedrivers:
            if sd.storagerouter_guid in pmachine.storagerouters_guids:
                storagedriver = sd
                break

        if storagedriver is None:
            raise RuntimeError('Could not find Storage Driver')

        new_vdisk = VDisk()
        new_vdisk.copy(vdisk, include=properties_to_clone)
        new_vdisk.vpool = vdisk.vpool
        new_vdisk.devicename = hypervisor.clean_backing_disk_filename(disk_path)
        new_vdisk.parent_vdisk = vdisk
        new_vdisk.name = '{0}-clone'.format(vdisk.name)
        new_vdisk.description = description
        new_vdisk.vmachine = new_vdisk_vmachine if machineguid else vdisk.vmachine
        new_vdisk.save()

        mds_service = MDSServiceController.get_preferred_mds(storagedriver.storagerouter, new_vdisk.vpool)
        if mds_service is None:
            raise RuntimeError('Could not find an MDS service')

        logger.info('Create disk from template {0} to new disk {1} to location {2}'.format(vdisk.name, new_vdisk.name, disk_path))

        try:
            backend_config = MDSNodeConfig(address=str(mds_service.service.storagerouter.ip),
                                           port=mds_service.service.ports[0])
            volume_id = vdisk.storagedriver_client.create_clone_from_template(target_path=disk_path,
                                                                              metadata_backend_config=MDSMetaDataBackendConfig([backend_config]),
                                                                              parent_volume_id=str(vdisk.volume_id),
                                                                              node_id=str(storagedriver.storagedriver_id))
            new_vdisk.volume_id = volume_id
            new_vdisk.save()
            MDSServiceController.ensure_safety(new_vdisk)
            VDiskController.dtl_checkup.delay(vdisk_guid=new_vdisk.guid)
        except Exception as ex:
            logger.error('Clone disk on volumedriver level failed with exception: {0}'.format(str(ex)))
            try:
                VDiskController.clean_bad_disk(new_vdisk.guid)
            except Exception as ex2:
                logger.exception('Exception during exception handling of "create_clone_from_template" : {0}'.format(str(ex2)))
            raise ex

        return {'diskguid': new_vdisk.guid, 'name': new_vdisk.name,
                'backingdevice': disk_path}
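
A hypothetical caller, mainly to show the shape of the return value (the guids and device name are placeholders):

# Assuming a template vDisk and a target pmachine are at hand
result = VDiskController.create_from_template(diskguid=template_vdisk.guid,
                                              devicename='my_disk-flat.vmdk',
                                              pmachineguid=pmachine.guid)
# result == {'diskguid': <guid of the new vDisk>,
#            'name': '<template name>-clone',
#            'backingdevice': <disk path as resolved by the hypervisor>}
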
Example #30
    def shrink_vpool(cls,
                     storagedriver_guid,
                     offline_storage_router_guids=list()):
        """
        Removes a StorageDriver (if it's the last StorageDriver for a vPool, the vPool is removed as well)
        :param storagedriver_guid: Guid of the StorageDriver to remove
        :type storagedriver_guid: str
        :param offline_storage_router_guids: Guids of StorageRouters which are offline and will be removed from the cluster.
                                             Whether the vPool gets deleted as well depends on this.
        :type offline_storage_router_guids: list
        :return: None
        :rtype: NoneType
        """
        # TODO: Add logging
        # TODO: Unit test individual pieces of code
        # Validations
        storagedriver = StorageDriver(storagedriver_guid)
        storagerouter = storagedriver.storagerouter
        cls._logger.info(
            'StorageDriver {0} - Deleting StorageDriver {1}'.format(
                storagedriver.guid, storagedriver.name))

        vp_installer = VPoolInstaller(name=storagedriver.vpool.name)
        vp_installer.validate(storagedriver=storagedriver)

        sd_installer = StorageDriverInstaller(vp_installer=vp_installer,
                                              storagedriver=storagedriver)

        cls._logger.info('StorageDriver {0} - Checking availability of related StorageRouters'.format(storagedriver.guid))
        sr_client_map = SSHClient.get_clients(endpoints=[sd.storagerouter for sd in vp_installer.vpool.storagedrivers],
                                              user_names=['root'])
        sr_installer = StorageRouterInstaller(root_client=sr_client_map.get(storagerouter, {}).get('root'),
                                              storagerouter=storagerouter,
                                              vp_installer=vp_installer,
                                              sd_installer=sd_installer)

        offline_srs = sr_client_map.pop('offline')
        if sorted([sr.guid for sr in offline_srs]) != sorted(offline_storage_router_guids):
            raise RuntimeError('Not all StorageRouters are reachable')

        if storagerouter not in offline_srs:
            mtpt_pids = sr_installer.root_client.run("lsof -t +D '/mnt/{0}' || true".format(vp_installer.name.replace(r"'", r"'\''")),
                                                     allow_insecure=True).splitlines()
            if len(mtpt_pids) > 0:
                raise RuntimeError('vPool cannot be deleted. Following processes keep the vPool mount point occupied: {0}'.format(', '.join(mtpt_pids)))

        # Retrieve reachable StorageDrivers
        reachable_storagedrivers = []
        for sd in vp_installer.vpool.storagedrivers:
            if sd.storagerouter not in sr_client_map:
                # StorageRouter is offline
                continue

            sd_key = '/ovs/vpools/{0}/hosts/{1}/config'.format(vp_installer.vpool.guid, sd.storagedriver_id)
            if Configuration.exists(sd_key) is True:
                path = Configuration.get_configuration_path(sd_key)
                with remote(sd.storagerouter.ip, [LocalStorageRouterClient]) as rem:
                    try:
                        lsrc = rem.LocalStorageRouterClient(path)
                        lsrc.server_revision()  # 'Cheap' call to verify whether volumedriver is responsive
                        cls._logger.info('StorageDriver {0} - Responsive StorageDriver {1} on node with IP {2}'.format(
                            storagedriver.guid, sd.name, sd.storagerouter.ip))
                        reachable_storagedrivers.append(sd)
                    except Exception as exception:
                        if not is_connection_failure(exception):
                            raise

        if len(reachable_storagedrivers) == 0:
            raise RuntimeError(
                'Could not find any responsive node in the cluster')

        # Start removal
        if vp_installer.storagedriver_amount > 1:
            vp_installer.update_status(status=VPool.STATUSES.SHRINKING)
        else:
            vp_installer.update_status(status=VPool.STATUSES.DELETING)

        # Clean up stale vDisks
        cls._logger.info('StorageDriver {0} - Removing stale vDisks'.format(
            storagedriver.guid))
        VDiskController.remove_stale_vdisks(vpool=vp_installer.vpool)

        # Reconfigure the MDSes
        cls._logger.info('StorageDriver {0} - Reconfiguring MDSes'.format(
            storagedriver.guid))
        for vdisk_guid in storagerouter.vdisks_guids:
            try:
                MDSServiceController.ensure_safety(
                    vdisk_guid=vdisk_guid,
                    excluded_storagerouter_guids=[storagerouter.guid] +
                    offline_storage_router_guids)
            except Exception:
                cls._logger.exception(
                    'StorageDriver {0} - vDisk {1} - Ensuring MDS safety failed'
                    .format(storagedriver.guid, vdisk_guid))

        # Validate that all MDSes on current StorageRouter have been moved away
        # ensure_safety does not always raise on failure, which is why we perform this check here instead of in the except clause above
        vdisks = []
        for mds in vp_installer.mds_services:
            for junction in mds.vdisks:
                vdisk = junction.vdisk
                if vdisk in vdisks:
                    continue
                vdisks.append(vdisk)
                cls._logger.critical(
                    'StorageDriver {0} - vDisk {1} {2} - MDS Services have not been migrated away'
                    .format(storagedriver.guid, vdisk.guid, vdisk.name))
        if len(vdisks) > 0:
            # Put back in RUNNING, so it can be used again. Errors keep on displaying in GUI now anyway
            vp_installer.update_status(status=VPool.STATUSES.RUNNING)
            raise RuntimeError(
                'Not all MDS Services have been successfully migrated away')

        # Start with actual removal
        # Assumption: the installer helpers return True when an error occurred, so failures are
        # accumulated with |= (with &= a False accumulator could never flip to True)
        errors_found = False
        if storagerouter not in offline_srs:
            errors_found |= sd_installer.stop_services()

        errors_found |= vp_installer.configure_cluster_registry(exclude=[storagedriver], apply_on=reachable_storagedrivers)
        errors_found |= vp_installer.update_node_distance_map()
        errors_found |= vp_installer.remove_mds_services()
        errors_found |= sd_installer.clean_config_management()
        errors_found |= sd_installer.clean_model()

        if storagerouter not in offline_srs:
            errors_found |= sd_installer.clean_directories(
                mountpoints=StorageRouterController.get_mountpoints(client=sr_installer.root_client))

            try:
                DiskController.sync_with_reality(
                    storagerouter_guid=storagerouter.guid)
            except Exception:
                cls._logger.exception(
                    'StorageDriver {0} - Synchronizing disks with reality failed'
                    .format(storagedriver.guid))
                errors_found = True

        if vp_installer.storagedriver_amount > 1:
            # Update the vPool metadata and run DTL checkup
            vp_installer.vpool.metadata['caching_info'].pop(
                sr_installer.storagerouter.guid, None)
            vp_installer.vpool.save()

            try:
                VDiskController.dtl_checkup(vpool_guid=vp_installer.vpool.guid,
                                            ensure_single_timeout=600)
            except Exception:
                cls._logger.exception(
                    'StorageDriver {0} - DTL checkup failed for vPool {1} with guid {2}'
                    .format(storagedriver.guid, vp_installer.name,
                            vp_installer.vpool.guid))
        else:
            cls._logger.info(
                'StorageDriver {0} - Removing vPool from model'.format(
                    storagedriver.guid))
            # Clean up model
            try:
                vp_installer.vpool.delete()
            except Exception:
                errors_found = True
                cls._logger.exception(
                    'StorageDriver {0} - Cleaning up vPool from the model failed'
                    .format(storagedriver.guid))
            Configuration.delete('/ovs/vpools/{0}'.format(
                vp_installer.vpool.guid))

        cls._logger.info('StorageDriver {0} - Running MDS checkup'.format(
            storagedriver.guid))
        try:
            MDSServiceController.mds_checkup()
        except Exception:
            cls._logger.exception(
                'StorageDriver {0} - MDS checkup failed'.format(
                    storagedriver.guid))

        # Update vPool status
        if errors_found is True:
            if vp_installer.storagedriver_amount > 1:
                vp_installer.update_status(status=VPool.STATUSES.FAILURE)
            raise RuntimeError(
                '1 or more errors occurred while trying to remove the StorageDriver. Please check the logs for more information'
            )

        if vp_installer.storagedriver_amount > 1:
            vp_installer.update_status(status=VPool.STATUSES.RUNNING)
        cls._logger.info(
            'StorageDriver {0} - Deleted StorageDriver {1}'.format(
                storagedriver.guid, storagedriver.name))

        if len(VPoolList.get_vpools()) == 0:
            cluster_name = ArakoonInstaller.get_cluster_name('voldrv')
            if ArakoonInstaller.get_arakoon_metadata_by_cluster_name(
                    cluster_name=cluster_name)['internal'] is True:
                cls._logger.debug(
                    'StorageDriver {0} - Removing Arakoon cluster {1}'.format(
                        storagedriver.guid, cluster_name))
                try:
                    installer = ArakoonInstaller(cluster_name=cluster_name)
                    installer.load()
                    installer.delete_cluster()
                except Exception:
                    cls._logger.exception(
                        'StorageDriver {0} - Delete voldrv Arakoon cluster failed'
                        .format(storagedriver.guid))
                service_type = ServiceTypeList.get_by_name(
                    ServiceType.SERVICE_TYPES.ARAKOON)
                service_name = ArakoonInstaller.get_service_name_for_cluster(
                    cluster_name=cluster_name)
                for service in list(service_type.services):
                    if service.name == service_name:
                        service.delete()

        # Remove watcher volumedriver service if last StorageDriver on current StorageRouter
        if len(storagerouter.storagedrivers) == 0 and storagerouter not in offline_srs:  # Ensure client is initialized for StorageRouter
            try:
                if cls._service_manager.has_service(
                        ServiceFactory.SERVICE_WATCHER_VOLDRV,
                        client=sr_installer.root_client):
                    cls._service_manager.stop_service(
                        ServiceFactory.SERVICE_WATCHER_VOLDRV,
                        client=sr_installer.root_client)
                    cls._service_manager.remove_service(
                        ServiceFactory.SERVICE_WATCHER_VOLDRV,
                        client=sr_installer.root_client)
            except Exception:
                cls._logger.exception(
                    'StorageDriver {0} - {1} service deletion failed'.format(
                        storagedriver.guid,
                        ServiceFactory.SERVICE_WATCHER_VOLDRV))
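
Taken together, the flow above is: validate, drain the MDSes, tear down services/config/model, then either shrink or fully delete the vPool. A hypothetical invocation (assuming the method lives on StorageRouterController, which the body itself references):

# Remove one StorageDriver while one of the cluster's StorageRouters is down
StorageRouterController.shrink_vpool(storagedriver_guid=storagedriver.guid,
                                     offline_storage_router_guids=[offline_sr.guid])
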
Example #31
    def update_vmachine_config(vmachine, vm_object, pmachine=None):
        """
        Update a vMachine configuration with a given vMachine configuration
        """
        try:
            vdisks_synced = 0
            if vmachine.name is None:
                MessageController.fire(
                    MessageController.Type.EVENT, {
                        'type': 'vmachine_created',
                        'metadata': {
                            'name': vm_object['name']
                        }
                    })
            elif vmachine.name != vm_object['name']:
                MessageController.fire(
                    MessageController.Type.EVENT, {
                        'type': 'vmachine_renamed',
                        'metadata': {
                            'old_name': vmachine.name,
                            'new_name': vm_object['name']
                        }
                    })
            if pmachine is not None:
                vmachine.pmachine = pmachine
            vmachine.name = vm_object['name']
            vmachine.hypervisor_id = vm_object['id']
            vmachine.devicename = vm_object['backing']['filename']
            vmachine.save()
            # Updating and linking disks
            storagedrivers = StorageDriverList.get_storagedrivers()
            datastores = dict([('{}:{}'.format(storagedriver.storage_ip,
                                               storagedriver.mountpoint),
                                storagedriver)
                               for storagedriver in storagedrivers])
            vdisk_guids = []
            for disk in vm_object['disks']:
                if disk['datastore'] in vm_object['datastores']:
                    datastore = vm_object['datastores'][disk['datastore']]
                    if datastore in datastores:
                        vdisk = VDiskList.get_by_devicename_and_vpool(
                            disk['filename'], datastores[datastore].vpool)
                        if vdisk is None:
                            # The disk couldn't be located, but is in our datastore. We might be in a recovery scenario
                            vdisk = VDisk()
                            vdisk.vpool = datastores[datastore].vpool
                            vdisk.reload_client()
                            vdisk.devicename = disk['filename']
                            vdisk.volume_id = vdisk.storagedriver_client.get_volume_id(
                                str(disk['backingfilename']))
                            vdisk.size = vdisk.info['volume_size']
                            MDSServiceController.ensure_safety(vdisk)
                        # Update the disk with information from the hypervisor
                        if vdisk.vmachine is None:
                            MessageController.fire(
                                MessageController.Type.EVENT, {
                                    'type': 'vdisk_attached',
                                    'metadata': {
                                        'vmachine_name': vmachine.name,
                                        'vdisk_name': disk['name']
                                    }
                                })
                        vdisk.vmachine = vmachine
                        vdisk.name = disk['name']
                        vdisk.order = disk['order']
                        vdisk.save()
                        vdisk_guids.append(vdisk.guid)
                        vdisks_synced += 1

            for vdisk in vmachine.vdisks:
                if vdisk.guid not in vdisk_guids:
                    MessageController.fire(
                        MessageController.Type.EVENT, {
                            'type': 'vdisk_detached',
                            'metadata': {
                                'vmachine_name': vmachine.name,
                                'vdisk_name': vdisk.name
                            }
                        })
                    vdisk.vmachine = None
                    vdisk.save()

            logger.info('Updating vMachine finished (name {}, {} vdisks (re)linked)'.format(vmachine.name, vdisks_synced))
        except Exception as ex:
            logger.info('Error during vMachine update: {0}'.format(str(ex)))
            raise
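
The vm_object accesses above imply a structure along these lines (illustrative values only; the datastore keys follow the '{storage_ip}:{mountpoint}' format built from the StorageDrivers):

vm_object = {'id': 'vm-1001',                               # hypervisor id
             'name': 'my-vm',
             'backing': {'filename': 'my-vm/my-vm.vmx'},
             'datastores': {'ds-1': '10.0.0.1:/mnt/vpool1'},
             'disks': [{'datastore': 'ds-1',
                        'filename': 'my-vm/disk-flat.vmdk',
                        'backingfilename': 'my-vm/disk-flat.vmdk',
                        'name': 'disk',
                        'order': 0}]}
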
Example #32
    def clone(diskguid, snapshotid, devicename, pmachineguid, machinename=None, machineguid=None, detached=False):
        """
        Clone a disk
        """
        pmachine = PMachine(pmachineguid)
        hypervisor = Factory.get(pmachine)
        if machinename is None:
            description = devicename
        else:
            description = '{0} {1}'.format(machinename, devicename)
        properties_to_clone = ['description', 'size', 'type', 'retentionpolicyguid',
                               'snapshotpolicyguid', 'autobackup']
        vdisk = VDisk(diskguid)
        location = hypervisor.get_backing_disk_path(machinename, devicename)

        if machineguid is not None and detached is True:
            raise ValueError('A vMachine GUID was specified while detached is True')

        if snapshotid is None:
            # Create a new snapshot
            timestamp = str(int(time.time()))
            metadata = {'label': '',
                        'is_consistent': False,
                        'timestamp': timestamp,
                        'machineguid': machineguid,
                        'is_automatic': True}
            VDiskController.create_snapshot(diskguid, metadata)
            tries = 25  # About 5 minutes
            while snapshotid is None and tries > 0:
                tries -= 1
                time.sleep(25 - tries)
                vdisk.invalidate_dynamics(['snapshots'])
                snapshots = [snapshot for snapshot in vdisk.snapshots
                             if snapshot['in_backend'] is True and snapshot['timestamp'] == timestamp]
                if len(snapshots) == 1:
                    snapshotid = snapshots[0]['guid']
            if snapshotid is None:
                raise RuntimeError('Could not find created snapshot in time')

        new_vdisk = VDisk()
        new_vdisk.copy(vdisk, include=properties_to_clone)
        new_vdisk.parent_vdisk = vdisk
        new_vdisk.name = '{0}-clone'.format(vdisk.name)
        new_vdisk.description = description
        new_vdisk.devicename = hypervisor.clean_backing_disk_filename(location)
        new_vdisk.parentsnapshot = snapshotid
        if detached is False:
            new_vdisk.vmachine = VMachine(machineguid) if machineguid else vdisk.vmachine
        new_vdisk.vpool = vdisk.vpool
        new_vdisk.save()

        try:
            storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
            if storagedriver is None:
                raise RuntimeError('Could not find StorageDriver with id {0}'.format(vdisk.storagedriver_id))

            mds_service = MDSServiceController.get_preferred_mds(storagedriver.storagerouter, vdisk.vpool)
            if mds_service is None:
                raise RuntimeError('Could not find an MDS service')

            logger.info('Clone snapshot {0} of disk {1} to location {2}'.format(snapshotid, vdisk.name, location))
            volume_id = vdisk.storagedriver_client.create_clone(
                target_path=location,
                metadata_backend_config=MDSMetaDataBackendConfig([MDSNodeConfig(address=str(mds_service.service.storagerouter.ip),
                                                                                port=mds_service.service.ports[0])]),
                parent_volume_id=str(vdisk.volume_id),
                parent_snapshot_id=str(snapshotid),
                node_id=str(vdisk.storagedriver_id)
            )
        except Exception as ex:
            logger.error('Caught exception during clone, trying to delete the volume. {0}'.format(ex))
            new_vdisk.delete()
            VDiskController.delete_volume(location)
            raise

        new_vdisk.volume_id = volume_id
        new_vdisk.save()

        try:
            MDSServiceController.ensure_safety(new_vdisk)
        except Exception as ex:
            logger.error('Caught exception during "ensure_safety" {0}'.format(ex))

        return {'diskguid': new_vdisk.guid,
                'name': new_vdisk.name,
                'backingdevice': location}
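
The snapshot wait loop above backs off linearly: the first retry sleeps 1 second, the last sleeps 25, which is where the 'About 5 minutes' comment comes from:

# sleep(25 - tries) for tries = 24 .. 0 gives sleeps of 1, 2, ..., 25 seconds
total_wait = sum(range(1, 26))
assert total_wait == 325  # seconds, i.e. roughly 5 minutes of worst-case waiting
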
Example #33
    def _execute_scrub(queue, vpool, scrub_info, scrub_dir, error_messages):
        def _verify_mds_config(current_vdisk):
            current_vdisk.invalidate_dynamics('info')
            vdisk_configs = current_vdisk.info['metadata_backend_config']
            if len(vdisk_configs) == 0:
                raise RuntimeError('Could not load MDS configuration')
            return vdisk_configs

        storagerouter = scrub_info['storage_router']
        partition_guid = scrub_info['partition_guid']
        volatile_client = VolatileFactory.get_client()
        backend_config_key = 'ovs/vpools/{0}/proxies/scrub/backend_config_{1}'.format(
            vpool.guid, partition_guid)
        try:
            # Empty the queue with vDisks to scrub
            with remote(storagerouter.ip, [VDisk]) as rem:
                while True:
                    vdisk = None
                    vdisk_guid = queue.get(False)  # Raises Empty when the queue is empty, breaking the while True loop
                    volatile_key = 'ovs_scrubbing_vdisk_{0}'.format(vdisk_guid)
                    try:
                        # Check MDS master is local. Trigger MDS handover if necessary
                        vdisk = rem.VDisk(vdisk_guid)
                        GenericController._logger.info(
                            'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Started scrubbing at location {3}'
                            .format(vpool.name, storagerouter.name, vdisk.name,
                                    scrub_dir))
                        configs = _verify_mds_config(current_vdisk=vdisk)
                        storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
                        if configs[0].get('ip') != storagedriver.storagerouter.ip:
                            GenericController._logger.info(
                                'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - MDS master is not local, trigger handover'.format(
                                    vpool.name, storagerouter.name, vdisk.name))
                            MDSServiceController.ensure_safety(VDisk(vdisk_guid))  # Do not use a remote VDisk instance here
                            configs = _verify_mds_config(current_vdisk=vdisk)
                            if configs[0].get('ip') != storagedriver.storagerouter.ip:
                                GenericController._logger.warning(
                                    'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Skipping because master MDS still not local'.format(
                                        vpool.name, storagerouter.name, vdisk.name))
                                continue

                        # Check if vDisk is already being scrubbed
                        if volatile_client.add(key=volatile_key,
                                               value=volatile_key,
                                               time=24 * 60 * 60) is False:
                            GenericController._logger.warning(
                                'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Skipping because vDisk is already being scrubbed'
                                .format(vpool.name, storagerouter.name,
                                        vdisk.name))
                            continue

                        # Do the actual scrubbing
                        with vdisk.storagedriver_client.make_locked_client(str(vdisk.volume_id)) as locked_client:
                            GenericController._logger.info(
                                'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Retrieve and apply scrub work'.format(
                                    vpool.name, storagerouter.name, vdisk.name))
                            work_units = locked_client.get_scrubbing_workunits()
                            for work_unit in work_units:
                                res = locked_client.scrub(work_unit=work_unit,
                                                          scratch_dir=scrub_dir,
                                                          log_sinks=[LogHandler.get_sink_path('scrubber_{0}'.format(vpool.name),
                                                                                              allow_override=True,
                                                                                              forced_target_type='file')],
                                                          backend_config=Configuration.get_configuration_path(backend_config_key))
                                locked_client.apply_scrubbing_result(scrubbing_work_result=res)
                            if work_units:
                                GenericController._logger.info(
                                    'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - {3} work units successfully applied'
                                    .format(vpool.name, storagerouter.name,
                                            vdisk.name, len(work_units)))
                            else:
                                GenericController._logger.info(
                                    'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - No scrubbing required'
                                    .format(vpool.name, storagerouter.name,
                                            vdisk.name))
                    except Exception:
                        if vdisk is None:
                            message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk with guid {2} could not be found'.format(
                                vpool.name, storagerouter.name, vdisk_guid)
                        else:
                            message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Scrubbing failed'.format(
                                vpool.name, storagerouter.name, vdisk.name)
                        error_messages.append(message)
                        GenericController._logger.exception(message)
                    finally:
                        # Remove vDisk from volatile memory
                        volatile_client.delete(volatile_key)

        except Empty:  # Raised when all items have been fetched from the queue
            GenericController._logger.info(
                'Scrubber - vPool {0} - StorageRouter {1} - Queue completely processed'
                .format(vpool.name, storagerouter.name))
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - Scrubbing failed'.format(
                vpool.name, storagerouter.name)
            error_messages.append(message)
            GenericController._logger.exception(message)
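
The loop above relies on Queue.get(False) raising Empty to terminate: workers drain a shared queue without ever blocking. A minimal standalone sketch of that drain pattern (Python 2 Queue module, matching the code above):

from Queue import Queue, Empty

def drain(queue):
    items = []
    try:
        while True:
            items.append(queue.get(False))  # non-blocking; raises Empty when exhausted
    except Empty:
        pass
    return items
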
Example #34
    def execute_scrub_work(queue, vpool, scrub_info, error_messages):
        """
        Executes scrub work for a given vDisk queue and vPool, based on scrub_info
        :param queue: a Queue with vDisk guids that need to be scrubbed (they should all belong to a single vPool)
        :type queue: Queue
        :param vpool: the vPool object of the vDisks
        :type vpool: VPool
        :param scrub_info: A dict containing scrub information: `scrub_path` with the path where to scrub and `storage_router` with the StorageRouter
                           that needs to do the work
        :type scrub_info: dict
        :param error_messages: A list of error messages to be filled
        :type error_messages: list
        :return: a list of error messages
        :rtype: list
        """

        def _verify_mds_config(current_vdisk):
            current_vdisk.invalidate_dynamics('info')
            vdisk_configs = current_vdisk.info['metadata_backend_config']
            if len(vdisk_configs) == 0:
                raise RuntimeError('Could not load MDS configuration')
            return vdisk_configs

        client = None
        lock_time = 5 * 60
        storagerouter = scrub_info['storage_router']
        scrub_directory = '{0}/scrub_work_{1}_{2}'.format(scrub_info['scrub_path'], vpool.name, storagerouter.name)
        scrub_config_key = 'ovs/vpools/{0}/proxies/scrub/scrub_config_{1}'.format(vpool.guid, storagerouter.guid)
        backend_config_key = 'ovs/vpools/{0}/proxies/scrub/backend_config_{1}'.format(vpool.guid, storagerouter.guid)
        alba_proxy_service = 'ovs-albaproxy_{0}_{1}_scrub'.format(vpool.name, storagerouter.name)

        # Deploy a proxy
        try:
            with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                client = SSHClient(storagerouter, 'root')
                client.dir_create(scrub_directory)
                client.dir_chmod(scrub_directory, 0777)  # The Celery task is executed as the 'ovs' user, which must be able to write in this directory
                if ServiceManager.has_service(name=alba_proxy_service, client=client) is True and ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True:
                    ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Re-using existing proxy service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                    scrub_config = Configuration.get(scrub_config_key)
                else:
                    machine_id = System.get_my_machine_id(client)
                    port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|storagedriver'.format(machine_id))
                    port = System.get_free_ports(selected_range=port_range, nr=1, client=client)[0]
                    # Scrub config
                    # {u'albamgr_cfg_url': u'arakoon://config/ovs/vpools/71e2f717-f270-4a41-bbb0-d4c8c084d43e/proxies/64759516-3471-4321-b912-fb424568fc5b/config/abm?ini=%2Fopt%2FOpenvStorage%2Fconfig%2Farakoon_cacc.ini',
                    #  u'fragment_cache': [u'none'],
                    #  u'ips': [u'127.0.0.1'],
                    #  u'log_level': u'info',
                    #  u'manifest_cache_size': 17179869184,
                    #  u'port': 0,
                    #  u'transport': u'tcp'}

                    # Backend config
                    # {u'alba_connection_host': u'10.100.193.155',
                    #  u'alba_connection_port': 26204,
                    #  u'alba_connection_preset': u'preset',
                    #  u'alba_connection_timeout': 15,
                    #  u'alba_connection_transport': u'TCP',
                    #  u'backend_interface_retries_on_error': 5,
                    #  u'backend_interface_retry_backoff_multiplier': 2.0,
                    #  u'backend_interface_retry_interval_secs': 1,
                    #  u'backend_type': u'ALBA'}
                    scrub_config = Configuration.get('ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid))
                    scrub_config['port'] = port
                    scrub_config['transport'] = 'tcp'
                    Configuration.set(scrub_config_key, json.dumps(scrub_config, indent=4), raw=True)

                    params = {'VPOOL_NAME': vpool.name,
                              'LOG_SINK': LogHandler.get_sink_path('alba_proxy'),
                              'CONFIG_PATH': Configuration.get_configuration_path(scrub_config_key)}
                    ServiceManager.add_service(name='ovs-albaproxy', params=params, client=client, target_name=alba_proxy_service)
                    ServiceManager.start_service(name=alba_proxy_service, client=client)
                    ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deployed ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))

                backend_config = Configuration.get('ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, vpool.storagedrivers[0].storagedriver_id))['backend_connection_manager']
                backend_config['alba_connection_host'] = '127.0.0.1'
                backend_config['alba_connection_port'] = scrub_config['port']
                Configuration.set(backend_config_key, json.dumps({"backend_connection_manager": backend_config}, indent=4), raw=True)
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - An error occurred deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)
            if client is not None and ServiceManager.has_service(name=alba_proxy_service, client=client) is True:
                if ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True:
                    ServiceManager.stop_service(name=alba_proxy_service, client=client)
                ServiceManager.remove_service(name=alba_proxy_service, client=client)
            if Configuration.exists(scrub_config_key):
                Configuration.delete(scrub_config_key)
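        # Note: there is no early return here; if the proxy could not be deployed, the scrubbing
        # loop below will then typically fail per vDisk and those errors are collected in error_messages as well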

        try:
            # Empty the queue with vDisks to scrub
            with remote(storagerouter.ip, [VDisk]) as rem:
                while True:
                    vdisk = None
                    vdisk_guid = queue.get(False)
                    try:
                        # Check MDS master is local. Trigger MDS handover if necessary
                        vdisk = rem.VDisk(vdisk_guid)
                        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Started scrubbing at location {3}'.format(vpool.name, storagerouter.name, vdisk.name, scrub_directory))
                        configs = _verify_mds_config(current_vdisk=vdisk)
                        storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
                        if configs[0].get('ip') != storagedriver.storagerouter.ip:
                            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - MDS master is not local, trigger handover'.format(vpool.name, storagerouter.name, vdisk.name))
                            MDSServiceController.ensure_safety(VDisk(vdisk_guid))  # Do not use a remote VDisk instance here
                            configs = _verify_mds_config(current_vdisk=vdisk)
                            if configs[0].get('ip') != storagedriver.storagerouter.ip:
                                ScheduledTaskController._logger.warning('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Skipping because master MDS still not local'.format(vpool.name, storagerouter.name, vdisk.name))
                                continue

                        # Do the actual scrubbing
                        with vdisk.storagedriver_client.make_locked_client(str(vdisk.volume_id)) as locked_client:
                            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Retrieve and apply scrub work'.format(vpool.name, storagerouter.name, vdisk.name))
                            work_units = locked_client.get_scrubbing_workunits()
                            for work_unit in work_units:
                                res = locked_client.scrub(work_unit=work_unit,
                                                          scratch_dir=scrub_directory,
                                                          log_sinks=[LogHandler.get_sink_path('scrubber', allow_override=True)],
                                                          backend_config=Configuration.get_configuration_path(backend_config_key))
                                locked_client.apply_scrubbing_result(scrubbing_work_result=res)
                            if work_units:
                                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - {3} work units successfully applied'.format(vpool.name, storagerouter.name, vdisk.name, len(work_units)))
                            else:
                                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - No scrubbing required'.format(vpool.name, storagerouter.name, vdisk.name))
                    except Exception:
                        if vdisk is None:
                            message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk with guid {2} could not be found'.format(vpool.name, storagerouter.name, vdisk_guid)
                        else:
                            message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Scrubbing failed'.format(vpool.name, storagerouter.name, vdisk.name)
                        error_messages.append(message)
                        ScheduledTaskController._logger.exception(message)

        except Empty:  # Raised when all items have been fetched from the queue
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Queue completely processed'.format(vpool.name, storagerouter.name))
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - Scrubbing failed'.format(vpool.name, storagerouter.name)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)

        # Delete the proxy again
        try:
            with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removing service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                client = SSHClient(storagerouter, 'root')
                client.dir_delete(scrub_directory)
                if ServiceManager.has_service(alba_proxy_service, client=client):
                    ServiceManager.stop_service(alba_proxy_service, client=client)
                    ServiceManager.remove_service(alba_proxy_service, client=client)
                if Configuration.exists(scrub_config_key):
                    Configuration.delete(scrub_config_key)
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removed service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2} failed'.format(vpool.name, storagerouter.name, alba_proxy_service)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)
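
A minimal sketch of how this helper could be driven (the per-location threading and the scrub_locations list are illustrative assumptions; only the queue/vpool/scrub_info/error_messages contract comes from the example above):

    from Queue import Queue  # Python 2, matching the codebase
    from threading import Thread

    error_messages = []
    vdisk_queue = Queue()
    for vdisk_guid in vdisk_guids_to_scrub:  # hypothetical list of guids, all from one vPool
        vdisk_queue.put(vdisk_guid)
    threads = []
    for scrub_info in scrub_locations:  # hypothetical list: [{'scrub_path': ..., 'storage_router': ...}]
        thread = Thread(target=execute_scrub_work,
                        args=(vdisk_queue, vpool, scrub_info, error_messages))
        thread.start()
        threads.append(thread)
    for thread in threads:
        thread.join()
    if error_messages:
        raise Exception('Errors during scrubbing:\n - {0}'.format('\n - '.join(error_messages)))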
Example #35
    def _execute_scrub_work(scrub_location, vdisk_guids):
        """
        Execute scrub work for the given vDisks
        :param scrub_location: directory on the StorageRouter where scrub work can be stored
        :param vdisk_guids: guids of the vDisks to scrub
        :return: the guids of the vDisks that were offered for scrubbing
        """
        def verify_mds_config(current_vdisk):
            """
            Retrieve the metadata backend configuration for vDisk
            :param current_vdisk: vDisk to retrieve configuration for
            :type current_vdisk:  vDisk

            :return:              MDS configuration for vDisk
            """
            current_vdisk.invalidate_dynamics(['info'])
            vdisk_configs = current_vdisk.info['metadata_backend_config']
            if len(vdisk_configs) == 0:
                raise RuntimeError('Could not load MDS configuration')
            return vdisk_configs

        logger.info('Execute Scrub - Started')
        logger.info(
            'Execute Scrub - Scrub location - {0}'.format(scrub_location))
        total = len(vdisk_guids)
        skipped = 0
        storagedrivers = {}
        failures = []
        for vdisk_guid in vdisk_guids:
            vdisk = VDisk(vdisk_guid)
            try:
                # Load the vDisk's StorageDriver
                logger.info(
                    'Execute Scrub - Virtual disk {0} - {1} - Started'.format(
                        vdisk.guid, vdisk.name))
                vdisk.invalidate_dynamics(['storagedriver_id'])
                if vdisk.storagedriver_id not in storagedrivers:
                    storagedrivers[vdisk.storagedriver_id] = StorageDriverList.get_by_storagedriver_id(
                        vdisk.storagedriver_id)
                storagedriver = storagedrivers[vdisk.storagedriver_id]

                # Load the vDisk's MDS configuration
                configs = verify_mds_config(current_vdisk=vdisk)

                # Check MDS master is local. Trigger MDS handover if necessary
                if configs[0].get('ip') != storagedriver.storagerouter.ip:
                    logger.debug(
                        'Execute Scrub - Virtual disk {0} - {1} - MDS master is not local, trigger handover'
                        .format(vdisk.guid, vdisk.name))
                    MDSServiceController.ensure_safety(vdisk)
                    configs = verify_mds_config(current_vdisk=vdisk)
                    if configs[0].get('ip') != storagedriver.storagerouter.ip:
                        skipped += 1
                        logger.info(
                            'Execute Scrub - Virtual disk {0} - {1} - Skipping because master MDS still not local'
                            .format(vdisk.guid, vdisk.name))
                        continue
                with vdisk.storagedriver_client.make_locked_client(
                        str(vdisk.volume_id)) as locked_client:
                    logger.info(
                        'Execute Scrub - Virtual disk {0} - {1} - Retrieve and apply scrub work'
                        .format(vdisk.guid, vdisk.name))
                    work_units = locked_client.get_scrubbing_workunits()
                    for work_unit in work_units:
                        scrubbing_result = locked_client.scrub(
                            work_unit, scrub_location)
                        locked_client.apply_scrubbing_result(scrubbing_result)
                    if work_units:
                        logger.info(
                            'Execute Scrub - Virtual disk {0} - {1} - Scrub successfully applied'
                            .format(vdisk.guid, vdisk.name))
                    else:
                        logger.info(
                            'Execute Scrub - Virtual disk {0} - {1} - No scrubbing required'
                            .format(vdisk.guid, vdisk.name))
            except Exception as ex:
                failures.append(
                    'Failed scrubbing work unit for volume {0} with guid {1}: {2}'
                    .format(vdisk.name, vdisk.guid, ex))

        failed = len(failures)
        logger.info(
            'Execute Scrub - Finished - Success: {0} - Failed: {1} - Skipped: {2}'
            .format((total - failed - skipped), failed, skipped))
        if failed > 0:
            raise Exception('\n - '.join(failures))
        return vdisk_guids
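
A hedged usage example for this variant (the vPool object and the scrub location are illustrative):

    vdisk_guids = [vdisk.guid for vdisk in vpool.vdisks]  # hypothetical vPool object
    _execute_scrub_work(scrub_location='/mnt/temp/scrub_work', vdisk_guids=vdisk_guids)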
Example #36
    def update_vmachine_config(vmachine, vm_object, pmachine=None):
        """
        Update a vMachine configuration with the given hypervisor vMachine configuration
        :param vmachine: Virtual Machine to update
        :param vm_object: New virtual machine info
        :param pmachine: Physical machine of the virtual machine
        """
        try:
            vdisks_synced = 0
            if vmachine.name is None:
                MessageController.fire(MessageController.Type.EVENT,
                                       {'type': 'vmachine_created',
                                        'metadata': {'name': vm_object['name']}})
            elif vmachine.name != vm_object['name']:
                MessageController.fire(MessageController.Type.EVENT,
                                       {'type': 'vmachine_renamed',
                                        'metadata': {'old_name': vmachine.name,
                                                     'new_name': vm_object['name']}})
            if pmachine is not None:
                vmachine.pmachine = pmachine
            vmachine.name = vm_object['name']
            vmachine.hypervisor_id = vm_object['id']
            vmachine.devicename = vm_object['backing']['filename']
            vmachine.save()
            # Updating and linking disks
            storagedrivers = StorageDriverList.get_storagedrivers()
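            # Key each StorageDriver by '<storage_ip>:<mountpoint>' so the hypervisor's datastore identifiers can be matched below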
            datastores = dict([('{0}:{1}'.format(storagedriver.storage_ip, storagedriver.mountpoint), storagedriver) for storagedriver in storagedrivers])
            vdisk_guids = []
            mutex = volatile_mutex('{0}_{1}'.format(vmachine.name, vmachine.devicename))
            for disk in vm_object['disks']:
                ensure_safety = False
                if disk['datastore'] in vm_object['datastores']:
                    datastore = vm_object['datastores'][disk['datastore']]
                    if datastore in datastores:
                        try:
                            mutex.acquire(wait=10)
                            vdisk = VDiskList.get_by_devicename_and_vpool(disk['filename'], datastores[datastore].vpool)
                            if vdisk is None:
                                # The disk couldn't be located, but is in our datastore. We might be in a recovery scenario
                                vdisk = VDisk()
                                vdisk.vpool = datastores[datastore].vpool
                                vdisk.reload_client()
                                vdisk.devicename = disk['filename']
                                vdisk.volume_id = vdisk.storagedriver_client.get_volume_id(str(disk['backingfilename']))
                                vdisk.size = vdisk.info['volume_size']
                                vdisk.metadata = {'lba_size': vdisk.info['lba_size'],
                                                  'cluster_multiplier': vdisk.info['cluster_multiplier']}
                                # Create the disk in a locked context, but don't execute the long-running task in the same context
                                vdisk.save()
                                ensure_safety = True
                        finally:
                            mutex.release()
                        if ensure_safety:
                            MDSServiceController.ensure_safety(vdisk)
                            VDiskController.dtl_checkup(vdisk_guid=vdisk.guid)

                        # Update the disk with information from the hypervisor
                        if vdisk.vmachine is None:
                            MessageController.fire(MessageController.Type.EVENT,
                                                   {'type': 'vdisk_attached',
                                                    'metadata': {'vmachine_name': vmachine.name,
                                                                 'vdisk_name': disk['name']}})
                        vdisk.vmachine = vmachine
                        vdisk.name = disk['name']
                        vdisk.order = disk['order']
                        vdisk.save()
                        vdisk_guids.append(vdisk.guid)
                        vdisks_synced += 1

            for vdisk in vmachine.vdisks:
                if vdisk.guid not in vdisk_guids:
                    MessageController.fire(MessageController.Type.EVENT,
                                           {'type': 'vdisk_detached',
                                            'metadata': {'vmachine_name': vmachine.name,
                                                         'vdisk_name': vdisk.name}})
                    vdisk.vmachine = None
                    vdisk.save()

            VMachineController._logger.info('Updating vMachine finished (name {0}, {1} vdisks (re)linked)'.format(
                vmachine.name, vdisks_synced
            ))
        except Exception as ex:
            VMachineController._logger.info('Error during vMachine update: {0}'.format(str(ex)))
            raise
Example #37
    def add_vpool(cls, parameters):
        """
        Add a vPool to the machine this task is running on
        :param parameters: Parameters for vPool creation
        :type parameters: dict
        :return: None
        :rtype: NoneType
        """
        # TODO: Add logging
        cls._logger.debug('Adding vpool. Parameters: {}'.format(parameters))
        # VALIDATIONS
        if not isinstance(parameters, dict):
            raise ValueError(
                'Parameters passed to create a vPool should be of type dict')

        # Check StorageRouter existence
        storagerouter = StorageRouterList.get_by_ip(
            ip=parameters.get('storagerouter_ip'))
        if storagerouter is None:
            raise RuntimeError('Could not find StorageRouter')

        # Validate requested vPool configurations
        vp_installer = VPoolInstaller(name=parameters.get('vpool_name'))
        vp_installer.validate(storagerouter=storagerouter)

        # Validate requested StorageDriver configurations
        cls._logger.info(
            'vPool {0}: Validating StorageDriver configurations'.format(
                vp_installer.name))
        sd_installer = StorageDriverInstaller(
            vp_installer=vp_installer,
            configurations={
                'storage_ip': parameters.get('storage_ip'),
                'caching_info': parameters.get('caching_info'),
                'backend_info': {'main': parameters.get('backend_info'),
                                 StorageDriverConfiguration.CACHE_BLOCK: parameters.get('backend_info_bc'),
                                 StorageDriverConfiguration.CACHE_FRAGMENT: parameters.get('backend_info_fc')},
                'connection_info': {'main': parameters.get('connection_info'),
                                    StorageDriverConfiguration.CACHE_BLOCK: parameters.get('connection_info_bc'),
                                    StorageDriverConfiguration.CACHE_FRAGMENT: parameters.get('connection_info_fc')},
                'sd_configuration': parameters.get('config_params')
            })

        partitions_mutex = volatile_mutex('add_vpool_partitions_{0}'.format(
            storagerouter.guid))
        try:
            # VPOOL CREATION
            # Create the vPool as soon as possible in the process to be displayed in the GUI (INSTALLING/EXTENDING state)
            if vp_installer.is_new is True:
                vp_installer.create(rdma_enabled=sd_installer.rdma_enabled)
                vp_installer.configure_mds(
                    config=parameters.get('mds_config_params', {}))
            else:
                vp_installer.update_status(status=VPool.STATUSES.EXTENDING)

            # ADDITIONAL VALIDATIONS
            # Check StorageRouter connectivity
            cls._logger.info(
                'vPool {0}: Validating StorageRouter connectivity'.format(
                    vp_installer.name))
            linked_storagerouters = [storagerouter]
            if vp_installer.is_new is False:
                linked_storagerouters += [
                    sd.storagerouter
                    for sd in vp_installer.vpool.storagedrivers
                ]

            sr_client_map = SSHClient.get_clients(
                endpoints=linked_storagerouters, user_names=['ovs', 'root'])
            offline_nodes = sr_client_map.pop('offline')
            if storagerouter in offline_nodes:
                raise RuntimeError('Node on which the vPool is being {0} is not reachable'.format(
                    'created' if vp_installer.is_new is True else 'extended'))

            sr_installer = StorageRouterInstaller(
                root_client=sr_client_map[storagerouter]['root'],
                sd_installer=sd_installer,
                vp_installer=vp_installer,
                storagerouter=storagerouter)

            # When 2 or more jobs simultaneously run on the same StorageRouter, we need to check and create the StorageDriver partitions in locked context
            partitions_mutex.acquire(wait=60)
            sr_installer.partition_info = StorageRouterController.get_partition_info(
                storagerouter_guid=storagerouter.guid)
            sr_installer.validate_vpool_extendable()
            sr_installer.validate_global_write_buffer(
                requested_size=parameters.get('writecache_size', 0))
            sr_installer.validate_local_cache_size(
                requested_proxies=parameters.get('parallelism', {}).get(
                    'proxies', 2))

            # MODEL STORAGEDRIVER AND PARTITION JUNCTIONS
            sd_installer.create()
            sd_installer.create_partitions()
            partitions_mutex.release()

            vp_installer.refresh_metadata()
        except Exception:
            cls._logger.exception(
                'Something went wrong during the validation or modeling of vPool {0} on StorageRouter {1}'
                .format(vp_installer.name, storagerouter.name))
            partitions_mutex.release()
            vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
            raise

        # Arakoon setup
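        # Retry roughly once per second, for at most ~300 seconds, before giving up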
        counter = 0
        while counter < 300:
            try:
                if StorageDriverController.manual_voldrv_arakoon_checkup() is True:
                    break
            except Exception:
                cls._logger.exception(
                    'Arakoon checkup for voldrv cluster failed')
                vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
                raise
            counter += 1
            time.sleep(1)
            if counter == 300:
                vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
                raise RuntimeError(
                    'Arakoon checkup for the StorageDriver cluster could not be started'
                )

        # Cluster registry
        try:
            vp_installer.configure_cluster_registry(allow_raise=True)
        except Exception:
            if vp_installer.is_new is True:
                vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
            else:
                vp_installer.revert_vpool(status=VPool.STATUSES.FAILURE)
            raise

        try:
            sd_installer.setup_proxy_configs()
            sd_installer.configure_storagedriver_service()
            DiskController.sync_with_reality(storagerouter.guid)
            MDSServiceController.prepare_mds_service(
                storagerouter=storagerouter, vpool=vp_installer.vpool)

            # Update the MDS safety if changed via API (vpool.configuration will be available at this point also for the newly added StorageDriver)
            vp_installer.vpool.invalidate_dynamics('configuration')
            if vp_installer.mds_safety is not None and vp_installer.vpool.configuration[
                    'mds_config']['mds_safety'] != vp_installer.mds_safety:
                Configuration.set(
                    key='/ovs/vpools/{0}/mds_config|mds_safety'.format(
                        vp_installer.vpool.guid),
                    value=vp_installer.mds_safety)

            # Create and start watcher volumedriver, DTL, proxies and StorageDriver services
            sd_installer.start_services()

            # Post creation/extension checkups
            mds_config_set = MDSServiceController.get_mds_storagedriver_config_set(
                vpool=vp_installer.vpool, offline_nodes=offline_nodes)
            for sr, clients in sr_client_map.iteritems():
                for current_storagedriver in [
                        sd for sd in sr.storagedrivers
                        if sd.vpool_guid == vp_installer.vpool.guid
                ]:
                    storagedriver_config = StorageDriverConfiguration(
                        vpool_guid=vp_installer.vpool.guid,
                        storagedriver_id=current_storagedriver.storagedriver_id
                    )
                    if storagedriver_config.config_missing is False:
                        # The filesystem section in the StorageDriver configuration contains all parameters used for vDisks created directly on the filesystem
                        # So when a vDisk gets created on the filesystem, these MDSes will be assigned to it
                        storagedriver_config.configure_filesystem(
                            fs_metadata_backend_mds_nodes=mds_config_set[
                                sr.guid])
                        storagedriver_config.save(client=clients['ovs'])

            # Everything's reconfigured, refresh new cluster configuration
            for current_storagedriver in vp_installer.vpool.storagedrivers:
                if current_storagedriver.storagerouter not in sr_client_map:
                    continue
                vp_installer.vpool.storagedriver_client.update_cluster_node_configs(
                    str(current_storagedriver.storagedriver_id),
                    req_timeout_secs=10)
        except Exception:
            cls._logger.exception('vPool {0}: Creation failed'.format(
                vp_installer.name))
            vp_installer.update_status(status=VPool.STATUSES.FAILURE)
            raise

        # When a node is offline, or when 1 or more volumes are not running, the calls below can fail
        # These are scheduled tasks anyway, so we don't really care whether they succeed or not here
        try:
            VDiskController.dtl_checkup(vpool_guid=vp_installer.vpool.guid,
                                        ensure_single_timeout=600)
        except:
            pass
        for vdisk in vp_installer.vpool.vdisks:
            try:
                MDSServiceController.ensure_safety(vdisk_guid=vdisk.guid)
            except:
                pass
        vp_installer.update_status(status=VPool.STATUSES.RUNNING)
        cls._logger.info('Add vPool {0} ended successfully'.format(
            vp_installer.name))
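
A sketch of the parameters dict this task consumes (the keys are taken from the parameters.get() calls above; all values are illustrative, and the surrounding class name is an assumption):

    parameters = {'vpool_name': 'vpool01',
                  'storagerouter_ip': '10.100.1.1',
                  'storage_ip': '10.100.1.1',
                  'caching_info': {},           # cache settings
                  'backend_info': {},           # main backend; 'backend_info_bc'/'backend_info_fc' are optional
                  'connection_info': {},        # main connection; '_bc'/'_fc' variants are optional
                  'config_params': {},          # StorageDriver configuration
                  'mds_config_params': {},      # optional MDS configuration
                  'writecache_size': 10,        # requested global write buffer size
                  'parallelism': {'proxies': 2}}
    StorageRouterController.add_vpool(parameters)  # assuming this classmethod lives on StorageRouterController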
Example #39
    def deletescrubsnapshots(timestamp=None):
        """
        Delete snapshots according to the implemented retention policy & scrub the remaining base vDisks
        :param timestamp: epoch timestamp to evaluate the policy against (defaults to the current time)

        Implemented delete snapshot policy
        (age | bucket size | # buckets | snapshot kept per bucket | covered span):
        < 1d | 1d bucket | 1 | best of bucket   | 1d
        < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w
        < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m
        > 1m | delete
        """

        logger.info('Delete snapshots started')

        day = 60 * 60 * 24
        week = day * 7

        # Calculate bucket structure
        if timestamp is None:
            timestamp = time.time()
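        # Align buckets on day boundaries: midnight of the given date, shifted back one day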
        offset = int(
            mktime(datetime.fromtimestamp(timestamp).date().timetuple())) - day
        buckets = []
        # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[
        for i in xrange(0, 7):
            buckets.append({
                'start': offset - (day * i),
                'end': offset - (day * (i + 1)),
                'type': '1d',
                'snapshots': []
            })
        # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[
        for i in xrange(1, 4):
            buckets.append({
                'start': offset - (week * i),
                'end': offset - (week * (i + 1)),
                'type': '1w',
                'snapshots': []
            })
        buckets.append({
            'start': offset - (week * 4),
            'end': 0,
            'type': 'rest',
            'snapshots': []
        })

        # Place all snapshots in bucket_chains
        bucket_chains = []
        for vmachine in VMachineList.get_customer_vmachines():
            if any(vd.info['object_type'] in ['BASE']
                   for vd in vmachine.vdisks):
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vmachine.snapshots:
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            for diskguid, snapshotguid in snapshot['snapshots'].iteritems():
                                bucket['snapshots'].append({'timestamp': timestamp,
                                                            'snapshotid': snapshotguid,
                                                            'diskguid': diskguid,
                                                            'is_consistent': snapshot['is_consistent']})
                bucket_chains.append(bucket_chain)

        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] in ['BASE']:
                bucket_chain = copy.deepcopy(buckets)
                for snapshot in vdisk.snapshots:
                    timestamp = int(snapshot['timestamp'])
                    for bucket in bucket_chain:
                        if bucket['start'] >= timestamp > bucket['end']:
                            bucket['snapshots'].append({'timestamp': timestamp,
                                                        'snapshotid': snapshot['guid'],
                                                        'diskguid': vdisk.guid,
                                                        'is_consistent': snapshot['is_consistent']})
                bucket_chains.append(bucket_chain)

        # Clean out the snapshot bucket_chains, we delete the snapshots we want to keep
        # And we'll remove all snapshots that remain in the buckets
        for bucket_chain in bucket_chains:
            first = True
            for bucket in bucket_chain:
                if first is True:
                    best = None
                    for snapshot in bucket['snapshots']:
                        if best is None:
                            best = snapshot
                        # Consistent is better than inconsistent
                        elif snapshot['is_consistent'] and not best['is_consistent']:
                            best = snapshot
                        # Newer (larger timestamp) is better than older snapshots
                        elif snapshot['is_consistent'] == best['is_consistent'] and \
                                snapshot['timestamp'] > best['timestamp']:
                            best = snapshot
                    bucket['snapshots'] = [
                        s for s in bucket['snapshots']
                        if s['timestamp'] != best['timestamp']
                    ]
                    first = False
                elif bucket['end'] > 0:
                    oldest = None
                    for snapshot in bucket['snapshots']:
                        if oldest is None:
                            oldest = snapshot
                        # Older (smaller timestamp) is the one we want to keep
                        elif snapshot['timestamp'] < oldest['timestamp']:
                            oldest = snapshot
                    bucket['snapshots'] = [
                        s for s in bucket['snapshots']
                        if s['timestamp'] != oldest['timestamp']
                    ]

        # Delete obsolete snapshots
        for bucket_chain in bucket_chains:
            for bucket in bucket_chain:
                for snapshot in bucket['snapshots']:
                    VDiskController.delete_snapshot(
                        diskguid=snapshot['diskguid'],
                        snapshotid=snapshot['snapshotid'])

        logger.info('Delete snapshots finished')
        logger.info('Scrubbing started')

        vdisks = []
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info['object_type'] in ['BASE'] and len(
                        vdisk.child_vdisks) == 0:
                    vdisks.append(vdisk)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] in ['BASE'] and len(
                    vdisk.child_vdisks) == 0:
                vdisks.append(vdisk)

        total = 0
        failed = 0
        skipped = 0
        storagedrivers = {}
        for vdisk in vdisks:
            try:
                total += 1
                # Load the vDisk's StorageDriver
                vdisk.invalidate_dynamics(['info', 'storagedriver_id'])
                if vdisk.storagedriver_id not in storagedrivers:
                    storagedrivers[vdisk.storagedriver_id] = StorageDriverList.get_by_storagedriver_id(
                        vdisk.storagedriver_id)
                storagedriver = storagedrivers[vdisk.storagedriver_id]
                # Load the vDisk's MDS configuration
                vdisk.invalidate_dynamics(['info'])
                configs = vdisk.info['metadata_backend_config']
                if len(configs) == 0:
                    raise RuntimeError('Could not load MDS configuration')
                if configs[0]['ip'] != storagedriver.storagerouter.ip:
                    # The MDS master is not local. Trigger an MDS handover and try again
                    logger.debug('MDS for volume {0} is not local. Trigger handover'.format(vdisk.volume_id))
                    MDSServiceController.ensure_safety(vdisk)
                    vdisk.invalidate_dynamics(['info'])
                    configs = vdisk.info['metadata_backend_config']
                    if len(configs) == 0:
                        raise RuntimeError('Could not load MDS configuration')
                    if configs[0]['ip'] != storagedriver.storagerouter.ip:
                        skipped += 1
                        logger.info(
                            'Skipping scrubbing work unit for volume {0}: MDS master is not local'
                            .format(vdisk.volume_id))
                        continue
                work_units = vdisk.storagedriver_client.get_scrubbing_workunits(
                    str(vdisk.volume_id))
                for work_unit in work_units:
                    scrubbing_result = _storagedriver_scrubber.scrub(
                        work_unit, str(storagedriver.mountpoint_temp))
                    vdisk.storagedriver_client.apply_scrubbing_result(
                        scrubbing_result)
            except Exception as ex:
                failed += 1
                logger.info(
                    'Failed scrubbing work unit for volume {0}: {1}'.format(
                        vdisk.volume_id, ex))
Example #40
    def create_from_template(diskguid,
                             machinename,
                             devicename,
                             pmachineguid,
                             machineguid=None,
                             storagedriver_guid=None):
        """
        Create a disk from a template

        @param devicename: device file name for the disk (eg: mydisk-flat.vmdk)
        @param machineguid: guid of the machine to assign disk to
        @return diskguid: guid of new disk
        """

        pmachine = PMachine(pmachineguid)
        hypervisor = Factory.get(pmachine)
        disk_path = hypervisor.get_disk_path(machinename, devicename)

        description = '{} {}'.format(machinename, devicename)
        properties_to_clone = [
            'description', 'size', 'type', 'retentionpolicyid',
            'snapshotpolicyid', 'vmachine', 'vpool'
        ]

        vdisk = VDisk(diskguid)
        if vdisk.vmachine and not vdisk.vmachine.is_vtemplate:
            # Disk might not be attached to a vmachine, but still be a template
            raise RuntimeError('The given vdisk does not belong to a template')

        if storagedriver_guid is not None:
            storagedriver_id = StorageDriver(
                storagedriver_guid).storagedriver_id
        else:
            storagedriver_id = vdisk.storagedriver_id
        storagedriver = StorageDriverList.get_by_storagedriver_id(
            storagedriver_id)
        if storagedriver is None:
            raise RuntimeError(
                'Could not find StorageDriver with id {0}'.format(
                    storagedriver_id))

        new_vdisk = VDisk()
        new_vdisk.copy(vdisk, include=properties_to_clone)
        new_vdisk.vpool = vdisk.vpool
        new_vdisk.devicename = hypervisor.clean_backing_disk_filename(
            disk_path)
        new_vdisk.parent_vdisk = vdisk
        new_vdisk.name = '{}-clone'.format(vdisk.name)
        new_vdisk.description = description
        new_vdisk.vmachine = VMachine(
            machineguid) if machineguid else vdisk.vmachine
        new_vdisk.save()

        mds_service = MDSServiceController.get_preferred_mds(
            storagedriver.storagerouter, vdisk.vpool)
        if mds_service is None:
            raise RuntimeError('Could not find a MDS service')

        logger.info('Create disk from template {} to new disk {} to location {}'.format(
            vdisk.name, new_vdisk.name, disk_path))
        try:
            volume_id = vdisk.storagedriver_client.create_clone_from_template(
                target_path=disk_path,
                metadata_backend_config=MDSMetaDataBackendConfig([
                    MDSNodeConfig(address=str(
                        mds_service.service.storagerouter.ip),
                                  port=mds_service.service.ports[0])
                ]),
                parent_volume_id=str(vdisk.volume_id),
                node_id=str(storagedriver_id))
            new_vdisk.volume_id = volume_id
            new_vdisk.save()
            MDSServiceController.ensure_safety(new_vdisk)

        except Exception as ex:
            logger.error('Clone disk on volumedriver level failed with exception: {0}'.format(str(ex)))
            new_vdisk.delete()
            raise

        return {
            'diskguid': new_vdisk.guid,
            'name': new_vdisk.name,
            'backingdevice': disk_path
        }
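
A hedged invocation example (the guids and names are illustrative):

    result = create_from_template(diskguid=template_vdisk.guid,  # hypothetical template vDisk
                                  machinename='myVM',
                                  devicename='mydisk-flat.vmdk',
                                  pmachineguid=pmachine.guid)     # hypothetical PMachine
    print(result['backingdevice'])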
Example #41
    def clone(diskguid, snapshotid, devicename, pmachineguid, machinename=None, machineguid=None, detached=False):
        """
        Clone a disk
        :param diskguid: Guid of the disk to clone
        :param snapshotid: ID of the snapshot to clone from
        :param devicename: Name of the device to use in clone's description
        :param pmachineguid: Guid of the physical machine
        :param machinename: Name of the machine the disk is attached to
        :param machineguid: Guid of the machine
        :param detached: Boolean indicating whether the disk should be created detached from any vMachine
        """
        # 1. Validations
        name_regex = "^[0-9a-zA-Z][-_a-zA-Z0-9]{1,48}[a-zA-Z0-9]$"
        if not re.match(name_regex, devicename):
            raise RuntimeError("Invalid name for virtual disk clone")

        if VDiskList.get_vdisk_by_name(vdiskname=devicename) is not None:
            raise RuntimeError("A virtual disk with this name already exists")

        vdisk = VDisk(diskguid)
        storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
        if storagedriver is None:
            raise RuntimeError('Could not find StorageDriver with ID {0}'.format(vdisk.storagedriver_id))

        if machineguid is not None and detached is True:
            raise ValueError('A vMachine GUID was specified while detached is True')

        # 2. Create new snapshot if required
        if snapshotid is None:
            timestamp = str(int(time.time()))
            metadata = {'label': '',
                        'is_consistent': False,
                        'timestamp': timestamp,
                        'machineguid': machineguid,
                        'is_automatic': True}
            sd_snapshot_id = VDiskController.create_snapshot(diskguid, metadata)
            tries = 25  # 5 minutes
            while snapshotid is None and tries > 0:
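                # Increasing backoff: sleeps 0, 1, 2, ... 24 seconds between checks (~300 seconds in total)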
                time.sleep(25 - tries)
                tries -= 1
                vdisk.invalidate_dynamics(['snapshots'])
                for snapshot in vdisk.snapshots:
                    if snapshot['guid'] != sd_snapshot_id:
                        continue
                    if snapshot['in_backend'] is True:
                        snapshotid = snapshot['guid']
            if snapshotid is None:
                try:
                    VDiskController.delete_snapshot(diskguid=diskguid,
                                                    snapshotid=sd_snapshot_id)
                except:
                    pass
                raise RuntimeError('Could not find created snapshot in time')

        # 3. Model new cloned virtual disk
        hypervisor = Factory.get(PMachine(pmachineguid))
        location = hypervisor.get_disk_path(machinename, devicename)

        new_vdisk = VDisk()
        new_vdisk.copy(vdisk, include=['description', 'size', 'type', 'retentionpolicyguid', 'snapshotpolicyguid', 'autobackup'])
        new_vdisk.parent_vdisk = vdisk
        new_vdisk.name = devicename
        new_vdisk.description = devicename if machinename is None else '{0} {1}'.format(machinename, devicename)
        new_vdisk.devicename = hypervisor.clean_backing_disk_filename(location)
        new_vdisk.parentsnapshot = snapshotid
        if detached is False:
            new_vdisk.vmachine = VMachine(machineguid) if machineguid else vdisk.vmachine
        new_vdisk.vpool = vdisk.vpool
        new_vdisk.save()

        # 4. Configure Storage Driver
        try:
            mds_service = MDSServiceController.get_preferred_mds(storagedriver.storagerouter, vdisk.vpool)
            if mds_service is None:
                raise RuntimeError('Could not find a MDS service')

            logger.info('Clone snapshot {0} of disk {1} to location {2}'.format(snapshotid, vdisk.name, location))
            backend_config = MDSMetaDataBackendConfig([MDSNodeConfig(address=str(mds_service.service.storagerouter.ip),
                                                                     port=mds_service.service.ports[0])])
            volume_id = vdisk.storagedriver_client.create_clone(target_path=location,
                                                                metadata_backend_config=backend_config,
                                                                parent_volume_id=str(vdisk.volume_id),
                                                                parent_snapshot_id=str(snapshotid),
                                                                node_id=str(vdisk.storagedriver_id))
        except Exception as ex:
            logger.error('Caught exception during clone, trying to delete the volume. {0}'.format(ex))
            try:
                VDiskController.clean_bad_disk(new_vdisk.guid)
            except Exception as ex2:
                logger.exception('Exception during exception handling of "clone": {0}'.format(str(ex2)))
            raise

        new_vdisk.volume_id = volume_id
        new_vdisk.save()

        # 5. Check MDS & DTL for new clone
        try:
            MDSServiceController.ensure_safety(new_vdisk)
        except Exception as ex:
            logger.error('Caught exception during "ensure_safety" {0}'.format(ex))
        VDiskController.dtl_checkup.delay(vdisk_guid=new_vdisk.guid)

        return {'diskguid': new_vdisk.guid,
                'name': new_vdisk.name,
                'backingdevice': location}
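
A hedged invocation example (the guids are illustrative; passing snapshotid=None makes the method create and await its own snapshot):

    result = clone(diskguid=vdisk.guid,         # hypothetical source vDisk
                   snapshotid=None,
                   devicename='clone01',
                   pmachineguid=pmachine.guid,  # hypothetical PMachine
                   detached=True)
    print(result['backingdevice'])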
Example #42
    def clone(diskguid,
              snapshotid,
              devicename,
              pmachineguid,
              machinename,
              machineguid=None):
        """
        Clone a disk
        """
        pmachine = PMachine(pmachineguid)
        hypervisor = Factory.get(pmachine)
        description = '{} {}'.format(machinename, devicename)
        properties_to_clone = [
            'description', 'size', 'type', 'retentionpolicyguid',
            'snapshotpolicyguid', 'autobackup'
        ]
        vdisk = VDisk(diskguid)
        location = hypervisor.get_backing_disk_path(machinename, devicename)

        new_vdisk = VDisk()
        new_vdisk.copy(vdisk, include=properties_to_clone)
        new_vdisk.parent_vdisk = vdisk
        new_vdisk.name = '{0}-clone'.format(vdisk.name)
        new_vdisk.description = description
        new_vdisk.devicename = hypervisor.clean_backing_disk_filename(location)
        new_vdisk.parentsnapshot = snapshotid
        new_vdisk.vmachine = VMachine(
            machineguid) if machineguid else vdisk.vmachine
        new_vdisk.vpool = vdisk.vpool
        new_vdisk.save()

        try:
            storagedriver = StorageDriverList.get_by_storagedriver_id(
                vdisk.storagedriver_id)
            if storagedriver is None:
                raise RuntimeError(
                    'Could not find StorageDriver with id {0}'.format(
                        vdisk.storagedriver_id))

            mds_service = MDSServiceController.get_preferred_mds(
                storagedriver.storagerouter, vdisk.vpool)
            if mds_service is None:
                raise RuntimeError('Could not find a MDS service')

            logger.info('Clone snapshot {} of disk {} to location {}'.format(
                snapshotid, vdisk.name, location))
            volume_id = vdisk.storagedriver_client.create_clone(
                target_path=location,
                metadata_backend_config=MDSMetaDataBackendConfig([
                    MDSNodeConfig(address=str(
                        mds_service.service.storagerouter.ip),
                                  port=mds_service.service.ports[0])
                ]),
                parent_volume_id=str(vdisk.volume_id),
                parent_snapshot_id=str(snapshotid),
                node_id=str(vdisk.storagedriver_id))
        except Exception as ex:
            logger.error(
                'Caught exception during clone, trying to delete the volume. {0}'
                .format(ex))
            new_vdisk.delete()
            VDiskController.delete_volume(location)
            raise

        new_vdisk.volume_id = volume_id
        new_vdisk.save()

        try:
            MDSServiceController.ensure_safety(new_vdisk)
        except Exception as ex:
            logger.error(
                'Caught exception during "ensure_safety" {0}'.format(ex))

        return {
            'diskguid': new_vdisk.guid,
            'name': new_vdisk.name,
            'backingdevice': location
        }
Example #43
    def create_from_template(
        diskguid, machinename, devicename, pmachineguid, machineguid=None, storagedriver_guid=None
    ):
        """
        Create a disk from a template

        @param diskguid: guid of the template disk
        @param machinename: name of the machine the device should be created for (eg: myVM)
        @param devicename: device file name for the disk (eg: mydisk-flat.vmdk)
        @param machineguid: guid of the machine to assign disk to
        @return diskguid: guid of new disk
        """

        pmachine = PMachine(pmachineguid)
        hypervisor = Factory.get(pmachine)
        disk_path = hypervisor.get_disk_path(machinename, devicename)

        description = "{} {}".format(machinename, devicename)
        properties_to_clone = [
            "description",
            "size",
            "type",
            "retentionpolicyid",
            "snapshotpolicyid",
            "vmachine",
            "vpool",
        ]

        vdisk = VDisk(diskguid)
        if vdisk.vmachine and not vdisk.vmachine.is_vtemplate:
            # Disk might not be attached to a vmachine, but still be a template
            raise RuntimeError("The given vdisk does not belong to a template")

        if storagedriver_guid is not None:
            storagedriver_id = StorageDriver(storagedriver_guid).storagedriver_id
        else:
            storagedriver_id = vdisk.storagedriver_id
        storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
        if storagedriver is None:
            raise RuntimeError("Could not find StorageDriver with id {0}".format(storagedriver_id))

        new_vdisk = VDisk()
        new_vdisk.copy(vdisk, include=properties_to_clone)
        new_vdisk.vpool = vdisk.vpool
        new_vdisk.devicename = hypervisor.clean_backing_disk_filename(disk_path)
        new_vdisk.parent_vdisk = vdisk
        new_vdisk.name = "{}-clone".format(vdisk.name)
        new_vdisk.description = description
        new_vdisk.vmachine = VMachine(machineguid) if machineguid else vdisk.vmachine
        new_vdisk.save()

        mds_service = MDSServiceController.get_preferred_mds(storagedriver.storagerouter, vdisk.vpool)
        if mds_service is None:
            raise RuntimeError("Could not find a MDS service")

        logger.info(
            "Create disk from template {} as new disk {} at location {}".format(vdisk.name, new_vdisk.name, disk_path)
        )
        try:
            volume_id = vdisk.storagedriver_client.create_clone_from_template(
                target_path=disk_path,
                metadata_backend_config=MDSMetaDataBackendConfig(
                    [
                        MDSNodeConfig(
                            address=str(mds_service.service.storagerouter.ip), port=mds_service.service.ports[0]
                        )
                    ]
                ),
                parent_volume_id=str(vdisk.volume_id),
                node_id=str(storagedriver_id),
            )
            new_vdisk.volume_id = volume_id
            new_vdisk.save()
            MDSServiceController.ensure_safety(new_vdisk)

        except Exception as ex:
            logger.error("Clone disk on volumedriver level failed with exception: {0}".format(str(ex)))
            new_vdisk.delete()
            raise

        # Allow "regular" users to use this volume
        # Do not use run for other user than ovs as it blocks asking for root password
        # Do not use run_local for other user as it doesn't have permission
        # So this method only works if this is called by root or ovs
        storagerouter = StorageRouter(new_vdisk.storagerouter_guid)
        mountpoint = storagedriver.mountpoint
        location = "{0}{1}".format(mountpoint, disk_path)
        client = SSHClient.load(storagerouter.pmachine.ip)
        print(client.run('chmod 664 "{0}"'.format(location)))
        print(client.run('chown ovs:ovs "{0}"'.format(location)))
        return {"diskguid": new_vdisk.guid, "name": new_vdisk.name, "backingdevice": disk_path}
Example #44
0
 def test_storagedriver_config_set(self):
     """
     Validates whether storagedriver configuration is generated as expected
     """
     PersistentFactory.get_client().set('ovs.storagedriver.mds.safety', 3)
     vpools, storagerouters, storagedrivers, services, mds_services, _ = self._build_service_structure(
         {
             'vpools': [1, 2],
             'storagerouters': [1, 2, 3, 4, 5, 6],
             'storagedrivers': [(1, 1, 1), (2, 1, 2), (3, 1, 3), (4, 1, 4),
                                (5, 2, 4), (6, 2, 5),
                                (7, 2, 6)],  # (<id>, <vpool_id>, <sr_id>)
             'mds_services': [(1, 1), (2, 1), (3, 2), (4, 3), (5, 4),
                              (6, 5), (7, 6), (8, 7), (9, 7)]
         }  # (<id>, <sd_id>)
     )
     vdisks = {}
     start_id = 1
     for mds_service in mds_services.itervalues():
         vdisks.update(
             self._create_vdisks_for_mds_service(10,
                                                 start_id,
                                                 mds_service=mds_service))
         start_id += 10
     mds_services[1].capacity = 11  # on 1, vpool 1
     mds_services[1].save()
     mds_services[2].capacity = 20  # on 1, vpool 1
     mds_services[2].save()
     mds_services[3].capacity = 12  # on 2, vpool 1
     mds_services[3].save()
     mds_services[4].capacity = 14  # on 3, vpool 1
     mds_services[4].save()
     mds_services[5].capacity = 16  # on 4, vpool 1
     mds_services[5].save()
     mds_services[6].capacity = 11  # on 4, vpool 2
     mds_services[6].save()
     mds_services[7].capacity = 13  # on 5, vpool 2
     mds_services[7].save()
     mds_services[8].capacity = 19  # on 6, vpool 2
     mds_services[8].save()
     mds_services[9].capacity = 15  # on 6, vpool 2
     mds_services[9].save()
     config = MDSServiceController.get_mds_storagedriver_config_set(
         vpools[1])
     expected = {
         storagerouters[1].guid: [{
             'host': '10.0.0.1',
             'port': 2
         }, {
             'host': '10.0.0.4',
             'port': 5
         }, {
             'host': '10.0.0.3',
             'port': 4
         }],
         storagerouters[2].guid: [{
             'host': '10.0.0.2',
             'port': 3
         }, {
             'host': '10.0.0.1',
             'port': 2
         }, {
             'host': '10.0.0.4',
             'port': 5
         }],
         storagerouters[3].guid: [{
             'host': '10.0.0.3',
             'port': 4
         }, {
             'host': '10.0.0.1',
             'port': 2
         }, {
             'host': '10.0.0.4',
             'port': 5
         }],
         storagerouters[4].guid: [{
             'host': '10.0.0.4',
             'port': 5
         }, {
             'host': '10.0.0.1',
             'port': 2
         }, {
             'host': '10.0.0.3',
             'port': 4
         }]
     }
     self.assertDictEqual(
         config, expected,
         'Test 1. Got:\n{0}'.format(json.dumps(config, indent=2)))
     mds_services[2].capacity = 10  # on 1, vpool 1
     mds_services[2].save()
     config = MDSServiceController.get_mds_storagedriver_config_set(
         vpools[1])
     expected = {
         storagerouters[1].guid: [{
             'host': '10.0.0.1',
             'port': 1
         }, {
             'host': '10.0.0.4',
             'port': 5
         }, {
             'host': '10.0.0.3',
             'port': 4
         }],
         storagerouters[2].guid: [{
             'host': '10.0.0.2',
             'port': 3
         }, {
             'host': '10.0.0.4',
             'port': 5
         }, {
             'host': '10.0.0.3',
             'port': 4
         }],
         storagerouters[3].guid: [{
             'host': '10.0.0.3',
             'port': 4
         }, {
             'host': '10.0.0.4',
             'port': 5
         }, {
             'host': '10.0.0.2',
             'port': 3
         }],
         storagerouters[4].guid: [{
             'host': '10.0.0.4',
             'port': 5
         }, {
             'host': '10.0.0.3',
             'port': 4
         }, {
             'host': '10.0.0.2',
             'port': 3
         }]
     }
     self.assertDictEqual(
         config, expected,
         'Test 2. Got:\n{0}'.format(json.dumps(config, indent=2)))
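The expected dicts above encode a selection rule that can be restated compactly. The sketch below is an assumption reverse-engineered from the test data, not taken from the controller source: each StorageRouter lists its lowest-loaded local MDS service first, and the remaining slots, up to the configured safety of 3, are filled with the lowest-loaded services on other StorageRouters. It reproduces both expected dicts for the capacities used in the test.

    # Hedged sketch of the presumed selection rule; the 'services' entries are
    # hypothetical dicts like {'ip': ..., 'port': ..., 'vdisks': ..., 'capacity': ...}.
    def pick_config_set(local_services, remote_services, safety=3):
        load = lambda service: float(service['vdisks']) / service['capacity']
        picked = [min(local_services, key=load)]
        for service in sorted(remote_services, key=load):
            if len(picked) == safety:
                break
            picked.append(service)
        return [{'host': service['ip'], 'port': service['port']} for service in picked]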
Example #45
0
    def create_from_template(diskguid, machinename, devicename, pmachineguid, machineguid=None, storagedriver_guid=None):
        """
        Create a disk from a template

        @param diskguid: guid of the template disk
        @param machinename: name of the machine where the virtual device should be created (eg: myVM)
        @param devicename: device file name for the disk (eg: mydisk-flat.vmdk)
        @param pmachineguid: guid of the pmachine that will host the new disk
        @param machineguid: guid of the machine to assign the disk to
        @param storagedriver_guid: guid of the storagedriver to create the disk on (defaults to the template's)
        @return diskguid: guid of the new disk
        """

        pmachine = PMachine(pmachineguid)
        hypervisor = Factory.get(pmachine)
        disk_path = hypervisor.get_disk_path(machinename, devicename)

        description = '{} {}'.format(machinename, devicename)
        properties_to_clone = [
            'description', 'size', 'type', 'retentionpolicyid',
            'snapshotpolicyid', 'vmachine', 'vpool']

        vdisk = VDisk(diskguid)
        if vdisk.vmachine and not vdisk.vmachine.is_vtemplate:
            # Disk might not be attached to a vmachine, but still be a template
            raise RuntimeError('The given vdisk does not belong to a template')

        if storagedriver_guid is not None:
            storagedriver_id = StorageDriver(storagedriver_guid).storagedriver_id
        else:
            storagedriver_id = vdisk.storagedriver_id
        storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
        if storagedriver is None:
            raise RuntimeError('Could not find StorageDriver with id {0}'.format(storagedriver_id))

        new_vdisk = VDisk()
        new_vdisk.copy(vdisk, include=properties_to_clone)
        new_vdisk.vpool = vdisk.vpool
        new_vdisk.devicename = hypervisor.clean_backing_disk_filename(disk_path)
        new_vdisk.parent_vdisk = vdisk
        new_vdisk.name = '{}-clone'.format(vdisk.name)
        new_vdisk.description = description
        new_vdisk.vmachine = VMachine(machineguid) if machineguid else vdisk.vmachine
        new_vdisk.save()

        mds_service = MDSServiceController.get_preferred_mds(storagedriver.storagerouter, vdisk.vpool)
        if mds_service is None:
            raise RuntimeError('Could not find an MDS service')

        logger.info('Create disk from template {} as new disk {} at location {}'.format(
            vdisk.name, new_vdisk.name, disk_path
        ))
        try:
            volume_id = vdisk.storagedriver_client.create_clone_from_template(
                target_path=disk_path,
                metadata_backend_config=MDSMetaDataBackendConfig([MDSNodeConfig(address=str(mds_service.service.storagerouter.ip),
                                                                                port=mds_service.service.ports[0])]),
                parent_volume_id=str(vdisk.volume_id),
                node_id=str(storagedriver_id)
            )
            new_vdisk.volume_id = volume_id
            new_vdisk.save()
            MDSServiceController.ensure_safety(new_vdisk)

        except Exception as ex:
            logger.error('Clone disk on volumedriver level failed with exception: {0}'.format(str(ex)))
            new_vdisk.delete()
            raise

        return {'diskguid': new_vdisk.guid, 'name': new_vdisk.name,
                'backingdevice': disk_path}
Example #46
0
    def execute_scrub_work(queue, vpool, scrub_info, error_messages):
        """
        Executes scrub work for a given vDisk queue and vPool, based on scrub_info
        :param queue: a Queue with vDisk guids that need to be scrubbed (they should all belong to a single vPool)
        :type queue: Queue
        :param vpool: the vPool object of the vDisks
        :type vpool: VPool
        :param scrub_info: A dict containing scrub information: `scrub_path` with the path to scrub in and `storage_router` with the StorageRouter
                           that needs to do the work
        :type scrub_info: dict
        :param error_messages: A list of error messages to be filled
        :type error_messages: list
        :return: None (failures are appended to error_messages)
        :rtype: NoneType
        """
        def _verify_mds_config(current_vdisk):
            current_vdisk.invalidate_dynamics('info')
            vdisk_configs = current_vdisk.info['metadata_backend_config']
            if len(vdisk_configs) == 0:
                raise RuntimeError('Could not load MDS configuration')
            return vdisk_configs

        client = None
        lock_time = 5 * 60
        storagerouter = scrub_info['storage_router']
        scrub_directory = '{0}/scrub_work_{1}_{2}'.format(
            scrub_info['scrub_path'], vpool.name, storagerouter.name)
        scrub_config_key = 'ovs/vpools/{0}/proxies/scrub/scrub_config_{1}'.format(
            vpool.guid, storagerouter.guid)
        backend_config_key = 'ovs/vpools/{0}/proxies/scrub/backend_config_{1}'.format(
            vpool.guid, storagerouter.guid)
        alba_proxy_service = 'ovs-albaproxy_{0}_{1}_scrub'.format(
            vpool.name, storagerouter.name)

        # Deploy a proxy
        try:
            with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
                ScheduledTaskController._logger.info(
                    'Scrubber - vPool {0} - StorageRouter {1} - Deploying ALBA proxy {2}'
                    .format(vpool.name, storagerouter.name,
                            alba_proxy_service))
                client = SSHClient(storagerouter, 'root')
                client.dir_create(scrub_directory)
                # The Celery task is executed by the 'ovs' user, which must be able to write in the directory
                client.dir_chmod(scrub_directory, 0777)
                if ServiceManager.has_service(name=alba_proxy_service, client=client) is True \
                        and ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True:
                    ScheduledTaskController._logger.info(
                        'Scrubber - vPool {0} - StorageRouter {1} - Re-using existing proxy service {2}'
                        .format(vpool.name, storagerouter.name,
                                alba_proxy_service))
                    scrub_config = Configuration.get(scrub_config_key)
                else:
                    machine_id = System.get_my_machine_id(client)
                    port_range = Configuration.get(
                        '/ovs/framework/hosts/{0}/ports|storagedriver'.format(
                            machine_id))
                    port = System.get_free_ports(selected_range=port_range,
                                                 nr=1,
                                                 client=client)[0]
                    # Scrub config
                    # {u'albamgr_cfg_url': u'arakoon://config/ovs/vpools/71e2f717-f270-4a41-bbb0-d4c8c084d43e/proxies/64759516-3471-4321-b912-fb424568fc5b/config/abm?ini=%2Fopt%2FOpenvStorage%2Fconfig%2Farakoon_cacc.ini',
                    #  u'fragment_cache': [u'none'],
                    #  u'ips': [u'127.0.0.1'],
                    #  u'log_level': u'info',
                    #  u'manifest_cache_size': 17179869184,
                    #  u'port': 0,
                    #  u'transport': u'tcp'}

                    # Backend config
                    # {u'alba_connection_host': u'10.100.193.155',
                    #  u'alba_connection_port': 26204,
                    #  u'alba_connection_preset': u'preset',
                    #  u'alba_connection_timeout': 15,
                    #  u'alba_connection_transport': u'TCP',
                    #  u'backend_interface_retries_on_error': 5,
                    #  u'backend_interface_retry_backoff_multiplier': 2.0,
                    #  u'backend_interface_retry_interval_secs': 1,
                    #  u'backend_type': u'ALBA'}
                    scrub_config = Configuration.get(
                        'ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(
                            vpool.guid))
                    scrub_config['port'] = port
                    scrub_config['transport'] = 'tcp'
                    Configuration.set(scrub_config_key,
                                      json.dumps(scrub_config, indent=4),
                                      raw=True)

                    params = {'VPOOL_NAME': vpool.name,
                              'LOG_SINK': LogHandler.get_sink_path('alba_proxy'),
                              'CONFIG_PATH': Configuration.get_configuration_path(scrub_config_key)}
                    ServiceManager.add_service(name='ovs-albaproxy',
                                               params=params,
                                               client=client,
                                               target_name=alba_proxy_service)
                    ServiceManager.start_service(name=alba_proxy_service,
                                                 client=client)
                    ScheduledTaskController._logger.info(
                        'Scrubber - vPool {0} - StorageRouter {1} - Deployed ALBA proxy {2}'
                        .format(vpool.name, storagerouter.name,
                                alba_proxy_service))

                backend_config = Configuration.get(
                    'ovs/vpools/{0}/hosts/{1}/config'.format(
                        vpool.guid, vpool.storagedrivers[0].storagedriver_id
                    ))['backend_connection_manager']
                backend_config['alba_connection_host'] = '127.0.0.1'
                backend_config['alba_connection_port'] = scrub_config['port']
                Configuration.set(
                    backend_config_key,
                    json.dumps({"backend_connection_manager": backend_config},
                               indent=4),
                    raw=True)
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - An error occurred deploying ALBA proxy {2}'.format(
                vpool.name, storagerouter.name, alba_proxy_service)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)
            if client is not None and ServiceManager.has_service(
                    name=alba_proxy_service, client=client) is True:
                if ServiceManager.get_service_status(name=alba_proxy_service,
                                                     client=client) is True:
                    ServiceManager.stop_service(name=alba_proxy_service,
                                                client=client)
                ServiceManager.remove_service(name=alba_proxy_service,
                                              client=client)
            if Configuration.exists(scrub_config_key):
                Configuration.delete(scrub_config_key)

        try:
            # Empty the queue with vDisks to scrub
            with remote(storagerouter.ip, [VDisk]) as rem:
                while True:
                    vdisk = None
                    vdisk_guid = queue.get(False)
                    try:
                        # Check MDS master is local. Trigger MDS handover if necessary
                        vdisk = rem.VDisk(vdisk_guid)
                        ScheduledTaskController._logger.info(
                            'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Started scrubbing at location {3}'
                            .format(vpool.name, storagerouter.name, vdisk.name,
                                    scrub_directory))
                        configs = _verify_mds_config(current_vdisk=vdisk)
                        storagedriver = StorageDriverList.get_by_storagedriver_id(
                            vdisk.storagedriver_id)
                        if configs[0].get('ip') != storagedriver.storagerouter.ip:
                            ScheduledTaskController._logger.info(
                                'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - MDS master is not local, trigger handover'
                                .format(vpool.name, storagerouter.name,
                                        vdisk.name))
                            MDSServiceController.ensure_safety(
                                VDisk(vdisk_guid)
                            )  # Do not use a remote VDisk instance here
                            configs = _verify_mds_config(current_vdisk=vdisk)
                            if configs[0].get('ip') != storagedriver.storagerouter.ip:
                                ScheduledTaskController._logger.warning(
                                    'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Skipping because master MDS still not local'
                                    .format(vpool.name, storagerouter.name,
                                            vdisk.name))
                                continue

                        # Do the actual scrubbing
                        with vdisk.storagedriver_client.make_locked_client(
                                str(vdisk.volume_id)) as locked_client:
                            ScheduledTaskController._logger.info(
                                'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Retrieve and apply scrub work'
                                .format(vpool.name, storagerouter.name,
                                        vdisk.name))
                            work_units = locked_client.get_scrubbing_workunits()
                            for work_unit in work_units:
                                res = locked_client.scrub(
                                    work_unit=work_unit,
                                    scratch_dir=scrub_directory,
                                    log_sinks=[
                                        LogHandler.get_sink_path(
                                            'scrubber', allow_override=True)
                                    ],
                                    backend_config=Configuration.get_configuration_path(backend_config_key))
                                locked_client.apply_scrubbing_result(
                                    scrubbing_work_result=res)
                            if work_units:
                                ScheduledTaskController._logger.info(
                                    'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - {3} work units successfully applied'
                                    .format(vpool.name, storagerouter.name,
                                            vdisk.name, len(work_units)))
                            else:
                                ScheduledTaskController._logger.info(
                                    'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - No scrubbing required'
                                    .format(vpool.name, storagerouter.name,
                                            vdisk.name))
                    except Exception:
                        if vdisk is None:
                            message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk with guid {2} could not be found'.format(
                                vpool.name, storagerouter.name, vdisk_guid)
                        else:
                            message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Scrubbing failed'.format(
                                vpool.name, storagerouter.name, vdisk.name)
                        error_messages.append(message)
                        ScheduledTaskController._logger.exception(message)

        except Empty:  # Raised when all items have been fetched from the queue
            ScheduledTaskController._logger.info(
                'Scrubber - vPool {0} - StorageRouter {1} - Queue completely processed'
                .format(vpool.name, storagerouter.name))
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - Scrubbing failed'.format(
                vpool.name, storagerouter.name)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)

        # Delete the proxy again
        try:
            with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
                ScheduledTaskController._logger.info(
                    'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2}'
                    .format(vpool.name, storagerouter.name,
                            alba_proxy_service))
                client = SSHClient(storagerouter, 'root')
                client.dir_delete(scrub_directory)
                if ServiceManager.has_service(alba_proxy_service,
                                              client=client):
                    ServiceManager.stop_service(alba_proxy_service,
                                                client=client)
                    ServiceManager.remove_service(alba_proxy_service,
                                                  client=client)
                if Configuration.exists(scrub_config_key):
                    Configuration.delete(scrub_config_key)
                ScheduledTaskController._logger.info(
                    'Scrubber - vPool {0} - StorageRouter {1} - Removed service {2}'
                    .format(vpool.name, storagerouter.name,
                            alba_proxy_service))
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2} failed'.format(
                vpool.name, storagerouter.name, alba_proxy_service)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)
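For context, a hedged sketch of how a scheduler might drive execute_scrub_work: one queue of vDisk guids per vPool, one worker thread per scrub location, and a shared error_messages list that is inspected once all threads have joined. The scrub_locations list and the use of VPoolList are assumptions; the real scheduler code is not shown here.

    # Hypothetical driver; 'scrub_locations' entries mirror the scrub_info dicts
    # expected above: {'scrub_path': ..., 'storage_router': ...}.
    from Queue import Queue
    from threading import Thread

    error_messages = []
    threads = []
    for vpool in VPoolList.get_vpools():
        vdisk_queue = Queue()
        for vdisk in vpool.vdisks:
            vdisk_queue.put(vdisk.guid)
        for scrub_info in scrub_locations:
            # Threads for the same vPool share the queue and empty it cooperatively
            thread = Thread(target=ScheduledTaskController.execute_scrub_work,
                            args=(vdisk_queue, vpool, scrub_info, error_messages))
            thread.start()
            threads.append(thread)
    for thread in threads:
        thread.join()
    if error_messages:
        raise Exception('Scrubbing failed:\n - {0}'.format('\n - '.join(error_messages)))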