def check_filedrivers(result_handler):
     """
     Checks if the file drivers work on a local machine (compatible with multiple vPools)
     :param result_handler: logging object
     :type result_handler: ovs.extensions.healthcheck.result.HCResults
     """
     result_handler.info('Checking file drivers.', add_to_result=False)
     vpools = VPoolList.get_vpools()
     # perform tests
     if len(vpools) == 0:
         result_handler.skip('No vPools found!')
         return
     for vp in vpools:
         name = 'ovs-healthcheck-test-{0}'.format(VolumedriverHealthCheck.LOCAL_ID)
         if vp.guid not in VolumedriverHealthCheck.LOCAL_SR.vpools_guids:
             result_handler.skip('Skipping vPool {0} because it is not living here.'.format(vp.name))
             continue
         try:
             VolumedriverHealthCheck._check_filedriver(vp.name, name)
             if os.path.exists('/mnt/{0}/{1}.xml'.format(vp.name, name)):
                 # working
                 VolumedriverHealthCheck._check_filedriver_remove(vp.name)
                 result_handler.success('Filedriver for vPool {0} is working fine!'.format(vp.name))
             else:
                 # not working
                 result_handler.failure('Filedriver for vPool {0} seems to have problems!'.format(vp.name))
         except TimeoutError:
             # timeout occurred, action took too long
             result_handler.warning('Filedriver of vPool {0} seems to have `timeout` problems'.format(vp.name))
         except subprocess.CalledProcessError:
             # can be input/output error by filedriver
             result_handler.failure('Filedriver of vPool {0} seems to have `input/output` problems'.format(vp.name))
Exemple #2
0
    def get_stats_vpools(cls):
        """
        Retrieve statistics for each vPool
        """
        if cls._config is None:
            cls.validate_and_retrieve_config()

        stats = []
        errors = False
        environment = cls._config['environment']
        for vpool in VPoolList.get_vpools():
            try:
                stats.append({
                    'tags': {
                        'vpool_name': vpool.name,
                        'environment': environment
                    },
                    'fields':
                    cls._convert_to_float_values(
                        cls._pop_realtime_info(vpool.statistics)),
                    'measurement':
                    'vpool'
                })
            except Exception:
                errors = True
                cls._logger.exception(
                    'Retrieving statistics for vPool {0} failed'.format(
                        vpool.name))
        return errors, stats
    def get_vpool_stats():
        """
        Send Vpool statistics to InfluxDB
        """
        points = []
        vpools = VPoolList.get_vpools()
        if len(vpools) == 0:
            StatsmonkeyScheduledTaskController._logger.info("No vpools found")
            return

        for vpool in vpools:
            try:
                metrics = StatsmonkeyScheduledTaskController._pop_realtime_info(vpool.statistics)
                vpool_name = vpool.name

                entry = {
                    'measurement': 'vpool_stats',
                    'tags': {
                        'vpool_name': vpool_name
                    },
                    'fields': metrics
                }
                points.append(entry)
            except Exception as ex:
                StatsmonkeyScheduledTaskController._logger.error(ex.message)

        if len(points) == 0:
            StatsmonkeyScheduledTaskController._logger.info("No statistics found")
            return

        StatsmonkeyScheduledTaskController._send_stats(points)
        return points
    def get_vpools():
        """
        Get all vpools on a cluster

        :return: vpools
        :rtype: list
        """

        return VPoolList.get_vpools()
    def check_volumedrivers(result_handler):
        """
        Checks if the VOLUMEDRIVERS work on a local machine (compatible with multiple vPools)
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :return: None
        :rtype: NoneType
        """
        result_handler.info('Checking volumedrivers.', add_to_result=False)
        vpools = VPoolList.get_vpools()
        if len(vpools) == 0:
            result_handler.skip('No vPools found!')
            return
        for vp in vpools:
            name = 'ovs-healthcheck-test-{0}.raw'.format(VolumedriverHealthCheck.LOCAL_ID)
            if vp.guid not in VolumedriverHealthCheck.LOCAL_SR.vpools_guids:
                result_handler.skip('Skipping vPool {0} because it is not living here.'.format(vp.name))
                continue
            try:
                # delete if previous vdisk with this name exists
                storagedriver_guid = next((storagedriver.guid for storagedriver in vp.storagedrivers
                                           if storagedriver.storagedriver_id == vp.name +
                                           VolumedriverHealthCheck.LOCAL_ID))
                # create a new one
                volume = VolumedriverHealthCheck._check_volumedriver(name, storagedriver_guid, result_handler)

                if volume is True:
                    # delete the recently created
                    try:
                        VolumedriverHealthCheck._check_volumedriver_remove(vpool_name=vp.name, vdisk_name=name)
                    except Exception as ex:
                        raise RuntimeError('Could not delete the created volume. Got {0}'.format(str(ex)))
                    # Working at this point
                    result_handler.success('Volumedriver of vPool {0} is working fine!'.format(vp.name))
                else:
                    # not working
                    result_handler.failure('Something went wrong during vdisk creation on vpool {0}.'.format(vp.name))

            except TimeoutError:
                # timeout occurred, action took too long
                result_handler.warning('Volumedriver of vPool {0} seems to timeout.'.format(vp.name))
            except IOError as ex:
                # can be input/output error by volumedriver
                result_handler.failure('Volumedriver of vPool {0} seems to have IO problems. Got `{1}` while executing.'.format(vp.name, ex.message))
            except RuntimeError as ex:
                result_handler.failure('Volumedriver of vPool {0} seems to have problems. Got `{1}` while executing.'.format(vp.name, ex))
            except VDiskNotFoundError:
                result_handler.warning('Volume on vPool {0} was not found, please retry again'.format(vp.name))
            except Exception as ex:
                result_handler.failure('Uncaught exception for Volumedriver of vPool {0}.Got {1} while executing.'.format(vp.name, ex))
            finally:
                # Attempt to delete the created vdisk
                try:
                    VolumedriverHealthCheck._check_volumedriver_remove(vpool_name=vp.name, vdisk_name=name, present=False)
                except:
                    pass
    def check_model_consistency(result_handler):
        """
        Checks if the model consistency of OVSDB vs. VOLUMEDRIVER and does a preliminary check on RABBITMQ
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :return: None
        :rtype: NoneType
        """
        result_handler.info('Checking model consistency: ')

        # Checking consistency of volumedriver vs. ovsdb and backwards
        for vp in VPoolList.get_vpools():
            if vp.guid not in OpenvStorageHealthCheck.LOCAL_SR.vpools_guids:
                result_handler.skip('Skipping vPool {0} because it is not living here.'.format(vp.name))
                continue
            result_handler.info('Checking consistency of volumedriver vs. ovsdb for {0}: '.format(vp.name), add_to_result=False)
            missing_in_volumedriver = []
            missing_in_model = []
            try:
                # noinspection PyArgumentList
                voldrv_volume_list = vp.storagedriver_client.list_volumes()
            except (ClusterNotReachableException, RuntimeError) as ex:
                result_handler.warning('Seems like the volumedriver {0} is not running. Got {1}'.format(vp.name, str(ex)),
                                       code=ErrorCodes.voldrv_connection_problem)
                continue

            vdisk_volume_ids = []
            # Cross-reference model vs. volumedriver
            for vdisk in vp.vdisks:
                vdisk_volume_ids.append(vdisk.volume_id)
                if vdisk.volume_id not in voldrv_volume_list:
                    missing_in_volumedriver.append(vdisk.guid)
                else:
                    voldrv_volume_list.remove(vdisk.volume_id)
            # Cross-reference volumedriver vs. model
            for voldrv_id in voldrv_volume_list:
                if voldrv_id not in vdisk_volume_ids:
                    missing_in_model.append(voldrv_id)

            # Display discrepancies for vPool
            if len(missing_in_volumedriver) != 0:
                result_handler.warning('Detected volumes that are MISSING in volumedriver but are in ovsdb in vpool: {0} - vdisk guid(s):{1}.'
                                       .format(vp.name, ' '.join(missing_in_volumedriver)),
                                       code=ErrorCodes.missing_volumedriver)
            else:
                result_handler.success('No discrepancies found for ovsdb in vPool {0}'.format(vp.name), code=ErrorCodes.missing_volumedriver)

            if len(missing_in_model) != 0:
                result_handler.warning('Detected volumes that are AVAILABLE in volumedriver but are not in ovsdb in vpool: {0} - vdisk volume id(s):{1}'
                                       .format(vp.name, ', '.join(missing_in_model)),
                                       code=ErrorCodes.missing_ovsdb)
            else:
                result_handler.success('No discrepancies found for voldrv in vpool {0}'.format(vp.name), code=ErrorCodes.missing_ovsdb)
    def _ns_statistics(self):
        """
        Returns a list of the ASDs namespaces
        """
        # Collect ALBA related statistics
        alba_dataset = {}
        for namespace in self.ns_data:
            if namespace['namespace']['state'] != 'active':
                continue
            alba_dataset[namespace['name']] = namespace['statistics']

        # Collect vPool/vDisk data
        vdisk_dataset = {}
        for vpool in VPoolList.get_vpools():
            vdisk_dataset[vpool] = vpool.storagedriver_client.list_volumes()

        # Collect global usage
        global_usage = {'size': 0,
                        'used': 0}
        for stats in self.asd_statistics.values():
            global_usage['size'] += stats['capacity']
            global_usage['used'] += stats['disk_usage']

        # Cross merge
        dataset = {'global': {'size': global_usage['size'],
                              'used': global_usage['used']},
                   'vpools': {},
                   'overhead': 0,
                   'unknown': {'storage': 0,
                               'logical': 0}}
        for vpool in vdisk_dataset:
            for namespace in vdisk_dataset[vpool]:
                if namespace in alba_dataset:
                    if vpool.guid not in dataset['vpools']:
                        dataset['vpools'][vpool.guid] = {'storage': 0,
                                                         'logical': 0}
                    dataset['vpools'][vpool.guid]['storage'] += alba_dataset[namespace]['storage']
                    dataset['vpools'][vpool.guid]['logical'] += alba_dataset[namespace]['logical']
                    del alba_dataset[namespace]
            fd_namespace = 'fd-{0}-{1}'.format(vpool.name, vpool.guid)
            if fd_namespace in alba_dataset:
                if vpool.guid not in dataset['vpools']:
                    dataset['vpools'][vpool.guid] = {'storage': 0,
                                                     'logical': 0}
                dataset['vpools'][vpool.guid]['storage'] += alba_dataset[fd_namespace]['storage']
                dataset['vpools'][vpool.guid]['logical'] += alba_dataset[fd_namespace]['logical']
                del alba_dataset[fd_namespace]
        for namespace in alba_dataset:
            dataset['unknown']['storage'] += alba_dataset[namespace]['storage']
            dataset['unknown']['logical'] += alba_dataset[namespace]['logical']
        dataset['overhead'] = max(0, dataset['global']['used'] - dataset['unknown']['storage'] - sum(usage['storage'] for usage in dataset['vpools'].values()))
        return dataset
 def mds_checkup():
     """
     Validates the current MDS setup/configuration and takes actions where required
     """
     mds_dict = {}
     for vpool in VPoolList.get_vpools():
         for mds_service in vpool.mds_services:
             storagerouter = mds_service.service.storagerouter
             if vpool not in mds_dict:
                 mds_dict[vpool] = {}
             if storagerouter not in mds_dict[vpool]:
                 mds_dict[vpool][storagerouter] = {'client': SSHClient(storagerouter, username='******'),
                                                   'services': []}
             mds_dict[vpool][storagerouter]['services'].append(mds_service)
     for vpool, storagerouter_info in mds_dict.iteritems():
         # 1. First, make sure there's at least one MDS on every StorageRouter that's not overloaded
         # If not, create an extra MDS for that StorageRouter
         for storagerouter in storagerouter_info:
             client = mds_dict[vpool][storagerouter]['client']
             mds_services = mds_dict[vpool][storagerouter]['services']
             has_room = False
             for mds_service in mds_services[:]:
                 if mds_service.capacity == 0 and len(mds_service.vdisks_guids) == 0:
                     client = SSHClient(storagerouter)
                     MDSServiceController.remove_mds_service(mds_service, client, storagerouter, vpool, reload_config=True)
                     mds_services.remove(mds_service)
             for mds_service in mds_services:
                 _, load = MDSServiceController.get_mds_load(mds_service)
                 if load < Configuration.get('ovs.storagedriver.mds.maxload'):
                     has_room = True
                     break
             if has_room is False:
                 mds_service = MDSServiceController.prepare_mds_service(client, storagerouter, vpool,
                                                                        fresh_only=False, reload_config=True)
                 if mds_service is None:
                     raise RuntimeError('Could not add MDS node')
                 mds_services.append(mds_service)
         mds_config_set = MDSServiceController.get_mds_storagedriver_config_set(vpool)
         for storagerouter in mds_dict[vpool]:
             client = mds_dict[vpool][storagerouter]['client']
             storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name)
             storagedriver_config.load(client)
             if storagedriver_config.is_new is False:
                 storagedriver_config.clean()  # Clean out obsolete values
                 storagedriver_config.configure_filesystem(
                     fs_metadata_backend_mds_nodes=mds_config_set[storagerouter.guid]
                 )
                 storagedriver_config.save(client)
         # 2. Per VPool, execute a safety check, making sure the master/slave configuration is optimal.
         for vdisk in vpool.vdisks:
             MDSServiceController.ensure_safety(vdisk)
Exemple #9
0
 def list_volumes(vpool_guid=None):
     """
     List all known volumes on a specific vpool or on all
     """
     if vpool_guid is not None:
         vpool = VPool(vpool_guid)
         storagedriver_client = StorageDriverClient().load(vpool)
         response = storagedriver_client.list_volumes()
     else:
         response = []
         for vpool in VPoolList.get_vpools():
             storagedriver_client = StorageDriverClient().load(vpool)
             response.extend(storagedriver_client.list_volumes())
     return response
Exemple #10
0
 def list_volumes(vpool_guid=None):
     """
     List all known volumes on a specific vpool or on all
     """
     if vpool_guid is not None:
         vpool = VPool(vpool_guid)
         storagedriver_client = StorageDriverClient.load(vpool)
         response = storagedriver_client.list_volumes()
     else:
         response = []
         for vpool in VPoolList.get_vpools():
             storagedriver_client = StorageDriverClient.load(vpool)
             response.extend(storagedriver_client.list_volumes())
     return response
    def checkForHaltedVolumes(self):

        self.utility.logger("Checking for halted volumes: ", self.module, 3, 'checkHaltedVolumes', False)

        vpools = VPoolList.get_vpools()

        if len(vpools) != 0:

            for vp in vpools:

                haltedVolumes = []

                self.utility.logger("Checking vPool '{0}': ".format(vp.name), self.module, 3,
                                    'checkVPOOL_{0}'.format(vp.name), False)

                config_file = self.utility.fetchConfigFilePath(vp.name, self.machine_id, 1, vp.guid)
                voldrv_client = src.LocalStorageRouterClient(config_file)

                for volume in voldrv_client.list_volumes():
                    # check if volume is halted, returns: 0 or 1
                    try:
                        if int(self.utility.parseXMLtoJSON(voldrv_client.info_volume(volume))
                                   ["boost_serialization"]["XMLRPCVolumeInfo"]["halted"]):
                            haltedVolumes.append(volume)
                    except ObjectNotFoundException:
                        # ignore ovsdb invalid entrees
                        # model consistency will handle it.
                        continue
                    except MaxRedirectsExceededException:
                        # this means the volume is not halted but detached or unreachable for the volumedriver
                        haltedvolumes.append(volume)

                # print all results
                if len(haltedVolumes) > 0:
                    self.utility.logger("Detected volumes that are HALTED in volumedriver in vPool '{0}': {1}"
                                        .format(vp.name, ', '.join(haltedVolumes)), self.module, 0,
                                        'halted')
                else:
                    self.utility.logger("No halted volumes detected in vPool '{0}'"
                                        .format(vp.name), self.module, 1,
                                        'halted')

        else:
            self.utility.logger("No vPools found!".format(len(vpools)), self.module, 5, 'halted')
Exemple #12
0
 def configure_proxy(backend_name, proxy_configuration):
     faulty_keys = [
         key for key in proxy_configuration.keys()
         if key not in ProxySetup.PARAMS
     ]
     if len(faulty_keys) > 0:
         raise ValueError(
             '{0} are unsupported keys for proxy configuration.'.format(
                 ', '.join(faulty_keys)))
     ExtensionsToolbox.verify_required_params(ProxySetup.PARAMS,
                                              proxy_configuration)
     vpools = VPoolList.get_vpools()
     service_manager = ServiceFactory.get_manager()
     with open('/root/old_proxies', 'w') as backup_file:
         for vpool in vpools:
             if vpool.metadata['backend']['backend_info'][
                     'name'] != backend_name:
                 continue
             for storagedriver in vpool.storagedrivers:
                 for proxy in storagedriver.alba_proxies:
                     config_loc = 'ovs/vpools/{0}/proxies/{1}/config/main'.format(
                         vpool.guid, proxy.guid)
                     proxy_service = Service(proxy.service_guid)
                     proxy_config = Configuration.get(config_loc)
                     old_proxy_config = dict(proxy_config)
                     backup_file.write('{} -- {}\n'.format(
                         config_loc, old_proxy_config))
                     proxy_config.update(proxy_configuration)
                     ProxySetup.LOGGER.info(
                         "Changed {0} to {1} for proxy {2}".format(
                             old_proxy_config, proxy_config, config_loc))
                     ProxySetup.LOGGER.info("Changed items {0}".format([
                         (key, value)
                         for key, value in proxy_config.iteritems()
                         if key not in old_proxy_config.keys()
                     ]))
                     Configuration.set(config_loc,
                                       json.dumps(proxy_config, indent=4),
                                       raw=True)
                     client = SSHClient(storagedriver.storage_ip,
                                        username='******')
                     service_manager.restart_service(proxy_service.name,
                                                     client=client)
Exemple #13
0
    def mds_checkup():
        """
        Validates the current MDS setup/configuration and takes actions where required
        Actions:
            * Verify which StorageRouters are available
            * Make mapping between vPools and its StorageRouters
            * For each vPool make sure every StorageRouter has at least 1 MDS service with capacity available
            * For each vPool retrieve the optimal configuration and store it for each StorageDriver
            * For each vPool run an ensure safety for all vDisks
        :raises RuntimeError: When ensure safety fails for any vDisk
        :return: None
        :rtype: NoneType
        """
        MDSServiceController._logger.info('Started')

        mds_dict, offline_nodes = MDSServiceController._get_mds_information(
            VPoolList.get_vpools())

        ensure_safety_failures = []
        for vpool, storagerouter_info in mds_dict.iteritems():
            try:
                MDSServiceController.mds_checkup_single(
                    vpool.guid,
                    mds_dict,
                    offline_nodes,
                    ensure_single_timeout=1)
            except MDSCheckupEnsureSafetyFailures as ex:
                ensure_safety_failures.append(ex)
            except EnsureSingleTimeoutReached:
                # This exception is raised by the callback. The mds checkup calls the single checkup inline which can
                # invoke the callback if the same instance is already running (because of extend/shrink)
                MDSServiceController._logger.info(
                    'MDS Checkup single already running for VPool {}'.format(
                        vpool.guid))

        if ensure_safety_failures:
            raise RuntimeError(''.join(
                (str(failure) for failure in ensure_safety_failures)))
        MDSServiceController._logger.info('Finished')
    def checkFileDriver(self):
        filedriversNotWorking = []
        name = "ovs-healthcheck-test-{0}".format(self.machine_id)

        self.utility.logger("Checking filedrivers: ", self.module, 3, 'checkFileDrivers', False)

        vpools = VPoolList.get_vpools()

        # perform tests
        if len(vpools) != 0:

            for vp in vpools:

                # check filedriver
                t = threading.Thread(target=self._checkFiledriver, args=(1, vp.name, name))
                t.daemon = True
                t.start()

                time.sleep(5)

                # if thread is still alive after x seconds or got exception, something is wrong
                if t.isAlive() or not os.path.exists("/mnt/{0}/{1}.xml".format(vp.name, name)):
                    filedriversNotWorking.append(vp.name)

                # clean-up
                if len(filedriversNotWorking) == 0:
                    self.utility.executeBashCommand("rm -f /mnt/{0}/{1}.xml".format(vp.name, name))


            # check if filedrivers are OK!
            if len(filedriversNotWorking) == 0:
                self.utility.logger("All filedrivers seem to be working fine!", self.module, 1, 'filedrivers')
            else:
                self.utility.logger("Some filedrivers seem to have some problems: {0}"
                                    .format(', '.join(filedriversNotWorking)), self.module, 0, 'filedrivers')

        else:
            self.utility.logger("No vPools found!", self.module, 5, 'filedrivers')
Exemple #15
0
    def execute_scrub(vpool_guids=None,
                      vdisk_guids=None,
                      storagerouter_guid=None):
        """
        Divide the scrub work among all StorageRouters with a SCRUB partition
        :param vpool_guids: Guids of the vPools that need to be scrubbed completely
        :type vpool_guids: list
        :param vdisk_guids: Guids of the vDisks that need to be scrubbed
        :type vdisk_guids: list
        :param storagerouter_guid: Guid of the StorageRouter to execute the scrub work on
        :type storagerouter_guid: str
        :return: None
        :rtype: NoneType
        """
        if vpool_guids is not None and not isinstance(vpool_guids, list):
            raise ValueError('vpool_guids should be a list')
        if vdisk_guids is not None and not isinstance(vdisk_guids, list):
            raise ValueError('vdisk_guids should be a list')
        if storagerouter_guid is not None and not isinstance(
                storagerouter_guid, basestring):
            raise ValueError('storagerouter_guid should be a str')

        GenericController._logger.info('Scrubber - Started')
        scrub_locations = []
        storagerouters = StorageRouterList.get_storagerouters(
        ) if storagerouter_guid is None else [
            StorageRouter(storagerouter_guid)
        ]
        for storage_router in storagerouters:
            scrub_partitions = storage_router.partition_config.get(
                DiskPartition.ROLES.SCRUB, [])
            if len(scrub_partitions) == 0:
                continue

            try:
                SSHClient(endpoint=storage_router, username='******')
                for partition_guid in scrub_partitions:
                    partition = DiskPartition(partition_guid)
                    GenericController._logger.info(
                        'Scrubber - Storage Router {0} has {1} partition at {2}'
                        .format(storage_router.ip, DiskPartition.ROLES.SCRUB,
                                partition.folder))
                    scrub_locations.append({
                        'scrub_path': str(partition.folder),
                        'partition_guid': partition.guid,
                        'storage_router': storage_router
                    })
            except UnableToConnectException:
                GenericController._logger.warning(
                    'Scrubber - Storage Router {0} is not reachable'.format(
                        storage_router.ip))

        if len(scrub_locations) == 0:
            raise ValueError('No scrub locations found, cannot scrub')

        vpool_vdisk_map = {}
        if vpool_guids is None and vdisk_guids is None:
            vpool_vdisk_map = dict((vpool, list(vpool.vdisks))
                                   for vpool in VPoolList.get_vpools())
        else:
            if vpool_guids is not None:
                for vpool_guid in set(vpool_guids):
                    vpool = VPool(vpool_guid)
                    vpool_vdisk_map[vpool] = list(vpool.vdisks)
            if vdisk_guids is not None:
                for vdisk_guid in set(vdisk_guids):
                    vdisk = VDisk(vdisk_guid)
                    if vdisk.vpool not in vpool_vdisk_map:
                        vpool_vdisk_map[vdisk.vpool] = []
                    if vdisk not in vpool_vdisk_map[vdisk.vpool]:
                        vpool_vdisk_map[vdisk.vpool].append(vdisk)

        number_of_vpools = len(vpool_vdisk_map)
        if number_of_vpools >= 6:
            max_stacks_per_vpool = 1
        elif number_of_vpools >= 3:
            max_stacks_per_vpool = 2
        else:
            max_stacks_per_vpool = 5

        threads = []
        counter = 0
        error_messages = []
        for vp, vdisks in vpool_vdisk_map.iteritems():
            # Verify amount of vDisks on vPool
            GenericController._logger.info(
                'Scrubber - vPool {0} - Checking scrub work'.format(vp.name))
            if len(vdisks) == 0:
                GenericController._logger.info(
                    'Scrubber - vPool {0} - No scrub work'.format(vp.name))
                continue

            # Fill queue with all vDisks for current vPool
            vpool_queue = Queue()
            for vd in vdisks:
                if vd.is_vtemplate is True:
                    GenericController._logger.info(
                        'Scrubber - vPool {0} - vDisk {1} {2} - Is a template, not scrubbing'
                        .format(vp.name, vd.guid, vd.name))
                    continue
                vd.invalidate_dynamics('storagedriver_id')
                if not vd.storagedriver_id:
                    GenericController._logger.warning(
                        'Scrubber - vPool {0} - vDisk {1} {2} - No StorageDriver ID found'
                        .format(vp.name, vd.guid, vd.name))
                    continue
                vpool_queue.put(vd.guid)

            stacks_to_spawn = min(max_stacks_per_vpool, len(scrub_locations))
            GenericController._logger.info(
                'Scrubber - vPool {0} - Spawning {1} stack{2}'.format(
                    vp.name, stacks_to_spawn,
                    '' if stacks_to_spawn == 1 else 's'))
            for _ in xrange(stacks_to_spawn):
                scrub_target = scrub_locations[counter % len(scrub_locations)]
                stack = Thread(
                    target=GenericController._deploy_stack_and_scrub,
                    args=(vpool_queue, vp, scrub_target, error_messages))
                stack.start()
                threads.append(stack)
                counter += 1

        for thread in threads:
            thread.join()

        if len(error_messages) > 0:
            raise Exception('Errors occurred while scrubbing:\n  - {0}'.format(
                '\n  - '.join(error_messages)))
    def _ns_statistics(self):
        """
        Returns a list of the ASDs namespaces
        """
        # Collect ALBA related statistics
        alba_dataset = {}
        for namespace in self.ns_data:
            if namespace['namespace']['state'] != 'active':
                continue
            alba_dataset[namespace['name']] = namespace['statistics']
        # Collect vPool/vDisk data
        vdisk_dataset = {}
        for vpool in VPoolList.get_vpools():
            if vpool not in vdisk_dataset:
                vdisk_dataset[vpool] = []
            for vdisk in vpool.vdisks:
                vdisk_dataset[vpool].append(vdisk.volume_id)

        # Load disk statistics
        def _load_disks(_node, _dict):
            for _asd in _node.all_disks:
                if 'asd_id' in _asd and _asd['asd_id'] in asds and 'usage' in _asd:
                    _dict['size'] += _asd['usage']['size']
                    _dict['used'] += _asd['usage']['used']

        global_usage = {'size': 0,
                        'used': 0}
        nodes = set()
        asds = []
        for asd in self.asds:
            asds.append(asd.asd_id)
            if asd.alba_node not in nodes:
                nodes.add(asd.alba_node)
        threads = []
        for node in nodes:
            thread = Thread(target=_load_disks, args=(node, global_usage))
            thread.start()
            threads.append(thread)
        for thread in threads:
            thread.join()

        # Cross merge
        dataset = {'global': {'size': global_usage['size'],
                              'used': global_usage['used']},
                   'vpools': {},
                   'unknown': {'storage': 0,
                               'logical': 0}}
        for vpool in vdisk_dataset:
            for namespace in vdisk_dataset[vpool]:
                if namespace in alba_dataset:
                    if vpool.guid not in dataset['vpools']:
                        dataset['vpools'][vpool.guid] = {'storage': 0,
                                                         'logical': 0}
                    dataset['vpools'][vpool.guid]['storage'] += alba_dataset[namespace]['storage']
                    dataset['vpools'][vpool.guid]['logical'] += alba_dataset[namespace]['logical']
                    del alba_dataset[namespace]
            fd_namespace = 'fd-{0}-{1}'.format(vpool.name, vpool.guid)
            if fd_namespace in alba_dataset:
                if vpool.guid not in dataset['vpools']:
                    dataset['vpools'][vpool.guid] = {'storage': 0,
                                                     'logical': 0}
                dataset['vpools'][vpool.guid]['storage'] += alba_dataset[fd_namespace]['storage']
                dataset['vpools'][vpool.guid]['logical'] += alba_dataset[fd_namespace]['logical']
                del alba_dataset[fd_namespace]
        for namespace in alba_dataset:
            dataset['unknown']['storage'] += alba_dataset[namespace]['storage']
            dataset['unknown']['logical'] += alba_dataset[namespace]['logical']
        return dataset
Exemple #17
0
    def mds_checkup():
        """
        Validates the current MDS setup/configuration and takes actions where required
        """
        MDSServiceController._logger.info('MDS checkup - Started')
        mds_dict = {}
        for vpool in VPoolList.get_vpools():
            MDSServiceController._logger.info('MDS checkup - vPool {0}'.format(
                vpool.name))
            mds_dict[vpool] = {}
            for mds_service in vpool.mds_services:
                storagerouter = mds_service.service.storagerouter
                if storagerouter not in mds_dict[vpool]:
                    mds_dict[vpool][storagerouter] = {
                        'client': None,
                        'services': []
                    }
                    try:
                        mds_dict[vpool][storagerouter]['client'] = SSHClient(
                            storagerouter, username='******')
                        MDSServiceController._logger.info(
                            'MDS checkup - vPool {0} - Storage Router {1} - ONLINE'
                            .format(vpool.name, storagerouter.name))
                    except UnableToConnectException:
                        MDSServiceController._logger.info(
                            'MDS checkup - vPool {0} - Storage Router {1} - OFFLINE'
                            .format(vpool.name, storagerouter.name))
                mds_dict[vpool][storagerouter]['services'].append(mds_service)

        failures = []
        max_load = Configuration.get(
            '/ovs/framework/storagedriver|mds_maxload')
        for vpool, storagerouter_info in mds_dict.iteritems():
            # 1. First, make sure there's at least one MDS on every StorageRouter that's not overloaded
            # If not, create an extra MDS for that StorageRouter
            for storagerouter in storagerouter_info:
                client = mds_dict[vpool][storagerouter]['client']
                mds_services = mds_dict[vpool][storagerouter]['services']
                has_room = False
                for mds_service in mds_services[:]:
                    if mds_service.capacity == 0 and len(
                            mds_service.vdisks_guids) == 0:
                        MDSServiceController._logger.info(
                            'MDS checkup - Removing mds_service {0} for vPool {1}'
                            .format(mds_service.number, vpool.name))
                        MDSServiceController.remove_mds_service(
                            mds_service,
                            vpool,
                            reconfigure=True,
                            allow_offline=client is None)
                        mds_services.remove(mds_service)
                for mds_service in mds_services:
                    _, load = MDSServiceController.get_mds_load(mds_service)
                    if load < max_load:
                        has_room = True
                        break
                MDSServiceController._logger.info(
                    'MDS checkup - vPool {0} - Storage Router {1} - Capacity available: {2}'
                    .format(vpool.name, storagerouter.name, has_room))
                if has_room is False and client is not None:
                    mds_service = MDSServiceController.prepare_mds_service(
                        storagerouter=storagerouter,
                        vpool=vpool,
                        fresh_only=False,
                        reload_config=True)
                    if mds_service is None:
                        raise RuntimeError('Could not add MDS node')
                    mds_services.append(mds_service)
            mds_config_set = MDSServiceController.get_mds_storagedriver_config_set(
                vpool, True)
            for storagerouter in storagerouter_info:
                client = mds_dict[vpool][storagerouter]['client']
                if client is None:
                    MDSServiceController._logger.info(
                        'MDS checkup - vPool {0} - Storage Router {1} - Marked as offline, not setting default MDS configuration'
                        .format(vpool.name, storagerouter.name))
                    continue
                storagedriver = [
                    sd for sd in storagerouter.storagedrivers
                    if sd.vpool_guid == vpool.guid
                ][0]
                storagedriver_config = StorageDriverConfiguration(
                    'storagedriver', vpool.guid,
                    storagedriver.storagedriver_id)
                storagedriver_config.load()
                if storagedriver_config.is_new is False:
                    MDSServiceController._logger.info(
                        'MDS checkup - vPool {0} - Storage Router {1} - Storing default MDS configuration: {2}'
                        .format(vpool.name, storagerouter.name,
                                mds_config_set[storagerouter.guid]))
                    storagedriver_config.configure_filesystem(
                        fs_metadata_backend_mds_nodes=mds_config_set[
                            storagerouter.guid])
                    storagedriver_config.save(client)
            # 2. Per VPool, execute a safety check, making sure the master/slave configuration is optimal.
            MDSServiceController._logger.info(
                'MDS checkup - vPool {0} - Ensuring safety for all virtual disks'
                .format(vpool.name))
            for vdisk in vpool.vdisks:
                try:
                    MDSServiceController.ensure_safety(vdisk)
                except Exception:
                    message = 'Ensure safety for vDisk {0} with guid {1} failed'.format(
                        vdisk.name, vdisk.guid)
                    MDSServiceController._logger.exception(message)
                    failures.append(message)
        if len(failures) > 0:
            raise Exception('\n - ' + '\n - '.join(failures))
        MDSServiceController._logger.info('MDS checkup - Finished')
    def checkModelConsistency(self):

        self.utility.logger("Checking model consistency: ", self.module, 3, 'checkModelConsistency', False)

        #
        # RabbitMQ check: cluster verification
        #

        self.utility.logger("Precheck: verification of RabbitMQ cluster: ", self.module, 3,
                                'checkRabbitMQcluster', False)

        if self.utility.node_type == "MASTER":

            cluster_status = self.utility.executeBashCommand("rabbitmqctl cluster_status")

            if "Error" not in cluster_status[1]:

                # this can happen
                if len(cluster_status) <= 3:
                    partition_status = cluster_status[2]
                else:
                    partition_status = cluster_status[3]

                # check parition status
                if '@' in partition_status:
                    self.utility.logger("Seems like the RabbitMQ cluster has 'partition' problems, please check this...",
                                        self.module, 0, 'process_rabbitmq', False)
                else:
                    self.utility.logger("RabbitMQ does not seem to have 'partition' problems!", self.module, 1,
                                        'process_rabbitmq', False)
            else:
                self.utility.logger("Seems like the RabbitMQ cluster has errors, maybe it is offline?", self.module, 0,
                                    'process_rabbitmq', False)

        else:
            self.utility.logger("RabbitMQ is not running/active on this server!", self.module, 5,
                                'process_rabbitmq', False)

        #
        # Checking consistency of volumedriver vs. ovsdb and backwards
        #

        for vp in VPoolList.get_vpools():

            self.utility.logger("Checking consistency of volumedriver vs. ovsdb for vPool '{0}': ".format(vp.name),
                                self.module, 3, 'checkDiscrepanciesVoldrvOvsdb', False)

            # list of vdisks that are in model but are not in volumedriver
            missingInVolumedriver = []

            # list of volumes that are in volumedriver but are not in model
            missingInModel = []

            # fetch configfile of vpool for the volumedriver
            config_file = self.utility.fetchConfigFilePath(vp.name, self.machine_id, 1, vp.guid)
            voldrv_client = src.LocalStorageRouterClient(config_file)

            # collect data from volumedriver
            voldrv_volume_list = voldrv_client.list_volumes()

            # collect data from model
            model_vdisk_list = vp.vdisks
            vol_ids = [vdisk.volume_id for vdisk in vp.vdisks]

            # crossreference model vs. volumedriver
            for vdisk in vol_ids:
                if vdisk not in voldrv_volume_list:
                    missingInVolumedriver.append(vdisk)

            # crossreference volumedriver vs. model
            for voldrv_id in voldrv_volume_list:
                if voldrv_id not in vol_ids:
                    missingInModel.append(voldrv_id)

            # display discrepancies for vPool
            if len(missingInVolumedriver) != 0:
                self.utility.logger("Detected volumes that are MISSING in volumedriver but ARE in ovsdb in vPool "
                                    "'{0}': {1}".format(vp.name, ', '.join(missingInVolumedriver)), self.module, 0,
                                    'discrepancies_ovsdb_{0}'.format(vp.name))
            else:
                self.utility.logger("NO discrepancies found for ovsdb in vPool '{0}'".format(vp.name), self.module, 1,
                                    'discrepancies_ovsdb_{0}'.format(vp.name))

            if len(missingInModel) != 0:
                self.utility.logger("Detected volumes that are AVAILABLE in volumedriver but ARE NOT in ovsdb in vPool "
                                    "'{0}': {1}".format(vp.name, ', '.join(missingInModel)), self.module, 0,
                                    'discrepancies_voldrv_{0}'.format(vp.name))
            else:
                self.utility.logger("NO discrepancies found for voldrv in vPool '{0}'".format(vp.name), self.module, 1,
                                    'discrepancies_voldrv_{0}'.format(vp.name))
Exemple #19
0
    def cluster_registry_checkup():
        """
        Verify whether changes have occurred in the cluster registry for each vPool
        :return: Information whether changes occurred
        :rtype: dict
        """
        changed_vpools = {}
        for vpool in VPoolList.get_vpools():
            changed_vpools[vpool.guid] = {'changes': False, 'success': True}
            try:
                StorageDriverController._logger.info(
                    'Validating cluster registry settings for Vpool {0}'.
                    format(vpool.guid))

                current_configs = vpool.clusterregistry_client.get_node_configs(
                )
                changes = len(current_configs) == 0
                node_configs = []
                for sd in vpool.storagedrivers:
                    sd.invalidate_dynamics(['cluster_node_config'])
                    new_config = sd.cluster_node_config
                    node_configs.append(ClusterNodeConfig(**new_config))
                    if changes is False:
                        current_node_configs = [
                            config for config in current_configs
                            if config.vrouter_id == sd.storagedriver_id
                        ]
                        if len(current_node_configs) == 1:
                            current_node_config = current_node_configs[0]
                            for key in new_config:
                                if getattr(current_node_config,
                                           key) != new_config[key]:
                                    changes = True
                                    break
                changed_vpools[vpool.guid]['changes'] = changes

                if changes is True:
                    StorageDriverController._logger.info(
                        'Cluster registry settings for Vpool {0} needs to be updated'
                        .format(vpool.guid))
                    available_storagedrivers = []
                    for sd in vpool.storagedrivers:
                        storagerouter = sd.storagerouter
                        try:
                            SSHClient(storagerouter, username='******')
                        except UnableToConnectException:
                            StorageDriverController._logger.warning(
                                'StorageRouter {0} not available.'.format(
                                    storagerouter.name))
                            continue

                        with remote(storagerouter.ip,
                                    [LocalStorageRouterClient]) as rem:
                            sd_key = '/ovs/vpools/{0}/hosts/{1}/config'.format(
                                vpool.guid, sd.storagedriver_id)
                            if Configuration.exists(sd_key) is True:
                                path = Configuration.get_configuration_path(
                                    sd_key)
                                try:
                                    lsrc = rem.LocalStorageRouterClient(path)
                                    lsrc.server_revision(
                                    )  # 'Cheap' call to verify whether volumedriver is responsive
                                    available_storagedrivers.append(sd)
                                except Exception as ex:
                                    if 'ClusterNotReachableException' in str(
                                            ex):
                                        StorageDriverController._logger.warning(
                                            'StorageDriver {0} on StorageRouter {1} not available.'
                                            .format(sd.guid,
                                                    storagerouter.name))
                                    else:
                                        StorageDriverController._logger.exception(
                                            'Got exception when validating StorageDriver {0} on StorageRouter {1}.'
                                            .format(sd.guid,
                                                    storagerouter.name))

                    StorageDriverController._logger.info(
                        'Updating cluster node configs for VPool {0}'.format(
                            vpool.guid))
                    vpool.clusterregistry_client.set_node_configs(node_configs)
                    for sd in available_storagedrivers:
                        StorageDriverController._logger.info(
                            'Trigger config reload for StorageDriver {0}'.
                            format(sd.guid))
                        vpool.storagedriver_client.update_cluster_node_configs(
                            str(sd.storagedriver_id), req_timeout_secs=10)
                    StorageDriverController._logger.info(
                        'Updating cluster node configs for Vpool {0} completed'
                        .format(vpool.guid))
                else:
                    StorageDriverController._logger.info(
                        'Cluster registry settings for Vpool {0} is up to date'
                        .format(vpool.guid))
            except Exception as ex:
                StorageDriverController._logger.exception(
                    'Got exception when validating cluster registry settings for Vpool {0}.'
                    .format(vpool.name))
                changed_vpools[vpool.guid]['success'] = False
                changed_vpools[vpool.guid]['error'] = ex.message
        return changed_vpools
Exemple #20
0
    def get_stats_vdisks(cls):
        """
        Retrieve statistics about all vDisks on the system.
        Check the safety, storage amount on the Backend, fail-over status and others
        """
        if cls._config is None:
            cls.validate_and_retrieve_config()

        stats = []
        errors = False
        environment = cls._config['environment']
        alba_backend_info = {}
        for alba_backend in AlbaBackendList.get_albabackends():
            config_path = Configuration.get_configuration_path(
                alba_backend.abm_cluster.config_location)
            disk_safety = {}
            namespace_usage = {}

            # Retrieve namespace, preset and disk safety information
            try:
                preset_info = AlbaCLI.run(
                    command='list-presets', config=config_path
                )  # Not using alba_backend.presets, because it takes a whole lot longer to retrieve
                all_namespace_info = AlbaCLI.run(command='show-namespaces',
                                                 config=config_path,
                                                 extra_params=['--max=-1'])[1]
                all_disk_safety_info = AlbaCLI.run(command='get-disk-safety',
                                                   config=config_path)
            except Exception:
                errors = True
                cls._logger.exception(
                    'Retrieving information for ALBA Backend {0} failed'.
                    format(alba_backend.name))
                continue

            alba_backend_info[alba_backend.guid] = {
                'disk_safety': disk_safety,
                'namespace_usage': namespace_usage
            }

            # Parse namespace information
            for namespace_info in all_namespace_info:
                namespace_usage[namespace_info['name']] = float(
                    namespace_info['statistics']['storage'])

            # Parse preset information
            policies = []
            preset_name = None
            for preset in preset_info:
                if preset['in_use'] is not True:
                    continue
                preset_name = preset['name']
                policies.extend(preset['policies'])
            if preset_name is None:
                continue

            # Parse disk safety information
            total_objects = 0
            max_lost_disks = 0
            max_disk_safety = 0
            bucket_overview = {}
            disk_lost_overview = {}
            disk_safety_overview = {}
            for disk_safety_info in all_disk_safety_info:
                safety = disk_safety_info['safety']
                volume_id = disk_safety_info['namespace']
                disk_safety[volume_id] = float(
                    safety) if safety is not None else safety

                for bucket_safety in disk_safety_info['bucket_safety']:
                    bucket = bucket_safety['bucket']
                    objects = bucket_safety['count']
                    remaining_safety = bucket_safety['remaining_safety']

                    if bucket[1] > max_lost_disks:
                        max_lost_disks = bucket[1]
                    if remaining_safety > max_disk_safety:
                        max_disk_safety = remaining_safety

                    for policy in policies:
                        k = policy[0] == bucket[0]
                        m = policy[1] == bucket[1]
                        c = policy[2] <= bucket[2]
                        x = policy[3] >= bucket[3]
                        if k and m and c and x:
                            if preset_name not in bucket_overview:
                                bucket_overview[preset_name] = {
                                    'policy': str(policy),
                                    'presets': {}
                                }

                    bucket[2] -= bucket_safety['applicable_dead_osds']
                    if str(bucket
                           ) not in bucket_overview[preset_name]['presets']:
                        bucket_overview[preset_name]['presets'][str(
                            bucket)] = {
                                'objects': 0,
                                'disk_safety': 0
                            }

                    disk_lost = bucket[0] + bucket[1] - bucket[
                        2]  # Data fragments + parity fragments - amount of fragments to write + dead osds
                    if disk_lost not in disk_lost_overview:
                        disk_lost_overview[disk_lost] = 0
                    if remaining_safety not in disk_safety_overview:
                        disk_safety_overview[remaining_safety] = 0

                    total_objects += objects
                    disk_lost_overview[disk_lost] += objects
                    disk_safety_overview[remaining_safety] += objects
                    bucket_overview[preset_name]['presets'][str(
                        bucket)]['objects'] += objects
                    bucket_overview[preset_name]['presets'][str(
                        bucket)]['disk_safety'] = remaining_safety

            # Create statistics regarding disk safety
            for disk_lost_number in xrange(max_lost_disks + 1):
                stats.append({
                    'tags': {
                        'disk_lost': disk_lost_number,
                        'environment': environment,
                        'backend_name': alba_backend.name
                    },
                    'fields': {
                        'objects': disk_lost_overview.get(disk_lost_number, 0),
                        'total_objects': total_objects
                    },
                    'measurement': 'disk_lost'
                })

            for disk_safety_number in xrange(max_disk_safety + 1):
                stats.append({
                    'tags': {
                        'disk_safety': disk_safety_number,
                        'environment': environment,
                        'backend_name': alba_backend.name
                    },
                    'fields': {
                        'objects':
                        disk_safety_overview.get(disk_safety_number, 0),
                        'total_objects': total_objects
                    },
                    'measurement': 'disk_safety'
                })

            for preset_name, result in bucket_overview.iteritems():
                for bucket_count, bucket_result in result['presets'].iteritems(
                ):
                    stats.append({
                        'tags': {
                            'bucket': bucket_count,
                            'policy': result['policy'],
                            'preset_name': preset_name,
                            'environment': environment,
                            'disk_safety': bucket_result['disk_safety'],
                            'backend_name': alba_backend.name
                        },
                        'fields': {
                            'objects': bucket_result['objects'],
                            'total_objects': total_objects
                        },
                        'measurement': 'bucket'
                    })

        # Integrate namespace and disk safety information in vPool stats
        for vpool in VPoolList.get_vpools():
            alba_backend_guid = vpool.metadata['backend']['backend_info'][
                'alba_backend_guid']
            for vdisk in vpool.vdisks:
                try:
                    metrics = cls._convert_to_float_values(
                        cls._pop_realtime_info(vdisk.statistics))
                    metrics['failover_mode'] = vdisk.dtl_status
                    metrics['frontend_size'] = float(vdisk.size)
                    metrics['failover_mode_status'] = cls._FAILOVER_MAP.get(
                        vdisk.dtl_status, 3)
                    if alba_backend_guid in alba_backend_info:
                        metrics['disk_safety'] = alba_backend_info[
                            alba_backend_guid]['disk_safety'].get(
                                vdisk.volume_id)
                        metrics['backend_stored'] = alba_backend_info[
                            alba_backend_guid]['namespace_usage'].get(
                                vdisk.volume_id)

                    stats.append({
                        'tags': {
                            'disk_name':
                            vdisk.name,
                            'volume_id':
                            vdisk.volume_id,
                            'vpool_name':
                            vdisk.vpool.name,
                            'environment':
                            environment,
                            'storagerouter_name':
                            StorageRouter(vdisk.storagerouter_guid).name
                        },
                        'fields': metrics,
                        'measurement': 'vdisk'
                    })
                except Exception:
                    errors = True
                    cls._logger.exception(
                        'Retrieving statistics for vDisk {0} with guid {1} failed'
                        .format(vdisk.name, vdisk.guid))
        return errors, stats
Exemple #21
0
    def mds_checkup():
        """
        Validates the current MDS setup/configuration and takes actions where required
        """
        logger.info('MDS checkup - Started')
        mds_dict = {}
        for vpool in VPoolList.get_vpools():
            logger.info('MDS checkup - vPool {0}'.format(vpool.name))
            mds_dict[vpool] = {}
            for mds_service in vpool.mds_services:
                storagerouter = mds_service.service.storagerouter
                if storagerouter not in mds_dict[vpool]:
                    mds_dict[vpool][storagerouter] = {'client': None,
                                                      'services': []}
                    try:
                        client = SSHClient(storagerouter, username = '******')
                        client.run('pwd')
                        mds_dict[vpool][storagerouter]['client'] = client
                        logger.info('MDS checkup - vPool {0} - Storage Router {1} - ONLINE'.format(vpool.name, storagerouter.name))
                    except UnableToConnectException:
                        logger.info('MDS checkup - vPool {0} - Storage Router {1} - OFFLINE'.format(vpool.name, storagerouter.name))
                mds_dict[vpool][storagerouter]['services'].append(mds_service)

        failures = []
        max_load = EtcdConfiguration.get('/ovs/framework/storagedriver|mds_maxload')
        for vpool, storagerouter_info in mds_dict.iteritems():
            # 1. First, make sure there's at least one MDS on every StorageRouter that's not overloaded
            # If not, create an extra MDS for that StorageRouter
            for storagerouter in storagerouter_info:
                client = mds_dict[vpool][storagerouter]['client']
                mds_services = mds_dict[vpool][storagerouter]['services']
                has_room = False
                for mds_service in mds_services[:]:
                    if mds_service.capacity == 0 and len(mds_service.vdisks_guids) == 0:
                        logger.info('MDS checkup - Removing mds_service {0} for vPool {1}'.format(mds_service.number, vpool.name))
                        MDSServiceController.remove_mds_service(mds_service, vpool, reconfigure=True, allow_offline=client is None)
                        mds_services.remove(mds_service)
                for mds_service in mds_services:
                    _, load = MDSServiceController.get_mds_load(mds_service)
                    if load < max_load:
                        has_room = True
                        break
                logger.info('MDS checkup - vPool {0} - Storage Router {1} - Capacity available: {2}'.format(vpool.name, storagerouter.name, has_room))
                if has_room is False and client is not None:
                    mds_service = MDSServiceController.prepare_mds_service(storagerouter=storagerouter,
                                                                           vpool=vpool,
                                                                           fresh_only=False,
                                                                           reload_config=True)
                    if mds_service is None:
                        raise RuntimeError('Could not add MDS node')
                    mds_services.append(mds_service)
            mds_config_set = MDSServiceController.get_mds_storagedriver_config_set(vpool, True)
            for storagerouter in storagerouter_info:
                client = mds_dict[vpool][storagerouter]['client']
                if client is None:
                    logger.info('MDS checkup - vPool {0} - Storage Router {1} - Marked as offline, not setting default MDS configuration'.format(vpool.name, storagerouter.name))
                    continue
                storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name)
                storagedriver_config.load(client)
                if storagedriver_config.is_new is False:
                    logger.info('MDS checkup - vPool {0} - Storage Router {1} - Storing default MDS configuration: {2}'.format(vpool.name, storagerouter.name, mds_config_set[storagerouter.guid]))
                    storagedriver_config.clean()  # Clean out obsolete values
                    storagedriver_config.configure_filesystem(fs_metadata_backend_mds_nodes=mds_config_set[storagerouter.guid])
                    storagedriver_config.save(client)
            # 2. Per VPool, execute a safety check, making sure the master/slave configuration is optimal.
            logger.info('MDS checkup - vPool {0} - Ensuring safety for all virtual disks'.format(vpool.name))
            for vdisk in vpool.vdisks:
                try:
                    MDSServiceController.ensure_safety(vdisk)
                except Exception as ex:
                    failures.append('Ensure safety for vDisk {0} with guid {1} failed with error: {2}'.format(vdisk.name, vdisk.guid, ex))
        if len(failures) > 0:
            raise Exception('\n - ' + '\n - '.join(failures))
        logger.info('MDS checkup - Finished')
Exemple #22
0
    def mds_checkup():
        """
        Validates the current MDS setup/configuration and takes actions where required
        """
        MDSServiceController._logger.info("MDS checkup - Started")
        mds_dict = {}
        for vpool in VPoolList.get_vpools():
            MDSServiceController._logger.info("MDS checkup - vPool {0}".format(vpool.name))
            mds_dict[vpool] = {}
            for mds_service in vpool.mds_services:
                storagerouter = mds_service.service.storagerouter
                if storagerouter not in mds_dict[vpool]:
                    mds_dict[vpool][storagerouter] = {"client": None, "services": []}
                    try:
                        mds_dict[vpool][storagerouter]["client"] = SSHClient(storagerouter, username="******")
                        MDSServiceController._logger.info(
                            "MDS checkup - vPool {0} - Storage Router {1} - ONLINE".format(
                                vpool.name, storagerouter.name
                            )
                        )
                    except UnableToConnectException:
                        MDSServiceController._logger.info(
                            "MDS checkup - vPool {0} - Storage Router {1} - OFFLINE".format(
                                vpool.name, storagerouter.name
                            )
                        )
                mds_dict[vpool][storagerouter]["services"].append(mds_service)

        failures = []
        max_load = Configuration.get("/ovs/framework/storagedriver|mds_maxload")
        for vpool, storagerouter_info in mds_dict.iteritems():
            # 1. First, make sure there's at least one MDS on every StorageRouter that's not overloaded
            # If not, create an extra MDS for that StorageRouter
            for storagerouter in storagerouter_info:
                client = mds_dict[vpool][storagerouter]["client"]
                mds_services = mds_dict[vpool][storagerouter]["services"]
                has_room = False
                for mds_service in mds_services[:]:
                    if mds_service.capacity == 0 and len(mds_service.vdisks_guids) == 0:
                        MDSServiceController._logger.info(
                            "MDS checkup - Removing mds_service {0} for vPool {1}".format(
                                mds_service.number, vpool.name
                            )
                        )
                        MDSServiceController.remove_mds_service(
                            mds_service, vpool, reconfigure=True, allow_offline=client is None
                        )
                        mds_services.remove(mds_service)
                for mds_service in mds_services:
                    _, load = MDSServiceController.get_mds_load(mds_service)
                    if load < max_load:
                        has_room = True
                        break
                MDSServiceController._logger.info(
                    "MDS checkup - vPool {0} - Storage Router {1} - Capacity available: {2}".format(
                        vpool.name, storagerouter.name, has_room
                    )
                )
                if has_room is False and client is not None:
                    mds_service = MDSServiceController.prepare_mds_service(
                        storagerouter=storagerouter, vpool=vpool, fresh_only=False, reload_config=True
                    )
                    if mds_service is None:
                        raise RuntimeError("Could not add MDS node")
                    mds_services.append(mds_service)
            mds_config_set = MDSServiceController.get_mds_storagedriver_config_set(vpool, True)
            for storagerouter in storagerouter_info:
                client = mds_dict[vpool][storagerouter]["client"]
                if client is None:
                    MDSServiceController._logger.info(
                        "MDS checkup - vPool {0} - Storage Router {1} - Marked as offline, not setting default MDS configuration".format(
                            vpool.name, storagerouter.name
                        )
                    )
                    continue
                storagedriver = [sd for sd in storagerouter.storagedrivers if sd.vpool_guid == vpool.guid][0]
                storagedriver_config = StorageDriverConfiguration(
                    "storagedriver", vpool.guid, storagedriver.storagedriver_id
                )
                storagedriver_config.load()
                if storagedriver_config.is_new is False:
                    MDSServiceController._logger.info(
                        "MDS checkup - vPool {0} - Storage Router {1} - Storing default MDS configuration: {2}".format(
                            vpool.name, storagerouter.name, mds_config_set[storagerouter.guid]
                        )
                    )
                    storagedriver_config.configure_filesystem(
                        fs_metadata_backend_mds_nodes=mds_config_set[storagerouter.guid]
                    )
                    storagedriver_config.save(client)
            # 2. Per VPool, execute a safety check, making sure the master/slave configuration is optimal.
            MDSServiceController._logger.info(
                "MDS checkup - vPool {0} - Ensuring safety for all virtual disks".format(vpool.name)
            )
            for vdisk in vpool.vdisks:
                try:
                    MDSServiceController.ensure_safety(vdisk)
                except Exception:
                    message = "Ensure safety for vDisk {0} with guid {1} failed".format(vdisk.name, vdisk.guid)
                    MDSServiceController._logger.exception(message)
                    failures.append(message)
        if len(failures) > 0:
            raise Exception("\n - " + "\n - ".join(failures))
        MDSServiceController._logger.info("MDS checkup - Finished")
Exemple #23
0
    def execute_scrub():
        """
        Retrieve and execute scrub work
        :return: None
        """
        ScheduledTaskController._logger.info('Scrubber - Started')
        vpools = VPoolList.get_vpools()
        error_messages = []
        scrub_locations = []
        for storage_router in StorageRouterList.get_storagerouters():
            scrub_partitions = storage_router.partition_config.get(DiskPartition.ROLES.SCRUB, [])
            if len(scrub_partitions) == 0:
                continue
            if len(scrub_partitions) > 1:
                raise RuntimeError('Multiple {0} partitions defined for StorageRouter {1}'.format(DiskPartition.ROLES.SCRUB, storage_router.name))

            partition = DiskPartition(scrub_partitions[0])
            ScheduledTaskController._logger.info('Scrubber - Storage Router {0:<15} has {1} partition at {2}'.format(storage_router.ip, DiskPartition.ROLES.SCRUB, partition.folder))
            try:
                SSHClient(storage_router, 'root')
                scrub_locations.append({'scrub_path': str(partition.folder),
                                        'storage_router': storage_router})
            except UnableToConnectException:
                ScheduledTaskController._logger.warning('Scrubber - Storage Router {0:<15} is not reachable'.format(storage_router.ip))

        number_of_vpools = len(vpools)
        if number_of_vpools >= 6:
            max_threads_per_vpool = 1
        elif number_of_vpools >= 3:
            max_threads_per_vpool = 2
        else:
            max_threads_per_vpool = 5

        threads = []
        counter = 0
        for vp in vpools:
            # Verify amount of vDisks on vPool
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - Checking scrub work'.format(vp.name))
            if len(vp.vdisks) == 0:
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - No scrub work'.format(vp.name))
                continue

            # Fill queue with all vDisks for current vPool
            vpool_queue = Queue()
            for vd in vp.vdisks:
                if vd.is_vtemplate is True:
                    continue
                vd.invalidate_dynamics('storagedriver_id')
                if not vd.storagedriver_id:
                    ScheduledTaskController._logger.warning('Scrubber - vPool {0} - vDisk {1} {2} - No StorageDriver ID found'.format(vp.name, vd.guid, vd.name))
                    continue
                vpool_queue.put(vd.guid)

            # Copy backend connection manager information in separate key
            threads_to_spawn = min(max_threads_per_vpool, len(scrub_locations))
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - Spawning {1} thread{2}'.format(vp.name, threads_to_spawn, '' if threads_to_spawn == 1 else 's'))
            for _ in range(threads_to_spawn):
                scrub_target = scrub_locations[counter % len(scrub_locations)]
                thread = Thread(target=ScheduledTaskController.execute_scrub_work,
                                name='scrub_{0}_{1}'.format(vp.guid, scrub_target['storage_router'].guid),
                                args=(vpool_queue, vp, scrub_target, error_messages))
                thread.start()
                threads.append(thread)
                counter += 1

        for thread in threads:
            thread.join()

        if len(error_messages) > 0:
            raise Exception('Errors occurred while scrubbing:\n  - {0}'.format('\n  - '.join(error_messages)))
Exemple #24
0
    def migrate(previous_version):
        """
        Migrates from any version to any version, running all migrations required
        If previous_version is for example 0 and this script is at
        version 3 it will execute two steps:
          - 1 > 2
          - 2 > 3
        @param previous_version: The previous version from which to start the migration.
        """

        working_version = previous_version

        # Version 1 introduced:
        # - The datastore is still empty, add defaults
        if working_version < 1:
            from ovs.dal.hybrids.user import User
            from ovs.dal.hybrids.group import Group
            from ovs.dal.hybrids.role import Role
            from ovs.dal.hybrids.client import Client
            from ovs.dal.hybrids.failuredomain import FailureDomain
            from ovs.dal.hybrids.j_rolegroup import RoleGroup
            from ovs.dal.hybrids.j_roleclient import RoleClient
            from ovs.dal.hybrids.backendtype import BackendType
            from ovs.dal.hybrids.servicetype import ServiceType
            from ovs.dal.hybrids.branding import Branding
            from ovs.dal.lists.backendtypelist import BackendTypeList

            # Create groups
            admin_group = Group()
            admin_group.name = 'administrators'
            admin_group.description = 'Administrators'
            admin_group.save()
            viewers_group = Group()
            viewers_group.name = 'viewers'
            viewers_group.description = 'Viewers'
            viewers_group.save()

            # Create users
            admin = User()
            admin.username = '******'
            admin.password = hashlib.sha256('admin').hexdigest()
            admin.is_active = True
            admin.group = admin_group
            admin.save()

            # Create internal OAuth 2 clients
            admin_pw_client = Client()
            admin_pw_client.ovs_type = 'INTERNAL'
            admin_pw_client.grant_type = 'PASSWORD'
            admin_pw_client.user = admin
            admin_pw_client.save()
            admin_cc_client = Client()
            admin_cc_client.ovs_type = 'INTERNAL'
            admin_cc_client.grant_type = 'CLIENT_CREDENTIALS'
            admin_cc_client.client_secret = ''.join(random.choice(string.ascii_letters +
                                                                  string.digits +
                                                                  '|_=+*#@!/-[]{}<>.?,\'";:~')
                                                    for _ in range(128))
            admin_cc_client.user = admin
            admin_cc_client.save()

            # Create roles
            read_role = Role()
            read_role.code = 'read'
            read_role.name = 'Read'
            read_role.description = 'Can read objects'
            read_role.save()
            write_role = Role()
            write_role.code = 'write'
            write_role.name = 'Write'
            write_role.description = 'Can write objects'
            write_role.save()
            manage_role = Role()
            manage_role.code = 'manage'
            manage_role.name = 'Manage'
            manage_role.description = 'Can manage the system'
            manage_role.save()

            # Attach groups to roles
            mapping = [
                (admin_group, [read_role, write_role, manage_role]),
                (viewers_group, [read_role])
            ]
            for setting in mapping:
                for role in setting[1]:
                    rolegroup = RoleGroup()
                    rolegroup.group = setting[0]
                    rolegroup.role = role
                    rolegroup.save()
                for user in setting[0].users:
                    for role in setting[1]:
                        for client in user.clients:
                            roleclient = RoleClient()
                            roleclient.client = client
                            roleclient.role = role
                            roleclient.save()

            # Add backends
            for backend_type_info in [('Ceph', 'ceph_s3'), ('Amazon', 'amazon_s3'), ('Swift', 'swift_s3'),
                                      ('Local', 'local'), ('Distributed', 'distributed'), ('ALBA', 'alba')]:
                code = backend_type_info[1]
                backend_type = BackendTypeList.get_backend_type_by_code(code)
                if backend_type is None:
                    backend_type = BackendType()
                backend_type.name = backend_type_info[0]
                backend_type.code = code
                backend_type.save()

            # Add service types
            for service_type_info in [ServiceType.SERVICE_TYPES.MD_SERVER, ServiceType.SERVICE_TYPES.ALBA_PROXY, ServiceType.SERVICE_TYPES.ARAKOON]:
                service_type = ServiceType()
                service_type.name = service_type_info
                service_type.save()

            # Branding
            branding = Branding()
            branding.name = 'Default'
            branding.description = 'Default bootstrap theme'
            branding.css = 'bootstrap-default.min.css'
            branding.productname = 'Open vStorage'
            branding.is_default = True
            branding.save()
            slate = Branding()
            slate.name = 'Slate'
            slate.description = 'Dark bootstrap theme'
            slate.css = 'bootstrap-slate.min.css'
            slate.productname = 'Open vStorage'
            slate.is_default = False
            slate.save()

            # Failure Domain
            failure_domain = FailureDomain()
            failure_domain.name = 'Default'
            failure_domain.save()

            # We're now at version 1
            working_version = 1

        # Version 2 introduced:
        # - new Descriptor format
        if working_version < 2:
            import imp
            from ovs.dal.helpers import Descriptor
            from ovs.extensions.storage.persistentfactory import PersistentFactory

            client = PersistentFactory.get_client()
            keys = client.prefix('ovs_data')
            for key in keys:
                data = client.get(key)
                modified = False
                for entry in data.keys():
                    if isinstance(data[entry], dict) and 'source' in data[entry] and 'hybrids' in data[entry]['source']:
                        filename = data[entry]['source']
                        if not filename.startswith('/'):
                            filename = '/opt/OpenvStorage/ovs/dal/{0}'.format(filename)
                        module = imp.load_source(data[entry]['name'], filename)
                        cls = getattr(module, data[entry]['type'])
                        new_data = Descriptor(cls, cached=False).descriptor
                        if 'guid' in data[entry]:
                            new_data['guid'] = data[entry]['guid']
                        data[entry] = new_data
                        modified = True
                if modified is True:
                    data['_version'] += 1
                    client.set(key, data)

            # We're now at version 2
            working_version = 2

        # Version 3 introduced:
        # - new Descriptor format
        if working_version < 3:
            import imp
            from ovs.dal.helpers import Descriptor
            from ovs.extensions.storage.persistentfactory import PersistentFactory

            client = PersistentFactory.get_client()
            keys = client.prefix('ovs_data')
            for key in keys:
                data = client.get(key)
                modified = False
                for entry in data.keys():
                    if isinstance(data[entry], dict) and 'source' in data[entry]:
                        module = imp.load_source(data[entry]['name'], data[entry]['source'])
                        cls = getattr(module, data[entry]['type'])
                        new_data = Descriptor(cls, cached=False).descriptor
                        if 'guid' in data[entry]:
                            new_data['guid'] = data[entry]['guid']
                        data[entry] = new_data
                        modified = True
                if modified is True:
                    data['_version'] += 1
                    client.set(key, data)

            working_version = 3

        # Version 4 introduced:
        # - Flexible SSD layout
        if working_version < 4:
            import os
            from ovs.dal.hybrids.diskpartition import DiskPartition
            from ovs.dal.hybrids.j_storagedriverpartition import StorageDriverPartition
            from ovs.dal.hybrids.servicetype import ServiceType
            from ovs.dal.lists.servicetypelist import ServiceTypeList
            from ovs.dal.lists.storagedriverlist import StorageDriverList
            from ovs.extensions.generic.remote import remote
            from ovs.extensions.generic.sshclient import SSHClient
            from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
            for service in ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER).services:
                mds_service = service.mds_service
                storagedriver = None
                for current_storagedriver in service.storagerouter.storagedrivers:
                    if current_storagedriver.vpool_guid == mds_service.vpool_guid:
                        storagedriver = current_storagedriver
                        break
                tasks = {}
                if storagedriver._data.get('mountpoint_md'):
                    tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_md'),
                                                   storagedriver.vpool.name,
                                                   mds_service.number)] = (DiskPartition.ROLES.DB, StorageDriverPartition.SUBROLE.MDS)
                if storagedriver._data.get('mountpoint_temp'):
                    tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_temp'),
                                                   storagedriver.vpool.name,
                                                   mds_service.number)] = (DiskPartition.ROLES.SCRUB, StorageDriverPartition.SUBROLE.MDS)
                for disk in service.storagerouter.disks:
                    for partition in disk.partitions:
                        for directory, (role, subrole) in tasks.iteritems():
                            with remote(storagedriver.storagerouter.ip, [os], username='******') as rem:
                                stat_dir = directory
                                while not rem.os.path.exists(stat_dir) and stat_dir != '/':
                                    stat_dir = stat_dir.rsplit('/', 1)[0]
                                    if not stat_dir:
                                        stat_dir = '/'
                                inode = rem.os.stat(stat_dir).st_dev
                            if partition.inode == inode:
                                if role not in partition.roles:
                                    partition.roles.append(role)
                                    partition.save()
                                number = 0
                                migrated = False
                                for sd_partition in storagedriver.partitions:
                                    if sd_partition.role == role and sd_partition.sub_role == subrole:
                                        if sd_partition.mds_service == mds_service:
                                            migrated = True
                                            break
                                        if sd_partition.partition_guid == partition.guid:
                                            number = max(sd_partition.number, number)
                                if migrated is False:
                                    sd_partition = StorageDriverPartition()
                                    sd_partition.role = role
                                    sd_partition.sub_role = subrole
                                    sd_partition.partition = partition
                                    sd_partition.storagedriver = storagedriver
                                    sd_partition.mds_service = mds_service
                                    sd_partition.size = None
                                    sd_partition.number = number + 1
                                    sd_partition.save()
                                    client = SSHClient(storagedriver.storagerouter, username='******')
                                    path = sd_partition.path.rsplit('/', 1)[0]
                                    if path:
                                        client.dir_create(path)
                                        client.dir_chown(path, 'ovs', 'ovs')
                                    client.dir_create(directory)
                                    client.dir_chown(directory, 'ovs', 'ovs')
                                    client.symlink({sd_partition.path: directory})
            for storagedriver in StorageDriverList.get_storagedrivers():
                migrated_objects = {}
                for disk in storagedriver.storagerouter.disks:
                    for partition in disk.partitions:
                        # Process all mountpoints that are unique and don't have a specified size
                        for key, (role, sr_info) in {'mountpoint_md': (DiskPartition.ROLES.DB, {'metadata_{0}': StorageDriverPartition.SUBROLE.MD,
                                                                                                'tlogs_{0}': StorageDriverPartition.SUBROLE.TLOG}),
                                                     'mountpoint_fragmentcache': (DiskPartition.ROLES.WRITE, {'fcache_{0}': StorageDriverPartition.SUBROLE.FCACHE}),
                                                     'mountpoint_foc': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                                                                    'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
                                                     'mountpoint_dtl': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                                                                    'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
                                                     'mountpoint_readcaches': (DiskPartition.ROLES.READ, {'': None}),
                                                     'mountpoint_writecaches': (DiskPartition.ROLES.WRITE, {'sco_{0}': StorageDriverPartition.SUBROLE.SCO})}.iteritems():
                            if key in storagedriver._data:
                                is_list = isinstance(storagedriver._data[key], list)
                                entries = storagedriver._data[key][:] if is_list is True else [storagedriver._data[key]]
                                for entry in entries:
                                    if not entry:
                                        if is_list:
                                            storagedriver._data[key].remove(entry)
                                            if len(storagedriver._data[key]) == 0:
                                                del storagedriver._data[key]
                                        else:
                                            del storagedriver._data[key]
                                    else:
                                        with remote(storagedriver.storagerouter.ip, [os], username='******') as rem:
                                            inode = rem.os.stat(entry).st_dev
                                        if partition.inode == inode:
                                            if role not in partition.roles:
                                                partition.roles.append(role)
                                                partition.save()
                                            for folder, subrole in sr_info.iteritems():
                                                number = 0
                                                migrated = False
                                                for sd_partition in storagedriver.partitions:
                                                    if sd_partition.role == role and sd_partition.sub_role == subrole:
                                                        if sd_partition.partition_guid == partition.guid:
                                                            number = max(sd_partition.number, number)
                                                if migrated is False:
                                                    sd_partition = StorageDriverPartition()
                                                    sd_partition.role = role
                                                    sd_partition.sub_role = subrole
                                                    sd_partition.partition = partition
                                                    sd_partition.storagedriver = storagedriver
                                                    sd_partition.size = None
                                                    sd_partition.number = number + 1
                                                    sd_partition.save()
                                                    if folder:
                                                        source = '{0}/{1}'.format(entry, folder.format(storagedriver.vpool.name))
                                                    else:
                                                        source = entry
                                                    client = SSHClient(storagedriver.storagerouter, username='******')
                                                    path = sd_partition.path.rsplit('/', 1)[0]
                                                    if path:
                                                        client.dir_create(path)
                                                        client.dir_chown(path, 'ovs', 'ovs')
                                                    client.symlink({sd_partition.path: source})
                                                    migrated_objects[source] = sd_partition
                                            if is_list:
                                                storagedriver._data[key].remove(entry)
                                                if len(storagedriver._data[key]) == 0:
                                                    del storagedriver._data[key]
                                            else:
                                                del storagedriver._data[key]
                                            storagedriver.save()
                if 'mountpoint_bfs' in storagedriver._data:
                    storagedriver.mountpoint_dfs = storagedriver._data['mountpoint_bfs']
                    if not storagedriver.mountpoint_dfs:
                        storagedriver.mountpoint_dfs = None
                    del storagedriver._data['mountpoint_bfs']
                    storagedriver.save()
                if 'mountpoint_temp' in storagedriver._data:
                    del storagedriver._data['mountpoint_temp']
                    storagedriver.save()
                if migrated_objects:
                    print 'Loading sizes'
                    config = StorageDriverConfiguration('storagedriver', storagedriver.vpool_guid, storagedriver.storagedriver_id)
                    config.load()
                    for readcache in config.configuration.get('content_addressed_cache', {}).get('clustercache_mount_points', []):
                        path = readcache.get('path', '').rsplit('/', 1)[0]
                        size = int(readcache['size'].strip('KiB')) * 1024 if 'size' in readcache else None
                        if path in migrated_objects:
                            migrated_objects[path].size = long(size)
                            migrated_objects[path].save()
                    for writecache in config.configuration.get('scocache', {}).get('scocache_mount_points', []):
                        path = writecache.get('path', '')
                        size = int(writecache['size'].strip('KiB')) * 1024 if 'size' in writecache else None
                        if path in migrated_objects:
                            migrated_objects[path].size = long(size)
                            migrated_objects[path].save()

            working_version = 4

        # Version 5 introduced:
        # - Failure Domains
        if working_version < 5:
            import os
            from ovs.dal.hybrids.failuredomain import FailureDomain
            from ovs.dal.lists.failuredomainlist import FailureDomainList
            from ovs.dal.lists.storagerouterlist import StorageRouterList
            from ovs.extensions.generic.remote import remote
            from ovs.extensions.generic.sshclient import SSHClient
            failure_domains = FailureDomainList.get_failure_domains()
            if len(failure_domains) > 0:
                failure_domain = failure_domains[0]
            else:
                failure_domain = FailureDomain()
                failure_domain.name = 'Default'
                failure_domain.save()
            for storagerouter in StorageRouterList.get_storagerouters():
                change = False
                if storagerouter.primary_failure_domain is None:
                    storagerouter.primary_failure_domain = failure_domain
                    change = True
                if storagerouter.rdma_capable is None:
                    client = SSHClient(storagerouter, username='******')
                    rdma_capable = False
                    with remote(client.ip, [os], username='******') as rem:
                        for root, dirs, files in rem.os.walk('/sys/class/infiniband'):
                            for directory in dirs:
                                ports_dir = '/'.join([root, directory, 'ports'])
                                if not rem.os.path.exists(ports_dir):
                                    continue
                                for sub_root, sub_dirs, _ in rem.os.walk(ports_dir):
                                    if sub_root != ports_dir:
                                        continue
                                    for sub_directory in sub_dirs:
                                        state_file = '/'.join([sub_root, sub_directory, 'state'])
                                        if rem.os.path.exists(state_file):
                                            if 'ACTIVE' in client.run('cat {0}'.format(state_file)):
                                                rdma_capable = True
                    storagerouter.rdma_capable = rdma_capable
                    change = True
                if change is True:
                    storagerouter.save()

            working_version = 5

        # Version 6 introduced:
        # - Distributed scrubbing
        if working_version < 6:
            from ovs.dal.hybrids.diskpartition import DiskPartition
            from ovs.dal.lists.storagedriverlist import StorageDriverList
            from ovs.extensions.generic.sshclient import SSHClient
            for storage_driver in StorageDriverList.get_storagedrivers():
                root_client = SSHClient(storage_driver.storagerouter, username='******')
                for partition in storage_driver.partitions:
                    if partition.role == DiskPartition.ROLES.SCRUB:
                        old_path = partition.path
                        partition.sub_role = None
                        partition.save()
                        partition.invalidate_dynamics(['folder', 'path'])
                        if root_client.dir_exists(partition.path):
                            continue  # New directory already exists
                        if '_mds_' in old_path:
                            if root_client.dir_exists(old_path):
                                root_client.symlink({partition.path: old_path})
                        if not root_client.dir_exists(partition.path):
                            root_client.dir_create(partition.path)
                        root_client.dir_chmod(partition.path, 0777)

            working_version = 6

        # Version 7 introduced:
        # - vPool status
        if working_version < 7:
            from ovs.dal.hybrids import vpool
            reload(vpool)
            from ovs.dal.hybrids.vpool import VPool
            from ovs.dal.lists.vpoollist import VPoolList
            for _vpool in VPoolList.get_vpools():
                vpool = VPool(_vpool.guid)
                if hasattr(vpool, 'status') and vpool.status is None:
                    vpool.status = VPool.STATUSES.RUNNING
                    vpool.save()

            working_version = 7

        # Version 10 introduced:
        # - Reverse indexes are stored in persistent store
        # - Store more non-changing metadata on disk iso using a dynamic property
        if working_version < 10:
            from ovs.dal.helpers import HybridRunner, Descriptor
            from ovs.dal.datalist import DataList
            from ovs.extensions.storage.persistentfactory import PersistentFactory
            from ovs.extensions.storage.volatilefactory import VolatileFactory
            persistent = PersistentFactory.get_client()
            for prefix in ['ovs_listcache', 'ovs_reverseindex']:
                for key in persistent.prefix(prefix):
                    persistent.delete(key)
            for key in persistent.prefix('ovs_data_'):
                persistent.set(key, persistent.get(key))
            base_reverse_key = 'ovs_reverseindex_{0}_{1}|{2}|{3}'
            hybrid_structure = HybridRunner.get_hybrids()
            for class_descriptor in hybrid_structure.values():
                cls = Descriptor().load(class_descriptor).get_object()
                all_objects = DataList(cls, {'type': DataList.where_operator.AND,
                                             'items': []})
                for item in all_objects:
                    guid = item.guid
                    for relation in item._relations:
                        if relation.foreign_type is None:
                            rcls = cls
                            rclsname = rcls.__name__.lower()
                        else:
                            rcls = relation.foreign_type
                            rclsname = rcls.__name__.lower()
                        key = relation.name
                        rguid = item._data[key]['guid']
                        if rguid is not None:
                            reverse_key = base_reverse_key.format(rclsname, rguid, relation.foreign_key, guid)
                            persistent.set(reverse_key, 0)
            volatile = VolatileFactory.get_client()
            try:
                volatile._client.flush_all()
            except:
                pass
            from ovs.dal.lists.vdisklist import VDiskList
            for vdisk in VDiskList.get_vdisks():
                try:
                    vdisk.metadata = {'lba_size': vdisk.info['lba_size'],
                                      'cluster_multiplier': vdisk.info['cluster_multiplier']}
                    vdisk.save()
                except:
                    pass

            working_version = 10

        # Version 11 introduced:
        # - ALBA accelerated ALBA, meaning different vpool.metadata information
        if working_version < 11:
            from ovs.dal.lists.vpoollist import VPoolList

            for vpool in VPoolList.get_vpools():
                vpool.metadata = {'backend': vpool.metadata}
                if 'metadata' in vpool.metadata['backend']:
                    vpool.metadata['backend']['arakoon_config'] = vpool.metadata['backend'].pop('metadata')
                if 'backend_info' in vpool.metadata['backend']:
                    vpool.metadata['backend']['backend_info']['fragment_cache_on_read'] = True
                    vpool.metadata['backend']['backend_info']['fragment_cache_on_write'] = False
                vpool.save()
            working_version = 11

        return working_version
Exemple #25
0
    def migrate(previous_version):
        """
        Migrates from any version to any version, running all migrations required
        If previous_version is for example 0 and this script is at
        version 3 it will execute two steps:
          - 1 > 2
          - 2 > 3
        @param previous_version: The previous version from which to start the migration.
        """

        working_version = previous_version

        # Version 1 introduced:
        # - The datastore is still empty, add defaults
        if working_version < 1:
            from ovs.dal.hybrids.user import User
            from ovs.dal.hybrids.group import Group
            from ovs.dal.hybrids.role import Role
            from ovs.dal.hybrids.client import Client
            from ovs.dal.hybrids.failuredomain import FailureDomain
            from ovs.dal.hybrids.j_rolegroup import RoleGroup
            from ovs.dal.hybrids.j_roleclient import RoleClient
            from ovs.dal.hybrids.backendtype import BackendType
            from ovs.dal.hybrids.servicetype import ServiceType
            from ovs.dal.hybrids.branding import Branding
            from ovs.dal.lists.backendtypelist import BackendTypeList

            # Create groups
            admin_group = Group()
            admin_group.name = 'administrators'
            admin_group.description = 'Administrators'
            admin_group.save()
            viewers_group = Group()
            viewers_group.name = 'viewers'
            viewers_group.description = 'Viewers'
            viewers_group.save()

            # Create users
            admin = User()
            admin.username = '******'
            admin.password = hashlib.sha256('admin').hexdigest()
            admin.is_active = True
            admin.group = admin_group
            admin.save()

            # Create internal OAuth 2 clients
            admin_pw_client = Client()
            admin_pw_client.ovs_type = 'INTERNAL'
            admin_pw_client.grant_type = 'PASSWORD'
            admin_pw_client.user = admin
            admin_pw_client.save()
            admin_cc_client = Client()
            admin_cc_client.ovs_type = 'INTERNAL'
            admin_cc_client.grant_type = 'CLIENT_CREDENTIALS'
            admin_cc_client.client_secret = ''.join(
                random.choice(string.ascii_letters + string.digits +
                              '|_=+*#@!/-[]{}<>.?,\'";:~') for _ in range(128))
            admin_cc_client.user = admin
            admin_cc_client.save()

            # Create roles
            read_role = Role()
            read_role.code = 'read'
            read_role.name = 'Read'
            read_role.description = 'Can read objects'
            read_role.save()
            write_role = Role()
            write_role.code = 'write'
            write_role.name = 'Write'
            write_role.description = 'Can write objects'
            write_role.save()
            manage_role = Role()
            manage_role.code = 'manage'
            manage_role.name = 'Manage'
            manage_role.description = 'Can manage the system'
            manage_role.save()

            # Attach groups to roles
            mapping = [(admin_group, [read_role, write_role, manage_role]),
                       (viewers_group, [read_role])]
            for setting in mapping:
                for role in setting[1]:
                    rolegroup = RoleGroup()
                    rolegroup.group = setting[0]
                    rolegroup.role = role
                    rolegroup.save()
                for user in setting[0].users:
                    for role in setting[1]:
                        for client in user.clients:
                            roleclient = RoleClient()
                            roleclient.client = client
                            roleclient.role = role
                            roleclient.save()

            # Add backends
            for backend_type_info in [('Ceph', 'ceph_s3'),
                                      ('Amazon', 'amazon_s3'),
                                      ('Swift', 'swift_s3'),
                                      ('Local', 'local'),
                                      ('Distributed', 'distributed'),
                                      ('ALBA', 'alba')]:
                code = backend_type_info[1]
                backend_type = BackendTypeList.get_backend_type_by_code(code)
                if backend_type is None:
                    backend_type = BackendType()
                backend_type.name = backend_type_info[0]
                backend_type.code = code
                backend_type.save()

            # Add service types
            for service_type_info in [
                    ServiceType.SERVICE_TYPES.MD_SERVER,
                    ServiceType.SERVICE_TYPES.ALBA_PROXY,
                    ServiceType.SERVICE_TYPES.ARAKOON
            ]:
                service_type = ServiceType()
                service_type.name = service_type_info
                service_type.save()

            # Branding
            branding = Branding()
            branding.name = 'Default'
            branding.description = 'Default bootstrap theme'
            branding.css = 'bootstrap-default.min.css'
            branding.productname = 'Open vStorage'
            branding.is_default = True
            branding.save()
            slate = Branding()
            slate.name = 'Slate'
            slate.description = 'Dark bootstrap theme'
            slate.css = 'bootstrap-slate.min.css'
            slate.productname = 'Open vStorage'
            slate.is_default = False
            slate.save()

            # Failure Domain
            failure_domain = FailureDomain()
            failure_domain.name = 'Default'
            failure_domain.save()

            # We're now at version 1
            working_version = 1

        # Version 2 introduced:
        # - new Descriptor format
        if working_version < 2:
            import imp
            from ovs.dal.helpers import Descriptor
            from ovs.extensions.storage.persistentfactory import PersistentFactory

            client = PersistentFactory.get_client()
            keys = client.prefix('ovs_data')
            for key in keys:
                data = client.get(key)
                modified = False
                for entry in data.keys():
                    if isinstance(data[entry], dict) and 'source' in data[
                            entry] and 'hybrids' in data[entry]['source']:
                        filename = data[entry]['source']
                        if not filename.startswith('/'):
                            filename = '/opt/OpenvStorage/ovs/dal/{0}'.format(
                                filename)
                        module = imp.load_source(data[entry]['name'], filename)
                        cls = getattr(module, data[entry]['type'])
                        new_data = Descriptor(cls, cached=False).descriptor
                        if 'guid' in data[entry]:
                            new_data['guid'] = data[entry]['guid']
                        data[entry] = new_data
                        modified = True
                if modified is True:
                    data['_version'] += 1
                    client.set(key, data)

            # We're now at version 2
            working_version = 2

        # Version 3 introduced:
        # - new Descriptor format
        if working_version < 3:
            import imp
            from ovs.dal.helpers import Descriptor
            from ovs.extensions.storage.persistentfactory import PersistentFactory

            client = PersistentFactory.get_client()
            keys = client.prefix('ovs_data')
            for key in keys:
                data = client.get(key)
                modified = False
                for entry in data.keys():
                    if isinstance(data[entry],
                                  dict) and 'source' in data[entry]:
                        module = imp.load_source(data[entry]['name'],
                                                 data[entry]['source'])
                        cls = getattr(module, data[entry]['type'])
                        new_data = Descriptor(cls, cached=False).descriptor
                        if 'guid' in data[entry]:
                            new_data['guid'] = data[entry]['guid']
                        data[entry] = new_data
                        modified = True
                if modified is True:
                    data['_version'] += 1
                    client.set(key, data)

            working_version = 3

        # Version 4 introduced:
        # - Flexible SSD layout
        if working_version < 4:
            import os
            from ovs.dal.hybrids.diskpartition import DiskPartition
            from ovs.dal.hybrids.j_storagedriverpartition import StorageDriverPartition
            from ovs.dal.hybrids.servicetype import ServiceType
            from ovs.dal.lists.servicetypelist import ServiceTypeList
            from ovs.dal.lists.storagedriverlist import StorageDriverList
            from ovs.extensions.generic.remote import remote
            from ovs.extensions.generic.sshclient import SSHClient
            from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
            for service in ServiceTypeList.get_by_name(
                    ServiceType.SERVICE_TYPES.MD_SERVER).services:
                mds_service = service.mds_service
                storagedriver = None
                for current_storagedriver in service.storagerouter.storagedrivers:
                    if current_storagedriver.vpool_guid == mds_service.vpool_guid:
                        storagedriver = current_storagedriver
                        break
                tasks = {}
                if storagedriver._data.get('mountpoint_md'):
                    tasks['{0}/mds_{1}_{2}'.format(
                        storagedriver._data.get('mountpoint_md'),
                        storagedriver.vpool.name, mds_service.number)] = (
                            DiskPartition.ROLES.DB,
                            StorageDriverPartition.SUBROLE.MDS)
                if storagedriver._data.get('mountpoint_temp'):
                    tasks['{0}/mds_{1}_{2}'.format(
                        storagedriver._data.get('mountpoint_temp'),
                        storagedriver.vpool.name, mds_service.number)] = (
                            DiskPartition.ROLES.SCRUB,
                            StorageDriverPartition.SUBROLE.MDS)
                for disk in service.storagerouter.disks:
                    for partition in disk.partitions:
                        for directory, (role, subrole) in tasks.iteritems():
                            with remote(storagedriver.storagerouter.ip, [os],
                                        username='******') as rem:
                                stat_dir = directory
                                while not rem.os.path.exists(
                                        stat_dir) and stat_dir != '/':
                                    stat_dir = stat_dir.rsplit('/', 1)[0]
                                    if not stat_dir:
                                        stat_dir = '/'
                                inode = rem.os.stat(stat_dir).st_dev
                            if partition.inode == inode:
                                if role not in partition.roles:
                                    partition.roles.append(role)
                                    partition.save()
                                number = 0
                                migrated = False
                                for sd_partition in storagedriver.partitions:
                                    if sd_partition.role == role and sd_partition.sub_role == subrole:
                                        if sd_partition.mds_service == mds_service:
                                            migrated = True
                                            break
                                        if sd_partition.partition_guid == partition.guid:
                                            number = max(
                                                sd_partition.number, number)
                                if migrated is False:
                                    sd_partition = StorageDriverPartition()
                                    sd_partition.role = role
                                    sd_partition.sub_role = subrole
                                    sd_partition.partition = partition
                                    sd_partition.storagedriver = storagedriver
                                    sd_partition.mds_service = mds_service
                                    sd_partition.size = None
                                    sd_partition.number = number + 1
                                    sd_partition.save()
                                    client = SSHClient(
                                        storagedriver.storagerouter,
                                        username='******')
                                    path = sd_partition.path.rsplit('/', 1)[0]
                                    if path:
                                        client.dir_create(path)
                                        client.dir_chown(path, 'ovs', 'ovs')
                                    client.dir_create(directory)
                                    client.dir_chown(directory, 'ovs', 'ovs')
                                    client.symlink(
                                        {sd_partition.path: directory})
            for storagedriver in StorageDriverList.get_storagedrivers():
                migrated_objects = {}
                for disk in storagedriver.storagerouter.disks:
                    for partition in disk.partitions:
                        # Process all mountpoints that are unique and don't have a specified size
                        for key, (role, sr_info) in {
                                'mountpoint_md': (DiskPartition.ROLES.DB, {
                                    'metadata_{0}':
                                    StorageDriverPartition.SUBROLE.MD,
                                    'tlogs_{0}':
                                    StorageDriverPartition.SUBROLE.TLOG
                                }),
                                'mountpoint_fragmentcache':
                            (DiskPartition.ROLES.WRITE, {
                                'fcache_{0}':
                                StorageDriverPartition.SUBROLE.FCACHE
                            }),
                                'mountpoint_foc': (DiskPartition.ROLES.WRITE, {
                                    'fd_{0}':
                                    StorageDriverPartition.SUBROLE.FD,
                                    'dtl_{0}':
                                    StorageDriverPartition.SUBROLE.DTL
                                }),
                                'mountpoint_dtl': (DiskPartition.ROLES.WRITE, {
                                    'fd_{0}':
                                    StorageDriverPartition.SUBROLE.FD,
                                    'dtl_{0}':
                                    StorageDriverPartition.SUBROLE.DTL
                                }),
                                'mountpoint_readcaches':
                            (DiskPartition.ROLES.READ, {
                                '': None
                            }),
                                'mountpoint_writecaches':
                            (DiskPartition.ROLES.WRITE, {
                                'sco_{0}': StorageDriverPartition.SUBROLE.SCO
                            })
                        }.iteritems():
                            if key in storagedriver._data:
                                is_list = isinstance(storagedriver._data[key],
                                                     list)
                                entries = storagedriver._data[
                                    key][:] if is_list is True else [
                                        storagedriver._data[key]
                                    ]
                                for entry in entries:
                                    if not entry:
                                        if is_list:
                                            storagedriver._data[key].remove(
                                                entry)
                                            if len(storagedriver._data[key]
                                                   ) == 0:
                                                del storagedriver._data[key]
                                        else:
                                            del storagedriver._data[key]
                                    else:
                                        with remote(
                                                storagedriver.storagerouter.ip,
                                            [os],
                                                username='******') as rem:
                                            inode = rem.os.stat(entry).st_dev
                                        if partition.inode == inode:
                                            if role not in partition.roles:
                                                partition.roles.append(role)
                                                partition.save()
                                            for folder, subrole in sr_info.iteritems(
                                            ):
                                                number = 0
                                                migrated = False
                                                for sd_partition in storagedriver.partitions:
                                                    if sd_partition.role == role and sd_partition.sub_role == subrole:
                                                        if sd_partition.partition_guid == partition.guid:
                                                            number = max(
                                                                sd_partition.
                                                                number, number)
                                                if migrated is False:
                                                    sd_partition = StorageDriverPartition(
                                                    )
                                                    sd_partition.role = role
                                                    sd_partition.sub_role = subrole
                                                    sd_partition.partition = partition
                                                    sd_partition.storagedriver = storagedriver
                                                    sd_partition.size = None
                                                    sd_partition.number = number + 1
                                                    sd_partition.save()
                                                    if folder:
                                                        source = '{0}/{1}'.format(
                                                            entry,
                                                            folder.format(
                                                                storagedriver.
                                                                vpool.name))
                                                    else:
                                                        source = entry
                                                    client = SSHClient(
                                                        storagedriver.
                                                        storagerouter,
                                                        username='******')
                                                    path = sd_partition.path.rsplit(
                                                        '/', 1)[0]
                                                    if path:
                                                        client.dir_create(path)
                                                        client.dir_chown(
                                                            path, 'ovs', 'ovs')
                                                    client.symlink({
                                                        sd_partition.path:
                                                        source
                                                    })
                                                    migrated_objects[
                                                        source] = sd_partition
                                            if is_list:
                                                storagedriver._data[
                                                    key].remove(entry)
                                                if len(storagedriver._data[key]
                                                       ) == 0:
                                                    del storagedriver._data[
                                                        key]
                                            else:
                                                del storagedriver._data[key]
                                            storagedriver.save()
                if 'mountpoint_bfs' in storagedriver._data:
                    storagedriver.mountpoint_dfs = storagedriver._data[
                        'mountpoint_bfs']
                    if not storagedriver.mountpoint_dfs:
                        storagedriver.mountpoint_dfs = None
                    del storagedriver._data['mountpoint_bfs']
                    storagedriver.save()
                if 'mountpoint_temp' in storagedriver._data:
                    del storagedriver._data['mountpoint_temp']
                    storagedriver.save()
                if migrated_objects:
                    print 'Loading sizes'
                    config = StorageDriverConfiguration(
                        'storagedriver', storagedriver.vpool_guid,
                        storagedriver.storagedriver_id)
                    config.load()
                    for readcache in config.configuration.get(
                            'content_addressed_cache',
                        {}).get('clustercache_mount_points', []):
                        path = readcache.get('path', '').rsplit('/', 1)[0]
                        size = int(readcache['size'].strip(
                            'KiB')) * 1024 if 'size' in readcache else None
                        if path in migrated_objects:
                            migrated_objects[path].size = long(size)
                            migrated_objects[path].save()
                    for writecache in config.configuration.get(
                            'scocache', {}).get('scocache_mount_points', []):
                        path = writecache.get('path', '')
                        size = int(writecache['size'].strip(
                            'KiB')) * 1024 if 'size' in writecache else None
                        if path in migrated_objects:
                            migrated_objects[path].size = long(size)
                            migrated_objects[path].save()

            working_version = 4

        # Version 5 introduced:
        # - Failure Domains
        if working_version < 5:
            import os
            from ovs.dal.hybrids.failuredomain import FailureDomain
            from ovs.dal.lists.failuredomainlist import FailureDomainList
            from ovs.dal.lists.storagerouterlist import StorageRouterList
            from ovs.extensions.generic.remote import remote
            from ovs.extensions.generic.sshclient import SSHClient
            failure_domains = FailureDomainList.get_failure_domains()
            if len(failure_domains) > 0:
                failure_domain = failure_domains[0]
            else:
                failure_domain = FailureDomain()
                failure_domain.name = 'Default'
                failure_domain.save()
            for storagerouter in StorageRouterList.get_storagerouters():
                change = False
                if storagerouter.primary_failure_domain is None:
                    storagerouter.primary_failure_domain = failure_domain
                    change = True
                if storagerouter.rdma_capable is None:
                    client = SSHClient(storagerouter, username='******')
                    rdma_capable = False
                    with remote(client.ip, [os], username='******') as rem:
                        for root, dirs, files in rem.os.walk(
                                '/sys/class/infiniband'):
                            for directory in dirs:
                                ports_dir = '/'.join(
                                    [root, directory, 'ports'])
                                if not rem.os.path.exists(ports_dir):
                                    continue
                                for sub_root, sub_dirs, _ in rem.os.walk(
                                        ports_dir):
                                    if sub_root != ports_dir:
                                        continue
                                    for sub_directory in sub_dirs:
                                        state_file = '/'.join(
                                            [sub_root, sub_directory, 'state'])
                                        if rem.os.path.exists(state_file):
                                            if 'ACTIVE' in client.run(
                                                    'cat {0}'.format(
                                                        state_file)):
                                                rdma_capable = True
                    storagerouter.rdma_capable = rdma_capable
                    change = True
                if change is True:
                    storagerouter.save()

            working_version = 5

        # Version 6 introduced:
        # - Distributed scrubbing
        if working_version < 6:
            from ovs.dal.hybrids.diskpartition import DiskPartition
            from ovs.dal.lists.storagedriverlist import StorageDriverList
            from ovs.extensions.generic.sshclient import SSHClient
            for storage_driver in StorageDriverList.get_storagedrivers():
                root_client = SSHClient(storage_driver.storagerouter,
                                        username='******')
                for partition in storage_driver.partitions:
                    if partition.role == DiskPartition.ROLES.SCRUB:
                        old_path = partition.path
                        partition.sub_role = None
                        partition.save()
                        partition.invalidate_dynamics(['folder', 'path'])
                        if root_client.dir_exists(partition.path):
                            continue  # New directory already exists
                        if '_mds_' in old_path:
                            if root_client.dir_exists(old_path):
                                root_client.symlink({partition.path: old_path})
                        if not root_client.dir_exists(partition.path):
                            root_client.dir_create(partition.path)
                        root_client.dir_chmod(partition.path, 0777)

            working_version = 6

        # Version 7 introduced:
        # - vPool status
        if working_version < 7:
            from ovs.dal.hybrids import vpool
            reload(vpool)
            from ovs.dal.hybrids.vpool import VPool
            from ovs.dal.lists.vpoollist import VPoolList
            for _vpool in VPoolList.get_vpools():
                vpool = VPool(_vpool.guid)
                if hasattr(vpool, 'status') and vpool.status is None:
                    vpool.status = VPool.STATUSES.RUNNING
                    vpool.save()

            working_version = 7

        # Version 10 introduced:
        # - Reverse indexes are stored in persistent store
        # - Store more non-changing metadata on disk iso using a dynamic property
        if working_version < 10:
            from ovs.dal.helpers import HybridRunner, Descriptor
            from ovs.dal.datalist import DataList
            from ovs.extensions.storage.persistentfactory import PersistentFactory
            from ovs.extensions.storage.volatilefactory import VolatileFactory
            persistent = PersistentFactory.get_client()
            for prefix in ['ovs_listcache', 'ovs_reverseindex']:
                for key in persistent.prefix(prefix):
                    persistent.delete(key)
            for key in persistent.prefix('ovs_data_'):
                persistent.set(key, persistent.get(key))
            base_reverse_key = 'ovs_reverseindex_{0}_{1}|{2}|{3}'
            hybrid_structure = HybridRunner.get_hybrids()
            for class_descriptor in hybrid_structure.values():
                cls = Descriptor().load(class_descriptor).get_object()
                all_objects = DataList(cls, {
                    'type': DataList.where_operator.AND,
                    'items': []
                })
                for item in all_objects:
                    guid = item.guid
                    for relation in item._relations:
                        if relation.foreign_type is None:
                            rcls = cls
                            rclsname = rcls.__name__.lower()
                        else:
                            rcls = relation.foreign_type
                            rclsname = rcls.__name__.lower()
                        key = relation.name
                        rguid = item._data[key]['guid']
                        if rguid is not None:
                            reverse_key = base_reverse_key.format(
                                rclsname, rguid, relation.foreign_key, guid)
                            persistent.set(reverse_key, 0)
            volatile = VolatileFactory.get_client()
            try:
                volatile._client.flush_all()
            except:
                pass
            from ovs.dal.lists.vdisklist import VDiskList
            for vdisk in VDiskList.get_vdisks():
                try:
                    vdisk.metadata = {
                        'lba_size': vdisk.info['lba_size'],
                        'cluster_multiplier': vdisk.info['cluster_multiplier']
                    }
                    vdisk.save()
                except:
                    pass

            working_version = 10

        # Version 11 introduced:
        # - ALBA accelerated ALBA, meaning different vpool.metadata information
        if working_version < 11:
            from ovs.dal.lists.vpoollist import VPoolList

            for vpool in VPoolList.get_vpools():
                vpool.metadata = {'backend': vpool.metadata}
                if 'metadata' in vpool.metadata['backend']:
                    vpool.metadata['backend'][
                        'arakoon_config'] = vpool.metadata['backend'].pop(
                            'metadata')
                if 'backend_info' in vpool.metadata['backend']:
                    vpool.metadata['backend']['backend_info'][
                        'fragment_cache_on_read'] = True
                    vpool.metadata['backend']['backend_info'][
                        'fragment_cache_on_write'] = False
                vpool.save()
            working_version = 11

        return working_version
    def _bootstrap_dal_models(self):
        """
        Load/hook dal models as snmp oids
        """
        _guids = set()

        enabled_key = "{0}_config_dal_enabled".format(STORAGE_PREFIX)
        self.instance_oid = 0
        try:
            enabled = self.persistent.get(enabled_key)
        except KeyNotFoundException:
            enabled = True  # Enabled by default, can be disabled by setting the key
        if enabled:
            from ovs.dal.lists.vdisklist import VDiskList
            from ovs.dal.lists.storagerouterlist import StorageRouterList
            from ovs.dal.lists.pmachinelist import PMachineList
            from ovs.dal.lists.vmachinelist import VMachineList
            from ovs.dal.lists.vpoollist import VPoolList
            from ovs.dal.lists.storagedriverlist import StorageDriverList

            for storagerouter in StorageRouterList.get_storagerouters():
                _guids.add(storagerouter.guid)
                if not self._check_added(storagerouter):
                    self._register_dal_model(10, storagerouter, 'guid', "0")
                    self._register_dal_model(10, storagerouter, 'name', "1")
                    self._register_dal_model(10, storagerouter, 'pmachine', "3", key = 'host_status')
                    self._register_dal_model(10, storagerouter, 'description', "4")
                    self._register_dal_model(10, storagerouter, 'devicename', "5")
                    self._register_dal_model(10, storagerouter, 'dtl_mode', "6")
                    self._register_dal_model(10, storagerouter, 'ip', "8")
                    self._register_dal_model(10, storagerouter, 'machineid', "9")
                    self._register_dal_model(10, storagerouter, 'status', "10")
                    self._register_dal_model(10, storagerouter, '#vdisks', "11",
                                             func = lambda storagerouter: len([vdisk for vpool_vdisks in [storagedriver.vpool.vdisks for storagedriver in storagerouter.storagedrivers] for vdisk in vpool_vdisks if vdisk.storagedriver_id == storagedriver.storagedriver_id]),
                                             atype = int)
                    self._register_dal_model(10, storagerouter, '#vmachines', "12",
                                             func = lambda storagerouter: len(set([vdisk.vmachine.guid for vpool_vdisks in [storagedriver.vpool.vdisks for storagedriver in storagerouter.storagedrivers] for vdisk in vpool_vdisks if vdisk.storagedriver_id == storagedriver.storagedriver_id])),
                                             atype = int)
                    self._register_dal_model(10, storagerouter, '#stored_data', "13",
                                             func = lambda storagerouter: sum([vdisk.vmachine.stored_data for vpool_vdisks in [storagedriver.vpool.vdisks for storagedriver in storagerouter.storagedrivers] for vdisk in vpool_vdisks if vdisk.storagedriver_id == storagedriver.storagedriver_id]),
                                             atype = int)
                    self.instance_oid += 1

            for vm in VMachineList.get_vmachines():
                _guids.add(vm.guid)
                if not self._check_added(vm):
                    if vm.is_vtemplate:
                        self._register_dal_model(11, vm, 'guid', "0")
                        self._register_dal_model(11, vm, 'name', "1")

                        def _children(vmt):
                            children = 0
                            disks = [vd.guid for vd in vmt.vdisks]
                            for vdisk in [vdisk.parent_vdisk_guid for item in [vm.vdisks for vm in VMachineList.get_vmachines() if not vm.is_vtemplate] for vdisk in item]:
                                for disk in disks:
                                    if vdisk == disk:
                                        children += 1
                            return children
                        self._register_dal_model(11, vm, '#children', 2, func = _children, atype = int)
                        self.instance_oid += 1

            for vm in VMachineList.get_vmachines():
                _guids.add(vm.guid)
                if not self._check_added(vm):
                    if not vm.is_vtemplate:
                        self._register_dal_model(0, vm, 'guid', "0")
                        self._register_dal_model(0, vm, 'name', "1")
                        self._register_dal_model(0, vm, 'statistics', "2.0", key = "operations", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.1", key = "cluster_cache_misses_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.2", key = "data_read", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.3", key = "sco_cache_misses", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.4", key = "sco_cache_hits_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.5", key = "sco_cache_hits", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.6", key = "write_operations", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.7", key = "cluster_cache_misses", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.8", key = "read_operations_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.9", key = "sco_cache_misses_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.10", key = "backend_write_operations", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.11", key = "backend_data_read", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.12", key = "cache_hits", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.13", key = "backend_write_operations_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.14", key = "metadata_store_hits_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.15", key = "metadata_store_misses", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.16", key = "backend_data_written", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.17", key = "data_read_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.18", key = "read_operations", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.19", key = "cluster_cache_hits", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.20", key = "data_written_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.21", key = "cluster_cache_hits_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.22", key = "cache_hits_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.23", key = "timestamp", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.24", key = "metadata_store_misses_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.25", key = "backend_data_written_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.26", key = "backend_read_operations", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.27", key = "data_written", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.28", key = "metadata_store_hits", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.29", key = "backend_data_read_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.30", key = "operations_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.31", key = "backend_read_operations_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.32", key = "data_transferred_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.33", key = "write_operations_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.34", key = "data_transferred", atype = int)
                        self._register_dal_model(0, vm, 'stored_data', "3", atype = int)
                        self._register_dal_model(0, vm, 'description', "4")
                        self._register_dal_model(0, vm, 'devicename', "5")
                        self._register_dal_model(0, vm, 'dtl_mode', "6")
                        self._register_dal_model(0, vm, 'hypervisorid', "7")
                        self._register_dal_model(0, vm, 'ip', "8")
                        self._register_dal_model(0, vm, 'status', "10")
                        self._register_dal_model(0, vm, 'stored_data', "10", atype = int)
                        self._register_dal_model(0, vm, 'snapshots', "11", atype = int)
                        self._register_dal_model(0, vm, 'vdisks', "12", atype = int)
                        self._register_dal_model(0, vm, 'DTL', '13',
                                                 func = lambda vm: 'DEGRADED' if all(item == 'DEGRADED' for item in [vd.info['failover_mode'] for vd in vm.vdisks]) else 'OK')
                    self.instance_oid += 1

            for vd in VDiskList.get_vdisks():
                _guids.add(vd.guid)
                if not self._check_added(vd):
                    self._register_dal_model(1, vd, 'guid', "0")
                    self._register_dal_model(1, vd, 'name', "1")
                    self._register_dal_model(1, vd, 'statistics', "2.0", key = "operations", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.1", key = "data_written_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.2", key = "data_read", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.3", key = "sco_cache_misses", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.4", key = "sco_cache_hits_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.5", key = "sco_cache_hits", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.6", key = "write_operations", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.7", key = "cluster_cache_misses", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.8", key = "read_operations_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.9", key = "sco_cache_misses_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.10", key = "backend_write_operations", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.11", key = "backend_data_read", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.12", key = "cache_hits", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.13", key = "backend_write_operations_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.14", key = "metadata_store_hits_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.15", key = "metadata_store_misses", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.16", key = "backend_data_written", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.17", key = "data_read_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.18", key = "read_operations", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.19", key = "cluster_cache_hits", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.20", key = "cluster_cache_misses_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.21", key = "cluster_cache_hits_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.22", key = "cache_hits_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.23", key = "timestamp", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.24", key = "metadata_store_misses_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.25", key = "backend_data_written_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.26", key = "backend_read_operations", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.27", key = "data_written", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.28", key = "metadata_store_hits", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.29", key = "backend_data_read_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.30", key = "operations_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.31", key = "backend_read_operations_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.32", key = "data_transferred_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.33", key = "write_operations_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.34", key = "data_transferred", atype = int)
                    self._register_dal_model(1, vd, 'info', "3", key = 'stored', atype = int)
                    self._register_dal_model(1, vd, 'info', "4", key = 'failover_mode', atype = int)
                    self._register_dal_model(1, vd, 'snapshots', "5", atype = int)
                    self.instance_oid += 1

            for pm in PMachineList.get_pmachines():
                _guids.add(pm.guid)
                if not self._check_added(pm):
                    self._register_dal_model(2, pm, 'guid', "0")
                    self._register_dal_model(2, pm, 'name', "1")
                    self._register_dal_model(2, pm, 'host_status', "2")
                    self.instance_oid += 1

            for vp in VPoolList.get_vpools():
                _guids.add(vp.guid)
                if not self._check_added(vp):
                    self._register_dal_model(3, vp, 'guid', "0")
                    self._register_dal_model(3, vp, 'name', "1")
                    self._register_dal_model(3, vp, 'statistics', "2.0", key = "operations", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.1", key = "cluster_cache_misses_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.2", key = "data_read", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.3", key = "sco_cache_misses", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.4", key = "sco_cache_hits_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.5", key = "sco_cache_hits", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.6", key = "write_operations", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.7", key = "cluster_cache_misses", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.8", key = "read_operations_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.9", key = "sco_cache_misses_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.10", key = "backend_write_operations", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.11", key = "backend_data_read", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.12", key = "cache_hits", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.13", key = "backend_write_operations_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.14", key = "metadata_store_hits_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.15", key = "metadata_store_misses", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.16", key = "backend_data_written", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.17", key = "data_read_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.18", key = "read_operations", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.19", key = "cluster_cache_hits", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.20", key = "data_written_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.21", key = "cluster_cache_hits_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.22", key = "cache_hits_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.23", key = "timestamp", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.24", key = "metadata_store_misses_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.25", key = "backend_data_written_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.26", key = "backend_read_operations", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.27", key = "data_written", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.28", key = "metadata_store_hits", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.29", key = "backend_data_read_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.30", key = "operations_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.31", key = "backend_read_operations_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.32", key = "data_transferred_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.33", key = "write_operations_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.34", key = "data_transferred", atype = int)
                    self._register_dal_model(3, vp, 'status', "3")
                    self._register_dal_model(3, vp, 'description', "4")
                    self._register_dal_model(3, vp, 'vdisks', "5", atype = int)
                    self._register_dal_model(3, vp, '#vmachines', "6",
                                             func = lambda vp: len(set([vd.vmachine.guid for vd in vp.vdisks])),
                                             atype = int)
                    self.instance_oid += 1

            for storagedriver in StorageDriverList.get_storagedrivers():
                _guids.add(storagedriver.guid)
                if not self._check_added(storagedriver):
                    self._register_dal_model(4, storagedriver, 'guid', "0")
                    self._register_dal_model(4, storagedriver, 'name', "1")
                    self._register_dal_model(4, storagedriver, 'stored_data', "2", atype = int)
                    self.instance_oid += 1

            try:
                # try to load OVS Backends
                from ovs.dal.lists.albabackendlist import AlbaBackendList
                for backend in AlbaBackendList.get_albabackends():
                    _guids.add(backend.guid)
                    if not self._check_added(backend):
                        self._register_dal_model(5, backend, 'guid', 0)
                        self._register_dal_model(5, backend, 'name', 1)
                        for disk_id in range(len((backend.all_disks))):
                            self._register_dal_model(5, backend, 'all_disks', '2.{0}.0'.format(disk_id), key = "name", index=disk_id)
                            self._register_dal_model(5, backend, 'all_disks', '2.{0}.1'.format(disk_id), key = "usage.size", atype = long, index=disk_id)
                            self._register_dal_model(5, backend, 'all_disks', '2.{0}.2'.format(disk_id), key = "usage.used", atype = long, index=disk_id)
                            self._register_dal_model(5, backend, 'all_disks', '2.{0}.3'.format(disk_id), key = "usage.available", atype = long, index=disk_id)
                            self._register_dal_model(5, backend, 'all_disks', '2.{0}.4'.format(disk_id), key = "state.state", index=disk_id)
                            self._register_dal_model(5, backend, 'all_disks', '2.{0}.5'.format(disk_id), key = "node_id", index=disk_id)

                        self.instance_oid += 1
            except ImportError:
                print('OVS Backend not present')
                pass
            reload = False
            for object_guid in list(self.model_oids):
                if object_guid not in _guids:
                    self.model_oids.remove(object_guid)
                    reload = True
            if reload:
                self._reload_snmp()
Exemple #27
0
    def migrate():
        """
        Executes async migrations. It doesn't matter too much when they are executed, as long as they get eventually
        executed. This code will typically contain:
        * "dangerous" migration code (it needs certain running services)
        * Migration code depending on a cluster-wide state
        * ...
        * Successfully finishing a piece of migration code, should create an entry in /ovs/framework/migration in case it should not be executed again
        *     Eg: /ovs/framework/migration|stats_monkey_integration: True
        """
        MigrationController._logger.info('Preparing out of band migrations...')

        from ovs.dal.lists.servicetypelist import ServiceTypeList
        from ovs.dal.lists.storagedriverlist import StorageDriverList
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.dal.lists.vpoollist import VPoolList
        from ovs.extensions.db.arakooninstaller import ArakoonInstaller
        from ovs.extensions.generic.configuration import Configuration
        from ovs.extensions.generic.sshclient import SSHClient
        from ovs_extensions.generic.toolbox import ExtensionsToolbox
        from ovs.extensions.migration.migration.ovsmigrator import ExtensionMigrator
        from ovs.extensions.packages.packagefactory import PackageFactory
        from ovs_extensions.services.interfaces.systemd import Systemd
        from ovs.extensions.services.servicefactory import ServiceFactory
        from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
        from ovs.lib.helpers.storagedriver.installer import StorageDriverInstaller

        MigrationController._logger.info('Start out of band migrations...')
        service_manager = ServiceFactory.get_manager()

        sr_client_map = {}
        for storagerouter in StorageRouterList.get_storagerouters():
            sr_client_map[storagerouter.guid] = SSHClient(endpoint=storagerouter.ip,  # Is triggered during post-update code too during which the ovs-watcher-framework service is still down and thus not refreshing the heartbeat --> use IP i/o StorageRouter
                                                          username='******')

        #########################################################
        # Addition of 'ExecReload' for AlbaProxy SystemD services
        if ServiceFactory.get_service_type() == 'systemd':
            changed_clients = set()
            for storagedriver in StorageDriverList.get_storagedrivers():
                root_client = sr_client_map[storagedriver.storagerouter_guid]
                for alba_proxy in storagedriver.alba_proxies:
                    service = alba_proxy.service
                    service_name = 'ovs-{0}'.format(service.name)
                    if not service_manager.has_service(name=service_name, client=root_client):
                        continue
                    if 'ExecReload=' in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                        continue

                    try:
                        service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY, client=root_client, target_name=service_name)
                        changed_clients.add(root_client)
                    except:
                        MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
            for root_client in changed_clients:
                root_client.run(['systemctl', 'daemon-reload'])

        ##################################################################
        # Adjustment of open file descriptors for Arakoon services to 8192
        changed_clients = set()
        for storagerouter in StorageRouterList.get_storagerouters():
            root_client = sr_client_map[storagerouter.guid]
            for service_name in service_manager.list_services(client=root_client):
                if not service_name.startswith('ovs-arakoon-'):
                    continue

                if ServiceFactory.get_service_type() == 'systemd':
                    path = '/lib/systemd/system/{0}.service'.format(service_name)
                    check = 'LimitNOFILE=8192'
                else:
                    path = '/etc/init/{0}.conf'.format(service_name)
                    check = 'limit nofile 8192 8192'

                if not root_client.file_exists(path):
                    continue
                if check in root_client.file_read(path):
                    continue

                try:
                    service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name=service_name)
                    changed_clients.add(root_client)
                    ExtensionsToolbox.edit_version_file(client=root_client,
                                                        package_name='arakoon',
                                                        old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, service_name))
                except:
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

        #############################
        # Migrate to multiple proxies
        for storagedriver in StorageDriverList.get_storagedrivers():
            vpool = storagedriver.vpool
            root_client = sr_client_map[storagedriver.storagerouter_guid]
            for alba_proxy in storagedriver.alba_proxies:
                # Rename alba_proxy service in model
                service = alba_proxy.service
                old_service_name = 'albaproxy_{0}'.format(vpool.name)
                new_service_name = 'albaproxy_{0}_0'.format(vpool.name)
                if old_service_name != service.name:
                    continue
                service.name = new_service_name
                service.save()

                if not service_manager.has_service(name=old_service_name, client=root_client):
                    continue
                old_configuration_key = '/ovs/framework/hosts/{0}/services/{1}'.format(storagedriver.storagerouter.machine_id, old_service_name)
                if not Configuration.exists(key=old_configuration_key):
                    continue

                # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='alba',
                                                    old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, old_service_name),
                                                    new_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, new_service_name))

                # Register new service and remove old service
                service_manager.add_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY,
                                            client=root_client,
                                            params=Configuration.get(old_configuration_key),
                                            target_name='ovs-{0}'.format(new_service_name))

                # Update scrub proxy config
                proxy_config_key = '/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)
                proxy_config = None if Configuration.exists(key=proxy_config_key) is False else Configuration.get(proxy_config_key)
                if proxy_config is not None:
                    fragment_cache = proxy_config.get(StorageDriverConfiguration.CACHE_FRAGMENT, ['none', {}])
                    if fragment_cache[0] == 'alba' and fragment_cache[1].get('cache_on_write') is True:  # Accelerated ALBA configured
                        fragment_cache_scrub_info = copy.deepcopy(fragment_cache)
                        fragment_cache_scrub_info[1]['cache_on_read'] = False
                        proxy_scrub_config_key = '/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid)
                        proxy_scrub_config = None if Configuration.exists(key=proxy_scrub_config_key) is False else Configuration.get(proxy_scrub_config_key)
                        if proxy_scrub_config is not None and proxy_scrub_config[StorageDriverConfiguration.CACHE_FRAGMENT] == ['none']:
                            proxy_scrub_config[StorageDriverConfiguration.CACHE_FRAGMENT] = fragment_cache_scrub_info
                            Configuration.set(key=proxy_scrub_config_key, value=proxy_scrub_config)

            # Update 'backend_connection_manager' section
            changes = False
            storagedriver_config = StorageDriverConfiguration(vpool.guid, storagedriver.storagedriver_id)
            if 'backend_connection_manager' not in storagedriver_config.configuration:
                continue

            current_config = storagedriver_config.configuration['backend_connection_manager']
            if current_config.get('backend_type') != 'MULTI':
                changes = True
                backend_connection_manager = {'backend_type': 'MULTI'}
                for index, proxy in enumerate(sorted(storagedriver.alba_proxies, key=lambda pr: pr.service.ports[0])):
                    backend_connection_manager[str(index)] = copy.deepcopy(current_config)
                    # noinspection PyUnresolvedReferences
                    backend_connection_manager[str(index)]['alba_connection_use_rora'] = True
                    # noinspection PyUnresolvedReferences
                    backend_connection_manager[str(index)]['alba_connection_rora_manifest_cache_capacity'] = 5000
                    # noinspection PyUnresolvedReferences
                    for key, value in backend_connection_manager[str(index)].items():
                        if key.startswith('backend_interface'):
                            backend_connection_manager[key] = value
                            # noinspection PyUnresolvedReferences
                            del backend_connection_manager[str(index)][key]
                for key, value in {'backend_interface_retries_on_error': 5,
                                   'backend_interface_retry_interval_secs': 1,
                                   'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                    if key not in backend_connection_manager:
                        backend_connection_manager[key] = value
            else:
                backend_connection_manager = current_config
                for value in backend_connection_manager.values():
                    if isinstance(value, dict):
                        for key, val in value.items():
                            if key.startswith('backend_interface'):
                                backend_connection_manager[key] = val
                                changes = True
                                del value[key]
                for key, value in {'backend_interface_retries_on_error': 5,
                                   'backend_interface_retry_interval_secs': 1,
                                   'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                    if key not in backend_connection_manager:
                        changes = True
                        backend_connection_manager[key] = value

            if changes is True:
                storagedriver_config.clear_backend_connection_manager()
                storagedriver_config.configure_backend_connection_manager(**backend_connection_manager)
                storagedriver_config.save(root_client)

                # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='volumedriver',
                                                    old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, 'volumedriver_{0}'.format(vpool.name)))
                if service_manager.__class__ == Systemd:
                    root_client.run(['systemctl', 'daemon-reload'])

        ########################################
        # Update metadata_store_bits information
        vpools = VPoolList.get_vpools()
        for vpool in vpools:
            bits = None
            for storagedriver in vpool.storagedrivers:
                key = '/ovs/framework/hosts/{0}/services/volumedriver_{1}'.format(storagedriver.storagerouter.machine_id, vpool.name)
                if Configuration.exists(key=key) and 'METADATASTORE_BITS' not in Configuration.get(key=key):
                    if bits is None:
                        entries = service_manager.extract_from_service_file(name='ovs-volumedriver_{0}'.format(vpool.name),
                                                                            client=sr_client_map[storagedriver.storagerouter_guid],
                                                                            entries=['METADATASTORE_BITS='])
                        if len(entries) == 1:
                            bits = entries[0].split('=')[-1]
                            bits = int(bits) if bits.isdigit() else 5
                    if bits is not None:
                        try:
                            content = Configuration.get(key=key)
                            content['METADATASTORE_BITS'] = bits
                            Configuration.set(key=key, value=content)
                        except:
                            MigrationController._logger.exception('Error updating volumedriver info for vPool {0} on StorageRouter {1}'.format(vpool.name, storagedriver.storagerouter.name))

            if bits is not None:
                vpool.metadata_store_bits = bits
                vpool.save()

        #####################################
        # Update the vPool metadata structure
        def _update_metadata_structure(metadata):
            metadata = copy.deepcopy(metadata)
            cache_structure = {'read': False,
                               'write': False,
                               'is_backend': False,
                               'quota': None,
                               'backend_info': {'name': None,  # Will be filled in when is_backend is true
                                                'backend_guid': None,
                                                'alba_backend_guid': None,
                                                'policies': None,
                                                'preset': None,
                                                'arakoon_config': None,
                                                'connection_info': {'client_id': None,
                                                                    'client_secret': None,
                                                                    'host': None,
                                                                    'port': None,
                                                                    'local': None}}
                               }
            structure_map = {StorageDriverConfiguration.CACHE_BLOCK: {'read': 'block_cache_on_read',
                                                                      'write': 'block_cache_on_write',
                                                                      'quota': 'quota_bc',
                                                                      'backend_prefix': 'backend_bc_{0}'},
                             StorageDriverConfiguration.CACHE_FRAGMENT: {'read': 'fragment_cache_on_read',
                                                                         'write': 'fragment_cache_on_write',
                                                                         'quota': 'quota_fc',
                                                                         'backend_prefix': 'backend_aa_{0}'}}
            if 'arakoon_config' in metadata['backend']:  # Arakoon config should be placed under the backend info
                metadata['backend']['backend_info']['arakoon_config'] = metadata['backend'].pop('arakoon_config')
            if 'connection_info' in metadata['backend']:  # Connection info sohuld be placed under the backend info
                metadata['backend']['backend_info']['connection_info'] = metadata['backend'].pop('connection_info')
            if 'caching_info' not in metadata:  # Caching info is the new key
                would_be_caching_info = {}
                metadata['caching_info'] = would_be_caching_info
                # Extract all caching data for every storagerouter
                current_caching_info = metadata['backend'].pop('caching_info')  # Pop to mutate metadata
                for storagerouter_guid in current_caching_info.iterkeys():
                    current_cache_data = current_caching_info[storagerouter_guid]
                    storagerouter_caching_info = {}
                    would_be_caching_info[storagerouter_guid] = storagerouter_caching_info
                    for cache_type, cache_type_mapping in structure_map.iteritems():
                        new_cache_structure = copy.deepcopy(cache_structure)
                        storagerouter_caching_info[cache_type] = new_cache_structure
                        for new_structure_key, old_structure_key in cache_type_mapping.iteritems():
                            if new_structure_key == 'backend_prefix':
                                # Get possible backend related info
                                metadata_key = old_structure_key.format(storagerouter_guid)
                                if metadata_key not in metadata:
                                    continue
                                backend_data = metadata.pop(metadata_key)  # Pop to mutate metadata
                                new_cache_structure['is_backend'] = True
                                # Copy over the old data
                                new_cache_structure['backend_info']['arakoon_config'] = backend_data['arakoon_config']
                                new_cache_structure['backend_info'].update(backend_data['backend_info'])
                                new_cache_structure['backend_info']['connection_info'].update(backend_data['connection_info'])
                            else:
                                new_cache_structure[new_structure_key] = current_cache_data.get(old_structure_key)
            return metadata

        vpools = VPoolList.get_vpools()
        for vpool in vpools:
            try:
                new_metadata = _update_metadata_structure(vpool.metadata)
                vpool.metadata = new_metadata
                vpool.save()
            except KeyError:
                MigrationController._logger.exception('Exceptions occurred when updating the metadata for vPool {0}'.format(vpool.name))

        ##############################################
        # Always use indent=4 during Configuration set
        def _resave_all_config_entries(config_path='/ovs'):
            """
            Recursive functions which checks every config management key if its a directory or not.
            If not a directory, we retrieve the config and just save it again using the new indentation logic
            """
            for item in Configuration.list(config_path):
                new_path = config_path + '/' + item
                print new_path
                if Configuration.dir_exists(new_path) is True:
                    _resave_all_config_entries(config_path=new_path)
                else:
                    try:
                        _config = Configuration.get(new_path)
                        Configuration.set(new_path, _config)
                    except:
                        _config = Configuration.get(new_path, raw=True)
                        Configuration.set(new_path, _config, raw=True)
        if ExtensionMigrator.THIS_VERSION <= 13:  # There is no way of checking whether this new indentation logic has been applied, so we only perform this for version 13 and lower
            MigrationController._logger.info('Re-saving every configuration setting with new indentation rules')
            _resave_all_config_entries()

        ############################
        # Update some default values
        def _update_manifest_cache_size(_proxy_config_key):
            updated = False
            manifest_cache_size = 500 * 1024 * 1024
            if Configuration.exists(key=_proxy_config_key):
                _proxy_config = Configuration.get(key=_proxy_config_key)
                for cache_type in [StorageDriverConfiguration.CACHE_BLOCK, StorageDriverConfiguration.CACHE_FRAGMENT]:
                    if cache_type in _proxy_config and _proxy_config[cache_type][0] == 'alba':
                        if _proxy_config[cache_type][1]['manifest_cache_size'] != manifest_cache_size:
                            updated = True
                            _proxy_config[cache_type][1]['manifest_cache_size'] = manifest_cache_size
                if _proxy_config['manifest_cache_size'] != manifest_cache_size:
                    updated = True
                    _proxy_config['manifest_cache_size'] = manifest_cache_size

                if updated is True:
                    Configuration.set(key=_proxy_config_key, value=_proxy_config)
            return updated

        for storagedriver in StorageDriverList.get_storagedrivers():
            try:
                vpool = storagedriver.vpool
                root_client = sr_client_map[storagedriver.storagerouter_guid]
                _update_manifest_cache_size('/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid))  # Generic scrub proxy is deployed every time scrubbing kicks in, so no need to restart these services
                for alba_proxy in storagedriver.alba_proxies:
                    if _update_manifest_cache_size('/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)) is True:
                        # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service)
                        ExtensionsToolbox.edit_version_file(client=root_client,
                                                            package_name='alba',
                                                            old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, alba_proxy.service.name))

                # Update 'backend_connection_manager' section
                changes = False
                storagedriver_config = StorageDriverConfiguration(vpool.guid, storagedriver.storagedriver_id)
                if 'backend_connection_manager' not in storagedriver_config.configuration:
                    continue

                current_config = storagedriver_config.configuration['backend_connection_manager']
                for key, value in current_config.iteritems():
                    if key.isdigit() is True:
                        if value.get('alba_connection_asd_connection_pool_capacity') != 10:
                            changes = True
                            value['alba_connection_asd_connection_pool_capacity'] = 10
                        if value.get('alba_connection_timeout') != 30:
                            changes = True
                            value['alba_connection_timeout'] = 30
                        if value.get('alba_connection_rora_manifest_cache_capacity') != 25000:
                            changes = True
                            value['alba_connection_rora_manifest_cache_capacity'] = 25000

                if changes is True:
                    storagedriver_config.clear_backend_connection_manager()
                    storagedriver_config.configure_backend_connection_manager(**current_config)
                    storagedriver_config.save(root_client)

                    # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section)
                    ExtensionsToolbox.edit_version_file(client=root_client,
                                                        package_name='volumedriver',
                                                        old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, 'volumedriver_{0}'.format(vpool.name)))
            except Exception:
                MigrationController._logger.exception('Updating default configuration values failed for StorageDriver {0}'.format(storagedriver.storagedriver_id))

        ####################################################
        # Adding proxy fail fast as env variable for proxies
        changed_clients = set()
        for storagerouter in StorageRouterList.get_storagerouters():
            root_client = sr_client_map[storagerouter.guid]
            for service_name in service_manager.list_services(client=root_client):
                if not service_name.startswith('ovs-albaproxy_'):
                    continue

                if ServiceFactory.get_service_type() == 'systemd':
                    path = '/lib/systemd/system/{0}.service'.format(service_name)
                    check = 'Environment=ALBA_FAIL_FAST=true'
                else:
                    path = '/etc/init/{0}.conf'.format(service_name)
                    check = 'env ALBA_FAIL_FAST=true'

                if not root_client.file_exists(path):
                    continue
                if check in root_client.file_read(path):
                    continue

                try:
                    service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY, client=root_client, target_name=service_name)
                    changed_clients.add(root_client)
                    ExtensionsToolbox.edit_version_file(client=root_client,
                                                        package_name='alba',
                                                        old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, service_name))
                except:
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

        ######################################
        # Integration of stats monkey (2.10.2)
        if Configuration.get(key='/ovs/framework/migration|stats_monkey_integration', default=False) is False:
            try:
                # Get content of old key into new key
                old_stats_monkey_key = '/statsmonkey/statsmonkey'
                if Configuration.exists(key=old_stats_monkey_key) is True:
                    Configuration.set(key='/ovs/framework/monitoring/stats_monkey', value=Configuration.get(key=old_stats_monkey_key))
                    Configuration.delete(key=old_stats_monkey_key)

                # Make sure to disable the stats monkey by default or take over the current schedule if it was configured manually before
                celery_key = '/ovs/framework/scheduling/celery'
                current_value = None
                scheduling_config = Configuration.get(key=celery_key, default={})
                if 'statsmonkey.run_all_stats' in scheduling_config:  # Old celery task name of the stats monkey
                    current_value = scheduling_config.pop('statsmonkey.run_all_stats')
                scheduling_config['ovs.stats_monkey.run_all'] = current_value
                scheduling_config['alba.stats_monkey.run_all'] = current_value
                Configuration.set(key=celery_key, value=scheduling_config)

                support_key = '/ovs/framework/support'
                support_config = Configuration.get(key=support_key)
                support_config['support_agent'] = support_config.pop('enabled', True)
                support_config['remote_access'] = support_config.pop('enablesupport', False)
                Configuration.set(key=support_key, value=support_config)

                # Make sure once this finished, it never runs again by setting this key to True
                Configuration.set(key='/ovs/framework/migration|stats_monkey_integration', value=True)
            except Exception:
                MigrationController._logger.exception('Integration of stats monkey failed')

        ######################################################
        # Write away cluster ID to a file for back-up purposes
        try:
            cluster_id = Configuration.get(key='/ovs/framework/cluster_id', default=None)
            with open(Configuration.CONFIG_STORE_LOCATION, 'r') as config_file:
                config = json.load(config_file)
            if cluster_id is not None and config.get('cluster_id', None) is None:
                config['cluster_id'] = cluster_id
                with open(Configuration.CONFIG_STORE_LOCATION, 'w') as config_file:
                    json.dump(config, config_file, indent=4)
        except Exception:
            MigrationController._logger.exception('Writing cluster id to a file failed.')

        #########################################################
        # Additional string formatting in Arakoon services (2.11)
        try:
            if Configuration.get(key='/ovs/framework/migration|arakoon_service_update', default=False) is False:
                arakoon_service_names = [ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name) for cluster_name in Configuration.list(key='ovs/arakoon')]
                for storagerouter in StorageRouterList.get_masters():
                    for service_name in arakoon_service_names:
                        config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(storagerouter.machine_id, service_name)
                        if Configuration.exists(key=config_key):
                            config = Configuration.get(key=config_key)
                            config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR
                            config['ARAKOON_PKG_NAME'] = PackageFactory.PKG_ARAKOON
                            config['ARAKOON_VERSION_CMD'] = PackageFactory.VERSION_CMD_ARAKOON
                            Configuration.set(key=config_key, value=config)
                # Make sure once this finished, it never runs again by setting this key to True
                Configuration.set(key='/ovs/framework/migration|arakoon_service_update', value=True)
        except Exception:
            MigrationController._logger.exception('Updating the string formatting for the Arakoon services failed')

        ############################################################
        # Additional string formatting in ALBA proxy services (2.11)
        changed_clients = set()
        try:
            if Configuration.get(key='/ovs/framework/migration|alba_proxy_service_update', default=False) is False:
                alba_pkg_name, alba_version_cmd = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_ALBA)
                for service in ServiceTypeList.get_by_name('AlbaProxy').services:
                    root_client = sr_client_map[service.storagerouter_guid]
                    config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(service.storagerouter.machine_id, service.name)
                    if Configuration.exists(key=config_key):
                        config = Configuration.get(key=config_key)
                        config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR
                        config['ALBA_PKG_NAME'] = alba_pkg_name
                        config['ALBA_VERSION_CMD'] = alba_version_cmd
                        Configuration.set(key=config_key, value=config)
                        service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY,
                                                           client=root_client,
                                                           target_name='ovs-{0}'.format(service.name))
                        changed_clients.add(root_client)

                # Make sure once this finished, it never runs again by setting this key to True
                Configuration.set(key='/ovs/framework/migration|alba_proxy_service_update', value=True)
        except Exception:
            MigrationController._logger.exception('Updating the string formatting for the Arakoon services failed')

        ############################################################
        # Additional string formatting in DTL/VOLDRV services (2.11)
        try:
            if Configuration.get(key='/ovs/framework/migration|voldrv_service_update', default=False) is False:
                sd_pkg_name, sd_version_cmd = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_SD)
                for vpool in VPoolList.get_vpools():
                    for storagedriver in vpool.storagedrivers:
                        root_client = sr_client_map[storagedriver.storagerouter_guid]
                        for entry in ['dtl', 'volumedriver']:
                            service_name = '{0}_{1}'.format(entry, vpool.name)
                            service_template = StorageDriverInstaller.SERVICE_TEMPLATE_DTL if entry == 'dtl' else StorageDriverInstaller.SERVICE_TEMPLATE_SD
                            config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(storagedriver.storagerouter.machine_id, service_name)
                            if Configuration.exists(key=config_key):
                                config = Configuration.get(key=config_key)
                                config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR
                                config['VOLDRV_PKG_NAME'] = sd_pkg_name
                                config['VOLDRV_VERSION_CMD'] = sd_version_cmd
                                Configuration.set(key=config_key, value=config)
                                service_manager.regenerate_service(name=service_template,
                                                                   client=root_client,
                                                                   target_name='ovs-{0}'.format(service_name))
                                changed_clients.add(root_client)

                # Make sure once this finished, it never runs again by setting this key to True
                Configuration.set(key='/ovs/framework/migration|voldrv_service_update', value=True)
        except Exception:
            MigrationController._logger.exception('Updating the string formatting for the Arakoon services failed')

        #######################################################
        # Storing actual package name in version files (2.11.0) (https://github.com/openvstorage/framework/issues/1876)
        if Configuration.get(key='/ovs/framework/migration|actual_package_name_in_version_file', default=False) is False:
            try:
                voldrv_pkg_name, _ = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_SD)
                for storagerouter in StorageRouterList.get_storagerouters():
                    root_client = sr_client_map.get(storagerouter.guid)
                    if root_client is None:
                        continue

                    for file_name in root_client.file_list(directory=ServiceFactory.RUN_FILE_DIR):
                        if not file_name.endswith('.version'):
                            continue
                        file_path = '{0}/{1}'.format(ServiceFactory.RUN_FILE_DIR, file_name)
                        contents = root_client.file_read(filename=file_path)
                        regenerate = False
                        if voldrv_pkg_name == PackageFactory.PKG_VOLDRV_SERVER:
                            if 'volumedriver-server' in contents:
                                regenerate = True
                                contents = contents.replace('volumedriver-server', PackageFactory.PKG_VOLDRV_SERVER)
                                root_client.file_write(filename=file_path, contents=contents)
                        elif voldrv_pkg_name == PackageFactory.PKG_VOLDRV_SERVER_EE:
                            if 'volumedriver-server' in contents or PackageFactory.PKG_VOLDRV_SERVER in contents:
                                regenerate = True
                                contents = contents.replace('volumedriver-server', PackageFactory.PKG_VOLDRV_SERVER_EE)
                                contents = contents.replace(PackageFactory.PKG_VOLDRV_SERVER, PackageFactory.PKG_VOLDRV_SERVER_EE)
                                root_client.file_write(filename=file_path, contents=contents)

                        if regenerate is True:
                            service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_DTL if file_name.startswith('dtl') else StorageDriverInstaller.SERVICE_TEMPLATE_SD,
                                                               client=root_client,
                                                               target_name='ovs-{0}'.format(file_name.split('.')[0]))  # Leave out .version
                            changed_clients.add(root_client)
                Configuration.set(key='/ovs/framework/migration|actual_package_name_in_version_file', value=True)
            except Exception:
                MigrationController._logger.exception('Updating actual package name for version files failed')

        for root_client in changed_clients:
            try:
                root_client.run(['systemctl', 'daemon-reload'])
            except Exception:
                MigrationController._logger.exception('Executing command "systemctl daemon-reload" failed')

        #########################################################
        # Addition of 'Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50' for AlbaProxy SystemD services
        if ServiceFactory.get_service_type() == 'systemd':
            changed_clients = set()
            for storagedriver in StorageDriverList.get_storagedrivers():
                root_client = sr_client_map[storagedriver.storagerouter_guid]
                for alba_proxy in storagedriver.alba_proxies:
                    service = alba_proxy.service
                    service_name = 'ovs-{0}'.format(service.name)
                    if not service_manager.has_service(name=service_name, client=root_client):
                        continue
                    if "Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50" in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                        continue
                    try:
                        service_manager.regenerate_service(name='ovs-albaproxy', client=root_client, target_name=service_name)
                        changed_clients.add(root_client)
                    except:
                        MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
            for root_client in changed_clients:
                root_client.run(['systemctl', 'daemon-reload'])
        #########################################################
        # Addition of 'Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50' for Arakoon SystemD services
        if ServiceFactory.get_service_type() == 'systemd':
            changed_clients = set()
            for storagerouter in StorageRouterList.get_storagerouters():
                root_client = sr_client_map[storagerouter.guid]
                for service_name in service_manager.list_services(client=root_client):
                    if not service_name.startswith('ovs-arakoon-'):
                        continue
                    if not service_manager.has_service(name=service_name, client=root_client):
                        continue
                    if "Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50" in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                        continue
                    try:
                        service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name=service_name)
                        changed_clients.add(root_client)
                    except:
                        MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
            for root_client in changed_clients:
                root_client.run(['systemctl', 'daemon-reload'])

        MigrationController._logger.info('Finished out of band migrations')
Exemple #28
0
    def mds_checkup():
        """
        Validates the current MDS setup/configuration and takes actions where required
        Actions:
            * Verify which StorageRouters are available
            * Make mapping between vPools and its StorageRouters
            * For each vPool make sure every StorageRouter has at least 1 MDS service with capacity available
            * For each vPool retrieve the optimal configuration and store it for each StorageDriver
            * For each vPool run an ensure safety for all vDisks
        :raises RuntimeError: When ensure safety fails for any vDisk
        :return: None
        :rtype: NoneType
        """
        MDSServiceController._logger.info('Started')

        # Verify StorageRouter availability
        root_client_cache = {}
        storagerouters = StorageRouterList.get_storagerouters()
        storagerouters.sort(key=lambda _sr: ExtensionsToolbox.advanced_sort(
            element=_sr.ip, separator='.'))
        offline_nodes = []
        for storagerouter in storagerouters:
            try:
                root_client = SSHClient(endpoint=storagerouter,
                                        username='******')
                MDSServiceController._logger.debug(
                    'StorageRouter {0} - ONLINE'.format(storagerouter.name))
            except UnableToConnectException:
                root_client = None
                offline_nodes.append(storagerouter)
                MDSServiceController._logger.error(
                    'StorageRouter {0} - OFFLINE'.format(storagerouter.name))
            root_client_cache[storagerouter] = root_client

        # Create mapping per vPool and its StorageRouters
        mds_dict = collections.OrderedDict()
        for vpool in sorted(VPoolList.get_vpools(), key=lambda k: k.name):
            MDSServiceController._logger.info('vPool {0}'.format(vpool.name))
            mds_dict[vpool] = {}

            # Loop all StorageDrivers and add StorageDriver to mapping
            for storagedriver in vpool.storagedrivers:
                storagerouter = storagedriver.storagerouter
                if storagerouter not in mds_dict[vpool]:
                    mds_dict[vpool][storagerouter] = {
                        'client': root_client_cache.get(storagerouter),
                        'services': [],
                        'storagedriver': storagedriver
                    }

            # Loop all MDS Services and append services to appropriate vPool / StorageRouter combo
            mds_services = vpool.mds_services
            mds_services.sort(
                key=lambda _mds_service: ExtensionsToolbox.advanced_sort(
                    element=_mds_service.service.storagerouter.ip,
                    separator='.'))
            for mds_service in mds_services:
                service = mds_service.service
                storagerouter = service.storagerouter
                if storagerouter not in mds_dict[vpool]:
                    mds_dict[vpool][storagerouter] = {
                        'client': root_client_cache.get(storagerouter),
                        'services': [],
                        'storagedriver': None
                    }
                MDSServiceController._logger.debug(
                    'vPool {0} - StorageRouter {1} - Service on port {2}'.
                    format(vpool.name, storagerouter.name, service.ports[0]))
                mds_dict[vpool][storagerouter]['services'].append(mds_service)

        failures = []
        for vpool, storagerouter_info in mds_dict.iteritems():
            # Make sure there's at least 1 MDS on every StorageRouter that's not overloaded
            # Remove all MDS Services which have been manually marked for removal (by setting its capacity to 0)
            max_load = Configuration.get(
                '/ovs/vpools/{0}/mds_config|mds_maxload'.format(vpool.guid))
            for storagerouter in sorted(storagerouter_info,
                                        key=lambda k: k.ip):
                total_load = 0.0
                root_client = mds_dict[vpool][storagerouter]['client']
                mds_services = mds_dict[vpool][storagerouter]['services']

                for mds_service in list(
                        sorted(mds_services, key=lambda k: k.number)):
                    port = mds_service.service.ports[0]
                    number = mds_service.number
                    # Manual intervention required here in order for the MDS to be cleaned up
                    # @TODO: Remove this and make a dynamic calculation to check which MDSes to remove
                    if mds_service.capacity == 0 and len(
                            mds_service.vdisks_guids) == 0:
                        MDSServiceController._logger.warning(
                            'vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: Removing'
                            .format(vpool.name, storagerouter.name, number,
                                    port))
                        try:
                            MDSServiceController.remove_mds_service(
                                mds_service=mds_service,
                                reconfigure=True,
                                allow_offline=root_client is None)
                        except Exception:
                            MDSServiceController._logger.exception(
                                'vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: Failed to remove'
                                .format(vpool.name, storagerouter.name, number,
                                        port))
                        mds_services.remove(mds_service)
                    else:
                        _, next_load = MDSServiceController.get_mds_load(
                            mds_service=mds_service)
                        if next_load == float('inf'):
                            total_load = sys.maxint * -1  # Cast to lowest possible value if any MDS service capacity is set to infinity
                        else:
                            total_load += next_load

                        if next_load < max_load:
                            MDSServiceController._logger.debug(
                                'vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: Capacity available - Load at {4}%'
                                .format(vpool.name, storagerouter.name, number,
                                        port, next_load))
                        else:
                            MDSServiceController._logger.debug(
                                'vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: No capacity available - Load at {4}%'
                                .format(vpool.name, storagerouter.name, number,
                                        port, next_load))

                if total_load >= max_load * len(mds_services):
                    mds_services_to_add = int(
                        math.ceil((total_load - max_load * len(mds_services)) /
                                  max_load))
                    MDSServiceController._logger.info(
                        'vPool {0} - StorageRouter {1} - Average load per service {2:.2f}% - Max load per service {3:.2f}% - {4} MDS service{5} will be added'
                        .format(vpool.name, storagerouter.name,
                                total_load / len(mds_services), max_load,
                                mds_services_to_add,
                                '' if mds_services_to_add == 1 else 's'))

                    for _ in range(mds_services_to_add):
                        MDSServiceController._logger.info(
                            'vPool {0} - StorageRouter {1} - Adding new MDS Service'
                            .format(vpool.name, storagerouter.name))
                        try:
                            mds_services.append(
                                MDSServiceController.prepare_mds_service(
                                    storagerouter=storagerouter, vpool=vpool))
                        except Exception:
                            MDSServiceController._logger.exception(
                                'vPool {0} - StorageRouter {1} - Failed to create new MDS Service'
                                .format(vpool.name, storagerouter.name))

            # After potentially having added new MDSes, retrieve the optimal configuration
            mds_config_set = {}
            try:
                mds_config_set = MDSServiceController.get_mds_storagedriver_config_set(
                    vpool=vpool, offline_nodes=offline_nodes)
                MDSServiceController._logger.debug(
                    'vPool {0} - Optimal configuration {1}'.format(
                        vpool.name, mds_config_set))
            except (NotFoundException, RuntimeError):
                MDSServiceController._logger.exception(
                    'vPool {0} - Failed to retrieve the optimal configuration'.
                    format(vpool.name))

            # Apply the optimal MDS configuration per StorageDriver
            for storagerouter in sorted(storagerouter_info,
                                        key=lambda k: k.ip):
                root_client = mds_dict[vpool][storagerouter]['client']
                storagedriver = mds_dict[vpool][storagerouter]['storagedriver']

                if storagedriver is None:
                    MDSServiceController._logger.critical(
                        'vPool {0} - StorageRouter {1} - No matching StorageDriver found'
                        .format(vpool.name, storagerouter.name))
                    continue
                if storagerouter.guid not in mds_config_set:
                    MDSServiceController._logger.critical(
                        'vPool {0} - StorageRouter {1} - Not marked as offline, but could not retrieve an optimal MDS config'
                        .format(vpool.name, storagerouter.name))
                    continue
                if root_client is None:
                    MDSServiceController._logger.debug(
                        'vPool {0} - StorageRouter {1} - Marked as offline, not setting optimal MDS configuration'
                        .format(vpool.name, storagerouter.name))
                    continue

                storagedriver_config = StorageDriverConfiguration(
                    vpool_guid=vpool.guid,
                    storagedriver_id=storagedriver.storagedriver_id)
                if storagedriver_config.config_missing is False:
                    optimal_mds_config = mds_config_set[storagerouter.guid]
                    MDSServiceController._logger.debug(
                        'vPool {0} - StorageRouter {1} - Storing optimal MDS configuration: {2}'
                        .format(vpool.name, storagerouter.name,
                                optimal_mds_config))
                    # Filesystem section in StorageDriver configuration are all parameters used for vDisks created directly on the filesystem
                    # So when a vDisk gets created on the filesystem, these MDSes will be assigned to them
                    storagedriver_config.configure_filesystem(
                        fs_metadata_backend_mds_nodes=optimal_mds_config)
                    storagedriver_config.save(root_client)

            # Execute a safety check, making sure the master/slave configuration is optimal.
            MDSServiceController._logger.info(
                'vPool {0} - Ensuring safety for all vDisks'.format(
                    vpool.name))
            for vdisk in vpool.vdisks:
                try:
                    MDSServiceController.ensure_safety(vdisk_guid=vdisk.guid)
                except Exception:
                    message = 'Ensure safety for vDisk {0} with guid {1} failed'.format(
                        vdisk.name, vdisk.guid)
                    MDSServiceController._logger.exception(message)
                    failures.append(message)
        if len(failures) > 0:
            raise RuntimeError('\n - ' + '\n - '.join(failures))
        MDSServiceController._logger.info('Finished')
Exemple #29
0
    def execute_scrub():
        """
        Retrieve and execute scrub work
        :return: None
        """
        ScheduledTaskController._logger.info('Scrubber - Started')
        vpools = VPoolList.get_vpools()
        error_messages = []
        scrub_locations = []
        for storage_router in StorageRouterList.get_storagerouters():
            scrub_partitions = storage_router.partition_config.get(
                DiskPartition.ROLES.SCRUB, [])
            if len(scrub_partitions) == 0:
                continue
            if len(scrub_partitions) > 1:
                raise RuntimeError(
                    'Multiple {0} partitions defined for StorageRouter {1}'.
                    format(DiskPartition.ROLES.SCRUB, storage_router.name))

            partition = DiskPartition(scrub_partitions[0])
            ScheduledTaskController._logger.info(
                'Scrubber - Storage Router {0:<15} has {1} partition at {2}'.
                format(storage_router.ip, DiskPartition.ROLES.SCRUB,
                       partition.folder))
            try:
                SSHClient(storage_router, 'root')
                scrub_locations.append({
                    'scrub_path': str(partition.folder),
                    'storage_router': storage_router
                })
            except UnableToConnectException:
                ScheduledTaskController._logger.warning(
                    'Scrubber - Storage Router {0:<15} is not reachable'.
                    format(storage_router.ip))

        number_of_vpools = len(vpools)
        if number_of_vpools >= 6:
            max_threads_per_vpool = 1
        elif number_of_vpools >= 3:
            max_threads_per_vpool = 2
        else:
            max_threads_per_vpool = 5

        threads = []
        counter = 0
        for vp in vpools:
            # Verify amount of vDisks on vPool
            ScheduledTaskController._logger.info(
                'Scrubber - vPool {0} - Checking scrub work'.format(vp.name))
            if len(vp.vdisks) == 0:
                ScheduledTaskController._logger.info(
                    'Scrubber - vPool {0} - No scrub work'.format(vp.name))
                continue

            # Fill queue with all vDisks for current vPool
            vpool_queue = Queue()
            for vd in vp.vdisks:
                if vd.is_vtemplate is True:
                    continue
                vd.invalidate_dynamics('storagedriver_id')
                if not vd.storagedriver_id:
                    ScheduledTaskController._logger.warning(
                        'Scrubber - vPool {0} - vDisk {1} {2} - No StorageDriver ID found'
                        .format(vp.name, vd.guid, vd.name))
                    continue
                vpool_queue.put(vd.guid)

            # Copy backend connection manager information in separate key
            threads_to_spawn = min(max_threads_per_vpool, len(scrub_locations))
            ScheduledTaskController._logger.info(
                'Scrubber - vPool {0} - Spawning {1} thread{2}'.format(
                    vp.name, threads_to_spawn,
                    '' if threads_to_spawn == 1 else 's'))
            for _ in range(threads_to_spawn):
                scrub_target = scrub_locations[counter % len(scrub_locations)]
                thread = Thread(
                    target=ScheduledTaskController.execute_scrub_work,
                    name='scrub_{0}_{1}'.format(
                        vp.guid, scrub_target['storage_router'].guid),
                    args=(vpool_queue, vp, scrub_target, error_messages))
                thread.start()
                threads.append(thread)
                counter += 1

        for thread in threads:
            thread.join()

        if len(error_messages) > 0:
            raise Exception('Errors occurred while scrubbing:\n  - {0}'.format(
                '\n  - '.join(error_messages)))
Exemple #30
0
    def shrink_vpool(cls,
                     storagedriver_guid,
                     offline_storage_router_guids=list()):
        """
        Removes a StorageDriver (if its the last StorageDriver for a vPool, the vPool is removed as well)
        :param storagedriver_guid: Guid of the StorageDriver to remove
        :type storagedriver_guid: str
        :param offline_storage_router_guids: Guids of StorageRouters which are offline and will be removed from cluster.
                                             WHETHER VPOOL WILL BE DELETED DEPENDS ON THIS
        :type offline_storage_router_guids: list
        :return: None
        :rtype: NoneType
        """
        # TODO: Add logging
        # TODO: Unit test individual pieces of code
        # Validations
        storagedriver = StorageDriver(storagedriver_guid)
        storagerouter = storagedriver.storagerouter
        cls._logger.info(
            'StorageDriver {0} - Deleting StorageDriver {1}'.format(
                storagedriver.guid, storagedriver.name))

        vp_installer = VPoolInstaller(name=storagedriver.vpool.name)
        vp_installer.validate(storagedriver=storagedriver)

        sd_installer = StorageDriverInstaller(vp_installer=vp_installer,
                                              storagedriver=storagedriver)

        cls._logger.info(
            'StorageDriver {0} - Checking availability of related StorageRouters'
            .format(storagedriver.guid, storagedriver.name))
        sr_client_map = SSHClient.get_clients(endpoints=[
            sd.storagerouter for sd in vp_installer.vpool.storagedrivers
        ],
                                              user_names=['root'])
        sr_installer = StorageRouterInstaller(root_client=sr_client_map.get(
            storagerouter, {}).get('root'),
                                              storagerouter=storagerouter,
                                              vp_installer=vp_installer,
                                              sd_installer=sd_installer)

        offline_srs = sr_client_map.pop('offline')
        if sorted([sr.guid for sr in offline_srs
                   ]) != sorted(offline_storage_router_guids):
            raise RuntimeError('Not all StorageRouters are reachable')

        if storagerouter not in offline_srs:
            mtpt_pids = sr_installer.root_client.run(
                "lsof -t +D '/mnt/{0}' || true".format(
                    vp_installer.name.replace(r"'", r"'\''")),
                allow_insecure=True).splitlines()
            if len(mtpt_pids) > 0:
                raise RuntimeError(
                    'vPool cannot be deleted. Following processes keep the vPool mount point occupied: {0}'
                    .format(', '.join(mtpt_pids)))

        # Retrieve reachable StorageDrivers
        reachable_storagedrivers = []
        for sd in vp_installer.vpool.storagedrivers:
            if sd.storagerouter not in sr_client_map:
                # StorageRouter is offline
                continue

            sd_key = '/ovs/vpools/{0}/hosts/{1}/config'.format(
                vp_installer.vpool.guid, sd.storagedriver_id)
            if Configuration.exists(sd_key) is True:
                path = Configuration.get_configuration_path(sd_key)
                with remote(sd.storagerouter.ip,
                            [LocalStorageRouterClient]) as rem:
                    try:
                        lsrc = rem.LocalStorageRouterClient(path)
                        lsrc.server_revision(
                        )  # 'Cheap' call to verify whether volumedriver is responsive
                        cls._logger.info(
                            'StorageDriver {0} - Responsive StorageDriver {1} on node with IP {2}'
                            .format(storagedriver.guid, sd.name,
                                    sd.storagerouter.ip))
                        reachable_storagedrivers.append(sd)
                    except Exception as exception:
                        if not is_connection_failure(exception):
                            raise

        if len(reachable_storagedrivers) == 0:
            raise RuntimeError(
                'Could not find any responsive node in the cluster')

        # Start removal
        if vp_installer.storagedriver_amount > 1:
            vp_installer.update_status(status=VPool.STATUSES.SHRINKING)
        else:
            vp_installer.update_status(status=VPool.STATUSES.DELETING)

        # Clean up stale vDisks
        cls._logger.info('StorageDriver {0} - Removing stale vDisks'.format(
            storagedriver.guid))
        VDiskController.remove_stale_vdisks(vpool=vp_installer.vpool)

        # Reconfigure the MDSes
        cls._logger.info('StorageDriver {0} - Reconfiguring MDSes'.format(
            storagedriver.guid))
        for vdisk_guid in storagerouter.vdisks_guids:
            try:
                MDSServiceController.ensure_safety(
                    vdisk_guid=vdisk_guid,
                    excluded_storagerouter_guids=[storagerouter.guid] +
                    offline_storage_router_guids)
            except Exception:
                cls._logger.exception(
                    'StorageDriver {0} - vDisk {1} - Ensuring MDS safety failed'
                    .format(storagedriver.guid, vdisk_guid))

        # Validate that all MDSes on current StorageRouter have been moved away
        # Ensure safety does not always throw an error, that's why we perform this check here instead of in the Exception clause of above code
        vdisks = []
        for mds in vp_installer.mds_services:
            for junction in mds.vdisks:
                vdisk = junction.vdisk
                if vdisk in vdisks:
                    continue
                vdisks.append(vdisk)
                cls._logger.critical(
                    'StorageDriver {0} - vDisk {1} {2} - MDS Services have not been migrated away'
                    .format(storagedriver.guid, vdisk.guid, vdisk.name))
        if len(vdisks) > 0:
            # Put back in RUNNING, so it can be used again. Errors keep on displaying in GUI now anyway
            vp_installer.update_status(status=VPool.STATUSES.RUNNING)
            raise RuntimeError(
                'Not all MDS Services have been successfully migrated away')

        # Start with actual removal
        errors_found = False
        if storagerouter not in offline_srs:
            errors_found &= sd_installer.stop_services()

        errors_found &= vp_installer.configure_cluster_registry(
            exclude=[storagedriver], apply_on=reachable_storagedrivers)
        errors_found &= vp_installer.update_node_distance_map()
        errors_found &= vp_installer.remove_mds_services()
        errors_found &= sd_installer.clean_config_management()
        errors_found &= sd_installer.clean_model()

        if storagerouter not in offline_srs:
            errors_found &= sd_installer.clean_directories(
                mountpoints=StorageRouterController.get_mountpoints(
                    client=sr_installer.root_client))

            try:
                DiskController.sync_with_reality(
                    storagerouter_guid=storagerouter.guid)
            except Exception:
                cls._logger.exception(
                    'StorageDriver {0} - Synchronizing disks with reality failed'
                    .format(storagedriver.guid))
                errors_found = True

        if vp_installer.storagedriver_amount > 1:
            # Update the vPool metadata and run DTL checkup
            vp_installer.vpool.metadata['caching_info'].pop(
                sr_installer.storagerouter.guid, None)
            vp_installer.vpool.save()

            try:
                VDiskController.dtl_checkup(vpool_guid=vp_installer.vpool.guid,
                                            ensure_single_timeout=600)
            except Exception:
                cls._logger.exception(
                    'StorageDriver {0} - DTL checkup failed for vPool {1} with guid {2}'
                    .format(storagedriver.guid, vp_installer.name,
                            vp_installer.vpool.guid))
        else:
            cls._logger.info(
                'StorageDriver {0} - Removing vPool from model'.format(
                    storagedriver.guid))
            # Clean up model
            try:
                vp_installer.vpool.delete()
            except Exception:
                errors_found = True
                cls._logger.exception(
                    'StorageDriver {0} - Cleaning up vPool from the model failed'
                    .format(storagedriver.guid))
            Configuration.delete('/ovs/vpools/{0}'.format(
                vp_installer.vpool.guid))

        cls._logger.info('StorageDriver {0} - Running MDS checkup'.format(
            storagedriver.guid))
        try:
            MDSServiceController.mds_checkup()
        except Exception:
            cls._logger.exception(
                'StorageDriver {0} - MDS checkup failed'.format(
                    storagedriver.guid))

        # Update vPool status
        if errors_found is True:
            if vp_installer.storagedriver_amount > 1:
                vp_installer.update_status(status=VPool.STATUSES.FAILURE)
            raise RuntimeError(
                '1 or more errors occurred while trying to remove the StorageDriver. Please check the logs for more information'
            )

        if vp_installer.storagedriver_amount > 1:
            vp_installer.update_status(status=VPool.STATUSES.RUNNING)
        cls._logger.info(
            'StorageDriver {0} - Deleted StorageDriver {1}'.format(
                storagedriver.guid, storagedriver.name))

        if len(VPoolList.get_vpools()) == 0:
            cluster_name = ArakoonInstaller.get_cluster_name('voldrv')
            if ArakoonInstaller.get_arakoon_metadata_by_cluster_name(
                    cluster_name=cluster_name)['internal'] is True:
                cls._logger.debug(
                    'StorageDriver {0} - Removing Arakoon cluster {1}'.format(
                        storagedriver.guid, cluster_name))
                try:
                    installer = ArakoonInstaller(cluster_name=cluster_name)
                    installer.load()
                    installer.delete_cluster()
                except Exception:
                    cls._logger.exception(
                        'StorageDriver {0} - Delete voldrv Arakoon cluster failed'
                        .format(storagedriver.guid))
                service_type = ServiceTypeList.get_by_name(
                    ServiceType.SERVICE_TYPES.ARAKOON)
                service_name = ArakoonInstaller.get_service_name_for_cluster(
                    cluster_name=cluster_name)
                for service in list(service_type.services):
                    if service.name == service_name:
                        service.delete()

        # Remove watcher volumedriver service if last StorageDriver on current StorageRouter
        if len(
                storagerouter.storagedrivers
        ) == 0 and storagerouter not in offline_srs:  # ensure client is initialized for StorageRouter
            try:
                if cls._service_manager.has_service(
                        ServiceFactory.SERVICE_WATCHER_VOLDRV,
                        client=sr_installer.root_client):
                    cls._service_manager.stop_service(
                        ServiceFactory.SERVICE_WATCHER_VOLDRV,
                        client=sr_installer.root_client)
                    cls._service_manager.remove_service(
                        ServiceFactory.SERVICE_WATCHER_VOLDRV,
                        client=sr_installer.root_client)
            except Exception:
                cls._logger.exception(
                    'StorageDriver {0} - {1} service deletion failed'.format(
                        storagedriver.guid,
                        ServiceFactory.SERVICE_WATCHER_VOLDRV))
Exemple #31
0
    def migrate():
        """
        Executes async migrations. It doesn't matter too much when they are executed, as long as they get eventually
        executed. This code will typically contain:
        * "dangerous" migration code (it needs certain running services)
        * Migration code depending on a cluster-wide state
        * ...
        """
        MigrationController._logger.info('Preparing out of band migrations...')

        from ovs.dal.lists.storagedriverlist import StorageDriverList
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.dal.lists.vpoollist import VPoolList
        from ovs.extensions.generic.configuration import Configuration
        from ovs.extensions.generic.sshclient import SSHClient
        from ovs_extensions.generic.toolbox import ExtensionsToolbox
        from ovs_extensions.services.interfaces.systemd import Systemd
        from ovs.extensions.services.servicefactory import ServiceFactory
        from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
        from ovs.lib.generic import GenericController

        MigrationController._logger.info('Start out of band migrations...')
        service_manager = ServiceFactory.get_manager()

        sr_client_map = {}
        for storagerouter in StorageRouterList.get_storagerouters():
            sr_client_map[storagerouter.guid] = SSHClient(endpoint=storagerouter,
                                                          username='******')

        #########################################################
        # Addition of 'ExecReload' for AlbaProxy SystemD services
        if ServiceFactory.get_service_type() == 'systemd':
            changed_clients = set()
            for storagedriver in StorageDriverList.get_storagedrivers():
                root_client = sr_client_map[storagedriver.storagerouter_guid]
                for alba_proxy in storagedriver.alba_proxies:
                    service = alba_proxy.service
                    service_name = 'ovs-{0}'.format(service.name)
                    if not service_manager.has_service(name=service_name, client=root_client):
                        continue
                    if 'ExecReload=' in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                        continue

                    try:
                        service_manager.regenerate_service(name='ovs-albaproxy', client=root_client, target_name=service_name)
                        changed_clients.add(root_client)
                    except:
                        MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
            for root_client in changed_clients:
                root_client.run(['systemctl', 'daemon-reload'])

        ##################################################################
        # Adjustment of open file descriptors for Arakoon services to 8192
        changed_clients = set()
        for storagerouter in StorageRouterList.get_storagerouters():
            root_client = sr_client_map[storagerouter.guid]
            for service_name in service_manager.list_services(client=root_client):
                if not service_name.startswith('ovs-arakoon-'):
                    continue

                if ServiceFactory.get_service_type() == 'systemd':
                    path = '/lib/systemd/system/{0}.service'.format(service_name)
                    check = 'LimitNOFILE=8192'
                else:
                    path = '/etc/init/{0}.conf'.format(service_name)
                    check = 'limit nofile 8192 8192'

                if not root_client.file_exists(path):
                    continue
                if check in root_client.file_read(path):
                    continue

                try:
                    service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name=service_name)
                    changed_clients.add(root_client)
                    ExtensionsToolbox.edit_version_file(client=root_client, package_name='arakoon', old_service_name=service_name)
                except:
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

        #############################
        # Migrate to multiple proxies
        for storagedriver in StorageDriverList.get_storagedrivers():
            vpool = storagedriver.vpool
            root_client = sr_client_map[storagedriver.storagerouter_guid]
            for alba_proxy in storagedriver.alba_proxies:
                # Rename alba_proxy service in model
                service = alba_proxy.service
                old_service_name = 'albaproxy_{0}'.format(vpool.name)
                new_service_name = 'albaproxy_{0}_0'.format(vpool.name)
                if old_service_name != service.name:
                    continue
                service.name = new_service_name
                service.save()

                if not service_manager.has_service(name=old_service_name, client=root_client):
                    continue
                old_configuration_key = '/ovs/framework/hosts/{0}/services/{1}'.format(storagedriver.storagerouter.machine_id, old_service_name)
                if not Configuration.exists(key=old_configuration_key):
                    continue

                # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='alba',
                                                    old_service_name=old_service_name,
                                                    new_service_name=new_service_name)

                # Register new service and remove old service
                service_manager.add_service(name='ovs-albaproxy',
                                            client=root_client,
                                            params=Configuration.get(old_configuration_key),
                                            target_name='ovs-{0}'.format(new_service_name))

                # Update scrub proxy config
                proxy_config_key = '/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)
                proxy_config = None if Configuration.exists(key=proxy_config_key) is False else Configuration.get(proxy_config_key)
                if proxy_config is not None:
                    fragment_cache = proxy_config.get('fragment_cache', ['none', {}])
                    if fragment_cache[0] == 'alba' and fragment_cache[1].get('cache_on_write') is True:  # Accelerated ALBA configured
                        fragment_cache_scrub_info = copy.deepcopy(fragment_cache)
                        fragment_cache_scrub_info[1]['cache_on_read'] = False
                        proxy_scrub_config_key = '/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid)
                        proxy_scrub_config = None if Configuration.exists(key=proxy_scrub_config_key) is False else Configuration.get(proxy_scrub_config_key)
                        if proxy_scrub_config is not None and proxy_scrub_config['fragment_cache'] == ['none']:
                            proxy_scrub_config['fragment_cache'] = fragment_cache_scrub_info
                            Configuration.set(proxy_scrub_config_key,
                                              json.dumps(proxy_scrub_config, indent=4),
                                              raw=True)

            # Update 'backend_connection_manager' section
            changes = False
            storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id)
            storagedriver_config.load()
            if 'backend_connection_manager' not in storagedriver_config.configuration:
                continue

            current_config = storagedriver_config.configuration['backend_connection_manager']
            if current_config.get('backend_type') != 'MULTI':
                changes = True
                backend_connection_manager = {'backend_type': 'MULTI'}
                for index, proxy in enumerate(sorted(storagedriver.alba_proxies, key=lambda pr: pr.service.ports[0])):
                    backend_connection_manager[str(index)] = copy.deepcopy(current_config)
                    # noinspection PyUnresolvedReferences
                    backend_connection_manager[str(index)]['alba_connection_use_rora'] = True
                    # noinspection PyUnresolvedReferences
                    backend_connection_manager[str(index)]['alba_connection_rora_manifest_cache_capacity'] = 5000
                    # noinspection PyUnresolvedReferences
                    for key, value in backend_connection_manager[str(index)].items():
                        if key.startswith('backend_interface'):
                            backend_connection_manager[key] = value
                            # noinspection PyUnresolvedReferences
                            del backend_connection_manager[str(index)][key]
                for key, value in {'backend_interface_retries_on_error': 5,
                                   'backend_interface_retry_interval_secs': 1,
                                   'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                    if key not in backend_connection_manager:
                        backend_connection_manager[key] = value
            else:
                backend_connection_manager = current_config
                for value in backend_connection_manager.values():
                    if isinstance(value, dict):
                        for key, val in value.items():
                            if key.startswith('backend_interface'):
                                backend_connection_manager[key] = val
                                changes = True
                                del value[key]
                for key, value in {'backend_interface_retries_on_error': 5,
                                   'backend_interface_retry_interval_secs': 1,
                                   'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                    if key not in backend_connection_manager:
                        changes = True
                        backend_connection_manager[key] = value

            if changes is True:
                storagedriver_config.clear_backend_connection_manager()
                storagedriver_config.configure_backend_connection_manager(**backend_connection_manager)
                storagedriver_config.save(root_client)

                # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='volumedriver',
                                                    old_service_name='volumedriver_{0}'.format(vpool.name))
                if service_manager.ImplementationClass == Systemd:
                    root_client.run(['systemctl', 'daemon-reload'])

        ########################################
        # Update metadata_store_bits information
        for vpool in VPoolList.get_vpools():
            bits = None
            for storagedriver in vpool.storagedrivers:
                key = '/ovs/framework/hosts/{0}/services/volumedriver_{1}'.format(storagedriver.storagerouter.machine_id, vpool.name)
                if Configuration.exists(key=key) and 'METADATASTORE_BITS' not in Configuration.get(key=key):
                    if bits is None:
                        entries = service_manager.extract_from_service_file(name='ovs-volumedriver_{0}'.format(vpool.name),
                                                                            client=sr_client_map[storagedriver.storagerouter_guid],
                                                                            entries=['METADATASTORE_BITS='])
                        if len(entries) == 1:
                            bits = entries[0].split('=')[-1]
                            bits = int(bits) if bits.isdigit() else 5
                    if bits is not None:
                        try:
                            content = Configuration.get(key=key)
                            content['METADATASTORE_BITS'] = bits
                            Configuration.set(key=key, value=content)
                        except:
                            MigrationController._logger.exception('Error updating volumedriver info for vPool {0} on StorageRouter {1}'.format(vpool.name, storagedriver.storagerouter.name))

            if bits is not None:
                vpool.metadata_store_bits = bits
                vpool.save()

        MigrationController._logger.info('Finished out of band migrations')
        GenericController.refresh_package_information()
Exemple #32
0
 def list(self):
     """
     Overview of all vPools
     """
     return VPoolList.get_vpools()
Exemple #33
0
    def migrate(previous_version, master_ips=None, extra_ips=None):
        """
        Migrates from a given version to the current version. It uses 'previous_version' to be smart
        wherever possible, but the code should be able to migrate any version towards the expected version.
        When this is not possible, the code can set a minimum version and raise when it is not met.
        :param previous_version: The previous version from which to start the migration
        :type previous_version: float
        :param master_ips: IP addresses of the MASTER nodes
        :type master_ips: list or None
        :param extra_ips: IP addresses of the EXTRA nodes
        :type extra_ips: list or None
        """

        _ = master_ips, extra_ips
        working_version = previous_version

        # From here on, all actual migration should happen to get to the expected state for THIS RELEASE
        if working_version < ExtensionMigrator.THIS_VERSION:
            try:
                from ovs.dal.lists.storagerouterlist import StorageRouterList
                from ovs.dal.lists.vpoollist import VPoolList
                from ovs.extensions.generic.configuration import Configuration
                from ovs.extensions.services.servicefactory import ServiceFactory
                from ovs.extensions.generic.sshclient import SSHClient
                from ovs.extensions.generic.system import System
                local_machine_id = System.get_my_machine_id()
                local_ip = Configuration.get(
                    '/ovs/framework/hosts/{0}/ip'.format(local_machine_id))
                local_client = SSHClient(endpoint=local_ip, username='******')

                # Multiple Proxies
                if local_client.dir_exists(
                        directory=
                        '/opt/OpenvStorage/config/storagedriver/storagedriver'
                ):
                    local_client.dir_delete(directories=[
                        '/opt/OpenvStorage/config/storagedriver/storagedriver'
                    ])

                # MDS safety granularity on vPool level
                mds_safety_key = '/ovs/framework/storagedriver'
                if Configuration.exists(key=mds_safety_key):
                    current_mds_settings = Configuration.get(
                        key=mds_safety_key)
                    for vpool in VPoolList.get_vpools():
                        vpool_key = '/ovs/vpools/{0}'.format(vpool.guid)
                        if Configuration.dir_exists(key=vpool_key):
                            Configuration.set(
                                key='{0}/mds_config'.format(vpool_key),
                                value=current_mds_settings)
                    Configuration.delete(key=mds_safety_key)

                # Introduction of edition key
                if Configuration.get(key=Configuration.EDITION_KEY,
                                     default=None) not in [
                                         PackageFactory.EDITION_COMMUNITY,
                                         PackageFactory.EDITION_ENTERPRISE
                                     ]:
                    for storagerouter in StorageRouterList.get_storagerouters(
                    ):
                        try:
                            Configuration.set(
                                key=Configuration.EDITION_KEY,
                                value=storagerouter.features['alba']
                                ['edition'])
                            break
                        except:
                            continue

            except:
                ExtensionMigrator._logger.exception(
                    'Error occurred while executing the migration code')
                # Don't update migration version with latest version, resulting in next migration trying again to execute this code
                return ExtensionMigrator.THIS_VERSION - 1

        return ExtensionMigrator.THIS_VERSION
    def check_for_halted_volumes(cls, result_handler):
        """
        Checks for halted volumes on a single or multiple vPools
        This will only check the volume states on the current node. If any other volumedriver would be down,
        only the HA'd volumes would pop-up as they could appear halted here (should be verified by the volumedriver team)
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :return: None
        :rtype: NoneType
        """
        vpools = VPoolList.get_vpools()
        local_sr = System.get_my_storagerouter()

        if len(vpools) == 0:
            result_handler.skip('No vPools found!'.format(len(vpools)), code=ErrorCodes.vpools_none)
            return
        for vpool in vpools:
            log_start = 'Halted volumes test vPool {0}'.format(vpool.name)
            if vpool.guid not in local_sr.vpools_guids:
                result_handler.skip('{0} - Skipping vPool {1} because it is not living here.'.format(log_start, vpool.name),
                                    code=ErrorCodes.vpool_not_local, add_to_result=False)
                continue

            result_handler.info('{0} - Retrieving all information'.format(log_start), add_to_result=False)
            storagedriver = None
            for std in vpool.storagedrivers:
                if std.storagerouter_guid == local_sr.guid:
                    storagedriver = std
                    break

            if storagedriver is None:
                result_handler.failure('{0} - Could not associate a StorageDriver with this StorageRouter'.format(log_start),
                                       code=ErrorCodes.std_no_str)
                continue

            volume_fenced_states = dict((key, []) for key in cls.FENCED_HALTED_STATUS_MAP.keys())
            volume_lists = {cls.VDISK_HALTED_STATES.HALTED: [], cls.VDISK_HALTED_STATES.FENCED: []}
            volume_states = {cls.VDISK_HALTED_STATES.HALTED: {cls.VDISK_HALTED_STATES.HALTED: volume_lists[cls.VDISK_HALTED_STATES.HALTED]},
                             cls.VDISK_HALTED_STATES.FENCED: volume_fenced_states}  # Less loops to write for outputting
            result_handler.info('{0} - Scanning for halted volumes'.format(log_start), add_to_result=False)
            try:
                voldrv_client = vpool.storagedriver_client
                objectregistry_client = vpool.objectregistry_client
            except Exception:
                cls.logger.exception('{0} - Unable to instantiate the required clients'.format(log_start))
                result_handler.exception('{0} - Unable to load the Volumedriver clients'.format(log_start),
                                         code=ErrorCodes.voldr_unknown_problem)
                continue
            try:
                # Listing all halted volumes with the volumedriver client as it detects stolen volumes too (fenced instances)
                volumes = voldrv_client.list_halted_volumes(str(storagedriver.storagedriver_id))
            except Exception as ex:
                cls.logger.exception('{0} - Exception occurred when listing volumes'.format(log_start))
                if cls._is_volumedriver_timeout(ex) is False:
                    # Unhandled exception at this point
                    result_handler.exception('{0} - Unable to list the Volumes due to an unidentified problem. Please check the logging'.format(log_start),
                                             code=ErrorCodes.voldr_unknown_problem)
                else:
                    result_handler.failure('{0} - Could not list the volumes for due to a connection problem.'.format(log_start),
                                           code=ErrorCodes.voldrv_connection_problem)
                continue
            # Retrieve the parent of the current volume. If this id would not be identical to the one we fetched for, that would mean it is fenced
            # Object registry goes to Arakoon
            # Capturing any possible that would occur to provide a clearer vision of what went wrong
            for volume in volumes:
                try:
                    registry_entry = objectregistry_client.find(volume)
                    if registry_entry.node_id() == storagedriver.storagedriver_id:
                        volume_lists[cls.VDISK_HALTED_STATES.HALTED].append(volume)
                    else:
                        # Fenced
                        volume_lists[cls.VDISK_HALTED_STATES.FENCED].append(volume)
                except Exception:
                    msg = '{0} - Unable to consult the object registry client for volume \'{1}\''.format(log_start, volume)
                    cls.logger.exception(msg)
                    result_handler.exception(msg, code=ErrorCodes.voldr_unknown_problem)
            # Include fenced - OTHER state combo
            for volume in volume_lists[cls.VDISK_HALTED_STATES.FENCED]:
                try:
                    _, state = cls._get_volume_issue(voldrv_client, volume, log_start)
                    volume_fenced_states[state].append(volume)
                except Exception:
                    # Only unhandled at this point
                    result_handler.exception('{0} - Unable to the volume info for volume {1} due to an unidentified problem. Please check the logging'.format(log_start, volume),
                                             code=ErrorCodes.voldr_unknown_problem)
            for halted_state, volume_state_info in volume_states.iteritems():
                for state, volumes in volume_state_info.iteritems():
                    if len(volumes) == 0:
                        continue  # Skip OK/empty lists
                    map_value = cls.FENCED_HALTED_STATUS_MAP[state.lower()]
                    log_func = getattr(result_handler, map_value['severity'])
                    message, code = map_value[halted_state.lower()]
                    log_func('{0} - {1}'.format(log_start, message.format(', '.join(volumes))), code=code)
            # Call success in case nothing is wrong
            if all(len(l) == 0 for l in volume_lists.values()):
                result_handler.success('{0} - No volumes found in halted/fenced state'.format(log_start))
Exemple #35
0
    def _get_mds_information(vpools=None):
        # type: (Optional[List[VPool]]) -> Tuple[collections.OrderedDict, List[StorageRouter]]
        """
        Retrieve a complete overview of all storagerouters and their mds layout
        :param vpools: VPools to get the overview for
        :type vpools: List[VPool]
        :return: - An overview with the vpool as keys, storagerouter - client, services and storagedriver map
                 - All storagerouters that were offline
        :rtype: Tuple[collection.OrderedDict, List[StorageRouter]]
        """
        # Verify StorageRouter availability
        if vpools is None:
            vpools = VPoolList.get_vpools()

        root_client_cache = {}
        storagerouters = StorageRouterList.get_storagerouters()
        storagerouters.sort(key=lambda _sr: ExtensionsToolbox.advanced_sort(
            element=_sr.ip, separator='.'))
        offline_nodes = []
        for storagerouter in storagerouters:
            try:
                root_client = SSHClient(endpoint=storagerouter,
                                        username='******')
                MDSServiceController._logger.debug(
                    'StorageRouter {0} - ONLINE'.format(storagerouter.name))
            except UnableToConnectException:
                root_client = None
                offline_nodes.append(storagerouter)
                MDSServiceController._logger.error(
                    'StorageRouter {0} - OFFLINE'.format(storagerouter.name))
            root_client_cache[storagerouter] = root_client

        # Create mapping per vPool and its StorageRouters
        mds_dict = collections.OrderedDict()
        for vpool in sorted(vpools, key=lambda k: k.name):
            MDSServiceController._logger.info('vPool {0}'.format(vpool.name))
            mds_dict[vpool] = {}

            # Loop all StorageDrivers and add StorageDriver to mapping
            for storagedriver in vpool.storagedrivers:
                storagerouter = storagedriver.storagerouter
                if storagerouter not in mds_dict[vpool]:
                    mds_dict[vpool][storagerouter] = {
                        'client': root_client_cache.get(storagerouter),
                        'services': [],
                        'storagedriver': storagedriver
                    }

            # Loop all MDS Services and append services to appropriate vPool / StorageRouter combo
            mds_services = vpool.mds_services
            mds_services.sort(
                key=lambda _mds_service: ExtensionsToolbox.advanced_sort(
                    element=_mds_service.service.storagerouter.ip,
                    separator='.'))
            for mds_service in mds_services:
                service = mds_service.service
                storagerouter = service.storagerouter
                if storagerouter not in mds_dict[vpool]:
                    mds_dict[vpool][storagerouter] = {
                        'client': root_client_cache.get(storagerouter),
                        'services': [],
                        'storagedriver': None
                    }
                MDSServiceController._logger.debug(
                    'vPool {0} - StorageRouter {1} - Service on port {2}'.
                    format(vpool.name, storagerouter.name, service.ports[0]))
                mds_dict[vpool][storagerouter]['services'].append(mds_service)
        return mds_dict, offline_nodes
 def get_vpools():
     """
     Retrieve all vPool objects
     :return: Data object list of vPools
     """
     return VPoolList.get_vpools()
    def cluster_registry_checkup():
        """
        Verify whether changes have occurred in the cluster registry for each vPool
        :return: Information whether changes occurred
        :rtype: dict
        """
        changed_vpools = {}
        for vpool in VPoolList.get_vpools():
            changed_vpools[vpool.guid] = {'changes': False,
                                          'success': True}
            try:
                StorageDriverController._logger.info('Validating cluster registry settings for Vpool {0}'.format(vpool.guid))

                current_configs = vpool.clusterregistry_client.get_node_configs()
                changes = len(current_configs) == 0
                node_configs = []
                for sd in vpool.storagedrivers:
                    sd.invalidate_dynamics(['cluster_node_config'])
                    new_config = sd.cluster_node_config
                    node_configs.append(ClusterNodeConfig(**new_config))
                    if changes is False:
                        current_node_configs = [config for config in current_configs if config.vrouter_id == sd.storagedriver_id]
                        if len(current_node_configs) == 1:
                            current_node_config = current_node_configs[0]
                            for key in new_config:
                                if getattr(current_node_config, key) != new_config[key]:
                                    changes = True
                                    break
                changed_vpools[vpool.guid]['changes'] = changes

                if changes is True:
                    StorageDriverController._logger.info('Cluster registry settings for Vpool {0} needs to be updated'.format(vpool.guid))
                    available_storagedrivers = []
                    for sd in vpool.storagedrivers:
                        storagerouter = sd.storagerouter
                        try:
                            SSHClient(storagerouter, username='******')
                            with remote(storagerouter.ip, [LocalStorageRouterClient]) as rem:
                                sd_key = '/ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, sd.storagedriver_id)
                                if Configuration.exists(sd_key) is True:
                                    path = Configuration.get_configuration_path(sd_key)
                                    lsrc = rem.LocalStorageRouterClient(path)
                                    lsrc.server_revision()  # 'Cheap' call to verify whether volumedriver is responsive
                                    available_storagedrivers.append(sd)
                        except UnableToConnectException:
                            StorageDriverController._logger.warning('StorageRouter {0} not available.'.format(storagerouter.name))
                        except Exception as ex:
                            if 'ClusterNotReachableException' in str(ex):
                                StorageDriverController._logger.warning('StorageDriver {0} on StorageRouter {1} not available.'.format(
                                    sd.guid, storagerouter.name
                                ))
                            else:
                                StorageDriverController._logger.exception('Got exception when validating StorageDriver {0} on StorageRouter {1}.'.format(
                                    sd.guid, storagerouter.name
                                ))

                    StorageDriverController._logger.info('Updating cluster node configs for VPool {0}'.format(vpool.guid))
                    vpool.clusterregistry_client.set_node_configs(node_configs)
                    for sd in available_storagedrivers:
                        StorageDriverController._logger.info('Trigger config reload for StorageDriver {0}'.format(sd.guid))
                        vpool.storagedriver_client.update_cluster_node_configs(str(sd.storagedriver_id), req_timeout_secs=10)
                    StorageDriverController._logger.info('Updating cluster node configs for Vpool {0} completed'.format(vpool.guid))
                else:
                    StorageDriverController._logger.info('Cluster registry settings for Vpool {0} is up to date'.format(vpool.guid))
            except Exception as ex:
                StorageDriverController._logger.exception('Got exception when validating cluster registry settings for Vpool {0}.'.format(vpool.name))
                changed_vpools[vpool.guid]['success'] = False
                changed_vpools[vpool.guid]['error'] = ex.message
        return changed_vpools
Exemple #38
0
    def migrate(previous_version):
        """
        Migrates from a given version to the current version. It uses 'previous_version' to be smart
        wherever possible, but the code should be able to migrate any version towards the expected version.
        When this is not possible, the code can set a minimum version and raise when it is not met.
        :param previous_version: The previous version from which to start the migration
        :type previous_version: float
        """

        working_version = previous_version

        if working_version == 0:
            # Initial version:
            # * Set the version to THIS RELEASE version

            from ovs.dal.hybrids.user import User
            from ovs.dal.hybrids.group import Group
            from ovs.dal.hybrids.role import Role
            from ovs.dal.hybrids.client import Client
            from ovs.dal.hybrids.j_rolegroup import RoleGroup
            from ovs.dal.hybrids.j_roleclient import RoleClient
            from ovs.dal.hybrids.servicetype import ServiceType
            from ovs.dal.hybrids.branding import Branding
            from ovs.dal.lists.backendtypelist import BackendTypeList

            # Create groups
            admin_group = Group()
            admin_group.name = 'administrators'
            admin_group.description = 'Administrators'
            admin_group.save()
            viewers_group = Group()
            viewers_group.name = 'viewers'
            viewers_group.description = 'Viewers'
            viewers_group.save()

            # Create users
            admin = User()
            admin.username = '******'
            admin.password = hashlib.sha256('admin').hexdigest()
            admin.is_active = True
            admin.group = admin_group
            admin.save()

            # Create internal OAuth 2 clients
            admin_pw_client = Client()
            admin_pw_client.ovs_type = 'INTERNAL'
            admin_pw_client.grant_type = 'PASSWORD'
            admin_pw_client.user = admin
            admin_pw_client.save()
            admin_cc_client = Client()
            admin_cc_client.ovs_type = 'INTERNAL'
            admin_cc_client.grant_type = 'CLIENT_CREDENTIALS'
            admin_cc_client.client_secret = ''.join(random.choice(string.ascii_letters +
                                                                  string.digits +
                                                                  '|_=+*#@!/-[]{}<>.?,\'";:~')
                                                    for _ in range(128))
            admin_cc_client.user = admin
            admin_cc_client.save()

            # Create roles
            read_role = Role()
            read_role.code = 'read'
            read_role.name = 'Read'
            read_role.description = 'Can read objects'
            read_role.save()
            write_role = Role()
            write_role.code = 'write'
            write_role.name = 'Write'
            write_role.description = 'Can write objects'
            write_role.save()
            manage_role = Role()
            manage_role.code = 'manage'
            manage_role.name = 'Manage'
            manage_role.description = 'Can manage the system'
            manage_role.save()

            # Attach groups to roles
            mapping = [
                (admin_group, [read_role, write_role, manage_role]),
                (viewers_group, [read_role])
            ]
            for setting in mapping:
                for role in setting[1]:
                    rolegroup = RoleGroup()
                    rolegroup.group = setting[0]
                    rolegroup.role = role
                    rolegroup.save()
                for user in setting[0].users:
                    for role in setting[1]:
                        for client in user.clients:
                            roleclient = RoleClient()
                            roleclient.client = client
                            roleclient.role = role
                            roleclient.save()

            # Add service types
            for service_type_info in [ServiceType.SERVICE_TYPES.MD_SERVER, ServiceType.SERVICE_TYPES.ALBA_PROXY, ServiceType.SERVICE_TYPES.ARAKOON]:
                service_type = ServiceType()
                service_type.name = service_type_info
                service_type.save()

            # Branding
            branding = Branding()
            branding.name = 'Default'
            branding.description = 'Default bootstrap theme'
            branding.css = 'bootstrap-default.min.css'
            branding.productname = 'Open vStorage'
            branding.is_default = True
            branding.save()
            slate = Branding()
            slate.name = 'Slate'
            slate.description = 'Dark bootstrap theme'
            slate.css = 'bootstrap-slate.min.css'
            slate.productname = 'Open vStorage'
            slate.is_default = False
            slate.save()

        # From here on, all actual migration should happen to get to the expected state for THIS RELEASE
        elif working_version < OVSMigrator.THIS_VERSION:
            # Migrate unique constraints
            from ovs.dal.helpers import HybridRunner, Descriptor
            from ovs.extensions.storage.persistentfactory import PersistentFactory
            client = PersistentFactory.get_client()
            hybrid_structure = HybridRunner.get_hybrids()
            for class_descriptor in hybrid_structure.values():
                cls = Descriptor().load(class_descriptor).get_object()
                classname = cls.__name__.lower()
                unique_key = 'ovs_unique_{0}_{{0}}_'.format(classname)
                uniques = []
                # noinspection PyProtectedMember
                for prop in cls._properties:
                    if prop.unique is True and len([k for k in client.prefix(unique_key.format(prop.name))]) == 0:
                        uniques.append(prop.name)
                if len(uniques) > 0:
                    prefix = 'ovs_data_{0}_'.format(classname)
                    for key in client.prefix(prefix):
                        data = client.get(key)
                        for property_name in uniques:
                            ukey = '{0}{1}'.format(unique_key.format(property_name), hashlib.sha1(str(data[property_name])).hexdigest())
                            client.set(ukey, key)

            # Complete rework of the way we detect devices to assign roles or use as ASD
            # Allow loop-, raid-, nvme-, ??-devices and logical volumes as ASD (https://github.com/openvstorage/framework/issues/792)
            from ovs.dal.lists.storagerouterlist import StorageRouterList
            from ovs.extensions.generic.sshclient import SSHClient, UnableToConnectException
            from ovs.lib.disk import DiskController

            for storagerouter in StorageRouterList.get_storagerouters():
                try:
                    client = SSHClient(storagerouter, username='******')
                except UnableToConnectException:
                    raise

                # Retrieve all symlinks for all devices
                # Example of name_alias_mapping:
                # {'/dev/md0': ['/dev/disk/by-id/md-uuid-ad2de634:26d97253:5eda0a23:96986b76', '/dev/disk/by-id/md-name-OVS-1:0'],
                #  '/dev/sda': ['/dev/disk/by-path/pci-0000:03:00.0-sas-0x5000c295fe2ff771-lun-0'],
                #  '/dev/sda1': ['/dev/disk/by-uuid/e3e0bc62-4edc-4c6b-a6ce-1f39e8f27e41', '/dev/disk/by-path/pci-0000:03:00.0-sas-0x5000c295fe2ff771-lun-0-part1']}
                name_alias_mapping = {}
                alias_name_mapping = {}
                for path_type in client.dir_list(directory='/dev/disk'):
                    if path_type in ['by-uuid', 'by-partuuid']:  # UUIDs can change after creating a filesystem on a partition
                        continue
                    directory = '/dev/disk/{0}'.format(path_type)
                    for symlink in client.dir_list(directory=directory):
                        symlink_path = '{0}/{1}'.format(directory, symlink)
                        link = client.file_read_link(symlink_path)
                        if link not in name_alias_mapping:
                            name_alias_mapping[link] = []
                        name_alias_mapping[link].append(symlink_path)
                        alias_name_mapping[symlink_path] = link

                for disk in storagerouter.disks:
                    if disk.aliases is None:
                        # noinspection PyProtectedMember
                        device_path = '/dev/{0}'.format(disk.name)
                        disk.aliases = name_alias_mapping.get(device_path, [device_path])
                        disk.save()
                    for partition in disk.partitions:
                        if partition.aliases is None:
                            # noinspection PyProtectedMember
                            partition_device = alias_name_mapping.get(partition._data.get('path'))
                            if partition_device is None:
                                partition.aliases = []
                                partition.save()
                                continue
                            partition.aliases = name_alias_mapping.get(partition_device, [])
                            partition.save()

                DiskController.sync_with_reality(storagerouter_guid=storagerouter.guid)

            # Only support ALBA backend type
            from ovs.dal.lists.backendtypelist import BackendTypeList
            for backend_type in BackendTypeList.get_backend_types():
                if backend_type.code != 'alba':
                    backend_type.delete()

            # Reformat the vpool.metadata information
            from ovs.dal.lists.vpoollist import VPoolList
            for vpool in VPoolList.get_vpools():
                new_metadata = {}
                for metadata_key, value in vpool.metadata.items():
                    new_info = {}
                    storagerouter_guids = [key for key in vpool.metadata.keys() if not key.startswith('backend')]
                    if isinstance(value, dict):
                        read_cache = value.get('backend_info', {}).get('fragment_cache_on_read', True)
                        write_cache = value.get('backend_info', {}).get('fragment_cache_on_write', False)
                        new_info['backend_info'] = {'alba_backend_guid': value.get('backend_guid'),
                                                    'backend_guid': None,
                                                    'frag_size': value.get('backend_info', {}).get('frag_size'),
                                                    'name': value.get('name'),
                                                    'policies': value.get('backend_info', {}).get('policies'),
                                                    'preset': value.get('preset'),
                                                    'sco_size': value.get('backend_info', {}).get('sco_size'),
                                                    'total_size': value.get('backend_info', {}).get('total_size')}
                        new_info['arakoon_config'] = value.get('arakoon_config')
                        new_info['connection_info'] = {'host': value.get('connection', {}).get('host', ''),
                                                       'port': value.get('connection', {}).get('port', ''),
                                                       'local': value.get('connection', {}).get('local', ''),
                                                       'client_id': value.get('connection', {}).get('client_id', ''),
                                                       'client_secret': value.get('connection', {}).get('client_secret', '')}
                        if metadata_key == 'backend':
                            new_info['caching_info'] = dict((sr_guid, {'fragment_cache_on_read': read_cache, 'fragment_cache_on_write': write_cache}) for sr_guid in storagerouter_guids)
                    if metadata_key in storagerouter_guids:
                        metadata_key = 'backend_aa_{0}'.format(metadata_key)
                    new_metadata[metadata_key] = new_info
                vpool.metadata = new_metadata
                vpool.save()

            # Removal of READ role
            from ovs.dal.lists.diskpartitionlist import DiskPartitionList
            for partition in DiskPartitionList.get_partitions():
                if 'READ' in partition.roles:
                    partition.roles.remove('READ')
                    partition.save()

        return OVSMigrator.THIS_VERSION
Exemple #39
0
    def migrate(previous_version):
        """
        Migrates from a given version to the current version. It uses 'previous_version' to be smart
        wherever possible, but the code should be able to migrate any version towards the expected version.
        When this is not possible, the code can set a minimum version and raise when it is not met.
        :param previous_version: The previous version from which to start the migration
        :type previous_version: float
        """

        working_version = previous_version

        if working_version == 0:
            # Initial version:
            # * Set the version to THIS RELEASE version
            from ovs.dal.hybrids.user import User
            from ovs.dal.hybrids.group import Group
            from ovs.dal.hybrids.role import Role
            from ovs.dal.hybrids.client import Client
            from ovs.dal.hybrids.j_rolegroup import RoleGroup
            from ovs.dal.hybrids.j_roleclient import RoleClient
            from ovs.dal.hybrids.servicetype import ServiceType
            from ovs.dal.hybrids.branding import Branding

            # Create groups
            admin_group = Group()
            admin_group.name = 'administrators'
            admin_group.description = 'Administrators'
            admin_group.save()
            viewers_group = Group()
            viewers_group.name = 'viewers'
            viewers_group.description = 'Viewers'
            viewers_group.save()

            # Create users
            admin = User()
            admin.username = '******'
            admin.password = hashlib.sha256('admin').hexdigest()
            admin.is_active = True
            admin.group = admin_group
            admin.save()

            # Create internal OAuth 2 clients
            admin_pw_client = Client()
            admin_pw_client.ovs_type = 'INTERNAL'
            admin_pw_client.grant_type = 'PASSWORD'
            admin_pw_client.user = admin
            admin_pw_client.save()
            admin_cc_client = Client()
            admin_cc_client.ovs_type = 'INTERNAL'
            admin_cc_client.grant_type = 'CLIENT_CREDENTIALS'
            admin_cc_client.client_secret = ''.join(
                random.choice(string.ascii_letters + string.digits +
                              '|_=+*#@!/-[]{}<>.?,\'";:~') for _ in range(128))
            admin_cc_client.user = admin
            admin_cc_client.save()

            # Create roles
            read_role = Role()
            read_role.code = 'read'
            read_role.name = 'Read'
            read_role.description = 'Can read objects'
            read_role.save()
            write_role = Role()
            write_role.code = 'write'
            write_role.name = 'Write'
            write_role.description = 'Can write objects'
            write_role.save()
            manage_role = Role()
            manage_role.code = 'manage'
            manage_role.name = 'Manage'
            manage_role.description = 'Can manage the system'
            manage_role.save()

            # Attach groups to roles
            mapping = [(admin_group, [read_role, write_role, manage_role]),
                       (viewers_group, [read_role])]
            for setting in mapping:
                for role in setting[1]:
                    rolegroup = RoleGroup()
                    rolegroup.group = setting[0]
                    rolegroup.role = role
                    rolegroup.save()
                for user in setting[0].users:
                    for role in setting[1]:
                        for client in user.clients:
                            roleclient = RoleClient()
                            roleclient.client = client
                            roleclient.role = role
                            roleclient.save()

            # Add service types
            for service_type_info in [
                    ServiceType.SERVICE_TYPES.MD_SERVER,
                    ServiceType.SERVICE_TYPES.ALBA_PROXY,
                    ServiceType.SERVICE_TYPES.ARAKOON
            ]:
                service_type = ServiceType()
                service_type.name = service_type_info
                service_type.save()

            # Branding
            branding = Branding()
            branding.name = 'Default'
            branding.description = 'Default bootstrap theme'
            branding.css = 'bootstrap-default.min.css'
            branding.productname = 'Open vStorage'
            branding.is_default = True
            branding.save()
            slate = Branding()
            slate.name = 'Slate'
            slate.description = 'Dark bootstrap theme'
            slate.css = 'bootstrap-slate.min.css'
            slate.productname = 'Open vStorage'
            slate.is_default = False
            slate.save()

        # From here on, all actual migration should happen to get to the expected state for THIS RELEASE
        elif working_version < DALMigrator.THIS_VERSION:
            from ovs.dal.datalist import DataList
            from ovs.dal.helpers import HybridRunner, Descriptor
            from ovs.dal.hybrids.diskpartition import DiskPartition
            from ovs.dal.hybrids.j_storagedriverpartition import StorageDriverPartition
            from ovs.dal.lists.vpoollist import VPoolList
            from ovs.extensions.generic.configuration import Configuration
            from ovs.extensions.storage.persistentfactory import PersistentFactory

            persistent_client = PersistentFactory.get_client()
            if working_version < 16:
                # The list caching keys were changed to class|field|list_id instead of class|list_id|field
                persistent_client.delete_prefix(
                    DataList.generate_persistent_cache_key())

            # Migrate unique constraints & indexes
            hybrid_structure = HybridRunner.get_hybrids()
            for class_descriptor in hybrid_structure.values():
                cls = Descriptor().load(class_descriptor).get_object()
                classname = cls.__name__.lower()
                unique_key = 'ovs_unique_{0}_{{0}}_'.format(classname)
                index_prefix = 'ovs_index_{0}|{{0}}|'.format(classname)
                index_key = 'ovs_index_{0}|{{0}}|{{1}}'.format(classname)
                uniques = []
                indexes = []
                # noinspection PyProtectedMember
                for prop in cls._properties:
                    if prop.unique is True and len([
                            k for k in persistent_client.prefix(
                                unique_key.format(prop.name))
                    ]) == 0:
                        uniques.append(prop.name)
                    if prop.indexed is True and len([
                            k for k in persistent_client.prefix(
                                index_prefix.format(prop.name))
                    ]) == 0:
                        indexes.append(prop.name)
                if len(uniques) > 0 or len(indexes) > 0:
                    prefix = 'ovs_data_{0}_'.format(classname)
                    for key, data in persistent_client.prefix_entries(prefix):
                        for property_name in uniques:
                            ukey = '{0}{1}'.format(
                                unique_key.format(property_name),
                                hashlib.sha1(str(
                                    data[property_name])).hexdigest())
                            persistent_client.set(ukey, key)
                        for property_name in indexes:
                            if property_name not in data:
                                continue  # This is the case when there's a new indexed property added.
                            ikey = index_key.format(
                                property_name,
                                hashlib.sha1(str(
                                    data[property_name])).hexdigest())
                            index = list(
                                persistent_client.get_multi(
                                    [ikey], must_exist=False))[0]
                            transaction = persistent_client.begin_transaction()
                            if index is None:
                                persistent_client.assert_value(
                                    ikey, None, transaction=transaction)
                                persistent_client.set(ikey, [key],
                                                      transaction=transaction)
                            elif key not in index:
                                persistent_client.assert_value(
                                    ikey, index[:], transaction=transaction)
                                persistent_client.set(ikey,
                                                      index + [key],
                                                      transaction=transaction)
                            persistent_client.apply_transaction(transaction)

            # Clean up - removal of obsolete 'cfgdir'
            paths = Configuration.get(key='/ovs/framework/paths')
            if 'cfgdir' in paths:
                paths.pop('cfgdir')
                Configuration.set(key='/ovs/framework/paths', value=paths)

            # Rewrite indices 'alba_proxy' --> 'alba_proxies'
            changes = False
            transaction = persistent_client.begin_transaction()
            for old_key in persistent_client.prefix(
                    'ovs_reverseindex_storagedriver'):
                if '|alba_proxy|' in old_key:
                    changes = True
                    new_key = old_key.replace('|alba_proxy|', '|alba_proxies|')
                    persistent_client.set(key=new_key,
                                          value=0,
                                          transaction=transaction)
                    persistent_client.delete(key=old_key,
                                             transaction=transaction)
            if changes is True:
                persistent_client.apply_transaction(transaction=transaction)

            # Introduction of DTL role (Replaces DTL sub_role)
            for vpool in VPoolList.get_vpools():
                for storagedriver in vpool.storagedrivers:
                    for junction_partition_guid in storagedriver.partitions_guids:
                        junction_partition = StorageDriverPartition(
                            junction_partition_guid)
                        if junction_partition.role == DiskPartition.ROLES.WRITE and junction_partition.sub_role == 'DTL':
                            junction_partition.role = DiskPartition.ROLES.DTL
                            junction_partition.sub_role = None
                            junction_partition.save()
                            if DiskPartition.ROLES.DTL not in junction_partition.partition.roles:
                                junction_partition.partition.roles.append(
                                    DiskPartition.ROLES.DTL)
                                junction_partition.partition.save()

        return DALMigrator.THIS_VERSION
Exemple #40
0
 def list(self):
     """
     Overview of all vPools
     """
     return VPoolList.get_vpools()
from ovs.dal.lists.vpoollist import VPoolList
import volumedriver.storagerouter.storagerouterclient as src

for vp in VPoolList.get_vpools():
    voldrv_client = src.LocalStorageRouterClient("/opt/OpenvStorage/config/storagedriver/storagedriver/vmstor.json")

    voldrv_volume_list = voldrv_client.list_volumes()
    model_vdisk_list = vp.vdisks

    for vdisk in model_vdisk_list:
        if not vdisk.volume_id in voldrv_volume_list:
            #print vdisk
            mds = vdisk.mds_services
            if len(mds) == 0:
                print "no mds"
                vdisk.delete()
            else:
                for md in mds:
                    print "mds"
                    md.delete()
                vdisk.delete()