Example #1
    def is_service_internally_managed(service):
        """
        Validate whether the service is internally or externally managed
        :param service: Service to verify
        :type service: str
        :return: True if internally managed, False otherwise
        :rtype: bool
        """
        if service not in ['memcached', 'rabbitmq']:
            raise ValueError('Can only check memcached or rabbitmq')

        service_name_map = {'memcached': 'memcache',
                            'rabbitmq': 'messagequeue'}[service]
        config_key = '/ovs/framework/{0}'.format(service_name_map)
        if not Configuration.exists(key=config_key):
            return True

        if not Configuration.exists(key='{0}|metadata'.format(config_key)):
            raise ValueError('Not all required keys ({0}) for {1} are present in the configuration management'.format(config_key, service))
        metadata = Configuration.get('{0}|metadata'.format(config_key))
        if 'internal' not in metadata:
            raise ValueError('Internal flag not present in metadata for {0}.\nPlease provide a key: {1} and value "metadata": {{"internal": True/False}}'.format(service, config_key))

        internal = metadata['internal']
        if internal is False:
            if not Configuration.exists(key='{0}|endpoints'.format(config_key)):
                raise ValueError('Externally managed {0} cluster must have "endpoints" information\nPlease provide a key: {1} and value "endpoints": [<ip:port>]'.format(service, config_key))
            endpoints = Configuration.get(key='{0}|endpoints'.format(config_key))
            if not isinstance(endpoints, list) or len(endpoints) == 0:
                raise ValueError('The endpoints for {0} cannot be empty and must be a list'.format(service))
        return internal
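A minimal usage sketch (assuming a reachable Configuration backend with the /ovs/framework keys populated):

    try:
        if is_service_internally_managed('rabbitmq'):
            print('rabbitmq is deployed and managed by the framework itself')
        else:
            print('rabbitmq is managed externally via the configured endpoints')
    except ValueError as ex:
        print('Configuration is incomplete: {0}'.format(ex))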
Example #2
    def migrate(previous_version):
        """
        Migrates from any version to any version, running all migrations required
        If previous_version is for example 0 and this script is at
        version 3 it will execute two steps:
          - 1 > 2
          - 2 > 3
        @param previous_version: The previous version from which to start the migration.
        """

        working_version = previous_version

        # Version 1 introduced:
        # - Flexible SSD layout
        if working_version < 1:
            from ovs.extensions.generic.configuration import Configuration
            if Configuration.exists('ovs.arakoon'):
                Configuration.delete('ovs.arakoon', remove_root=True)
            Configuration.set('ovs.core.ovsdb', '/opt/OpenvStorage/db')

            working_version = 1

        # Version 2 introduced:
        # - Registration
        if working_version < 2:
            import time
            from ovs.extensions.generic.configuration import Configuration
            if not Configuration.exists('ovs.core.registered'):
                Configuration.set('ovs.core.registered', False)
                Configuration.set('ovs.core.install_time', time.time())

            working_version = 2

        return working_version
Example #3
    def migrate(previous_version, master_ips=None, extra_ips=None):
        """
        Migrates from any version to any version, running all migrations required
        If previous_version is for example 0 and this script is at
        version 3 it will execute two steps:
          - 1 > 2
          - 2 > 3
        :param previous_version: The previous version from which to start the migration.
        :param master_ips: IP addresses of the MASTER nodes
        :param extra_ips: IP addresses of the EXTRA nodes
        """

        working_version = previous_version

        # Version 1 introduced:
        # - Flexible SSD layout
        if working_version < 1:
            from ovs.extensions.generic.configuration import Configuration
            if Configuration.exists('ovs.arakoon'):
                Configuration.delete('ovs.arakoon', remove_root=True)
            Configuration.set('ovs.core.ovsdb', '/opt/OpenvStorage/db')

            working_version = 1

        # Version 2 introduced:
        # - Registration
        if working_version < 2:
            import time
            from ovs.extensions.generic.configuration import Configuration
            if not Configuration.exists('ovs.core.registered'):
                Configuration.set('ovs.core.registered', False)
                Configuration.set('ovs.core.install_time', time.time())

            working_version = 2

        # Version 3 introduced:
        # - New arakoon clients
        if working_version < 3:
            from ovs.extensions.db.arakoon import ArakoonInstaller
            reload(ArakoonInstaller)
            from ovs.extensions.db.arakoon import ArakoonInstaller
            from ovs.extensions.generic.sshclient import SSHClient
            from ovs.extensions.generic.configuration import Configuration
            if master_ips is not None:
                for ip in master_ips:
                    client = SSHClient(ip)
                    if client.dir_exists(ArakoonInstaller.ArakoonInstaller.ARAKOON_CONFIG_DIR):
                        for cluster_name in client.dir_list(ArakoonInstaller.ArakoonInstaller.ARAKOON_CONFIG_DIR):
                            try:
                                ArakoonInstaller.ArakoonInstaller.deploy_cluster(cluster_name, ip)
                            except:
                                pass
            if Configuration.exists('ovs.core.storage.persistent'):
                Configuration.set('ovs.core.storage.persistent', 'pyrakoon')

            working_version = 3

        return working_version
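For illustration: starting from previous_version=0, this variant applies the version 1, 2 and 3 steps in order and returns 3; starting from previous_version=2, only the arakoon-client step runs. A hedged call sketch (hypothetical IP):

    new_version = migrate(previous_version=0, master_ips=['10.100.1.1'], extra_ips=[])
    assert new_version == 3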
Example #4
    def migrate(master_ips=None, extra_ips=None):
        """
        Executes all migrations. It keeps track of an internal "migration version" which is always increasing by one
        :param master_ips: IP addresses of the MASTER nodes
        :param extra_ips: IP addresses of the EXTRA nodes
        """
        machine_id = System.get_my_machine_id()
        key = '/ovs/framework/hosts/{0}/versions'.format(machine_id)
        data = Configuration.get(key) if Configuration.exists(key) else {}
        migrators = []
        path = '/'.join([os.path.dirname(__file__), 'migration'])
        for filename in os.listdir(path):
            if os.path.isfile('/'.join([path, filename])) and filename.endswith('.py'):
                name = filename.replace('.py', '')
                module = imp.load_source(name, '/'.join([path, filename]))
                for member in inspect.getmembers(module):
                    if inspect.isclass(member[1]) and member[1].__module__ == name and 'object' in [base.__name__ for base in member[1].__bases__]:
                        migrators.append((member[1].identifier, member[1].migrate))

        end_version = 0
        for identifier, method in migrators:
            base_version = data[identifier] if identifier in data else 0
            version = method(base_version, master_ips, extra_ips)
            if version > end_version:
                end_version = version
            data[identifier] = end_version

        Configuration.set(key, data)
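The discovery loop above only picks up classes that are defined in the scanned module, inherit directly from object, and expose an identifier attribute plus a migrate callable. A minimal migrator module satisfying that contract might look like this (hypothetical module and names):

    # migration/example.py (hypothetical)
    class ExampleMigrator(object):
        identifier = 'example'  # key under which this migrator's version is persisted

        @staticmethod
        def migrate(previous_version, master_ips=None, extra_ips=None):
            working_version = previous_version
            if working_version < 1:
                # ... apply the version 1 changes here ...
                working_version = 1
            return working_version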
Example #5
    def __init__(self, vpool_guid, storagedriver_id):
        """
        Initializes the class
        """
        _log_level = LOG_LEVEL_MAPPING[OVSLogger('extensions').getEffectiveLevel()]
        # noinspection PyCallByClass,PyTypeChecker
        storagerouterclient.Logger.setupLogging(OVSLogger.load_path('storagerouterclient'), _log_level)
        # noinspection PyArgumentList
        storagerouterclient.Logger.enableLogging()

        self._key = '/ovs/vpools/{0}/hosts/{1}/config'.format(vpool_guid, storagedriver_id)
        self._logger = OVSLogger('extensions')
        self._dirty_entries = []

        self.remote_path = Configuration.get_configuration_path(self._key).strip('/')
        # Load configuration
        if Configuration.exists(self._key):
            self.configuration = Configuration.get(self._key)
            self.config_missing = False
        else:
            self.configuration = {}
            self.config_missing = True
            self._logger.debug('Could not find config {0}, a new one will be created'.format(self._key))
Example #6
    def config_files_check_test():
        """
        Verify some configuration files
        """
        issues_found = ''

        config_keys = {
            "/ovs/framework/memcache",
            "/ovs/arakoon/ovsdb/config"
        }

        for key_to_check in config_keys:
            if not Configuration.exists(key_to_check, raw=True):
                issues_found += "Couldn't find {0}\n".format(key_to_check)

        config_files = {
            "rabbitmq.config": "/etc/rabbitmq/rabbitmq.config",
        }
        grid_ip = General.get_config().get('main', 'grid_ip')
        ssh_pass = General.get_config().get('mgmtcenter', 'password')
        client = SSHClient(grid_ip, username='******', password=ssh_pass)
        for config_file_to_check in config_files.iterkeys():
            if not client.file_exists(config_files[config_file_to_check]):
                issues_found += "Couldn't find {0}\n".format(config_file_to_check)

        assert issues_found == '',\
            "Found the following issues while checking for the config files:{0}\n".format(issues_found)
Example #7
    def delete_config(cluster_name):
        """
        Remove the configuration entry for arakoon cluster_name
        :param cluster_name: Name of the arakoon cluster
        :return: None
        """
        config_key = GeneralArakoon.CONFIG_KEY.format(cluster_name)
        if Configuration.exists(config_key, raw=True):
            Configuration.delete(os.path.dirname(config_key))
Example #8
    def is_service_internally_managed(service):
        """
        Validate whether the service is internally or externally managed
        :param service: Service to verify
        :type service: str
        :return: True if internally managed, False otherwise
        :rtype: bool
        """
        if service not in ['memcached', 'rabbitmq']:
            raise ValueError('Can only check memcached or rabbitmq')

        service_name_map = {
            'memcached': 'memcache',
            'rabbitmq': 'messagequeue'
        }[service]
        config_key = '/ovs/framework/{0}'.format(service_name_map)
        if not Configuration.exists(key=config_key):
            return True

        if not Configuration.exists(key='{0}|metadata'.format(config_key)):
            raise ValueError('Not all required keys ({0}) for {1} are present in the configuration management'.format(config_key, service))
        metadata = Configuration.get('{0}|metadata'.format(config_key))
        if 'internal' not in metadata:
            raise ValueError('Internal flag not present in metadata for {0}.\nPlease provide a key: {1} and value "metadata": {{"internal": True/False}}'.format(service, config_key))

        internal = metadata['internal']
        if internal is False:
            if not Configuration.exists(key='{0}|endpoints'.format(config_key)):
                raise ValueError('Externally managed {0} cluster must have "endpoints" information\nPlease provide a key: {1} and value "endpoints": [<ip:port>]'.format(service, config_key))
            endpoints = Configuration.get(key='{0}|endpoints'.format(config_key))
            if not isinstance(endpoints, list) or len(endpoints) == 0:
                raise ValueError('The endpoints for {0} cannot be empty and must be a list'.format(service))
        return internal
Example #9
    def load(self):
        """
        Loads the configuration from a given file, optionally a remote one
        """
        self.configuration = {}
        if Configuration.exists(self.key):
            self.is_new = False
            self.configuration = json.loads(Configuration.get(self.key, raw=True))
        else:
            self._logger.debug('Could not find config {0}, a new one will be created'.format(self.key))
        self.dirty_entries = []
Example #10
    def delete_config(self, ip=None):
        """
        Deletes a configuration file
        """
        if self.filesystem is False:
            key = self.config_path
            if Configuration.exists(key, raw=True):
                Configuration.delete(key, raw=True)
        else:
            client = self._load_client(ip)
            client.file_delete(self.config_path)
Example #11
    def delete_config(self, ip=None):
        """
        Deletes a configuration file
        """
        if self.filesystem is False:
            key = self.config_path
            if Configuration.exists(key, raw=True):
                Configuration.delete(key, raw=True)
        else:
            client = self._load_client(ip)
            client.file_delete(self.config_path)
Example #12
    def ipmi_check(cls, result_handler):
        """
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :return:
        """
        for albanode in AlbaNodeList.get_albanodes():
            node_id = albanode.node_id
            ipmi_config_loc = '/ovs/alba/asdnodes/{0}/config/ipmi'.format(node_id)
            if not Configuration.exists(ipmi_config_loc):
                result_handler.skip('No IPMI info found on AlbaNode with ID {0}'.format(node_id))
                continue
            ipmi_config = Configuration.get(ipmi_config_loc)
            ip = ipmi_config.get('ip')
            try:
                controller = IPMIController(ip=ip,
                                            username=ipmi_config.get('username'),
                                            password=ipmi_config.get('password'),
                                            client=SSHClient(System.get_my_storagerouter()))
            except:
                result_handler.failure('IPMI settings are not valid for AlbaNode with ID {0}'.format(node_id))
                continue
            try:
                status = controller.status_node().get(ip)
                if status == IPMIController.IPMI_POWER_ON:
                    result_handler.success('IPMI AlbaNode with ID {0} status is POWER ON'.format(node_id))
                elif status == IPMIController.IPMI_POWER_OFF:
                    result_handler.warning('IPMI AlbaNode with ID {0} status is POWER OFF'.format(node_id))
            except IPMITimeOutException as ex:
                result_handler.failure("IPMI AlbaNode with ID {0} timed out: '{1}'".format(node_id, ex))
            except IPMICallException as ex:
                result_handler.failure("IPMI AlbaNode with ID {0} call failed: '{1}'".format(node_id, ex))
            except Exception:
                msg = 'Could not retrieve info through IPMI for AlbaNode with ID {0}'.format(node_id)
                cls.logger.exception(msg)
                result_handler.exception(msg)
Example #13
    def load(self):
        """
        Loads the configuration from a given file, optionally a remote one
        """
        self.configuration = {}
        if Configuration.exists(self.key):
            self.is_new = False
            self.configuration = json.loads(Configuration.get(self.key, raw=True))
        else:
            self._logger.debug('Could not find config {0}, a new one will be created'.format(self.key))
        self.dirty_entries = []
Example #14
    def get_config(cluster_name):
        """
        Retrieve the configuration for given cluster
        :param cluster_name: Name of the cluster
        :return: RawConfigParser object
        """
        config_key = GeneralArakoon.CONFIG_KEY.format(cluster_name)
        if not Configuration.exists(config_key, raw=True):
            raise ValueError('Unknown arakoon cluster_name {0} provided'.format(cluster_name))

        voldrv_config = Configuration.get(config_key, raw=True)
        parser = RawConfigParser()
        parser.readfp(StringIO(voldrv_config))
        return parser
Example #15
    def get_cluster_name(internal_name):
        """
        Retrieve the name of the cluster
        :param internal_name: Name as known by the framework
        :type internal_name: str
        :return: Name known by user
        :rtype: str
        """
        config_key = '/ovs/framework/arakoon_clusters'
        if Configuration.exists(config_key):
            cluster_info = Configuration.get(config_key)
            if internal_name in cluster_info:
                return cluster_info[internal_name]
        if internal_name not in ['ovsdb', 'voldrv']:
            return internal_name
Example #16
    def get_path(binary_name):
        """
        Retrieve the absolute path for binary
        :param binary_name: Binary to get path for
        :return: Path
        """
        machine_id = System.get_my_machine_id()
        config_location = '/ovs/framework/hosts/{0}/paths|{1}'.format(machine_id, binary_name)
        if not Configuration.exists(config_location):
            try:
                path = check_output("which '{0}'".format(binary_name.replace(r"'", r"'\''")), shell=True).strip()
                Configuration.set(config_location, path)
            except CalledProcessError:
                return None
        else:
            path = Configuration.get(config_location)
        return path
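An illustrative call (assuming the binary is installed and the configuration store is reachable); the resolved path is cached under the host-specific key, so 'which' only runs on the first lookup:

    fio_path = get_path('fio')  # hypothetical binary name
    if fio_path is None:
        raise RuntimeError('fio is not installed on this host')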
Example #17
        def _update_manifest_cache_size(_proxy_config_key):
            updated = False
            manifest_cache_size = 500 * 1024 * 1024
            if Configuration.exists(key=_proxy_config_key):
                _proxy_config = Configuration.get(key=_proxy_config_key)
                for cache_type in [StorageDriverConfiguration.CACHE_BLOCK, StorageDriverConfiguration.CACHE_FRAGMENT]:
                    if cache_type in _proxy_config and _proxy_config[cache_type][0] == 'alba':
                        if _proxy_config[cache_type][1]['manifest_cache_size'] != manifest_cache_size:
                            updated = True
                            _proxy_config[cache_type][1]['manifest_cache_size'] = manifest_cache_size
                if _proxy_config['manifest_cache_size'] != manifest_cache_size:
                    updated = True
                    _proxy_config['manifest_cache_size'] = manifest_cache_size

                if updated is True:
                    Configuration.set(key=_proxy_config_key, value=_proxy_config)
            return updated
Example #18
    def verify_arakoon_structure(client, cluster_name, config_present, dir_present):
        """
        Verify the expected arakoon structure and configuration
        :param client: SSHClient object
        :param cluster_name: Name of the arakoon cluster
        :param config_present: configuration presence expectancy
        :param dir_present: Directory structure presence expectancy
        :return: True if correct
        """
        tlog_dir = GeneralArakoon.TLOG_DIR.format('/var/tmp', cluster_name)
        home_dir = GeneralArakoon.HOME_DIR.format('/var/tmp', cluster_name)

        key_exists = Configuration.exists(GeneralArakoon.CONFIG_KEY.format(cluster_name), raw=True)
        assert key_exists is config_present,\
            "Arakoon configuration was {0} expected".format('' if config_present else 'not ')
        for directory in [tlog_dir, home_dir]:
            assert client.dir_exists(directory) is dir_present,\
                "Arakoon directory {0} was {1} expected".format(directory, '' if dir_present else 'not ')
Example #19
def teardown():
    """
    Teardown for Arakoon package, will be executed when all started tests in this package have ended
    Removal actions of possible things left over after the test-run
    :return: None
    """
    for storagerouter in GeneralStorageRouter.get_masters():
        root_client = SSHClient(storagerouter, username='******')
        if GeneralService.get_service_status(name='ovs-scheduled-tasks',
                                             client=root_client) is False:
            GeneralService.start_service(name='ovs-scheduled-tasks',
                                         client=root_client)

        for location in TEST_CLEANUP:
            root_client.run(['rm', '-rf', location])

    for key in KEY_CLEANUP:
        if Configuration.exists('{0}/{1}'.format(GeneralArakoon.CONFIG_ROOT, key), raw=True):
            Configuration.delete('{0}/{1}'.format(GeneralArakoon.CONFIG_ROOT, key))
Example #20
    def get_path(binary_name):
        """
        Retrieve the absolute path for binary
        :param binary_name: Binary to get path for
        :return: Path
        """
        machine_id = System.get_my_machine_id()
        config_location = '/ovs/framework/hosts/{0}/paths|{1}'.format(machine_id, binary_name)
        if not Configuration.exists(config_location):
            try:
                path = check_output("which '{0}'".format(binary_name.replace(r"'", r"'\''")), shell=True).strip()
                Configuration.set(config_location, path)
            except CalledProcessError:
                return None
        else:
            path = Configuration.get(config_location)
        return path
Example #21
    def migrate():
        """ Executes all migrations. It keeps track of an internal "migration version" which is always increasing by one """

        data = Configuration.get('ovs.core.versions') if Configuration.exists('ovs.core.versions') else {}
        migrators = []
        path = os.path.join(os.path.dirname(__file__), 'migration')
        for filename in os.listdir(path):
            if os.path.isfile(os.path.join(path, filename)) and filename.endswith('.py'):
                name = filename.replace('.py', '')
                module = imp.load_source(name, os.path.join(path, filename))
                for member in inspect.getmembers(module):
                    if inspect.isclass(member[1]) and member[1].__module__ == name and 'object' in [base.__name__ for base in member[1].__bases__]:
                        migrators.append((member[1].identifier, member[1].migrate))

        end_version = 0
        for identifier, method in migrators:
            base_version = data[identifier] if identifier in data else 0
            version = method(base_version)
            if version > end_version:
                end_version = version
            data[identifier] = end_version

        Configuration.set('ovs.core.versions', data)
Example #22
    def ipmi_check(cls, result_handler):
        """
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :return:
        """
        for albanode in AlbaNodeList.get_albanodes():
            node_id = albanode.node_id
            ipmi_config_loc = '/ovs/alba/asdnodes/{0}/config/ipmi'.format(node_id)
            if not Configuration.exists(ipmi_config_loc):
                result_handler.skip('No IPMI info found on AlbaNode with ID {0}'.format(node_id))
                continue
            ipmi_config = Configuration.get(ipmi_config_loc)
            ip = ipmi_config.get('ip')
            try:
                controller = IPMIController(ip=ip,
                                            username=ipmi_config.get('username'),
                                            password=ipmi_config.get('password'),
                                            client=SSHClient(System.get_my_storagerouter()))
            except:
                result_handler.failure('IPMI settings are not valid for AlbaNode with ID {0}'.format(node_id))
                continue
            try:
                status = controller.status_node().get(ip)
                if status == IPMIController.IPMI_POWER_ON:
                    result_handler.success('IPMI AlbaNode with ID {0} status is POWER ON'.format(node_id))
                elif status == IPMIController.IPMI_POWER_OFF:
                    result_handler.warning('IPMI AlbaNode with ID {0} status is POWER OFF'.format(node_id))
            except IPMITimeOutException as ex:
                result_handler.failure("IPMI AlbaNode with ID {0} timed out: '{1}'".format(node_id, ex))
            except IPMICallException as ex:
                result_handler.failure("IPMI AlbaNode with ID {0} call failed: '{1}'".format(node_id, ex))
            except Exception:
                msg = 'Could not retrieve info through IPMI for AlbaNode with ID {0}'.format(node_id)
                cls.logger.exception(msg)
                result_handler.exception(msg)
Example #23
    def change_scheduled_task(task_name, state, disabled=False, cron=None):
        if not Configuration.exists(celery_key):
            Configuration.set(celery_key, {})
        jobs = Configuration.get(celery_key)
        if state == 'present':
            if disabled:
                jobs[task_name] = None
                output = 'task {0}: disabled'.format(task_name)
            else:
                jobs[task_name] = cron
                settings = ''
                for key, value in cron.iteritems():
                    settings += "{0}: {1} ".format(key, value)
                output = 'task {0}: cron settings {1}'.format(task_name, settings)
        else:
            jobs.pop(task_name, None)
            output = 'task {0}: removed, default settings will be applied.'.format(task_name)
        Configuration.set(celery_key, jobs)
        service_name = 'scheduled-tasks'
        service_manager = ServiceFactory.get_manager()
        for storagerouter in StorageRouterList.get_masters():
            client = SSHClient(storagerouter, username='******')
            service_manager.restart_service(service_name, client=client)
        return output
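A hedged usage sketch (assuming the module-level celery_key points at the scheduled-task configuration; the cron keys follow celery's crontab arguments):

    # Pin a task to run nightly at 00:30 (hypothetical task name)
    print(change_scheduled_task('ovs.generic.execute_scrub', state='present', cron={'minute': '30', 'hour': '0'}))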
Example #24
    def _stack(self):
        """
        Returns an overview of this node's storage stack
        """
        from ovs.dal.hybrids.albabackend import AlbaBackend
        from ovs.dal.lists.albabackendlist import AlbaBackendList

        def _move(info):
            for move in [('state', 'status'),
                         ('state_detail', 'status_detail')]:
                if move[0] in info:
                    info[move[1]] = info[move[0]]
                    del info[move[0]]

        stack = {}
        node_down = False
        # Fetch stack from asd-manager
        try:
            remote_stack = self.client.get_stack()
            for slot_id, slot_data in remote_stack.iteritems():
                stack[slot_id] = {'status': 'ok'}
                stack[slot_id].update(slot_data)
                # Migrate state > status
                _move(stack[slot_id])
                for osd_data in slot_data.get('osds', {}).itervalues():
                    _move(osd_data)
        except (requests.ConnectionError, requests.Timeout, InvalidCredentialsError):
            self._logger.warning('Error during stack retrieval. Assuming that the node is down')
            node_down = True

        model_osds = {}
        found_osds = {}
        # Apply own model to fetched stack
        for osd in self.osds:
            model_osds[osd.osd_id] = osd  # Initially set the info
            if osd.slot_id not in stack:
                stack[osd.slot_id] = {'status': self.OSD_STATUSES.UNKNOWN if node_down is True else self.OSD_STATUSES.MISSING,
                                      'status_detail': self.OSD_STATUS_DETAILS.NODEDOWN if node_down is True else '',
                                      'osds': {}}
            osd_data = stack[osd.slot_id]['osds'].get(osd.osd_id, {})
            stack[osd.slot_id]['osds'][osd.osd_id] = osd_data  # Initially set the info in the stack
            osd_data.update(osd.stack_info)
            if node_down is True:
                osd_data['status'] = self.OSD_STATUSES.UNKNOWN
                osd_data['status_detail'] = self.OSD_STATUS_DETAILS.NODEDOWN
            elif osd.alba_backend_guid is not None:  # OSD has been claimed
                # Load information from alba
                if osd.alba_backend_guid not in found_osds:
                    found_osds[osd.alba_backend_guid] = {}
                    if osd.alba_backend.abm_cluster is not None:
                        config = Configuration.get_configuration_path(osd.alba_backend.abm_cluster.config_location)
                        try:
                            for found_osd in AlbaCLI.run(command='list-all-osds', config=config):
                                found_osds[osd.alba_backend_guid][found_osd['long_id']] = found_osd
                        except (AlbaError, RuntimeError):
                            self._logger.exception('Listing all osds has failed')
                            osd_data['status'] = self.OSD_STATUSES.UNKNOWN
                            osd_data['status_detail'] = self.OSD_STATUS_DETAILS.ALBAERROR
                            continue

                if osd.osd_id not in found_osds[osd.alba_backend_guid]:
                    # Not claimed by any backend thus not in use
                    continue
                found_osd = found_osds[osd.alba_backend_guid][osd.osd_id]
                if found_osd['decommissioned'] is True:
                    osd_data['status'] = self.OSD_STATUSES.UNAVAILABLE
                    osd_data['status_detail'] = self.OSD_STATUS_DETAILS.DECOMMISSIONED
                    continue

                backend_interval_key = '/ovs/alba/backends/{0}/gui_error_interval'.format(osd.alba_backend_guid)
                if Configuration.exists(backend_interval_key):
                    interval = Configuration.get(backend_interval_key)
                else:
                    interval = Configuration.get('/ovs/alba/backends/global_gui_error_interval')
                read = found_osd['read'] or [0]
                write = found_osd['write'] or [0]
                errors = found_osd['errors']
                osd_data['status'] = self.OSD_STATUSES.WARNING
                osd_data['status_detail'] = self.OSD_STATUS_DETAILS.ERROR
                if len(errors) == 0 or (len(read + write) > 0 and max(min(read), min(write)) > max(error[0] for error in errors) + interval):
                    osd_data['status'] = self.OSD_STATUSES.OK
                    osd_data['status_detail'] = ''

        statistics = {}
        for slot_info in stack.itervalues():
            for osd_id, osd in slot_info['osds'].iteritems():
                if osd.get('status_detail') == self.OSD_STATUS_DETAILS.ACTIVATING:
                    osd['claimed_by'] = 'unknown'  # We won't be able to connect to it just yet
                    continue
                if osd_id not in model_osds:
                    # The osd is known by the remote node but not in the model
                    # In that case, let's connect to the OSD to see whether we get some info from it
                    try:
                        ips = osd['hosts'] if 'hosts' in osd and len(osd['hosts']) > 0 else osd.get('ips', [])
                        port = osd['port']
                        claimed_by = 'unknown'
                        for ip in ips:
                            try:
                                # Output will be None if it is not claimed
                                claimed_by = AlbaCLI.run('get-osd-claimed-by', named_params={'host': ip, 'port': port})
                                break
                            except (AlbaError, RuntimeError):
                                self._logger.warning('get-osd-claimed-by failed for IP:port {0}:{1}'.format(ip, port))
                        alba_backend = AlbaBackendList.get_by_alba_id(claimed_by)
                        osd['claimed_by'] = alba_backend.guid if alba_backend is not None else claimed_by
                    except KeyError:
                        osd['claimed_by'] = 'unknown'
                    except:
                        self._logger.exception('Could not load OSD info: {0}'.format(osd_id))
                        osd['claimed_by'] = 'unknown'
                        if osd.get('status') not in ['error', 'warning']:
                            osd['status'] = self.OSD_STATUSES.ERROR
                            osd['status_detail'] = self.OSD_STATUS_DETAILS.UNREACHABLE
                claimed_by = osd.get('claimed_by', 'unknown')
                if claimed_by == 'unknown':
                    continue
                try:
                    alba_backend = AlbaBackend(claimed_by)
                except ObjectNotFoundException:
                    continue
                # Add usage information
                if alba_backend not in statistics:
                    statistics[alba_backend] = alba_backend.osd_statistics
                osd_statistics = statistics[alba_backend]
                if osd_id not in osd_statistics:
                    continue
                stats = osd_statistics[osd_id]
                osd['usage'] = {
                    'size': int(stats['capacity']),
                    'used': int(stats['disk_usage']),
                    'available': int(stats['capacity'] - stats['disk_usage'])
                }
        return stack
Example #25
    def get(self, request, *args, **kwargs):
        """
        Fetches metadata
        """
        _ = args, kwargs
        data = {'authenticated': False,
                'authentication_state': None,
                'authentication_metadata': {},
                'username': None,
                'userguid': None,
                'roles': [],
                'identification': {},
                'storagerouter_ips': [sr.ip for sr in StorageRouterList.get_storagerouters()],
                'versions': list(settings.VERSION),
                'plugins': {}}
        try:
            # Gather plugin metadata
            plugins = {}
            # - Backends. BackendType plugins must set the has_plugin flag on True
            for backend_type in BackendTypeList.get_backend_types():
                if backend_type.has_plugin is True:
                    if backend_type.code not in plugins:
                        plugins[backend_type.code] = []
                    plugins[backend_type.code] += ['backend', 'gui']
            # - Generic plugins, as added to the configuration file(s)
            generic_plugins = Configuration.get('ovs.plugins.generic')
            for plugin_name in generic_plugins:
                if plugin_name not in plugins:
                    plugins[plugin_name] = []
                plugins[plugin_name] += ['gui']
            data['plugins'] = plugins

            # Fill identification
            data['identification'] = {
                'cluster_id': Configuration.get('ovs.support.cid')
            }

            # Get authentication metadata
            authentication_metadata = {'ip': System.get_my_storagerouter().ip}
            for key in ['mode', 'authorize_uri', 'client_id', 'scope']:
                if Configuration.exists('ovs.webapps.oauth2.{0}'.format(key)):
                    authentication_metadata[key] = Configuration.get('ovs.webapps.oauth2.{0}'.format(key))
            data['authentication_metadata'] = authentication_metadata

            # Gather authorization metadata
            if 'HTTP_AUTHORIZATION' not in request.META:
                return HttpResponse, dict(data.items() + {'authentication_state': 'unauthenticated'}.items())
            authorization_type, access_token = request.META['HTTP_AUTHORIZATION'].split(' ')
            if authorization_type != 'Bearer':
                return HttpResponse, dict(data.items() + {'authentication_state': 'invalid_authorization_type'}.items())
            tokens = BearerTokenList.get_by_access_token(access_token)
            if len(tokens) != 1:
                return HttpResponse, dict(data.items() + {'authentication_state': 'invalid_token'}.items())
            token = tokens[0]
            if token.expiration < time.time():
                for junction in token.roles.itersafe():
                    junction.delete()
                token.delete()
                return HttpResponse, dict(data.items() + {'authentication_state': 'token_expired'}.items())

            # Gather user metadata
            user = token.client.user
            if not user.is_active:
                return HttpResponse, dict(data.items() + {'authentication_state': 'inactive_user'}.items())
            roles = [j.role.code for j in token.roles]

            return HttpResponse, dict(data.items() + {'authenticated': True,
                                                      'authentication_state': 'authenticated',
                                                      'username': user.username,
                                                      'userguid': user.guid,
                                                      'roles': roles,
                                                      'plugins': plugins}.items())
        except Exception as ex:
            logger.exception('Unexpected exception: {0}'.format(ex))
            return HttpResponse, dict(data.items() + {'authentication_state': 'unexpected_exception'}.items())
Example #26
    def _live_status(self):
        """
        Retrieve the live status of the ALBA Backend to be displayed in the 'Backends' page in the GUI based on:
            - Maintenance agents presence
            - Maintenance agents status
            - Disk statuses
        :return: Status as reported by the plugin
        :rtype: str
        """
        if self.backend.status == Backend.STATUSES.INSTALLING:
            return 'installing'

        if self.backend.status == Backend.STATUSES.DELETING:
            return 'deleting'

        # Verify failed disks
        devices = self.local_summary['devices']
        if devices['red'] > 0:
            self._logger.warning('AlbaBackend {0} STATUS set to FAILURE due to {1} failed disks'.format(self.name, devices['red']))
            return AlbaBackend.STATUSES.FAILURE

        # Verify remote OSDs
        remote_errors = False
        linked_backend_warning = False
        for remote_info in self.remote_stack.itervalues():
            if remote_info['error'] == 'unknown' or remote_info['live_status'] == AlbaBackend.STATUSES.FAILURE:
                message = None
                if remote_info['error'] == 'unknown':
                    message = 'unknown remote error info'
                elif remote_info['live_status'] == AlbaBackend.STATUSES.FAILURE:
                    message = 'FAILURE in live_status'
                self._logger.warning('AlbaBackend {0} STATUS set to FAILURE due to OSD {1}: {2}'.format(self.name, remote_info['name'], message))
                return AlbaBackend.STATUSES.FAILURE
            if remote_info['error'] == 'not_allowed':
                remote_errors = True
            if remote_info['live_status'] == AlbaBackend.STATUSES.WARNING:
                linked_backend_warning = True

        # Retrieve ASD and maintenance service information
        def _get_node_information(_node):
            if _node not in nodes_used_by_this_backend:
                for slot_info in _node.stack.itervalues():
                    for osd_info in slot_info['osds'].itervalues():
                        if osd_info['claimed_by'] == self.guid:
                            nodes_used_by_this_backend.add(_node)
                            break
                    if _node in nodes_used_by_this_backend:
                        break

            try:
                services = _node.maintenance_services
                if self.name in services:
                    for _service_name, _service_status in services[self.name]:
                        services_for_this_backend[_service_name] = _node
                        service_states[_service_name] = _service_status
                        if _node.node_id not in services_per_node:
                            services_per_node[_node.node_id] = 0
                        services_per_node[_node.node_id] += 1
            except Exception:
                pass

        services_for_this_backend = {}
        services_per_node = {}
        service_states = {}
        nodes_used_by_this_backend = set()
        threads = []
        all_nodes = AlbaNodeList.get_albanodes()
        for node in all_nodes:
            thread = Thread(target=_get_node_information, args=(node, ))
            thread.start()
            threads.append(thread)
        for thread in threads:
            thread.join()

        zero_services = False
        if len(services_for_this_backend) == 0:
            if len(all_nodes) > 0:
                AlbaBackend._logger.error('AlbaBackend {0} STATUS set to FAILURE due to no maintenance services'.format(self.name))
                return AlbaBackend.STATUSES.FAILURE
            zero_services = True

        # Verify maintenance agents status
        for service_name, node in services_for_this_backend.iteritems():
            try:
                service_status = service_states.get(service_name)
                if service_status is None or service_status != 'active':
                    AlbaBackend._logger.error('AlbaBackend {0} STATUS set to FAILURE due to non-running maintenance service(s): {1}'.format(self.name, service_name))
                    return AlbaBackend.STATUSES.FAILURE
            except Exception:
                pass

        # Verify maintenance agents presence
        layout_key = '/ovs/alba/backends/{0}/maintenance/agents_layout'.format(self.guid)
        layout = None
        if Configuration.exists(layout_key):
            layout = Configuration.get(layout_key)
            if not isinstance(layout, list) or not any(node.node_id for node in all_nodes if node.node_id in layout):
                layout = None

        if layout is None:
            config_key = '/ovs/alba/backends/{0}/maintenance/nr_of_agents'.format(self.guid)
            expected_services = 3
            if Configuration.exists(config_key):
                expected_services = Configuration.get(config_key)
            expected_services = min(expected_services, len(nodes_used_by_this_backend)) or 1
            if len(services_for_this_backend) < expected_services:
                AlbaBackend._logger.warning('Live status for backend {0} is "warning": insufficient maintenance services'.format(self.name))
                return AlbaBackend.STATUSES.WARNING
        else:
            for node_id in layout:
                if node_id not in services_per_node:
                    AlbaBackend._logger.warning('Live status for backend {0} is "warning": invalid maintenance service layout'.format(self.name))
                    return AlbaBackend.STATUSES.WARNING

        # Verify local and remote OSDs
        if devices['orange'] > 0:
            AlbaBackend._logger.warning('Live status for backend {0} is "warning": one or more OSDs in warning'.format(self.name))
            return AlbaBackend.STATUSES.WARNING

        if remote_errors is True or linked_backend_warning is True:
            AlbaBackend._logger.warning('Live status for backend {0} is "warning": errors/warnings on remote stack'.format(self.name))
            return AlbaBackend.STATUSES.WARNING
        if zero_services is True:
            AlbaBackend._logger.warning('Live status for backend {0} is "warning": no maintenance services'.format(self.name))
            return AlbaBackend.STATUSES.WARNING

        return AlbaBackend.STATUSES.RUNNING
Example #27
    def migrate(previous_version, master_ips=None, extra_ips=None):
        """
        Migrates from a given version to the current version. It uses 'previous_version' to be smart
        wherever possible, but the code should be able to migrate any version towards the expected version.
        When this is not possible, the code can set a minimum version and raise when it is not met.
        :param previous_version: The previous version from which to start the migration
        :type previous_version: float
        :param master_ips: IP addresses of the MASTER nodes
        :type master_ips: list or None
        :param extra_ips: IP addresses of the EXTRA nodes
        :type extra_ips: list or None
        """

        _ = master_ips, extra_ips
        working_version = previous_version

        # From here on, all actual migration should happen to get to the expected state for THIS RELEASE
        if working_version < ExtensionMigrator.THIS_VERSION:
            try:
                from ovs.dal.lists.storagerouterlist import StorageRouterList
                from ovs.dal.lists.vpoollist import VPoolList
                from ovs.extensions.generic.configuration import Configuration
                from ovs.extensions.services.servicefactory import ServiceFactory
                from ovs.extensions.generic.sshclient import SSHClient
                from ovs.extensions.generic.system import System
                local_machine_id = System.get_my_machine_id()
                local_ip = Configuration.get('/ovs/framework/hosts/{0}/ip'.format(local_machine_id))
                local_client = SSHClient(endpoint=local_ip, username='******')

                # Multiple Proxies
                if local_client.dir_exists(directory='/opt/OpenvStorage/config/storagedriver/storagedriver'):
                    local_client.dir_delete(directories=['/opt/OpenvStorage/config/storagedriver/storagedriver'])

                # MDS safety granularity on vPool level
                mds_safety_key = '/ovs/framework/storagedriver'
                if Configuration.exists(key=mds_safety_key):
                    current_mds_settings = Configuration.get(key=mds_safety_key)
                    for vpool in VPoolList.get_vpools():
                        vpool_key = '/ovs/vpools/{0}'.format(vpool.guid)
                        if Configuration.dir_exists(key=vpool_key):
                            Configuration.set(key='{0}/mds_config'.format(vpool_key), value=current_mds_settings)
                    Configuration.delete(key=mds_safety_key)

                # Introduction of edition key
                if Configuration.get(key=Configuration.EDITION_KEY, default=None) not in [PackageFactory.EDITION_COMMUNITY, PackageFactory.EDITION_ENTERPRISE]:
                    for storagerouter in StorageRouterList.get_storagerouters():
                        try:
                            Configuration.set(key=Configuration.EDITION_KEY, value=storagerouter.features['alba']['edition'])
                            break
                        except:
                            continue

            except:
                ExtensionMigrator._logger.exception('Error occurred while executing the migration code')
                # Don't update migration version with latest version, resulting in next migration trying again to execute this code
                return ExtensionMigrator.THIS_VERSION - 1

        return ExtensionMigrator.THIS_VERSION
Example #28
    def create_hprm_config_files(vpool_guid, local_storagerouter_guid, parameters):
        """
        Create the required configuration files to be able to make use of HPRM (aka PRACC)
        This configuration will be zipped and made available for download
        :param vpool_guid: The guid of the VPool for which a HPRM manager needs to be deployed
        :type vpool_guid: str
        :param local_storagerouter_guid: The guid of the StorageRouter the API was requested on
        :type local_storagerouter_guid: str
        :param parameters: Additional information required for the HPRM configuration files
        :type parameters: dict
        :return: Name of the zipfile containing the configuration files
        :rtype: str
        """
        # Validations
        required_params = {'port': (int, {'min': 1, 'max': 65535}),
                           'identifier': (str, ExtensionsToolbox.regex_vpool)}
        ExtensionsToolbox.verify_required_params(actual_params=parameters, required_params=required_params)
        vpool = VPool(vpool_guid)
        identifier = parameters['identifier']
        config_path = None
        local_storagerouter = StorageRouter(local_storagerouter_guid)
        for sd in vpool.storagedrivers:
            if len(sd.alba_proxies) == 0:
                raise ValueError('No ALBA proxies configured for vPool {0} on StorageRouter {1}'.format(vpool.name, sd.storagerouter.name))
            config_path = '/ovs/vpools/{0}/proxies/{1}/config/{{0}}'.format(vpool.guid, sd.alba_proxies[0].guid)

        if config_path is None:
            raise ValueError('vPool {0} has not been extended to any StorageRouter'.format(vpool.name))
        proxy_cfg = Configuration.get(key=config_path.format('main'))

        cache_info = {}
        arakoons = {}
        cache_types = VPool.CACHES.values()
        if not any(ctype in parameters for ctype in cache_types):
            raise ValueError('At least one cache type should be passed: {0}'.format(', '.join(cache_types)))
        for ctype in cache_types:
            if ctype not in parameters:
                continue
            required_dict = {'read': (bool, None), 'write': (bool, None)}
            required_params.update({ctype: (dict, required_dict)})
            ExtensionsToolbox.verify_required_params(actual_params=parameters, required_params=required_params)
            read = parameters[ctype]['read']
            write = parameters[ctype]['write']
            if read is False and write is False:
                cache_info[ctype] = ['none']
                continue
            path = parameters[ctype].get('path')
            if path is not None:
                path = path.strip()
                if not path or path.endswith('/.') or '..' in path or '/./' in path:
                    raise ValueError('Invalid path specified')
                required_dict.update({'path': (str, None),
                                      'size': (int, {'min': 1, 'max': 10 * 1024})})
                ExtensionsToolbox.verify_required_params(actual_params=parameters, required_params=required_params)
                while '//' in path:
                    path = path.replace('//', '/')
                cache_info[ctype] = ['local', {'path': path,
                                               'max_size': parameters[ctype]['size'] * 1024 ** 3,
                                               'cache_on_read': read,
                                               'cache_on_write': write}]
            else:
                required_dict.update({'backend_info': (dict, {'preset': (str, ExtensionsToolbox.regex_preset),
                                                              'alba_backend_guid': (str, ExtensionsToolbox.regex_guid),
                                                              'alba_backend_name': (str, ExtensionsToolbox.regex_backend)}),
                                      'connection_info': (dict, {'host': (str, ExtensionsToolbox.regex_ip, False),
                                                                 'port': (int, {'min': 1, 'max': 65535}, False),
                                                                 'client_id': (str, ExtensionsToolbox.regex_guid, False),
                                                                 'client_secret': (str, None, False)})})
                ExtensionsToolbox.verify_required_params(actual_params=parameters, required_params=required_params)
                connection_info = parameters[ctype]['connection_info']
                if connection_info['host']:  # Remote Backend for accelerated Backend
                    alba_backend_guid = parameters[ctype]['backend_info']['alba_backend_guid']
                    ovs_client = OVSClient.get_instance(connection_info=connection_info)
                    arakoon_config = VPoolShared.retrieve_alba_arakoon_config(alba_backend_guid=alba_backend_guid, ovs_client=ovs_client)
                    arakoons[ctype] = ArakoonClusterConfig.convert_config_to(arakoon_config, return_type='INI')
                else:  # Local Backend for accelerated Backend
                    alba_backend_name = parameters[ctype]['backend_info']['alba_backend_name']
                    if Configuration.exists(key='/ovs/arakoon/{0}-abm/config'.format(alba_backend_name), raw=True) is False:
                        raise ValueError('Arakoon cluster for ALBA Backend {0} could not be retrieved'.format(alba_backend_name))
                    arakoons[ctype] = Configuration.get(key='/ovs/arakoon/{0}-abm/config'.format(alba_backend_name), raw=True)
                cache_info[ctype] = ['alba', {'albamgr_cfg_url': '/etc/hprm/{0}/{1}_cache_arakoon.ini'.format(identifier, ctype),
                                              'bucket_strategy': ['1-to-1', {'prefix': vpool.guid,
                                                                             'preset': parameters[ctype]['backend_info']['preset']}],
                                              'manifest_cache_size': proxy_cfg['manifest_cache_size'],
                                              'cache_on_read': read,
                                              'cache_on_write': write}]

        tgz_name = 'hprm_config_files_{0}_{1}.tgz'.format(identifier, vpool.name)
        config = {'ips': ['127.0.0.1'],
                  'port': parameters['port'],
                  'pracc': {'uds_path': '/var/run/hprm/{0}/uds_path'.format(identifier),
                            'max_clients': 1000,
                            'max_read_buf_size': 64 * 1024,  # Buffer size for incoming requests (in bytes)
                            'thread_pool_size': 64},  # Amount of threads
                  'transport': 'tcp',
                  'log_level': 'info',
                  'read_preference': proxy_cfg['read_preference'],
                  'albamgr_cfg_url': '/etc/hprm/{0}/arakoon.ini'.format(identifier),
                  'manifest_cache_size': proxy_cfg['manifest_cache_size']}
        file_contents_map = {}
        for ctype in cache_types:
            if ctype in cache_info:
                config['{0}_cache'.format(ctype)] = cache_info[ctype]
            if ctype in arakoons:
                file_contents_map['/opt/OpenvStorage/config/{0}/{1}_cache_arakoon.ini'.format(identifier, ctype)] = arakoons[ctype]
        file_contents_map.update({'/opt/OpenvStorage/config/{0}/config.json'.format(identifier): json.dumps(config, indent=4),
                                  '/opt/OpenvStorage/config/{0}/arakoon.ini'.format(identifier): Configuration.get(key=config_path.format('abm'), raw=True)})

        local_client = SSHClient(endpoint=local_storagerouter)
        local_client.dir_create(directories='/opt/OpenvStorage/config/{0}'.format(identifier))
        local_client.dir_create(directories='/opt/OpenvStorage/webapps/frontend/downloads')
        for file_name, contents in file_contents_map.iteritems():
            local_client.file_write(contents=contents, filename=file_name)
        local_client.run(command=['tar', '--transform', 's#^config/{0}#{0}#'.format(identifier),
                                  '-czf', '/opt/OpenvStorage/webapps/frontend/downloads/{0}'.format(tgz_name),
                                  'config/{0}'.format(identifier)])
        local_client.dir_delete(directories='/opt/OpenvStorage/config/{0}'.format(identifier))
        return tgz_name
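
The archive is written to the frontend downloads directory, so it can be fetched from the node and unpacked on the HPRM host. A minimal consumption sketch; serving the downloads directory over HTTPS and the URL layout are assumptions, only the /etc/hprm target matches the albamgr_cfg_url paths written above:

    import subprocess

    def deploy_hprm_config(node_address, tgz_name):
        # Fetch the generated archive (assumed URL layout for the downloads directory)
        url = 'https://{0}/downloads/{1}'.format(node_address, tgz_name)
        subprocess.check_call(['wget', '--no-check-certificate', url, '-O', '/tmp/{0}'.format(tgz_name)])
        # The tar '--transform' above stripped the 'config/' prefix, so the archive holds
        # '<identifier>/...'; unpacking under /etc/hprm matches the paths in config.json
        subprocess.check_call(['mkdir', '-p', '/etc/hprm'])
        subprocess.check_call(['tar', '-xzf', '/tmp/{0}'.format(tgz_name), '-C', '/etc/hprm'])
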
    def _local_stack(self):
        """
        Returns a live list of all disks known to this AlbaBackend
        """
        from ovs.dal.lists.albanodelist import AlbaNodeList
        from ovs.dal.lists.albabackendlist import AlbaBackendList

        if len(self.abm_services) == 0:
            return {}  # No ABM services yet, so backend not fully installed yet

        alba_backend_map = {}
        for alba_backend in AlbaBackendList.get_albabackends():
            alba_backend_map[alba_backend.alba_id] = alba_backend

        # Load information based on the model
        asd_map = {}
        storage_map = {}
        alba_nodes = AlbaNodeList.get_albanodes()
        for node in alba_nodes:
            node_id = node.node_id
            storage_map[node_id] = {}
            for disk in node.disks:
                disk_id = disk.aliases[0].split('/')[-1]
                storage_map[node_id][disk_id] = {'asds': {},
                                                 'name': disk_id,
                                                 'guid': disk.guid,
                                                 'status': 'error',
                                                 'aliases': disk.aliases,
                                                 'status_detail': 'unknown'}
                for osd in disk.osds:
                    osd_id = osd.osd_id
                    data = {'asd_id': osd_id,
                            'guid': osd.guid,
                            'status': 'error',
                            'status_detail': 'unknown',
                            'alba_backend_guid': osd.alba_backend_guid}
                    asd_map[osd_id] = data
                    storage_map[node_id][disk_id]['asds'][osd_id] = data

        # Load information from node
        def _load_live_info(_node, _node_data):
            _data = _node.storage_stack
            if _data['status'] != 'ok':
                for disk_entry in _node_data.values():
                    disk_entry['status_detail'] = _data['status']
                    for entry in disk_entry.get('asds', {}).values():
                        entry['status_detail'] = _data['status']
            else:
                for _disk_id, disk_asd_info in _data['stack'].iteritems():
                    if _disk_id not in _node_data:
                        _node_data[_disk_id] = {'asds': {}}
                    entry = _node_data[_disk_id]
                    disk_info = copy.deepcopy(disk_asd_info)
                    del disk_info['asds']
                    entry.update(disk_info)
                    asds_info = disk_asd_info['asds']
                    for _asd_id, asd_info in asds_info.iteritems():
                        if _asd_id not in _node_data[_disk_id]['asds']:
                            _node_data[_disk_id]['asds'][_asd_id] = asd_info
                        else:
                            _node_data[_disk_id]['asds'][_asd_id].update(asd_info)

        threads = []
        for node in alba_nodes:
            thread = Thread(target=_load_live_info, args=(node, storage_map[node.node_id]))
            thread.start()
            threads.append(thread)
        for thread in threads:
            thread.join()

        # Mix in usage information
        for asd_id, stats in self.asd_statistics.iteritems():
            if asd_id in asd_map:
                asd_map[asd_id]['usage'] = {'size': int(stats['capacity']),
                                            'used': int(stats['disk_usage']),
                                            'available': int(stats['capacity'] - stats['disk_usage'])}

        # Load information from alba
        backend_interval_key = '/ovs/alba/backends/{0}/gui_error_interval'.format(self.guid)
        if Configuration.exists(backend_interval_key):
            interval = Configuration.get(backend_interval_key)
        else:
            interval = Configuration.get('/ovs/alba/backends/global_gui_error_interval')
        config = Configuration.get_configuration_path('/ovs/arakoon/{0}-abm/config'.format(self.name))
        asds = {}
        for found_osd in AlbaCLI.run(command='list-all-osds', config=config):
            asds[found_osd['long_id']] = found_osd
        for node_data in storage_map.values():
            for _disk in node_data.values():
                for asd_id, asd_data in _disk['asds'].iteritems():
                    if asd_id not in asds:
                        continue
                    found_osd = asds[asd_id]
                    if 'state' not in asd_data:
                        continue
                    if found_osd.get('decommissioned') is True:
                        asd_data['status'] = 'unavailable'
                        asd_data['status_detail'] = 'decommissioned'
                        continue
                    state = asd_data['state']
                    if state == 'ok':
                        if found_osd['id'] is None:
                            alba_id = found_osd['alba_id']
                            if alba_id is None:
                                asd_data['status'] = 'available'
                            else:
                                asd_data['status'] = 'unavailable'
                                alba_backend = alba_backend_map.get(alba_id)
                                if alba_backend is not None:
                                    asd_data['alba_backend_guid'] = alba_backend.guid
                        else:
                            asd_data['alba_backend_guid'] = self.guid
                            asd_data['status'] = 'warning'
                            asd_data['status_detail'] = 'recenterrors'

                            read = found_osd['read'] or [0]
                            write = found_osd['write'] or [0]
                            errors = found_osd['errors']
                            if len(errors) == 0 or (len(read + write) > 0 and max(min(read), min(write)) > max(error[0] for error in errors) + interval):
                                asd_data['status'] = 'claimed'
                                asd_data['status_detail'] = ''
                    else:
                        asd_data['status'] = 'error'
                        asd_data['status_detail'] = asd_data.get('state_detail', '')
                        alba_backend = alba_backend_map.get(found_osd.get('alba_id'))
                        if alba_backend is not None:
                            asd_data['alba_backend_guid'] = alba_backend.guid
        return storage_map
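
For reference, a sketch of the structure _local_stack() builds and one way to walk it; my_alba_backend is an assumed AlbaBackend instance and the field values are illustrative:

    # {<node_id>: {<disk_id>: {'name': ..., 'guid': ..., 'aliases': [...],
    #                          'status': ..., 'status_detail': ...,
    #                          'asds': {<osd_id>: {'asd_id': ..., 'guid': ..., 'status': ...,
    #                                              'status_detail': ..., 'alba_backend_guid': ...,
    #                                              'usage': {'size': ..., 'used': ..., 'available': ...}}}}}}
    for node_id, disks in my_alba_backend._local_stack().iteritems():
        for disk_id, disk_data in disks.iteritems():
            for osd_id, asd_data in disk_data.get('asds', {}).iteritems():
                print '{0}/{1}/{2}: {3} ({4})'.format(node_id, disk_id, osd_id,
                                                      asd_data.get('status'), asd_data.get('status_detail'))
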
Beispiel #30
0
    def get(self, request, *args, **kwargs):
        """
        Fetches metadata
        """
        _ = args, kwargs
        data = {'authenticated': False,
                'authentication_state': None,
                'authentication_metadata': {},
                'username': None,
                'userguid': None,
                'roles': [],
                'identification': {},
                'storagerouter_ips': [sr.ip for sr in StorageRouterList.get_storagerouters()],
                'versions': list(settings.VERSION),
                'plugins': {}}
        try:
            # Gather plugin metadata
            plugins = {}
            # - Backends. BackendType plugins must set the has_plugin flag on True
            for backend_type in BackendTypeList.get_backend_types():
                if backend_type.has_plugin is True:
                    if backend_type.code not in plugins:
                        plugins[backend_type.code] = []
                    plugins[backend_type.code] += ['backend', 'gui']
            # - Generic plugins, as added to the configuration file(s)
            generic_plugins = Configuration.get('/ovs/framework/plugins/installed|generic')
            for plugin_name in generic_plugins:
                if plugin_name not in plugins:
                    plugins[plugin_name] = []
                plugins[plugin_name] += ['gui']
            data['plugins'] = plugins

            # Fill identification
            data['identification'] = {'cluster_id': Configuration.get('/ovs/framework/cluster_id')}

            # Get authentication metadata
            authentication_metadata = {'ip': System.get_my_storagerouter().ip}
            for key in ['mode', 'authorize_uri', 'client_id', 'scope']:
                if Configuration.exists('/ovs/framework/webapps|oauth2.{0}'.format(key)):
                    authentication_metadata[key] = Configuration.get('/ovs/framework/webapps|oauth2.{0}'.format(key))
            data['authentication_metadata'] = authentication_metadata

            # Gather authorization metadata
            if 'HTTP_AUTHORIZATION' not in request.META:
                return dict(data.items() + {'authentication_state': 'unauthenticated'}.items())
            authorization_type, access_token = request.META['HTTP_AUTHORIZATION'].split(' ')
            if authorization_type != 'Bearer':
                return dict(data.items() + {'authentication_state': 'invalid_authorization_type'}.items())
            tokens = BearerTokenList.get_by_access_token(access_token)
            if len(tokens) != 1:
                return dict(data.items() + {'authentication_state': 'invalid_token'}.items())
            token = tokens[0]
            if token.expiration < time.time():
                for junction in token.roles.itersafe():
                    junction.delete()
                token.delete()
                return dict(data.items() + {'authentication_state': 'token_expired'}.items())

            # Gather user metadata
            user = token.client.user
            if not user.is_active:
                return dict(data.items() + {'authentication_state': 'inactive_user'}.items())
            roles = [j.role.code for j in token.roles]

            return dict(data.items() + {'authenticated': True,
                                        'authentication_state': 'authenticated',
                                        'username': user.username,
                                        'userguid': user.guid,
                                        'roles': roles,
                                        'plugins': plugins}.items())
        except Exception as ex:
            MetadataView._logger.exception('Unexpected exception: {0}'.format(ex))
            return dict(data.items() + {'authentication_state': 'unexpected_exception'}.items())
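
A hedged client-side sketch for consuming this endpoint; the '/api/' path and the use of the requests library are assumptions, the response fields match the view above:

    import requests

    def fetch_metadata(node_ip, access_token=None):
        headers = {}
        if access_token is not None:
            headers['Authorization'] = 'Bearer {0}'.format(access_token)
        metadata = requests.get('https://{0}/api/'.format(node_ip), headers=headers, verify=False).json()
        if metadata['authenticated'] is False:
            raise RuntimeError('Not authenticated: {0}'.format(metadata['authentication_state']))
        return metadata['username'], metadata['roles'], metadata['plugins']
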
Beispiel #31
0
    def migrate():
        """
        Executes async migrations. It doesn't matter too much when they are executed, as long as they get eventually
        executed. This code will typically contain:
        * "dangerous" migration code (it needs certain running services)
        * Migration code depending on a cluster-wide state
        * ...
        """
        AlbaMigrationController._logger.info(
            'Preparing out of band migrations...')

        from ovs.dal.hybrids.diskpartition import DiskPartition
        from ovs.dal.lists.albabackendlist import AlbaBackendList
        from ovs.dal.lists.albanodelist import AlbaNodeList
        from ovs.dal.lists.albaosdlist import AlbaOSDList
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.extensions.generic.configuration import Configuration
        from ovs.extensions.generic.sshclient import SSHClient, UnableToConnectException
        from ovs.extensions.migration.migration.albamigrator import ExtensionMigrator
        from ovs.extensions.packages.albapackagefactory import PackageFactory
        from ovs.extensions.services.albaservicefactory import ServiceFactory
        from ovs.extensions.plugins.albacli import AlbaCLI, AlbaError
        from ovs.lib.alba import AlbaController
        from ovs.lib.disk import DiskController

        AlbaMigrationController._logger.info('Start out of band migrations...')

        #############################################
        # Introduction of IP:port combination on OSDs
        osd_info_map = {}
        alba_backends = AlbaBackendList.get_albabackends()
        for alba_backend in alba_backends:
            AlbaMigrationController._logger.info(
                'Verifying ALBA Backend {0}'.format(alba_backend.name))
            if alba_backend.abm_cluster is None:
                AlbaMigrationController._logger.warning(
                    'ALBA Backend {0} does not have an ABM cluster registered'.
                    format(alba_backend.name))
                continue

            AlbaMigrationController._logger.debug(
                'Retrieving configuration path for ALBA Backend {0}'.format(
                    alba_backend.name))
            try:
                config = Configuration.get_configuration_path(
                    alba_backend.abm_cluster.config_location)
            except Exception:
                AlbaMigrationController._logger.exception(
                    'Failed to retrieve the configuration path for ALBA Backend {0}'
                    .format(alba_backend.name))
                continue

            AlbaMigrationController._logger.info(
                'Retrieving OSD information for ALBA Backend {0}'.format(
                    alba_backend.name))
            try:
                osd_info = AlbaCLI.run(command='list-all-osds', config=config)
            except (AlbaError, RuntimeError):
                AlbaMigrationController._logger.exception(
                    'Failed to retrieve OSD information for ALBA Backend {0}'.
                    format(alba_backend.name))
                continue

            for osd_entry in osd_info:  # Don't shadow the 'osd_info' list while iterating it
                if osd_entry.get('long_id'):
                    osd_info_map[osd_entry['long_id']] = {'ips': osd_entry.get('ips', []),
                                                          'port': osd_entry.get('port')}

        for osd in AlbaOSDList.get_albaosds():
            if osd.osd_id not in osd_info_map:
                AlbaMigrationController._logger.warning(
                    'OSD with ID {0} is modelled but could not be found through ALBA'
                    .format(osd.osd_id))
                continue

            ips = osd_info_map[osd.osd_id]['ips']
            port = osd_info_map[osd.osd_id]['port']
            changes = False
            if osd.ips is None:
                changes = True
                osd.ips = ips
            if osd.port is None:
                changes = True
                osd.port = port
            if changes is True:
                AlbaMigrationController._logger.info(
                    'Updating OSD with ID {0} with IPS {1} and port {2}'.
                    format(osd.osd_id, ips, port))
                osd.save()

        ###################################################
        # Read preference for GLOBAL ALBA Backends (1.10.3)  (https://github.com/openvstorage/framework-alba-plugin/issues/452)
        if Configuration.get(key='/ovs/framework/migration|read_preference',
                             default=False) is False:
            try:
                name_backend_map = dict((alba_backend.name, alba_backend)
                                        for alba_backend in alba_backends)
                for alba_node in AlbaNodeList.get_albanodes():
                    AlbaMigrationController._logger.info(
                        'Processing maintenance services running on ALBA Node {0} with ID {1}'
                        .format(alba_node.ip, alba_node.node_id))
                    alba_node.invalidate_dynamics('maintenance_services')
                    for alba_backend_name, services in alba_node.maintenance_services.iteritems():
                        if alba_backend_name not in name_backend_map:
                            AlbaMigrationController._logger.error(
                                'ALBA Node {0} has services for an ALBA Backend {1} which is not modelled'
                                .format(alba_node.ip, alba_backend_name))
                            continue

                        alba_backend = name_backend_map[alba_backend_name]
                        AlbaMigrationController._logger.info(
                            'Processing {0} ALBA Backend {1} with GUID {2}'.
                            format(alba_backend.scaling, alba_backend.name,
                                   alba_backend.guid))
                        if alba_backend.scaling == alba_backend.SCALINGS.LOCAL:
                            read_preferences = [alba_node.node_id]
                        else:
                            read_preferences = AlbaController.get_read_preferences_for_global_backend(
                                alba_backend=alba_backend,
                                alba_node_id=alba_node.node_id,
                                read_preferences=[])

                        for service_name, _ in services:
                            AlbaMigrationController._logger.info(
                                'Processing service {0}'.format(service_name))
                            old_config_key = '/ovs/alba/backends/{0}/maintenance/config'.format(
                                alba_backend.guid)
                            new_config_key = '/ovs/alba/backends/{0}/maintenance/{1}/config'.format(
                                alba_backend.guid, service_name)
                            if Configuration.exists(key=old_config_key):
                                new_config = Configuration.get(key=old_config_key)
                                new_config['read_preference'] = read_preferences
                                Configuration.set(key=new_config_key, value=new_config)
                for alba_backend in alba_backends:
                    Configuration.delete(
                        key='/ovs/alba/backends/{0}/maintenance/config'.format(
                            alba_backend.guid))
                AlbaController.checkup_maintenance_agents.delay()

                Configuration.set(
                    key='/ovs/framework/migration|read_preference', value=True)
            except Exception:
                AlbaMigrationController._logger.exception(
                    'Updating read preferences for ALBA Backends failed')

        #######################################################
        # Storing actual package name in version files (1.11.0) (https://github.com/openvstorage/framework/issues/1876)
        changed_clients = set()
        storagerouters = StorageRouterList.get_storagerouters()
        if Configuration.get(key='/ovs/framework/migration|actual_package_name_in_version_file_alba',
                             default=False) is False:
            try:
                service_manager = ServiceFactory.get_manager()
                alba_pkg_name, alba_version_cmd = PackageFactory.get_package_and_version_cmd_for(
                    component=PackageFactory.COMP_ALBA)
                for storagerouter in storagerouters:
                    try:
                        root_client = SSHClient(
                            endpoint=storagerouter.ip, username='******'
                        )  # Use '.ip' instead of StorageRouter object because this code is executed during post-update at which point the heartbeat has not been updated for some time
                    except UnableToConnectException:
                        AlbaMigrationController._logger.exception(
                            'Updating actual package name for version files failed on StorageRouter {0}'
                            .format(storagerouter.ip))
                        continue

                    for file_name in root_client.file_list(
                            directory=ServiceFactory.RUN_FILE_DIR):
                        if not file_name.endswith('.version'):
                            continue
                        file_path = '{0}/{1}'.format(
                            ServiceFactory.RUN_FILE_DIR, file_name)
                        contents = root_client.file_read(filename=file_path)
                        if alba_pkg_name == PackageFactory.PKG_ALBA_EE and '{0}='.format(
                                PackageFactory.PKG_ALBA) in contents:
                            # Rewrite the version file in the RUN_FILE_DIR
                            contents = contents.replace(
                                PackageFactory.PKG_ALBA,
                                PackageFactory.PKG_ALBA_EE)
                            root_client.file_write(filename=file_path,
                                                   contents=contents)

                            # Regenerate the service and update the EXTRA_VERSION_CMD in the configuration management
                            service_name = file_name.split('.')[0]
                            service_config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(
                                storagerouter.machine_id, service_name)
                            if Configuration.exists(key=service_config_key):
                                service_config = Configuration.get(key=service_config_key)
                                if 'EXTRA_VERSION_CMD' in service_config:
                                    service_config['EXTRA_VERSION_CMD'] = '{0}=`{1}`'.format(alba_pkg_name, alba_version_cmd)
                                    Configuration.set(key=service_config_key, value=service_config)
                                    service_manager.regenerate_service(name='ovs-arakoon',
                                                                       client=root_client,
                                                                       target_name='ovs-{0}'.format(service_name))  # Leave out .version
                                    changed_clients.add(root_client)
                Configuration.set(key='/ovs/framework/migration|actual_package_name_in_version_file_alba',
                                  value=True)
            except Exception:
                AlbaMigrationController._logger.exception(
                    'Updating actual package name for version files failed')

        for root_client in changed_clients:
            try:
                root_client.run(['systemctl', 'daemon-reload'])
            except Exception:
                AlbaMigrationController._logger.exception(
                    'Executing command "systemctl daemon-reload" failed')

        ####################################
        # Fix for migration version (1.11.0)
        # Previous code could potentially store a higher version number in the config management than the actual version number
        if Configuration.get(
                key='/ovs/framework/migration|alba_migration_version_fix',
                default=False) is False:
            try:
                for storagerouter in storagerouters:
                    config_key = '/ovs/framework/hosts/{0}/versions'.format(
                        storagerouter.machine_id)
                    if Configuration.exists(key=config_key):
                        versions = Configuration.get(key=config_key)
                        if versions.get(PackageFactory.COMP_MIGRATION_ALBA, 0) > ExtensionMigrator.THIS_VERSION:
                            versions[PackageFactory.COMP_MIGRATION_ALBA] = ExtensionMigrator.THIS_VERSION
                            Configuration.set(key=config_key, value=versions)
                Configuration.set(
                    key='/ovs/framework/migration|alba_migration_version_fix',
                    value=True)
            except Exception:
                AlbaMigrationController._logger.exception(
                    'Updating migration version failed')

        ####################################
        # Enable auto-cleanup
        migration_auto_cleanup_key = '/ovs/framework/migration|alba_auto_cleanup'
        if Configuration.get(key=migration_auto_cleanup_key,
                             default=False) is False:
            try:
                for storagerouter in StorageRouterList.get_storagerouters():
                    storagerouter.invalidate_dynamics('features')  # New feature was added
                errors = []
                for alba_backend in AlbaBackendList.get_albabackends():
                    try:
                        AlbaController.set_auto_cleanup(alba_backend.guid)
                    except Exception as ex:
                        AlbaMigrationController._logger.exception(
                            'Failed to set the auto-cleanup for ALBA Backend {0}'
                            .format(alba_backend.name))
                        errors.append(ex)
                if len(errors) == 0:
                    Configuration.set(key=migration_auto_cleanup_key,
                                      value=True)
            except Exception:
                AlbaMigrationController._logger.exception(
                    'Updating auto cleanup failed')

        ####################################
        # Change cache eviction
        migration_random_eviction_key = '/ovs/framework/migration|alba_cache_eviction_random'
        if Configuration.get(key=migration_random_eviction_key,
                             default=False) is False:
            try:
                errors = []
                for alba_backend in AlbaBackendList.get_albabackends():
                    try:
                        AlbaController.set_cache_eviction(alba_backend.guid)
                    except Exception as ex:
                        AlbaMigrationController._logger.exception(
                            'Failed to set the cache eviction for ALBA Backend {0}'
                            .format(alba_backend.name))
                        errors.append(ex)
                if len(errors) == 0:
                    Configuration.set(key=migration_random_eviction_key,
                                      value=True)
            except Exception:
                AlbaMigrationController._logger.exception(
                    'Updating cache eviction failed')

        ###################################################
        # Sync all disks and apply the backend role. Backend role was removed with the AD (since 1.10)
        albanode_backend_role_sync_key = '/ovs/framework/migration|albanode_backend_role_sync'
        if not Configuration.get(key=albanode_backend_role_sync_key,
                                 default=False):
            try:
                errors = []
                for alba_node in AlbaNodeList.get_albanodes():
                    try:
                        if not alba_node.storagerouter:
                            continue
                        stack = alba_node.client.get_stack()  # type: dict
                        for slot_id, slot_information in stack.iteritems():
                            osds = slot_information.get('osds', {})  # type: dict
                            slot_aliases = slot_information.get('aliases', [])  # type: list
                            if not osds:  # No osds means no partition was made
                                continue
                            # Sync to add all potential partitions that will need a backend role
                            DiskController.sync_with_reality(storagerouter_guid=alba_node.storagerouter_guid)
                            for disk in alba_node.storagerouter.disks:
                                if set(disk.aliases).intersection(set(slot_aliases)):
                                    partition = disk.partitions[0]
                                    if DiskPartition.ROLES.BACKEND not in partition.roles:
                                        partition.roles.append(DiskPartition.ROLES.BACKEND)
                                        partition.save()
                    except Exception as ex:
                        AlbaMigrationController._logger.exception(
                            'Syncing for storagerouter/albanode {0} failed'.
                            format(alba_node.storagerouter.ip))
                        errors.append(ex)
                if not errors:
                    Configuration.set(key=albanode_backend_role_sync_key,
                                      value=True)
            except Exception:
                AlbaMigrationController._logger.exception(
                    'Syncing up the disks for backend roles failed')

        AlbaMigrationController._logger.info('Finished out of band migrations')
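
Every block in migrate() follows the same idempotent pattern: read a boolean flag from configuration management, run the step, and only set the flag once the step fully succeeded, so a failed step is retried on the next run. A minimal sketch of that pattern; the key name is hypothetical:

    def run_migration_step_once(migration_key, step):
        # migration_key: flag key, e.g. '/ovs/framework/migration|my_step' (hypothetical)
        # step: callable performing the actual migration work
        if Configuration.get(key=migration_key, default=False) is False:
            try:
                step()
                Configuration.set(key=migration_key, value=True)  # Only mark done on full success
            except Exception:
                AlbaMigrationController._logger.exception('Migration step {0} failed'.format(migration_key))
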
    def check_vpool_cleanup(vpool_info, storagerouters=None):
        """
        Check if everything related to a vPool has been cleaned up on the storagerouters provided
        vpool_info should be a dictionary containing:
            - type
            - guid
            - files
            - directories
            - name (optional)
            - vpool (optional)
            If vpool is provided:
                - storagerouters need to be provided, because on these Storage Routers, we check whether the vPool has been cleaned up
            If name is provided:
                - If storagerouters is NOT provided, all Storage Routers will be checked for a correct vPool removal
                - If storagerouters is provided, only these Storage Routers will be checked for a correct vPool removal

        :param vpool_info: Information about the vPool
        :param storagerouters: Storage Routers to check if vPool has been cleaned up
        :return: None
        """
        for required_param in ["type", "guid", "files", "directories"]:
            if required_param not in vpool_info:
                raise ValueError("Incorrect vpool_info provided")
        if "vpool" in vpool_info and "name" in vpool_info:
            raise ValueError("vpool and name are mutually exclusive")
        if "vpool" not in vpool_info and "name" not in vpool_info:
            raise ValueError("Either vpool or vpool_name needs to be provided")

        vpool = vpool_info.get("vpool")
        vpool_name = vpool_info.get("name")
        vpool_guid = vpool_info["guid"]
        vpool_type = vpool_info["type"]
        files = vpool_info["files"]
        directories = vpool_info["directories"]

        supported_backend_types = GeneralBackend.get_valid_backendtypes()
        if vpool_type not in supported_backend_types:
            raise ValueError(
                "Unsupported Backend Type provided. Please choose from: {0}".format(", ".join(supported_backend_types))
            )
        if storagerouters is None:
            storagerouters = GeneralStorageRouter.get_storage_routers()

        if vpool_name is not None:
            assert (
                GeneralVPool.get_vpool_by_name(vpool_name=vpool_name) is None
            ), "A vPool with name {0} still exists".format(vpool_name)

        # Prepare some fields to check
        vpool_name = vpool.name if vpool else vpool_name
        vpool_services = ["ovs-dtl_{0}".format(vpool_name), "ovs-volumedriver_{0}".format(vpool_name)]
        if vpool_type == "alba":
            vpool_services.append("ovs-albaproxy_{0}".format(vpool_name))

        # Check configuration
        if vpool is None:
            assert (
                Configuration.exists("/ovs/vpools/{0}".format(vpool_guid), raw=True) is False
            ), "vPool config still found in etcd"
        else:
            remaining_sd_ids = set([storagedriver.storagedriver_id for storagedriver in vpool.storagedrivers])
            current_sd_ids = set([item for item in Configuration.list("/ovs/vpools/{0}/hosts".format(vpool_guid))])
            assert not remaining_sd_ids.difference(
                current_sd_ids
            ), "There are more storagedrivers modelled than present in etcd"
            assert not current_sd_ids.difference(
                remaining_sd_ids
            ), "There are more storagedrivers in etcd than present in model"

        # Perform checks on all storagerouters where vpool was removed
        for storagerouter in storagerouters:

            # Check MDS services
            mds_services = GeneralService.get_services_by_name(ServiceType.SERVICE_TYPES.MD_SERVER)
            assert (
                len(
                    [
                        mds_service
                        for mds_service in mds_services
                        if mds_service.storagerouter_guid == storagerouter.guid
                    ]
                )
                == 0
            ), "There are still MDS services present for Storage Router {0}".format(storagerouter.ip)

            # Check services
            root_client = SSHClient(storagerouter, username="******")
            for service in vpool_services:
                if ServiceManager.has_service(service, client=root_client):
                    raise RuntimeError(
                        "Service {0} is still configured on Storage Router {1}".format(service, storagerouter.ip)
                    )

            # Check KVM vpool
            if GeneralHypervisor.get_hypervisor_type() == "KVM":
                vpool_overview = root_client.run(["virsh", "pool-list", "--all"]).splitlines()
                vpool_overview.pop(1)
                vpool_overview.pop(0)
                for vpool_entry in vpool_overview:  # Don't shadow the 'vpool_info' parameter
                    kvm_vpool_name = vpool_entry.split()[0].strip()
                    if vpool_name == kvm_vpool_name:
                        raise ValueError(
                            "vPool {0} is still defined on Storage Router {1}".format(vpool_name, storagerouter.ip)
                        )

            # Check file and directory existence
            if storagerouter.guid not in directories:
                raise ValueError("Could not find directory information for Storage Router {0}".format(storagerouter.ip))
            if storagerouter.guid not in files:
                raise ValueError("Could not find file information for Storage Router {0}".format(storagerouter.ip))

            for directory in directories[storagerouter.guid]:
                assert (
                    root_client.dir_exists(directory) is False
                ), "Directory {0} still exists on Storage Router {1}".format(directory, storagerouter.ip)
            for file_name in files[storagerouter.guid]:
                assert (
                    root_client.file_exists(file_name) is False
                ), "File {0} still exists on Storage Router {1}".format(file_name, storagerouter.ip)

            # Look for errors in storagedriver log
            for error_type in ["error", "fatal"]:
                cmd = "cat -vet /var/log/ovs/volumedriver/{0}.log | tail -1000 | grep ' {1} '; echo true > /dev/null".format(
                    vpool_name, error_type
                )
                errors = []
                for line in root_client.run(cmd, allow_insecure=True).splitlines():
                    if "HierarchicalArakoon" in line:
                        continue
                    errors.append(line)
                if len(errors) > 0:
                    if error_type == "error":
                        print "Volumedriver log file contains errors on Storage Router {0}\n - {1}".format(
                            storagerouter.ip, "\n - ".join(errors)
                        )
                    else:
                        raise RuntimeError(
                            "Fatal errors found in volumedriver log file on Storage Router {0}\n - {1}".format(
                                storagerouter.ip, "\n - ".join(errors)
                            )
                        )
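
An illustrative invocation showing the vpool_info contract described in the docstring; all names and values are placeholders:

    check_vpool_cleanup(vpool_info={'type': 'alba',
                                    'guid': removed_vpool_guid,
                                    'name': 'myvpool01',
                                    'files': files_per_sr,        # {storagerouter_guid: [file_path, ...]}
                                    'directories': dirs_per_sr},  # {storagerouter_guid: [directory, ...]}
                        storagerouters=GeneralStorageRouter.get_storage_routers())
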
    def validate_arakoon_config_files(storagerouters, cluster_name=None):
        """
        Verify whether all arakoon configurations are correct
        :param storagerouters: Storage Routers
        :param cluster_name: Name of the Arakoon cluster
        :return:
        """
        storagerouters.sort(key=lambda k: k.ip)
        TestArakoon.logger.info('Validating arakoon files for {0}'.format(', '.join([sr.ip for sr in storagerouters])))

        nr_of_configs_on_master = 0
        nr_of_configs_on_extra = 0

        node_ids = dict()
        matrix = dict()
        for sr in storagerouters:
            node_ids[sr.ip] = sr.machine_id
            configs_to_check = []
            matrix[sr] = dict()
            if cluster_name is not None:
                if Configuration.exists(GeneralArakoon.CONFIG_KEY.format(cluster_name), raw=True):
                    configs_to_check = [GeneralArakoon.CONFIG_KEY.format(cluster_name)]
            else:
                gen = Configuration.list(GeneralArakoon.CONFIG_ROOT)
                for entry in gen:
                    if 'nsm_' not in entry:
                        if Configuration.exists(GeneralArakoon.CONFIG_KEY.format(entry), raw=True):
                            configs_to_check.append(GeneralArakoon.CONFIG_KEY.format(entry))
            for config_name in configs_to_check:
                config_contents = Configuration.get(config_name, raw=True)
                matrix[sr][config_name] = hashlib.md5(config_contents).hexdigest()
            if sr.node_type == 'MASTER':
                nr_of_configs_on_master = len(matrix[sr])
            else:
                nr_of_configs_on_extra = len(matrix[sr])

        TestArakoon.logger.info('cluster_ids: {0}'.format(node_ids))
        TestArakoon.logger.info('matrix: {0}'.format(matrix))

        for config_file in matrix[storagerouters[0]].keys():
            TestArakoon.validate_arakoon_config_content(config_file, node_ids)

        assert len(storagerouters) == len(matrix.keys()), "not all nodes have arakoon configs"
        incorrect_nodes = list()
        for sr in matrix:
            is_master = sr.node_type == 'MASTER'
            if (is_master is True and len(matrix[sr]) != nr_of_configs_on_master) or\
                    (is_master is False and len(matrix[sr]) != nr_of_configs_on_extra):
                incorrect_nodes.append(sr.ip)
        assert len(incorrect_nodes) == 0, "Incorrect nr of configs on nodes: {0}".format(incorrect_nodes)

        md5sum_matrix = dict()
        incorrect_configs = list()
        for cfg in matrix[storagerouters[0]]:
            for sr in storagerouters:
                if cfg not in md5sum_matrix:
                    md5sum_matrix[cfg] = matrix[sr][cfg]
                elif matrix[sr][cfg] != md5sum_matrix[cfg]:
                    incorrect_configs.append("Incorrect contents {0} for {1} on {2}, expected {3}"
                                             .format(matrix[sr][cfg], sr.ip, cfg, md5sum_matrix[cfg]))

        assert len(incorrect_configs) == 0,\
            'Incorrect arakoon config contents: \n{0}'.format('\n'.join(incorrect_configs))
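
An illustrative call; with cluster_name given only that cluster's config is hashed and compared across nodes, without it every non-NSM cluster under GeneralArakoon.CONFIG_ROOT is checked ('ovsdb' is a placeholder cluster name):

    TestArakoon.validate_arakoon_config_files(storagerouters=GeneralStorageRouter.get_storage_routers(),
                                              cluster_name='ovsdb')
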
    def validate_vpool_sanity(expected_settings):
        """
        Check if all requirements are met for a healthy vPool
        :param expected_settings: Parameters used to create a vPool, which will be verified
        :type expected_settings: dict

        :return: None
        """
        if not isinstance(expected_settings, dict) or len(expected_settings) == 0:
            raise ValueError("Cannot validate vpool when no settings are passed")

        generic_settings = expected_settings.values()[0]
        vpool_name = generic_settings["vpool_name"]
        mountpoint = "/mnt/{0}".format(vpool_name)
        backend_type = generic_settings["type"]
        rdma_enabled = (
            generic_settings["config_params"]["dtl_transport"] == StorageDriverClient.FRAMEWORK_DTL_TRANSPORT_RSOCKET
        )

        vpool = GeneralVPool.get_vpool_by_name(vpool_name=vpool_name)
        assert vpool is not None, "Could not find vPool with name {0}".format(vpool_name)
        vpool_config = GeneralVPool.get_configuration(vpool)

        # Verify some basic vPool attributes
        assert vpool.name == vpool_name, "Expected name {0} for vPool".format(vpool_name)
        assert vpool.status == VPool.STATUSES.RUNNING, "vPool does not have RUNNING status"
        assert vpool.rdma_enabled == rdma_enabled, "RDMA enabled setting is incorrect"
        assert set(expected_settings.keys()) == set(
            [sd.storagerouter for sd in vpool.storagedrivers]
        ), "vPool storagerouters don't match the expected Storage Routers"

        # Verify vPool Storage Driver configuration
        expected_vpool_config = copy.deepcopy(generic_settings["config_params"])
        for key, value in vpool_config.iteritems():
            if key == "dtl_enabled" or key == "tlog_multiplier" or key == "dtl_config_mode":
                continue
            if key not in expected_vpool_config:
                raise ValueError("Expected settings does not contain key {0}".format(key))

            if value != expected_vpool_config[key]:
                raise ValueError(
                    "vPool does not have expected configuration {0} for key {1}".format(expected_vpool_config[key], key)
                )
            expected_vpool_config.pop(key)

        if len(expected_vpool_config) > 0:
            raise ValueError(
                "Actual vPool configuration does not contain keys: {0}".format(", ".join(expected_vpool_config.keys()))
            )

        # Prepare some fields to check
        config = generic_settings["config_params"]
        dtl_mode = config["dtl_mode"]
        sco_size = config["sco_size"]
        cluster_size = config["cluster_size"]
        write_buffer = config["write_buffer"]
        dtl_transport = config["dtl_transport"]
        # @TODO: Add more validations for other expected settings (instead of None)
        expected_config = {
            "backend_connection_manager": {
                "backend_interface_retries_on_error": 5,
                "backend_interface_retry_interval_secs": 1,
                "backend_interface_retry_backoff_multiplier": 2.0,
            },
            "content_addressed_cache": {
                "clustercache_mount_points": None,
                "read_cache_serialization_path": u"/var/rsp/{0}".format(vpool.name),
            },
            "distributed_lock_store": {
                "dls_arakoon_cluster_id": None,
                "dls_arakoon_cluster_nodes": None,
                "dls_type": u"Arakoon",
            },
            "distributed_transaction_log": {"dtl_path": None, "dtl_transport": dtl_transport.upper()},
            "event_publisher": {"events_amqp_routing_key": u"volumerouter", "events_amqp_uris": None},
            "file_driver": {"fd_cache_path": None, "fd_extent_cache_capacity": u"1024", "fd_namespace": None},
            "filesystem": {
                "fs_dtl_config_mode": u"Automatic",
                "fs_dtl_mode": u"{0}".format(StorageDriverClient.VPOOL_DTL_MODE_MAP[dtl_mode]),
                "fs_enable_shm_interface": 1,
                "fs_file_event_rules": None,
                "fs_metadata_backend_arakoon_cluster_nodes": None,
                "fs_metadata_backend_mds_nodes": None,
                "fs_metadata_backend_type": u"MDS",
                "fs_raw_disk_suffix": None,
                "fs_virtual_disk_format": None,
            },
            "metadata_server": {"mds_nodes": None},
            "scocache": {"backoff_gap": u"2GB", "scocache_mount_points": None, "trigger_gap": u"1GB"},
            "threadpool_component": {"num_threads": 16},
            "volume_manager": {
                "clean_interval": 1,
                "default_cluster_size": 1024 * cluster_size,
                "dtl_throttle_usecs": 4000,
                "metadata_path": None,
                "non_disposable_scos_factor": float(write_buffer)
                / StorageDriverClient.TLOG_MULTIPLIER_MAP[sco_size]
                / sco_size,
                "number_of_scos_in_tlog": StorageDriverClient.TLOG_MULTIPLIER_MAP[sco_size],
                "tlog_path": None,
            },
            "volume_registry": {"vregistry_arakoon_cluster_id": u"voldrv", "vregistry_arakoon_cluster_nodes": None},
            "volume_router": {
                "vrouter_backend_sync_timeout_ms": 5000,
                "vrouter_file_read_threshold": 1024,
                "vrouter_file_write_threshold": 1024,
                "vrouter_id": None,
                "vrouter_max_workers": 16,
                "vrouter_migrate_timeout_ms": 5000,
                "vrouter_min_workers": 4,
                "vrouter_redirect_timeout_ms": u"5000",
                "vrouter_routing_retries": 10,
                "vrouter_sco_multiplier": 1024,
                "vrouter_volume_read_threshold": 1024,
                "vrouter_volume_write_threshold": 1024,
            },
            "volume_router_cluster": {"vrouter_cluster_id": None},
        }
        vpool_services = {
            "all": [
                "ovs-watcher-volumedriver",
                "ovs-dtl_{0}".format(vpool.name),
                "ovs-volumedriver_{0}".format(vpool.name),
                "ovs-volumerouter-consumer",
            ],
            "extra": [],
            "master": ["ovs-arakoon-voldrv"],
        }
        sd_partitions = {"DB": ["MD", "MDS", "TLOG"], "WRITE": ["FD", "DTL", "SCO"]}

        assert Configuration.exists("/ovs/arakoon/voldrv/config", raw=True), "Volumedriver arakoon does not exist"

        # Do some verifications for all SDs
        storage_ip = None
        voldrv_config = GeneralArakoon.get_config("voldrv")
        all_files = GeneralVPool.get_related_files(vpool=vpool)
        all_directories = GeneralVPool.get_related_directories(vpool=vpool)

        for storagedriver in vpool.storagedrivers:
            storagerouter = storagedriver.storagerouter
            root_client = SSHClient(storagerouter, username="******")

            assert Configuration.exists(
                "/ovs/vpools/{0}/hosts/{1}/config".format(vpool.guid, storagedriver.storagedriver_id), raw=True
            ), "vPool config not found in configuration"
            # @todo: replace next lines with implementation defined in: http://jira.openvstorage.com/browse/OVS-4577
            # current_config_sections = set([item for item in Configuration.list('/ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, storagedriver.storagedriver_id))])
            # assert not current_config_sections.difference(set(expected_config.keys())), 'New section appeared in the storage driver config in configuration'
            # assert not set(expected_config.keys()).difference(current_config_sections), 'Config section expected for storage driver, but not found in configuration'
            #
            # for key, values in expected_config.iteritems():
            #     current_config = Configuration.get('/ovs/vpools/{0}/hosts/{1}/config/{2}'.format(vpool.guid, storagedriver.storagedriver_id, key))
            #     assert set(current_config.keys()).union(set(values.keys())) == set(values.keys()), 'Not all expected keys match for key "{0}" on Storage Driver {1}'.format(key, storagedriver.name)
            #
            #     for sub_key, value in current_config.iteritems():
            #         expected_value = values[sub_key]
            #         if expected_value is None:
            #             continue
            #         assert value == expected_value, 'Key: {0} - Sub key: {1} - Value: {2} - Expected value: {3}'.format(key, sub_key, value, expected_value)

            # Check services
            if storagerouter.node_type == "MASTER":
                for service_name in vpool_services["all"] + vpool_services["master"]:
                    if (
                        service_name == "ovs-arakoon-voldrv"
                        and GeneralStorageDriver.has_role(storagedriver, "DB") is False
                    ):
                        continue
                    exitcode, output = ServiceManager.get_service_status(name=service_name, client=root_client)
                    if exitcode is not True:
                        raise ValueError(
                            "Service {0} is not running on node {1} - {2}".format(
                                service_name, storagerouter.ip, output
                            )
                        )
            else:
                for service_name in vpool_services["all"] + vpool_services["extra"]:
                    exitcode, output = ServiceManager.get_service_status(name=service_name, client=root_client)
                    if exitcode is not True:
                        raise ValueError(
                            "Service {0} is not running on node {1} - {2}".format(
                                service_name, storagerouter.ip, output
                            )
                        )

            # Check arakoon config
            if not voldrv_config.has_section(storagerouter.machine_id):
                raise ValueError("Voldrv arakoon cluster does not have section {0}".format(storagerouter.machine_id))

            # Basic SD checks
            assert (
                storagedriver.cluster_ip == storagerouter.ip
            ), "Incorrect cluster IP. Expected: {0}  -  Actual: {1}".format(storagerouter.ip, storagedriver.cluster_ip)
            assert storagedriver.mountpoint == "/mnt/{0}".format(
                vpool.name
            ), "Incorrect mountpoint. Expected: {0}  -  Actual: {1}".format(mountpoint, storagedriver.mountpoint)
            if storage_ip is not None:
                assert (
                    storagedriver.storage_ip == storage_ip
                ), "Incorrect storage IP. Expected: {0}  -  Actual: {1}".format(storage_ip, storagedriver.storage_ip)
            storage_ip = storagedriver.storage_ip

            # Check required directories and files
            if storagerouter.guid not in all_directories:
                raise ValueError("Could not find directory information for Storage Router {0}".format(storagerouter.ip))
            if storagerouter.guid not in all_files:
                raise ValueError("Could not find file information for Storage Router {0}".format(storagerouter.ip))

            for directory in all_directories[storagerouter.guid]:
                if root_client.dir_exists(directory) is False:
                    raise ValueError(
                        "Directory {0} does not exist on Storage Router {1}".format(directory, storagerouter.ip)
                    )
            for file_name in all_files[storagerouter.guid]:
                if root_client.file_exists(file_name) is False:
                    raise ValueError(
                        "File {0} does not exist on Storage Router {1}".format(file_name, storagerouter.ip)
                    )

            # @TODO: check roles and sub_roles for all storagedrivers and not just once
            for partition in storagedriver.partitions:
                if partition.role in sd_partitions and partition.sub_role in sd_partitions[partition.role]:
                    sd_partitions[partition.role].remove(partition.sub_role)
                elif (
                    partition.role in sd_partitions
                    and partition.sub_role is None
                    and "None" in sd_partitions[partition.role]
                ):
                    # Guard on membership instead of length, so remove() cannot raise ValueError
                    sd_partitions[partition.role].remove("None")

            # Verify vPool writeable
            if GeneralHypervisor.get_hypervisor_type() == "VMWARE":
                GeneralVPool.mount_vpool(vpool=vpool, root_client=root_client)

            vdisk = GeneralVDisk.create_volume(size=10, vpool=vpool, root_client=root_client)
            GeneralVDisk.write_to_volume(
                vdisk=vdisk, vpool=vpool, root_client=root_client, count=10, bs="1M", input_type="random"
            )
            GeneralVDisk.delete_volume(vdisk=vdisk, vpool=vpool, root_client=root_client)

        for role, sub_roles in sd_partitions.iteritems():
            for sub_role in sub_roles:
                raise ValueError(
                    "Not a single Storage Driver found with partition role {0} and sub-role {1}".format(role, sub_role)
                )
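
An illustrative invocation; expected_settings is keyed per Storage Router and must carry the parameters the vPool was created with (all values are placeholders):

    validate_vpool_sanity(expected_settings={storagerouter_1: {'vpool_name': 'myvpool01',
                                                               'type': 'alba',
                                                               'config_params': {'dtl_mode': 'a_sync',
                                                                                 'sco_size': 4,
                                                                                 'cluster_size': 4,
                                                                                 'write_buffer': 128,
                                                                                 'dtl_transport': 'tcp'}}})
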
Beispiel #35
0
    def remove_osd(node_guid, osd_id, expected_safety):
        """
        Removes an OSD
        :param node_guid: Guid of the node to remove an OSD from
        :type node_guid: str
        :param osd_id: ID of the OSD to remove
        :type osd_id: str
        :param expected_safety: Expected safety after having removed the OSD
        :type expected_safety: dict or None
        :return: The slot ID of the OSD which was removed
        :rtype: list
        """
        # Retrieve corresponding OSD in model
        node = AlbaNode(node_guid)
        AlbaNodeController._logger.debug('Removing OSD {0} at node {1}'.format(
            osd_id, node.ip))
        osd = AlbaOSDList.get_by_osd_id(osd_id)
        alba_backend = osd.alba_backend

        if expected_safety is None:
            AlbaNodeController._logger.warning(
                'Skipping safety check for OSD {0} on backend {1} - this is dangerous'
                .format(osd_id, alba_backend.guid))
        else:
            final_safety = AlbaController.calculate_safety(
                alba_backend_guid=alba_backend.guid, removal_osd_ids=[osd_id])
            safety_lost = final_safety['lost']
            safety_crit = final_safety['critical']
            if (safety_crit != 0 or safety_lost != 0) and (
                    safety_crit != expected_safety['critical']
                    or safety_lost != expected_safety['lost']):
                raise RuntimeError(
                    'Cannot remove OSD {0} as the current safety is not as expected ({1} vs {2})'
                    .format(osd_id, final_safety, expected_safety))
            AlbaNodeController._logger.debug(
                'Safety OK for OSD {0} on backend {1}'.format(
                    osd_id, alba_backend.guid))
        AlbaNodeController._logger.debug(
            'Purging OSD {0} on backend {1}'.format(osd_id, alba_backend.guid))
        AlbaController.remove_units(alba_backend_guid=alba_backend.guid,
                                    osd_ids=[osd_id])

        # Delete the OSD
        result = node.client.delete_osd(slot_id=osd.slot_id, osd_id=osd_id)
        if result['_success'] is False:
            raise RuntimeError('Error removing OSD: {0}'.format(
                result['_error']))

        # Clean configuration management and model - Well, just try it at least
        if Configuration.exists(ASD_CONFIG.format(osd_id), raw=True):
            Configuration.delete(ASD_CONFIG_DIR.format(osd_id), raw=True)

        osd.delete()
        node.invalidate_dynamics()
        if alba_backend is not None:
            alba_backend.invalidate_dynamics()
            alba_backend.backend.invalidate_dynamics()
        if node.storagerouter is not None:
            try:
                DiskController.sync_with_reality(
                    storagerouter_guid=node.storagerouter_guid)
            except UnableToConnectException:
                AlbaNodeController._logger.warning(
                    'Skipping disk sync since StorageRouter {0} is offline'.
                    format(node.storagerouter.name))

        return [osd.slot_id]
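
A minimal usage sketch for remove_osd above; the guids are hypothetical placeholders, and the expected safety is pre-computed with the same AlbaController.calculate_safety() call the method itself uses for verification:

    # Hypothetical identifiers - replace with real values from the model
    node_guid = '<alba-node-guid>'
    osd_id = '<osd-id>'
    osd = AlbaOSDList.get_by_osd_id(osd_id)
    expected_safety = AlbaController.calculate_safety(alba_backend_guid=osd.alba_backend.guid,
                                                      removal_osd_ids=[osd_id])
    AlbaNodeController.remove_osd(node_guid=node_guid, osd_id=osd_id, expected_safety=expected_safety)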
Beispiel #36
0
    def get(self, request, *args, **kwargs):
        """
        Fetches metadata
        """
        _ = args, kwargs
        data = {'authenticated': False,
                'authentication_state': None,
                'authentication_metadata': {},
                'username': None,
                'userguid': None,
                'roles': [],
                'identification': {},
                'storagerouter_ips': [sr.ip for sr in StorageRouterList.get_storagerouters()],
                'versions': list(settings.VERSION),
                'plugins': {},
                'registration': {'registered': False,
                                 'remaining': None}}
        try:
            # Gather plugin metadata
            plugins = {}
            # - Backends. BackendType plugins must set the has_plugin flag on True
            for backend_type in BackendTypeList.get_backend_types():
                if backend_type.has_plugin is True:
                    if backend_type.code not in plugins:
                        plugins[backend_type.code] = []
                    plugins[backend_type.code] += ['backend', 'gui']
            # - Generic plugins, as added to the configuration file(s)
            generic_plugins = Configuration.get('ovs.plugins.generic')
            for plugin_name in generic_plugins:
                if plugin_name not in plugins:
                    plugins[plugin_name] = []
                plugins[plugin_name] += ['gui']
            data['plugins'] = plugins

            # Fill identification
            data['identification'] = {'cluster_id': Configuration.get('ovs.support.cid')}

            # Registration data
            registered = Configuration.get('ovs.core.registered')
            data['registration']['registered'] = registered
            if registered is False:
                cluster_install_time = None
                for storagerouter in StorageRouterList.get_storagerouters():
                    client = SSHClient(storagerouter)
                    install_time = client.config_read('ovs.core.install_time')
                    if cluster_install_time is None or (install_time is not None and install_time < cluster_install_time):
                        cluster_install_time = install_time
                if cluster_install_time is not None:
                    timeout_seconds = 30 * 24 * 60 * 60  # 30-day registration grace period, expressed in seconds
                    data['registration']['remaining'] = (timeout_seconds - time.time() + cluster_install_time) / 24 / 60 / 60
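                    # Worked example: an install 10 days ago gives (2592000 - 864000) / 86400 = 20.0 days remaining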

            # Get authentication metadata
            authentication_metadata = {'ip': System.get_my_storagerouter().ip}
            for key in ['mode', 'authorize_uri', 'client_id', 'scope']:
                if Configuration.exists('ovs.webapps.oauth2.{0}'.format(key)):
                    authentication_metadata[key] = Configuration.get('ovs.webapps.oauth2.{0}'.format(key))
            data['authentication_metadata'] = authentication_metadata

            # Gather authorization metadata
            if 'HTTP_AUTHORIZATION' not in request.META:
                return HttpResponse, dict(data.items() + {'authentication_state': 'unauthenticated'}.items())
            authorization_type, access_token = request.META['HTTP_AUTHORIZATION'].split(' ')
            if authorization_type != 'Bearer':
                return HttpResponse, dict(data.items() + {'authentication_state': 'invalid_authorization_type'}.items())
            tokens = BearerTokenList.get_by_access_token(access_token)
            if len(tokens) != 1:
                return HttpResponse, dict(data.items() + {'authentication_state': 'invalid_token'}.items())
            token = tokens[0]
            if token.expiration < time.time():
                for junction in token.roles.itersafe():
                    junction.delete()
                token.delete()
                return HttpResponse, dict(data.items() + {'authentication_state': 'token_expired'}.items())

            # Gather user metadata
            user = token.client.user
            if not user.is_active:
                return HttpResponse, dict(data.items() + {'authentication_state': 'inactive_user'}.items())
            roles = [j.role.code for j in token.roles]

            return HttpResponse, dict(data.items() + {'authenticated': True,
                                                      'authentication_state': 'authenticated',
                                                      'username': user.username,
                                                      'userguid': user.guid,
                                                      'roles': roles,
                                                      'plugins': plugins}.items())
        except Exception as ex:
            logger.exception('Unexpected exception: {0}'.format(ex))
            return HttpResponse, dict(data.items() + {'authentication_state': 'unexpected_exception'}.items())
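
A hedged client-side sketch of consuming this metadata endpoint; the URL and the use of the requests library are assumptions, while the Bearer scheme and the response fields come from the handler above:

    import requests

    access_token = '<access-token>'  # hypothetical token obtained through the OAuth2 flow
    response = requests.get('https://<storagerouter-ip>/api/',  # metadata root URL is an assumption
                            headers={'Authorization': 'Bearer {0}'.format(access_token)},
                            verify=False)
    metadata = response.json()
    if metadata['authentication_state'] == 'token_expired':
        pass  # Expired tokens are cleaned up server-side; re-authenticate via authentication_metadata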
Beispiel #37
0
    def get_update_information_alba_plugin(information):
        """
        Called when the 'Update' button in the GUI is pressed
        This call collects additional information about the packages which can be updated
        E.g.:
            * Downtime for Arakoons
            * Downtime for StorageDrivers
            * Prerequisites that haven't been met
            * Services which will be stopped during the update
            * Services which will be restarted after the update
        :param information: Dict in which the update information is aggregated per component ('framework', 'alba')
        :return: The information dict, updated with package, downtime, prerequisite and service data
        """
        # Verify arakoon info
        arakoon_ovs_info = {'down': False,
                            'name': None,
                            'internal': False}
        arakoon_cacc_info = {'down': False,
                             'name': None,
                             'internal': False}
        for cluster in ['cacc', 'ovsdb']:
            cluster_name = ArakoonClusterConfig.get_cluster_name(cluster)
            if cluster_name is None:
                continue

            if cluster == 'cacc':
                arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name, filesystem=True, ip=System.get_my_storagerouter().ip)
            else:
                arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)

            if arakoon_metadata['internal'] is True:
                config = ArakoonClusterConfig(cluster_id=cluster_name, filesystem=(cluster == 'cacc'))
                config.load_config(System.get_my_storagerouter().ip if cluster == 'cacc' else None)
                if cluster == 'ovsdb':
                    arakoon_ovs_info['down'] = len(config.nodes) < 3
                    arakoon_ovs_info['name'] = arakoon_metadata['cluster_name']
                    arakoon_ovs_info['internal'] = True
                else:
                    arakoon_cacc_info['name'] = arakoon_metadata['cluster_name']
                    arakoon_cacc_info['internal'] = True

        # Verify StorageRouter downtime
        fwk_prerequisites = []
        all_storagerouters = StorageRouterList.get_storagerouters()
        for storagerouter in all_storagerouters:
            try:
                SSHClient(endpoint=storagerouter, username='******')
            except UnableToConnectException:
                fwk_prerequisites.append(['node_down', storagerouter.name])

        # Verify ALBA node responsiveness
        alba_prerequisites = []
        for alba_node in AlbaNodeList.get_albanodes():
            try:
                alba_node.client.get_metadata()
            except Exception:
                alba_prerequisites.append(['alba_node_unresponsive', alba_node.ip])

        for key in ['framework', 'alba']:
            if key not in information:
                information[key] = {'packages': {},
                                    'downtime': [],
                                    'prerequisites': fwk_prerequisites if key == 'framework' else alba_prerequisites,
                                    'services_stop_start': set(),
                                    'services_post_update': set()}

            for storagerouter in StorageRouterList.get_storagerouters():
                if key not in storagerouter.package_information:
                    continue

                # Retrieve Arakoon issues
                arakoon_downtime = []
                arakoon_services = []
                for service in storagerouter.services:
                    if service.type.name not in [ServiceType.SERVICE_TYPES.ALBA_MGR, ServiceType.SERVICE_TYPES.NS_MGR]:
                        continue

                    if service.type.name == ServiceType.SERVICE_TYPES.ALBA_MGR:
                        cluster_name = AlbaController.get_abm_cluster_name(alba_backend=service.abm_service.alba_backend)
                    else:
                        cluster_name = AlbaController.get_nsm_cluster_name(alba_backend=service.nsm_service.alba_backend, number=service.nsm_service.number)
                    if Configuration.exists('/ovs/arakoon/{0}/config'.format(cluster_name), raw=True) is False:
                        continue
                    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)
                    if arakoon_metadata['internal'] is True:
                        arakoon_services.append('ovs-{0}'.format(service.name))
                        config = ArakoonClusterConfig(cluster_id=cluster_name, filesystem=False)
                        config.load_config()
                        if len(config.nodes) < 3:
                            if service.type.name == ServiceType.SERVICE_TYPES.NS_MGR:
                                arakoon_downtime.append(['backend', service.nsm_service.alba_backend.name])
                            else:
                                arakoon_downtime.append(['backend', service.abm_service.alba_backend.name])

                for package_name, package_info in storagerouter.package_information[key].iteritems():
                    if package_name not in AlbaUpdateController.alba_plugin_packages:
                        continue  # Only gather information for the core packages

                    information[key]['services_post_update'].update(package_info.pop('services_to_restart'))
                    if package_name not in information[key]['packages']:
                        information[key]['packages'][package_name] = {}
                    information[key]['packages'][package_name].update(package_info)

                    if package_name == 'openvstorage-backend':
                        if ['gui', None] not in information[key]['downtime']:
                            information[key]['downtime'].append(['gui', None])
                        if ['api', None] not in information[key]['downtime']:
                            information[key]['downtime'].append(['api', None])
                        information[key]['services_stop_start'].update({'watcher-framework', 'memcached'})
                    elif package_name == 'alba':
                        for down in arakoon_downtime:
                            if down not in information[key]['downtime']:
                                information[key]['downtime'].append(down)
                        information[key]['services_post_update'].update(arakoon_services)
                    elif package_name == 'arakoon':
                        if key == 'framework':
                            framework_arakoons = set()
                            if arakoon_ovs_info['internal'] is True:
                                framework_arakoons.add('ovs-arakoon-{0}'.format(arakoon_ovs_info['name']))
                            if arakoon_cacc_info['internal'] is True:
                                framework_arakoons.add('ovs-arakoon-{0}'.format(arakoon_cacc_info['name']))

                            information[key]['services_post_update'].update(framework_arakoons)
                            if arakoon_ovs_info['down'] is True and ['ovsdb', None] not in information[key]['downtime']:
                                information[key]['downtime'].append(['ovsdb', None])
                        else:
                            for down in arakoon_downtime:
                                if down not in information[key]['downtime']:
                                    information[key]['downtime'].append(down)
                            information[key]['services_post_update'].update(arakoon_services)

            for alba_node in AlbaNodeList.get_albanodes():
                for package_name, package_info in alba_node.package_information.get(key, {}).iteritems():
                    if package_name not in AlbaUpdateController.sdm_packages:
                        continue  # Only gather information for the SDM packages

                    information[key]['services_post_update'].update(package_info.pop('services_to_restart'))
                    if package_name not in information[key]['packages']:
                        information[key]['packages'][package_name] = {}
                    information[key]['packages'][package_name].update(package_info)
        return information
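
For reference, a hedged sketch of the structure this function returns; the values are illustrative and the per-package detail keys are filled in by the update framework:

    information = {'framework': {'packages': {},  # per-package details taken from storagerouter.package_information
                                 'downtime': [['gui', None], ['api', None], ['ovsdb', None]],
                                 'prerequisites': [['node_down', 'storagerouter-1']],
                                 'services_stop_start': {'watcher-framework', 'memcached'},
                                 'services_post_update': {'ovs-arakoon-ovsdb'}},
                   'alba': {'packages': {},
                            'downtime': [['backend', 'mybackend']],
                            'prerequisites': [['alba_node_unresponsive', '10.100.1.2']],
                            'services_stop_start': set(),
                            'services_post_update': set()}}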
Beispiel #38
0
    def _deploy_stack_and_scrub(queue, vpool, scrub_info, error_messages):
        """
        Executes scrub work for a given vDisk queue and vPool, based on scrub_info
        :param queue: a Queue with vDisk guids that need to be scrubbed (they should all belong to the same vPool)
        :type queue: Queue
        :param vpool: the vPool object of the vDisks
        :type vpool: VPool
        :param scrub_info: A dict containing scrub information:
                           `scrub_path` with the path on which to scrub
                           `storage_router` with the StorageRouter that needs to do the work
                           `partition_guid` with the guid of the partition backing the scrub location
        :type scrub_info: dict
        :param error_messages: A list of error messages to be filled (by reference)
        :type error_messages: list
        :return: None
        :rtype: NoneType
        """
        if len(vpool.storagedrivers) == 0 or not vpool.storagedrivers[0].storagedriver_id:
            error_messages.append('vPool {0} does not have any valid StorageDrivers configured'.format(vpool.name))
            return

        service_manager = ServiceFactory.get_manager()
        client = None
        lock_time = 5 * 60
        storagerouter = scrub_info['storage_router']
        partition_guid = scrub_info['partition_guid']
        alba_proxy_service = 'ovs-albaproxy_{0}_{1}_{2}_scrub'.format(vpool.name, storagerouter.name, partition_guid)
        scrub_directory = '{0}/scrub_work_{1}_{2}'.format(scrub_info['scrub_path'], vpool.name, partition_guid)
        scrub_config_key = 'ovs/vpools/{0}/proxies/scrub/scrub_config_{1}'.format(vpool.guid, partition_guid)
        backend_config_key = 'ovs/vpools/{0}/proxies/scrub/backend_config_{1}'.format(vpool.guid, partition_guid)

        # Deploy a proxy
        try:
            with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
                GenericController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                client = SSHClient(storagerouter, 'root')
                client.dir_create(scrub_directory)
                client.dir_chmod(scrub_directory, 0777)  # Celery task is executed by the 'ovs' user, which should be able to write in it
                if service_manager.has_service(name=alba_proxy_service, client=client) is True \
                        and service_manager.get_service_status(name=alba_proxy_service, client=client) == 'active':
                    GenericController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Re-using existing proxy service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                    scrub_config = Configuration.get(scrub_config_key)
                else:
                    machine_id = System.get_my_machine_id(client)
                    port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|storagedriver'.format(machine_id))
                    with volatile_mutex('deploy_proxy_for_scrub_{0}'.format(storagerouter.guid), wait=30):
                        port = System.get_free_ports(selected_range=port_range, nr=1, client=client)[0]
                    scrub_config = Configuration.get('ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid))
                    scrub_config['port'] = port
                    scrub_config['transport'] = 'tcp'
                    Configuration.set(scrub_config_key, json.dumps(scrub_config, indent=4), raw=True)

                    params = {'VPOOL_NAME': vpool.name,
                              'LOG_SINK': LogHandler.get_sink_path(alba_proxy_service),
                              'CONFIG_PATH': Configuration.get_configuration_path(scrub_config_key)}
                    service_manager.add_service(name='ovs-albaproxy', params=params, client=client, target_name=alba_proxy_service)
                    service_manager.start_service(name=alba_proxy_service, client=client)
                    GenericController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deployed ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))

                backend_config = Configuration.get('ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, vpool.storagedrivers[0].storagedriver_id))['backend_connection_manager']
                if backend_config.get('backend_type') != 'MULTI':
                    backend_config['alba_connection_host'] = '127.0.0.1'
                    backend_config['alba_connection_port'] = scrub_config['port']
                else:
                    for value in backend_config.itervalues():
                        if isinstance(value, dict):
                            value['alba_connection_host'] = '127.0.0.1'
                            value['alba_connection_port'] = scrub_config['port']
                # Copy backend connection manager information into a separate key
                Configuration.set(backend_config_key, json.dumps({'backend_connection_manager': backend_config}, indent=4), raw=True)
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - An error occurred deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service)
            error_messages.append(message)
            GenericController._logger.exception(message)
            if client is not None and service_manager.has_service(name=alba_proxy_service, client=client) is True:
                if service_manager.get_service_status(name=alba_proxy_service, client=client) == 'active':
                    service_manager.stop_service(name=alba_proxy_service, client=client)
                service_manager.remove_service(name=alba_proxy_service, client=client)
            if Configuration.exists(scrub_config_key):
                Configuration.delete(scrub_config_key)

        # Execute the actual scrubbing
        threads = []
        threads_key = '/ovs/framework/hosts/{0}/config|scrub_stack_threads'.format(storagerouter.machine_id)
        amount_threads = Configuration.get(key=threads_key) if Configuration.exists(key=threads_key) else 2
        if not isinstance(amount_threads, int):
            error_messages.append('Amount of threads to spawn must be an integer for StorageRouter with ID {0}'.format(storagerouter.machine_id))
            return

        amount_threads = max(amount_threads, 1)  # Make sure amount_threads is at least 1
        amount_threads = min(min(queue.qsize(), amount_threads), 20)  # Make sure amount_threads is at most 20
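        # E.g. with queue.qsize() == 50: a configured value of 0 becomes 1 thread, 8 stays 8, 64 is capped to 20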
        GenericController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Spawning {2} threads for proxy service {3}'.format(vpool.name, storagerouter.name, amount_threads, alba_proxy_service))
        for index in range(amount_threads):
            thread = Thread(name='execute_scrub_{0}_{1}_{2}'.format(vpool.guid, partition_guid, index),
                            target=GenericController._execute_scrub,
                            args=(queue, vpool, scrub_info, scrub_directory, error_messages))
            thread.start()
            threads.append(thread)
        for thread in threads:
            thread.join()

        # Delete the proxy again
        try:
            with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
                GenericController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removing service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                client = SSHClient(storagerouter, 'root')
                client.dir_delete(scrub_directory)
                if service_manager.has_service(alba_proxy_service, client=client):
                    service_manager.stop_service(alba_proxy_service, client=client)
                    service_manager.remove_service(alba_proxy_service, client=client)
                if Configuration.exists(scrub_config_key):
                    Configuration.delete(scrub_config_key)
                GenericController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removed service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2} failed'.format(vpool.name, storagerouter.name, alba_proxy_service)
            error_messages.append(message)
            GenericController._logger.exception(message)
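
A hedged driver sketch for the helper above; the scrub path, partition guid and the way vDisk guids are gathered are placeholders, while the scrub_info keys match the ones the helper reads:

    from Queue import Queue  # Python 2 stdlib, matching the code above

    vdisk_queue = Queue()
    for vdisk_guid in vdisk_guids:  # placeholder iterable of guids belonging to a single vPool
        vdisk_queue.put(vdisk_guid)
    scrub_info = {'scrub_path': '/mnt/ssd1/scrub',       # placeholder mountpoint of a SCRUB partition
                  'storage_router': storagerouter,       # StorageRouter that will run the proxy and threads
                  'partition_guid': '<partition-guid>'}  # placeholder guid of that partition
    errors = []
    GenericController._deploy_stack_and_scrub(queue=vdisk_queue, vpool=vpool, scrub_info=scrub_info, error_messages=errors)
    if len(errors) > 0:
        raise RuntimeError('Scrubbing failed:\n{0}'.format('\n'.join(errors)))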
Beispiel #39
0
    def cluster_registry_checkup():
        """
        Verify whether changes have occurred in the cluster registry for each vPool
        :return: Information whether changes occurred
        :rtype: dict
        """
        changed_vpools = {}
        for vpool in VPoolList.get_vpools():
            changed_vpools[vpool.guid] = {'changes': False,
                                          'success': True}
            try:
                StorageDriverController._logger.info('Validating cluster registry settings for vPool {0}'.format(vpool.guid))

                current_configs = vpool.clusterregistry_client.get_node_configs()
                changes = len(current_configs) == 0
                node_configs = []
                for sd in vpool.storagedrivers:
                    sd.invalidate_dynamics(['cluster_node_config'])
                    new_config = sd.cluster_node_config
                    node_configs.append(ClusterNodeConfig(**new_config))
                    if changes is False:
                        current_node_configs = [config for config in current_configs if config.vrouter_id == sd.storagedriver_id]
                        if len(current_node_configs) == 1:
                            current_node_config = current_node_configs[0]
                            for key in new_config:
                                if getattr(current_node_config, key) != new_config[key]:
                                    changes = True
                                    break
                changed_vpools[vpool.guid]['changes'] = changes

                if changes is True:
                    StorageDriverController._logger.info('Cluster registry settings for vPool {0} need to be updated'.format(vpool.guid))
                    available_storagedrivers = []
                    for sd in vpool.storagedrivers:
                        storagerouter = sd.storagerouter
                        try:
                            SSHClient(storagerouter, username='******')
                            with remote(storagerouter.ip, [LocalStorageRouterClient]) as rem:
                                sd_key = '/ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, sd.storagedriver_id)
                                if Configuration.exists(sd_key) is True:
                                    path = Configuration.get_configuration_path(sd_key)
                                    lsrc = rem.LocalStorageRouterClient(path)
                                    lsrc.server_revision()  # 'Cheap' call to verify whether volumedriver is responsive
                                    available_storagedrivers.append(sd)
                        except UnableToConnectException:
                            StorageDriverController._logger.warning('StorageRouter {0} not available.'.format(storagerouter.name))
                        except Exception as ex:
                            if 'ClusterNotReachableException' in str(ex):
                                StorageDriverController._logger.warning('StorageDriver {0} on StorageRouter {1} not available.'.format(
                                    sd.guid, storagerouter.name
                                ))
                            else:
                                StorageDriverController._logger.exception('Got exception when validating StorageDriver {0} on StorageRouter {1}.'.format(
                                    sd.guid, storagerouter.name
                                ))

                    StorageDriverController._logger.info('Updating cluster node configs for vPool {0}'.format(vpool.guid))
                    vpool.clusterregistry_client.set_node_configs(node_configs)
                    for sd in available_storagedrivers:
                        StorageDriverController._logger.info('Trigger config reload for StorageDriver {0}'.format(sd.guid))
                        vpool.storagedriver_client.update_cluster_node_configs(str(sd.storagedriver_id), req_timeout_secs=10)
                    StorageDriverController._logger.info('Updating cluster node configs for vPool {0} completed'.format(vpool.guid))
                else:
                    StorageDriverController._logger.info('Cluster registry settings for vPool {0} are up to date'.format(vpool.guid))
            except Exception as ex:
                StorageDriverController._logger.exception('Got exception when validating cluster registry settings for vPool {0}.'.format(vpool.name))
                changed_vpools[vpool.guid]['success'] = False
                changed_vpools[vpool.guid]['error'] = ex.message
        return changed_vpools
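
A small usage sketch interpreting the dict returned above:

    result = StorageDriverController.cluster_registry_checkup()
    for vpool_guid, info in result.iteritems():
        if info['success'] is False:
            print 'vPool {0} could not be validated: {1}'.format(vpool_guid, info.get('error'))
        elif info['changes'] is True:
            print 'Cluster registry for vPool {0} was updated'.format(vpool_guid)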
Beispiel #40
0
    def remove_asd(node_guid, asd_id, expected_safety):
        """
        Removes an ASD
        :param node_guid: Guid of the node to remove an ASD from
        :type node_guid: str
        :param asd_id: ID of the ASD to remove
        :type asd_id: str
        :param expected_safety: Expected safety after having removed the ASD
        :type expected_safety: dict or None
        :return: Aliases of the disk on which the ASD was removed
        :rtype: list
        """
        node = AlbaNode(node_guid)
        AlbaNodeController._logger.debug('Removing ASD {0} at node {1}'.format(asd_id, node.ip))
        model_osd = None
        for disk in node.disks:
            for asd in disk.osds:
                if asd.osd_id == asd_id:
                    model_osd = asd
                    break
            if model_osd is not None:
                break
        if model_osd is not None:
            alba_backend = model_osd.alba_backend
        else:
            alba_backend = None

        asds = {}
        try:
            asds = node.client.get_asds()
        except (requests.ConnectionError, requests.Timeout, InvalidCredentialsError):
            AlbaNodeController._logger.warning('Could not connect to node {0} to validate ASD'.format(node.guid))
        partition_alias = None
        for alias, asd_ids in asds.iteritems():
            if asd_id in asd_ids:
                partition_alias = alias
                break

        if alba_backend is not None:
            if expected_safety is None:
                AlbaNodeController._logger.warning('Skipping safety check for ASD {0} on backend {1} - this is dangerous'.format(asd_id, alba_backend.guid))
            else:
                final_safety = AlbaController.calculate_safety(alba_backend_guid=alba_backend.guid,
                                                               removal_osd_ids=[asd_id])
                safety_lost = final_safety['lost']
                safety_crit = final_safety['critical']
                if (safety_crit != 0 or safety_lost != 0) and (safety_crit != expected_safety['critical'] or safety_lost != expected_safety['lost']):
                    raise RuntimeError('Cannot remove ASD {0} as the current safety is not as expected ({1} vs {2})'.format(asd_id, final_safety, expected_safety))
                AlbaNodeController._logger.debug('Safety OK for ASD {0} on backend {1}'.format(asd_id, alba_backend.guid))
            AlbaNodeController._logger.debug('Purging ASD {0} on backend {1}'.format(asd_id, alba_backend.guid))
            AlbaController.remove_units(alba_backend_guid=alba_backend.guid,
                                        osd_ids=[asd_id])
        else:
            AlbaNodeController._logger.warning('Could not match ASD {0} to any backend. Cannot purge'.format(asd_id))

        disk_data = None
        if partition_alias is not None:
            AlbaNodeController._logger.debug('Removing ASD {0} from disk {1}'.format(asd_id, partition_alias))
            for device_info in node.client.get_disks().itervalues():
                if partition_alias in device_info['partition_aliases']:
                    disk_data = device_info
                    result = node.client.delete_asd(disk_id=device_info['aliases'][0].split('/')[-1],
                                                    asd_id=asd_id)
                    if result['_success'] is False:
                        raise RuntimeError('Error removing ASD: {0}'.format(result['_error']))
            if disk_data is None:
                raise RuntimeError('Failed to find disk for partition with alias {0}'.format(partition_alias))
        else:
            AlbaNodeController._logger.warning('Could not remove ASD {0} from remote node (node down)'.format(asd_id))

        if Configuration.exists(AlbaNodeController.ASD_CONFIG.format(asd_id), raw=True):
            Configuration.delete(AlbaNodeController.ASD_CONFIG_DIR.format(asd_id), raw=True)

        if model_osd is not None:
            model_osd.delete()
        if alba_backend is not None:
            alba_backend.invalidate_dynamics()
            alba_backend.backend.invalidate_dynamics()
        if node.storagerouter is not None:
            DiskController.sync_with_reality(storagerouter_guid=node.storagerouter_guid)

        return [] if disk_data is None else disk_data.get('aliases', [])
Beispiel #41
0
    def migrate():
        """
        Executes async migrations. It doesn't matter too much when they are executed, as long as they eventually get
        executed. This code will typically contain:
        * "Dangerous" migration code (it needs certain services to be running)
        * Migration code depending on a cluster-wide state
        * ...
        * Migration code that finishes successfully should create an entry in /ovs/framework/migration so it is not executed again
        *     E.g.: /ovs/framework/migration|stats_monkey_integration: True
        """
        MigrationController._logger.info('Preparing out of band migrations...')

        from ovs.dal.lists.servicetypelist import ServiceTypeList
        from ovs.dal.lists.storagedriverlist import StorageDriverList
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.dal.lists.vpoollist import VPoolList
        from ovs.extensions.db.arakooninstaller import ArakoonInstaller
        from ovs.extensions.generic.configuration import Configuration
        from ovs.extensions.generic.sshclient import SSHClient
        from ovs_extensions.generic.toolbox import ExtensionsToolbox
        from ovs.extensions.migration.migration.ovsmigrator import ExtensionMigrator
        from ovs.extensions.packages.packagefactory import PackageFactory
        from ovs_extensions.services.interfaces.systemd import Systemd
        from ovs.extensions.services.servicefactory import ServiceFactory
        from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
        from ovs.lib.helpers.storagedriver.installer import StorageDriverInstaller

        MigrationController._logger.info('Start out of band migrations...')
        service_manager = ServiceFactory.get_manager()

        sr_client_map = {}
        for storagerouter in StorageRouterList.get_storagerouters():
            sr_client_map[storagerouter.guid] = SSHClient(endpoint=storagerouter.ip,  # Also triggered during post-update code, when the ovs-watcher-framework service is still down and not refreshing the heartbeat --> use the IP instead of the StorageRouter object
                                                          username='******')

        #########################################################
        # Addition of 'ExecReload' for AlbaProxy SystemD services
        if ServiceFactory.get_service_type() == 'systemd':
            changed_clients = set()
            for storagedriver in StorageDriverList.get_storagedrivers():
                root_client = sr_client_map[storagedriver.storagerouter_guid]
                for alba_proxy in storagedriver.alba_proxies:
                    service = alba_proxy.service
                    service_name = 'ovs-{0}'.format(service.name)
                    if not service_manager.has_service(name=service_name, client=root_client):
                        continue
                    if 'ExecReload=' in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                        continue

                    try:
                        service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY, client=root_client, target_name=service_name)
                        changed_clients.add(root_client)
                    except:
                        MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
            for root_client in changed_clients:
                root_client.run(['systemctl', 'daemon-reload'])

        ##################################################################
        # Adjustment of open file descriptors for Arakoon services to 8192
        changed_clients = set()
        for storagerouter in StorageRouterList.get_storagerouters():
            root_client = sr_client_map[storagerouter.guid]
            for service_name in service_manager.list_services(client=root_client):
                if not service_name.startswith('ovs-arakoon-'):
                    continue

                if ServiceFactory.get_service_type() == 'systemd':
                    path = '/lib/systemd/system/{0}.service'.format(service_name)
                    check = 'LimitNOFILE=8192'
                else:
                    path = '/etc/init/{0}.conf'.format(service_name)
                    check = 'limit nofile 8192 8192'

                if not root_client.file_exists(path):
                    continue
                if check in root_client.file_read(path):
                    continue

                try:
                    service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name=service_name)
                    changed_clients.add(root_client)
                    ExtensionsToolbox.edit_version_file(client=root_client,
                                                        package_name='arakoon',
                                                        old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, service_name))
                except:
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

        #############################
        # Migrate to multiple proxies
        for storagedriver in StorageDriverList.get_storagedrivers():
            vpool = storagedriver.vpool
            root_client = sr_client_map[storagedriver.storagerouter_guid]
            for alba_proxy in storagedriver.alba_proxies:
                # Rename alba_proxy service in model
                service = alba_proxy.service
                old_service_name = 'albaproxy_{0}'.format(vpool.name)
                new_service_name = 'albaproxy_{0}_0'.format(vpool.name)
                if old_service_name != service.name:
                    continue
                service.name = new_service_name
                service.save()

                if not service_manager.has_service(name=old_service_name, client=root_client):
                    continue
                old_configuration_key = '/ovs/framework/hosts/{0}/services/{1}'.format(storagedriver.storagerouter.machine_id, old_service_name)
                if not Configuration.exists(key=old_configuration_key):
                    continue

                # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='alba',
                                                    old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, old_service_name),
                                                    new_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, new_service_name))

                # Register new service and remove old service
                service_manager.add_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY,
                                            client=root_client,
                                            params=Configuration.get(old_configuration_key),
                                            target_name='ovs-{0}'.format(new_service_name))

                # Update scrub proxy config
                proxy_config_key = '/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)
                proxy_config = None if Configuration.exists(key=proxy_config_key) is False else Configuration.get(proxy_config_key)
                if proxy_config is not None:
                    fragment_cache = proxy_config.get(StorageDriverConfiguration.CACHE_FRAGMENT, ['none', {}])
                    if fragment_cache[0] == 'alba' and fragment_cache[1].get('cache_on_write') is True:  # Accelerated ALBA configured
                        fragment_cache_scrub_info = copy.deepcopy(fragment_cache)
                        fragment_cache_scrub_info[1]['cache_on_read'] = False
                        proxy_scrub_config_key = '/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid)
                        proxy_scrub_config = None if Configuration.exists(key=proxy_scrub_config_key) is False else Configuration.get(proxy_scrub_config_key)
                        if proxy_scrub_config is not None and proxy_scrub_config[StorageDriverConfiguration.CACHE_FRAGMENT] == ['none']:
                            proxy_scrub_config[StorageDriverConfiguration.CACHE_FRAGMENT] = fragment_cache_scrub_info
                            Configuration.set(key=proxy_scrub_config_key, value=proxy_scrub_config)

            # Update 'backend_connection_manager' section
            changes = False
            storagedriver_config = StorageDriverConfiguration(vpool.guid, storagedriver.storagedriver_id)
            if 'backend_connection_manager' not in storagedriver_config.configuration:
                continue

            current_config = storagedriver_config.configuration['backend_connection_manager']
            if current_config.get('backend_type') != 'MULTI':
                changes = True
                backend_connection_manager = {'backend_type': 'MULTI'}
                for index, proxy in enumerate(sorted(storagedriver.alba_proxies, key=lambda pr: pr.service.ports[0])):
                    backend_connection_manager[str(index)] = copy.deepcopy(current_config)
                    # noinspection PyUnresolvedReferences
                    backend_connection_manager[str(index)]['alba_connection_use_rora'] = True
                    # noinspection PyUnresolvedReferences
                    backend_connection_manager[str(index)]['alba_connection_rora_manifest_cache_capacity'] = 5000
                    # noinspection PyUnresolvedReferences
                    for key, value in backend_connection_manager[str(index)].items():
                        if key.startswith('backend_interface'):
                            backend_connection_manager[key] = value
                            # noinspection PyUnresolvedReferences
                            del backend_connection_manager[str(index)][key]
                for key, value in {'backend_interface_retries_on_error': 5,
                                   'backend_interface_retry_interval_secs': 1,
                                   'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                    if key not in backend_connection_manager:
                        backend_connection_manager[key] = value
            else:
                backend_connection_manager = current_config
                for value in backend_connection_manager.values():
                    if isinstance(value, dict):
                        for key, val in value.items():
                            if key.startswith('backend_interface'):
                                backend_connection_manager[key] = val
                                changes = True
                                del value[key]
                for key, value in {'backend_interface_retries_on_error': 5,
                                   'backend_interface_retry_interval_secs': 1,
                                   'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                    if key not in backend_connection_manager:
                        changes = True
                        backend_connection_manager[key] = value

            if changes is True:
                storagedriver_config.clear_backend_connection_manager()
                storagedriver_config.configure_backend_connection_manager(**backend_connection_manager)
                storagedriver_config.save(root_client)

                # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='volumedriver',
                                                    old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, 'volumedriver_{0}'.format(vpool.name)))
                if service_manager.__class__ == Systemd:
                    root_client.run(['systemctl', 'daemon-reload'])
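
            # Hedged sketch of the resulting layout: per-proxy settings live under numeric
            # sub-sections while backend_interface_* settings stay at the top level, e.g.
            #   {'backend_type': 'MULTI',
            #    'backend_interface_retries_on_error': 5,
            #    'backend_interface_retry_interval_secs': 1,
            #    'backend_interface_retry_backoff_multiplier': 2.0,
            #    '0': {'alba_connection_use_rora': True, 'alba_connection_rora_manifest_cache_capacity': 5000}}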

        ########################################
        # Update metadata_store_bits information
        vpools = VPoolList.get_vpools()
        for vpool in vpools:
            bits = None
            for storagedriver in vpool.storagedrivers:
                key = '/ovs/framework/hosts/{0}/services/volumedriver_{1}'.format(storagedriver.storagerouter.machine_id, vpool.name)
                if Configuration.exists(key=key) and 'METADATASTORE_BITS' not in Configuration.get(key=key):
                    if bits is None:
                        entries = service_manager.extract_from_service_file(name='ovs-volumedriver_{0}'.format(vpool.name),
                                                                            client=sr_client_map[storagedriver.storagerouter_guid],
                                                                            entries=['METADATASTORE_BITS='])
                        if len(entries) == 1:
                            bits = entries[0].split('=')[-1]
                            bits = int(bits) if bits.isdigit() else 5
                    if bits is not None:
                        try:
                            content = Configuration.get(key=key)
                            content['METADATASTORE_BITS'] = bits
                            Configuration.set(key=key, value=content)
                        except:
                            MigrationController._logger.exception('Error updating volumedriver info for vPool {0} on StorageRouter {1}'.format(vpool.name, storagedriver.storagerouter.name))

            if bits is not None:
                vpool.metadata_store_bits = bits
                vpool.save()

        #####################################
        # Update the vPool metadata structure
        def _update_metadata_structure(metadata):
            metadata = copy.deepcopy(metadata)
            cache_structure = {'read': False,
                               'write': False,
                               'is_backend': False,
                               'quota': None,
                               'backend_info': {'name': None,  # Will be filled in when is_backend is true
                                                'backend_guid': None,
                                                'alba_backend_guid': None,
                                                'policies': None,
                                                'preset': None,
                                                'arakoon_config': None,
                                                'connection_info': {'client_id': None,
                                                                    'client_secret': None,
                                                                    'host': None,
                                                                    'port': None,
                                                                    'local': None}}
                               }
            structure_map = {StorageDriverConfiguration.CACHE_BLOCK: {'read': 'block_cache_on_read',
                                                                      'write': 'block_cache_on_write',
                                                                      'quota': 'quota_bc',
                                                                      'backend_prefix': 'backend_bc_{0}'},
                             StorageDriverConfiguration.CACHE_FRAGMENT: {'read': 'fragment_cache_on_read',
                                                                         'write': 'fragment_cache_on_write',
                                                                         'quota': 'quota_fc',
                                                                         'backend_prefix': 'backend_aa_{0}'}}
            if 'arakoon_config' in metadata['backend']:  # Arakoon config should be placed under the backend info
                metadata['backend']['backend_info']['arakoon_config'] = metadata['backend'].pop('arakoon_config')
            if 'connection_info' in metadata['backend']:  # Connection info should be placed under the backend info
                metadata['backend']['backend_info']['connection_info'] = metadata['backend'].pop('connection_info')
            if 'caching_info' not in metadata:  # Caching info is the new key
                would_be_caching_info = {}
                metadata['caching_info'] = would_be_caching_info
                # Extract all caching data for every storagerouter
                current_caching_info = metadata['backend'].pop('caching_info')  # Pop to mutate metadata
                for storagerouter_guid in current_caching_info.iterkeys():
                    current_cache_data = current_caching_info[storagerouter_guid]
                    storagerouter_caching_info = {}
                    would_be_caching_info[storagerouter_guid] = storagerouter_caching_info
                    for cache_type, cache_type_mapping in structure_map.iteritems():
                        new_cache_structure = copy.deepcopy(cache_structure)
                        storagerouter_caching_info[cache_type] = new_cache_structure
                        for new_structure_key, old_structure_key in cache_type_mapping.iteritems():
                            if new_structure_key == 'backend_prefix':
                                # Get possible backend related info
                                metadata_key = old_structure_key.format(storagerouter_guid)
                                if metadata_key not in metadata:
                                    continue
                                backend_data = metadata.pop(metadata_key)  # Pop to mutate metadata
                                new_cache_structure['is_backend'] = True
                                # Copy over the old data
                                new_cache_structure['backend_info']['arakoon_config'] = backend_data['arakoon_config']
                                new_cache_structure['backend_info'].update(backend_data['backend_info'])
                                new_cache_structure['backend_info']['connection_info'].update(backend_data['connection_info'])
                            else:
                                new_cache_structure[new_structure_key] = current_cache_data.get(old_structure_key)
            return metadata
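
        # Hedged before/after sketch of the transformation above (guids and nested configs are
        # placeholders; the cache-type keys are StorageDriverConfiguration.CACHE_BLOCK/CACHE_FRAGMENT):
        #   old: metadata['backend'] holds 'arakoon_config', 'connection_info' and per-StorageRouter
        #        'caching_info' flags such as 'fragment_cache_on_read', plus optional top-level
        #        'backend_aa_<sr_guid>' / 'backend_bc_<sr_guid>' backend data
        #   new: 'arakoon_config' and 'connection_info' move under metadata['backend']['backend_info'],
        #        and metadata['caching_info'][<sr_guid>][<cache_type>] holds {'read': ..., 'write': ...,
        #        'quota': ..., 'is_backend': ..., 'backend_info': {...}}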

        vpools = VPoolList.get_vpools()
        for vpool in vpools:
            try:
                new_metadata = _update_metadata_structure(vpool.metadata)
                vpool.metadata = new_metadata
                vpool.save()
            except KeyError:
                MigrationController._logger.exception('Exceptions occurred when updating the metadata for vPool {0}'.format(vpool.name))

        ##############################################
        # Always use indent=4 during Configuration set
        def _resave_all_config_entries(config_path='/ovs'):
            """
            Recursive function which checks whether each config management key is a directory.
            If it is not a directory, the config is retrieved and saved again using the new indentation logic
            """
            for item in Configuration.list(config_path):
                new_path = config_path + '/' + item
                print new_path
                if Configuration.dir_exists(new_path) is True:
                    _resave_all_config_entries(config_path=new_path)
                else:
                    try:
                        _config = Configuration.get(new_path)
                        Configuration.set(new_path, _config)
                    except:
                        _config = Configuration.get(new_path, raw=True)
                        Configuration.set(new_path, _config, raw=True)
        if ExtensionMigrator.THIS_VERSION <= 13:  # There is no way of checking whether this new indentation logic has been applied, so we only perform this for version 13 and lower
            MigrationController._logger.info('Re-saving every configuration setting with new indentation rules')
            _resave_all_config_entries()

        ############################
        # Update some default values
        def _update_manifest_cache_size(_proxy_config_key):
            updated = False
            manifest_cache_size = 500 * 1024 * 1024
            if Configuration.exists(key=_proxy_config_key):
                _proxy_config = Configuration.get(key=_proxy_config_key)
                for cache_type in [StorageDriverConfiguration.CACHE_BLOCK, StorageDriverConfiguration.CACHE_FRAGMENT]:
                    if cache_type in _proxy_config and _proxy_config[cache_type][0] == 'alba':
                        if _proxy_config[cache_type][1]['manifest_cache_size'] != manifest_cache_size:
                            updated = True
                            _proxy_config[cache_type][1]['manifest_cache_size'] = manifest_cache_size
                if _proxy_config['manifest_cache_size'] != manifest_cache_size:
                    updated = True
                    _proxy_config['manifest_cache_size'] = manifest_cache_size

                if updated is True:
                    Configuration.set(key=_proxy_config_key, value=_proxy_config)
            return updated

        for storagedriver in StorageDriverList.get_storagedrivers():
            try:
                vpool = storagedriver.vpool
                root_client = sr_client_map[storagedriver.storagerouter_guid]
                _update_manifest_cache_size('/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid))  # Generic scrub proxy is deployed every time scrubbing kicks in, so no need to restart these services
                for alba_proxy in storagedriver.alba_proxies:
                    if _update_manifest_cache_size('/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)) is True:
                        # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service)
                        ExtensionsToolbox.edit_version_file(client=root_client,
                                                            package_name='alba',
                                                            old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, alba_proxy.service.name))

                # Update 'backend_connection_manager' section
                changes = False
                storagedriver_config = StorageDriverConfiguration(vpool.guid, storagedriver.storagedriver_id)
                if 'backend_connection_manager' not in storagedriver_config.configuration:
                    continue

                current_config = storagedriver_config.configuration['backend_connection_manager']
                for key, value in current_config.iteritems():
                    if key.isdigit() is True:
                        if value.get('alba_connection_asd_connection_pool_capacity') != 10:
                            changes = True
                            value['alba_connection_asd_connection_pool_capacity'] = 10
                        if value.get('alba_connection_timeout') != 30:
                            changes = True
                            value['alba_connection_timeout'] = 30
                        if value.get('alba_connection_rora_manifest_cache_capacity') != 25000:
                            changes = True
                            value['alba_connection_rora_manifest_cache_capacity'] = 25000

                if changes is True:
                    storagedriver_config.clear_backend_connection_manager()
                    storagedriver_config.configure_backend_connection_manager(**current_config)
                    storagedriver_config.save(root_client)

                    # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section)
                    ExtensionsToolbox.edit_version_file(client=root_client,
                                                        package_name='volumedriver',
                                                        old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, 'volumedriver_{0}'.format(vpool.name)))
            except Exception:
                MigrationController._logger.exception('Updating default configuration values failed for StorageDriver {0}'.format(storagedriver.storagedriver_id))

        ####################################################
        # Add ALBA_FAIL_FAST as an environment variable for the ALBA proxies
        changed_clients = set()
        for storagerouter in StorageRouterList.get_storagerouters():
            root_client = sr_client_map[storagerouter.guid]
            for service_name in service_manager.list_services(client=root_client):
                if not service_name.startswith('ovs-albaproxy_'):
                    continue

                if ServiceFactory.get_service_type() == 'systemd':
                    path = '/lib/systemd/system/{0}.service'.format(service_name)
                    check = 'Environment=ALBA_FAIL_FAST=true'
                else:
                    path = '/etc/init/{0}.conf'.format(service_name)
                    check = 'env ALBA_FAIL_FAST=true'

                if not root_client.file_exists(path):
                    continue
                if check in root_client.file_read(path):
                    continue

                try:
                    service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY, client=root_client, target_name=service_name)
                    changed_clients.add(root_client)
                    ExtensionsToolbox.edit_version_file(client=root_client,
                                                        package_name='alba',
                                                        old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, service_name))
                except Exception:
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

        ######################################
        # Integration of stats monkey (2.10.2)
        if Configuration.get(key='/ovs/framework/migration|stats_monkey_integration', default=False) is False:
            try:
                # Get content of old key into new key
                old_stats_monkey_key = '/statsmonkey/statsmonkey'
                if Configuration.exists(key=old_stats_monkey_key) is True:
                    Configuration.set(key='/ovs/framework/monitoring/stats_monkey', value=Configuration.get(key=old_stats_monkey_key))
                    Configuration.delete(key=old_stats_monkey_key)

                # Disable the stats monkey by default, or take over the current schedule if it was configured manually before
                celery_key = '/ovs/framework/scheduling/celery'
                current_value = None
                scheduling_config = Configuration.get(key=celery_key, default={})
                if 'statsmonkey.run_all_stats' in scheduling_config:  # Old celery task name of the stats monkey
                    current_value = scheduling_config.pop('statsmonkey.run_all_stats')
                scheduling_config['ovs.stats_monkey.run_all'] = current_value
                scheduling_config['alba.stats_monkey.run_all'] = current_value
                Configuration.set(key=celery_key, value=scheduling_config)

                support_key = '/ovs/framework/support'
                support_config = Configuration.get(key=support_key)
                support_config['support_agent'] = support_config.pop('enabled', True)
                support_config['remote_access'] = support_config.pop('enablesupport', False)
                Configuration.set(key=support_key, value=support_config)

                # Set this key to True once this has finished, so it never runs again
                Configuration.set(key='/ovs/framework/migration|stats_monkey_integration', value=True)
            except Exception:
                MigrationController._logger.exception('Integration of stats monkey failed')
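
        # Illustrative effect of the schedule takeover above (the crontab value
        # is assumed): {u'statsmonkey.run_all_stats': {u'minute': u'0'}} becomes
        # {u'ovs.stats_monkey.run_all': {u'minute': u'0'},
        #  u'alba.stats_monkey.run_all': {u'minute': u'0'}}
        # When no manual schedule existed, both new keys are set to None, which
        # keeps the stats monkey disabled by default.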

        ######################################################
        # Write the cluster ID to a file for backup purposes
        try:
            cluster_id = Configuration.get(key='/ovs/framework/cluster_id', default=None)
            with open(Configuration.CONFIG_STORE_LOCATION, 'r') as config_file:
                config = json.load(config_file)
            if cluster_id is not None and config.get('cluster_id', None) is None:
                config['cluster_id'] = cluster_id
                with open(Configuration.CONFIG_STORE_LOCATION, 'w') as config_file:
                    json.dump(config, config_file, indent=4)
        except Exception:
            MigrationController._logger.exception('Writing cluster id to a file failed.')

        #########################################################
        # Additional string formatting in Arakoon services (2.11)
        try:
            if Configuration.get(key='/ovs/framework/migration|arakoon_service_update', default=False) is False:
                arakoon_service_names = [ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name) for cluster_name in Configuration.list(key='ovs/arakoon')]
                for storagerouter in StorageRouterList.get_masters():
                    for service_name in arakoon_service_names:
                        config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(storagerouter.machine_id, service_name)
                        if Configuration.exists(key=config_key):
                            config = Configuration.get(key=config_key)
                            config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR
                            config['ARAKOON_PKG_NAME'] = PackageFactory.PKG_ARAKOON
                            config['ARAKOON_VERSION_CMD'] = PackageFactory.VERSION_CMD_ARAKOON
                            Configuration.set(key=config_key, value=config)
                # Set this key to True once this has finished, so it never runs again
                Configuration.set(key='/ovs/framework/migration|arakoon_service_update', value=True)
        except Exception:
            MigrationController._logger.exception('Updating the string formatting for the Arakoon services failed')

        ############################################################
        # Additional string formatting in ALBA proxy services (2.11)
        changed_clients = set()
        try:
            if Configuration.get(key='/ovs/framework/migration|alba_proxy_service_update', default=False) is False:
                alba_pkg_name, alba_version_cmd = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_ALBA)
                for service in ServiceTypeList.get_by_name('AlbaProxy').services:
                    root_client = sr_client_map[service.storagerouter_guid]
                    config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(service.storagerouter.machine_id, service.name)
                    if Configuration.exists(key=config_key):
                        config = Configuration.get(key=config_key)
                        config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR
                        config['ALBA_PKG_NAME'] = alba_pkg_name
                        config['ALBA_VERSION_CMD'] = alba_version_cmd
                        Configuration.set(key=config_key, value=config)
                        service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY,
                                                           client=root_client,
                                                           target_name='ovs-{0}'.format(service.name))
                        changed_clients.add(root_client)

                # Set this key to True once this has finished, so it never runs again
                Configuration.set(key='/ovs/framework/migration|alba_proxy_service_update', value=True)
        except Exception:
            MigrationController._logger.exception('Updating the string formatting for the ALBA proxy services failed')

        ############################################################
        # Additional string formatting in DTL/VOLDRV services (2.11)
        try:
            if Configuration.get(key='/ovs/framework/migration|voldrv_service_update', default=False) is False:
                sd_pkg_name, sd_version_cmd = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_SD)
                for vpool in VPoolList.get_vpools():
                    for storagedriver in vpool.storagedrivers:
                        root_client = sr_client_map[storagedriver.storagerouter_guid]
                        for entry in ['dtl', 'volumedriver']:
                            service_name = '{0}_{1}'.format(entry, vpool.name)
                            service_template = StorageDriverInstaller.SERVICE_TEMPLATE_DTL if entry == 'dtl' else StorageDriverInstaller.SERVICE_TEMPLATE_SD
                            config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(storagedriver.storagerouter.machine_id, service_name)
                            if Configuration.exists(key=config_key):
                                config = Configuration.get(key=config_key)
                                config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR
                                config['VOLDRV_PKG_NAME'] = sd_pkg_name
                                config['VOLDRV_VERSION_CMD'] = sd_version_cmd
                                Configuration.set(key=config_key, value=config)
                                service_manager.regenerate_service(name=service_template,
                                                                   client=root_client,
                                                                   target_name='ovs-{0}'.format(service_name))
                                changed_clients.add(root_client)

                # Set this key to True once this has finished, so it never runs again
                Configuration.set(key='/ovs/framework/migration|voldrv_service_update', value=True)
        except Exception:
            MigrationController._logger.exception('Updating the string formatting for the DTL/VOLDRV services failed')

        #######################################################
        # Storing actual package name in version files (2.11.0) (https://github.com/openvstorage/framework/issues/1876)
        if Configuration.get(key='/ovs/framework/migration|actual_package_name_in_version_file', default=False) is False:
            try:
                voldrv_pkg_name, _ = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_SD)
                for storagerouter in StorageRouterList.get_storagerouters():
                    root_client = sr_client_map.get(storagerouter.guid)
                    if root_client is None:
                        continue

                    for file_name in root_client.file_list(directory=ServiceFactory.RUN_FILE_DIR):
                        if not file_name.endswith('.version'):
                            continue
                        file_path = '{0}/{1}'.format(ServiceFactory.RUN_FILE_DIR, file_name)
                        contents = root_client.file_read(filename=file_path)
                        regenerate = False
                        if voldrv_pkg_name == PackageFactory.PKG_VOLDRV_SERVER:
                            if 'volumedriver-server' in contents:
                                regenerate = True
                                contents = contents.replace('volumedriver-server', PackageFactory.PKG_VOLDRV_SERVER)
                                root_client.file_write(filename=file_path, contents=contents)
                        elif voldrv_pkg_name == PackageFactory.PKG_VOLDRV_SERVER_EE:
                            if 'volumedriver-server' in contents or PackageFactory.PKG_VOLDRV_SERVER in contents:
                                regenerate = True
                                contents = contents.replace('volumedriver-server', PackageFactory.PKG_VOLDRV_SERVER_EE)
                                contents = contents.replace(PackageFactory.PKG_VOLDRV_SERVER, PackageFactory.PKG_VOLDRV_SERVER_EE)
                                root_client.file_write(filename=file_path, contents=contents)

                        if regenerate is True:
                            service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_DTL if file_name.startswith('dtl') else StorageDriverInstaller.SERVICE_TEMPLATE_SD,
                                                               client=root_client,
                                                               target_name='ovs-{0}'.format(file_name.split('.')[0]))  # Leave out .version
                            changed_clients.add(root_client)
                Configuration.set(key='/ovs/framework/migration|actual_package_name_in_version_file', value=True)
            except Exception:
                MigrationController._logger.exception('Updating actual package name for version files failed')

        for root_client in changed_clients:
            try:
                root_client.run(['systemctl', 'daemon-reload'])
            except Exception:
                MigrationController._logger.exception('Executing command "systemctl daemon-reload" failed')

        #########################################################
        # Addition of 'Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50' for AlbaProxy SystemD services
        if ServiceFactory.get_service_type() == 'systemd':
            changed_clients = set()
            for storagedriver in StorageDriverList.get_storagedrivers():
                root_client = sr_client_map[storagedriver.storagerouter_guid]
                for alba_proxy in storagedriver.alba_proxies:
                    service = alba_proxy.service
                    service_name = 'ovs-{0}'.format(service.name)
                    if not service_manager.has_service(name=service_name, client=root_client):
                        continue
                    if "Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50" in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                        continue
                    try:
                        service_manager.regenerate_service(name='ovs-albaproxy', client=root_client, target_name=service_name)
                        changed_clients.add(root_client)
                    except Exception:
                        MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
            for root_client in changed_clients:
                root_client.run(['systemctl', 'daemon-reload'])

        #########################################################
        # Addition of 'Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50' for Arakoon SystemD services
        if ServiceFactory.get_service_type() == 'systemd':
            changed_clients = set()
            for storagerouter in StorageRouterList.get_storagerouters():
                root_client = sr_client_map[storagerouter.guid]
                for service_name in service_manager.list_services(client=root_client):
                    if not service_name.startswith('ovs-arakoon-'):
                        continue
                    if not service_manager.has_service(name=service_name, client=root_client):
                        continue
                    if "Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50" in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                        continue
                    try:
                        service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name=service_name)
                        changed_clients.add(root_client)
                    except Exception:
                        MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
            for root_client in changed_clients:
                root_client.run(['systemctl', 'daemon-reload'])

        MigrationController._logger.info('Finished out of band migrations')
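
The ALBA_FAIL_FAST block and the two OCAMLRUNPARAM blocks above repeat one pattern: check for a marker line in the unit file, regenerate the service, then run a daemon-reload. A minimal sketch of a shared helper, assuming the service_manager, sr_client_map and MigrationController objects from the surrounding method (this helper is an illustration, not part of the original code):

    def _regenerate_services_missing_marker(prefix, template_name, marker):
        """
        Regenerate every SystemD service starting with `prefix` whose unit file lacks `marker`
        """
        changed_clients = set()
        for storagerouter in StorageRouterList.get_storagerouters():
            root_client = sr_client_map[storagerouter.guid]
            for service_name in service_manager.list_services(client=root_client):
                if not service_name.startswith(prefix):
                    continue
                path = '/lib/systemd/system/{0}.service'.format(service_name)
                if not root_client.file_exists(path) or marker in root_client.file_read(path):
                    continue
                try:
                    service_manager.regenerate_service(name=template_name, client=root_client, target_name=service_name)
                    changed_clients.add(root_client)
                except Exception:
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

    # Example call, equivalent to the Arakoon OCAMLRUNPARAM block above:
    # _regenerate_services_missing_marker('ovs-arakoon-', 'ovs-arakoon', "Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50")
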
    def validate_alba_backend_sanity_without_claimed_disks(alba_backend):
        """
        Validate whether the ALBA backend is configured correctly
        :param alba_backend: ALBA backend
        :return: None
        """
        # Attribute validation
        assert alba_backend.available is True,\
            'ALBA backend {0} is not available'.format(alba_backend.backend.name)
        assert len(alba_backend.presets) >= 1,\
            'No preset found for ALBA backend {0}'.format(alba_backend.backend.name)
        assert len([default for default in alba_backend.presets if default['is_default'] is True]) == 1,\
            'Could not find default preset for backend {0}'.format(alba_backend.backend.name)
        assert alba_backend.backend.backend_type.code == 'alba',\
            'Backend type for ALBA backend is {0}'.format(alba_backend.backend.backend_type.code)
        assert alba_backend.backend.status == 'RUNNING',\
            'Status for ALBA backend is {0}'.format(alba_backend.backend.status)

        # Validate ABM and NSM services
        storagerouters = GeneralStorageRouter.get_storage_routers()
        storagerouters_with_db_role = [sr for sr in storagerouters if GeneralStorageRouter.has_roles(storagerouter=sr, roles='DB') is True and sr.node_type == 'MASTER']

        assert len(alba_backend.abm_services) == len(storagerouters_with_db_role),\
            'Not enough ABM services found'
        assert len(alba_backend.nsm_services) == len(storagerouters_with_db_role),\
            'Not enough NSM services found'

        # Validate ALBA backend configuration structure
        alba_backend_key = '/ovs/alba/backends'
        assert Configuration.dir_exists(key=alba_backend_key) is True,\
            'Configuration does not contain key {0}'.format(alba_backend_key)

        actual_config_keys = [key for key in Configuration.list(alba_backend_key)]
        expected_config_keys = ['global_gui_error_interval', alba_backend.guid, 'default_nsm_hosts']
        optional_config_keys = ['verification_factor']

        expected_keys_amount = 0
        for optional_key in optional_config_keys:
            if optional_key in actual_config_keys:
                expected_keys_amount += 1

        for expected_key in expected_config_keys:
            if not re.match(Toolbox.regex_guid, expected_key):
                expected_keys_amount += 1
            assert expected_key in actual_config_keys,\
                'Key {0} was not found in tree {1}'.format(expected_key, alba_backend_key)

        for actual_key in list(actual_config_keys):
            if re.match(Toolbox.regex_guid, actual_key):
                actual_config_keys.remove(actual_key)  # Remove all alba backend keys
        assert len(actual_config_keys) == expected_keys_amount,\
            'Another key was added to the {0} tree'.format(alba_backend_key)

        this_alba_backend_key = '{0}/{1}'.format(alba_backend_key, alba_backend.guid)
        actual_keys = [key for key in Configuration.list(this_alba_backend_key)]
        expected_keys = ['maintenance']
        assert actual_keys == expected_keys,\
            'Actual keys: {0} - Expected keys: {1}'.format(actual_keys, expected_keys)

        maintenance_key = '{0}/maintenance'.format(this_alba_backend_key)
        actual_keys = [key for key in Configuration.list(maintenance_key)]
        expected_keys = ['nr_of_agents', 'config']
        assert set(actual_keys) == set(expected_keys),\
            'Actual keys: {0} - Expected keys: {1}'.format(actual_keys, expected_keys)
        # @TODO: Add validation for config values

        # Validate ASD node configuration structure
        alba_nodes = GeneralAlba.get_alba_nodes()
        assert len(alba_nodes) > 0,\
            'Could not find any ALBA nodes in the model'
        alba_node_key = '/ovs/alba/asdnodes'
        actual_keys = [key for key in Configuration.list(alba_node_key)]
        assert len(alba_nodes) == len(actual_keys),\
            'Amount of ALBA nodes in model: {0} >< amount of ALBA nodes in configuration: {1}.'.format(len(alba_nodes),
                                                                                                       len(actual_keys))
        for alba_node in alba_nodes:
            assert alba_node.node_id in actual_keys,\
                'ALBA node with ID {0} not present in configuration'.format(alba_node.node_id)

            actual_asdnode_keys = [key for key in Configuration.list('{0}/{1}'.format(alba_node_key, alba_node.node_id))]
            expected_asdnode_keys = ['config', 'services']
            assert set(actual_asdnode_keys) == set(expected_asdnode_keys),\
                'Actual keys: {0} - Expected keys: {1}'.format(actual_asdnode_keys, expected_asdnode_keys)

            actual_config_keys = [key for key in Configuration.list('{0}/{1}/config'.format(alba_node_key, alba_node.node_id))]
            expected_config_keys = ['main', 'network']
            assert set(actual_config_keys) == set(expected_config_keys),\
                'Actual keys: {0} - Expected keys: {1}'.format(actual_config_keys, expected_config_keys)
            # @TODO: Add validation for main and network values

        # Validate Arakoon configuration structure
        arakoon_abm_key = '/ovs/arakoon/{0}/config'.format(alba_backend.abm_services[0].service.name).replace('arakoon-', '')
        arakoon_nsm_key = '/ovs/arakoon/{0}/config'.format(alba_backend.nsm_services[0].service.name).replace('arakoon-', '')
        assert Configuration.exists(key=arakoon_abm_key, raw=True) is True,\
            'Configuration key {0} does not exist'.format(arakoon_abm_key)
        assert Configuration.exists(key=arakoon_nsm_key, raw=True) is True,\
            'Configuration key {0} does not exist'.format(arakoon_nsm_key)
        # @TODO: Add validation for config values

        # Validate maintenance agents
        actual_amount_agents = len([service for node_services in [alba_node.client.list_maintenance_services() for alba_node in alba_nodes] for service in node_services])
        expected_amount_agents = 1
        assert actual_amount_agents == expected_amount_agents,\
            'Amount of maintenance agents is incorrect. Found {0} - Expected {1}'.format(actual_amount_agents,
                                                                                         expected_amount_agents)

        # Validate arakoon services
        machine_ids = [sr.machine_id for sr in storagerouters_with_db_role]
        abm_service_name = alba_backend.abm_services[0].service.name
        nsm_service_name = alba_backend.nsm_services[0].service.name
        for storagerouter in storagerouters_with_db_role:
            root_client = SSHClient(endpoint=storagerouter, username='******')
            for service_name in [abm_service_name, nsm_service_name]:
                assert GeneralService.has_service(name=service_name, client=root_client) is True,\
                    'Service {0} not deployed on Storage Router {1}'.format(service_name, storagerouter.name)
                exitcode, output = GeneralService.get_service_status(name=service_name, client=root_client)
                assert exitcode is True,\
                    'Service {0} not running on Storage Router {1} - {2}'.format(service_name, storagerouter.name,
                                                                                 output)
                out, err, _ = General.execute_command('arakoon --who-master -config {0}'.format(Configuration.get_configuration_path('/ovs/arakoon/{0}/config'.format(abm_service_name.replace('arakoon-', '')))))
                assert out.strip() in machine_ids,\
                    'Arakoon master is {0}, but should be 1 of "{1}"'.format(out.strip(), ', '.join(machine_ids))
Example #43
0
    def shrink_vpool(cls,
                     storagedriver_guid,
                     offline_storage_router_guids=list()):
        """
        Removes a StorageDriver (if it's the last StorageDriver for a vPool, the vPool is removed as well)
        :param storagedriver_guid: Guid of the StorageDriver to remove
        :type storagedriver_guid: str
        :param offline_storage_router_guids: Guids of StorageRouters which are offline and will be removed from cluster.
                                             WHETHER VPOOL WILL BE DELETED DEPENDS ON THIS
        :type offline_storage_router_guids: list
        :return: None
        :rtype: NoneType
        """
        # TODO: Add logging
        # TODO: Unit test individual pieces of code
        # Validations
        storagedriver = StorageDriver(storagedriver_guid)
        storagerouter = storagedriver.storagerouter
        cls._logger.info(
            'StorageDriver {0} - Deleting StorageDriver {1}'.format(
                storagedriver.guid, storagedriver.name))

        vp_installer = VPoolInstaller(name=storagedriver.vpool.name)
        vp_installer.validate(storagedriver=storagedriver)

        sd_installer = StorageDriverInstaller(vp_installer=vp_installer,
                                              storagedriver=storagedriver)

        cls._logger.info(
            'StorageDriver {0} - Checking availability of related StorageRouters'
            .format(storagedriver.guid))
        sr_client_map = SSHClient.get_clients(endpoints=[
            sd.storagerouter for sd in vp_installer.vpool.storagedrivers
        ],
                                              user_names=['root'])
        sr_installer = StorageRouterInstaller(root_client=sr_client_map.get(
            storagerouter, {}).get('root'),
                                              storagerouter=storagerouter,
                                              vp_installer=vp_installer,
                                              sd_installer=sd_installer)

        offline_srs = sr_client_map.pop('offline')
        if sorted([sr.guid for sr in offline_srs
                   ]) != sorted(offline_storage_router_guids):
            raise RuntimeError('Not all StorageRouters are reachable')

        if storagerouter not in offline_srs:
            mtpt_pids = sr_installer.root_client.run(
                "lsof -t +D '/mnt/{0}' || true".format(
                    vp_installer.name.replace(r"'", r"'\''")),
                allow_insecure=True).splitlines()
            if len(mtpt_pids) > 0:
                raise RuntimeError(
                    'vPool cannot be deleted. Following processes keep the vPool mount point occupied: {0}'
                    .format(', '.join(mtpt_pids)))

        # Retrieve reachable StorageDrivers
        reachable_storagedrivers = []
        for sd in vp_installer.vpool.storagedrivers:
            if sd.storagerouter not in sr_client_map:
                # StorageRouter is offline
                continue

            sd_key = '/ovs/vpools/{0}/hosts/{1}/config'.format(
                vp_installer.vpool.guid, sd.storagedriver_id)
            if Configuration.exists(sd_key) is True:
                path = Configuration.get_configuration_path(sd_key)
                with remote(sd.storagerouter.ip,
                            [LocalStorageRouterClient]) as rem:
                    try:
                        lsrc = rem.LocalStorageRouterClient(path)
                        lsrc.server_revision(
                        )  # 'Cheap' call to verify whether volumedriver is responsive
                        cls._logger.info(
                            'StorageDriver {0} - Responsive StorageDriver {1} on node with IP {2}'
                            .format(storagedriver.guid, sd.name,
                                    sd.storagerouter.ip))
                        reachable_storagedrivers.append(sd)
                    except Exception as exception:
                        if not is_connection_failure(exception):
                            raise

        if len(reachable_storagedrivers) == 0:
            raise RuntimeError(
                'Could not find any responsive node in the cluster')

        # Start removal
        if vp_installer.storagedriver_amount > 1:
            vp_installer.update_status(status=VPool.STATUSES.SHRINKING)
        else:
            vp_installer.update_status(status=VPool.STATUSES.DELETING)

        # Clean up stale vDisks
        cls._logger.info('StorageDriver {0} - Removing stale vDisks'.format(
            storagedriver.guid))
        VDiskController.remove_stale_vdisks(vpool=vp_installer.vpool)

        # Reconfigure the MDSes
        cls._logger.info('StorageDriver {0} - Reconfiguring MDSes'.format(
            storagedriver.guid))
        for vdisk_guid in storagerouter.vdisks_guids:
            try:
                MDSServiceController.ensure_safety(
                    vdisk_guid=vdisk_guid,
                    excluded_storagerouter_guids=[storagerouter.guid] +
                    offline_storage_router_guids)
            except Exception:
                cls._logger.exception(
                    'StorageDriver {0} - vDisk {1} - Ensuring MDS safety failed'
                    .format(storagedriver.guid, vdisk_guid))

        # Validate that all MDSes on current StorageRouter have been moved away
        # Ensure safety does not always throw an error, that's why we perform this check here instead of in the Exception clause of above code
        vdisks = []
        for mds in vp_installer.mds_services:
            for junction in mds.vdisks:
                vdisk = junction.vdisk
                if vdisk in vdisks:
                    continue
                vdisks.append(vdisk)
                cls._logger.critical(
                    'StorageDriver {0} - vDisk {1} {2} - MDS Services have not been migrated away'
                    .format(storagedriver.guid, vdisk.guid, vdisk.name))
        if len(vdisks) > 0:
            # Put back in RUNNING, so it can be used again. Errors keep on displaying in GUI now anyway
            vp_installer.update_status(status=VPool.STATUSES.RUNNING)
            raise RuntimeError(
                'Not all MDS Services have been successfully migrated away')

        # Start with actual removal
        # NOTE: '|=' assumes the installer helpers return True when an error
        # occurred; with '&=' and an initial value of False the flag could
        # never be raised by these calls
        errors_found = False
        if storagerouter not in offline_srs:
            errors_found |= sd_installer.stop_services()

        errors_found |= vp_installer.configure_cluster_registry(
            exclude=[storagedriver], apply_on=reachable_storagedrivers)
        errors_found |= vp_installer.update_node_distance_map()
        errors_found |= vp_installer.remove_mds_services()
        errors_found |= sd_installer.clean_config_management()
        errors_found |= sd_installer.clean_model()

        if storagerouter not in offline_srs:
            errors_found |= sd_installer.clean_directories(
                mountpoints=StorageRouterController.get_mountpoints(
                    client=sr_installer.root_client))

            try:
                DiskController.sync_with_reality(
                    storagerouter_guid=storagerouter.guid)
            except Exception:
                cls._logger.exception(
                    'StorageDriver {0} - Synchronizing disks with reality failed'
                    .format(storagedriver.guid))
                errors_found = True

        if vp_installer.storagedriver_amount > 1:
            # Update the vPool metadata and run DTL checkup
            vp_installer.vpool.metadata['caching_info'].pop(
                sr_installer.storagerouter.guid, None)
            vp_installer.vpool.save()

            try:
                VDiskController.dtl_checkup(vpool_guid=vp_installer.vpool.guid,
                                            ensure_single_timeout=600)
            except Exception:
                cls._logger.exception(
                    'StorageDriver {0} - DTL checkup failed for vPool {1} with guid {2}'
                    .format(storagedriver.guid, vp_installer.name,
                            vp_installer.vpool.guid))
        else:
            cls._logger.info(
                'StorageDriver {0} - Removing vPool from model'.format(
                    storagedriver.guid))
            # Clean up model
            try:
                vp_installer.vpool.delete()
            except Exception:
                errors_found = True
                cls._logger.exception(
                    'StorageDriver {0} - Cleaning up vPool from the model failed'
                    .format(storagedriver.guid))
            Configuration.delete('/ovs/vpools/{0}'.format(
                vp_installer.vpool.guid))

        cls._logger.info('StorageDriver {0} - Running MDS checkup'.format(
            storagedriver.guid))
        try:
            MDSServiceController.mds_checkup()
        except Exception:
            cls._logger.exception(
                'StorageDriver {0} - MDS checkup failed'.format(
                    storagedriver.guid))

        # Update vPool status
        if errors_found is True:
            if vp_installer.storagedriver_amount > 1:
                vp_installer.update_status(status=VPool.STATUSES.FAILURE)
            raise RuntimeError(
                '1 or more errors occurred while trying to remove the StorageDriver. Please check the logs for more information'
            )

        if vp_installer.storagedriver_amount > 1:
            vp_installer.update_status(status=VPool.STATUSES.RUNNING)
        cls._logger.info(
            'StorageDriver {0} - Deleted StorageDriver {1}'.format(
                storagedriver.guid, storagedriver.name))

        if len(VPoolList.get_vpools()) == 0:
            cluster_name = ArakoonInstaller.get_cluster_name('voldrv')
            if ArakoonInstaller.get_arakoon_metadata_by_cluster_name(
                    cluster_name=cluster_name)['internal'] is True:
                cls._logger.debug(
                    'StorageDriver {0} - Removing Arakoon cluster {1}'.format(
                        storagedriver.guid, cluster_name))
                try:
                    installer = ArakoonInstaller(cluster_name=cluster_name)
                    installer.load()
                    installer.delete_cluster()
                except Exception:
                    cls._logger.exception(
                        'StorageDriver {0} - Delete voldrv Arakoon cluster failed'
                        .format(storagedriver.guid))
                service_type = ServiceTypeList.get_by_name(
                    ServiceType.SERVICE_TYPES.ARAKOON)
                service_name = ArakoonInstaller.get_service_name_for_cluster(
                    cluster_name=cluster_name)
                for service in list(service_type.services):
                    if service.name == service_name:
                        service.delete()

        # Remove watcher volumedriver service if last StorageDriver on current StorageRouter
        if len(storagerouter.storagedrivers) == 0 and storagerouter not in offline_srs:  # 'not in offline_srs' ensures the root client was initialized for this StorageRouter
            try:
                if cls._service_manager.has_service(
                        ServiceFactory.SERVICE_WATCHER_VOLDRV,
                        client=sr_installer.root_client):
                    cls._service_manager.stop_service(
                        ServiceFactory.SERVICE_WATCHER_VOLDRV,
                        client=sr_installer.root_client)
                    cls._service_manager.remove_service(
                        ServiceFactory.SERVICE_WATCHER_VOLDRV,
                        client=sr_installer.root_client)
            except Exception:
                cls._logger.exception(
                    'StorageDriver {0} - {1} service deletion failed'.format(
                        storagedriver.guid,
                        ServiceFactory.SERVICE_WATCHER_VOLDRV))
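
A minimal invocation sketch (the guids are hypothetical, and the class name StorageRouterController is inferred from the get_mountpoints call above). The offline guids passed in must exactly match the StorageRouters that turn out to be unreachable, otherwise the 'Not all StorageRouters are reachable' RuntimeError is raised:

    StorageRouterController.shrink_vpool(storagedriver_guid='6f7cf3f8-0000-4c1a-0000-000000000001',
                                         offline_storage_router_guids=['1b2d0b3a-0000-4c1a-0000-000000000002'])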
Example #44
0
    def migrate():
        """
        Executes async migrations. It does not matter too much when they are executed, as long as they are
        eventually executed. This code will typically contain:
        * "dangerous" migration code (it needs certain running services)
        * Migration code depending on a cluster-wide state
        * ...
        """
        MigrationController._logger.info('Preparing out of band migrations...')

        from ovs.dal.lists.storagedriverlist import StorageDriverList
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.dal.lists.vpoollist import VPoolList
        from ovs.extensions.generic.configuration import Configuration
        from ovs.extensions.generic.sshclient import SSHClient
        from ovs_extensions.generic.toolbox import ExtensionsToolbox
        from ovs_extensions.services.interfaces.systemd import Systemd
        from ovs.extensions.services.servicefactory import ServiceFactory
        from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
        from ovs.lib.generic import GenericController

        MigrationController._logger.info('Start out of band migrations...')
        service_manager = ServiceFactory.get_manager()

        sr_client_map = {}
        for storagerouter in StorageRouterList.get_storagerouters():
            sr_client_map[storagerouter.guid] = SSHClient(endpoint=storagerouter,
                                                          username='******')

        #########################################################
        # Addition of 'ExecReload' for AlbaProxy SystemD services
        if ServiceFactory.get_service_type() == 'systemd':
            changed_clients = set()
            for storagedriver in StorageDriverList.get_storagedrivers():
                root_client = sr_client_map[storagedriver.storagerouter_guid]
                for alba_proxy in storagedriver.alba_proxies:
                    service = alba_proxy.service
                    service_name = 'ovs-{0}'.format(service.name)
                    if not service_manager.has_service(name=service_name, client=root_client):
                        continue
                    if 'ExecReload=' in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                        continue

                    try:
                        service_manager.regenerate_service(name='ovs-albaproxy', client=root_client, target_name=service_name)
                        changed_clients.add(root_client)
                    except Exception:
                        MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
            for root_client in changed_clients:
                root_client.run(['systemctl', 'daemon-reload'])

        ##################################################################
        # Adjustment of open file descriptors for Arakoon services to 8192
        changed_clients = set()
        for storagerouter in StorageRouterList.get_storagerouters():
            root_client = sr_client_map[storagerouter.guid]
            for service_name in service_manager.list_services(client=root_client):
                if not service_name.startswith('ovs-arakoon-'):
                    continue

                if ServiceFactory.get_service_type() == 'systemd':
                    path = '/lib/systemd/system/{0}.service'.format(service_name)
                    check = 'LimitNOFILE=8192'
                else:
                    path = '/etc/init/{0}.conf'.format(service_name)
                    check = 'limit nofile 8192 8192'

                if not root_client.file_exists(path):
                    continue
                if check in root_client.file_read(path):
                    continue

                try:
                    service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name=service_name)
                    changed_clients.add(root_client)
                    ExtensionsToolbox.edit_version_file(client=root_client, package_name='arakoon', old_service_name=service_name)
                except Exception:
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

        #############################
        # Migrate to multiple proxies
        for storagedriver in StorageDriverList.get_storagedrivers():
            vpool = storagedriver.vpool
            root_client = sr_client_map[storagedriver.storagerouter_guid]
            for alba_proxy in storagedriver.alba_proxies:
                # Rename alba_proxy service in model
                service = alba_proxy.service
                old_service_name = 'albaproxy_{0}'.format(vpool.name)
                new_service_name = 'albaproxy_{0}_0'.format(vpool.name)
                if old_service_name != service.name:
                    continue
                service.name = new_service_name
                service.save()

                if not service_manager.has_service(name=old_service_name, client=root_client):
                    continue
                old_configuration_key = '/ovs/framework/hosts/{0}/services/{1}'.format(storagedriver.storagerouter.machine_id, old_service_name)
                if not Configuration.exists(key=old_configuration_key):
                    continue

                # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='alba',
                                                    old_service_name=old_service_name,
                                                    new_service_name=new_service_name)

                # Register new service and remove old service
                service_manager.add_service(name='ovs-albaproxy',
                                            client=root_client,
                                            params=Configuration.get(old_configuration_key),
                                            target_name='ovs-{0}'.format(new_service_name))

                # Update scrub proxy config
                proxy_config_key = '/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)
                proxy_config = None if Configuration.exists(key=proxy_config_key) is False else Configuration.get(proxy_config_key)
                if proxy_config is not None:
                    fragment_cache = proxy_config.get('fragment_cache', ['none', {}])
                    if fragment_cache[0] == 'alba' and fragment_cache[1].get('cache_on_write') is True:  # Accelerated ALBA configured
                        fragment_cache_scrub_info = copy.deepcopy(fragment_cache)
                        fragment_cache_scrub_info[1]['cache_on_read'] = False
                        proxy_scrub_config_key = '/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid)
                        proxy_scrub_config = None if Configuration.exists(key=proxy_scrub_config_key) is False else Configuration.get(proxy_scrub_config_key)
                        if proxy_scrub_config is not None and proxy_scrub_config['fragment_cache'] == ['none']:
                            proxy_scrub_config['fragment_cache'] = fragment_cache_scrub_info
                            Configuration.set(proxy_scrub_config_key,
                                              json.dumps(proxy_scrub_config, indent=4),
                                              raw=True)

            # Update 'backend_connection_manager' section
            changes = False
            storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id)
            storagedriver_config.load()
            if 'backend_connection_manager' not in storagedriver_config.configuration:
                continue

            current_config = storagedriver_config.configuration['backend_connection_manager']
            if current_config.get('backend_type') != 'MULTI':
                changes = True
                backend_connection_manager = {'backend_type': 'MULTI'}
                for index, proxy in enumerate(sorted(storagedriver.alba_proxies, key=lambda pr: pr.service.ports[0])):
                    backend_connection_manager[str(index)] = copy.deepcopy(current_config)
                    # noinspection PyUnresolvedReferences
                    backend_connection_manager[str(index)]['alba_connection_use_rora'] = True
                    # noinspection PyUnresolvedReferences
                    backend_connection_manager[str(index)]['alba_connection_rora_manifest_cache_capacity'] = 5000
                    # noinspection PyUnresolvedReferences
                    for key, value in backend_connection_manager[str(index)].items():
                        if key.startswith('backend_interface'):
                            backend_connection_manager[key] = value
                            # noinspection PyUnresolvedReferences
                            del backend_connection_manager[str(index)][key]
                for key, value in {'backend_interface_retries_on_error': 5,
                                   'backend_interface_retry_interval_secs': 1,
                                   'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                    if key not in backend_connection_manager:
                        backend_connection_manager[key] = value
            else:
                backend_connection_manager = current_config
                for value in backend_connection_manager.values():
                    if isinstance(value, dict):
                        for key, val in value.items():
                            if key.startswith('backend_interface'):
                                backend_connection_manager[key] = val
                                changes = True
                                del value[key]
                for key, value in {'backend_interface_retries_on_error': 5,
                                   'backend_interface_retry_interval_secs': 1,
                                   'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                    if key not in backend_connection_manager:
                        changes = True
                        backend_connection_manager[key] = value

            if changes is True:
                storagedriver_config.clear_backend_connection_manager()
                storagedriver_config.configure_backend_connection_manager(**backend_connection_manager)
                storagedriver_config.save(root_client)

                # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='volumedriver',
                                                    old_service_name='volumedriver_{0}'.format(vpool.name))
                if service_manager.ImplementationClass == Systemd:
                    root_client.run(['systemctl', 'daemon-reload'])
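
            # Illustrative shape change performed above (the host value and the
            # elided keys are assumed):
            #   before: {u'backend_type': u'ALBA', u'alba_connection_host': u'10.0.0.1', ...}
            #   after:  {u'backend_type': u'MULTI',
            #            u'backend_interface_retries_on_error': 5,
            #            u'backend_interface_retry_interval_secs': 1,
            #            u'backend_interface_retry_backoff_multiplier': 2.0,
            #            u'0': {u'backend_type': u'ALBA', u'alba_connection_host': u'10.0.0.1',
            #                   u'alba_connection_use_rora': True,
            #                   u'alba_connection_rora_manifest_cache_capacity': 5000, ...}}
            # One numbered sub-section per ALBA proxy is created and the
            # 'backend_interface_*' keys are hoisted to the top level.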

        ########################################
        # Update metadata_store_bits information
        for vpool in VPoolList.get_vpools():
            bits = None
            for storagedriver in vpool.storagedrivers:
                key = '/ovs/framework/hosts/{0}/services/volumedriver_{1}'.format(storagedriver.storagerouter.machine_id, vpool.name)
                if Configuration.exists(key=key) and 'METADATASTORE_BITS' not in Configuration.get(key=key):
                    if bits is None:
                        entries = service_manager.extract_from_service_file(name='ovs-volumedriver_{0}'.format(vpool.name),
                                                                            client=sr_client_map[storagedriver.storagerouter_guid],
                                                                            entries=['METADATASTORE_BITS='])
                        if len(entries) == 1:
                            bits = entries[0].split('=')[-1]
                            bits = int(bits) if bits.isdigit() else 5
                    if bits is not None:
                        try:
                            content = Configuration.get(key=key)
                            content['METADATASTORE_BITS'] = bits
                            Configuration.set(key=key, value=content)
                        except Exception:
                            MigrationController._logger.exception('Error updating volumedriver info for vPool {0} on StorageRouter {1}'.format(vpool.name, storagedriver.storagerouter.name))

            if bits is not None:
                vpool.metadata_store_bits = bits
                vpool.save()

        MigrationController._logger.info('Finished out of band migrations')
        GenericController.refresh_package_information()
Example #45
0
    def execute_scrub_work(queue, vpool, scrub_info, error_messages):
        """
        Executes scrub work for a given vDisk queue and vPool, based on scrub_info
        :param queue: a Queue with vDisk guids that need to be scrubbed (they should only be member of a single vPool)
        :type queue: Queue
        :param vpool: the vPool object of the vDisks
        :type vpool: VPool
        :param scrub_info: A dict containing scrub information: `scrub_path` with the path where to scrub and `storage_router` with the StorageRouter
                           that needs to do the work
        :type scrub_info: dict
        :param error_messages: A list of error messages to be filled
        :type error_messages: list
        :return: a list of error messages
        :rtype: list
        """
        def _verify_mds_config(current_vdisk):
            current_vdisk.invalidate_dynamics('info')
            vdisk_configs = current_vdisk.info['metadata_backend_config']
            if len(vdisk_configs) == 0:
                raise RuntimeError('Could not load MDS configuration')
            return vdisk_configs
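
        # Illustrative return value of the helper above (keys assumed; only
        # 'ip' is used further down):
        # [{u'ip': u'10.100.1.10', u'port': 26300}, {u'ip': u'10.100.1.11', u'port': 26301}]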

        client = None
        lock_time = 5 * 60
        storagerouter = scrub_info['storage_router']
        scrub_directory = '{0}/scrub_work_{1}_{2}'.format(
            scrub_info['scrub_path'], vpool.name, storagerouter.name)
        scrub_config_key = 'ovs/vpools/{0}/proxies/scrub/scrub_config_{1}'.format(
            vpool.guid, storagerouter.guid)
        backend_config_key = 'ovs/vpools/{0}/proxies/scrub/backend_config_{1}'.format(
            vpool.guid, storagerouter.guid)
        alba_proxy_service = 'ovs-albaproxy_{0}_{1}_scrub'.format(
            vpool.name, storagerouter.name)

        # Deploy a proxy
        try:
            with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
                ScheduledTaskController._logger.info(
                    'Scrubber - vPool {0} - StorageRouter {1} - Deploying ALBA proxy {2}'
                    .format(vpool.name, storagerouter.name,
                            alba_proxy_service))
                client = SSHClient(storagerouter, 'root')
                client.dir_create(scrub_directory)
                client.dir_chmod(
                    scrub_directory, 0777
                )  # Celery task executed by 'ovs' user and should be able to write in it
                if ServiceManager.has_service(
                        name=alba_proxy_service, client=client
                ) is True and ServiceManager.get_service_status(
                        name=alba_proxy_service, client=client) is True:
                    ScheduledTaskController._logger.info(
                        'Scrubber - vPool {0} - StorageRouter {1} - Re-using existing proxy service {2}'
                        .format(vpool.name, storagerouter.name,
                                alba_proxy_service))
                    scrub_config = Configuration.get(scrub_config_key)
                else:
                    machine_id = System.get_my_machine_id(client)
                    port_range = Configuration.get(
                        '/ovs/framework/hosts/{0}/ports|storagedriver'.format(
                            machine_id))
                    port = System.get_free_ports(selected_range=port_range,
                                                 nr=1,
                                                 client=client)[0]
                    # Scrub config
                    # {u'albamgr_cfg_url': u'arakoon://config/ovs/vpools/71e2f717-f270-4a41-bbb0-d4c8c084d43e/proxies/64759516-3471-4321-b912-fb424568fc5b/config/abm?ini=%2Fopt%2FOpenvStorage%2Fconfig%2Farakoon_cacc.ini',
                    #  u'fragment_cache': [u'none'],
                    #  u'ips': [u'127.0.0.1'],
                    #  u'log_level': u'info',
                    #  u'manifest_cache_size': 17179869184,
                    #  u'port': 0,
                    #  u'transport': u'tcp'}

                    # Backend config
                    # {u'alba_connection_host': u'10.100.193.155',
                    #  u'alba_connection_port': 26204,
                    #  u'alba_connection_preset': u'preset',
                    #  u'alba_connection_timeout': 15,
                    #  u'alba_connection_transport': u'TCP',
                    #  u'backend_interface_retries_on_error': 5,
                    #  u'backend_interface_retry_backoff_multiplier': 2.0,
                    #  u'backend_interface_retry_interval_secs': 1,
                    #  u'backend_type': u'ALBA'}
                    scrub_config = Configuration.get(
                        'ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(
                            vpool.guid))
                    scrub_config['port'] = port
                    scrub_config['transport'] = 'tcp'
                    Configuration.set(scrub_config_key,
                                      json.dumps(scrub_config, indent=4),
                                      raw=True)

                    params = {
                        'VPOOL_NAME':
                        vpool.name,
                        'LOG_SINK':
                        LogHandler.get_sink_path('alba_proxy'),
                        'CONFIG_PATH':
                        Configuration.get_configuration_path(scrub_config_key)
                    }
                    ServiceManager.add_service(name='ovs-albaproxy',
                                               params=params,
                                               client=client,
                                               target_name=alba_proxy_service)
                    ServiceManager.start_service(name=alba_proxy_service,
                                                 client=client)
                    ScheduledTaskController._logger.info(
                        'Scrubber - vPool {0} - StorageRouter {1} - Deployed ALBA proxy {2}'
                        .format(vpool.name, storagerouter.name,
                                alba_proxy_service))

                backend_config = Configuration.get(
                    'ovs/vpools/{0}/hosts/{1}/config'.format(
                        vpool.guid, vpool.storagedrivers[0].storagedriver_id
                    ))['backend_connection_manager']
                backend_config['alba_connection_host'] = '127.0.0.1'
                backend_config['alba_connection_port'] = scrub_config['port']
                Configuration.set(
                    backend_config_key,
                    json.dumps({"backend_connection_manager": backend_config},
                               indent=4),
                    raw=True)
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - An error occurred deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)
            if client is not None and ServiceManager.has_service(name=alba_proxy_service, client=client) is True:
                if ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True:
                    ServiceManager.stop_service(name=alba_proxy_service, client=client)
                ServiceManager.remove_service(name=alba_proxy_service, client=client)
            if Configuration.exists(scrub_config_key):
                Configuration.delete(scrub_config_key)

        try:
            # Empty the queue with vDisks to scrub
            with remote(storagerouter.ip, [VDisk]) as rem:
                while True:
                    vdisk = None
                    vdisk_guid = queue.get(False)
                    try:
                        # Check MDS master is local. Trigger MDS handover if necessary
                        vdisk = rem.VDisk(vdisk_guid)
                        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Started scrubbing at location {3}'.format(vpool.name, storagerouter.name, vdisk.name, scrub_directory))
                        configs = _verify_mds_config(current_vdisk=vdisk)
                        storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
                        if configs[0].get('ip') != storagedriver.storagerouter.ip:
                            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - MDS master is not local, trigger handover'.format(vpool.name, storagerouter.name, vdisk.name))
                            MDSServiceController.ensure_safety(VDisk(vdisk_guid))  # Do not use a remote VDisk instance here
                            configs = _verify_mds_config(current_vdisk=vdisk)
                            if configs[0].get('ip') != storagedriver.storagerouter.ip:
                                ScheduledTaskController._logger.warning('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Skipping because master MDS still not local'.format(vpool.name, storagerouter.name, vdisk.name))
                                continue

                        # Do the actual scrubbing
                        with vdisk.storagedriver_client.make_locked_client(str(vdisk.volume_id)) as locked_client:
                            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Retrieve and apply scrub work'.format(vpool.name, storagerouter.name, vdisk.name))
                            work_units = locked_client.get_scrubbing_workunits()
                            for work_unit in work_units:
                                res = locked_client.scrub(work_unit=work_unit,
                                                          scratch_dir=scrub_directory,
                                                          log_sinks=[LogHandler.get_sink_path('scrubber', allow_override=True)],
                                                          backend_config=Configuration.get_configuration_path(backend_config_key))
                                locked_client.apply_scrubbing_result(scrubbing_work_result=res)
                            if work_units:
                                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - {3} work units successfully applied'.format(vpool.name, storagerouter.name, vdisk.name, len(work_units)))
                            else:
                                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - No scrubbing required'.format(vpool.name, storagerouter.name, vdisk.name))
                    except Exception:
                        if vdisk is None:
                            message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk with guid {2} could not be found'.format(vpool.name, storagerouter.name, vdisk_guid)
                        else:
                            message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Scrubbing failed'.format(vpool.name, storagerouter.name, vdisk.name)
                        error_messages.append(message)
                        ScheduledTaskController._logger.exception(message)

        except Empty:  # Raised when all items have been fetched from the queue
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Queue completely processed'.format(vpool.name, storagerouter.name))
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - Scrubbing failed'.format(vpool.name, storagerouter.name)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)

        # Delete the proxy again
        try:
            with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removing service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                client = SSHClient(storagerouter, 'root')
                client.dir_delete(scrub_directory)
                if ServiceManager.has_service(alba_proxy_service, client=client):
                    ServiceManager.stop_service(alba_proxy_service, client=client)
                    ServiceManager.remove_service(alba_proxy_service, client=client)
                if Configuration.exists(scrub_config_key):
                    Configuration.delete(scrub_config_key)
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removed service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2} failed'.format(vpool.name, storagerouter.name, alba_proxy_service)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)
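A note on how this helper is typically driven: the queue argument holds the vDisk guids of a single vPool, and one such call runs per scrub location. A minimal sketch is shown below, assuming the scrub_info layout from the docstring in Beispiel #48 further down; the thread wiring and the vdisks_guids attribute are illustrative, not taken from this code:

    from Queue import Queue  # Python 2 stdlib, matching this codebase
    from threading import Thread

    def scrub_vpool(vpool, scrub_locations):
        # One shared queue of vDisk guids, drained by one worker per scrub location
        error_messages = []
        vdisk_queue = Queue()
        for vdisk_guid in vpool.vdisks_guids:  # assumed DAL attribute
            vdisk_queue.put(vdisk_guid)
        threads = []
        for scrub_info in scrub_locations:  # {'scrub_path': ..., 'storage_router': ...}
            thread = Thread(target=execute_scrub_work,
                            args=(vdisk_queue, vpool, scrub_info, error_messages))
            thread.start()
            threads.append(thread)
        for thread in threads:
            thread.join()
        return error_messages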
Beispiel #46
0
    def migrate(previous_version, master_ips=None, extra_ips=None):
        """
        Migrates from any version to any version, running all migrations required
        If previous_version is for example 1 and this script is at
        version 3 it will execute two steps:
          - 1 > 2
          - 2 > 3
        :param previous_version: The previous version from which to start the migration.
        :param master_ips: IP addresses of the MASTER nodes
        :param extra_ips: IP addresses of the EXTRA nodes
        """

        logger = LogHandler.get('extensions', name='migration')
        working_version = previous_version

        # Version 1 introduced:
        # - Flexible SSD layout
        if working_version < 1:
            try:
                from ovs.extensions.generic.configuration import Configuration
                if Configuration.exists('ovs.arakoon'):
                    Configuration.delete('ovs.arakoon', remove_root=True)
                Configuration.set('ovs.core.ovsdb', '/opt/OpenvStorage/db')
            except:
                logger.exception('Error migrating to version 1')

            working_version = 1

        # Version 2 introduced:
        # - Registration
        if working_version < 2:
            try:
                import time
                from ovs.extensions.generic.configuration import Configuration
                if not Configuration.exists('ovs.core.registered'):
                    Configuration.set('ovs.core.registered', False)
                    Configuration.set('ovs.core.install_time', time.time())
            except:
                logger.exception('Error migrating to version 2')

            working_version = 2

        # Version 3 introduced:
        # - New arakoon clients
        if working_version < 3:
            try:
                from ovs.extensions.db.arakoon import ArakoonInstaller
                reload(ArakoonInstaller)
                from ovs.extensions.db.arakoon.ArakoonInstaller import ArakoonInstaller
                from ovs.extensions.generic.sshclient import SSHClient
                from ovs.extensions.generic.configuration import Configuration
                if master_ips is not None:
                    for ip in master_ips:
                        client = SSHClient(ip)
                        if client.dir_exists(ArakoonInstaller.ARAKOON_CONFIG_DIR):
                            for cluster_name in client.dir_list(ArakoonInstaller.ARAKOON_CONFIG_DIR):
                                try:
                                    ArakoonInstaller.deploy_cluster(cluster_name, ip)
                                except:
                                    pass
                if Configuration.exists('ovs.core.storage.persistent'):
                    Configuration.set('ovs.core.storage.persistent', 'pyrakoon')
            except:
                logger.exception('Error migrating to version 3')

            working_version = 3

        # Version 4 introduced:
        # - Etcd
        if working_version < 4:
            try:
                import os
                import json
                from ConfigParser import RawConfigParser
                from ovs.extensions.db.etcd import installer
                reload(installer)
                from ovs.extensions.db.etcd.installer import EtcdInstaller
                from ovs.extensions.db.etcd.configuration import EtcdConfiguration
                from ovs.extensions.generic.system import System
                host_id = System.get_my_machine_id()
                etcd_migrate = False
                if EtcdInstaller.has_cluster('127.0.0.1', 'config'):
                    etcd_migrate = True
                else:
                    if master_ips is not None and extra_ips is not None:
                        cluster_ip = None
                        for ip in master_ips + extra_ips:
                            if EtcdInstaller.has_cluster(ip, 'config'):
                                cluster_ip = ip
                                break
                        node_ip = None
                        path = '/opt/OpenvStorage/config/ovs.json'
                        if os.path.exists(path):
                            with open(path) as config_file:
                                config = json.load(config_file)
                                node_ip = config['grid']['ip']
                        if node_ip is not None:
                            if cluster_ip is None:
                                EtcdInstaller.create_cluster('config', node_ip)
                                EtcdConfiguration.initialize()
                                EtcdConfiguration.initialize_host(host_id)
                            else:
                                EtcdInstaller.extend_cluster(cluster_ip, node_ip, 'config')
                                EtcdConfiguration.initialize_host(host_id)
                            etcd_migrate = True
                if etcd_migrate is True:
                    # Migrating configuration files
                    path = '/opt/OpenvStorage/config/ovs.json'
                    if os.path.exists(path):
                        with open(path) as config_file:
                            config = json.load(config_file)
                            EtcdConfiguration.set('/ovs/framework/cluster_id', config['support']['cid'])
                            if not EtcdConfiguration.exists('/ovs/framework/install_time'):
                                EtcdConfiguration.set('/ovs/framework/install_time', config['core']['install_time'])
                            else:
                                EtcdConfiguration.set('/ovs/framework/install_time', min(EtcdConfiguration.get('/ovs/framework/install_time'), config['core']['install_time']))
                            EtcdConfiguration.set('/ovs/framework/registered', config['core']['registered'])
                            EtcdConfiguration.set('/ovs/framework/plugins/installed', config['plugins'])
                            EtcdConfiguration.set('/ovs/framework/stores', config['core']['storage'])
                            EtcdConfiguration.set('/ovs/framework/paths', {'cfgdir': config['core']['cfgdir'],
                                                                           'basedir': config['core']['basedir'],
                                                                           'ovsdb': config['core']['ovsdb']})
                            EtcdConfiguration.set('/ovs/framework/support', {'enablesupport': config['support']['enablesupport'],
                                                                             'enabled': config['support']['enabled'],
                                                                             'interval': config['support']['interval']})
                            EtcdConfiguration.set('/ovs/framework/storagedriver', {'mds_safety': config['storagedriver']['mds']['safety'],
                                                                                   'mds_tlogs': config['storagedriver']['mds']['tlogs'],
                                                                                   'mds_maxload': config['storagedriver']['mds']['maxload']})
                            EtcdConfiguration.set('/ovs/framework/webapps', {'html_endpoint': config['webapps']['html_endpoint'],
                                                                             'oauth2': config['webapps']['oauth2']})
                            EtcdConfiguration.set('/ovs/framework/messagequeue', {'endpoints': [],
                                                                                  'protocol': config['core']['broker']['protocol'],
                                                                                  'user': config['core']['broker']['login'],
                                                                                  'port': config['core']['broker']['port'],
                                                                                  'password': config['core']['broker']['password'],
                                                                                  'queues': config['core']['broker']['queues']})
                            host_key = '/ovs/framework/hosts/{0}{{0}}'.format(host_id)
                            EtcdConfiguration.set(host_key.format('/storagedriver'), {'rsp': config['storagedriver']['rsp'],
                                                                                      'vmware_mode': config['storagedriver']['vmware_mode']})
                            EtcdConfiguration.set(host_key.format('/ports'), config['ports'])
                            EtcdConfiguration.set(host_key.format('/setupcompleted'), config['core']['setupcompleted'])
                            EtcdConfiguration.set(host_key.format('/versions'), config['core'].get('versions', {}))
                            EtcdConfiguration.set(host_key.format('/type'), config['core']['nodetype'])
                            EtcdConfiguration.set(host_key.format('/ip'), config['grid']['ip'])
                    path = '{0}/memcacheclient.cfg'.format(EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
                    if os.path.exists(path):
                        config = RawConfigParser()
                        config.read(path)
                        nodes = [config.get(node.strip(), 'location').strip()
                                 for node in config.get('main', 'nodes').split(',')]
                        EtcdConfiguration.set('/ovs/framework/memcache|endpoints', nodes)
                        os.remove(path)
                    path = '{0}/rabbitmqclient.cfg'.format(EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
                    if os.path.exists(path):
                        config = RawConfigParser()
                        config.read(path)
                        nodes = [config.get(node.strip(), 'location').strip()
                                 for node in config.get('main', 'nodes').split(',')]
                        EtcdConfiguration.set('/ovs/framework/messagequeue|endpoints', nodes)
                        os.remove(path)
                    # Migrate arakoon configuration files
                    from ovs.extensions.db.arakoon import ArakoonInstaller
                    reload(ArakoonInstaller)
                    from ovs.extensions.db.arakoon.ArakoonInstaller import ArakoonInstaller, ArakoonClusterConfig
                    from ovs.extensions.generic.sshclient import SSHClient
                    if master_ips is not None:
                        config_dir = '/opt/OpenvStorage/config/arakoon/'
                        for ip in master_ips:
                            client = SSHClient(ip)
                            if client.dir_exists(config_dir):
                                for cluster_name in client.dir_list(config_dir):
                                    try:
                                        with open('{0}/{1}/{1}.cfg'.format(config_dir, cluster_name)) as config_file:
                                            EtcdConfiguration.set(ArakoonClusterConfig.ETCD_CONFIG_KEY.format(cluster_name),
                                                                  config_file.read(),
                                                                  raw=True)
                                            ArakoonInstaller.deploy_cluster(cluster_name, ip)
                                    except:
                                        logger.exception('Error migrating {0} on {1}'.format(cluster_name, ip))
                                client.dir_delete(config_dir)
            except:
                logger.exception('Error migrating to version 4')

            working_version = 4

        return working_version
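The structure above is the classic incremental-migration pattern: every block guards on working_version < N, performs one upgrade step, and then records N, so a node starting at any version replays exactly the missing steps. A stripped-down sketch of the same control flow, with hypothetical placeholder step functions and CURRENT_VERSION:

    CURRENT_VERSION = 4
    MIGRATION_STEPS = {1: lambda: None,  # e.g. flexible SSD layout
                       2: lambda: None,  # e.g. registration keys
                       3: lambda: None,  # e.g. new arakoon clients
                       4: lambda: None}  # e.g. move config to etcd

    def run_migrations(previous_version):
        working_version = previous_version
        for version in sorted(MIGRATION_STEPS):
            if working_version < version <= CURRENT_VERSION:
                MIGRATION_STEPS[version]()
                working_version = version  # advance only after the step ran
        return working_version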
Beispiel #47
0
    def migrate(previous_version, master_ips=None, extra_ips=None):
        """
        Migrates from any version to any version, running all migrations required
        If previous_version is for example 1 and this script is at
        version 3 it will execute two steps:
          - 1 > 2
          - 2 > 3
        :param previous_version: The previous version from which to start the migration.
        :param master_ips: IP addresses of the MASTER nodes
        :param extra_ips: IP addresses of the EXTRA nodes
        """

        logger = LogHandler.get('extensions', name='migration')
        working_version = previous_version

        # Version 1 introduced:
        # - Flexible SSD layout
        if working_version < 1:
            try:
                from ovs.extensions.generic.configuration import Configuration
                if Configuration.exists('ovs.arakoon'):
                    Configuration.delete('ovs.arakoon', remove_root=True)
                Configuration.set('ovs.core.ovsdb', '/opt/OpenvStorage/db')
            except:
                logger.exception('Error migrating to version 1')

            working_version = 1

        # Version 2 introduced:
        # - Registration
        if working_version < 2:
            try:
                import time
                from ovs.extensions.generic.configuration import Configuration
                if not Configuration.exists('ovs.core.registered'):
                    Configuration.set('ovs.core.registered', False)
                    Configuration.set('ovs.core.install_time', time.time())
            except:
                logger.exception('Error migrating to version 2')

            working_version = 2

        # Version 3 introduced:
        # - New arakoon clients
        if working_version < 3:
            try:
                from ovs.extensions.db.arakoon import ArakoonInstaller
                reload(ArakoonInstaller)
                from ovs.extensions.db.arakoon.ArakoonInstaller import ArakoonInstaller
                from ovs.extensions.generic.sshclient import SSHClient
                from ovs.extensions.generic.configuration import Configuration
                if master_ips is not None:
                    for ip in master_ips:
                        client = SSHClient(ip)
                        if client.dir_exists(ArakoonInstaller.ARAKOON_CONFIG_DIR):
                            for cluster_name in client.dir_list(ArakoonInstaller.ARAKOON_CONFIG_DIR):
                                try:
                                    ArakoonInstaller.deploy_cluster(cluster_name, ip)
                                except:
                                    pass
                if Configuration.exists('ovs.core.storage.persistent'):
                    Configuration.set('ovs.core.storage.persistent', 'pyrakoon')
            except:
                logger.exception('Error migrating to version 3')

            working_version = 3

        # Version 4 introduced:
        # - Etcd
        if working_version < 4:
            try:
                import os
                import json
                from ConfigParser import RawConfigParser
                from ovs.extensions.db.etcd import installer
                reload(installer)
                from ovs.extensions.db.etcd.installer import EtcdInstaller
                from ovs.extensions.db.etcd.configuration import EtcdConfiguration
                from ovs.extensions.generic.system import System
                host_id = System.get_my_machine_id()
                etcd_migrate = False
                if EtcdInstaller.has_cluster('127.0.0.1', 'config'):
                    etcd_migrate = True
                else:
                    if master_ips is not None and extra_ips is not None:
                        cluster_ip = None
                        for ip in master_ips + extra_ips:
                            if EtcdInstaller.has_cluster(ip, 'config'):
                                cluster_ip = ip
                                break
                        node_ip = None
                        path = '/opt/OpenvStorage/config/ovs.json'
                        if os.path.exists(path):
                            with open(path) as config_file:
                                config = json.load(config_file)
                                node_ip = config['grid']['ip']
                        if node_ip is not None:
                            if cluster_ip is None:
                                EtcdInstaller.create_cluster('config', node_ip)
                                EtcdConfiguration.initialize()
                                EtcdConfiguration.initialize_host(host_id)
                            else:
                                EtcdInstaller.extend_cluster(cluster_ip, node_ip, 'config')
                                EtcdConfiguration.initialize_host(host_id)
                            etcd_migrate = True
                if etcd_migrate is True:
                    # Migrating configuration files
                    path = '/opt/OpenvStorage/config/ovs.json'
                    if os.path.exists(path):
                        with open(path) as config_file:
                            config = json.load(config_file)
                            EtcdConfiguration.set('/ovs/framework/cluster_id', config['support']['cid'])
                            if not EtcdConfiguration.exists('/ovs/framework/install_time'):
                                EtcdConfiguration.set('/ovs/framework/install_time', config['core']['install_time'])
                            else:
                                EtcdConfiguration.set('/ovs/framework/install_time', min(EtcdConfiguration.get('/ovs/framework/install_time'), config['core']['install_time']))
                            EtcdConfiguration.set('/ovs/framework/registered', config['core']['registered'])
                            EtcdConfiguration.set('/ovs/framework/plugins/installed', config['plugins'])
                            EtcdConfiguration.set('/ovs/framework/stores', config['core']['storage'])
                            EtcdConfiguration.set('/ovs/framework/paths', {'cfgdir': config['core']['cfgdir'],
                                                                           'basedir': config['core']['basedir'],
                                                                           'ovsdb': config['core']['ovsdb']})
                            EtcdConfiguration.set('/ovs/framework/support', {'enablesupport': config['support']['enablesupport'],
                                                                             'enabled': config['support']['enabled'],
                                                                             'interval': config['support']['interval']})
                            EtcdConfiguration.set('/ovs/framework/storagedriver', {'mds_safety': config['storagedriver']['mds']['safety'],
                                                                                   'mds_tlogs': config['storagedriver']['mds']['tlogs'],
                                                                                   'mds_maxload': config['storagedriver']['mds']['maxload']})
                            EtcdConfiguration.set('/ovs/framework/webapps', {'html_endpoint': config['webapps']['html_endpoint'],
                                                                             'oauth2': config['webapps']['oauth2']})
                            EtcdConfiguration.set('/ovs/framework/messagequeue', {'endpoints': [],
                                                                                  'protocol': config['core']['broker']['protocol'],
                                                                                  'user': config['core']['broker']['login'],
                                                                                  'port': config['core']['broker']['port'],
                                                                                  'password': config['core']['broker']['password'],
                                                                                  'queues': config['core']['broker']['queues']})
                            host_key = '/ovs/framework/hosts/{0}{{0}}'.format(host_id)
                            EtcdConfiguration.set(host_key.format('/storagedriver'), {'rsp': config['storagedriver']['rsp'],
                                                                                      'vmware_mode': config['storagedriver']['vmware_mode']})
                            EtcdConfiguration.set(host_key.format('/ports'), config['ports'])
                            EtcdConfiguration.set(host_key.format('/setupcompleted'), config['core']['setupcompleted'])
                            EtcdConfiguration.set(host_key.format('/versions'), config['core'].get('versions', {}))
                            EtcdConfiguration.set(host_key.format('/type'), config['core']['nodetype'])
                            EtcdConfiguration.set(host_key.format('/ip'), config['grid']['ip'])
                    path = '{0}/memcacheclient.cfg'.format(EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
                    if os.path.exists(path):
                        config = RawConfigParser()
                        config.read(path)
                        nodes = [config.get(node.strip(), 'location').strip()
                                 for node in config.get('main', 'nodes').split(',')]
                        EtcdConfiguration.set('/ovs/framework/memcache|endpoints', nodes)
                        os.remove(path)
                    path = '{0}/rabbitmqclient.cfg'.format(EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
                    if os.path.exists(path):
                        config = RawConfigParser()
                        config.read(path)
                        nodes = [config.get(node.strip(), 'location').strip()
                                 for node in config.get('main', 'nodes').split(',')]
                        EtcdConfiguration.set('/ovs/framework/messagequeue|endpoints', nodes)
                        os.remove(path)
                    # Migrate arakoon configuration files
                    from ovs.extensions.db.arakoon import ArakoonInstaller
                    reload(ArakoonInstaller)
                    from ovs.extensions.db.arakoon.ArakoonInstaller import ArakoonInstaller, ArakoonClusterConfig
                    from ovs.extensions.generic.sshclient import SSHClient
                    if master_ips is not None:
                        config_dir = '/opt/OpenvStorage/config/arakoon/'
                        for ip in master_ips:
                            client = SSHClient(ip)
                            if client.dir_exists(config_dir):
                                for cluster_name in client.dir_list(config_dir):
                                    try:
                                        with open('{0}/{1}/{1}.cfg'.format(config_dir, cluster_name)) as config_file:
                                            EtcdConfiguration.set(ArakoonClusterConfig.ETCD_CONFIG_KEY.format(cluster_name),
                                                                  config_file.read(),
                                                                  raw=True)
                                            ArakoonInstaller.deploy_cluster(cluster_name, ip)
                                    except:
                                        logger.exception('Error migrating {0} on {1}'.format(cluster_name, ip))
                                client.dir_delete(config_dir)
            except:
                logger.exception('Error migrating to version 4')

            working_version = 4

        return working_version
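Both migrate variants parse the legacy memcacheclient.cfg/rabbitmqclient.cfg files the same way: a 'main' section whose 'nodes' value names the per-node sections, each of which carries a 'location'. A self-contained sketch of that extraction; the file content below is invented purely to illustrate the inferred layout:

    from ConfigParser import RawConfigParser  # Python 2 stdlib
    from StringIO import StringIO

    # Invented sample following the layout implied by the parsing code above
    sample = ('[main]\n'
              'nodes = node1, node2\n'
              '\n'
              '[node1]\n'
              'location = 10.100.1.1:11211\n'
              '\n'
              '[node2]\n'
              'location = 10.100.1.2:11211\n')

    config = RawConfigParser()
    config.readfp(StringIO(sample))
    nodes = [config.get(node.strip(), 'location').strip()
             for node in config.get('main', 'nodes').split(',')]
    assert nodes == ['10.100.1.1:11211', '10.100.1.2:11211']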
Beispiel #48
0
    def execute_scrub_work(queue, vpool, scrub_info, error_messages):
        """
        Executes scrub work for a given vDisk queue and vPool, based on scrub_info
        :param queue: a Queue with vDisk guids that need to be scrubbed (they should only be member of a single vPool)
        :type queue: Queue
        :param vpool: the vPool object of the vDisks
        :type vpool: VPool
        :param scrub_info: A dict containing scrub information: `scrub_path` with the path where to scrub and `storage_router` with the StorageRouter
                           that needs to do the work
        :type scrub_info: dict
        :param error_messages: A list of error messages to be filled
        :type error_messages: list
        :return: a list of error messages
        :rtype: list
        """

        def _verify_mds_config(current_vdisk):
            current_vdisk.invalidate_dynamics('info')
            vdisk_configs = current_vdisk.info['metadata_backend_config']
            if len(vdisk_configs) == 0:
                raise RuntimeError('Could not load MDS configuration')
            return vdisk_configs

        client = None
        lock_time = 5 * 60
        storagerouter = scrub_info['storage_router']
        scrub_directory = '{0}/scrub_work_{1}_{2}'.format(scrub_info['scrub_path'], vpool.name, storagerouter.name)
        scrub_config_key = 'ovs/vpools/{0}/proxies/scrub/scrub_config_{1}'.format(vpool.guid, storagerouter.guid)
        backend_config_key = 'ovs/vpools/{0}/proxies/scrub/backend_config_{1}'.format(vpool.guid, storagerouter.guid)
        alba_proxy_service = 'ovs-albaproxy_{0}_{1}_scrub'.format(vpool.name, storagerouter.name)

        # Deploy a proxy
        try:
            with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                client = SSHClient(storagerouter, 'root')
                client.dir_create(scrub_directory)
                client.dir_chmod(scrub_directory, 0777)  # Celery task executed by 'ovs' user and should be able to write in it
                if ServiceManager.has_service(name=alba_proxy_service, client=client) is True and ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True:
                    ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Re-using existing proxy service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                    scrub_config = Configuration.get(scrub_config_key)
                else:
                    machine_id = System.get_my_machine_id(client)
                    port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|storagedriver'.format(machine_id))
                    port = System.get_free_ports(selected_range=port_range, nr=1, client=client)[0]
                    # Scrub config
                    # {u'albamgr_cfg_url': u'arakoon://config/ovs/vpools/71e2f717-f270-4a41-bbb0-d4c8c084d43e/proxies/64759516-3471-4321-b912-fb424568fc5b/config/abm?ini=%2Fopt%2FOpenvStorage%2Fconfig%2Farakoon_cacc.ini',
                    #  u'fragment_cache': [u'none'],
                    #  u'ips': [u'127.0.0.1'],
                    #  u'log_level': u'info',
                    #  u'manifest_cache_size': 17179869184,
                    #  u'port': 0,
                    #  u'transport': u'tcp'}

                    # Backend config
                    # {u'alba_connection_host': u'10.100.193.155',
                    #  u'alba_connection_port': 26204,
                    #  u'alba_connection_preset': u'preset',
                    #  u'alba_connection_timeout': 15,
                    #  u'alba_connection_transport': u'TCP',
                    #  u'backend_interface_retries_on_error': 5,
                    #  u'backend_interface_retry_backoff_multiplier': 2.0,
                    #  u'backend_interface_retry_interval_secs': 1,
                    #  u'backend_type': u'ALBA'}
                    scrub_config = Configuration.get('ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid))
                    scrub_config['port'] = port
                    scrub_config['transport'] = 'tcp'
                    Configuration.set(scrub_config_key, json.dumps(scrub_config, indent=4), raw=True)

                    params = {'VPOOL_NAME': vpool.name,
                              'LOG_SINK': LogHandler.get_sink_path('alba_proxy'),
                              'CONFIG_PATH': Configuration.get_configuration_path(scrub_config_key)}
                    ServiceManager.add_service(name='ovs-albaproxy', params=params, client=client, target_name=alba_proxy_service)
                    ServiceManager.start_service(name=alba_proxy_service, client=client)
                    ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deployed ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))

                backend_config = Configuration.get('ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, vpool.storagedrivers[0].storagedriver_id))['backend_connection_manager']
                backend_config['alba_connection_host'] = '127.0.0.1'
                backend_config['alba_connection_port'] = scrub_config['port']
                Configuration.set(backend_config_key, json.dumps({"backend_connection_manager": backend_config}, indent=4), raw=True)
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - An error occurred deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)
            if client is not None and ServiceManager.has_service(name=alba_proxy_service, client=client) is True:
                if ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True:
                    ServiceManager.stop_service(name=alba_proxy_service, client=client)
                ServiceManager.remove_service(name=alba_proxy_service, client=client)
            if Configuration.exists(scrub_config_key):
                Configuration.delete(scrub_config_key)

        try:
            # Empty the queue with vDisks to scrub
            with remote(storagerouter.ip, [VDisk]) as rem:
                while True:
                    vdisk = None
                    vdisk_guid = queue.get(False)
                    try:
                        # Check MDS master is local. Trigger MDS handover if necessary
                        vdisk = rem.VDisk(vdisk_guid)
                        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Started scrubbing at location {3}'.format(vpool.name, storagerouter.name, vdisk.name, scrub_directory))
                        configs = _verify_mds_config(current_vdisk=vdisk)
                        storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
                        if configs[0].get('ip') != storagedriver.storagerouter.ip:
                            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - MDS master is not local, trigger handover'.format(vpool.name, storagerouter.name, vdisk.name))
                            MDSServiceController.ensure_safety(VDisk(vdisk_guid))  # Do not use a remote VDisk instance here
                            configs = _verify_mds_config(current_vdisk=vdisk)
                            if configs[0].get('ip') != storagedriver.storagerouter.ip:
                                ScheduledTaskController._logger.warning('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Skipping because master MDS still not local'.format(vpool.name, storagerouter.name, vdisk.name))
                                continue

                        # Do the actual scrubbing
                        with vdisk.storagedriver_client.make_locked_client(str(vdisk.volume_id)) as locked_client:
                            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Retrieve and apply scrub work'.format(vpool.name, storagerouter.name, vdisk.name))
                            work_units = locked_client.get_scrubbing_workunits()
                            for work_unit in work_units:
                                res = locked_client.scrub(work_unit=work_unit,
                                                          scratch_dir=scrub_directory,
                                                          log_sinks=[LogHandler.get_sink_path('scrubber', allow_override=True)],
                                                          backend_config=Configuration.get_configuration_path(backend_config_key))
                                locked_client.apply_scrubbing_result(scrubbing_work_result=res)
                            if work_units:
                                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - {3} work units successfully applied'.format(vpool.name, storagerouter.name, vdisk.name, len(work_units)))
                            else:
                                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - No scrubbing required'.format(vpool.name, storagerouter.name, vdisk.name))
                    except Exception:
                        if vdisk is None:
                            message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk with guid {2} could not be found'.format(vpool.name, storagerouter.name, vdisk_guid)
                        else:
                            message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Scrubbing failed'.format(vpool.name, storagerouter.name, vdisk.name)
                        error_messages.append(message)
                        ScheduledTaskController._logger.exception(message)

        except Empty:  # Raised when all items have been fetched from the queue
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Queue completely processed'.format(vpool.name, storagerouter.name))
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - Scrubbing failed'.format(vpool.name, storagerouter.name)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)

        # Delete the proxy again
        try:
            with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removing service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                client = SSHClient(storagerouter, 'root')
                client.dir_delete(scrub_directory)
                if ServiceManager.has_service(alba_proxy_service, client=client):
                    ServiceManager.stop_service(alba_proxy_service, client=client)
                    ServiceManager.remove_service(alba_proxy_service, client=client)
                if Configuration.exists(scrub_config_key):
                    Configuration.delete(scrub_config_key)
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removed service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2} failed'.format(vpool.name, storagerouter.name, alba_proxy_service)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)
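Note how both the deploy phase and the teardown phase serialize on the same file_mutex, so at most one scrub job per host can touch the shared ALBA proxy service at any time. The guard pattern in isolation; the work callable is a hypothetical stand-in, and file_mutex is the OVS helper used above (its module-level import is not shown in the example either):

    lock_time = 5 * 60  # seconds to wait for the lock, as above

    def with_proxy_lock(work):
        # Only one caller per host gets past this point at a time, so the
        # service add/start/stop/remove calls above cannot interleave.
        with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
            return work()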
Beispiel #49
0
    def migrate(previous_version, master_ips=None, extra_ips=None):
        """
        Migrates from any version to any version, running all migrations required
        If previous_version is for example 1 and this script is at
        version 3 it will execute two steps:
          - 1 > 2
          - 2 > 3
        :param previous_version: The previous version from which to start the migration.
        :param master_ips: IP addresses of the MASTER nodes
        :param extra_ips: IP addresses of the EXTRA nodes
        """

        working_version = previous_version

        # Version 1 introduced:
        # - Flexible SSD layout
        if working_version < 1:
            from ovs.extensions.generic.configuration import Configuration
            if Configuration.exists('ovs.arakoon'):
                Configuration.delete('ovs.arakoon', remove_root=True)
            Configuration.set('ovs.core.ovsdb', '/opt/OpenvStorage/db')

            working_version = 1

        # Version 2 introduced:
        # - Registration
        if working_version < 2:
            import time
            from ovs.extensions.generic.configuration import Configuration
            if not Configuration.exists('ovs.core.registered'):
                Configuration.set('ovs.core.registered', False)
                Configuration.set('ovs.core.install_time', time.time())

            working_version = 2

        # Version 3 introduced:
        # - New arakoon clients
        if working_version < 3:
            from ovs.extensions.db.arakoon import ArakoonInstaller
            reload(ArakoonInstaller)
            from ovs.extensions.db.arakoon import ArakoonInstaller
            from ovs.extensions.generic.sshclient import SSHClient
            from ovs.extensions.generic.configuration import Configuration
            if master_ips is not None:
                for ip in master_ips:
                    client = SSHClient(ip)
                    if client.dir_exists(ArakoonInstaller.ArakoonInstaller.ARAKOON_CONFIG_DIR):
                        for cluster_name in client.dir_list(ArakoonInstaller.ArakoonInstaller.ARAKOON_CONFIG_DIR):
                            try:
                                ArakoonInstaller.ArakoonInstaller.deploy_cluster(cluster_name, ip)
                            except:
                                pass
            if Configuration.exists('ovs.core.storage.persistent'):
                Configuration.set('ovs.core.storage.persistent', 'pyrakoon')

            working_version = 3

        return working_version
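One detail shared by all three migrate variants: ArakoonInstaller is reloaded before use. During an in-place upgrade the running process may still hold the module object from before the package update, and the Python 2 built-in reload() re-executes the freshly installed source. The idiom in isolation, demonstrated here on a stdlib module:

    import json

    json = reload(json)  # re-executes the module source (Python 2 built-in)
    dumps = json.dumps   # re-bind names only after the reload
    assert dumps({'a': 1}) == '{"a": 1}'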
Beispiel #50
0
    def execute_update(components):
        """
        Update the specified components on all StorageRouters
        This is called upon by 'at'
        :param components: Components to update
        :return: None
        """
        filemutex = file_mutex('system_update', wait=2)
        ssh_clients = []
        services_stop_start = set()
        try:
            filemutex.acquire()
            UpdateController._logger.debug('+++ Starting update +++')

            from ovs.dal.lists.storagerouterlist import StorageRouterList

            # Create SSHClients to all nodes
            UpdateController._logger.debug('Generating SSH client connections for each storage router')
            storage_routers = StorageRouterList.get_storagerouters()
            master_ips = []
            extra_ips = []
            for sr in storage_routers:
                try:
                    ssh_clients.append(SSHClient(sr.ip, username='******'))
                    if sr.node_type == 'MASTER':
                        master_ips.append(sr.ip)
                    elif sr.node_type == 'EXTRA':
                        extra_ips.append(sr.ip)
                except UnableToConnectException:
                    raise Exception('Update is only allowed on systems where all nodes are online and fully functional')

            # Create locks
            for client in ssh_clients:
                UpdateController._logger.debug('{0}: Creating lock files'.format(client.ip))
                client.run(['touch', UpdateController._update_file])  # Prevents manual install or update individual packages
                client.run(['touch', UpdateController._update_ongoing_file])

            # Check requirements
            packages_to_update = {}
            services_post_update = set()
            update_information = UpdateController.get_update_information_all()
            for component, component_info in update_information.iteritems():
                if component in components:
                    UpdateController._logger.debug('Verifying update information for component: {0}'.format(component.upper()))
                    Toolbox.verify_required_params(actual_params=component_info,
                                                   required_params={'downtime': (list, None),
                                                                    'packages': (dict, None),
                                                                    'prerequisites': (list, None),
                                                                    'services_stop_start': (set, None),
                                                                    'services_post_update': (set, None)})
                    if len(component_info['prerequisites']) > 0:
                        raise Exception('Update is only allowed when all prerequisites have been met')

                    packages_to_update.update(component_info['packages'])
                    services_stop_start.update(component_info['services_stop_start'])
                    services_post_update.update(component_info['services_post_update'])
            if len(packages_to_update) > 0:
                UpdateController._logger.debug('Packages to be updated: {0}'.format(', '.join(sorted(packages_to_update.keys()))))
            if len(services_stop_start) > 0:
                UpdateController._logger.debug('Services to stop before package update: {0}'.format(', '.join(sorted(services_stop_start))))
            if len(services_post_update) > 0:
                UpdateController._logger.debug('Services which will be restarted after update: {0}'.format(', '.join(sorted(services_post_update))))

            # Stop services
            if UpdateController.change_services_state(services=services_stop_start,
                                                      ssh_clients=ssh_clients,
                                                      action='stop') is False:
                raise Exception('Stopping all services on every node failed, cannot continue')

            # Install packages
            # First install packages on all StorageRouters individually
            if packages_to_update:
                failures = False
                for client in ssh_clients:
                    UpdateController._logger.debug('{0}: Installing packages'.format(client.ip))
                    for function in Toolbox.fetch_hooks('update', 'package_install_multi'):
                        try:
                            function(client=client, package_info=packages_to_update, components=components)
                        except Exception as ex:
                            UpdateController._logger.error('{0}: Package installation hook {1} failed with error: {2}'.format(client.ip, function.__name__, ex))
                            failures = True

                if set(components).difference({'framework', 'storagedriver'}):
                    # Second install packages on all ALBA nodes
                    for function in Toolbox.fetch_hooks('update', 'package_install_single'):
                        try:
                            function(package_info=packages_to_update, components=components)
                        except Exception as ex:
                            UpdateController._logger.exception('Package installation hook {0} failed with error: {1}'.format(function.__name__, ex))
                            failures = True

                if failures is True:
                    raise Exception('Installing the packages failed on 1 or more nodes')

            # Remove update file
            for client in ssh_clients:
                client.file_delete(UpdateController._update_file)

            # Migrate code
            if 'framework' in components:
                failures = []
                for client in ssh_clients:
                    UpdateController._logger.debug('{0}: Verifying extensions code migration is required'.format(client.ip))
                    try:
                        key = '/ovs/framework/hosts/{0}/versions'.format(System.get_my_machine_id(client=client))
                        old_versions = Configuration.get(key) if Configuration.exists(key) else {}
                        try:
                            with remote(client.ip, [Migrator]) as rem:
                                rem.Migrator.migrate(master_ips, extra_ips)
                        except EOFError as eof:
                            UpdateController._logger.warning('{0}: EOFError during code migration, retrying {1}'.format(client.ip, eof))
                            with remote(client.ip, [Migrator]) as rem:
                                rem.Migrator.migrate(master_ips, extra_ips)
                        new_versions = Configuration.get(key) if Configuration.exists(key) else {}
                        if old_versions != new_versions:
                            UpdateController._logger.debug('{0}: Finished extensions code migration. Old versions: {1} --> New versions: {2}'.format(client.ip, old_versions, new_versions))
                    except Exception as ex:
                        failures.append('{0}: {1}'.format(client.ip, str(ex)))
                if len(failures) > 0:
                    raise Exception('Failed to run the extensions migrate code on all nodes. Errors found:\n\n{0}'.format('\n\n'.join(failures)))

            # Start memcached
            if 'memcached' in services_stop_start:
                services_stop_start.remove('memcached')
                UpdateController._logger.debug('Starting memcached')
                UpdateController.change_services_state(services=['memcached'],
                                                       ssh_clients=ssh_clients,
                                                       action='start')

            # Migrate model
            if 'framework' in components:
                UpdateController._logger.debug('Verifying DAL code migration is required')
                old_versions = PersistentFactory.get_client().get('ovs_model_version') if PersistentFactory.get_client().exists('ovs_model_version') else {}

                from ovs.dal.helpers import Migration
                with remote(ssh_clients[0].ip, [Migration]) as rem:
                    rem.Migration.migrate()

                new_versions = PersistentFactory.get_client().get('ovs_model_version') if PersistentFactory.get_client().exists('ovs_model_version') else {}
                if old_versions != new_versions:
                    UpdateController._logger.debug('Finished DAL code migration. Old versions: {0} --> New versions: {1}'.format(old_versions, new_versions))

            # Post update actions
            for client in ssh_clients:
                UpdateController._logger.debug('{0}: Executing post-update actions'.format(client.ip))
                for function in Toolbox.fetch_hooks('update', 'post_update_multi'):
                    try:
                        function(client=client, components=components)
                    except Exception as ex:
                        UpdateController._logger.exception('{0}: Post update hook {1} failed with error: {2}'.format(client.ip, function.__name__, ex))

            for function in Toolbox.fetch_hooks('update', 'post_update_single'):
                try:
                    function(components=components)
                except Exception as ex:
                    UpdateController._logger.exception('Post update hook {0} failed with error: {1}'.format(function.__name__, ex))

            # Start services
            UpdateController.change_services_state(services=services_stop_start,
                                                   ssh_clients=ssh_clients,
                                                   action='start')

            UpdateController._refresh_package_information()
            UpdateController._logger.debug('+++ Finished updating +++')
        except NoLockAvailableException:
            UpdateController._logger.debug('Another update is currently in progress!')
        except Exception as ex:
            UpdateController._logger.exception('Error during update: {0}'.format(ex))
            if len(ssh_clients) > 0:
                UpdateController.change_services_state(services=services_stop_start,
                                                       ssh_clients=ssh_clients,
                                                       action='start')
                UpdateController._refresh_package_information()
                UpdateController._logger.error('Failed to update. Please check all the logs for more information')
        finally:
            filemutex.release()
            for ssh_client in ssh_clients:
                for file_name in [UpdateController._update_file, UpdateController._update_ongoing_file]:
                    try:
                        if ssh_client.file_exists(file_name):
                            ssh_client.file_delete(file_name)
                    except:
                        UpdateController._logger.warning('{0}: Failed to remove lock file {1}'.format(ssh_client.ip, file_name))
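
The migration loop above retries the remote Migrator.migrate() call exactly once when the SSH channel is torn down mid-call (EOFError). Stripped of the OVS specifics, the pattern looks like this; a minimal sketch assuming only the standard library, where action, logger and description are hypothetical parameter names:

def retry_once_on_eoferror(action, logger, description):
    # Run action(); if the remote end closes the connection (EOFError),
    # log a warning and retry exactly once, mirroring the migration loop above.
    try:
        return action()
    except EOFError as eof:
        logger.warning('EOFError during {0}, retrying: {1}'.format(description, eof))
        return action()

A second EOFError on the retry propagates to the caller, just as in the original, where it would end up in the failures list.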
Beispiel #51
0
    def cluster_registry_checkup():
        """
        Verify whether changes have occurred in the cluster registry for each vPool
        :return: Information whether changes occurred
        :rtype: dict
        """
        changed_vpools = {}
        for vpool in VPoolList.get_vpools():
            changed_vpools[vpool.guid] = {'changes': False, 'success': True}
            try:
                StorageDriverController._logger.info('Validating cluster registry settings for vPool {0}'.format(vpool.guid))

                current_configs = vpool.clusterregistry_client.get_node_configs()
                changes = len(current_configs) == 0  # An empty registry always requires an update
                node_configs = []
                for sd in vpool.storagedrivers:
                    sd.invalidate_dynamics(['cluster_node_config'])  # Force recalculation of the dynamic property before comparing
                    new_config = sd.cluster_node_config
                    node_configs.append(ClusterNodeConfig(**new_config))
                    if changes is False:
                        current_node_configs = [config for config in current_configs if config.vrouter_id == sd.storagedriver_id]
                        if len(current_node_configs) == 1:
                            current_node_config = current_node_configs[0]
                            for key in new_config:
                                if getattr(current_node_config, key) != new_config[key]:
                                    changes = True
                                    break
                changed_vpools[vpool.guid]['changes'] = changes

                if changes is True:
                    StorageDriverController._logger.info('Cluster registry settings for vPool {0} need to be updated'.format(vpool.guid))
                    available_storagedrivers = []
                    for sd in vpool.storagedrivers:
                        storagerouter = sd.storagerouter
                        try:
                            SSHClient(storagerouter, username='******')
                        except UnableToConnectException:
                            StorageDriverController._logger.warning('StorageRouter {0} not available.'.format(storagerouter.name))
                            continue

                        with remote(storagerouter.ip, [LocalStorageRouterClient]) as rem:
                            sd_key = '/ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, sd.storagedriver_id)
                            if Configuration.exists(sd_key) is True:
                                path = Configuration.get_configuration_path(sd_key)
                                try:
                                    lsrc = rem.LocalStorageRouterClient(path)
                                    lsrc.server_revision()  # 'Cheap' call to verify whether volumedriver is responsive
                                    available_storagedrivers.append(sd)
                                except Exception as ex:
                                    if 'ClusterNotReachableException' in str(ex):
                                        StorageDriverController._logger.warning('StorageDriver {0} on StorageRouter {1} not available.'.format(sd.guid, storagerouter.name))
                                    else:
                                        StorageDriverController._logger.exception('Got exception when validating StorageDriver {0} on StorageRouter {1}.'.format(sd.guid, storagerouter.name))

                    StorageDriverController._logger.info('Updating cluster node configs for vPool {0}'.format(vpool.guid))
                    vpool.clusterregistry_client.set_node_configs(node_configs)
                    for sd in available_storagedrivers:
                        StorageDriverController._logger.info('Triggering config reload for StorageDriver {0}'.format(sd.guid))
                        vpool.storagedriver_client.update_cluster_node_configs(str(sd.storagedriver_id), req_timeout_secs=10)
                    StorageDriverController._logger.info('Updating cluster node configs for vPool {0} completed'.format(vpool.guid))
                else:
                    StorageDriverController._logger.info('Cluster registry settings for vPool {0} are up to date'.format(vpool.guid))
            except Exception as ex:
                StorageDriverController._logger.exception('Got exception when validating cluster registry settings for vPool {0}.'.format(vpool.name))
                changed_vpools[vpool.guid]['success'] = False
                changed_vpools[vpool.guid]['error'] = ex.message
        return changed_vpools
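
The change detection above reduces to: the registry needs an update when it is empty, or when any field of a freshly computed node config differs from the registry entry carrying the same vrouter_id. A standalone sketch of that comparison, using plain dicts keyed by vrouter_id instead of ClusterNodeConfig objects (hypothetical shape, Python 2 as in the surrounding examples):

def registry_needs_update(current_configs, new_configs):
    # current_configs/new_configs: {vrouter_id: {field: value}}
    if len(current_configs) == 0:
        return True  # an empty registry always counts as a change
    for vrouter_id, new_config in new_configs.iteritems():
        current = current_configs.get(vrouter_id)
        if current is None:
            continue  # mirrors the original: drivers without exactly one registry match are skipped
        for key, value in new_config.iteritems():
            if current.get(key) != value:
                return True
    return False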
Beispiel #52
0
    def stop_services(self):
        """
        Stop all services related to the StorageDriver
        :return: True if errors occurred while stopping or removing the services, False otherwise
        :rtype: bool
        """
        if self.sr_installer is None:
            raise RuntimeError('No StorageRouterInstaller instance found')

        root_client = self.sr_installer.root_client
        errors_found = False

        for service in [self.sd_service, self.dtl_service]:
            try:
                if self.service_manager.has_service(name=service, client=root_client):
                    self._logger.debug('StorageDriver {0} - Stopping service {1}'.format(self.storagedriver.guid, service))
                    self.service_manager.stop_service(name=service, client=root_client)
                    self._logger.debug('StorageDriver {0} - Removing service {1}'.format(self.storagedriver.guid, service))
                    self.service_manager.remove_service(name=service, client=root_client)
            except Exception:
                self._logger.exception('StorageDriver {0} - Disabling/stopping service {1} failed'.format(self.storagedriver.guid, service))
                errors_found = True

        sd_config_key = '/ovs/vpools/{0}/hosts/{1}/config'.format(self.vp_installer.vpool.guid, self.storagedriver.storagedriver_id)
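        # Only when this is the vPool's last StorageDriver is the filesystem destroyed below;
        # the ALBA proxies are started first and must respond before destroy_filesystem() is attempted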
        if self.vp_installer.storagedriver_amount <= 1 and Configuration.exists(sd_config_key):
            try:
                for proxy in self.storagedriver.alba_proxies:
                    if self.service_manager.has_service(name=proxy.service.name, client=root_client):
                        self._logger.debug('StorageDriver {0} - Starting proxy {1}'.format(self.storagedriver.guid, proxy.service.name))
                        self.service_manager.start_service(name=proxy.service.name, client=root_client)
                        tries = 10
                        running = False
                        port = proxy.service.ports[0]
                        while running is False and tries > 0:
                            self._logger.debug('StorageDriver {0} - Waiting for the proxy {1} to start up'.format(self.storagedriver.guid, proxy.service.name))
                            tries -= 1
                            time.sleep(10 - tries)
                            try:
                                root_client.run(['alba', 'proxy-statistics', '--host', self.storagedriver.storage_ip, '--port', str(port)])
                                running = True
                            except CalledProcessError as ex:
                                self._logger.error('StorageDriver {0} - Fetching alba proxy-statistics failed with error (but ignoring): {1}'.format(self.storagedriver.guid, ex))
                        if running is False:
                            raise RuntimeError('Alba proxy {0} failed to start'.format(proxy.service.name))
                        self._logger.debug('StorageDriver {0} - Alba proxy {1} running'.format(self.storagedriver.guid, proxy.service.name))

                self._logger.debug('StorageDriver {0} - Destroying filesystem and erasing node configs'.format(self.storagedriver.guid))
                with remote(root_client.ip, [LocalStorageRouterClient], username='******') as rem:
                    path = Configuration.get_configuration_path(sd_config_key)
                    storagedriver_client = rem.LocalStorageRouterClient(path)
                    try:
                        storagedriver_client.destroy_filesystem()
                    except RuntimeError as rte:
                        # If backend has already been deleted, we cannot delete the filesystem anymore --> storage leak!!!
                        if 'MasterLookupResult.Error' not in rte.message:
                            raise

                self.vp_installer.vpool.clusterregistry_client.erase_node_configs()
            except RuntimeError:
                self._logger.exception('StorageDriver {0} - Destroying filesystem and erasing node configs failed'.format(self.storagedriver.guid))
                errors_found = True

        for proxy in self.storagedriver.alba_proxies:
            service_name = proxy.service.name
            try:
                if self.service_manager.has_service(name=service_name, client=root_client):
                    self._logger.debug('StorageDriver {0} - Stopping service {1}'.format(self.storagedriver.guid, service_name))
                    self.service_manager.stop_service(name=service_name, client=root_client)
                    self._logger.debug('StorageDriver {0} - Removing service {1}'.format(self.storagedriver.guid, service_name))
                    self.service_manager.remove_service(name=service_name, client=root_client)
            except Exception:
                self._logger.exception('StorageDriver {0} - Disabling/stopping service {1} failed'.format(self.storagedriver.guid, service_name))
                errors_found = True

        return errors_found
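
The proxy start-up wait in stop_services() is a bounded poll whose sleep interval grows by one second per attempt: time.sleep(10 - tries) sleeps 1s on the first pass, 2s on the second, and so on as tries counts down. The same pattern in isolation, with a hypothetical probe callable standing in for the alba proxy-statistics invocation:

import time

def wait_until_ready(probe, attempts=10):
    # probe() should raise on failure and return normally once the service
    # responds; as above, we sleep before every attempt: 1s, then 2s, ...
    tries = attempts
    while tries > 0:
        tries -= 1
        time.sleep(attempts - tries)
        try:
            probe()
            return True
        except Exception:
            pass
    return False

A caller would then raise, as stop_services() does with its RuntimeError, when wait_until_ready() returns False.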