Example #1
0
 def apply(license_string):
     """
     Applies a license. It will apply as much licenses as possible, however, it won't fail on invalid licenses as it
     will simply skip them.
     """
     try:
         clients = {}
         storagerouters = StorageRouterList.get_storagerouters()
         try:
             for storagerouter in storagerouters:
                 clients[storagerouter] = SSHClient(storagerouter.ip)
         except UnableToConnectException:
             raise RuntimeError('Not all StorageRouters are reachable')
         data = LicenseController._decode(license_string)
         for component in data:
             cdata = data[component]
             name = cdata['name']
             data = cdata['data']
             token = cdata['token']
             valid_until = float(
                 cdata['valid_until']) if 'valid_until' in cdata else None
             if valid_until is not None and valid_until <= time.time():
                 continue
             signature = cdata['signature'] if 'signature' in cdata else None
             validate_functions = Toolbox.fetch_hooks(
                 'license', '{0}.validate'.format(component))
             apply_functions = Toolbox.fetch_hooks(
                 'license', '{0}.apply'.format(component))
             if len(validate_functions) == 1 and len(apply_functions) == 1:
                 valid, metadata = validate_functions[0](
                     component=component, data=data, signature=signature)
                 if valid is True:
                     success = apply_functions[0](component=component,
                                                  data=data,
                                                  signature=signature)
                     if success is True:
                         license_object = LicenseList.get_by_component(
                             component)
                         if license_object is None:
                             license_object = License()
                         license_object.component = component
                         license_object.name = name
                         license_object.token = token
                         license_object.data = data
                         license_object.valid_until = valid_until
                         license_object.signature = signature
                         license_object.save()
         license_contents = []
         for lic in LicenseList.get_licenses():
             license_contents.append(lic.hash)
         for storagerouter in storagerouters:
             client = clients[storagerouter]
             client.file_write('/opt/OpenvStorage/config/licenses',
                               '{0}\n'.format('\n'.join(license_contents)))
     except Exception, ex:
         logger.exception('Error applying license: {0}'.format(ex))
         return None
Example #2
0
    def refresh_package_information():
        """
        Retrieve and store the package information of all StorageRouters
        :return: None
        """
        GenericController._logger.info('Updating package information')
        threads = []
        information = {}
        all_storagerouters = StorageRouterList.get_storagerouters()
        for storagerouter in all_storagerouters:
            information[storagerouter.ip] = {}
            for fct in Toolbox.fetch_hooks('update', 'get_package_info_multi'):
                try:
                    # We make use of these clients in Threads --> cached = False
                    client = SSHClient(endpoint=storagerouter,
                                       username='******',
                                       cached=False)
                except UnableToConnectException:
                    information[storagerouter.ip]['errors'] = [
                        'StorageRouter {0} is inaccessible'.format(
                            storagerouter.name)
                    ]
                    break
                thread = Thread(target=fct, args=(client, information))
                thread.start()
                threads.append(thread)

        for fct in Toolbox.fetch_hooks('update', 'get_package_info_single'):
            thread = Thread(target=fct, args=(information, ))
            thread.start()
            threads.append(thread)

        for thread in threads:
            thread.join()

        errors = []
        copy_information = copy.deepcopy(information)
        for ip, info in information.iteritems():
            if len(info.get('errors', [])) > 0:
                errors.extend(
                    ['{0}: {1}'.format(ip, error) for error in info['errors']])
                copy_information.pop(ip)

        for storagerouter in all_storagerouters:
            info = copy_information.get(storagerouter.ip, {})
            if 'errors' in info:
                info.pop('errors')
            storagerouter.package_information = info
            storagerouter.save()

        if len(errors) > 0:
            errors = [str(error) for error in set(errors)]
            raise Exception(' - {0}'.format('\n - '.join(errors)))
Example #3
0
 def validate(license_string):
     """
     Validates a license with the various components
     """
     try:
         result = {}
         data = LicenseController._decode(license_string)
         for component in data:
             cdata = data[component]
             name = cdata['name']
             data = cdata['data']
             _ = cdata['token']
             valid_until = float(
                 cdata['valid_until']) if 'valid_until' in cdata else None
             if valid_until is not None and valid_until <= time.time():
                 result[component] = False
                 continue
             signature = cdata['signature'] if 'signature' in cdata else None
             validate_functions = Toolbox.fetch_hooks(
                 'license', '{0}.validate'.format(component))
             apply_functions = Toolbox.fetch_hooks(
                 'license', '{0}.apply'.format(component))
             if len(validate_functions) == 1 and len(apply_functions) == 1:
                 try:
                     valid, metadata = validate_functions[0](
                         component=component,
                         data=data,
                         signature=signature)
                 except Exception, ex:
                     logger.debug(
                         'Error validating license for {0}: {1}'.format(
                             component, ex))
                     valid = False
                     metadata = None
                 if valid is False:
                     logger.debug('Invalid license for {0}: {1}'.format(
                         component, license_string))
                     result[component] = False
                 else:
                     result[component] = {
                         'valid_until': valid_until,
                         'metadata': metadata,
                         'name': name
                     }
             else:
                 logger.debug(
                     'No validate nor apply functions found for {0}'.format(
                         component))
                 result[component] = False
         return result
Example #4
0
 def apply(license_string):
     """
     Applies a license. It will apply as much licenses as possible, however, it won't fail on invalid licenses as it
     will simply skip them.
     """
     try:
         clients = {}
         storagerouters = StorageRouterList.get_storagerouters()
         try:
             for storagerouter in storagerouters:
                 clients[storagerouter] = SSHClient(storagerouter.ip)
         except UnableToConnectException:
             raise RuntimeError('Not all StorageRouters are reachable')
         data = LicenseController._decode(license_string)
         for component in data:
             cdata = data[component]
             name = cdata['name']
             data = cdata['data']
             token = cdata['token']
             valid_until = float(cdata['valid_until']) if 'valid_until' in cdata else None
             if valid_until is not None and valid_until <= time.time():
                 continue
             signature = cdata['signature'] if 'signature' in cdata else None
             validate_functions = Toolbox.fetch_hooks('license', '{0}.validate'.format(component))
             apply_functions = Toolbox.fetch_hooks('license', '{0}.apply'.format(component))
             if len(validate_functions) == 1 and len(apply_functions) == 1:
                 valid, metadata = validate_functions[0](component=component, data=data, signature=signature)
                 if valid is True:
                     success = apply_functions[0](component=component, data=data, signature=signature)
                     if success is True:
                         license_object = LicenseList.get_by_component(component)
                         if license_object is None:
                             license_object = License()
                         license_object.component = component
                         license_object.name = name
                         license_object.token = token
                         license_object.data = data
                         license_object.valid_until = valid_until
                         license_object.signature = signature
                         license_object.save()
         license_contents = []
         for lic in LicenseList.get_licenses():
             license_contents.append(lic.hash)
         for storagerouter in storagerouters:
             client = clients[storagerouter]
             client.file_write('/opt/OpenvStorage/config/licenses', '{0}\n'.format('\n'.join(license_contents)))
     except Exception, ex:
         LicenseController._logger.exception('Error applying license: {0}'.format(ex))
         return None
Example #5
0
    def refresh_package_information():
        """
        Retrieve and store the package information of all StorageRouters
        :return: None
        """
        GenericController._logger.info('Updating package information')
        threads = []
        information = {}
        all_storagerouters = StorageRouterList.get_storagerouters()
        for storagerouter in all_storagerouters:
            information[storagerouter.ip] = {}
            for function in Toolbox.fetch_hooks('update', 'get_package_info_multi'):
                try:
                    # We make use of these clients in Threads --> cached = False
                    client = SSHClient(endpoint=storagerouter, username='******', cached=False)
                except UnableToConnectException:
                    information[storagerouter.ip]['errors'] = ['StorageRouter {0} is inaccessible'.format(storagerouter.name)]
                    break
                thread = Thread(target=function,
                                args=(client, information))
                thread.start()
                threads.append(thread)

        for function in Toolbox.fetch_hooks('update', 'get_package_info_single'):
            thread = Thread(target=function,
                            args=(information,))
            thread.start()
            threads.append(thread)

        for thread in threads:
            thread.join()

        errors = []
        copy_information = copy.deepcopy(information)
        for ip, info in information.iteritems():
            if len(info.get('errors', [])) > 0:
                errors.extend(['{0}: {1}'.format(ip, error) for error in info['errors']])
                copy_information.pop(ip)

        for storagerouter in all_storagerouters:
            info = copy_information.get(storagerouter.ip, {})
            if 'errors' in info:
                info.pop('errors')
            storagerouter.package_information = info
            storagerouter.save()

        if len(errors) > 0:
            errors = [str(error) for error in set(errors)]
            raise Exception(' - {0}'.format('\n - '.join(errors)))
Example #6
0
 def remove(license_guid):
     """
     Removes a license
     """
     clients = {}
     storagerouters = StorageRouterList.get_storagerouters()
     try:
         for storagerouter in storagerouters:
             clients[storagerouter] = SSHClient(storagerouter.ip)
     except UnableToConnectException:
         raise RuntimeError('Not all StorageRouters are reachable')
     lic = License(license_guid)
     if lic.can_remove is True:
         remove_functions = Toolbox.fetch_hooks('license', '{0}.remove'.format(lic.component))
         result = remove_functions[0](component=lic.component, data=lic.data, valid_until=lic.valid_until, signature=lic.signature)
         if result is True:
             lic.delete()
             license_contents = []
             for lic in LicenseList.get_licenses():
                 license_contents.append(lic.hash)
             for storagerouter in storagerouters:
                 client = clients[storagerouter]
                 client.file_write('/opt/OpenvStorage/config/licenses', '{0}\n'.format('\n'.join(license_contents)))
         return result
     return None
Example #7
0
 def remove(license_guid):
     """
     Removes a license
     """
     clients = {}
     storagerouters = StorageRouterList.get_storagerouters()
     try:
         for storagerouter in storagerouters:
             clients[storagerouter] = SSHClient(storagerouter.ip)
     except UnableToConnectException:
         raise RuntimeError('Not all StorageRouters are reachable')
     lic = License(license_guid)
     if lic.can_remove is True:
         remove_functions = Toolbox.fetch_hooks(
             'license', '{0}.remove'.format(lic.component))
         result = remove_functions[0](component=lic.component,
                                      data=lic.data,
                                      valid_until=lic.valid_until,
                                      signature=lic.signature)
         if result is True:
             lic.delete()
             license_contents = []
             for lic in LicenseList.get_licenses():
                 license_contents.append(lic.hash)
             for storagerouter in storagerouters:
                 client = clients[storagerouter]
                 client.file_write(
                     '/opt/OpenvStorage/config/licenses',
                     '{0}\n'.format('\n'.join(license_contents)))
         return result
     return None
Example #8
0
    def _get_package_information(self):
        versions_dict = {self._client.ip: {}}
        # ALba is always installed with OpenvStorage. The current split however offloads retrieving Alba information to the AlbaNode which is not
        # present for non openvstorage-hc installs. Therefore explicititely request the alba information like this (otherwise update will get jeopardized).
        final_dict = {}
        threads = []

        for fct in Toolbox.fetch_hooks(
                component='update', sub_component='get_package_info_cluster'):
            thread = Thread(target=fct, args=(self._client, versions_dict))
            thread.start()
            threads.append(thread)

        for thread in threads:
            thread.join()

        for versions in versions_dict[self._client.ip].itervalues():
            for package, version in versions.iteritems():
                if package in final_dict:
                    if version != final_dict[package]:
                        final_dict[package] = min(version, final_dict[package])
                else:
                    final_dict[package] = version
        return OrderedDict(
            (key, self._stringify_looseversion(value))
            for key, value in sorted(final_dict.items(), key=lambda v: v[0]))
Example #9
0
 def _can_remove(self):
     """
     Can be removed
     """
     return len(
         Toolbox.fetch_hooks('license',
                             '{0}.remove'.format(self.component))) == 1
Example #10
0
 def run_backend_domain_hooks(backend_guid):
     """
     Run hooks when the Backend Domains have been updated
     :param backend_guid: Guid of the Backend to update
     :type backend_guid: str
     :return: None
     """
     for function in Toolbox.fetch_hooks('backend', 'domains-update'):
         function(backend_guid=backend_guid)
Example #11
0
 def run_backend_domain_hooks(backend_guid):
     """
     Run hooks when the Backend Domains have been updated
     :param backend_guid: Guid of the Backend to update
     :type backend_guid: str
     :return: None
     """
     for fct in Toolbox.fetch_hooks('backend', 'domains-update'):
         fct(backend_guid=backend_guid)
Example #12
0
    def install_plugins():
        """
        (Re)load plugins
        """
        if ServiceManager.has_service('ovs-watcher-framework',
                                      SSHClient('127.0.0.1', username='******')):
            # If the watcher is running, 'ovs setup' was executed and we need to restart everything to load
            # the plugin. In the other case, the plugin will be loaded once 'ovs setup' is executed
            from ovs.dal.lists.storagerouterlist import StorageRouterList
            clients = []
            try:
                for storagerouter in StorageRouterList.get_storagerouters():
                    clients.append(SSHClient(storagerouter, username='******'))
            except UnableToConnectException:
                raise RuntimeError('Not all StorageRouters are reachable')

            for client in clients:
                for service_name in ['watcher-framework', 'memcached']:
                    ServiceManager.stop_service(service_name, client=client)
                    wait = 30
                    while wait > 0:
                        if ServiceManager.get_service_status(
                                service_name, client=client) is False:
                            break
                        time.sleep(1)
                        wait -= 1
                    if wait == 0:
                        raise RuntimeError(
                            'Could not stop service: {0}'.format(service_name))

            for client in clients:
                for service_name in ['memcached', 'watcher-framework']:
                    ServiceManager.start_service(service_name, client=client)
                    wait = 30
                    while wait > 0:
                        if ServiceManager.get_service_status(
                                service_name, client=client) is True:
                            break
                        time.sleep(1)
                        wait -= 1
                    if wait == 0:
                        raise RuntimeError(
                            'Could not start service: {0}'.format(
                                service_name))

            from ovs.dal.helpers import Migration
            Migration.migrate()

            from ovs.lib.helpers.toolbox import Toolbox
            ip = System.get_my_storagerouter().ip
            functions = Toolbox.fetch_hooks('plugin', 'postinstall')
            for function in functions:
                function(ip=ip)
Example #13
0
 def validate(license_string):
     """
     Validates a license with the various components
     """
     try:
         result = {}
         data = LicenseController._decode(license_string)
         for component in data:
             cdata = data[component]
             name = cdata['name']
             data = cdata['data']
             _ = cdata['token']
             valid_until = float(cdata['valid_until']) if 'valid_until' in cdata else None
             if valid_until is not None and valid_until <= time.time():
                 result[component] = False
                 continue
             signature = cdata['signature'] if 'signature' in cdata else None
             validate_functions = Toolbox.fetch_hooks('license', '{0}.validate'.format(component))
             apply_functions = Toolbox.fetch_hooks('license', '{0}.apply'.format(component))
             if len(validate_functions) == 1 and len(apply_functions) == 1:
                 try:
                     valid, metadata = validate_functions[0](component=component, data=data, signature=signature)
                 except Exception, ex:
                     LicenseController._logger.debug('Error validating license for {0}: {1}'.format(component, ex))
                     valid = False
                     metadata = None
                 if valid is False:
                     LicenseController._logger.debug('Invalid license for {0}: {1}'.format(component, license_string))
                     result[component] = False
                 else:
                     result[component] = {'valid_until': valid_until,
                                          'metadata': metadata,
                                          'name': name}
             else:
                 LicenseController._logger.debug('No validate nor apply functions found for {0}'.format(component))
                 result[component] = False
         return result
Example #14
0
 def merge_package_information():
     """
     Retrieve the package information from the model for both StorageRouters and ALBA Nodes and merge it
     :return: Package information for all StorageRouters and ALBA nodes
     :rtype: dict
     """
     package_info = dict((storagerouter.ip, storagerouter.package_information) for storagerouter in StorageRouterList.get_storagerouters())
     for function in Toolbox.fetch_hooks('update', 'merge_package_info'):
         output = function()
         for ip in output:
             if ip in package_info:
                 package_info[ip].update(output[ip])
             else:
                 package_info[ip] = output[ip]
     return package_info
Example #15
0
    def install_plugins():
        """
        (Re)load plugins
        """
        manager = ServiceFactory.get_manager()
        if manager.has_service('ovs-watcher-framework',
                               SSHClient('127.0.0.1', username='******')):
            # If the watcher is running, 'ovs setup' was executed and we need to restart everything to load
            # the plugin. In the other case, the plugin will be loaded once 'ovs setup' is executed
            print 'Installing plugin into Open vStorage'
            from ovs.dal.lists.storagerouterlist import StorageRouterList
            clients = {}
            masters = StorageRouterList.get_masters()
            slaves = StorageRouterList.get_slaves()
            try:
                for sr in masters + slaves:
                    clients[sr] = SSHClient(sr, username='******')
            except UnableToConnectException:
                raise RuntimeError('Not all StorageRouters are reachable')
            memcached = 'memcached'
            watcher = 'watcher-framework'
            for sr in masters + slaves:
                if manager.has_service(watcher, clients[sr]):
                    print '- Stopping watcher on {0} ({1})'.format(
                        sr.name, sr.ip)
                    manager.stop_service(watcher, clients[sr])
            for sr in masters:
                print '- Restarting memcached on {0} ({1})'.format(
                    sr.name, sr.ip)
                manager.restart_service(memcached, clients[sr])
            for sr in masters + slaves:
                if manager.has_service(watcher, clients[sr]):
                    print '- Starting watcher on {0} ({1})'.format(
                        sr.name, sr.ip)
                    manager.start_service(watcher, clients[sr])

            print '- Execute model migrations'
            from ovs.dal.helpers import Migration
            Migration.migrate()

            from ovs.lib.helpers.toolbox import Toolbox
            ip = System.get_my_storagerouter().ip
            functions = Toolbox.fetch_hooks('plugin', 'postinstall')
            if len(functions) > 0:
                print '- Execute post installation scripts'
            for fct in functions:
                fct(ip=ip)
            print 'Installing plugin into Open vStorage: Completed'
Example #16
0
    def install_plugins():
        """
        (Re)load plugins
        """
        if ServiceManager.has_service('ovs-watcher-framework', SSHClient('127.0.0.1', username='******')):
            # If the watcher is running, 'ovs setup' was executed and we need to restart everything to load
            # the plugin. In the other case, the plugin will be loaded once 'ovs setup' is executed
            from ovs.dal.lists.storagerouterlist import StorageRouterList
            clients = []
            try:
                for storagerouter in StorageRouterList.get_storagerouters():
                    clients.append(SSHClient(storagerouter, username='******'))
            except UnableToConnectException:
                raise RuntimeError('Not all StorageRouters are reachable')

            for client in clients:
                for service_name in ['watcher-framework', 'memcached']:
                    ServiceManager.stop_service(service_name, client=client)
                    wait = 30
                    while wait > 0:
                        if ServiceManager.get_service_status(service_name, client=client) is False:
                            break
                        time.sleep(1)
                        wait -= 1
                    if wait == 0:
                        raise RuntimeError('Could not stop service: {0}'.format(service_name))

            for client in clients:
                for service_name in ['memcached', 'watcher-framework']:
                    ServiceManager.start_service(service_name, client=client)
                    wait = 30
                    while wait > 0:
                        if ServiceManager.get_service_status(service_name, client=client) is True:
                            break
                        time.sleep(1)
                        wait -= 1
                    if wait == 0:
                        raise RuntimeError('Could not start service: {0}'.format(service_name))

            from ovs.dal.helpers import Migration
            Migration.migrate()

            from ovs.lib.helpers.toolbox import Toolbox
            ip = System.get_my_storagerouter().ip
            functions = Toolbox.fetch_hooks('plugin', 'postinstall')
            for function in functions:
                function(ip=ip)
Example #17
0
    def install_plugins():
        """
        (Re)load plugins
        """
        if ServiceManager.has_service('ovs-watcher-framework', SSHClient('127.0.0.1', username='******')):
            # If the watcher is running, 'ovs setup' was executed and we need to restart everything to load
            # the plugin. In the other case, the plugin will be loaded once 'ovs setup' is executed
            print 'Installing plugin into Open vStorage'
            from ovs.dal.lists.storagerouterlist import StorageRouterList
            clients = {}
            masters = StorageRouterList.get_masters()
            slaves = StorageRouterList.get_slaves()
            try:
                for sr in masters + slaves:
                    clients[sr] = SSHClient(sr, username='******')
            except UnableToConnectException:
                raise RuntimeError('Not all StorageRouters are reachable')
            memcached = 'memcached'
            watcher = 'watcher-framework'
            for sr in masters + slaves:
                if ServiceManager.has_service(watcher, clients[sr]):
                    print '- Stopping watcher on {0} ({1})'.format(sr.name, sr.ip)
                    ServiceManager.stop_service(watcher, clients[sr])
            for sr in masters:
                print '- Restarting memcached on {0} ({1})'.format(sr.name, sr.ip)
                ServiceManager.restart_service(memcached, clients[sr])
            for sr in masters + slaves:
                if ServiceManager.has_service(watcher, clients[sr]):
                    print '- Starting watcher on {0} ({1})'.format(sr.name, sr.ip)
                    ServiceManager.start_service(watcher, clients[sr])

            print '- Execute model migrations'
            from ovs.dal.helpers import Migration
            Migration.migrate()

            from ovs.lib.helpers.toolbox import Toolbox
            ip = System.get_my_storagerouter().ip
            functions = Toolbox.fetch_hooks('plugin', 'postinstall')
            if len(functions) > 0:
                print '- Execute post installation scripts'
            for function in functions:
                function(ip=ip)
            print 'Installing plugin into Open vStorage: Completed'
Example #18
0
    def get_update_information_all():
        """
        Retrieve the update information for all StorageRouters
        This contains information about
            - downtime of model, GUI, vPools, proxies, ...
            - services that will be restarted
            - packages that will be updated
            - prerequisites that have not been met
        :return: Information about the update
        :rtype: dict
        """
        information = {}
        for function in Toolbox.fetch_hooks('update', 'information'):
            function(information=information)

        for component, info in copy.deepcopy(information).iteritems():
            if len(info['packages']) == 0:
                information.pop(component)
        return information
Example #19
0
    def update_framework():
        """
        Update the framework
        :return: None
        """
        filemutex = file_mutex('system_update', wait=2)
        upgrade_file = '/etc/ready_for_upgrade'
        upgrade_ongoing_check_file = '/etc/upgrade_ongoing'
        ssh_clients = []
        try:
            filemutex.acquire()
            UpdateController._log_message('+++ Starting framework update +++')

            from ovs.dal.lists.storagerouterlist import StorageRouterList

            UpdateController._log_message('Generating SSH client connections for each storage router')
            upgrade_file = '/etc/ready_for_upgrade'
            upgrade_ongoing_check_file = '/etc/upgrade_ongoing'
            storage_routers = StorageRouterList.get_storagerouters()
            ssh_clients = []
            master_ips = []
            extra_ips = []
            for sr in storage_routers:
                ssh_clients.append(SSHClient(sr.ip, username='******'))
                if sr.node_type == 'MASTER':
                    master_ips.append(sr.ip)
                elif sr.node_type == 'EXTRA':
                    extra_ips.append(sr.ip)
            this_client = [client for client in ssh_clients if client.is_local is True][0]

            # Create locks
            UpdateController._log_message('Creating lock files', client_ip=this_client.ip)
            for client in ssh_clients:
                client.run('touch {0}'.format(upgrade_file))  # Prevents manual install or upgrade individual packages
                client.run('touch {0}'.format(upgrade_ongoing_check_file))  # Prevents clicking x times on 'Update' btn

            # Check requirements
            packages_to_update = set()
            all_services_to_restart = []
            for client in ssh_clients:
                for function in Toolbox.fetch_hooks('update', 'metadata'):
                    UpdateController._log_message('Executing function {0}'.format(function.__name__),
                                                  client_ip=client.ip)
                    output = function(client)
                    for key, value in output.iteritems():
                        if key != 'framework':
                            continue
                        for package_info in value:
                            packages_to_update.update(package_info['packages'])
                            all_services_to_restart += package_info['services']

            services_to_restart = []
            for service in all_services_to_restart:
                if service not in services_to_restart:
                    services_to_restart.append(service)  # Filter out duplicates maintaining the order of services (eg: watcher-framework before memcached)

            UpdateController._log_message('Services which will be restarted --> {0}'.format(', '.join(services_to_restart)))
            UpdateController._log_message('Packages which will be installed --> {0}'.format(', '.join(packages_to_update)))

            # Stop services
            if UpdateController._change_services_state(services=services_to_restart,
                                                       ssh_clients=ssh_clients,
                                                       action='stop') is False:
                UpdateController._log_message('Stopping all services on every node failed, cannot continue',
                                              client_ip=this_client.ip, severity='warning')
                UpdateController._remove_lock_files([upgrade_file, upgrade_ongoing_check_file], ssh_clients)

                # Start services again if a service could not be stopped
                UpdateController._log_message('Attempting to start the services again', client_ip=this_client.ip)
                UpdateController._change_services_state(services=services_to_restart,
                                                        ssh_clients=ssh_clients,
                                                        action='start')

                UpdateController._log_message('Failed to stop all required services, aborting update',
                                              client_ip=this_client.ip, severity='error')
                return

            # Update packages
            failed_clients = []
            for client in ssh_clients:
                PackageManager.update(client=client)
                try:
                    UpdateController._log_message('Installing latest packages', client.ip)
                    for package in packages_to_update:
                        UpdateController._log_message('Installing {0}'.format(package), client.ip)
                        PackageManager.install(package_name=package,
                                               client=client,
                                               force=True)
                        UpdateController._log_message('Installed {0}'.format(package), client.ip)
                    client.file_delete(upgrade_file)
                except subprocess.CalledProcessError as cpe:
                    UpdateController._log_message('Upgrade failed with error: {0}'.format(cpe.output), client.ip,
                                                  'error')
                    failed_clients.append(client)
                    break

            if failed_clients:
                UpdateController._remove_lock_files([upgrade_file, upgrade_ongoing_check_file], ssh_clients)
                UpdateController._log_message('Error occurred. Attempting to start all services again',
                                              client_ip=this_client.ip, severity='error')
                UpdateController._change_services_state(services=services_to_restart,
                                                        ssh_clients=ssh_clients,
                                                        action='start')
                UpdateController._log_message('Failed to upgrade following nodes:\n - {0}\nPlease check /var/log/ovs/lib.log on {1} for more information'.format('\n - '.join([client.ip for client in failed_clients]), this_client.ip),
                                              this_client.ip,
                                              'error')
                return

            # Migrate code
            for client in ssh_clients:
                try:
                    UpdateController._log_message('Started code migration', client.ip)
                    try:
                        with remote(client.ip, [Migrator]) as rem:
                            rem.Migrator.migrate(master_ips, extra_ips)
                    except EOFError as eof:
                        UpdateController._log_message('EOFError during code migration, retrying {0}'.format(eof), client.ip, 'warning')
                        with remote(client.ip, [Migrator]) as rem:
                            rem.Migrator.migrate(master_ips, extra_ips)
                    UpdateController._log_message('Finished code migration', client.ip)
                except Exception as ex:
                    UpdateController._remove_lock_files([upgrade_ongoing_check_file], ssh_clients)
                    UpdateController._log_message('Code migration failed with error: {0}'.format(ex), client.ip, 'error')
                    return

            # Start services
            UpdateController._log_message('Starting services', client_ip=this_client.ip)
            model_services = []
            if 'arakoon-ovsdb' in services_to_restart:
                model_services.append('arakoon-ovsdb')
                services_to_restart.remove('arakoon-ovsdb')
            if 'memcached' in services_to_restart:
                model_services.append('memcached')
                services_to_restart.remove('memcached')
            UpdateController._change_services_state(services=model_services,
                                                    ssh_clients=ssh_clients,
                                                    action='start')

            # Migrate model
            UpdateController._log_message('Started model migration', client_ip=this_client.ip)
            try:
                from ovs.dal.helpers import Migration
                with remote(ssh_clients[0].ip, [Migration]) as rem:
                    rem.Migration.migrate()
                UpdateController._log_message('Finished model migration', client_ip=this_client.ip)
            except Exception as ex:
                UpdateController._remove_lock_files([upgrade_ongoing_check_file], ssh_clients)
                UpdateController._log_message('An unexpected error occurred: {0}'.format(ex), client_ip=this_client.ip,
                                              severity='error')
                return

            # Post upgrade actions
            UpdateController._log_message('Executing post upgrade actions', client_ip=this_client.ip)
            for client in ssh_clients:
                with remote(client.ip, [Toolbox, SSHClient]) as rem:
                    for function in rem.Toolbox.fetch_hooks('update', 'postupgrade'):
                        UpdateController._log_message('Executing action {0}'.format(function.__name__),
                                                      client_ip=client.ip)
                        try:
                            function(rem.SSHClient(client.ip, username='******'))
                            UpdateController._log_message('Executing action {0} completed'.format(function.__name__),
                                                          client_ip=client.ip)
                        except Exception as ex:
                            UpdateController._log_message('Post upgrade action failed with error: {0}'.format(ex),
                                                          client.ip, 'error')

            # Start watcher and restart support-agent
            UpdateController._change_services_state(services=services_to_restart,
                                                    ssh_clients=ssh_clients,
                                                    action='start')
            UpdateController._change_services_state(services=['support-agent'],
                                                    ssh_clients=ssh_clients,
                                                    action='restart')

            UpdateController._remove_lock_files([upgrade_ongoing_check_file], ssh_clients)
            UpdateController._log_message('+++ Finished updating +++')
        except RuntimeError as rte:
            UpdateController._log_message('Error during framework update: {0}'.format(rte), severity='error')
            UpdateController._remove_lock_files([upgrade_file, upgrade_ongoing_check_file], ssh_clients)
        except NoLockAvailableException:
            UpdateController._log_message('Another framework update is currently in progress!')
        except Exception as ex:
            UpdateController._log_message('Error during framework update: {0}'.format(ex), severity='error')
            UpdateController._remove_lock_files([upgrade_file, upgrade_ongoing_check_file], ssh_clients)
        finally:
            filemutex.release()
Example #20
0
    def remove_node(node_ip, silent=None):
        """
        Remove the node with specified IP from the cluster
        :param node_ip: IP of the node to remove
        :type node_ip: str
        :param silent: If silent == '--force-yes' no question will be asked to confirm the removal
        :type silent: str
        :return: None
        """
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.lib.storagedriver import StorageDriverController
        from ovs.lib.vpool import VPoolController

        Toolbox.log(logger=NodeRemovalController._logger,
                    messages='Remove node',
                    boxed=True)
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages=
            'WARNING: Some of these steps may take a very long time, please check the logs for more information\n\n'
        )
        service_manager = ServiceFactory.get_manager()

        ###############
        # VALIDATIONS #
        ###############
        try:
            node_ip = node_ip.strip()
            if not isinstance(node_ip, str):
                raise ValueError('Node IP must be a string')
            if not re.match(SSHClient.IP_REGEX, node_ip):
                raise ValueError('Invalid IP {0} specified'.format(node_ip))

            storage_router_all = sorted(StorageRouterList.get_storagerouters(),
                                        key=lambda k: k.name)
            storage_router_masters = StorageRouterList.get_masters()
            storage_router_all_ips = set(
                [storage_router.ip for storage_router in storage_router_all])
            storage_router_master_ips = set([
                storage_router.ip for storage_router in storage_router_masters
            ])
            storage_router_to_remove = StorageRouterList.get_by_ip(node_ip)
            offline_reasons = {}
            if node_ip not in storage_router_all_ips:
                raise ValueError(
                    'Unknown IP specified\nKnown in model:\n - {0}\nSpecified for removal:\n - {1}'
                    .format('\n - '.join(storage_router_all_ips), node_ip))

            if len(storage_router_all_ips) == 1:
                raise RuntimeError("Removing the only node is not possible")

            if node_ip in storage_router_master_ips and len(
                    storage_router_master_ips) == 1:
                raise RuntimeError(
                    "Removing the only master node is not possible")

            if System.get_my_storagerouter() == storage_router_to_remove:
                raise RuntimeError(
                    'The node to be removed cannot be identical to the node on which the removal is initiated'
                )

            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages='Creating SSH connections to remaining master nodes')
            master_ip = None
            ip_client_map = {}
            storage_routers_offline = []
            storage_router_to_remove_online = True
            for storage_router in storage_router_all:
                try:
                    client = SSHClient(storage_router,
                                       username='******',
                                       timeout=10)
                except (UnableToConnectException, NotAuthenticatedException,
                        TimeOutException) as ex:
                    if isinstance(ex, UnableToConnectException):
                        msg = 'Unable to connect'
                    elif isinstance(ex, NotAuthenticatedException):
                        msg = 'Could not authenticate'
                    elif isinstance(ex, TimeOutException):
                        msg = 'Connection timed out'
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages='  * Node with IP {0:<15}- {1}'.format(
                            storage_router.ip, msg))
                    offline_reasons[storage_router.ip] = msg
                    storage_routers_offline.append(storage_router)
                    if storage_router == storage_router_to_remove:
                        storage_router_to_remove_online = False
                    continue

                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages='  * Node with IP {0:<15}- Successfully connected'
                    .format(storage_router.ip))
                ip_client_map[storage_router.ip] = client
                if storage_router != storage_router_to_remove and storage_router.node_type == 'MASTER':
                    master_ip = storage_router.ip

            if len(ip_client_map) == 0 or master_ip is None:
                raise RuntimeError(
                    'Could not connect to any master node in the cluster')

            storage_router_to_remove.invalidate_dynamics('vdisks_guids')
            if len(
                    storage_router_to_remove.vdisks_guids
            ) > 0:  # vDisks are supposed to be moved away manually before removing a node
                raise RuntimeError(
                    "Still vDisks attached to Storage Router {0}".format(
                        storage_router_to_remove.name))

            internal_memcached = Toolbox.is_service_internally_managed(
                service='memcached')
            internal_rabbit_mq = Toolbox.is_service_internally_managed(
                service='rabbitmq')
            memcached_endpoints = Configuration.get(
                key='/ovs/framework/memcache|endpoints')
            rabbit_mq_endpoints = Configuration.get(
                key='/ovs/framework/messagequeue|endpoints')
            copy_memcached_endpoints = list(memcached_endpoints)
            copy_rabbit_mq_endpoints = list(rabbit_mq_endpoints)
            for endpoint in memcached_endpoints:
                if endpoint.startswith(storage_router_to_remove.ip):
                    copy_memcached_endpoints.remove(endpoint)
            for endpoint in rabbit_mq_endpoints:
                if endpoint.startswith(storage_router_to_remove.ip):
                    copy_rabbit_mq_endpoints.remove(endpoint)
            if len(copy_memcached_endpoints
                   ) == 0 and internal_memcached is True:
                raise RuntimeError(
                    'Removal of provided nodes will result in a complete removal of the memcached service'
                )
            if len(copy_rabbit_mq_endpoints
                   ) == 0 and internal_rabbit_mq is True:
                raise RuntimeError(
                    'Removal of provided nodes will result in a complete removal of the messagequeue service'
                )

            Toolbox.run_hooks(component='noderemoval',
                              sub_component='validate_removal',
                              logger=NodeRemovalController._logger,
                              cluster_ip=storage_router_to_remove.ip)
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=
                'Removal has been aborted during the validation step. No changes have been applied.',
                boxed=True,
                loglevel='warning')
            sys.exit(1)
        except Exception as exception:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages=[str(exception)],
                        boxed=True,
                        loglevel='exception')
            sys.exit(1)

        #################
        # CONFIRMATIONS #
        #################
        try:
            interactive = silent != '--force-yes'
            remove_asd_manager = not interactive  # Remove ASD manager if non-interactive else ask
            if interactive is True:
                if len(storage_routers_offline) > 0:
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages=
                        'Certain nodes appear to be offline. These will not fully removed and will cause issues if they are not really offline.'
                    )
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages='Offline nodes: {0}'.format(''.join(
                            ('\n  * {0:<15}- {1}.'.format(ip, message)
                             for ip, message in offline_reasons.iteritems()))))
                    valid_node_info = Interactive.ask_yesno(
                        message=
                        'Continue the removal with these being presumably offline?',
                        default_value=False)
                    if valid_node_info is False:
                        Toolbox.log(
                            logger=NodeRemovalController._logger,
                            messages=
                            'Please validate the state of the nodes before removing.',
                            title=True)
                        sys.exit(1)
                proceed = Interactive.ask_yesno(
                    message='Are you sure you want to remove node {0}?'.format(
                        storage_router_to_remove.name),
                    default_value=False)
                if proceed is False:
                    Toolbox.log(logger=NodeRemovalController._logger,
                                messages='Abort removal',
                                title=True)
                    sys.exit(1)

                remove_asd_manager = True
                if storage_router_to_remove_online is True:
                    client = SSHClient(endpoint=storage_router_to_remove,
                                       username='******')
                    if service_manager.has_service(name='asd-manager',
                                                   client=client):
                        remove_asd_manager = Interactive.ask_yesno(
                            message=
                            'Do you also want to remove the ASD manager and related ASDs?',
                            default_value=False)

                if remove_asd_manager is True or storage_router_to_remove_online is False:
                    for fct in Toolbox.fetch_hooks('noderemoval',
                                                   'validate_asd_removal'):
                        validation_output = fct(storage_router_to_remove.ip)
                        if validation_output['confirm'] is True:
                            if Interactive.ask_yesno(
                                    message=validation_output['question'],
                                    default_value=False) is False:
                                remove_asd_manager = False
                                break
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=
                'Removal has been aborted during the confirmation step. No changes have been applied.',
                boxed=True,
                loglevel='warning')
            sys.exit(1)
        except Exception as exception:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages=[str(exception)],
                        boxed=True,
                        loglevel='exception')
            sys.exit(1)
        ###########
        # REMOVAL #
        ###########
        try:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='Starting removal of node {0} - {1}'.format(
                            storage_router_to_remove.name,
                            storage_router_to_remove.ip))
            if storage_router_to_remove_online is False:
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages=
                    '  Marking all Storage Drivers served by Storage Router {0} as offline'
                    .format(storage_router_to_remove.ip))
                StorageDriverController.mark_offline(
                    storagerouter_guid=storage_router_to_remove.guid)

            # Remove vPools
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='  Removing vPools from node'.format(
                            storage_router_to_remove.ip))
            storage_routers_offline_guids = [
                sr.guid for sr in storage_routers_offline
                if sr.guid != storage_router_to_remove.guid
            ]
            for storage_driver in storage_router_to_remove.storagedrivers:
                Toolbox.log(logger=NodeRemovalController._logger,
                            messages='    Removing vPool {0} from node'.format(
                                storage_driver.vpool.name))
                VPoolController.shrink_vpool(
                    storagedriver_guid=storage_driver.guid,
                    offline_storage_router_guids=storage_routers_offline_guids)

            # Demote if MASTER
            if storage_router_to_remove.node_type == 'MASTER':
                NodeTypeController.demote_node(
                    cluster_ip=storage_router_to_remove.ip,
                    master_ip=master_ip,
                    ip_client_map=ip_client_map,
                    unique_id=storage_router_to_remove.machine_id,
                    unconfigure_memcached=internal_memcached,
                    unconfigure_rabbitmq=internal_rabbit_mq,
                    offline_nodes=storage_routers_offline)

            # Stop / remove services
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='Stopping and removing services')
            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove,
                                   username='******')
                NodeRemovalController.remove_services(
                    client=client,
                    node_type=storage_router_to_remove.node_type.lower(),
                    logger=NodeRemovalController._logger)
                service = 'watcher-config'
                if service_manager.has_service(service, client=client):
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages='Removing service {0}'.format(service))
                    service_manager.stop_service(service, client=client)
                    service_manager.remove_service(service, client=client)

            Toolbox.run_hooks(component='noderemoval',
                              sub_component='remove',
                              logger=NodeRemovalController._logger,
                              cluster_ip=storage_router_to_remove.ip,
                              complete_removal=remove_asd_manager)

            # Clean up model
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='Removing node from model')
            for service in storage_router_to_remove.services:
                service.delete()
            for disk in storage_router_to_remove.disks:
                for partition in disk.partitions:
                    partition.delete()
                disk.delete()
            for j_domain in storage_router_to_remove.domains:
                j_domain.delete()
            Configuration.delete('/ovs/framework/hosts/{0}'.format(
                storage_router_to_remove.machine_id))

            NodeTypeController.restart_framework_and_memcache_services(
                clients=ip_client_map,
                offline_node_ips=[node.ip for node in storage_routers_offline],
                logger=NodeRemovalController._logger)

            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove,
                                   username='******')
                client.file_delete(filenames=[CACC_LOCATION])
                client.file_delete(filenames=[CONFIG_STORE_LOCATION])
            storage_router_to_remove.delete()
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='Successfully removed node\n')
        except Exception as exception:
            Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=['An unexpected error occurred:',
                          str(exception)],
                boxed=True,
                loglevel='exception')
            sys.exit(1)
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=
                'This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.',
                boxed=True,
                loglevel='error')
            sys.exit(1)

        if remove_asd_manager is True and storage_router_to_remove_online is True:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='\nRemoving ASD Manager')
            with remote(storage_router_to_remove.ip, [os]) as rem:
                rem.os.system('asd-manager remove --force-yes')
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages='Remove nodes finished',
                    title=True)
Example #21
0
    def update_framework():
        """
        Update the framework
        :return: None
        """
        file_mutex = FileMutex('system_update', wait=2)
        upgrade_file = '/etc/ready_for_upgrade'
        upgrade_ongoing_check_file = '/etc/upgrade_ongoing'
        ssh_clients = []
        try:
            file_mutex.acquire()
            UpdateController._log_message('+++ Starting framework update +++')

            from ovs.dal.lists.storagerouterlist import StorageRouterList

            UpdateController._log_message(
                'Generating SSH client connections for each storage router')
            upgrade_file = '/etc/ready_for_upgrade'
            upgrade_ongoing_check_file = '/etc/upgrade_ongoing'
            storage_routers = StorageRouterList.get_storagerouters()
            ssh_clients = []
            master_ips = []
            extra_ips = []
            for sr in storage_routers:
                ssh_clients.append(SSHClient(sr.ip, username='******'))
                if sr.node_type == 'MASTER':
                    master_ips.append(sr.ip)
                elif sr.node_type == 'EXTRA':
                    extra_ips.append(sr.ip)
            this_client = [
                client for client in ssh_clients if client.is_local is True
            ][0]

            # Create locks
            UpdateController._log_message('Creating lock files',
                                          client_ip=this_client.ip)
            for client in ssh_clients:
                client.run(
                    'touch {0}'.format(upgrade_file)
                )  # Prevents manual install or upgrade individual packages
                client.run('touch {0}'.format(upgrade_ongoing_check_file)
                           )  # Prevents clicking x times on 'Update' btn

            # Check requirements
            packages_to_update = set()
            all_services_to_restart = []
            for client in ssh_clients:
                for function in Toolbox.fetch_hooks('update', 'metadata'):
                    UpdateController._log_message(
                        'Executing function {0}'.format(function.__name__),
                        client_ip=client.ip)
                    output = function(client)
                    for key, value in output.iteritems():
                        if key != 'framework':
                            continue
                        for package_info in value:
                            packages_to_update.update(package_info['packages'])
                            all_services_to_restart += package_info['services']

            services_to_restart = []
            for service in all_services_to_restart:
                if service not in services_to_restart:
                    services_to_restart.append(
                        service
                    )  # Filter out duplicates maintaining the order of services (eg: watcher-framework before memcached)

            UpdateController._log_message(
                'Services which will be restarted --> {0}'.format(
                    ', '.join(services_to_restart)))
            UpdateController._log_message(
                'Packages which will be installed --> {0}'.format(
                    ', '.join(packages_to_update)))

            # Stop services
            if UpdateController._change_services_state(
                    services=services_to_restart,
                    ssh_clients=ssh_clients,
                    action='stop') is False:
                UpdateController._log_message(
                    'Stopping all services on every node failed, cannot continue',
                    client_ip=this_client.ip,
                    severity='warning')
                UpdateController._remove_lock_files(
                    [upgrade_file, upgrade_ongoing_check_file], ssh_clients)

                # Start services again if a service could not be stopped
                UpdateController._log_message(
                    'Attempting to start the services again',
                    client_ip=this_client.ip)
                UpdateController._change_services_state(
                    services=services_to_restart,
                    ssh_clients=ssh_clients,
                    action='start')

                UpdateController._log_message(
                    'Failed to stop all required services, aborting update',
                    client_ip=this_client.ip,
                    severity='error')
                return

            # Update packages
            failed_clients = []
            for client in ssh_clients:
                PackageManager.update(client=client)
                try:
                    UpdateController._log_message('Installing latest packages',
                                                  client.ip)
                    for package in packages_to_update:
                        UpdateController._log_message(
                            'Installing {0}'.format(package), client.ip)
                        PackageManager.install(package_name=package,
                                               client=client,
                                               force=True)
                        UpdateController._log_message(
                            'Installed {0}'.format(package), client.ip)
                    client.file_delete(upgrade_file)
                except subprocess.CalledProcessError as cpe:
                    UpdateController._log_message(
                        'Upgrade failed with error: {0}'.format(cpe.output),
                        client.ip, 'error')
                    failed_clients.append(client)
                    break

            if failed_clients:
                UpdateController._remove_lock_files(
                    [upgrade_file, upgrade_ongoing_check_file], ssh_clients)
                UpdateController._log_message(
                    'Error occurred. Attempting to start all services again',
                    client_ip=this_client.ip,
                    severity='error')
                UpdateController._change_services_state(
                    services=services_to_restart,
                    ssh_clients=ssh_clients,
                    action='start')
                UpdateController._log_message(
                    'Failed to upgrade following nodes:\n - {0}\nPlease check /var/log/ovs/lib.log on {1} for more information'
                    .format('\n - '.join([
                        client.ip for client in failed_clients
                    ])), this_client.ip, 'error')
                return

            # Migrate code
            for client in ssh_clients:
                try:
                    UpdateController._log_message('Started code migration',
                                                  client.ip)
                    try:
                        with Remote(client.ip, [Migrator]) as remote:
                            remote.Migrator.migrate(master_ips, extra_ips)
                    except EOFError as eof:
                        UpdateController._log_message(
                            'EOFError during code migration, retrying {0}'.
                            format(eof), client.ip, 'warning')
                        with Remote(client.ip, [Migrator]) as remote:
                            remote.Migrator.migrate(master_ips, extra_ips)
                    UpdateController._log_message('Finished code migration',
                                                  client.ip)
                except Exception as ex:
                    UpdateController._remove_lock_files(
                        [upgrade_ongoing_check_file], ssh_clients)
                    UpdateController._log_message(
                        'Code migration failed with error: {0}'.format(ex),
                        client.ip, 'error')
                    return

            # Start services
            UpdateController._log_message('Starting services',
                                          client_ip=this_client.ip)
            model_services = []
            if 'arakoon-ovsdb' in services_to_restart:
                model_services.append('arakoon-ovsdb')
                services_to_restart.remove('arakoon-ovsdb')
            if 'memcached' in services_to_restart:
                model_services.append('memcached')
                services_to_restart.remove('memcached')
            UpdateController._change_services_state(services=model_services,
                                                    ssh_clients=ssh_clients,
                                                    action='start')

            # Migrate model
            UpdateController._log_message('Started model migration',
                                          client_ip=this_client.ip)
            try:
                from ovs.dal.helpers import Migration
                Migration.migrate()
                UpdateController._log_message('Finished model migration',
                                              client_ip=this_client.ip)
            except Exception as ex:
                UpdateController._remove_lock_files(
                    [upgrade_ongoing_check_file], ssh_clients)
                UpdateController._log_message(
                    'An unexpected error occurred: {0}'.format(ex),
                    client_ip=this_client.ip,
                    severity='error')
                return

            # Post upgrade actions
            UpdateController._log_message('Executing post upgrade actions',
                                          client_ip=this_client.ip)
            for client in ssh_clients:
                with Remote(client.ip, [Toolbox, SSHClient]) as remote:
                    for function in remote.Toolbox.fetch_hooks(
                            'update', 'postupgrade'):
                        UpdateController._log_message(
                            'Executing action {0}'.format(function.__name__),
                            client_ip=client.ip)
                        try:
                            function(
                                remote.SSHClient(client.ip, username='******'))
                            UpdateController._log_message(
                                'Executing action {0} completed'.format(
                                    function.__name__),
                                client_ip=client.ip)
                        except Exception as ex:
                            UpdateController._log_message(
                                'Post upgrade action failed with error: {0}'.
                                format(ex), client.ip, 'error')

            # Start watcher and restart support-agent
            UpdateController._change_services_state(
                services=services_to_restart,
                ssh_clients=ssh_clients,
                action='start')
            UpdateController._change_services_state(services=['support-agent'],
                                                    ssh_clients=ssh_clients,
                                                    action='restart')

            UpdateController._remove_lock_files([upgrade_ongoing_check_file],
                                                ssh_clients)
            UpdateController._log_message('+++ Finished updating +++')
        except RuntimeError as rte:
            if 'Could not acquire lock' in rte.message:
                UpdateController._log_message(
                    'Another framework update is currently in progress!')
            else:
                UpdateController._log_message(
                    'Error during framework update: {0}'.format(rte),
                    severity='error')
                UpdateController._remove_lock_files(
                    [upgrade_file, upgrade_ongoing_check_file], ssh_clients)
        except Exception as ex:
            UpdateController._log_message(
                'Error during framework update: {0}'.format(ex),
                severity='error')
            UpdateController._remove_lock_files(
                [upgrade_file, upgrade_ongoing_check_file], ssh_clients)
        finally:
            file_mutex.release()
Example #22
0
    def update_volumedriver():
        """
        Update the volumedriver
        :return: None
        """
        file_mutex = FileMutex('system_update', wait=2)
        upgrade_file = '/etc/ready_for_upgrade'
        upgrade_ongoing_check_file = '/etc/upgrade_ongoing'
        ssh_clients = []
        try:
            file_mutex.acquire()
            UpdateController._log_message(
                '+++ Starting volumedriver update +++')

            from ovs.dal.lists.storagerouterlist import StorageRouterList

            UpdateController._log_message(
                'Generating SSH client connections for each storage router')
            storage_routers = StorageRouterList.get_storagerouters()
            ssh_clients = [
                SSHClient(storage_router.ip, 'root')
                for storage_router in storage_routers
            ]
            this_client = [
                client for client in ssh_clients if client.is_local is True
            ][0]

            # Commence update !!!!!!!
            # 0. Create locks
            UpdateController._log_message('Creating lock files',
                                          client_ip=this_client.ip)
            for client in ssh_clients:
                client.run(
                    'touch {0}'.format(upgrade_file)
                )  # Prevents manual install or upgrade individual packages
                client.run('touch {0}'.format(upgrade_ongoing_check_file)
                           )  # Prevents clicking x times on 'Update' btn

            # 1. Check requirements
            packages_to_update = set()
            all_services_to_restart = []
            for client in ssh_clients:
                for function in Toolbox.fetch_hooks('update', 'metadata'):
                    UpdateController._log_message(
                        'Executing function {0}'.format(function.__name__),
                        client_ip=client.ip)
                    output = function(client)
                    for key, value in output.iteritems():
                        if key != 'volumedriver':
                            continue
                        for package_info in value:
                            packages_to_update.update(package_info['packages'])
                            all_services_to_restart += package_info['services']

            services_to_restart = []
            for service in all_services_to_restart:
                if service not in services_to_restart:
                    services_to_restart.append(
                        service
                    )  # Filter out duplicates keeping the order of services (eg: watcher-framework before memcached)

            UpdateController._log_message(
                'Services which will be restarted --> {0}'.format(
                    ', '.join(services_to_restart)))
            UpdateController._log_message(
                'Packages which will be installed --> {0}'.format(
                    ', '.join(packages_to_update)))

            # 1. Stop services
            if UpdateController._change_services_state(
                    services=services_to_restart,
                    ssh_clients=ssh_clients,
                    action='stop') is False:
                UpdateController._log_message(
                    'Stopping all services on every node failed, cannot continue',
                    client_ip=this_client.ip,
                    severity='warning')
                UpdateController._remove_lock_files(
                    [upgrade_file, upgrade_ongoing_check_file], ssh_clients)

                UpdateController._log_message(
                    'Attempting to start the services again',
                    client_ip=this_client.ip)
                UpdateController._change_services_state(
                    services=services_to_restart,
                    ssh_clients=ssh_clients,
                    action='start')
                UpdateController._log_message(
                    'Failed to stop all required services, update aborted',
                    client_ip=this_client.ip,
                    severity='error')
                return

            # 2. Update packages
            failed_clients = []
            for client in ssh_clients:
                PackageManager.update(client=client)
                try:
                    for package_name in packages_to_update:
                        UpdateController._log_message(
                            'Installing {0}'.format(package_name), client.ip)
                        PackageManager.install(package_name=package_name,
                                               client=client,
                                               force=True)
                        UpdateController._log_message(
                            'Installed {0}'.format(package_name), client.ip)
                    client.file_delete(upgrade_file)
                except subprocess.CalledProcessError as cpe:
                    UpdateController._log_message(
                        'Upgrade failed with error: {0}'.format(cpe.output),
                        client.ip, 'error')
                    failed_clients.append(client)
                    break

            if failed_clients:
                UpdateController._remove_lock_files(
                    [upgrade_file, upgrade_ongoing_check_file], ssh_clients)
                UpdateController._log_message(
                    'Error occurred. Attempting to start all services again',
                    client_ip=this_client.ip,
                    severity='error')
                UpdateController._change_services_state(
                    services=services_to_restart,
                    ssh_clients=ssh_clients,
                    action='start')
                UpdateController._log_message(
                    'Failed to upgrade following nodes:\n - {0}\nPlease check /var/log/ovs/lib.log on {1} for more information'
                    .format('\n - '.join([
                        client.ip for client in failed_clients
                    ])), this_client.ip, 'error')
                return

            # 3. Post upgrade actions
            UpdateController._log_message('Executing post upgrade actions',
                                          client_ip=this_client.ip)
            for client in ssh_clients:
                for function in Toolbox.fetch_hooks('update', 'postupgrade'):
                    UpdateController._log_message(
                        'Executing action: {0}'.format(function.__name__),
                        client_ip=client.ip)
                    try:
                        function(client)
                    except Exception as ex:
                        UpdateController._log_message(
                            'Post upgrade action failed with error: {0}'.
                            format(ex), client.ip, 'error')

            # 4. Start services
            UpdateController._log_message('Starting services',
                                          client_ip=this_client.ip)
            UpdateController._change_services_state(
                services=services_to_restart,
                ssh_clients=ssh_clients,
                action='start')

            UpdateController._remove_lock_files([upgrade_ongoing_check_file],
                                                ssh_clients)
            UpdateController._log_message('+++ Finished updating +++')
        except RuntimeError as rte:
            if 'Could not acquire lock' in rte.message:
                UpdateController._log_message(
                    'Another volumedriver update is currently in progress!')
            else:
                UpdateController._log_message(
                    'Error during volumedriver update: {0}'.format(rte),
                    severity='error')
                UpdateController._remove_lock_files(
                    [upgrade_file, upgrade_ongoing_check_file], ssh_clients)
        except Exception as ex:
            UpdateController._log_message(
                'Error during volumedriver update: {0}'.format(ex),
                severity='error')
            UpdateController._remove_lock_files(
                [upgrade_file, upgrade_ongoing_check_file], ssh_clients)
        finally:
            file_mutex.release()
Example #23
0
    def execute_update(components):
        """
        Update the specified components on all StorageRouters
        This is called upon by 'at'
        :return: None
        """
        filemutex = file_mutex('system_update', wait=2)
        ssh_clients = []
        services_stop_start = set()
        try:
            filemutex.acquire()
            UpdateController._logger.debug('+++ Starting update +++')

            from ovs.dal.lists.storagerouterlist import StorageRouterList

            # Create SSHClients to all nodes
            UpdateController._logger.debug('Generating SSH client connections for each storage router')
            storage_routers = StorageRouterList.get_storagerouters()
            master_ips = []
            extra_ips = []
            for sr in storage_routers:
                try:
                    ssh_clients.append(SSHClient(sr.ip, username='******'))
                    if sr.node_type == 'MASTER':
                        master_ips.append(sr.ip)
                    elif sr.node_type == 'EXTRA':
                        extra_ips.append(sr.ip)
                except UnableToConnectException:
                    raise Exception('Update is only allowed on systems where all nodes are online and fully functional')

            # Create locks
            for client in ssh_clients:
                UpdateController._logger.debug('{0}: Creating lock files'.format(client.ip))
                client.run(['touch', UpdateController._update_file])  # Prevents manual install or update individual packages
                client.run(['touch', UpdateController._update_ongoing_file])

            # Check requirements
            packages_to_update = {}
            services_post_update = set()
            update_information = UpdateController.get_update_information_all()
            for component, component_info in update_information.iteritems():
                if component in components:
                    UpdateController._logger.debug('Verifying update information for component: {0}'.format(component.upper()))
                    Toolbox.verify_required_params(actual_params=component_info,
                                                   required_params={'downtime': (list, None),
                                                                    'packages': (dict, None),
                                                                    'prerequisites': (list, None),
                                                                    'services_stop_start': (set, None),
                                                                    'services_post_update': (set, None)})
                    if len(component_info['prerequisites']) > 0:
                        raise Exception('Update is only allowed when all prerequisites have been met')

                    packages_to_update.update(component_info['packages'])
                    services_stop_start.update(component_info['services_stop_start'])
                    services_post_update.update(component_info['services_post_update'])
            if len(packages_to_update) > 0:
                UpdateController._logger.debug('Packages to be updated: {0}'.format(', '.join(sorted(packages_to_update.keys()))))
            if len(services_stop_start) > 0:
                UpdateController._logger.debug('Services to stop before package update: {0}'.format(', '.join(sorted(services_stop_start))))
            if len(services_post_update) > 0:
                UpdateController._logger.debug('Services which will be restarted after update: {0}'.format(', '.join(sorted(services_post_update))))

            # Stop services
            if UpdateController.change_services_state(services=services_stop_start,
                                                      ssh_clients=ssh_clients,
                                                      action='stop') is False:
                raise Exception('Stopping all services on every node failed, cannot continue')

            # Install packages
            # First install packages on all StorageRouters individually
            if packages_to_update:
                failures = False
                for client in ssh_clients:
                    UpdateController._logger.debug('{0}: Installing packages'.format(client.ip))
                    for function in Toolbox.fetch_hooks('update', 'package_install_multi'):
                        try:
                            function(client=client, package_info=packages_to_update, components=components)
                        except Exception as ex:
                            UpdateController._logger.error('{0}: Package installation hook {1} failed with error: {2}'.format(client.ip, function.__name__, ex))
                            failures = True

                if set(components).difference({'framework', 'storagedriver'}):
                    # Second install packages on all ALBA nodes
                    for function in Toolbox.fetch_hooks('update', 'package_install_single'):
                        try:
                            function(package_info=packages_to_update, components=components)
                        except Exception as ex:
                            UpdateController._logger.exception('Package installation hook {0} failed with error: {1}'.format(function.__name__, ex))
                            failures = True

                if failures is True:
                    raise Exception('Installing the packages failed on 1 or more nodes')

            # Remove update file
            for client in ssh_clients:
                client.file_delete(UpdateController._update_file)

            # Migrate code
            if 'framework' in components:
                failures = []
                for client in ssh_clients:
                    UpdateController._logger.debug('{0}: Verifying extensions code migration is required'.format(client.ip))
                    try:
                        key = '/ovs/framework/hosts/{0}/versions'.format(System.get_my_machine_id(client=client))
                        old_versions = Configuration.get(key) if Configuration.exists(key) else {}
                        try:
                            with remote(client.ip, [Migrator]) as rem:
                                rem.Migrator.migrate(master_ips, extra_ips)
                        except EOFError as eof:
                            UpdateController._logger.warning('{0}: EOFError during code migration, retrying {1}'.format(client.ip, eof))
                            with remote(client.ip, [Migrator]) as rem:
                                rem.Migrator.migrate(master_ips, extra_ips)
                        new_versions = Configuration.get(key) if Configuration.exists(key) else {}
                        if old_versions != new_versions:
                            UpdateController._logger.debug('{0}: Finished extensions code migration. Old versions: {1} --> New versions: {2}'.format(client.ip, old_versions, new_versions))
                    except Exception as ex:
                        failures.append('{0}: {1}'.format(client.ip, str(ex)))
                if len(failures) > 0:
                    raise Exception('Failed to run the extensions migrate code on all nodes. Errors found:\n\n{0}'.format('\n\n'.join(failures)))

            # Start memcached
            if 'memcached' in services_stop_start:
                services_stop_start.remove('memcached')
                UpdateController._logger.debug('Starting memcached')
                UpdateController.change_services_state(services=['memcached'],
                                                       ssh_clients=ssh_clients,
                                                       action='start')

            # Migrate model
            if 'framework' in components:
                UpdateController._logger.debug('Verifying DAL code migration is required')
                old_versions = PersistentFactory.get_client().get('ovs_model_version') if PersistentFactory.get_client().exists('ovs_model_version') else {}

                from ovs.dal.helpers import Migration
                with remote(ssh_clients[0].ip, [Migration]) as rem:
                    rem.Migration.migrate()

                new_versions = PersistentFactory.get_client().get('ovs_model_version') if PersistentFactory.get_client().exists('ovs_model_version') else {}
                if old_versions != new_versions:
                    UpdateController._logger.debug('Finished DAL code migration. Old versions: {0} --> New versions: {1}'.format(old_versions, new_versions))

            # Post update actions
            for client in ssh_clients:
                UpdateController._logger.debug('{0}: Executing post-update actions'.format(client.ip))
                for function in Toolbox.fetch_hooks('update', 'post_update_multi'):
                    try:
                        function(client=client, components=components)
                    except Exception as ex:
                        UpdateController._logger.exception('{0}: Post update hook {1} failed with error: {2}'.format(client.ip, function.__name__, ex))

            for function in Toolbox.fetch_hooks('update', 'post_update_single'):
                try:
                    function(components=components)
                except Exception as ex:
                    UpdateController._logger.exception('Post update hook {0} failed with error: {1}'.format(function.__name__, ex))

            # Start services
            UpdateController.change_services_state(services=services_stop_start,
                                                   ssh_clients=ssh_clients,
                                                   action='start')

            UpdateController._refresh_package_information()
            UpdateController._logger.debug('+++ Finished updating +++')
        except NoLockAvailableException:
            UpdateController._logger.debug('Another update is currently in progress!')
        except Exception as ex:
            UpdateController._logger.exception('Error during update: {0}'.format(ex))
            if len(ssh_clients) > 0:
                UpdateController.change_services_state(services=services_stop_start,
                                                       ssh_clients=ssh_clients,
                                                       action='start')
                UpdateController._refresh_package_information()
                UpdateController._logger.error('Failed to update. Please check all the logs for more information')
        finally:
            filemutex.release()
            for ssh_client in ssh_clients:
                for file_name in [UpdateController._update_file, UpdateController._update_ongoing_file]:
                    try:
                        if ssh_client.file_exists(file_name):
                            ssh_client.file_delete(file_name)
                    except:
                        UpdateController._logger.warning('[0}: Failed to remove lock file {1}'.format(ssh_client.ip, file_name))
Example #24
0
    def update_volumedriver():
        """
        Update the volumedriver
        :return: None
        """
        filemutex = file_mutex('system_update', wait=2)
        upgrade_file = '/etc/ready_for_upgrade'
        upgrade_ongoing_check_file = '/etc/upgrade_ongoing'
        ssh_clients = []
        try:
            filemutex.acquire()
            UpdateController._log_message('+++ Starting volumedriver update +++')

            from ovs.dal.lists.storagerouterlist import StorageRouterList

            UpdateController._log_message('Generating SSH client connections for each storage router')
            storage_routers = StorageRouterList.get_storagerouters()
            ssh_clients = [SSHClient(storage_router.ip, 'root') for storage_router in storage_routers]
            this_client = [client for client in ssh_clients if client.is_local is True][0]

            # Commence update !!!!!!!
            # 0. Create locks
            UpdateController._log_message('Creating lock files', client_ip=this_client.ip)
            for client in ssh_clients:
                client.run('touch {0}'.format(upgrade_file))  # Prevents manual install or upgrade individual packages
                client.run('touch {0}'.format(upgrade_ongoing_check_file))  # Prevents clicking x times on 'Update' btn

            # 1. Check requirements
            packages_to_update = set()
            all_services_to_restart = []
            for client in ssh_clients:
                for function in Toolbox.fetch_hooks('update', 'metadata'):
                    UpdateController._log_message('Executing function {0}'.format(function.__name__),
                                                  client_ip=client.ip)
                    output = function(client)
                    for key, value in output.iteritems():
                        if key != 'volumedriver':
                            continue
                        for package_info in value:
                            packages_to_update.update(package_info['packages'])
                            all_services_to_restart += package_info['services']

            services_to_restart = []
            for service in all_services_to_restart:
                if service not in services_to_restart:
                    services_to_restart.append(service)  # Filter out duplicates keeping the order of services (eg: watcher-framework before memcached)

            UpdateController._log_message('Services which will be restarted --> {0}'.format(', '.join(services_to_restart)))
            UpdateController._log_message('Packages which will be installed --> {0}'.format(', '.join(packages_to_update)))

            # 1. Stop services
            if UpdateController._change_services_state(services=services_to_restart,
                                                       ssh_clients=ssh_clients,
                                                       action='stop') is False:
                UpdateController._log_message('Stopping all services on every node failed, cannot continue',
                                              client_ip=this_client.ip, severity='warning')
                UpdateController._remove_lock_files([upgrade_file, upgrade_ongoing_check_file], ssh_clients)

                UpdateController._log_message('Attempting to start the services again', client_ip=this_client.ip)
                UpdateController._change_services_state(services=services_to_restart,
                                                        ssh_clients=ssh_clients,
                                                        action='start')
                UpdateController._log_message('Failed to stop all required services, update aborted',
                                              client_ip=this_client.ip, severity='error')
                return

            # 2. Update packages
            failed_clients = []
            for client in ssh_clients:
                PackageManager.update(client=client)
                try:
                    for package_name in packages_to_update:
                        UpdateController._log_message('Installing {0}'.format(package_name), client.ip)
                        PackageManager.install(package_name=package_name,
                                               client=client,
                                               force=True)
                        UpdateController._log_message('Installed {0}'.format(package_name), client.ip)
                    client.file_delete(upgrade_file)
                except subprocess.CalledProcessError as cpe:
                    UpdateController._log_message('Upgrade failed with error: {0}'.format(cpe.output), client.ip,
                                                  'error')
                    failed_clients.append(client)
                    break

            if failed_clients:
                UpdateController._remove_lock_files([upgrade_file, upgrade_ongoing_check_file], ssh_clients)
                UpdateController._log_message('Error occurred. Attempting to start all services again',
                                              client_ip=this_client.ip, severity='error')
                UpdateController._change_services_state(services=services_to_restart,
                                                        ssh_clients=ssh_clients,
                                                        action='start')
                UpdateController._log_message('Failed to upgrade following nodes:\n - {0}\nPlease check /var/log/ovs/lib.log on {1} for more information'.format('\n - '.join([client.ip for client in failed_clients]), this_client.ip),
                                              this_client.ip,
                                              'error')
                return

            # 3. Post upgrade actions
            UpdateController._log_message('Executing post upgrade actions', client_ip=this_client.ip)
            for client in ssh_clients:
                for function in Toolbox.fetch_hooks('update', 'postupgrade'):
                    UpdateController._log_message('Executing action: {0}'.format(function.__name__), client_ip=client.ip)
                    try:
                        function(client)
                    except Exception as ex:
                        UpdateController._log_message('Post upgrade action failed with error: {0}'.format(ex),
                                                      client.ip, 'error')

            # 4. Start services
            UpdateController._log_message('Starting services', client_ip=this_client.ip)
            UpdateController._change_services_state(services=services_to_restart,
                                                    ssh_clients=ssh_clients,
                                                    action='start')

            UpdateController._remove_lock_files([upgrade_ongoing_check_file], ssh_clients)
            UpdateController._log_message('+++ Finished updating +++')
        except RuntimeError as rte:
            UpdateController._log_message('Error during volumedriver update: {0}'.format(rte), severity='error')
            UpdateController._remove_lock_files([upgrade_file, upgrade_ongoing_check_file], ssh_clients)
        except NoLockAvailableException:
            UpdateController._log_message('Another volumedriver update is currently in progress!')
        except Exception as ex:
            UpdateController._log_message('Error during volumedriver update: {0}'.format(ex), severity='error')
            UpdateController._remove_lock_files([upgrade_file, upgrade_ongoing_check_file], ssh_clients)
        finally:
            filemutex.release()
Example #25
0
    def refresh_package_information():
        """
        Retrieve and store the package information of all StorageRouters
        :return: None
        """
        GenericController._logger.info('Updating package information')

        client_map = {}
        prerequisites = []
        package_info_cluster = {}
        all_storagerouters = StorageRouterList.get_storagerouters()
        all_storagerouters.sort(key=lambda sr: ExtensionsToolbox.advanced_sort(
            element=sr.ip, separator='.'))
        for storagerouter in all_storagerouters:
            package_info_cluster[storagerouter.ip] = {}
            try:
                # We make use of these clients in Threads --> cached = False
                client_map[storagerouter] = SSHClient(endpoint=storagerouter,
                                                      username='******',
                                                      cached=False)
            except (NotAuthenticatedException, UnableToConnectException):
                GenericController._logger.warning(
                    'StorageRouter {0} is inaccessible'.format(
                        storagerouter.ip))
                prerequisites.append(['node_down', storagerouter.name])
                package_info_cluster[storagerouter.ip]['errors'] = [
                    'StorageRouter {0} is inaccessible'.format(
                        storagerouter.name)
                ]

        # Retrieve for each StorageRouter in the cluster the installed and candidate versions of related packages
        # This also validates whether all required packages have been installed
        GenericController._logger.debug(
            'Retrieving package information for the cluster')
        threads = []
        for storagerouter, client in client_map.iteritems():
            for fct in Toolbox.fetch_hooks(
                    component='update',
                    sub_component='get_package_update_info_cluster'):
                thread = Thread(target=fct,
                                args=(client, package_info_cluster))
                thread.start()
                threads.append(thread)

        for thread in threads:
            thread.join()

        # Retrieve the related downtime / service restart information
        GenericController._logger.debug(
            'Retrieving update information for the cluster')
        update_info_cluster = {}
        for storagerouter, client in client_map.iteritems():
            update_info_cluster[storagerouter.ip] = {
                'errors':
                package_info_cluster[storagerouter.ip].get('errors', [])
            }
            for fct in Toolbox.fetch_hooks(
                    component='update',
                    sub_component='get_update_info_cluster'):
                fct(client, update_info_cluster,
                    package_info_cluster[storagerouter.ip])

        # Retrieve the update information for plugins (eg: ALBA, iSCSI)
        GenericController._logger.debug(
            'Retrieving package and update information for the plugins')
        threads = []
        update_info_plugin = {}
        for fct in Toolbox.fetch_hooks('update', 'get_update_info_plugin'):
            thread = Thread(target=fct, args=(update_info_plugin, ))
            thread.start()
            threads.append(thread)

        for thread in threads:
            thread.join()

        # Add the prerequisites
        if len(prerequisites) > 0:
            for ip, component_info in update_info_cluster.iteritems():
                if PackageFactory.COMP_FWK in component_info:
                    component_info[PackageFactory.COMP_FWK][
                        'prerequisites'].extend(prerequisites)

        # Store information in model and collect errors for OVS cluster
        errors = set()
        for storagerouter in all_storagerouters:
            GenericController._logger.debug(
                'Storing update information for StorageRouter {0}'.format(
                    storagerouter.ip))
            update_info = update_info_cluster.get(storagerouter.ip, {})

            # Remove the errors from the update information
            sr_errors = update_info.pop('errors', [])
            if len(sr_errors) > 0:
                errors.update([
                    '{0}: {1}'.format(storagerouter.ip, error)
                    for error in sr_errors
                ])
                update_info = {
                }  # If any error occurred, we store no update information for this StorageRouter

            # Remove the components without updates from the update information
            update_info_copy = copy.deepcopy(update_info)
            for component, info in update_info_copy.iteritems():
                if len(info['packages']) == 0:
                    update_info.pop(component)

            # Store the update information
            storagerouter.package_information = update_info
            storagerouter.save()

        # Collect errors for plugins
        for ip, plugin_errors in update_info_plugin.iteritems():
            if len(plugin_errors) > 0:
                errors.update(
                    ['{0}: {1}'.format(ip, error) for error in plugin_errors])

        if len(errors) > 0:
            raise Exception('\n - {0}'.format('\n - '.join(errors)))
        GenericController._logger.info('Finished updating package information')
Example #26
0
    def remove_node(node_ip, silent=None):
        """
        Remove the node with specified IP from the cluster
        :param node_ip: IP of the node to remove
        :type node_ip: str
        :param silent: If silent == '--force-yes' no question will be asked to confirm the removal
        :type silent: str
        :return: None
        """
        from ovs.lib.storagedriver import StorageDriverController
        from ovs.lib.storagerouter import StorageRouterController
        from ovs.dal.lists.storagerouterlist import StorageRouterList

        Toolbox.log(logger=NodeRemovalController._logger, messages="Remove node", boxed=True)
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages="WARNING: Some of these steps may take a very long time, please check the logs for more information\n\n",
        )

        ###############
        # VALIDATIONS #
        ###############
        try:
            node_ip = node_ip.strip()
            if not isinstance(node_ip, str):
                raise ValueError("Node IP must be a string")
            if not re.match(SSHClient.IP_REGEX, node_ip):
                raise ValueError("Invalid IP {0} specified".format(node_ip))

            storage_router_all = StorageRouterList.get_storagerouters()
            storage_router_masters = StorageRouterList.get_masters()
            storage_router_all_ips = set([storage_router.ip for storage_router in storage_router_all])
            storage_router_master_ips = set([storage_router.ip for storage_router in storage_router_masters])
            storage_router_to_remove = StorageRouterList.get_by_ip(node_ip)

            if node_ip not in storage_router_all_ips:
                raise ValueError(
                    "Unknown IP specified\nKnown in model:\n - {0}\nSpecified for removal:\n - {1}".format(
                        "\n - ".join(storage_router_all_ips), node_ip
                    )
                )

            if len(storage_router_all_ips) == 1:
                raise RuntimeError("Removing the only node is not possible")

            if node_ip in storage_router_master_ips and len(storage_router_master_ips) == 1:
                raise RuntimeError("Removing the only master node is not possible")

            if System.get_my_storagerouter() == storage_router_to_remove:
                raise RuntimeError(
                    "The node to be removed cannot be identical to the node on which the removal is initiated"
                )

            Toolbox.log(
                logger=NodeRemovalController._logger, messages="Creating SSH connections to remaining master nodes"
            )
            master_ip = None
            ip_client_map = {}
            storage_routers_offline = []
            storage_router_to_remove_online = True
            for storage_router in storage_router_all:
                try:
                    client = SSHClient(storage_router, username="******")
                    if client.run(["pwd"]):
                        Toolbox.log(
                            logger=NodeRemovalController._logger,
                            messages="  Node with IP {0:<15} successfully connected to".format(storage_router.ip),
                        )
                        ip_client_map[storage_router.ip] = client
                        if storage_router != storage_router_to_remove and storage_router.node_type == "MASTER":
                            master_ip = storage_router.ip
                except UnableToConnectException:
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages="  Node with IP {0:<15} is unreachable".format(storage_router.ip),
                    )
                    storage_routers_offline.append(storage_router)
                    if storage_router == storage_router_to_remove:
                        storage_router_to_remove_online = False

            if len(ip_client_map) == 0 or master_ip is None:
                raise RuntimeError("Could not connect to any master node in the cluster")

            storage_router_to_remove.invalidate_dynamics("vdisks_guids")
            if (
                len(storage_router_to_remove.vdisks_guids) > 0
            ):  # vDisks are supposed to be moved away manually before removing a node
                raise RuntimeError("Still vDisks attached to Storage Router {0}".format(storage_router_to_remove.name))

            internal_memcached = Toolbox.is_service_internally_managed(service="memcached")
            internal_rabbit_mq = Toolbox.is_service_internally_managed(service="rabbitmq")
            memcached_endpoints = Configuration.get(key="/ovs/framework/memcache|endpoints")
            rabbit_mq_endpoints = Configuration.get(key="/ovs/framework/messagequeue|endpoints")
            copy_memcached_endpoints = list(memcached_endpoints)
            copy_rabbit_mq_endpoints = list(rabbit_mq_endpoints)
            for endpoint in memcached_endpoints:
                if endpoint.startswith(storage_router_to_remove.ip):
                    copy_memcached_endpoints.remove(endpoint)
            for endpoint in rabbit_mq_endpoints:
                if endpoint.startswith(storage_router_to_remove.ip):
                    copy_rabbit_mq_endpoints.remove(endpoint)
            if len(copy_memcached_endpoints) == 0 and internal_memcached is True:
                raise RuntimeError(
                    "Removal of provided nodes will result in a complete removal of the memcached service"
                )
            if len(copy_rabbit_mq_endpoints) == 0 and internal_rabbit_mq is True:
                raise RuntimeError(
                    "Removal of provided nodes will result in a complete removal of the messagequeue service"
                )
        except Exception as exception:
            Toolbox.log(
                logger=NodeRemovalController._logger, messages=[str(exception)], boxed=True, loglevel="exception"
            )
            sys.exit(1)

        #################
        # CONFIRMATIONS #
        #################
        interactive = silent != "--force-yes"
        remove_asd_manager = not interactive  # Remove ASD manager if non-interactive else ask
        if interactive is True:
            proceed = Interactive.ask_yesno(
                message="Are you sure you want to remove node {0}?".format(storage_router_to_remove.name),
                default_value=False,
            )
            if proceed is False:
                Toolbox.log(logger=NodeRemovalController._logger, messages="Abort removal", title=True)
                sys.exit(1)

            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove, username="******")
                if ServiceManager.has_service(name="asd-manager", client=client):
                    remove_asd_manager = Interactive.ask_yesno(
                        message="Do you also want to remove the ASD manager and related ASDs?", default_value=False
                    )

            if remove_asd_manager is True or storage_router_to_remove_online is False:
                for function in Toolbox.fetch_hooks("setup", "validate_asd_removal"):
                    validation_output = function(storage_router_to_remove.ip)
                    if validation_output["confirm"] is True:
                        if Interactive.ask_yesno(message=validation_output["question"], default_value=False) is False:
                            remove_asd_manager = False
                            break

        ###########
        # REMOVAL #
        ###########
        try:
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages="Starting removal of node {0} - {1}".format(
                    storage_router_to_remove.name, storage_router_to_remove.ip
                ),
            )
            if storage_router_to_remove_online is False:
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages="  Marking all Storage Drivers served by Storage Router {0} as offline".format(
                        storage_router_to_remove.ip
                    ),
                )
                StorageDriverController.mark_offline(storagerouter_guid=storage_router_to_remove.guid)

            # Remove vPools
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages="  Removing vPools from node".format(storage_router_to_remove.ip),
            )
            storage_routers_offline_guids = [
                sr.guid for sr in storage_routers_offline if sr.guid != storage_router_to_remove.guid
            ]
            for storage_driver in storage_router_to_remove.storagedrivers:
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages="    Removing vPool {0} from node".format(storage_driver.vpool.name),
                )
                StorageRouterController.remove_storagedriver(
                    storagedriver_guid=storage_driver.guid, offline_storage_router_guids=storage_routers_offline_guids
                )

            # Demote if MASTER
            if storage_router_to_remove.node_type == "MASTER":
                NodeTypeController.demote_node(
                    cluster_ip=storage_router_to_remove.ip,
                    master_ip=master_ip,
                    ip_client_map=ip_client_map,
                    unique_id=storage_router_to_remove.machine_id,
                    unconfigure_memcached=internal_memcached,
                    unconfigure_rabbitmq=internal_rabbit_mq,
                    offline_nodes=storage_routers_offline,
                )

            # Stop / remove services
            Toolbox.log(logger=NodeRemovalController._logger, messages="Stopping and removing services")
            config_store = Configuration.get_store()
            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove, username="******")
                NodeRemovalController.remove_services(
                    client=client,
                    node_type=storage_router_to_remove.node_type.lower(),
                    logger=NodeRemovalController._logger,
                )
                service = "watcher-config"
                if ServiceManager.has_service(service, client=client):
                    Toolbox.log(logger=NodeRemovalController._logger, messages="Removing service {0}".format(service))
                    ServiceManager.stop_service(service, client=client)
                    ServiceManager.remove_service(service, client=client)

                if config_store == "etcd":
                    from ovs.extensions.db.etcd.installer import EtcdInstaller

                    if Configuration.get(key="/ovs/framework/external_config") is None:
                        Toolbox.log(logger=NodeRemovalController._logger, messages="      Removing Etcd cluster")
                        try:
                            EtcdInstaller.stop("config", client)
                            EtcdInstaller.remove("config", client)
                        except Exception as ex:
                            Toolbox.log(
                                logger=NodeRemovalController._logger,
                                messages=["\nFailed to unconfigure Etcd", ex],
                                loglevel="exception",
                            )

                    Toolbox.log(logger=NodeRemovalController._logger, messages="Removing Etcd proxy")
                    EtcdInstaller.remove_proxy("config", client.ip)

            Toolbox.run_hooks(
                component="noderemoval",
                sub_component="remove",
                logger=NodeRemovalController._logger,
                cluster_ip=storage_router_to_remove.ip,
                complete_removal=remove_asd_manager,
            )

            # Clean up model
            Toolbox.log(logger=NodeRemovalController._logger, messages="Removing node from model")
            for service in storage_router_to_remove.services:
                service.delete()
            for disk in storage_router_to_remove.disks:
                for partition in disk.partitions:
                    partition.delete()
                disk.delete()
            for j_domain in storage_router_to_remove.domains:
                j_domain.delete()
            Configuration.delete("/ovs/framework/hosts/{0}".format(storage_router_to_remove.machine_id))

            NodeTypeController.restart_framework_and_memcache_services(
                clients=ip_client_map,
                offline_node_ips=[node.ip for node in storage_routers_offline],
                logger=NodeRemovalController._logger,
            )

            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove, username="******")
                if config_store == "arakoon":
                    client.file_delete(filenames=[ArakoonConfiguration.CACC_LOCATION])
                client.file_delete(filenames=[Configuration.BOOTSTRAP_CONFIG_LOCATION])
            storage_router_to_remove.delete()
            Toolbox.log(logger=NodeRemovalController._logger, messages="Successfully removed node\n")
        except Exception as exception:
            Toolbox.log(logger=NodeRemovalController._logger, messages="\n")
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=["An unexpected error occurred:", str(exception)],
                boxed=True,
                loglevel="exception",
            )
            sys.exit(1)
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeRemovalController._logger, messages="\n")
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages="This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.",
                boxed=True,
                loglevel="error",
            )
            sys.exit(1)

        if remove_asd_manager is True:
            Toolbox.log(logger=NodeRemovalController._logger, messages="\nRemoving ASD Manager")
            with remote(storage_router_to_remove.ip, [os]) as rem:
                rem.os.system("asd-manager remove --force-yes")
        Toolbox.log(logger=NodeRemovalController._logger, messages="Remove nodes finished", title=True)
Example #27
0
 def _can_remove(self):
     """
     Can be removed
     """
     return len(Toolbox.fetch_hooks('license', '{0}.remove'.format(self.component))) == 1