def _refresh_package_information():
    """
    Refresh the update/package information, retrying while another process holds the refresh lock.
    Performs at most 5 attempts, sleeping 6 seconds between attempts (30 seconds max in total).
    :raises Exception: When the package information could not be refreshed after 5 attempts
    :return: None
    :rtype: NoneType
    """
    UpdateController._logger.debug('Refreshing package information')
    for attempt in range(1, 6):
        try:
            GenericController.refresh_package_information()
            return
        except NoLockAvailableException:
            UpdateController._logger.debug('Attempt {0}: Could not refresh the update information, trying again'.format(attempt))
            if attempt == 5:
                # Out of retries: fail immediately instead of sleeping another 6 seconds for nothing
                raise Exception('Could not refresh the update information')
            time.sleep(6)  # Wait 30 seconds max in total
def test_refresh_package_information(self):
    """
    Test the refresh package information functionality
    Scenario:
      * StorageRouter 1 successfully updates its package info through both hooks
      * StorageRouter 2 is inaccessible --> its package information stays empty
      * StorageRouter 3 reports an error in the 2nd hook --> package_information is reset to {} and the error bubbles up
    """
    def _update_info_cluster_1(client, update_info, package_info):
        # First cluster hook: registers 'framework' update info for every reachable client
        _ = package_info
        update_info[client.ip]['framework'] = {'packages': {'package1': {'candidate': 'version2',
                                                                        'installed': 'version1'}},
                                               'prerequisites': []}

    def _update_info_cluster_2(client, update_info, package_info):
        # Second cluster hook: registers 'component2' update info and injects an error for StorageRouter 3
        _ = package_info
        update_info[client.ip]['component2'] = {'packages': {'package2': {'candidate': 'version2',
                                                                          'installed': 'version1'}},
                                                'prerequisites': []}
        if client.ip == storagerouter_3.ip:
            update_info[client.ip]['errors'] = ['Unexpected error occurred for StorageRouter {0}'.format(storagerouter_3.name)]

    def _update_info_plugin_1(error_information):
        _ = error_information  # get_update_info_plugin is used for Alba nodes, so not testing here

    expected_package_info = {'framework': {'packages': {'package1': {'candidate': 'version2',
                                                                    'installed': 'version1'}},
                                           'prerequisites': [['node_down', '2']]},
                             'component2': {'packages': {'package2': {'candidate': 'version2',
                                                                     'installed': 'version1'}},
                                            'prerequisites': []}}

    # StorageRouter 1 successfully updates its package info
    # StorageRouter 2 is inaccessible
    # StorageRouter 3 gets error in 2nd hook --> package_information is reset to {}
    structure = DalHelper.build_dal_structure(structure={'storagerouters': [1, 2, 3]})
    storagerouter_1 = structure['storagerouters'][1]
    storagerouter_2 = structure['storagerouters'][2]
    storagerouter_3 = structure['storagerouters'][3]
    Toolbox._function_pointers['update-get_update_info_cluster'] = [_update_info_cluster_1, _update_info_cluster_2]
    Toolbox._function_pointers['update-get_update_info_plugin'] = [_update_info_plugin_1]

    # Simulate StorageRouter 2 being unreachable over SSH
    SSHClient._raise_exceptions[storagerouter_2.ip] = {'users': ['root'],
                                                       'exception': UnableToConnectException('No route to host')}

    with self.assertRaises(excClass=Exception) as raise_info:
        GenericController.refresh_package_information()

    # Discard cached DAL state so we read the freshly persisted package information
    storagerouter_1.discard()
    storagerouter_2.discard()
    storagerouter_3.discard()
    # NOTE: msg strings below use '{0}' placeholders consistently (the originals for SR 1 and 2 called .format() on a string without a placeholder, which was a no-op)
    self.assertDictEqual(d1=expected_package_info,
                         d2=storagerouter_1.package_information,
                         msg='Incorrect package information found for StorageRouter {0}'.format(storagerouter_1.name))
    self.assertDictEqual(d1={},
                         d2=storagerouter_2.package_information,
                         msg='Incorrect package information found for StorageRouter {0}'.format(storagerouter_2.name))
    self.assertDictEqual(d1={},
                         d2=storagerouter_3.package_information,
                         msg='Incorrect package information found for StorageRouter {0}'.format(storagerouter_3.name))
    self.assertIn(member='Unexpected error occurred for StorageRouter {0}'.format(storagerouter_3.name),
                  container=raise_info.exception.message,
                  msg='Expected to find log message about unexpected error for StorageRouter {0}'.format(storagerouter_3.name))
def migrate():
    """
    Executes async migrations. It doesn't matter too much when they are executed, as long as they get eventually executed. This code will typically contain:
    * "dangerous" migration code (it needs certain running services)
    * Migration code depending on a cluster-wide state
    * ...

    NOTE: This code is Python 2 only (it relies on dict.iteritems() and on .items()/.values() returning lists).
    :return: None
    :rtype: NoneType
    """
    MigrationController._logger.info('Preparing out of band migrations...')

    # Imports are deferred so the migration only pulls in the DAL/extensions when it actually runs
    from ovs.dal.lists.storagedriverlist import StorageDriverList
    from ovs.dal.lists.storagerouterlist import StorageRouterList
    from ovs.dal.lists.vpoollist import VPoolList
    from ovs.extensions.generic.configuration import Configuration
    from ovs.extensions.generic.sshclient import SSHClient
    from ovs_extensions.generic.toolbox import ExtensionsToolbox
    from ovs_extensions.services.interfaces.systemd import Systemd
    from ovs.extensions.services.servicefactory import ServiceFactory
    from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
    from ovs.lib.generic import GenericController

    MigrationController._logger.info('Start out of band migrations...')
    service_manager = ServiceFactory.get_manager()

    # Build one SSH client per StorageRouter up front; re-used by every migration step below
    # NOTE(review): username value appears redacted in source ('******') — confirm the intended account
    sr_client_map = {}
    for storagerouter in StorageRouterList.get_storagerouters():
        sr_client_map[storagerouter.guid] = SSHClient(endpoint=storagerouter,
                                                      username='******')

    #########################################################
    # Addition of 'ExecReload' for AlbaProxy SystemD services
    if ServiceFactory.get_service_type() == 'systemd':
        changed_clients = set()
        for storagedriver in StorageDriverList.get_storagedrivers():
            root_client = sr_client_map[storagedriver.storagerouter_guid]
            for alba_proxy in storagedriver.alba_proxies:
                service = alba_proxy.service
                service_name = 'ovs-{0}'.format(service.name)
                if not service_manager.has_service(name=service_name, client=root_client):
                    continue
                # Skip services whose unit file already contains an ExecReload directive
                if 'ExecReload=' in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                    continue
                try:
                    service_manager.regenerate_service(name='ovs-albaproxy', client=root_client, target_name=service_name)
                    changed_clients.add(root_client)
                except:
                    # Best-effort: log and continue with the next proxy service
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        # Reload systemd once per changed host, not once per changed service
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

    ##################################################################
    # Adjustment of open file descriptors for Arakoon services to 8192
    changed_clients = set()
    for storagerouter in StorageRouterList.get_storagerouters():
        root_client = sr_client_map[storagerouter.guid]
        for service_name in service_manager.list_services(client=root_client):
            if not service_name.startswith('ovs-arakoon-'):
                continue
            # Path and marker differ between systemd and upstart (init) service managers
            if ServiceFactory.get_service_type() == 'systemd':
                path = '/lib/systemd/system/{0}.service'.format(service_name)
                check = 'LimitNOFILE=8192'
            else:
                path = '/etc/init/{0}.conf'.format(service_name)
                check = 'limit nofile 8192 8192'
            if not root_client.file_exists(path):
                continue
            # Already migrated if the limit marker is present
            if check in root_client.file_read(path):
                continue
            try:
                service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name=service_name)
                changed_clients.add(root_client)
                # Mark the service for restart-on-update via its version file
                ExtensionsToolbox.edit_version_file(client=root_client, package_name='arakoon', old_service_name=service_name)
            except:
                # Best-effort: log and continue with the next service
                MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
    for root_client in changed_clients:
        root_client.run(['systemctl', 'daemon-reload'])

    #############################
    # Migrate to multiple proxies
    for storagedriver in StorageDriverList.get_storagedrivers():
        vpool = storagedriver.vpool
        root_client = sr_client_map[storagedriver.storagerouter_guid]
        for alba_proxy in storagedriver.alba_proxies:
            # Rename alba_proxy service in model: 'albaproxy_<vpool>' --> 'albaproxy_<vpool>_0'
            service = alba_proxy.service
            old_service_name = 'albaproxy_{0}'.format(vpool.name)
            new_service_name = 'albaproxy_{0}_0'.format(vpool.name)
            if old_service_name != service.name:
                continue
            service.name = new_service_name
            service.save()

            if not service_manager.has_service(name=old_service_name, client=root_client):
                continue
            old_configuration_key = '/ovs/framework/hosts/{0}/services/{1}'.format(storagedriver.storagerouter.machine_id, old_service_name)
            if not Configuration.exists(key=old_configuration_key):
                continue

            # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service)
            ExtensionsToolbox.edit_version_file(client=root_client,
                                                package_name='alba',
                                                old_service_name=old_service_name,
                                                new_service_name=new_service_name)

            # Register new service and remove old service
            service_manager.add_service(name='ovs-albaproxy',
                                        client=root_client,
                                        params=Configuration.get(old_configuration_key),
                                        target_name='ovs-{0}'.format(new_service_name))

            # Update scrub proxy config
            proxy_config_key = '/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)
            proxy_config = None if Configuration.exists(key=proxy_config_key) is False else Configuration.get(proxy_config_key)
            if proxy_config is not None:
                fragment_cache = proxy_config.get('fragment_cache', ['none', {}])
                if fragment_cache[0] == 'alba' and fragment_cache[1].get('cache_on_write') is True:  # Accelerated ALBA configured
                    # Scrub proxies must not read from the accelerated fragment cache
                    fragment_cache_scrub_info = copy.deepcopy(fragment_cache)
                    fragment_cache_scrub_info[1]['cache_on_read'] = False
                    proxy_scrub_config_key = '/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid)
                    proxy_scrub_config = None if Configuration.exists(key=proxy_scrub_config_key) is False else Configuration.get(proxy_scrub_config_key)
                    if proxy_scrub_config is not None and proxy_scrub_config['fragment_cache'] == ['none']:
                        proxy_scrub_config['fragment_cache'] = fragment_cache_scrub_info
                        Configuration.set(proxy_scrub_config_key,
                                          json.dumps(proxy_scrub_config, indent=4),
                                          raw=True)

        # Update 'backend_connection_manager' section
        changes = False
        storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id)
        storagedriver_config.load()
        if 'backend_connection_manager' not in storagedriver_config.configuration:
            continue

        current_config = storagedriver_config.configuration['backend_connection_manager']
        if current_config.get('backend_type') != 'MULTI':
            # Old single-proxy layout --> convert to MULTI with one numbered sub-section per proxy
            changes = True
            backend_connection_manager = {'backend_type': 'MULTI'}
            for index, proxy in enumerate(sorted(storagedriver.alba_proxies, key=lambda pr: pr.service.ports[0])):
                backend_connection_manager[str(index)] = copy.deepcopy(current_config)
                # noinspection PyUnresolvedReferences
                backend_connection_manager[str(index)]['alba_connection_use_rora'] = True
                # noinspection PyUnresolvedReferences
                backend_connection_manager[str(index)]['alba_connection_rora_manifest_cache_capacity'] = 5000
                # Hoist 'backend_interface*' settings from the per-proxy sub-section to the top level
                # (safe while iterating: Python 2 .items() returns a list copy)
                # noinspection PyUnresolvedReferences
                for key, value in backend_connection_manager[str(index)].items():
                    if key.startswith('backend_interface'):
                        backend_connection_manager[key] = value
                        # noinspection PyUnresolvedReferences
                        del backend_connection_manager[str(index)][key]
            # Fill in retry defaults only where not already configured
            for key, value in {'backend_interface_retries_on_error': 5,
                               'backend_interface_retry_interval_secs': 1,
                               'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                if key not in backend_connection_manager:
                    backend_connection_manager[key] = value
        else:
            # Already MULTI: still hoist any 'backend_interface*' keys left inside sub-sections
            backend_connection_manager = current_config
            for value in backend_connection_manager.values():
                if isinstance(value, dict):
                    # Safe while iterating: Python 2 .items() returns a list copy
                    for key, val in value.items():
                        if key.startswith('backend_interface'):
                            backend_connection_manager[key] = val
                            changes = True
                            del value[key]
            for key, value in {'backend_interface_retries_on_error': 5,
                               'backend_interface_retry_interval_secs': 1,
                               'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                if key not in backend_connection_manager:
                    changes = True
                    backend_connection_manager[key] = value

        if changes is True:
            storagedriver_config.clear_backend_connection_manager()
            storagedriver_config.configure_backend_connection_manager(**backend_connection_manager)
            storagedriver_config.save(root_client)

            # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section)
            ExtensionsToolbox.edit_version_file(client=root_client,
                                                package_name='volumedriver',
                                                old_service_name='volumedriver_{0}'.format(vpool.name))
            if service_manager.ImplementationClass == Systemd:
                root_client.run(['systemctl', 'daemon-reload'])

    ########################################
    # Update metadata_store_bits information
    for vpool in VPoolList.get_vpools():
        bits = None
        for storagedriver in vpool.storagedrivers:
            key = '/ovs/framework/hosts/{0}/services/volumedriver_{1}'.format(storagedriver.storagerouter.machine_id, vpool.name)
            if Configuration.exists(key=key) and 'METADATASTORE_BITS' not in Configuration.get(key=key):
                if bits is None:
                    # Extract the value once from the first StorageDriver's service file; fall back to 5 when not a digit
                    entries = service_manager.extract_from_service_file(name='ovs-volumedriver_{0}'.format(vpool.name),
                                                                       client=sr_client_map[storagedriver.storagerouter_guid],
                                                                       entries=['METADATASTORE_BITS='])
                    if len(entries) == 1:
                        bits = entries[0].split('=')[-1]
                        bits = int(bits) if bits.isdigit() else 5
                if bits is not None:
                    try:
                        content = Configuration.get(key=key)
                        content['METADATASTORE_BITS'] = bits
                        Configuration.set(key=key, value=content)
                    except:
                        # Best-effort: log and continue with the next StorageDriver
                        MigrationController._logger.exception('Error updating volumedriver info for vPool {0} on StorageRouter {1}'.format(vpool.name, storagedriver.storagerouter.name))
        if bits is not None:
            vpool.metadata_store_bits = bits
            vpool.save()

    MigrationController._logger.info('Finished out of band migrations')
    GenericController.refresh_package_information()
def _post_update_alba_plugin_alba(cls, components):
    """
    Execute some functionality after the ALBA plugin packages have been updated for the ASD manager nodes
    Runs the ALBA migration code first, then restarts the affected services on every ASD node
    (in their configured restart order) and finally re-checks the maintenance agents.
    :param components: Update components which have been executed
    :type components: list
    :return: None
    :rtype: NoneType
    """
    if PackageFactory.COMP_ALBA not in components:
        return

    # First run post-update migrations to update services, config mgmt, ... and restart services afterwards
    for method_name in ['migrate', 'migrate_sdm']:
        try:
            # noinspection PyUnresolvedReferences
            from ovs.lib.albamigration import AlbaMigrationController
            cls._logger.debug('Executing migration code: AlbaMigrationController.{0}()'.format(method_name))
            getattr(AlbaMigrationController, method_name)()
        except ImportError:
            cls._logger.error('Could not import AlbaMigrationController')
        except Exception:
            # Migrations are best-effort here; a failure must not block the service restarts below
            cls._logger.exception('Migration code for the ALBA plugin failed to be executed')

    # Update ALBA nodes
    method_name = inspect.currentframe().f_code.co_name
    cls._logger.info('Executing hook {0}'.format(method_name))
    # Sort nodes numerically by IP for a deterministic restart order
    alba_nodes = sorted(AlbaNodeList.get_albanodes_by_type(AlbaNode.NODE_TYPES.ASD),
                        key=lambda an: ExtensionsToolbox.advanced_sort(element=an.ip, separator='.'))
    for alba_node in alba_nodes:
        services_to_restart = []
        for component in components:
            if component not in alba_node.package_information:
                continue

            component_info = alba_node.package_information[component]
            if 'services_post_update' not in component_info:
                # Package_information still has the old format, so refresh update information
                # This can occur when updating from earlier than 2.11.0 to 2.11.0 and older
                try:
                    GenericController.refresh_package_information()
                except Exception:  # narrowed from bare 'except:' so SystemExit/KeyboardInterrupt are not swallowed
                    cls._logger.exception('{0}: Refreshing package information failed'.format(alba_node.ip))
                alba_node.discard()
                component_info = alba_node.package_information.get(component, {})

            # Keys are restart-order indices serialized as strings; convert back to int for numeric sorting
            services_post_update = dict((int(key), value) for key, value in component_info.get('services_post_update', {}).iteritems())
            for restart_order in sorted(services_post_update):
                for service_name in sorted(services_post_update[restart_order]):
                    if service_name not in services_to_restart:
                        services_to_restart.append(service_name)
        if len(services_to_restart) > 0:
            alba_node.client.restart_services(service_names=services_to_restart)

    # Renew maintenance services
    cls._logger.info('Checkup maintenance agents')
    AlbaController.checkup_maintenance_agents.delay()

    cls._logger.info('Executed hook {0}'.format(method_name))