def migrate(cls):
    # type: () -> None
    """
    Execute the migration logic.

    While holding the 'package_update_pu' file mutex this method:
        * Determines the node ID (from BOOTSTRAP_FILE when it exists, otherwise from /etc/openvstorage_sdm_id)
        * Stops the 'asd-manager' service when it is currently active
        * Runs the migration steps when the version stored in the configuration is lower than cls.CURRENT_VERSION
          (errors raised by these steps are logged, not propagated)
        * Stores cls.CURRENT_VERSION in the configuration
        * Starts the 'asd-manager' service again when it is not active
    :return: None
    :rtype: NoneType
    """
    with file_mutex('package_update_pu'):
        local_client = SSHClient(endpoint='127.0.0.1', username='******')

        # Override the created openvstorage_sdm_id during package install, with currently available SDM ID
        if local_client.file_exists(BOOTSTRAP_FILE):
            with open(BOOTSTRAP_FILE) as bstr_file:
                node_id = json.load(bstr_file)['node_id']
            local_client.file_write(filename='/etc/openvstorage_sdm_id', contents=node_id + '\n')
        else:
            with open('/etc/openvstorage_sdm_id', 'r') as id_file:
                node_id = id_file.read().strip()

        # A missing version key is treated as version 0, which makes every migration step below run
        key = '{0}/versions'.format(ASD_NODE_CONFIG_LOCATION.format(node_id))
        version = Configuration.get(key) if Configuration.exists(key) else 0

        # The asd-manager service is stopped for the duration of the migration and started again at the end
        asd_manager_service_name = 'asd-manager'
        if cls.service_manager.has_service(asd_manager_service_name, local_client) and cls.service_manager.get_service_status(asd_manager_service_name, local_client) == 'active':
            cls.logger.info('Stopping asd-manager service')
            cls.service_manager.stop_service(asd_manager_service_name, local_client)

        # @TODO: Move these migrations to alba_node.client.update_execute_migration_code()
        if version < cls.CURRENT_VERSION:
            try:
                # DB migrations
                from source.controllers.asd import ASDController
                from source.controllers.disk import DiskController
                from source.dal.asdbase import ASDBase
                from source.dal.lists.asdlist import ASDList
                from source.dal.lists.disklist import DiskList
                from source.dal.objects.asd import ASD

                if not local_client.file_exists('{0}/main.db'.format(ASDBase.DATABASE_FOLDER)):
                    local_client.dir_create([ASDBase.DATABASE_FOLDER])

                # Map the already modelled ASDs by their ID, so directories found on disk can be matched against them
                asd_map = dict((asd.asd_id, asd) for asd in ASDList.get_asds())
                DiskController.sync_disks()
                for disk in DiskList.get_usable_disks():
                    if disk.state == 'MISSING' or disk.mountpoint is None:
                        continue
                    # Every entry listed in the mountpoint is handled as an ASD ID
                    for asd_id in local_client.dir_list(disk.mountpoint):
                        if asd_id in asd_map:
                            asd = asd_map[asd_id]
                        else:
                            asd = ASD()

                        asd.disk = disk
                        asd.asd_id = asd_id
                        asd.folder = asd_id
                        # Only ASDs which have a configuration are saved
                        if asd.has_config:
                            if asd.port is None or asd.hosts is None:
                                config = Configuration.get(key=asd.config_key)
                                asd.port = config['port']
                                asd.hosts = config.get('ips', [])
                            asd.save()

                # Adjustment of open file descriptors for ASD/maintenance services to 8192
                asd_service_names = list(ASDController.list_asd_services())
                maintenance_service_names = list(MaintenanceController.get_services())
                for service_name in asd_service_names + maintenance_service_names:
                    if cls.service_manager.has_service(name=service_name, client=local_client):
                        # Location of the service file and the expected limit line depend on the service manager in use
                        if cls.service_manager.__class__ == Systemd:
                            path = '/lib/systemd/system/{0}.service'.format(service_name)
                            check = 'LimitNOFILE=8192'
                        else:
                            path = '/etc/init/{0}.conf'.format(service_name)
                            check = 'limit nofile 8192 8192'

                        # Only an existing service file which lacks the limit line requires a rewrite
                        restart_required = False
                        if os.path.exists(path):
                            with open(path, 'r') as system_file:
                                if check not in system_file.read():
                                    restart_required = True

                        if restart_required is False:
                            continue

                        configuration_key = ServiceFactory.SERVICE_CONFIG_KEY.format(node_id, service_name)
                        if Configuration.exists(configuration_key):
                            # Rewrite the service file
                            cls.service_manager.add_service(name=ASDController.ASD_PREFIX if service_name in asd_service_names else MaintenanceController.MAINTENANCE_PREFIX,
                                                            client=local_client,
                                                            params=Configuration.get(configuration_key),
                                                            target_name=service_name)

                            # Let the update know that the ASD / maintenance services need to be restarted
                            # Inside `if Configuration.exists`, because it is useless to report a restart if we haven't rewritten the service file
                            ExtensionsToolbox.edit_version_file(client=local_client,
                                                                package_name='alba',
                                                                old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, service_name))
                    if cls.service_manager.__class__ == Systemd:
                        local_client.run(['systemctl', 'daemon-reload'])

                # Version 3: Addition of 'ExecReload' for ASD/maintenance SystemD services
                if cls.service_manager.__class__ == Systemd:  # Upstart does not have functionality to reload a process' configuration
                    reload_daemon = False
                    asd_service_names = list(ASDController.list_asd_services())
                    maintenance_service_names = list(MaintenanceController.get_services())
                    for service_name in asd_service_names + maintenance_service_names:
                        if not cls.service_manager.has_service(name=service_name, client=local_client):
                            continue

                        path = '/lib/systemd/system/{0}.service'.format(service_name)
                        if os.path.exists(path):
                            with open(path, 'r') as system_file:
                                if 'ExecReload' not in system_file.read():
                                    reload_daemon = True
                                    configuration_key = ServiceFactory.SERVICE_CONFIG_KEY.format(node_id, service_name)
                                    if Configuration.exists(configuration_key):
                                        # No need to edit the service version file, since this change only requires a daemon-reload
                                        cls.service_manager.add_service(name=ASDController.ASD_PREFIX if service_name in asd_service_names else MaintenanceController.MAINTENANCE_PREFIX,
                                                                        client=local_client,
                                                                        params=Configuration.get(configuration_key),
                                                                        target_name=service_name)
                    if reload_daemon is True:
                        local_client.run(['systemctl', 'daemon-reload'])

                # Version 6: Introduction of Active Drive
                all_local_ips = OSFactory.get_manager().get_ip_addresses(client=local_client)
                for asd in ASDList.get_asds():
                    if asd.has_config:
                        asd_config = Configuration.get(asd.config_key)
                        if 'multicast' not in asd_config:
                            asd_config['multicast'] = None
                        # A missing or empty 'ips' entry falls back to all IP addresses of this node
                        if 'ips' in asd_config:
                            asd_ips = asd_config['ips'] or all_local_ips
                        else:
                            asd_ips = all_local_ips
                        asd.hosts = asd_ips
                        asd_config['ips'] = asd_ips
                        Configuration.set(asd.config_key, asd_config)
                        asd.save()

                # Version 7: Moving flask certificate files to config dir
                for file_name in ['passphrase', 'server.crt', 'server.csr', 'server.key']:
                    if local_client.file_exists('/opt/asd-manager/source/{0}'.format(file_name)):
                        local_client.file_move(source_file_name='/opt/asd-manager/source/{0}'.format(file_name),
                                               destination_file_name='/opt/asd-manager/config/{0}'.format(file_name))
            # NOTE(review): the bare except also traps SystemExit / KeyboardInterrupt - presumably deliberate so the service is always started again; confirm
            except:
                cls.logger.exception('Error while executing post-update code on node {0}'.format(node_id))
        # NOTE(review): the version is stored even when the migration above logged an error, so a failed step is not retried on the next run - confirm this is intended
        Configuration.set(key, cls.CURRENT_VERSION)

        if cls.service_manager.has_service(asd_manager_service_name, local_client) and cls.service_manager.get_service_status(asd_manager_service_name, local_client) != 'active':
            cls.logger.info('Starting asd-manager service')
            cls.service_manager.start_service(asd_manager_service_name, local_client)

    cls.logger.info('Post-update logic executed')
def migrate():
    """
    Executes async migrations. It doesn't matter too much when they are executed, as long as they get eventually executed.
    This code will typically contain:
        * "dangerous" migration code (it needs certain running services)
        * Migration code depending on a cluster-wide state
        * ...
        * Successfully finishing a piece of migration code, should create an entry in /ovs/framework/migration in case it should not be executed again
            * Eg: /ovs/framework/migration|stats_monkey_integration: True

    Every section below is written to be re-runnable: it either checks whether its change is already present
    (content of a service file, presence of a configuration key) or is guarded by a '/ovs/framework/migration|<name>' flag.
    :return: None
    :rtype: NoneType
    """
    MigrationController._logger.info('Preparing out of band migrations...')

    from ovs.dal.lists.servicetypelist import ServiceTypeList
    from ovs.dal.lists.storagedriverlist import StorageDriverList
    from ovs.dal.lists.storagerouterlist import StorageRouterList
    from ovs.dal.lists.vpoollist import VPoolList
    from ovs.extensions.db.arakooninstaller import ArakoonInstaller
    from ovs.extensions.generic.configuration import Configuration
    from ovs.extensions.generic.sshclient import SSHClient
    from ovs_extensions.generic.toolbox import ExtensionsToolbox
    from ovs.extensions.migration.migration.ovsmigrator import ExtensionMigrator
    from ovs.extensions.packages.packagefactory import PackageFactory
    from ovs_extensions.services.interfaces.systemd import Systemd
    from ovs.extensions.services.servicefactory import ServiceFactory
    from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
    from ovs.lib.helpers.storagedriver.installer import StorageDriverInstaller

    MigrationController._logger.info('Start out of band migrations...')
    service_manager = ServiceFactory.get_manager()

    # One SSH client per StorageRouter, re-used by all sections below
    sr_client_map = {}
    for storagerouter in StorageRouterList.get_storagerouters():
        sr_client_map[storagerouter.guid] = SSHClient(endpoint=storagerouter.ip,  # Is triggered during post-update code too during which the ovs-watcher-framework service is still down and thus not refreshing the heartbeat --> use IP i/o StorageRouter
                                                      username='******')

    #########################################################
    # Addition of 'ExecReload' for AlbaProxy SystemD services
    if ServiceFactory.get_service_type() == 'systemd':
        changed_clients = set()
        for storagedriver in StorageDriverList.get_storagedrivers():
            root_client = sr_client_map[storagedriver.storagerouter_guid]
            for alba_proxy in storagedriver.alba_proxies:
                service = alba_proxy.service
                service_name = 'ovs-{0}'.format(service.name)
                if not service_manager.has_service(name=service_name, client=root_client):
                    continue
                if 'ExecReload=' in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                    continue

                try:
                    service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY, client=root_client, target_name=service_name)
                    changed_clients.add(root_client)
                except Exception:
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

    ##################################################################
    # Adjustment of open file descriptors for Arakoon services to 8192
    changed_clients = set()
    for storagerouter in StorageRouterList.get_storagerouters():
        root_client = sr_client_map[storagerouter.guid]
        for service_name in service_manager.list_services(client=root_client):
            if not service_name.startswith('ovs-arakoon-'):
                continue

            # Location of the service file and the expected limit line depend on the service type in use
            if ServiceFactory.get_service_type() == 'systemd':
                path = '/lib/systemd/system/{0}.service'.format(service_name)
                check = 'LimitNOFILE=8192'
            else:
                path = '/etc/init/{0}.conf'.format(service_name)
                check = 'limit nofile 8192 8192'

            if not root_client.file_exists(path):
                continue
            if check in root_client.file_read(path):
                continue

            try:
                service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name=service_name)
                changed_clients.add(root_client)
                ExtensionsToolbox.edit_version_file(client=root_client, package_name='arakoon', old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, service_name))
            except Exception:
                MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
    # NOTE(review): 'systemctl' is invoked here regardless of the service type checked above - confirm this is intended for non-systemd nodes
    for root_client in changed_clients:
        root_client.run(['systemctl', 'daemon-reload'])

    #############################
    # Migrate to multiple proxies
    for storagedriver in StorageDriverList.get_storagedrivers():
        vpool = storagedriver.vpool
        root_client = sr_client_map[storagedriver.storagerouter_guid]
        for alba_proxy in storagedriver.alba_proxies:
            # Rename alba_proxy service in model
            service = alba_proxy.service
            old_service_name = 'albaproxy_{0}'.format(vpool.name)
            new_service_name = 'albaproxy_{0}_0'.format(vpool.name)
            if old_service_name != service.name:
                continue
            service.name = new_service_name
            service.save()

            if not service_manager.has_service(name=old_service_name, client=root_client):
                continue
            old_configuration_key = '/ovs/framework/hosts/{0}/services/{1}'.format(storagedriver.storagerouter.machine_id, old_service_name)
            if not Configuration.exists(key=old_configuration_key):
                continue

            # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service)
            ExtensionsToolbox.edit_version_file(client=root_client,
                                                package_name='alba',
                                                old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, old_service_name),
                                                new_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, new_service_name))

            # Register new service and remove old service
            service_manager.add_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY,
                                        client=root_client,
                                        params=Configuration.get(old_configuration_key),
                                        target_name='ovs-{0}'.format(new_service_name))

            # Update scrub proxy config
            proxy_config_key = '/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)
            proxy_config = None if Configuration.exists(key=proxy_config_key) is False else Configuration.get(proxy_config_key)
            if proxy_config is not None:
                fragment_cache = proxy_config.get(StorageDriverConfiguration.CACHE_FRAGMENT, ['none', {}])
                if fragment_cache[0] == 'alba' and fragment_cache[1].get('cache_on_write') is True:  # Accelerated ALBA configured
                    fragment_cache_scrub_info = copy.deepcopy(fragment_cache)
                    fragment_cache_scrub_info[1]['cache_on_read'] = False
                    proxy_scrub_config_key = '/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid)
                    proxy_scrub_config = None if Configuration.exists(key=proxy_scrub_config_key) is False else Configuration.get(proxy_scrub_config_key)
                    if proxy_scrub_config is not None and proxy_scrub_config[StorageDriverConfiguration.CACHE_FRAGMENT] == ['none']:
                        proxy_scrub_config[StorageDriverConfiguration.CACHE_FRAGMENT] = fragment_cache_scrub_info
                        Configuration.set(key=proxy_scrub_config_key, value=proxy_scrub_config)

        # Update 'backend_connection_manager' section
        changes = False
        storagedriver_config = StorageDriverConfiguration(vpool.guid, storagedriver.storagedriver_id)
        if 'backend_connection_manager' not in storagedriver_config.configuration:
            continue

        current_config = storagedriver_config.configuration['backend_connection_manager']
        if current_config.get('backend_type') != 'MULTI':
            # Single proxy layout: build a 'MULTI' layout with one numbered sub-section per ALBA proxy (ordered by first port)
            changes = True
            backend_connection_manager = {'backend_type': 'MULTI'}
            for index, proxy in enumerate(sorted(storagedriver.alba_proxies, key=lambda pr: pr.service.ports[0])):
                backend_connection_manager[str(index)] = copy.deepcopy(current_config)
                # noinspection PyUnresolvedReferences
                backend_connection_manager[str(index)]['alba_connection_use_rora'] = True
                # noinspection PyUnresolvedReferences
                backend_connection_manager[str(index)]['alba_connection_rora_manifest_cache_capacity'] = 5000
                # 'backend_interface*' settings are moved from the numbered sub-section to the top level
                # noinspection PyUnresolvedReferences
                for key, value in backend_connection_manager[str(index)].items():
                    if key.startswith('backend_interface'):
                        backend_connection_manager[key] = value
                        # noinspection PyUnresolvedReferences
                        del backend_connection_manager[str(index)][key]
            for key, value in {'backend_interface_retries_on_error': 5,
                               'backend_interface_retry_interval_secs': 1,
                               'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                if key not in backend_connection_manager:
                    backend_connection_manager[key] = value
        else:
            # Already a 'MULTI' layout: only make sure the 'backend_interface*' settings live at the top level
            backend_connection_manager = current_config
            for value in backend_connection_manager.values():
                if isinstance(value, dict):
                    for key, val in value.items():
                        if key.startswith('backend_interface'):
                            backend_connection_manager[key] = val
                            changes = True
                            del value[key]
            for key, value in {'backend_interface_retries_on_error': 5,
                               'backend_interface_retry_interval_secs': 1,
                               'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                if key not in backend_connection_manager:
                    changes = True
                    backend_connection_manager[key] = value

        if changes is True:
            storagedriver_config.clear_backend_connection_manager()
            storagedriver_config.configure_backend_connection_manager(**backend_connection_manager)
            storagedriver_config.save(root_client)

            # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section)
            ExtensionsToolbox.edit_version_file(client=root_client,
                                                package_name='volumedriver',
                                                old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, 'volumedriver_{0}'.format(vpool.name)))
            if service_manager.__class__ == Systemd:
                root_client.run(['systemctl', 'daemon-reload'])

    ########################################
    # Update metadata_store_bits information
    vpools = VPoolList.get_vpools()
    for vpool in vpools:
        bits = None
        for storagedriver in vpool.storagedrivers:
            key = '/ovs/framework/hosts/{0}/services/volumedriver_{1}'.format(storagedriver.storagerouter.machine_id, vpool.name)
            if Configuration.exists(key=key) and 'METADATASTORE_BITS' not in Configuration.get(key=key):
                # The value is extracted from a service file once per vPool and re-used for its other StorageDrivers
                if bits is None:
                    entries = service_manager.extract_from_service_file(name='ovs-volumedriver_{0}'.format(vpool.name),
                                                                        client=sr_client_map[storagedriver.storagerouter_guid],
                                                                        entries=['METADATASTORE_BITS='])
                    if len(entries) == 1:
                        bits = entries[0].split('=')[-1]
                        bits = int(bits) if bits.isdigit() else 5
                if bits is not None:
                    try:
                        content = Configuration.get(key=key)
                        content['METADATASTORE_BITS'] = bits
                        Configuration.set(key=key, value=content)
                    except Exception:
                        MigrationController._logger.exception('Error updating volumedriver info for vPool {0} on StorageRouter {1}'.format(vpool.name, storagedriver.storagerouter.name))

        if bits is not None:
            vpool.metadata_store_bits = bits
            vpool.save()

    #####################################
    # Update the vPool metadata structure
    def _update_metadata_structure(metadata):
        """
        Convert vPool metadata to the structure which has a top-level 'caching_info' key
        Works on a deep copy, the passed in metadata is left untouched
        :param metadata: Current metadata of the vPool
        :type metadata: dict
        :return: The converted metadata
        :rtype: dict
        :raises KeyError: When an expected key is missing in the metadata
        """
        metadata = copy.deepcopy(metadata)
        cache_structure = {'read': False,
                           'write': False,
                           'is_backend': False,
                           'quota': None,
                           'backend_info': {'name': None,  # Will be filled in when is_backend is true
                                            'backend_guid': None,
                                            'alba_backend_guid': None,
                                            'policies': None,
                                            'preset': None,
                                            'arakoon_config': None,
                                            'connection_info': {'client_id': None,
                                                                'client_secret': None,
                                                                'host': None,
                                                                'port': None,
                                                                'local': None}}
                           }
        # Maps the keys of the new structure onto the names used in the old structure
        structure_map = {StorageDriverConfiguration.CACHE_BLOCK: {'read': 'block_cache_on_read',
                                                                  'write': 'block_cache_on_write',
                                                                  'quota': 'quota_bc',
                                                                  'backend_prefix': 'backend_bc_{0}'},
                         StorageDriverConfiguration.CACHE_FRAGMENT: {'read': 'fragment_cache_on_read',
                                                                     'write': 'fragment_cache_on_write',
                                                                     'quota': 'quota_fc',
                                                                     'backend_prefix': 'backend_aa_{0}'}}
        if 'arakoon_config' in metadata['backend']:  # Arakoon config should be placed under the backend info
            metadata['backend']['backend_info']['arakoon_config'] = metadata['backend'].pop('arakoon_config')
        if 'connection_info' in metadata['backend']:  # Connection info should be placed under the backend info
            metadata['backend']['backend_info']['connection_info'] = metadata['backend'].pop('connection_info')
        if 'caching_info' not in metadata:  # Caching info is the new key
            would_be_caching_info = {}
            metadata['caching_info'] = would_be_caching_info
            # Extract all caching data for every storagerouter
            current_caching_info = metadata['backend'].pop('caching_info')  # Pop to mutate metadata
            for storagerouter_guid in current_caching_info.iterkeys():
                current_cache_data = current_caching_info[storagerouter_guid]
                storagerouter_caching_info = {}
                would_be_caching_info[storagerouter_guid] = storagerouter_caching_info
                for cache_type, cache_type_mapping in structure_map.iteritems():
                    new_cache_structure = copy.deepcopy(cache_structure)
                    storagerouter_caching_info[cache_type] = new_cache_structure
                    for new_structure_key, old_structure_key in cache_type_mapping.iteritems():
                        if new_structure_key == 'backend_prefix':
                            # Get possible backend related info
                            metadata_key = old_structure_key.format(storagerouter_guid)
                            if metadata_key not in metadata:
                                continue
                            backend_data = metadata.pop(metadata_key)  # Pop to mutate metadata
                            new_cache_structure['is_backend'] = True
                            # Copy over the old data
                            new_cache_structure['backend_info']['arakoon_config'] = backend_data['arakoon_config']
                            new_cache_structure['backend_info'].update(backend_data['backend_info'])
                            new_cache_structure['backend_info']['connection_info'].update(backend_data['connection_info'])
                        else:
                            new_cache_structure[new_structure_key] = current_cache_data.get(old_structure_key)
        return metadata

    vpools = VPoolList.get_vpools()
    for vpool in vpools:
        try:
            new_metadata = _update_metadata_structure(vpool.metadata)
            vpool.metadata = new_metadata
            vpool.save()
        except KeyError:
            MigrationController._logger.exception('Exceptions occurred when updating the metadata for vPool {0}'.format(vpool.name))

    ##############################################
    # Always use indent=4 during Configuration set
    def _resave_all_config_entries(config_path='/ovs'):
        """
        Recursive function which checks for every config management key whether it is a directory or not.
        If not a directory, we retrieve the config and just save it again using the new indentation logic
        :param config_path: Path in the config management to start from
        :type config_path: str
        :return: None
        :rtype: NoneType
        """
        for item in Configuration.list(config_path):
            new_path = config_path + '/' + item
            if Configuration.dir_exists(new_path) is True:
                _resave_all_config_entries(config_path=new_path)
            else:
                try:
                    _config = Configuration.get(new_path)
                    Configuration.set(new_path, _config)
                except Exception:
                    # Fall back to a raw get/set when the regular get/set fails for this key
                    _config = Configuration.get(new_path, raw=True)
                    Configuration.set(new_path, _config, raw=True)

    if ExtensionMigrator.THIS_VERSION <= 13:  # There is no way of checking whether this new indentation logic has been applied, so we only perform this for version 13 and lower
        MigrationController._logger.info('Re-saving every configuration setting with new indentation rules')
        _resave_all_config_entries()

    ############################
    # Update some default values
    def _update_manifest_cache_size(_proxy_config_key):
        """
        Set the manifest cache size of a proxy configuration (and of its 'alba' caches) to 500 MiB
        :param _proxy_config_key: Configuration key of the proxy configuration
        :type _proxy_config_key: str
        :return: True when the configuration has been changed and saved, False otherwise
        :rtype: bool
        """
        updated = False
        manifest_cache_size = 500 * 1024 * 1024
        if Configuration.exists(key=_proxy_config_key):
            _proxy_config = Configuration.get(key=_proxy_config_key)
            for cache_type in [StorageDriverConfiguration.CACHE_BLOCK, StorageDriverConfiguration.CACHE_FRAGMENT]:
                if cache_type in _proxy_config and _proxy_config[cache_type][0] == 'alba':
                    if _proxy_config[cache_type][1]['manifest_cache_size'] != manifest_cache_size:
                        updated = True
                        _proxy_config[cache_type][1]['manifest_cache_size'] = manifest_cache_size
            if _proxy_config['manifest_cache_size'] != manifest_cache_size:
                updated = True
                _proxy_config['manifest_cache_size'] = manifest_cache_size
            if updated is True:
                Configuration.set(key=_proxy_config_key, value=_proxy_config)
        return updated

    for storagedriver in StorageDriverList.get_storagedrivers():
        try:
            vpool = storagedriver.vpool
            root_client = sr_client_map[storagedriver.storagerouter_guid]
            _update_manifest_cache_size('/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid))  # Generic scrub proxy is deployed every time scrubbing kicks in, so no need to restart these services
            for alba_proxy in storagedriver.alba_proxies:
                if _update_manifest_cache_size('/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)) is True:
                    # Add '-reboot' to alba_proxy services (because of the updated proxy configuration)
                    ExtensionsToolbox.edit_version_file(client=root_client,
                                                        package_name='alba',
                                                        old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, alba_proxy.service.name))

            # Update 'backend_connection_manager' section
            changes = False
            storagedriver_config = StorageDriverConfiguration(vpool.guid, storagedriver.storagedriver_id)
            if 'backend_connection_manager' not in storagedriver_config.configuration:
                continue

            current_config = storagedriver_config.configuration['backend_connection_manager']
            for key, value in current_config.iteritems():
                if key.isdigit() is True:  # Only the numbered sub-sections are updated
                    if value.get('alba_connection_asd_connection_pool_capacity') != 10:
                        changes = True
                        value['alba_connection_asd_connection_pool_capacity'] = 10
                    if value.get('alba_connection_timeout') != 30:
                        changes = True
                        value['alba_connection_timeout'] = 30
                    if value.get('alba_connection_rora_manifest_cache_capacity') != 25000:
                        changes = True
                        value['alba_connection_rora_manifest_cache_capacity'] = 25000

            if changes is True:
                storagedriver_config.clear_backend_connection_manager()
                storagedriver_config.configure_backend_connection_manager(**current_config)
                storagedriver_config.save(root_client)

                # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='volumedriver',
                                                    old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, 'volumedriver_{0}'.format(vpool.name)))
        except Exception:
            MigrationController._logger.exception('Updating default configuration values failed for StorageDriver {0}'.format(storagedriver.storagedriver_id))

    ####################################################
    # Adding proxy fail fast as env variable for proxies
    changed_clients = set()
    for storagerouter in StorageRouterList.get_storagerouters():
        root_client = sr_client_map[storagerouter.guid]
        for service_name in service_manager.list_services(client=root_client):
            if not service_name.startswith('ovs-albaproxy_'):
                continue

            if ServiceFactory.get_service_type() == 'systemd':
                path = '/lib/systemd/system/{0}.service'.format(service_name)
                check = 'Environment=ALBA_FAIL_FAST=true'
            else:
                path = '/etc/init/{0}.conf'.format(service_name)
                check = 'env ALBA_FAIL_FAST=true'

            if not root_client.file_exists(path):
                continue
            if check in root_client.file_read(path):
                continue

            try:
                service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY, client=root_client, target_name=service_name)
                changed_clients.add(root_client)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='alba',
                                                    old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, service_name))
            except Exception:
                MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
    for root_client in changed_clients:
        root_client.run(['systemctl', 'daemon-reload'])

    ######################################
    # Integration of stats monkey (2.10.2)
    if Configuration.get(key='/ovs/framework/migration|stats_monkey_integration', default=False) is False:
        try:
            # Get content of old key into new key
            old_stats_monkey_key = '/statsmonkey/statsmonkey'
            if Configuration.exists(key=old_stats_monkey_key) is True:
                Configuration.set(key='/ovs/framework/monitoring/stats_monkey', value=Configuration.get(key=old_stats_monkey_key))
                Configuration.delete(key=old_stats_monkey_key)

            # Make sure to disable the stats monkey by default or take over the current schedule if it was configured manually before
            celery_key = '/ovs/framework/scheduling/celery'
            current_value = None
            scheduling_config = Configuration.get(key=celery_key, default={})
            if 'statsmonkey.run_all_stats' in scheduling_config:  # Old celery task name of the stats monkey
                current_value = scheduling_config.pop('statsmonkey.run_all_stats')
            scheduling_config['ovs.stats_monkey.run_all'] = current_value
            scheduling_config['alba.stats_monkey.run_all'] = current_value
            Configuration.set(key=celery_key, value=scheduling_config)

            # Rename the support settings
            support_key = '/ovs/framework/support'
            support_config = Configuration.get(key=support_key)
            support_config['support_agent'] = support_config.pop('enabled', True)
            support_config['remote_access'] = support_config.pop('enablesupport', False)
            Configuration.set(key=support_key, value=support_config)

            # Make sure once this finished, it never runs again by setting this key to True
            Configuration.set(key='/ovs/framework/migration|stats_monkey_integration', value=True)
        except Exception:
            MigrationController._logger.exception('Integration of stats monkey failed')

    ######################################################
    # Write away cluster ID to a file for back-up purposes
    try:
        cluster_id = Configuration.get(key='/ovs/framework/cluster_id', default=None)
        with open(Configuration.CONFIG_STORE_LOCATION, 'r') as config_file:
            config = json.load(config_file)
        # Only written when the file does not contain a cluster ID yet
        if cluster_id is not None and config.get('cluster_id', None) is None:
            config['cluster_id'] = cluster_id
            with open(Configuration.CONFIG_STORE_LOCATION, 'w') as config_file:
                json.dump(config, config_file, indent=4)
    except Exception:
        MigrationController._logger.exception('Writing cluster id to a file failed.')

    #########################################################
    # Additional string formatting in Arakoon services (2.11)
    try:
        if Configuration.get(key='/ovs/framework/migration|arakoon_service_update', default=False) is False:
            arakoon_service_names = [ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name) for cluster_name in Configuration.list(key='ovs/arakoon')]
            for storagerouter in StorageRouterList.get_masters():
                for service_name in arakoon_service_names:
                    config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(storagerouter.machine_id, service_name)
                    if Configuration.exists(key=config_key):
                        config = Configuration.get(key=config_key)
                        config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR
                        config['ARAKOON_PKG_NAME'] = PackageFactory.PKG_ARAKOON
                        config['ARAKOON_VERSION_CMD'] = PackageFactory.VERSION_CMD_ARAKOON
                        Configuration.set(key=config_key, value=config)
            # Make sure once this finished, it never runs again by setting this key to True
            Configuration.set(key='/ovs/framework/migration|arakoon_service_update', value=True)
    except Exception:
        MigrationController._logger.exception('Updating the string formatting for the Arakoon services failed')

    ############################################################
    # Additional string formatting in ALBA proxy services (2.11)
    # The clients collected here (and by the 2 sections below) get a single 'systemctl daemon-reload' afterwards
    changed_clients = set()
    try:
        if Configuration.get(key='/ovs/framework/migration|alba_proxy_service_update', default=False) is False:
            alba_pkg_name, alba_version_cmd = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_ALBA)
            for service in ServiceTypeList.get_by_name('AlbaProxy').services:
                root_client = sr_client_map[service.storagerouter_guid]
                config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(service.storagerouter.machine_id, service.name)
                if Configuration.exists(key=config_key):
                    config = Configuration.get(key=config_key)
                    config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR
                    config['ALBA_PKG_NAME'] = alba_pkg_name
                    config['ALBA_VERSION_CMD'] = alba_version_cmd
                    Configuration.set(key=config_key, value=config)
                    service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY,
                                                       client=root_client,
                                                       target_name='ovs-{0}'.format(service.name))
                    changed_clients.add(root_client)
            # Make sure once this finished, it never runs again by setting this key to True
            Configuration.set(key='/ovs/framework/migration|alba_proxy_service_update', value=True)
    except Exception:
        MigrationController._logger.exception('Updating the string formatting for the ALBA proxy services failed')

    ############################################################
    # Additional string formatting in DTL/VOLDRV services (2.11)
    try:
        if Configuration.get(key='/ovs/framework/migration|voldrv_service_update', default=False) is False:
            sd_pkg_name, sd_version_cmd = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_SD)
            for vpool in VPoolList.get_vpools():
                for storagedriver in vpool.storagedrivers:
                    root_client = sr_client_map[storagedriver.storagerouter_guid]
                    for entry in ['dtl', 'volumedriver']:
                        service_name = '{0}_{1}'.format(entry, vpool.name)
                        service_template = StorageDriverInstaller.SERVICE_TEMPLATE_DTL if entry == 'dtl' else StorageDriverInstaller.SERVICE_TEMPLATE_SD
                        config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(storagedriver.storagerouter.machine_id, service_name)
                        if Configuration.exists(key=config_key):
                            config = Configuration.get(key=config_key)
                            config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR
                            config['VOLDRV_PKG_NAME'] = sd_pkg_name
                            config['VOLDRV_VERSION_CMD'] = sd_version_cmd
                            Configuration.set(key=config_key, value=config)
                            service_manager.regenerate_service(name=service_template,
                                                               client=root_client,
                                                               target_name='ovs-{0}'.format(service_name))
                            changed_clients.add(root_client)
            # Make sure once this finished, it never runs again by setting this key to True
            Configuration.set(key='/ovs/framework/migration|voldrv_service_update', value=True)
    except Exception:
        MigrationController._logger.exception('Updating the string formatting for the DTL/VOLDRV services failed')

    #######################################################
    # Storing actual package name in version files (2.11.0) (https://github.com/openvstorage/framework/issues/1876)
    if Configuration.get(key='/ovs/framework/migration|actual_package_name_in_version_file', default=False) is False:
        try:
            voldrv_pkg_name, _ = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_SD)
            for storagerouter in StorageRouterList.get_storagerouters():
                root_client = sr_client_map.get(storagerouter.guid)
                if root_client is None:
                    continue

                for file_name in root_client.file_list(directory=ServiceFactory.RUN_FILE_DIR):
                    if not file_name.endswith('.version'):
                        continue
                    file_path = '{0}/{1}'.format(ServiceFactory.RUN_FILE_DIR, file_name)
                    contents = root_client.file_read(filename=file_path)
                    regenerate = False
                    if voldrv_pkg_name == PackageFactory.PKG_VOLDRV_SERVER:
                        if 'volumedriver-server' in contents:
                            regenerate = True
                            contents = contents.replace('volumedriver-server', PackageFactory.PKG_VOLDRV_SERVER)
                            root_client.file_write(filename=file_path, contents=contents)
                    elif voldrv_pkg_name == PackageFactory.PKG_VOLDRV_SERVER_EE:
                        if 'volumedriver-server' in contents or PackageFactory.PKG_VOLDRV_SERVER in contents:
                            regenerate = True
                            contents = contents.replace('volumedriver-server', PackageFactory.PKG_VOLDRV_SERVER_EE)
                            contents = contents.replace(PackageFactory.PKG_VOLDRV_SERVER, PackageFactory.PKG_VOLDRV_SERVER_EE)
                            root_client.file_write(filename=file_path, contents=contents)

                    if regenerate is True:
                        service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_DTL if file_name.startswith('dtl') else StorageDriverInstaller.SERVICE_TEMPLATE_SD,
                                                           client=root_client,
                                                           target_name='ovs-{0}'.format(file_name.split('.')[0]))  # Leave out .version
                        changed_clients.add(root_client)
            Configuration.set(key='/ovs/framework/migration|actual_package_name_in_version_file', value=True)
        except Exception:
            MigrationController._logger.exception('Updating actual package name for version files failed')

    for root_client in changed_clients:
        try:
            root_client.run(['systemctl', 'daemon-reload'])
        except Exception:
            MigrationController._logger.exception('Executing command "systemctl daemon-reload" failed')

    #########################################################
    # Addition of 'Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50' for AlbaProxy SystemD services
    if ServiceFactory.get_service_type() == 'systemd':
        changed_clients = set()
        for storagedriver in StorageDriverList.get_storagedrivers():
            root_client = sr_client_map[storagedriver.storagerouter_guid]
            for alba_proxy in storagedriver.alba_proxies:
                service = alba_proxy.service
                service_name = 'ovs-{0}'.format(service.name)
                if not service_manager.has_service(name=service_name, client=root_client):
                    continue
                if "Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50" in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                    continue

                try:
                    service_manager.regenerate_service(name='ovs-albaproxy', client=root_client, target_name=service_name)
                    changed_clients.add(root_client)
                except Exception:
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

    #########################################################
    # Addition of 'Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50' for Arakoon SystemD services
    if ServiceFactory.get_service_type() == 'systemd':
        changed_clients = set()
        for storagerouter in StorageRouterList.get_storagerouters():
            root_client = sr_client_map[storagerouter.guid]
            for service_name in service_manager.list_services(client=root_client):
                if not service_name.startswith('ovs-arakoon-'):
                    continue
                if not service_manager.has_service(name=service_name, client=root_client):
                    continue
                if "Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50" in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                    continue

                try:
                    service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name=service_name)
                    changed_clients.add(root_client)
                except Exception:
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

    MigrationController._logger.info('Finished out of band migrations')
def migrate():
    """
    Executes async migrations. It doesn't matter too much when they are executed, as long as they get eventually
    executed. This code will typically contain:
    * "dangerous" migration code (it needs certain running services)
    * Migration code depending on a cluster-wide state
    * ...

    Steps performed, in order:
    1. Regenerate AlbaProxy SystemD service files which do not contain 'ExecReload=' yet
    2. Regenerate Arakoon service files which do not set the open file descriptor limit to 8192 yet
    3. Rename 'albaproxy_<vpool>' services to 'albaproxy_<vpool>_0', update the scrub proxy configuration and
       rewrite the 'backend_connection_manager' section of every StorageDriver to the 'MULTI' layout
    4. Store 'METADATASTORE_BITS' in the volumedriver service configuration and on the vPool model
    Afterwards the package information is refreshed.

    Failures while regenerating a single service or updating a single configuration key are logged and
    do not abort the remaining migrations.
    :return: None
    :rtype: NoneType
    """
    MigrationController._logger.info('Preparing out of band migrations...')

    # Imports are kept local to this function, exactly as in the original implementation
    from ovs.dal.lists.storagedriverlist import StorageDriverList
    from ovs.dal.lists.storagerouterlist import StorageRouterList
    from ovs.dal.lists.vpoollist import VPoolList
    from ovs.extensions.generic.configuration import Configuration
    from ovs.extensions.generic.sshclient import SSHClient
    from ovs_extensions.generic.toolbox import ExtensionsToolbox
    from ovs_extensions.services.interfaces.systemd import Systemd
    from ovs.extensions.services.servicefactory import ServiceFactory
    from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
    from ovs.lib.generic import GenericController

    MigrationController._logger.info('Start out of band migrations...')
    service_manager = ServiceFactory.get_manager()

    # One SSH client per StorageRouter, re-used by every migration step below
    sr_client_map = {}
    for storagerouter in StorageRouterList.get_storagerouters():
        sr_client_map[storagerouter.guid] = SSHClient(endpoint=storagerouter, username='******')

    # Evaluated once instead of once per inspected service
    uses_systemd = ServiceFactory.get_service_type() == 'systemd'

    # Defaults added at the top level of 'backend_connection_manager' when missing
    retry_defaults = {'backend_interface_retries_on_error': 5,
                      'backend_interface_retry_interval_secs': 1,
                      'backend_interface_retry_backoff_multiplier': 2.0}

    #########################################################
    # Addition of 'ExecReload' for AlbaProxy SystemD services
    if uses_systemd:
        changed_clients = set()
        for storagedriver in StorageDriverList.get_storagedrivers():
            root_client = sr_client_map[storagedriver.storagerouter_guid]
            for alba_proxy in storagedriver.alba_proxies:
                service = alba_proxy.service
                service_name = 'ovs-{0}'.format(service.name)
                if not service_manager.has_service(name=service_name, client=root_client):
                    continue
                service_file = '/lib/systemd/system/{0}.service'.format(service_name)
                if 'ExecReload=' in root_client.file_read(filename=service_file):
                    continue  # Already migrated

                try:
                    service_manager.regenerate_service(name='ovs-albaproxy',
                                                       client=root_client,
                                                       target_name=service_name)
                    changed_clients.add(root_client)
                except Exception:
                    # Best effort: log and carry on with the next service
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

    ##################################################################
    # Adjustment of open file descriptors for Arakoon services to 8192
    changed_clients = set()
    for storagerouter in StorageRouterList.get_storagerouters():
        root_client = sr_client_map[storagerouter.guid]
        for service_name in service_manager.list_services(client=root_client):
            if not service_name.startswith('ovs-arakoon-'):
                continue

            if uses_systemd:
                path = '/lib/systemd/system/{0}.service'.format(service_name)
                check = 'LimitNOFILE=8192'
            else:
                path = '/etc/init/{0}.conf'.format(service_name)
                check = 'limit nofile 8192 8192'

            if not root_client.file_exists(path):
                continue
            if check in root_client.file_read(path):
                continue  # Already migrated

            try:
                service_manager.regenerate_service(name='ovs-arakoon',
                                                   client=root_client,
                                                   target_name=service_name)
                changed_clients.add(root_client)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='arakoon',
                                                    old_service_name=service_name)
            except Exception:
                # Best effort: log and carry on with the next service
                MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
    if uses_systemd:
        # 'systemctl' is only relevant (and only expected to exist) when the services are managed by SystemD
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

    #############################
    # Migrate to multiple proxies
    for storagedriver in StorageDriverList.get_storagedrivers():
        vpool = storagedriver.vpool
        root_client = sr_client_map[storagedriver.storagerouter_guid]
        for alba_proxy in storagedriver.alba_proxies:
            # Rename alba_proxy service in model
            service = alba_proxy.service
            old_service_name = 'albaproxy_{0}'.format(vpool.name)
            new_service_name = 'albaproxy_{0}_0'.format(vpool.name)
            if old_service_name != service.name:
                continue
            service.name = new_service_name
            service.save()

            if not service_manager.has_service(name=old_service_name, client=root_client):
                continue
            old_configuration_key = '/ovs/framework/hosts/{0}/services/{1}'.format(storagedriver.storagerouter.machine_id,
                                                                                   old_service_name)
            if not Configuration.exists(key=old_configuration_key):
                continue

            # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service)
            ExtensionsToolbox.edit_version_file(client=root_client,
                                                package_name='alba',
                                                old_service_name=old_service_name,
                                                new_service_name=new_service_name)

            # Register new service and remove old service
            service_manager.add_service(name='ovs-albaproxy',
                                        client=root_client,
                                        params=Configuration.get(old_configuration_key),
                                        target_name='ovs-{0}'.format(new_service_name))

            # Update scrub proxy config
            proxy_config_key = '/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)
            proxy_config = None if Configuration.exists(key=proxy_config_key) is False else Configuration.get(proxy_config_key)
            if proxy_config is not None:
                fragment_cache = proxy_config.get('fragment_cache', ['none', {}])
                if fragment_cache[0] == 'alba' and fragment_cache[1].get('cache_on_write') is True:  # Accelerated ALBA configured
                    fragment_cache_scrub_info = copy.deepcopy(fragment_cache)
                    fragment_cache_scrub_info[1]['cache_on_read'] = False
                    proxy_scrub_config_key = '/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid)
                    proxy_scrub_config = None if Configuration.exists(key=proxy_scrub_config_key) is False else Configuration.get(proxy_scrub_config_key)
                    if proxy_scrub_config is not None and proxy_scrub_config['fragment_cache'] == ['none']:
                        proxy_scrub_config['fragment_cache'] = fragment_cache_scrub_info
                        Configuration.set(proxy_scrub_config_key, json.dumps(proxy_scrub_config, indent=4), raw=True)

        # Update 'backend_connection_manager' section
        changes = False
        storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id)
        storagedriver_config.load()
        if 'backend_connection_manager' not in storagedriver_config.configuration:
            continue

        current_config = storagedriver_config.configuration['backend_connection_manager']
        if current_config.get('backend_type') != 'MULTI':
            # Single-proxy layout: build one numbered sub-section per proxy out of the current section
            changes = True
            backend_connection_manager = {'backend_type': 'MULTI'}
            # Only the index is used; the sort (and its key lookup) is kept as in the original implementation
            for index, _ in enumerate(sorted(storagedriver.alba_proxies, key=lambda pr: pr.service.ports[0])):
                proxy_section = copy.deepcopy(current_config)
                backend_connection_manager[str(index)] = proxy_section
                proxy_section['alba_connection_use_rora'] = True
                proxy_section['alba_connection_rora_manifest_cache_capacity'] = 5000
                # Iterate over a snapshot, because entries are removed from 'proxy_section' inside the loop
                for key, value in list(proxy_section.items()):
                    if key.startswith('backend_interface'):
                        # 'backend_interface*' settings are moved to the top level of the section
                        backend_connection_manager[key] = value
                        del proxy_section[key]
        else:
            # Already 'MULTI': only move 'backend_interface*' settings out of the numbered sub-sections
            backend_connection_manager = current_config
            # Iterate over snapshots, because both dictionaries are modified inside the loops
            for value in list(backend_connection_manager.values()):
                if isinstance(value, dict):
                    for key, val in list(value.items()):
                        if key.startswith('backend_interface'):
                            backend_connection_manager[key] = val
                            changes = True
                            del value[key]

        # Make sure the retry settings are present at the top level (shared by both layouts)
        for key, value in retry_defaults.items():
            if key not in backend_connection_manager:
                changes = True
                backend_connection_manager[key] = value

        if changes is True:
            storagedriver_config.clear_backend_connection_manager()
            storagedriver_config.configure_backend_connection_manager(**backend_connection_manager)
            storagedriver_config.save(root_client)

            # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section)
            ExtensionsToolbox.edit_version_file(client=root_client,
                                                package_name='volumedriver',
                                                old_service_name='volumedriver_{0}'.format(vpool.name))
            if service_manager.ImplementationClass == Systemd:
                root_client.run(['systemctl', 'daemon-reload'])

    ########################################
    # Update metadata_store_bits information
    for vpool in VPoolList.get_vpools():
        bits = None
        for storagedriver in vpool.storagedrivers:
            key = '/ovs/framework/hosts/{0}/services/volumedriver_{1}'.format(storagedriver.storagerouter.machine_id,
                                                                              vpool.name)
            if Configuration.exists(key=key) and 'METADATASTORE_BITS' not in Configuration.get(key=key):
                if bits is None:
                    # Once a value has been found, it is re-used for the remaining StorageDrivers of this vPool
                    entries = service_manager.extract_from_service_file(name='ovs-volumedriver_{0}'.format(vpool.name),
                                                                        client=sr_client_map[storagedriver.storagerouter_guid],
                                                                        entries=['METADATASTORE_BITS='])
                    if len(entries) == 1:
                        bits = entries[0].split('=')[-1]
                        bits = int(bits) if bits.isdigit() else 5  # Falls back to 5 when the value is not numeric
                if bits is not None:
                    try:
                        content = Configuration.get(key=key)
                        content['METADATASTORE_BITS'] = bits
                        Configuration.set(key=key, value=content)
                    except Exception:
                        # Best effort: log and carry on with the next StorageDriver
                        MigrationController._logger.exception('Error updating volumedriver info for vPool {0} on StorageRouter {1}'.format(vpool.name, storagedriver.storagerouter.name))

        if bits is not None:
            vpool.metadata_store_bits = bits
            vpool.save()

    MigrationController._logger.info('Finished out of band migrations')
    GenericController.refresh_package_information()