def _configure_arakoon_to_volumedriver(cluster_name):
    StorageDriverController._logger.info('Update existing vPools')
    config = ArakoonClusterConfig(cluster_id=cluster_name)
    arakoon_nodes = []
    for node in config.nodes:
        arakoon_nodes.append({'host': node.ip,
                              'port': node.client_port,
                              'node_id': node.name})
    if Configuration.dir_exists('/ovs/vpools'):
        for vpool_guid in Configuration.list('/ovs/vpools'):
            for storagedriver_id in Configuration.list('/ovs/vpools/{0}/hosts'.format(vpool_guid)):
                storagedriver_config = StorageDriverConfiguration('storagedriver', vpool_guid, storagedriver_id)
                storagedriver_config.load()
                storagedriver_config.configure_volume_registry(vregistry_arakoon_cluster_id=cluster_name,
                                                               vregistry_arakoon_cluster_nodes=arakoon_nodes)
                storagedriver_config.configure_distributed_lock_store(dls_type='Arakoon',
                                                                      dls_arakoon_cluster_id=cluster_name,
                                                                      dls_arakoon_cluster_nodes=arakoon_nodes)
                storagedriver_config.save()
def get_configuration(vpool_guid):
    vpool = VPool(vpool_guid)
    if not vpool.storagedrivers or not vpool.storagedrivers[0].storagerouter:
        return {}
    client = SSHClient(vpool.storagedrivers[0].storagerouter)
    storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name)
    storagedriver_config.load(client)
    volume_router = storagedriver_config.configuration.get('volume_router', {})
    volume_manager = storagedriver_config.configuration.get('volume_manager', {})
    dedupe_mode = volume_manager.get('read_cache_default_mode', StorageDriverClient.VOLDRV_CONTENT_BASED)
    cache_strategy = volume_manager.get('read_cache_default_behaviour', StorageDriverClient.VOLDRV_CACHE_ON_READ)
    sco_multiplier = volume_router.get('vrouter_sco_multiplier', 1024)
    tlog_multiplier = volume_manager.get('number_of_scos_in_tlog', 20)
    non_disposable_sco_factor = volume_manager.get('non_disposable_scos_factor', 12)
    dtl_mode = storagedriver_config.configuration.get('', {}).get('', None)
    sco_size = sco_multiplier * 4 / 1024  # SCO size is in MiB ==> SCO multiplier * cluster size (4 KiB by default)
    dtl_enabled = storagedriver_config.configuration.get('', {}).get('', False)
    dtl_location = storagedriver_config.configuration.get('', {}).get('', None)
    write_buffer = tlog_multiplier * sco_size * non_disposable_sco_factor
    return {'sco_size': sco_size,
            'dtl_mode': dtl_mode,
            'dtl_enabled': dtl_enabled,
            'dedupe_mode': StorageDriverClient.REVERSE_DEDUPE_MAP[dedupe_mode],
            'write_buffer': write_buffer,
            'dtl_location': dtl_location,
            'cache_strategy': StorageDriverClient.REVERSE_CACHE_MAP[cache_strategy],
            'tlog_multiplier': tlog_multiplier}
def _configuration(self):
    """
    VPool configuration
    """
    if not self.storagedrivers or not self.storagedrivers[0].storagerouter:
        return {}
    storagedriver_config = StorageDriverConfiguration('storagedriver', self.guid, self.storagedrivers[0].storagedriver_id)
    storagedriver_config.load()
    for expected_key in ['distributed_transaction_log', 'filesystem', 'volume_router', 'volume_manager']:
        if expected_key not in storagedriver_config.configuration:
            return {}
    dtl = storagedriver_config.configuration['distributed_transaction_log']
    file_system = storagedriver_config.configuration['filesystem']
    volume_router = storagedriver_config.configuration['volume_router']
    volume_manager = storagedriver_config.configuration['volume_manager']
    dtl_host = file_system['fs_dtl_host']
    dtl_mode = file_system.get('fs_dtl_mode', StorageDriverClient.VOLDRV_DTL_ASYNC)
    cluster_size = volume_manager['default_cluster_size'] / 1024
    dtl_transport = dtl['dtl_transport']
    sco_multiplier = volume_router['vrouter_sco_multiplier']
    dtl_config_mode = file_system['fs_dtl_config_mode']
    tlog_multiplier = volume_manager['number_of_scos_in_tlog']
    non_disposable_sco_factor = volume_manager['non_disposable_scos_factor']
    sco_size = sco_multiplier * cluster_size / 1024  # SCO size is in MiB ==> SCO multiplier * cluster size (4 KiB by default)
    write_buffer = tlog_multiplier * sco_size * non_disposable_sco_factor
    dtl_enabled = not (dtl_config_mode == StorageDriverClient.VOLDRV_DTL_MANUAL_MODE and dtl_host == '')
    return {'sco_size': sco_size,
            'dtl_mode': StorageDriverClient.REVERSE_DTL_MODE_MAP[dtl_mode] if dtl_enabled is True else 'no_sync',
            'dtl_enabled': dtl_enabled,
            'cluster_size': cluster_size,
            'write_buffer': write_buffer,
            'dtl_transport': StorageDriverClient.REVERSE_DTL_TRANSPORT_MAP[dtl_transport],
            'dtl_config_mode': dtl_config_mode,
            'tlog_multiplier': tlog_multiplier}
def get_configuration(vpool_guid):
    """
    Retrieve the running storagedriver configuration for the vPool
    :param vpool_guid: Guid of the vPool to retrieve running configuration for
    :return: Dictionary with configuration
    """
    vpool = VPool(vpool_guid)
    if not vpool.storagedrivers or not vpool.storagedrivers[0].storagerouter:
        return {}
    client = None
    for sd in vpool.storagedrivers:
        try:
            client = SSHClient(sd.storagerouter)
            client.run("pwd")
            break
        except UnableToConnectException:
            client = None
    if client is None:
        raise RuntimeError("Could not find an online storage router to retrieve vPool configuration from")
    storagedriver_config = StorageDriverConfiguration("storagedriver", vpool.name)
    storagedriver_config.load(client)
    dtl = storagedriver_config.configuration.get("failovercache", {})
    file_system = storagedriver_config.configuration.get("filesystem", {})
    volume_router = storagedriver_config.configuration.get("volume_router", {})
    volume_manager = storagedriver_config.configuration.get("volume_manager", {})
    dtl_mode = file_system.get("fs_dtl_mode", StorageDriverClient.VOLDRV_DTL_ASYNC)
    dedupe_mode = volume_manager.get("read_cache_default_mode", StorageDriverClient.VOLDRV_CONTENT_BASED)
    dtl_transport = dtl.get("failovercache_transport", StorageDriverClient.VOLDRV_DTL_TRANSPORT_TCP)
    cache_strategy = volume_manager.get("read_cache_default_behaviour", StorageDriverClient.VOLDRV_CACHE_ON_READ)
    sco_multiplier = volume_router.get("vrouter_sco_multiplier", 1024)
    dtl_config_mode = file_system.get("fs_dtl_config_mode", StorageDriverClient.VOLDRV_DTL_AUTOMATIC_MODE)
    tlog_multiplier = volume_manager.get("number_of_scos_in_tlog", 20)
    non_disposable_sco_factor = volume_manager.get("non_disposable_scos_factor", 12)
    sco_size = sco_multiplier * 4 / 1024  # SCO size is in MiB ==> SCO multiplier * cluster size (4 KiB by default)
    write_buffer = tlog_multiplier * sco_size * non_disposable_sco_factor
    dtl_mode = StorageDriverClient.REVERSE_DTL_MODE_MAP[dtl_mode]
    dtl_enabled = dtl_config_mode == StorageDriverClient.VOLDRV_DTL_AUTOMATIC_MODE
    if dtl_enabled is False:
        dtl_mode = StorageDriverClient.FRAMEWORK_DTL_NO_SYNC
    return {"sco_size": sco_size,
            "dtl_mode": dtl_mode,
            "dedupe_mode": StorageDriverClient.REVERSE_DEDUPE_MAP[dedupe_mode],
            "dtl_enabled": dtl_enabled,
            "write_buffer": write_buffer,
            "dtl_transport": StorageDriverClient.REVERSE_DTL_TRANSPORT_MAP[dtl_transport],
            "cache_strategy": StorageDriverClient.REVERSE_CACHE_MAP[cache_strategy],
            "tlog_multiplier": tlog_multiplier}
def get_configuration(vpool_guid):
    """
    Retrieve the running storagedriver configuration for the vPool
    :param vpool_guid: Guid of the vPool to retrieve running configuration for
    :return: Dictionary with configuration
    """
    vpool = VPool(vpool_guid)
    if not vpool.storagedrivers or not vpool.storagedrivers[0].storagerouter:
        return {}
    client = None
    for sd in vpool.storagedrivers:
        try:
            client = SSHClient(sd.storagerouter)
            client.run('pwd')
            break
        except UnableToConnectException:
            client = None
    if client is None:
        raise RuntimeError('Could not find an online storage router to retrieve vPool configuration from')
    storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.guid, vpool.storagedrivers[0].storagedriver_id)
    storagedriver_config.load()
    dtl = storagedriver_config.configuration.get('distributed_transaction_log', {})
    file_system = storagedriver_config.configuration.get('filesystem', {})
    volume_router = storagedriver_config.configuration.get('volume_router', {})
    volume_manager = storagedriver_config.configuration.get('volume_manager', {})
    dtl_mode = file_system.get('fs_dtl_mode', StorageDriverClient.VOLDRV_DTL_ASYNC)
    dedupe_mode = volume_manager.get('read_cache_default_mode', StorageDriverClient.VOLDRV_CONTENT_BASED)
    cluster_size = volume_manager.get('default_cluster_size', 4096) / 1024
    dtl_transport = dtl.get('dtl_transport', StorageDriverClient.VOLDRV_DTL_TRANSPORT_TCP)
    cache_strategy = volume_manager.get('read_cache_default_behaviour', StorageDriverClient.VOLDRV_CACHE_ON_READ)
    sco_multiplier = volume_router.get('vrouter_sco_multiplier', 1024)
    dtl_config_mode = file_system.get('fs_dtl_config_mode', StorageDriverClient.VOLDRV_DTL_AUTOMATIC_MODE)
    tlog_multiplier = volume_manager.get('number_of_scos_in_tlog', 20)
    non_disposable_sco_factor = volume_manager.get('non_disposable_scos_factor', 12)
    sco_size = sco_multiplier * cluster_size / 1024  # SCO size is in MiB ==> SCO multiplier * cluster size (4 KiB by default)
    write_buffer = tlog_multiplier * sco_size * non_disposable_sco_factor
    dtl_mode = StorageDriverClient.REVERSE_DTL_MODE_MAP[dtl_mode]
    dtl_enabled = dtl_config_mode == StorageDriverClient.VOLDRV_DTL_AUTOMATIC_MODE
    if dtl_enabled is False:
        dtl_mode = StorageDriverClient.FRAMEWORK_DTL_NO_SYNC
    return {'sco_size': sco_size,
            'dtl_mode': dtl_mode,
            'dedupe_mode': StorageDriverClient.REVERSE_DEDUPE_MAP[dedupe_mode],
            'dtl_enabled': dtl_enabled,
            'cluster_size': cluster_size,
            'write_buffer': write_buffer,
            'dtl_transport': StorageDriverClient.REVERSE_DTL_TRANSPORT_MAP[dtl_transport],
            'cache_strategy': StorageDriverClient.REVERSE_CACHE_MAP[cache_strategy],
            'tlog_multiplier': tlog_multiplier}
def get_configuration(vpool_guid):
    """
    Retrieve the running storagedriver configuration for the vPool
    :param vpool_guid: Guid of the vPool to retrieve running configuration for
    :return: Dictionary with configuration
    """
    vpool = VPool(vpool_guid)
    if not vpool.storagedrivers or not vpool.storagedrivers[0].storagerouter:
        return {}
    client = None
    for sd in vpool.storagedrivers:
        try:
            client = SSHClient(sd.storagerouter)
            client.run('pwd')
            break
        except UnableToConnectException:
            client = None
    if client is None:
        raise RuntimeError('Could not find an online storage router to retrieve vPool configuration from')
    storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.guid, vpool.storagedrivers[0].storagedriver_id)
    storagedriver_config.load()
    dtl = storagedriver_config.configuration.get('failovercache', {})
    file_system = storagedriver_config.configuration.get('filesystem', {})
    volume_router = storagedriver_config.configuration.get('volume_router', {})
    volume_manager = storagedriver_config.configuration.get('volume_manager', {})
    dtl_mode = file_system.get('fs_dtl_mode', StorageDriverClient.VOLDRV_DTL_ASYNC)
    dedupe_mode = volume_manager.get('read_cache_default_mode', StorageDriverClient.VOLDRV_CONTENT_BASED)
    dtl_transport = dtl.get('failovercache_transport', StorageDriverClient.VOLDRV_DTL_TRANSPORT_TCP)
    cache_strategy = volume_manager.get('read_cache_default_behaviour', StorageDriverClient.VOLDRV_CACHE_ON_READ)
    sco_multiplier = volume_router.get('vrouter_sco_multiplier', 1024)
    dtl_config_mode = file_system.get('fs_dtl_config_mode', StorageDriverClient.VOLDRV_DTL_AUTOMATIC_MODE)
    tlog_multiplier = volume_manager.get('number_of_scos_in_tlog', 20)
    non_disposable_sco_factor = volume_manager.get('non_disposable_scos_factor', 12)
    sco_size = sco_multiplier * 4 / 1024  # SCO size is in MiB ==> SCO multiplier * cluster size (4 KiB by default)
    write_buffer = tlog_multiplier * sco_size * non_disposable_sco_factor
    dtl_mode = StorageDriverClient.REVERSE_DTL_MODE_MAP[dtl_mode]
    dtl_enabled = dtl_config_mode == StorageDriverClient.VOLDRV_DTL_AUTOMATIC_MODE
    if dtl_enabled is False:
        dtl_mode = StorageDriverClient.FRAMEWORK_DTL_NO_SYNC
    return {'sco_size': sco_size,
            'dtl_mode': dtl_mode,
            'dedupe_mode': StorageDriverClient.REVERSE_DEDUPE_MAP[dedupe_mode],
            'dtl_enabled': dtl_enabled,
            'write_buffer': write_buffer,
            'dtl_transport': StorageDriverClient.REVERSE_DTL_TRANSPORT_MAP[dtl_transport],
            'cache_strategy': StorageDriverClient.REVERSE_CACHE_MAP[cache_strategy],
            'tlog_multiplier': tlog_multiplier}
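# The SCO size / write buffer arithmetic recurs in every get_configuration variant
# above. A minimal sketch of the same computation using the defaults those variants
# fall back to (sco_multiplier=1024, 4 KiB clusters, tlog_multiplier=20,
# non_disposable_scos_factor=12); the values are the fallback defaults, not a live
# configuration:
sco_multiplier = 1024
cluster_size_kib = 4
sco_size = sco_multiplier * cluster_size_kib / 1024  # 1024 clusters of 4 KiB = 4 MiB per SCO
tlog_multiplier = 20
non_disposable_sco_factor = 12
write_buffer = tlog_multiplier * sco_size * non_disposable_sco_factor  # 20 * 4 * 12 = 960 MiB
assert sco_size == 4 and write_buffer == 960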
def get_config_params(vdisk_guid):
    """
    Retrieve the configuration parameters for the given disk from the storagedriver.
    :param vdisk_guid: Guid of the virtual disk to retrieve the configuration for
    """
    vdisk = VDisk(vdisk_guid)
    vpool = VPool(vdisk.vpool_guid)
    vpool_client = SSHClient(vpool.storagedrivers[0].storagerouter)
    storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name)
    storagedriver_config.load(vpool_client)
    volume_manager = storagedriver_config.configuration.get('volume_manager', {})
    volume_id = str(vdisk.volume_id)
    sco_size = vdisk.storagedriver_client.get_sco_multiplier(volume_id) / 1024 * 4
    dtl_config = vdisk.storagedriver_client.get_dtl_config(volume_id)
    dedupe_mode = vdisk.storagedriver_client.get_readcache_mode(volume_id)
    cache_strategy = vdisk.storagedriver_client.get_readcache_behaviour(volume_id)
    tlog_multiplier = vdisk.storagedriver_client.get_tlog_multiplier(volume_id)
    readcache_limit = vdisk.storagedriver_client.get_readcache_limit(volume_id)
    non_disposable_sco_factor = vdisk.storagedriver_client.get_sco_cache_max_non_disposable_factor(volume_id)
    dtl_target = None
    if dtl_config is None:
        dtl_mode = 'no_sync'
    else:
        if dtl_config.host == 'null':
            dtl_mode = 'no_sync'
        else:
            dtl_mode = StorageDriverClient.REVERSE_DTL_MODE_MAP[dtl_config.mode]
            dtl_target = dtl_config.host
    if dedupe_mode is None:
        dedupe_mode = volume_manager.get('read_cache_default_mode', StorageDriverClient.VOLDRV_CONTENT_BASED)
    if cache_strategy is None:
        cache_strategy = volume_manager.get('read_cache_default_behaviour', StorageDriverClient.VOLDRV_CACHE_ON_READ)
    if tlog_multiplier is None:
        tlog_multiplier = volume_manager.get('number_of_scos_in_tlog', 20)
    if readcache_limit is not None:
        vol_info = vdisk.storagedriver_client.info_volume(volume_id)
        block_size = vol_info.lba_size * vol_info.cluster_multiplier or 4096
        readcache_limit = readcache_limit * block_size / 1024 / 1024 / 1024
    if non_disposable_sco_factor is None:
        non_disposable_sco_factor = volume_manager.get('non_disposable_scos_factor', 12)
    return {'sco_size': sco_size,
            'dtl_mode': dtl_mode,
            'dedupe_mode': StorageDriverClient.REVERSE_DEDUPE_MAP[dedupe_mode],
            'write_buffer': tlog_multiplier * sco_size * non_disposable_sco_factor,
            'dtl_target': dtl_target,
            'cache_strategy': StorageDriverClient.REVERSE_CACHE_MAP[cache_strategy],
            'readcache_limit': readcache_limit}
def remove_mds_service(mds_service, vpool, reload_config):
    """
    Removes an MDS service
    :param mds_service: The MDS service to remove
    :param vpool: The vPool for which the MDS service will be removed
    :param reload_config: If True, the volumedriver's updated configuration will be reloaded
    """
    if len(mds_service.vdisks_guids) > 0:
        raise RuntimeError('Cannot remove MDSService that is still serving disks')
    storagerouter = mds_service.service.storagerouter
    client = SSHClient(storagerouter)
    mdsservice_type = ServiceTypeList.get_by_name('MetadataServer')

    # Clean up model
    directories_to_clean = []
    for sd_partition in mds_service.storagedriver_partitions:
        directories_to_clean.append(sd_partition.path)
        sd_partition.delete()
    mds_service.delete()
    mds_service.service.delete()

    # Generate new mds_nodes section
    mds_nodes = []
    for service in mdsservice_type.services:
        if service.storagerouter_guid == storagerouter.guid:
            mds_service = service.mds_service
            if mds_service.vpool_guid == vpool.guid:
                sdp = [sd_partition.path for sd_partition in mds_service.storagedriver_partitions
                       if sd_partition.role == DiskPartition.ROLES.DB]
                mds_nodes.append({'host': service.storagerouter.ip,
                                  'port': service.ports[0],
                                  'db_directory': sdp[0],
                                  'scratch_directory': sdp[0]})

    # Generate the correct section in the Storage Driver's configuration
    storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name)
    storagedriver_config.load(client)
    storagedriver_config.clean()  # Clean out obsolete values
    storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
    storagedriver_config.save(client, reload_config=reload_config)

    tries = 5
    while tries > 0:
        try:
            root_client = SSHClient(storagerouter, username='******')
            root_client.dir_delete(directories=directories_to_clean,
                                   follow_symlinks=True)
            for dir_name in directories_to_clean:
                logger.debug('Recursively removed {0}'.format(dir_name))
            break
        except Exception:
            time.sleep(5)
            logger.debug('Waiting for the MDS service to go down...')
            tries -= 1
            if tries == 0:
                raise
def _configure_amqp_to_volumedriver():
    Toolbox.log(logger=NodeTypeController._logger, messages='Update existing vPools')
    login = Configuration.get('/ovs/framework/messagequeue|user')
    password = Configuration.get('/ovs/framework/messagequeue|password')
    protocol = Configuration.get('/ovs/framework/messagequeue|protocol')
    uris = []
    for endpoint in Configuration.get('/ovs/framework/messagequeue|endpoints'):
        uris.append({'amqp_uri': '{0}://{1}:{2}@{3}'.format(protocol, login, password, endpoint)})
    if Configuration.dir_exists('/ovs/vpools'):
        for vpool_guid in Configuration.list('/ovs/vpools'):
            for storagedriver_id in Configuration.list('/ovs/vpools/{0}/hosts'.format(vpool_guid)):
                storagedriver_config = StorageDriverConfiguration('storagedriver', vpool_guid, storagedriver_id)
                storagedriver_config.load()
                storagedriver_config.configure_event_publisher(events_amqp_routing_key=Configuration.get('/ovs/framework/messagequeue|queues.storagedriver'),
                                                               events_amqp_uris=uris)
                storagedriver_config.save()
def remove_mds_service(mds_service, client, storagerouter, vpool, reload_config):
    """
    Removes an MDS service
    """
    if len(mds_service.vdisks_guids) > 0:
        raise RuntimeError('Cannot remove MDSService that is still serving disks')
    mdsservice_type = ServiceTypeList.get_by_name('MetadataServer')
    storagedriver = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid][0]

    # Clean up model
    this_service_number = mds_service.number
    service = mds_service.service
    mds_service.delete()
    service.delete()

    # Generate new mds_nodes section
    mds_nodes = []
    for service in mdsservice_type.services:
        if service.storagerouter_guid == storagerouter.guid:
            mds_service = service.mds_service
            if mds_service.vpool_guid == vpool.guid:
                mds_nodes.append({'host': service.storagerouter.ip,
                                  'port': service.ports[0],
                                  'db_directory': '{0}/mds_{1}_{2}'.format(storagedriver.mountpoint_md, vpool.name, mds_service.number),
                                  'scratch_directory': '{0}/mds_{1}_{2}'.format(storagedriver.mountpoint_temp, vpool.name, mds_service.number)})

    # Generate the correct section in the Storage Driver's configuration
    storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name)
    storagedriver_config.load(client)
    storagedriver_config.clean()  # Clean out obsolete values
    storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
    storagedriver_config.save(client, reload_config=reload_config)

    tries = 5
    cleaned = False
    while tries > 0 and cleaned is False:
        try:
            client.dir_delete(['{0}/mds_{1}_{2}'.format(storagedriver.mountpoint_md, vpool.name, this_service_number),
                               '{0}/mds_{1}_{2}'.format(storagedriver.mountpoint_temp, vpool.name, this_service_number)])
            logger.debug('MDS files cleaned up')
            cleaned = True
        except Exception:
            time.sleep(5)
            logger.debug('Waiting for the MDS service to go down...')
            tries -= 1
def check_volume_potential(result_handler, critical_vol_number=25):
    """
    Checks the volume potential of every local storagedriver.
    Results in a success if enough volumes can still be created, a warning if the potential is lower than
    the threshold value (critical_vol_number) and a failure if the potential is 0.
    :param result_handler: logging object
    :type result_handler: ovs.extensions.healthcheck.result.HCResults
    :param critical_vol_number: minimal number of volumes that must still be creatable before throwing a warning
    :type critical_vol_number: int
    """
    result_handler.info('Checking volume potential of storagedrivers')
    if not isinstance(critical_vol_number, int) or critical_vol_number < 0:
        raise ValueError('Critical volume number should be a positive integer')
    for std in VolumedriverHealthCheck.LOCAL_SR.storagedrivers:
        try:
            std_config = StorageDriverConfiguration(std.vpool_guid, std.storagedriver_id)
            client = LocalStorageRouterClient(std_config.remote_path)
            vol_potential = client.volume_potential(str(std.storagedriver_id))
            if vol_potential >= critical_vol_number:
                log_level = 'success'
            elif critical_vol_number > vol_potential > 0:
                log_level = 'warning'
            else:
                log_level = 'failure'
            getattr(result_handler, log_level)('Volume potential of local storage driver: {0}: {1} (potential at: {2})'.format(std.storagedriver_id, log_level.upper(), vol_potential))
        except RuntimeError:
            result_handler.exception('Unable to retrieve configuration for storagedriver {0}'.format(std.storagedriver_id))
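# The getattr-based dispatch above maps a computed log level onto the matching
# result-handler method. A self-contained sketch of the same idiom; DummyHandler
# is a hypothetical stand-in for HCResults, only the dispatch pattern is the point:
class DummyHandler(object):
    def success(self, msg):
        print('SUCCESS: {0}'.format(msg))

    def warning(self, msg):
        print('WARNING: {0}'.format(msg))

    def failure(self, msg):
        print('FAILURE: {0}'.format(msg))

def classify(vol_potential, critical_vol_number=25):
    if vol_potential >= critical_vol_number:
        return 'success'
    elif critical_vol_number > vol_potential > 0:
        return 'warning'
    return 'failure'

handler = DummyHandler()
for potential in (100, 10, 0):
    # Pick the method by name, exactly like getattr(result_handler, log_level)(...)
    getattr(handler, classify(potential))('volume potential: {0}'.format(potential))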
def mds_checkup():
    """
    Validates the current MDS setup/configuration and takes actions where required
    """
    mds_dict = {}
    for vpool in VPoolList.get_vpools():
        for mds_service in vpool.mds_services:
            storagerouter = mds_service.service.storagerouter
            if vpool not in mds_dict:
                mds_dict[vpool] = {}
            if storagerouter not in mds_dict[vpool]:
                mds_dict[vpool][storagerouter] = {'client': SSHClient(storagerouter, username='******'),
                                                  'services': []}
            mds_dict[vpool][storagerouter]['services'].append(mds_service)
    for vpool, storagerouter_info in mds_dict.iteritems():
        # 1. First, make sure there's at least one MDS on every StorageRouter that's not overloaded
        #    If not, create an extra MDS for that StorageRouter
        for storagerouter in storagerouter_info:
            client = mds_dict[vpool][storagerouter]['client']
            mds_services = mds_dict[vpool][storagerouter]['services']
            has_room = False
            for mds_service in mds_services[:]:
                if mds_service.capacity == 0 and len(mds_service.vdisks_guids) == 0:
                    client = SSHClient(storagerouter)
                    MDSServiceController.remove_mds_service(mds_service, client, storagerouter, vpool, reload_config=True)
                    mds_services.remove(mds_service)
            for mds_service in mds_services:
                _, load = MDSServiceController.get_mds_load(mds_service)
                if load < Configuration.get('ovs.storagedriver.mds.maxload'):
                    has_room = True
                    break
            if has_room is False:
                mds_service = MDSServiceController.prepare_mds_service(client, storagerouter, vpool,
                                                                       fresh_only=False, reload_config=True)
                if mds_service is None:
                    raise RuntimeError('Could not add MDS node')
                mds_services.append(mds_service)
        mds_config_set = MDSServiceController.get_mds_storagedriver_config_set(vpool)
        for storagerouter in mds_dict[vpool]:
            client = mds_dict[vpool][storagerouter]['client']
            storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name)
            storagedriver_config.load(client)
            if storagedriver_config.is_new is False:
                storagedriver_config.clean()  # Clean out obsolete values
                storagedriver_config.configure_filesystem(fs_metadata_backend_mds_nodes=mds_config_set[storagerouter.guid])
                storagedriver_config.save(client)
        # 2. Per VPool, execute a safety check, making sure the master/slave configuration is optimal.
        for vdisk in vpool.vdisks:
            MDSServiceController.ensure_safety(vdisk)
def remove_mds_service(mds_service, client, storagerouter, vpool, reload_config):
    """
    Removes an MDS service
    """
    if len(mds_service.vdisks_guids) > 0:
        raise RuntimeError('Cannot remove MDSService that is still serving disks')
    mdsservice_type = ServiceTypeList.get_by_name('MetadataServer')

    # Clean up model
    directories_to_clean = []
    for sd_partition in mds_service.storagedriver_partitions:
        directories_to_clean.append(sd_partition.path)
        sd_partition.delete()
    service = mds_service.service
    mds_service.delete()
    service.delete()

    # Generate new mds_nodes section
    mds_nodes = []
    for service in mdsservice_type.services:
        if service.storagerouter_guid == storagerouter.guid:
            mds_service = service.mds_service
            if mds_service.vpool_guid == vpool.guid:
                mds_nodes.append({'host': service.storagerouter.ip,
                                  'port': service.ports[0],
                                  'db_directory': [sd_partition.path for sd_partition in mds_service.storagedriver_partitions
                                                   if sd_partition.role == DiskPartition.ROLES.DB][0],
                                  'scratch_directory': [sd_partition.path for sd_partition in mds_service.storagedriver_partitions
                                                        if sd_partition.role == DiskPartition.ROLES.SCRUB][0]})

    # Generate the correct section in the Storage Driver's configuration
    storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name)
    storagedriver_config.load(client)
    storagedriver_config.clean()  # Clean out obsolete values
    storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
    storagedriver_config.save(client, reload_config=reload_config)

    tries = 5
    cleaned = False
    while tries > 0 and cleaned is False:
        try:
            client.dir_delete(directories_to_clean)
            logger.debug('MDS files cleaned up')
            cleaned = True
        except Exception:
            time.sleep(5)
            logger.debug('Waiting for the MDS service to go down...')
            tries -= 1
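# The cleanup loops in these remove_mds_service variants share one retry shape:
# attempt the deletion, sleep, and give up (or re-raise) after a fixed number of
# attempts while the MDS process shuts down. A minimal, generic sketch of that
# pattern; retry_call and its arguments are illustrative, not framework API:
import time

def retry_call(fn, tries=5, delay=5):
    """Call fn() until it succeeds or the attempts run out."""
    while tries > 0:
        try:
            return fn()
        except Exception:
            time.sleep(delay)
            tries -= 1
    raise RuntimeError('Operation kept failing after all retries')

# Usage sketch: retry_call(lambda: client.dir_delete(directories_to_clean))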
def _configuration(self):
    """
    VPool configuration
    """
    if not self.storagedrivers or not self.storagedrivers[0].storagerouter:
        return {}
    storagedriver_config = StorageDriverConfiguration("storagedriver", self.guid, self.storagedrivers[0].storagedriver_id)
    storagedriver_config.load()
    dtl = storagedriver_config.configuration["distributed_transaction_log"]
    file_system = storagedriver_config.configuration["filesystem"]
    volume_router = storagedriver_config.configuration["volume_router"]
    volume_manager = storagedriver_config.configuration["volume_manager"]
    dtl_host = file_system["fs_dtl_host"]
    dtl_mode = file_system.get("fs_dtl_mode", StorageDriverClient.VOLDRV_DTL_ASYNC)
    cluster_size = volume_manager["default_cluster_size"] / 1024
    dtl_transport = dtl["dtl_transport"]
    sco_multiplier = volume_router["vrouter_sco_multiplier"]
    dtl_config_mode = file_system["fs_dtl_config_mode"]
    tlog_multiplier = volume_manager["number_of_scos_in_tlog"]
    non_disposable_sco_factor = volume_manager["non_disposable_scos_factor"]
    sco_size = sco_multiplier * cluster_size / 1024  # SCO size is in MiB ==> SCO multiplier * cluster size (4 KiB by default)
    write_buffer = tlog_multiplier * sco_size * non_disposable_sco_factor
    dtl_enabled = not (dtl_config_mode == StorageDriverClient.VOLDRV_DTL_MANUAL_MODE and dtl_host == "")
    return {"sco_size": sco_size,
            "dtl_mode": StorageDriverClient.REVERSE_DTL_MODE_MAP[dtl_mode] if dtl_enabled is True else "no_sync",
            "dtl_enabled": dtl_enabled,
            "cluster_size": cluster_size,
            "write_buffer": write_buffer,
            "dtl_transport": StorageDriverClient.REVERSE_DTL_TRANSPORT_MAP[dtl_transport],
            "dtl_config_mode": dtl_config_mode,
            "tlog_multiplier": tlog_multiplier}
def refresh_configuration(storagedriver_guid):
    """
    Refresh the StorageDriver's configuration (Configuration must have been updated manually)
    :param storagedriver_guid: Guid of the StorageDriver
    :type storagedriver_guid: str
    :return: Amount of changes the volumedriver detected
    :rtype: int
    """
    storagedriver = StorageDriver(storagedriver_guid)
    try:
        client = SSHClient(endpoint=storagedriver.storagerouter)
    except UnableToConnectException:
        raise Exception('StorageRouter with IP {0} is not reachable. Cannot refresh the configuration'.format(storagedriver.storagerouter.ip))
    storagedriver_config = StorageDriverConfiguration(vpool_guid=storagedriver.vpool_guid,
                                                      storagedriver_id=storagedriver.storagedriver_id)
    return len(storagedriver_config.save(client=client, force_reload=True))
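# A hedged usage sketch for refresh_configuration; the guid is a placeholder and
# the surrounding reporting is illustrative only:
changes = refresh_configuration('<storagedriver_guid>')
if changes > 0:
    print('Volumedriver picked up {0} configuration change(s)'.format(changes))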
def _configure_arakoon_to_volumedriver(cluster_name):
    StorageDriverController._logger.info('Update existing vPools')
    config = ArakoonClusterConfig(cluster_id=cluster_name, filesystem=False)
    config.load_config()
    arakoon_nodes = []
    for node in config.nodes:
        arakoon_nodes.append({'host': node.ip,
                              'port': node.client_port,
                              'node_id': node.name})
    if Configuration.dir_exists('/ovs/vpools'):
        for vpool_guid in Configuration.list('/ovs/vpools'):
            for storagedriver_id in Configuration.list('/ovs/vpools/{0}/hosts'.format(vpool_guid)):
                storagedriver_config = StorageDriverConfiguration('storagedriver', vpool_guid, storagedriver_id)
                storagedriver_config.load()
                storagedriver_config.configure_volume_registry(vregistry_arakoon_cluster_id=cluster_name,
                                                               vregistry_arakoon_cluster_nodes=arakoon_nodes)
                storagedriver_config.configure_distributed_lock_store(dls_type='Arakoon',
                                                                      dls_arakoon_cluster_id=cluster_name,
                                                                      dls_arakoon_cluster_nodes=arakoon_nodes)
                storagedriver_config.save(reload_config=True)
def _configuration(self):
    """
    VPool configuration
    """
    if not self.storagedrivers or not self.storagedrivers[0].storagerouter:
        return {}
    storagedriver_config = StorageDriverConfiguration('storagedriver', self.guid, self.storagedrivers[0].storagedriver_id)
    storagedriver_config.load()
    dtl = storagedriver_config.configuration['distributed_transaction_log']
    file_system = storagedriver_config.configuration['filesystem']
    volume_router = storagedriver_config.configuration['volume_router']
    volume_manager = storagedriver_config.configuration['volume_manager']
    dtl_host = file_system['fs_dtl_host']
    dtl_mode = file_system.get('fs_dtl_mode', StorageDriverClient.VOLDRV_DTL_ASYNC)
    cluster_size = volume_manager['default_cluster_size'] / 1024
    dtl_transport = dtl['dtl_transport']
    sco_multiplier = volume_router['vrouter_sco_multiplier']
    dtl_config_mode = file_system['fs_dtl_config_mode']
    tlog_multiplier = volume_manager['number_of_scos_in_tlog']
    non_disposable_sco_factor = volume_manager['non_disposable_scos_factor']
    sco_size = sco_multiplier * cluster_size / 1024  # SCO size is in MiB ==> SCO multiplier * cluster size (4 KiB by default)
    write_buffer = tlog_multiplier * sco_size * non_disposable_sco_factor
    dtl_enabled = not (dtl_config_mode == StorageDriverClient.VOLDRV_DTL_MANUAL_MODE and dtl_host == '')
    return {'sco_size': sco_size,
            'dtl_mode': StorageDriverClient.REVERSE_DTL_MODE_MAP[dtl_mode] if dtl_enabled is True else 'no_sync',
            'dtl_enabled': dtl_enabled,
            'cluster_size': cluster_size,
            'write_buffer': write_buffer,
            'dtl_transport': StorageDriverClient.REVERSE_DTL_TRANSPORT_MAP[dtl_transport],
            'dtl_config_mode': dtl_config_mode,
            'tlog_multiplier': tlog_multiplier}
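# The dtl_enabled expression above disables the DTL only for the single
# combination "manual mode with an empty host". A small truth-table sketch of
# that predicate; MANUAL and AUTOMATIC are stand-ins for the VOLDRV_DTL_* values:
MANUAL, AUTOMATIC = 'Manual', 'Automatic'
for dtl_config_mode, dtl_host in [(MANUAL, ''), (MANUAL, '10.0.0.1'), (AUTOMATIC, '')]:
    dtl_enabled = not (dtl_config_mode == MANUAL and dtl_host == '')
    print('{0} host={1!r} -> dtl_enabled={2}'.format(dtl_config_mode, dtl_host, dtl_enabled))
# Only (Manual, '') yields False; every other combination keeps the DTL enabled.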
def check_sco_cache_mountpoints(result_handler):
    """
    Iterates over the StorageDrivers of the local StorageRouter and checks all of their SCO cache mount points.
    Results in a warning log if a mount point is in offline state.
    :param result_handler: logging object
    :type result_handler: ovs.extensions.healthcheck.result.HCResults
    """
    result_handler.info('Checking sco cache mount points on all local storagedrivers')
    for std in VolumedriverHealthCheck.LOCAL_SR.storagedrivers:
        try:
            std_config = StorageDriverConfiguration(std.vpool_guid, std.storagedriver_id)
            client = LocalStorageRouterClient(std_config.remote_path)
            for std_info in client.sco_cache_mount_point_info(str(std.storagedriver_id)):
                if std_info.offlined is True:
                    result_handler.warning('Mountpoint at location {0} of storagedriver {1} is in offline state'.format(std_info.path, std.storagedriver_id))
                else:
                    result_handler.success('Mountpoint at location {0} of storagedriver {1} is in online state'.format(std_info.path, std.storagedriver_id))
        except RuntimeError:
            result_handler.exception('Unable to check sco cache mountpoint of storagedriver {0}'.format(std.storagedriver_id))
def _configure_amqp_to_volumedriver():
    Toolbox.log(logger=NodeTypeController._logger, messages='Update existing vPools')
    login = Configuration.get('/ovs/framework/messagequeue|user')
    password = Configuration.get('/ovs/framework/messagequeue|password')
    protocol = Configuration.get('/ovs/framework/messagequeue|protocol')
    uris = []
    for endpoint in Configuration.get('/ovs/framework/messagequeue|endpoints'):
        uris.append({'amqp_uri': '{0}://{1}:{2}@{3}'.format(protocol, login, password, endpoint)})
    if Configuration.dir_exists('/ovs/vpools'):
        for vpool_guid in Configuration.list('/ovs/vpools'):
            for storagedriver_id in Configuration.list('/ovs/vpools/{0}/hosts'.format(vpool_guid)):
                storagedriver_config = StorageDriverConfiguration('storagedriver', vpool_guid, storagedriver_id)
                storagedriver_config.load()
                storagedriver_config.configure_event_publisher(events_amqp_routing_key=Configuration.get('/ovs/framework/messagequeue|queues.storagedriver'),
                                                               events_amqp_uris=uris)
                storagedriver_config.save()
def _volume_potentials(self):
    # type: () -> Dict[str, int]
    """
    Get an overview of the volume potential for every StorageDriver in this vPool.
    A volume potential of -1 indicates that the potential could not be retrieved.
    :return: The overview with the volume potential
    :rtype: dict
    """
    volume_potentials = {}
    for storagedriver in self.storagedrivers:
        volume_potential = -1
        try:
            std_config = StorageDriverConfiguration(storagedriver.vpool_guid, storagedriver.storagedriver_id)
            client = LocalStorageRouterClient(std_config.remote_path)
            volume_potential = client.volume_potential(str(storagedriver.storagedriver_id))
        except Exception:
            self._logger.exception('Unable to retrieve configuration for storagedriver {0}'.format(storagedriver.storagedriver_id))
        volume_potentials[storagedriver.storagerouter.guid] = volume_potential
    return volume_potentials
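# A hedged sketch of consuming the _volume_potentials result by calling the helper
# directly; vpool is a placeholder VPool instance, and the -1 sentinel marks a
# driver whose potential could not be retrieved:
potentials = vpool._volume_potentials()
for storagerouter_guid, potential in potentials.items():
    if potential == -1:
        print('{0}: potential unknown (retrieval failed)'.format(storagerouter_guid))
    else:
        print('{0}: room for {1} more volumes'.format(storagerouter_guid, potential))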
def migrate(previous_version):
    """
    Migrates from any version to any version, running all migrations required
    If previous_version is for example 0 and this script is at version 3 it will execute two steps:
      - 1 > 2
      - 2 > 3
    @param previous_version: The previous version from which to start the migration.
    """
    working_version = previous_version

    # Version 1 introduced:
    # - The datastore is still empty, add defaults
    if working_version < 1:
        from ovs.dal.hybrids.user import User
        from ovs.dal.hybrids.group import Group
        from ovs.dal.hybrids.role import Role
        from ovs.dal.hybrids.client import Client
        from ovs.dal.hybrids.failuredomain import FailureDomain
        from ovs.dal.hybrids.j_rolegroup import RoleGroup
        from ovs.dal.hybrids.j_roleclient import RoleClient
        from ovs.dal.hybrids.backendtype import BackendType
        from ovs.dal.hybrids.servicetype import ServiceType
        from ovs.dal.hybrids.branding import Branding
        from ovs.dal.lists.backendtypelist import BackendTypeList

        # Create groups
        admin_group = Group()
        admin_group.name = 'administrators'
        admin_group.description = 'Administrators'
        admin_group.save()
        viewers_group = Group()
        viewers_group.name = 'viewers'
        viewers_group.description = 'Viewers'
        viewers_group.save()

        # Create users
        admin = User()
        admin.username = '******'
        admin.password = hashlib.sha256('admin').hexdigest()
        admin.is_active = True
        admin.group = admin_group
        admin.save()

        # Create internal OAuth 2 clients
        admin_pw_client = Client()
        admin_pw_client.ovs_type = 'INTERNAL'
        admin_pw_client.grant_type = 'PASSWORD'
        admin_pw_client.user = admin
        admin_pw_client.save()
        admin_cc_client = Client()
        admin_cc_client.ovs_type = 'INTERNAL'
        admin_cc_client.grant_type = 'CLIENT_CREDENTIALS'
        admin_cc_client.client_secret = ''.join(random.choice(string.ascii_letters + string.digits + '|_=+*#@!/-[]{}<>.?,\'";:~')
                                                for _ in range(128))
        admin_cc_client.user = admin
        admin_cc_client.save()

        # Create roles
        read_role = Role()
        read_role.code = 'read'
        read_role.name = 'Read'
        read_role.description = 'Can read objects'
        read_role.save()
        write_role = Role()
        write_role.code = 'write'
        write_role.name = 'Write'
        write_role.description = 'Can write objects'
        write_role.save()
        manage_role = Role()
        manage_role.code = 'manage'
        manage_role.name = 'Manage'
        manage_role.description = 'Can manage the system'
        manage_role.save()

        # Attach groups to roles
        mapping = [(admin_group, [read_role, write_role, manage_role]),
                   (viewers_group, [read_role])]
        for setting in mapping:
            for role in setting[1]:
                rolegroup = RoleGroup()
                rolegroup.group = setting[0]
                rolegroup.role = role
                rolegroup.save()
            for user in setting[0].users:
                for role in setting[1]:
                    for client in user.clients:
                        roleclient = RoleClient()
                        roleclient.client = client
                        roleclient.role = role
                        roleclient.save()

        # Add backends
        for backend_type_info in [('Ceph', 'ceph_s3'), ('Amazon', 'amazon_s3'), ('Swift', 'swift_s3'),
                                  ('Local', 'local'), ('Distributed', 'distributed'), ('ALBA', 'alba')]:
            code = backend_type_info[1]
            backend_type = BackendTypeList.get_backend_type_by_code(code)
            if backend_type is None:
                backend_type = BackendType()
            backend_type.name = backend_type_info[0]
            backend_type.code = code
            backend_type.save()

        # Add service types
        for service_type_info in [ServiceType.SERVICE_TYPES.MD_SERVER, ServiceType.SERVICE_TYPES.ALBA_PROXY, ServiceType.SERVICE_TYPES.ARAKOON]:
            service_type = ServiceType()
            service_type.name = service_type_info
            service_type.save()

        # Branding
        branding = Branding()
        branding.name = 'Default'
        branding.description = 'Default bootstrap theme'
        branding.css = 'bootstrap-default.min.css'
        branding.productname = 'Open vStorage'
        branding.is_default = True
        branding.save()
        slate = Branding()
        slate.name = 'Slate'
        slate.description = 'Dark bootstrap theme'
        slate.css = 'bootstrap-slate.min.css'
        slate.productname = 'Open vStorage'
        slate.is_default = False
        slate.save()

        # Failure Domain
        failure_domain = FailureDomain()
        failure_domain.name = 'Default'
        failure_domain.save()

        # We're now at version 1
        working_version = 1

    # Version 2 introduced:
    # - new Descriptor format
    if working_version < 2:
        import imp
        from ovs.dal.helpers import Descriptor
        from ovs.extensions.storage.persistentfactory import PersistentFactory
        client = PersistentFactory.get_client()
        keys = client.prefix('ovs_data')
        for key in keys:
            data = client.get(key)
            modified = False
            for entry in data.keys():
                if isinstance(data[entry], dict) and 'source' in data[entry] and 'hybrids' in data[entry]['source']:
                    filename = data[entry]['source']
                    if not filename.startswith('/'):
                        filename = '/opt/OpenvStorage/ovs/dal/{0}'.format(filename)
                    module = imp.load_source(data[entry]['name'], filename)
                    cls = getattr(module, data[entry]['type'])
                    new_data = Descriptor(cls, cached=False).descriptor
                    if 'guid' in data[entry]:
                        new_data['guid'] = data[entry]['guid']
                    data[entry] = new_data
                    modified = True
            if modified is True:
                data['_version'] += 1
                client.set(key, data)

        # We're now at version 2
        working_version = 2

    # Version 3 introduced:
    # - new Descriptor format
    if working_version < 3:
        import imp
        from ovs.dal.helpers import Descriptor
        from ovs.extensions.storage.persistentfactory import PersistentFactory
        client = PersistentFactory.get_client()
        keys = client.prefix('ovs_data')
        for key in keys:
            data = client.get(key)
            modified = False
            for entry in data.keys():
                if isinstance(data[entry], dict) and 'source' in data[entry]:
                    module = imp.load_source(data[entry]['name'], data[entry]['source'])
                    cls = getattr(module, data[entry]['type'])
                    new_data = Descriptor(cls, cached=False).descriptor
                    if 'guid' in data[entry]:
                        new_data['guid'] = data[entry]['guid']
                    data[entry] = new_data
                    modified = True
            if modified is True:
                data['_version'] += 1
                client.set(key, data)
        working_version = 3

    # Version 4 introduced:
    # - Flexible SSD layout
    if working_version < 4:
        import os
        from ovs.dal.hybrids.diskpartition import DiskPartition
        from ovs.dal.hybrids.j_storagedriverpartition import StorageDriverPartition
        from ovs.dal.hybrids.servicetype import ServiceType
        from ovs.dal.lists.servicetypelist import ServiceTypeList
        from ovs.dal.lists.storagedriverlist import StorageDriverList
        from ovs.extensions.generic.remote import remote
        from ovs.extensions.generic.sshclient import SSHClient
        from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
        for service in ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER).services:
            mds_service = service.mds_service
            storagedriver = None
            for current_storagedriver in service.storagerouter.storagedrivers:
                if current_storagedriver.vpool_guid == mds_service.vpool_guid:
                    storagedriver = current_storagedriver
                    break
            tasks = {}
            if storagedriver._data.get('mountpoint_md'):
                tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_md'),
                                               storagedriver.vpool.name,
                                               mds_service.number)] = (DiskPartition.ROLES.DB, StorageDriverPartition.SUBROLE.MDS)
            if storagedriver._data.get('mountpoint_temp'):
                tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_temp'),
                                               storagedriver.vpool.name,
                                               mds_service.number)] = (DiskPartition.ROLES.SCRUB, StorageDriverPartition.SUBROLE.MDS)
            for disk in service.storagerouter.disks:
                for partition in disk.partitions:
                    for directory, (role, subrole) in tasks.iteritems():
                        with remote(storagedriver.storagerouter.ip, [os], username='******') as rem:
                            stat_dir = directory
                            while not rem.os.path.exists(stat_dir) and stat_dir != '/':
                                stat_dir = stat_dir.rsplit('/', 1)[0]
                                if not stat_dir:
                                    stat_dir = '/'
                            inode = rem.os.stat(stat_dir).st_dev
                        if partition.inode == inode:
                            if role not in partition.roles:
                                partition.roles.append(role)
                                partition.save()
                            number = 0
                            migrated = False
                            for sd_partition in storagedriver.partitions:
                                if sd_partition.role == role and sd_partition.sub_role == subrole:
                                    if sd_partition.mds_service == mds_service:
                                        migrated = True
                                        break
                                    if sd_partition.partition_guid == partition.guid:
                                        number = max(sd_partition.number, number)
                            if migrated is False:
                                sd_partition = StorageDriverPartition()
                                sd_partition.role = role
                                sd_partition.sub_role = subrole
                                sd_partition.partition = partition
                                sd_partition.storagedriver = storagedriver
                                sd_partition.mds_service = mds_service
                                sd_partition.size = None
                                sd_partition.number = number + 1
                                sd_partition.save()
                                client = SSHClient(storagedriver.storagerouter, username='******')
                                path = sd_partition.path.rsplit('/', 1)[0]
                                if path:
                                    client.dir_create(path)
                                    client.dir_chown(path, 'ovs', 'ovs')
                                client.dir_create(directory)
                                client.dir_chown(directory, 'ovs', 'ovs')
                                client.symlink({sd_partition.path: directory})
        for storagedriver in StorageDriverList.get_storagedrivers():
            migrated_objects = {}
            for disk in storagedriver.storagerouter.disks:
                for partition in disk.partitions:
                    # Process all mountpoints that are unique and don't have a specified size
                    for key, (role, sr_info) in {'mountpoint_md': (DiskPartition.ROLES.DB, {'metadata_{0}': StorageDriverPartition.SUBROLE.MD,
                                                                                            'tlogs_{0}': StorageDriverPartition.SUBROLE.TLOG}),
                                                 'mountpoint_fragmentcache': (DiskPartition.ROLES.WRITE, {'fcache_{0}': StorageDriverPartition.SUBROLE.FCACHE}),
                                                 'mountpoint_foc': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                                                                'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
                                                 'mountpoint_dtl': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                                                                'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
                                                 'mountpoint_readcaches': (DiskPartition.ROLES.READ, {'': None}),
                                                 'mountpoint_writecaches': (DiskPartition.ROLES.WRITE, {'sco_{0}': StorageDriverPartition.SUBROLE.SCO})}.iteritems():
                        if key in storagedriver._data:
                            is_list = isinstance(storagedriver._data[key], list)
                            entries = storagedriver._data[key][:] if is_list is True else [storagedriver._data[key]]
                            for entry in entries:
                                if not entry:
                                    if is_list:
                                        storagedriver._data[key].remove(entry)
                                        if len(storagedriver._data[key]) == 0:
                                            del storagedriver._data[key]
                                    else:
                                        del storagedriver._data[key]
                                else:
                                    with remote(storagedriver.storagerouter.ip, [os], username='******') as rem:
                                        inode = rem.os.stat(entry).st_dev
                                    if partition.inode == inode:
                                        if role not in partition.roles:
                                            partition.roles.append(role)
                                            partition.save()
                                        for folder, subrole in sr_info.iteritems():
                                            number = 0
                                            migrated = False
                                            for sd_partition in storagedriver.partitions:
                                                if sd_partition.role == role and sd_partition.sub_role == subrole:
                                                    if sd_partition.partition_guid == partition.guid:
                                                        number = max(sd_partition.number, number)
                                            if migrated is False:
                                                sd_partition = StorageDriverPartition()
                                                sd_partition.role = role
                                                sd_partition.sub_role = subrole
                                                sd_partition.partition = partition
                                                sd_partition.storagedriver = storagedriver
                                                sd_partition.size = None
                                                sd_partition.number = number + 1
                                                sd_partition.save()
                                                if folder:
                                                    source = '{0}/{1}'.format(entry, folder.format(storagedriver.vpool.name))
                                                else:
                                                    source = entry
                                                client = SSHClient(storagedriver.storagerouter, username='******')
                                                path = sd_partition.path.rsplit('/', 1)[0]
                                                if path:
                                                    client.dir_create(path)
                                                    client.dir_chown(path, 'ovs', 'ovs')
                                                client.symlink({sd_partition.path: source})
                                                migrated_objects[source] = sd_partition
                                        if is_list:
                                            storagedriver._data[key].remove(entry)
                                            if len(storagedriver._data[key]) == 0:
                                                del storagedriver._data[key]
                                        else:
                                            del storagedriver._data[key]
                                        storagedriver.save()
            if 'mountpoint_bfs' in storagedriver._data:
                storagedriver.mountpoint_dfs = storagedriver._data['mountpoint_bfs']
                if not storagedriver.mountpoint_dfs:
                    storagedriver.mountpoint_dfs = None
                del storagedriver._data['mountpoint_bfs']
                storagedriver.save()
            if 'mountpoint_temp' in storagedriver._data:
                del storagedriver._data['mountpoint_temp']
                storagedriver.save()
            if migrated_objects:
                print 'Loading sizes'
                config = StorageDriverConfiguration('storagedriver', storagedriver.vpool_guid, storagedriver.storagedriver_id)
                config.load()
                for readcache in config.configuration.get('content_addressed_cache', {}).get('clustercache_mount_points', []):
                    path = readcache.get('path', '').rsplit('/', 1)[0]
                    size = int(readcache['size'].strip('KiB')) * 1024 if 'size' in readcache else None
                    if path in migrated_objects:
                        migrated_objects[path].size = long(size)
                        migrated_objects[path].save()
                for writecache in config.configuration.get('scocache', {}).get('scocache_mount_points', []):
                    path = writecache.get('path', '')
                    size = int(writecache['size'].strip('KiB')) * 1024 if 'size' in writecache else None
                    if path in migrated_objects:
                        migrated_objects[path].size = long(size)
                        migrated_objects[path].save()
        working_version = 4

    # Version 5 introduced:
    # - Failure Domains
    if working_version < 5:
        import os
        from ovs.dal.hybrids.failuredomain import FailureDomain
        from ovs.dal.lists.failuredomainlist import FailureDomainList
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.extensions.generic.remote import remote
        from ovs.extensions.generic.sshclient import SSHClient
        failure_domains = FailureDomainList.get_failure_domains()
        if len(failure_domains) > 0:
            failure_domain = failure_domains[0]
        else:
            failure_domain = FailureDomain()
            failure_domain.name = 'Default'
            failure_domain.save()
        for storagerouter in StorageRouterList.get_storagerouters():
            change = False
            if storagerouter.primary_failure_domain is None:
                storagerouter.primary_failure_domain = failure_domain
                change = True
            if storagerouter.rdma_capable is None:
                client = SSHClient(storagerouter, username='******')
                rdma_capable = False
                with remote(client.ip, [os], username='******') as rem:
                    for root, dirs, files in rem.os.walk('/sys/class/infiniband'):
                        for directory in dirs:
                            ports_dir = '/'.join([root, directory, 'ports'])
                            if not rem.os.path.exists(ports_dir):
                                continue
                            for sub_root, sub_dirs, _ in rem.os.walk(ports_dir):
                                if sub_root != ports_dir:
                                    continue
                                for sub_directory in sub_dirs:
                                    state_file = '/'.join([sub_root, sub_directory, 'state'])
                                    if rem.os.path.exists(state_file):
                                        if 'ACTIVE' in client.run('cat {0}'.format(state_file)):
                                            rdma_capable = True
                storagerouter.rdma_capable = rdma_capable
                change = True
            if change is True:
                storagerouter.save()
        working_version = 5

    # Version 6 introduced:
    # - Distributed scrubbing
    if working_version < 6:
        from ovs.dal.hybrids.diskpartition import DiskPartition
        from ovs.dal.lists.storagedriverlist import StorageDriverList
        from ovs.extensions.generic.sshclient import SSHClient
        for storage_driver in StorageDriverList.get_storagedrivers():
            root_client = SSHClient(storage_driver.storagerouter, username='******')
            for partition in storage_driver.partitions:
                if partition.role == DiskPartition.ROLES.SCRUB:
                    old_path = partition.path
                    partition.sub_role = None
                    partition.save()
                    partition.invalidate_dynamics(['folder', 'path'])
                    if root_client.dir_exists(partition.path):
                        continue  # New directory already exists
                    if '_mds_' in old_path:
                        if root_client.dir_exists(old_path):
                            root_client.symlink({partition.path: old_path})
                    if not root_client.dir_exists(partition.path):
                        root_client.dir_create(partition.path)
                    root_client.dir_chmod(partition.path, 0777)
        working_version = 6

    # Version 7 introduced:
    # - vPool status
    if working_version < 7:
        from ovs.dal.hybrids import vpool
        reload(vpool)
        from ovs.dal.hybrids.vpool import VPool
        from ovs.dal.lists.vpoollist import VPoolList
        for _vpool in VPoolList.get_vpools():
            vpool = VPool(_vpool.guid)
            if hasattr(vpool, 'status') and vpool.status is None:
                vpool.status = VPool.STATUSES.RUNNING
                vpool.save()
        working_version = 7

    # Version 10 introduced:
    # - Reverse indexes are stored in persistent store
    # - Store more non-changing metadata on disk iso using a dynamic property
    if working_version < 10:
        from ovs.dal.helpers import HybridRunner, Descriptor
        from ovs.dal.datalist import DataList
        from ovs.extensions.storage.persistentfactory import PersistentFactory
        from ovs.extensions.storage.volatilefactory import VolatileFactory
        persistent = PersistentFactory.get_client()
        for prefix in ['ovs_listcache', 'ovs_reverseindex']:
            for key in persistent.prefix(prefix):
                persistent.delete(key)
        for key in persistent.prefix('ovs_data_'):
            persistent.set(key, persistent.get(key))
        base_reverse_key = 'ovs_reverseindex_{0}_{1}|{2}|{3}'
        hybrid_structure = HybridRunner.get_hybrids()
        for class_descriptor in hybrid_structure.values():
            cls = Descriptor().load(class_descriptor).get_object()
            all_objects = DataList(cls, {'type': DataList.where_operator.AND,
                                         'items': []})
            for item in all_objects:
                guid = item.guid
                for relation in item._relations:
                    if relation.foreign_type is None:
                        rcls = cls
                        rclsname = rcls.__name__.lower()
                    else:
                        rcls = relation.foreign_type
                        rclsname = rcls.__name__.lower()
                    key = relation.name
                    rguid = item._data[key]['guid']
                    if rguid is not None:
                        reverse_key = base_reverse_key.format(rclsname, rguid, relation.foreign_key, guid)
                        persistent.set(reverse_key, 0)
        volatile = VolatileFactory.get_client()
        try:
            volatile._client.flush_all()
        except:
            pass
        from ovs.dal.lists.vdisklist import VDiskList
        for vdisk in VDiskList.get_vdisks():
            try:
                vdisk.metadata = {'lba_size': vdisk.info['lba_size'],
                                  'cluster_multiplier': vdisk.info['cluster_multiplier']}
                vdisk.save()
            except:
                pass
        working_version = 10

    # Version 11 introduced:
    # - ALBA accelerated ALBA, meaning different vpool.metadata information
    if working_version < 11:
        from ovs.dal.lists.vpoollist import VPoolList
        for vpool in VPoolList.get_vpools():
            vpool.metadata = {'backend': vpool.metadata}
            if 'metadata' in vpool.metadata['backend']:
                vpool.metadata['backend']['arakoon_config'] = vpool.metadata['backend'].pop('metadata')
            if 'backend_info' in vpool.metadata['backend']:
                vpool.metadata['backend']['backend_info']['fragment_cache_on_read'] = True
                vpool.metadata['backend']['backend_info']['fragment_cache_on_write'] = False
            vpool.save()
        working_version = 11

    return working_version
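# A hedged sketch of how a migrator like the one above is typically driven: read
# the stored model version, apply migrate(), and persist the result. load_version
# and save_version are hypothetical helpers, not framework API:
stored_version = load_version()           # e.g. 0 on a fresh install
stored_version = migrate(stored_version)  # runs every outstanding step in order
save_version(stored_version)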
def prepare_mds_service(storagerouter, vpool, fresh_only, reload_config):
    """
    Prepares an MDS service:
      * Creates the required configuration
      * Sets up the service files
    Assumes the StorageRouter and VPool are already configured with a StorageDriver and that all
    model-wise configuration regarding both is completed.
    :param storagerouter: Storagerouter on which MDS service will be created
    :param vpool: The vPool for which the MDS service will be created
    :param fresh_only: If True and no current MDS services exist for this vpool on this storagerouter, a new one will be created
    :param reload_config: If True, the volumedriver's updated configuration will be reloaded
    """
    # Fetch service sequence number based on MDS services for current vPool and current storage router
    service_number = -1
    for mds_service in vpool.mds_services:
        if mds_service.service.storagerouter_guid == storagerouter.guid:
            service_number = max(mds_service.number, service_number)
    if fresh_only is True and service_number >= 0:
        return  # There is already 1 or more MDS services running, aborting

    # VALIDATIONS
    # 1. Find free port based on MDS services for all vPools on current storage router
    client = SSHClient(storagerouter)
    mdsservice_type = ServiceTypeList.get_by_name('MetadataServer')
    occupied_ports = []
    for service in mdsservice_type.services:
        if service.storagerouter_guid == storagerouter.guid:
            occupied_ports.extend(service.ports)
    mds_port_range = client.config_read('ovs.ports.mds')
    free_ports = System.get_free_ports(selected_range=mds_port_range,
                                       exclude=occupied_ports,
                                       nr=1,
                                       client=client)
    if not free_ports:
        raise RuntimeError('Failed to find an available port on storage router {0} within range {1}'.format(storagerouter.name, mds_port_range))

    # 2. Partition check
    db_partition = None
    for disk in storagerouter.disks:
        for partition in disk.partitions:
            if DiskPartition.ROLES.DB in partition.roles:
                db_partition = partition
                break
    if db_partition is None:
        raise RuntimeError('Could not find DB partition on storage router {0}'.format(storagerouter.name))

    # 3. Verify storage driver configured
    storagedrivers = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid]
    if not storagedrivers:
        raise RuntimeError('Expected to find a configured storagedriver for vpool {0} on storage router {1}'.format(vpool.name, storagerouter.name))

    # MODEL UPDATES
    # 1. Service
    service_number += 1
    service = Service()
    service.name = 'metadataserver_{0}_{1}'.format(vpool.name, service_number)
    service.type = mdsservice_type
    service.ports = [free_ports[0]]
    service.storagerouter = storagerouter
    service.save()
    mds_service = MDSService()
    mds_service.vpool = vpool
    mds_service.number = service_number
    mds_service.service = service
    mds_service.save()

    # 2. Storage driver partitions
    from ovs.lib.storagedriver import StorageDriverController
    sdp = StorageDriverController.add_storagedriverpartition(storagedrivers[0], {'size': None,
                                                                                 'role': DiskPartition.ROLES.DB,
                                                                                 'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                                 'partition': db_partition,
                                                                                 'mds_service': mds_service})

    # CONFIGURATIONS
    # 1. Volumedriver
    mds_nodes = []
    for service in mdsservice_type.services:
        if service.storagerouter_guid == storagerouter.guid:
            mds_service = service.mds_service
            if mds_service.vpool_guid == vpool.guid:
                mds_nodes.append({'host': service.storagerouter.ip,
                                  'port': service.ports[0],
                                  'db_directory': sdp.path,
                                  'scratch_directory': sdp.path})

    # Generate the correct section in the Storage Driver's configuration
    storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name)
    storagedriver_config.load(client)
    storagedriver_config.clean()  # Clean out obsolete values
    storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
    storagedriver_config.save(client, reload_config=reload_config)
    return mds_service
def mds_checkup(): """ Validates the current MDS setup/configuration and takes actions where required """ MDSServiceController._logger.info("MDS checkup - Started") mds_dict = {} for vpool in VPoolList.get_vpools(): MDSServiceController._logger.info("MDS checkup - vPool {0}".format(vpool.name)) mds_dict[vpool] = {} for mds_service in vpool.mds_services: storagerouter = mds_service.service.storagerouter if storagerouter not in mds_dict[vpool]: mds_dict[vpool][storagerouter] = {"client": None, "services": []} try: mds_dict[vpool][storagerouter]["client"] = SSHClient(storagerouter, username="******") MDSServiceController._logger.info( "MDS checkup - vPool {0} - Storage Router {1} - ONLINE".format( vpool.name, storagerouter.name ) ) except UnableToConnectException: MDSServiceController._logger.info( "MDS checkup - vPool {0} - Storage Router {1} - OFFLINE".format( vpool.name, storagerouter.name ) ) mds_dict[vpool][storagerouter]["services"].append(mds_service) failures = [] max_load = Configuration.get("/ovs/framework/storagedriver|mds_maxload") for vpool, storagerouter_info in mds_dict.iteritems(): # 1. First, make sure there's at least one MDS on every StorageRouter that's not overloaded # If not, create an extra MDS for that StorageRouter for storagerouter in storagerouter_info: client = mds_dict[vpool][storagerouter]["client"] mds_services = mds_dict[vpool][storagerouter]["services"] has_room = False for mds_service in mds_services[:]: if mds_service.capacity == 0 and len(mds_service.vdisks_guids) == 0: MDSServiceController._logger.info( "MDS checkup - Removing mds_service {0} for vPool {1}".format( mds_service.number, vpool.name ) ) MDSServiceController.remove_mds_service( mds_service, vpool, reconfigure=True, allow_offline=client is None ) mds_services.remove(mds_service) for mds_service in mds_services: _, load = MDSServiceController.get_mds_load(mds_service) if load < max_load: has_room = True break MDSServiceController._logger.info( "MDS checkup - vPool {0} - Storage Router {1} - Capacity available: {2}".format( vpool.name, storagerouter.name, has_room ) ) if has_room is False and client is not None: mds_service = MDSServiceController.prepare_mds_service( storagerouter=storagerouter, vpool=vpool, fresh_only=False, reload_config=True ) if mds_service is None: raise RuntimeError("Could not add MDS node") mds_services.append(mds_service) mds_config_set = MDSServiceController.get_mds_storagedriver_config_set(vpool, True) for storagerouter in storagerouter_info: client = mds_dict[vpool][storagerouter]["client"] if client is None: MDSServiceController._logger.info( "MDS checkup - vPool {0} - Storage Router {1} - Marked as offline, not setting default MDS configuration".format( vpool.name, storagerouter.name ) ) continue storagedriver = [sd for sd in storagerouter.storagedrivers if sd.vpool_guid == vpool.guid][0] storagedriver_config = StorageDriverConfiguration( "storagedriver", vpool.guid, storagedriver.storagedriver_id ) storagedriver_config.load() if storagedriver_config.is_new is False: MDSServiceController._logger.info( "MDS checkup - vPool {0} - Storage Router {1} - Storing default MDS configuration: {2}".format( vpool.name, storagerouter.name, mds_config_set[storagerouter.guid] ) ) storagedriver_config.configure_filesystem( fs_metadata_backend_mds_nodes=mds_config_set[storagerouter.guid] ) storagedriver_config.save(client) # 2. Per VPool, execute a safety check, making sure the master/slave configuration is optimal. 
MDSServiceController._logger.info( "MDS checkup - vPool {0} - Ensuring safety for all virtual disks".format(vpool.name) ) for vdisk in vpool.vdisks: try: MDSServiceController.ensure_safety(vdisk) except Exception: message = "Ensure safety for vDisk {0} with guid {1} failed".format(vdisk.name, vdisk.guid) MDSServiceController._logger.exception(message) failures.append(message) if len(failures) > 0: raise Exception("\n - " + "\n - ".join(failures)) MDSServiceController._logger.info("MDS checkup - Finished")
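# Hedged sketch of the failure handling used by mds_checkup above: iterate over
# every vDisk, collect failures and raise one aggregated exception at the end,
# so a single failing vDisk cannot abort the checkup for the others. The helper
# and its arguments are hypothetical, not framework API.
def _ensure_safety_for_all(vdisks, ensure_safety):
    failures = []
    for vdisk in vdisks:
        try:
            ensure_safety(vdisk)
        except Exception as ex:
            failures.append('vDisk {0} failed: {1}'.format(vdisk, ex))
    if len(failures) > 0:
        raise Exception('\n - ' + '\n - '.join(failures))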
def migrate(previous_version): """ Migrates from any version to any version, running all migrations required If previous_version is for example 1 and this script is at version 3 it will execute two steps: - 1 > 2 - 2 > 3 @param previous_version: The previous version from which to start the migration. """ working_version = previous_version # Version 1 introduced: # - The datastore is still empty, add defaults if working_version < 1: from ovs.dal.hybrids.user import User from ovs.dal.hybrids.group import Group from ovs.dal.hybrids.role import Role from ovs.dal.hybrids.client import Client from ovs.dal.hybrids.j_rolegroup import RoleGroup from ovs.dal.hybrids.j_roleclient import RoleClient from ovs.dal.hybrids.backendtype import BackendType from ovs.dal.hybrids.servicetype import ServiceType from ovs.dal.hybrids.branding import Branding from ovs.dal.lists.backendtypelist import BackendTypeList # Create groups admin_group = Group() admin_group.name = 'administrators' admin_group.description = 'Administrators' admin_group.save() viewers_group = Group() viewers_group.name = 'viewers' viewers_group.description = 'Viewers' viewers_group.save() # Create users admin = User() admin.username = '******' admin.password = hashlib.sha256('admin').hexdigest() admin.is_active = True admin.group = admin_group admin.save() # Create internal OAuth 2 clients admin_pw_client = Client() admin_pw_client.ovs_type = 'INTERNAL' admin_pw_client.grant_type = 'PASSWORD' admin_pw_client.user = admin admin_pw_client.save() admin_cc_client = Client() admin_cc_client.ovs_type = 'INTERNAL' admin_cc_client.grant_type = 'CLIENT_CREDENTIALS' admin_cc_client.client_secret = ''.join(random.choice(string.ascii_letters + string.digits + '|_=+*#@!/-[]{}<>.?,\'";:~') for _ in range(128)) admin_cc_client.user = admin admin_cc_client.save() # Create roles read_role = Role() read_role.code = 'read' read_role.name = 'Read' read_role.description = 'Can read objects' read_role.save() write_role = Role() write_role.code = 'write' write_role.name = 'Write' write_role.description = 'Can write objects' write_role.save() manage_role = Role() manage_role.code = 'manage' manage_role.name = 'Manage' manage_role.description = 'Can manage the system' manage_role.save() # Attach groups to roles mapping = [ (admin_group, [read_role, write_role, manage_role]), (viewers_group, [read_role]) ] for setting in mapping: for role in setting[1]: rolegroup = RoleGroup() rolegroup.group = setting[0] rolegroup.role = role rolegroup.save() for user in setting[0].users: for role in setting[1]: for client in user.clients: roleclient = RoleClient() roleclient.client = client roleclient.role = role roleclient.save() # Add backends for backend_type_info in [('Ceph', 'ceph_s3'), ('Amazon', 'amazon_s3'), ('Swift', 'swift_s3'), ('Local', 'local'), ('Distributed', 'distributed'), ('ALBA', 'alba')]: code = backend_type_info[1] backend_type = BackendTypeList.get_backend_type_by_code(code) if backend_type is None: backend_type = BackendType() backend_type.name = backend_type_info[0] backend_type.code = code backend_type.save() # Add service types for service_type_info in ['MetadataServer', 'AlbaProxy', 'Arakoon']: service_type = ServiceType() service_type.name = service_type_info service_type.save() # Brandings branding = Branding() branding.name = 'Default' branding.description = 'Default bootstrap theme' branding.css = 'bootstrap-default.min.css' branding.productname = 'Open vStorage' branding.is_default = True branding.save() slate = Branding() slate.name = 'Slate'
slate.description = 'Dark bootstrap theme' slate.css = 'bootstrap-slate.min.css' slate.productname = 'Open vStorage' slate.is_default = False slate.save() # We're now at version 1 working_version = 1 # Version 2 introduced: # - new Descriptor format if working_version < 2: import imp from ovs.dal.helpers import Descriptor from ovs.extensions.storage.persistentfactory import PersistentFactory client = PersistentFactory.get_client() keys = client.prefix('ovs_data') for key in keys: data = client.get(key) modified = False for entry in data.keys(): if isinstance(data[entry], dict) and 'source' in data[entry] and 'hybrids' in data[entry]['source']: filename = data[entry]['source'] if not filename.startswith('/'): filename = '/opt/OpenvStorage/ovs/dal/{0}'.format(filename) module = imp.load_source(data[entry]['name'], filename) cls = getattr(module, data[entry]['type']) new_data = Descriptor(cls, cached=False).descriptor if 'guid' in data[entry]: new_data['guid'] = data[entry]['guid'] data[entry] = new_data modified = True if modified is True: data['_version'] += 1 client.set(key, data) # We're now at version 2 working_version = 2 # Version 3 introduced: # - new Descriptor format if working_version < 3: import imp from ovs.dal.helpers import Descriptor from ovs.extensions.storage.persistentfactory import PersistentFactory client = PersistentFactory.get_client() keys = client.prefix('ovs_data') for key in keys: data = client.get(key) modified = False for entry in data.keys(): if isinstance(data[entry], dict) and 'source' in data[entry]: module = imp.load_source(data[entry]['name'], data[entry]['source']) cls = getattr(module, data[entry]['type']) new_data = Descriptor(cls, cached=False).descriptor if 'guid' in data[entry]: new_data['guid'] = data[entry]['guid'] data[entry] = new_data modified = True if modified is True: data['_version'] += 1 client.set(key, data) working_version = 3 # Version 4 introduced: # - Flexible SSD layout if working_version < 4: import os from ovs.dal.lists.storagedriverlist import StorageDriverList from ovs.dal.hybrids.j_storagedriverpartition import StorageDriverPartition from ovs.dal.hybrids.diskpartition import DiskPartition from ovs.dal.lists.servicetypelist import ServiceTypeList from ovs.extensions.generic.remote import Remote from ovs.extensions.generic.sshclient import SSHClient from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration for service in ServiceTypeList.get_by_name('MetadataServer').services: mds_service = service.mds_service storagedriver = None for current_storagedriver in service.storagerouter.storagedrivers: if current_storagedriver.vpool_guid == mds_service.vpool_guid: storagedriver = current_storagedriver break tasks = {} if storagedriver._data.get('mountpoint_md'): tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_md'), storagedriver.vpool.name, mds_service.number)] = (DiskPartition.ROLES.DB, StorageDriverPartition.SUBROLE.MDS) if storagedriver._data.get('mountpoint_temp'): tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_temp'), storagedriver.vpool.name, mds_service.number)] = (DiskPartition.ROLES.SCRUB, StorageDriverPartition.SUBROLE.MDS) for disk in service.storagerouter.disks: for partition in disk.partitions: for directory, (role, subrole) in tasks.iteritems(): with Remote(storagedriver.storagerouter.ip, [os], username='******') as remote: stat_dir = directory while not remote.os.path.exists(stat_dir) and stat_dir != '/': stat_dir = stat_dir.rsplit('/', 1)[0] if not stat_dir: 
stat_dir = '/' inode = remote.os.stat(stat_dir).st_dev if partition.inode == inode: if role not in partition.roles: partition.roles.append(role) partition.save() number = 0 migrated = False for sd_partition in storagedriver.partitions: if sd_partition.role == role and sd_partition.sub_role == subrole: if sd_partition.mds_service == mds_service: migrated = True break if sd_partition.partition_guid == partition.guid: number = max(sd_partition.number, number) if migrated is False: sd_partition = StorageDriverPartition() sd_partition.role = role sd_partition.sub_role = subrole sd_partition.partition = partition sd_partition.storagedriver = storagedriver sd_partition.mds_service = mds_service sd_partition.size = None sd_partition.number = number + 1 sd_partition.save() client = SSHClient(storagedriver.storagerouter, username='******') path = sd_partition.path.rsplit('/', 1)[0] if path: client.dir_create(path) client.dir_chown(path, 'ovs', 'ovs') client.dir_create(directory) client.dir_chown(directory, 'ovs', 'ovs') client.symlink({sd_partition.path: directory}) for storagedriver in StorageDriverList.get_storagedrivers(): migrated_objects = {} for disk in storagedriver.storagerouter.disks: for partition in disk.partitions: # Process all mountpoints that are unique and don't have a specified size for key, (role, sr_info) in {'mountpoint_md': (DiskPartition.ROLES.DB, {'metadata_{0}': StorageDriverPartition.SUBROLE.MD, 'tlogs_{0}': StorageDriverPartition.SUBROLE.TLOG}), 'mountpoint_fragmentcache': (DiskPartition.ROLES.WRITE, {'fcache_{0}': StorageDriverPartition.SUBROLE.FCACHE}), 'mountpoint_foc': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD, 'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}), 'mountpoint_dtl': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD, 'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}), 'mountpoint_readcaches': (DiskPartition.ROLES.READ, {'': None}), 'mountpoint_writecaches': (DiskPartition.ROLES.WRITE, {'sco_{0}': StorageDriverPartition.SUBROLE.SCO})}.iteritems(): if key in storagedriver._data: is_list = isinstance(storagedriver._data[key], list) entries = storagedriver._data[key][:] if is_list is True else [storagedriver._data[key]] for entry in entries: if not entry: if is_list: storagedriver._data[key].remove(entry) if len(storagedriver._data[key]) == 0: del storagedriver._data[key] else: del storagedriver._data[key] else: with Remote(storagedriver.storagerouter.ip, [os], username='******') as remote: inode = remote.os.stat(entry).st_dev if partition.inode == inode: if role not in partition.roles: partition.roles.append(role) partition.save() for folder, subrole in sr_info.iteritems(): number = 0 migrated = False for sd_partition in storagedriver.partitions: if sd_partition.role == role and sd_partition.sub_role == subrole: if sd_partition.partition_guid == partition.guid: number = max(sd_partition.number, number) if migrated is False: sd_partition = StorageDriverPartition() sd_partition.role = role sd_partition.sub_role = subrole sd_partition.partition = partition sd_partition.storagedriver = storagedriver sd_partition.size = None sd_partition.number = number + 1 sd_partition.save() if folder: source = '{0}/{1}'.format(entry, folder.format(storagedriver.vpool.name)) else: source = entry client = SSHClient(storagedriver.storagerouter, username='******') path = sd_partition.path.rsplit('/', 1)[0] if path: client.dir_create(path) client.dir_chown(path, 'ovs', 'ovs') client.symlink({sd_partition.path: source}) 
migrated_objects[source] = sd_partition if is_list: storagedriver._data[key].remove(entry) if len(storagedriver._data[key]) == 0: del storagedriver._data[key] else: del storagedriver._data[key] storagedriver.save() if 'mountpoint_bfs' in storagedriver._data: storagedriver.mountpoint_dfs = storagedriver._data['mountpoint_bfs'] if not storagedriver.mountpoint_dfs: storagedriver.mountpoint_dfs = None del storagedriver._data['mountpoint_bfs'] storagedriver.save() if 'mountpoint_temp' in storagedriver._data: del storagedriver._data['mountpoint_temp'] storagedriver.save() if migrated_objects: print 'Loading sizes' config = StorageDriverConfiguration('storagedriver', storagedriver.vpool.name) config.load(SSHClient(storagedriver.storagerouter, username='******')) for readcache in config.configuration.get('content_addressed_cache', {}).get('clustercache_mount_points', []): path = readcache.get('path', '').rsplit('/', 1)[0] size = int(readcache['size'].strip('KiB')) * 1024 if 'size' in readcache else None if path in migrated_objects: migrated_objects[path].size = long(size) if size is not None else None migrated_objects[path].save() for writecache in config.configuration.get('scocache', {}).get('scocache_mount_points', []): path = writecache.get('path', '') size = int(writecache['size'].strip('KiB')) * 1024 if 'size' in writecache else None if path in migrated_objects: migrated_objects[path].size = long(size) if size is not None else None migrated_objects[path].save() working_version = 4 return working_version
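# Minimal skeleton of the versioned migration pattern implemented above,
# assuming every block is safe to replay from any older version: each guard
# runs only the missing steps and bumps working_version once done. The step
# bodies are placeholders.
def _migrate_skeleton(previous_version):
    working_version = previous_version
    if working_version < 1:
        # seed defaults (users, groups, roles, backends, ...)
        working_version = 1
    if working_version < 2:
        # rewrite stored objects to the new Descriptor format
        working_version = 2
    return working_version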
def mds_checkup(): """ Validates the current MDS setup/configuration and takes actions where required """ MDSServiceController._logger.info('MDS checkup - Started') mds_dict = {} for vpool in VPoolList.get_vpools(): MDSServiceController._logger.info('MDS checkup - vPool {0}'.format( vpool.name)) mds_dict[vpool] = {} for mds_service in vpool.mds_services: storagerouter = mds_service.service.storagerouter if storagerouter not in mds_dict[vpool]: mds_dict[vpool][storagerouter] = { 'client': None, 'services': [] } try: mds_dict[vpool][storagerouter]['client'] = SSHClient( storagerouter, username='******') MDSServiceController._logger.info( 'MDS checkup - vPool {0} - Storage Router {1} - ONLINE' .format(vpool.name, storagerouter.name)) except UnableToConnectException: MDSServiceController._logger.info( 'MDS checkup - vPool {0} - Storage Router {1} - OFFLINE' .format(vpool.name, storagerouter.name)) mds_dict[vpool][storagerouter]['services'].append(mds_service) failures = [] max_load = Configuration.get( '/ovs/framework/storagedriver|mds_maxload') for vpool, storagerouter_info in mds_dict.iteritems(): # 1. First, make sure there's at least one MDS on every StorageRouter that's not overloaded # If not, create an extra MDS for that StorageRouter for storagerouter in storagerouter_info: client = mds_dict[vpool][storagerouter]['client'] mds_services = mds_dict[vpool][storagerouter]['services'] has_room = False for mds_service in mds_services[:]: if mds_service.capacity == 0 and len( mds_service.vdisks_guids) == 0: MDSServiceController._logger.info( 'MDS checkup - Removing mds_service {0} for vPool {1}' .format(mds_service.number, vpool.name)) MDSServiceController.remove_mds_service( mds_service, vpool, reconfigure=True, allow_offline=client is None) mds_services.remove(mds_service) for mds_service in mds_services: _, load = MDSServiceController.get_mds_load(mds_service) if load < max_load: has_room = True break MDSServiceController._logger.info( 'MDS checkup - vPool {0} - Storage Router {1} - Capacity available: {2}' .format(vpool.name, storagerouter.name, has_room)) if has_room is False and client is not None: mds_service = MDSServiceController.prepare_mds_service( storagerouter=storagerouter, vpool=vpool, fresh_only=False, reload_config=True) if mds_service is None: raise RuntimeError('Could not add MDS node') mds_services.append(mds_service) mds_config_set = MDSServiceController.get_mds_storagedriver_config_set( vpool, True) for storagerouter in storagerouter_info: client = mds_dict[vpool][storagerouter]['client'] if client is None: MDSServiceController._logger.info( 'MDS checkup - vPool {0} - Storage Router {1} - Marked as offline, not setting default MDS configuration' .format(vpool.name, storagerouter.name)) continue storagedriver = [ sd for sd in storagerouter.storagedrivers if sd.vpool_guid == vpool.guid ][0] storagedriver_config = StorageDriverConfiguration( 'storagedriver', vpool.guid, storagedriver.storagedriver_id) storagedriver_config.load() if storagedriver_config.is_new is False: MDSServiceController._logger.info( 'MDS checkup - vPool {0} - Storage Router {1} - Storing default MDS configuration: {2}' .format(vpool.name, storagerouter.name, mds_config_set[storagerouter.guid])) storagedriver_config.configure_filesystem( fs_metadata_backend_mds_nodes=mds_config_set[ storagerouter.guid]) storagedriver_config.save(client) # 2. Per VPool, execute a safety check, making sure the master/slave configuration is optimal. 
MDSServiceController._logger.info( 'MDS checkup - vPool {0} - Ensuring safety for all virtual disks' .format(vpool.name)) for vdisk in vpool.vdisks: try: MDSServiceController.ensure_safety(vdisk) except Exception: message = 'Ensure safety for vDisk {0} with guid {1} failed'.format( vdisk.name, vdisk.guid) MDSServiceController._logger.exception(message) failures.append(message) if len(failures) > 0: raise Exception('\n - ' + '\n - '.join(failures)) MDSServiceController._logger.info('MDS checkup - Finished')
def remove_mds_service(mds_service, vpool, reconfigure, allow_offline=False): """ Removes an MDS service :param mds_service: The MDS service to remove :type mds_service: MDSService :param vpool: The vPool for which the MDS service will be removed :type vpool: VPool :param reconfigure: Indicates whether reconfiguration is required :type reconfigure: bool :param allow_offline: Indicates whether it's OK that the node for which mds services are cleaned is offline :type allow_offline: bool """ if len(mds_service.vdisks_guids) > 0 and allow_offline is False: raise RuntimeError( 'Cannot remove MDSService that is still serving disks') mdsservice_type = ServiceTypeList.get_by_name( ServiceType.SERVICE_TYPES.MD_SERVER) # Clean up model directories_to_clean = [] for sd_partition in mds_service.storagedriver_partitions: directories_to_clean.append(sd_partition.path) sd_partition.delete() if allow_offline is True: # Certain vdisks might still be attached to this offline MDS service --> Delete relations for junction in mds_service.vdisks: junction.delete() mds_service.delete() mds_service.service.delete() storagerouter = mds_service.service.storagerouter try: client = SSHClient(storagerouter) if reconfigure is True: # Generate new mds_nodes section mds_nodes = [] for service in mdsservice_type.services: if service.storagerouter_guid == storagerouter.guid: mds_service = service.mds_service if mds_service.vpool_guid == vpool.guid: sdp = [ sd_partition for sd_partition in mds_service.storagedriver_partitions if sd_partition.role == DiskPartition.ROLES.DB and sd_partition.sub_role == StorageDriverPartition.SUBROLE.MDS ][0] mds_nodes.append({ 'host': service.storagerouter.ip, 'port': service.ports[0], 'db_directory': sdp.path, 'scratch_directory': sdp.path }) # Generate the correct section in the Storage Driver's configuration storagedriver = [ sd for sd in storagerouter.storagedrivers if sd.vpool_guid == vpool.guid ][0] storagedriver_config = StorageDriverConfiguration( 'storagedriver', vpool.guid, storagedriver.storagedriver_id) storagedriver_config.load() storagedriver_config.configure_metadata_server( mds_nodes=mds_nodes) storagedriver_config.save(client, reload_config=reconfigure) tries = 5 while tries > 0: try: root_client = SSHClient(storagerouter, username='******') root_client.dir_delete(directories=directories_to_clean, follow_symlinks=True) for dir_name in directories_to_clean: MDSServiceController._logger.debug( 'Recursively removed {0}'.format(dir_name)) break except Exception: MDSServiceController._logger.debug( 'Waiting for the MDS service to go down...') time.sleep(5) tries -= 1 if tries == 0: raise except UnableToConnectException: if allow_offline is True: MDSServiceController._logger.info( 'Allowed offline node during mds service removal') else: raise
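# Sketch of the bounded retry used above when deleting the MDS directories:
# the service may still be shutting down and holding its paths, so the delete
# is retried a few times before the last error is re-raised.
# delete_directories is a hypothetical stand-in for the SSHClient.dir_delete
# call.
import time

def _delete_with_retries(delete_directories, tries=5, delay=5):
    while tries > 0:
        try:
            delete_directories()
            break
        except Exception:
            time.sleep(delay)
            tries -= 1
            if tries == 0:
                raise  # propagate the last failure once the retries are exhausted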
def add_vpool(cls, parameters): """ Add a vPool to the machine this task is running on :param parameters: Parameters for vPool creation :type parameters: dict :return: None :rtype: NoneType """ # TODO: Add logging cls._logger.debug('Adding vpool. Parameters: {}'.format(parameters)) # VALIDATIONS if not isinstance(parameters, dict): raise ValueError( 'Parameters passed to create a vPool should be of type dict') # Check StorageRouter existence storagerouter = StorageRouterList.get_by_ip( ip=parameters.get('storagerouter_ip')) if storagerouter is None: raise RuntimeError('Could not find StorageRouter') # Validate requested vPool configurations vp_installer = VPoolInstaller(name=parameters.get('vpool_name')) vp_installer.validate(storagerouter=storagerouter) # Validate requested StorageDriver configurations cls._logger.info( 'vPool {0}: Validating StorageDriver configurations'.format( vp_installer.name)) sd_installer = StorageDriverInstaller( vp_installer=vp_installer, configurations={ 'storage_ip': parameters.get('storage_ip'), 'caching_info': parameters.get('caching_info'), 'backend_info': { 'main': parameters.get('backend_info'), StorageDriverConfiguration.CACHE_BLOCK: parameters.get('backend_info_bc'), StorageDriverConfiguration.CACHE_FRAGMENT: parameters.get('backend_info_fc') }, 'connection_info': { 'main': parameters.get('connection_info'), StorageDriverConfiguration.CACHE_BLOCK: parameters.get('connection_info_bc'), StorageDriverConfiguration.CACHE_FRAGMENT: parameters.get('connection_info_fc') }, 'sd_configuration': parameters.get('config_params') }) partitions_mutex = volatile_mutex('add_vpool_partitions_{0}'.format( storagerouter.guid)) try: # VPOOL CREATION # Create the vPool as soon as possible in the process to be displayed in the GUI (INSTALLING/EXTENDING state) if vp_installer.is_new is True: vp_installer.create(rdma_enabled=sd_installer.rdma_enabled) vp_installer.configure_mds( config=parameters.get('mds_config_params', {})) else: vp_installer.update_status(status=VPool.STATUSES.EXTENDING) # ADDITIONAL VALIDATIONS # Check StorageRouter connectivity cls._logger.info( 'vPool {0}: Validating StorageRouter connectivity'.format( vp_installer.name)) linked_storagerouters = [storagerouter] if vp_installer.is_new is False: linked_storagerouters += [ sd.storagerouter for sd in vp_installer.vpool.storagedrivers ] sr_client_map = SSHClient.get_clients( endpoints=linked_storagerouters, user_names=['ovs', 'root']) offline_nodes = sr_client_map.pop('offline') if storagerouter in offline_nodes: raise RuntimeError( 'Node on which the vPool is being {0} is not reachable'. 
format('created' if vp_installer.is_new is True else 'extended')) sr_installer = StorageRouterInstaller( root_client=sr_client_map[storagerouter]['root'], sd_installer=sd_installer, vp_installer=vp_installer, storagerouter=storagerouter) # When 2 or more jobs simultaneously run on the same StorageRouter, we need to check and create the StorageDriver partitions in locked context partitions_mutex.acquire(wait=60) sr_installer.partition_info = StorageRouterController.get_partition_info( storagerouter_guid=storagerouter.guid) sr_installer.validate_vpool_extendable() sr_installer.validate_global_write_buffer( requested_size=parameters.get('writecache_size', 0)) sr_installer.validate_local_cache_size( requested_proxies=parameters.get('parallelism', {}).get( 'proxies', 2)) # MODEL STORAGEDRIVER AND PARTITION JUNCTIONS sd_installer.create() sd_installer.create_partitions() partitions_mutex.release() vp_installer.refresh_metadata() except Exception: cls._logger.exception( 'Something went wrong during the validation or modeling of vPool {0} on StorageRouter {1}' .format(vp_installer.name, storagerouter.name)) partitions_mutex.release() vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING) raise # Arakoon setup counter = 0 while counter < 300: try: if StorageDriverController.manual_voldrv_arakoon_checkup( ) is True: break except Exception: cls._logger.exception( 'Arakoon checkup for voldrv cluster failed') vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING) raise counter += 1 time.sleep(1) if counter == 300: vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING) raise RuntimeError( 'Arakoon checkup for the StorageDriver cluster could not be started' ) # Cluster registry try: vp_installer.configure_cluster_registry(allow_raise=True) except Exception: if vp_installer.is_new is True: vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING) else: vp_installer.revert_vpool(status=VPool.STATUSES.FAILURE) raise try: sd_installer.setup_proxy_configs() sd_installer.configure_storagedriver_service() DiskController.sync_with_reality(storagerouter.guid) MDSServiceController.prepare_mds_service( storagerouter=storagerouter, vpool=vp_installer.vpool) # Update the MDS safety if changed via API (vpool.configuration will be available at this point also for the newly added StorageDriver) vp_installer.vpool.invalidate_dynamics('configuration') if vp_installer.mds_safety is not None and vp_installer.vpool.configuration[ 'mds_config']['mds_safety'] != vp_installer.mds_safety: Configuration.set( key='/ovs/vpools/{0}/mds_config|mds_safety'.format( vp_installer.vpool.guid), value=vp_installer.mds_safety) sd_installer.start_services( ) # Create and start watcher volumedriver, DTL, proxies and StorageDriver services # Post creation/extension checkups mds_config_set = MDSServiceController.get_mds_storagedriver_config_set( vpool=vp_installer.vpool, offline_nodes=offline_nodes) for sr, clients in sr_client_map.iteritems(): for current_storagedriver in [ sd for sd in sr.storagedrivers if sd.vpool_guid == vp_installer.vpool.guid ]: storagedriver_config = StorageDriverConfiguration( vpool_guid=vp_installer.vpool.guid, storagedriver_id=current_storagedriver.storagedriver_id ) if storagedriver_config.config_missing is False: # Filesystem section in StorageDriver configuration are all parameters used for vDisks created directly on the filesystem # So when a vDisk gets created on the filesystem, these MDSes will be assigned to them storagedriver_config.configure_filesystem( fs_metadata_backend_mds_nodes=mds_config_set[ 
sr.guid]) storagedriver_config.save(client=clients['ovs']) # Everything's reconfigured, refresh new cluster configuration for current_storagedriver in vp_installer.vpool.storagedrivers: if current_storagedriver.storagerouter not in sr_client_map: continue vp_installer.vpool.storagedriver_client.update_cluster_node_configs( str(current_storagedriver.storagedriver_id), req_timeout_secs=10) except Exception: cls._logger.exception('vPool {0}: Creation failed'.format( vp_installer.name)) vp_installer.update_status(status=VPool.STATUSES.FAILURE) raise # When a node is offline, we can run into errors, but also when 1 or more volumes are not running # Scheduled tasks below, so don't really care whether they succeed or not try: VDiskController.dtl_checkup(vpool_guid=vp_installer.vpool.guid, ensure_single_timeout=600) except: pass for vdisk in vp_installer.vpool.vdisks: try: MDSServiceController.ensure_safety(vdisk_guid=vdisk.guid) except: pass vp_installer.update_status(status=VPool.STATUSES.RUNNING) cls._logger.info('Add vPool {0} ended successfully'.format( vp_installer.name))
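# Sketch of the locking strategy in add_vpool above: when several jobs run
# against the same StorageRouter simultaneously, partition validation and
# modeling happen inside a volatile_mutex so both jobs see a consistent
# layout. volatile_mutex is the primitive add_vpool itself uses; the two
# callables are hypothetical stand-ins for the validation and modeling steps.
def _model_partitions_locked(storagerouter_guid, validate, create, wait=60):
    mutex = volatile_mutex('add_vpool_partitions_{0}'.format(storagerouter_guid))
    mutex.acquire(wait=wait)
    try:
        validate()  # e.g. validate_global_write_buffer / validate_local_cache_size
        create()    # e.g. sd_installer.create_partitions()
    finally:
        mutex.release()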
def prepare_mds_service(client, storagerouter, vpool, fresh_only=True, reload_config=False): """ Prepares an MDS service: * Creates the required configuration * Sets up the service files Assumes the StorageRouter and VPool are already configured with a StorageDriver and that all model-wise configuration regarding both is completed. """ from ovs.lib.storagedriver import StorageDriverController mdsservice_type = ServiceTypeList.get_by_name('MetadataServer') storagedriver = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid][0] # Fetch service sequence number service_number = -1 for mds_service in vpool.mds_services: if mds_service.service.storagerouter_guid == storagerouter.guid: service_number = max(mds_service.number, service_number) if fresh_only is True and service_number >= 0: return None # There are already one or more MDS services running, aborting service_number += 1 # Find free port occupied_ports = [] for service in mdsservice_type.services: if service.storagerouter_guid == storagerouter.guid: occupied_ports.append(service.ports[0]) port = System.get_free_ports(Configuration.get('ovs.ports.mds'), exclude=occupied_ports, nr=1, client=client)[0] # Add service to the model service = DalService() service.name = 'metadataserver_{0}_{1}'.format(vpool.name, service_number) service.type = mdsservice_type service.storagerouter = storagerouter service.ports = [port] service.save() mds_service = MDSService() mds_service.service = service mds_service.vpool = vpool mds_service.number = service_number mds_service.save() scrub_partition = None db_partition = None for disk in storagerouter.disks: for partition in disk.partitions: if DiskPartition.ROLES.DB in partition.roles: db_partition = partition if DiskPartition.ROLES.SCRUB in partition.roles: scrub_partition = partition if scrub_partition is None or db_partition is None: raise RuntimeError('Could not find DB or SCRUB partition on StorageRouter {0}'.format(storagerouter.name)) StorageDriverController.add_storagedriverpartition(storagedriver, {'size': None, 'role': DiskPartition.ROLES.DB, 'sub_role': StorageDriverPartition.SUBROLE.MDS, 'partition': db_partition, 'mds_service': mds_service}) StorageDriverController.add_storagedriverpartition(storagedriver, {'size': None, 'role': DiskPartition.ROLES.SCRUB, 'sub_role': StorageDriverPartition.SUBROLE.MDS, 'partition': scrub_partition, 'mds_service': mds_service}) mds_nodes = [] for service in mdsservice_type.services: if service.storagerouter_guid == storagerouter.guid: mds_service = service.mds_service if mds_service.vpool_guid == vpool.guid: mds_nodes.append({'host': service.storagerouter.ip, 'port': service.ports[0], 'db_directory': [sd_partition.path for sd_partition in mds_service.storagedriver_partitions if sd_partition.role == DiskPartition.ROLES.DB and sd_partition.sub_role == StorageDriverPartition.SUBROLE.MDS][0], 'scratch_directory': [sd_partition.path for sd_partition in mds_service.storagedriver_partitions if sd_partition.role == DiskPartition.ROLES.SCRUB and sd_partition.sub_role == StorageDriverPartition.SUBROLE.MDS][0]}) # Generate the correct section in the Storage Driver's configuration storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name) storagedriver_config.load(client) storagedriver_config.clean() # Clean out obsolete values storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes) storagedriver_config.save(client, reload_config=reload_config) return mds_service
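# Hedged sketch of the port selection above: gather the ports already claimed
# by MDS services on this StorageRouter and let System.get_free_ports (the
# framework helper used above) pick one outside that set. The wrapper itself
# is hypothetical.
def _pick_mds_port(mdsservice_type, storagerouter, port_range, client):
    occupied_ports = [service.ports[0] for service in mdsservice_type.services
                      if service.storagerouter_guid == storagerouter.guid]
    return System.get_free_ports(port_range, exclude=occupied_ports, nr=1, client=client)[0]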
def remove_mds_service(mds_service, vpool, reconfigure, allow_offline=False): """ Removes an MDS service :param mds_service: The MDS service to remove :type mds_service: MDSService :param vpool: The vPool for which the MDS service will be removed :type vpool: VPool :param reconfigure: Indicates whether reconfiguration is required :type reconfigure: bool :param allow_offline: Indicates whether it's OK that the node for which mds services are cleaned is offline :type allow_offline: bool """ if len(mds_service.vdisks_guids) > 0 and allow_offline is False: raise RuntimeError("Cannot remove MDSService that is still serving disks") mdsservice_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER) # Clean up model directories_to_clean = [] for sd_partition in mds_service.storagedriver_partitions: directories_to_clean.append(sd_partition.path) sd_partition.delete() if ( allow_offline is True ): # Certain vdisks might still be attached to this offline MDS service --> Delete relations for junction in mds_service.vdisks: junction.delete() mds_service.delete() mds_service.service.delete() storagerouter = mds_service.service.storagerouter try: client = SSHClient(storagerouter) if reconfigure is True: # Generate new mds_nodes section mds_nodes = [] for service in mdsservice_type.services: if service.storagerouter_guid == storagerouter.guid: mds_service = service.mds_service if mds_service.vpool_guid == vpool.guid: sdp = [ sd_partition for sd_partition in mds_service.storagedriver_partitions if sd_partition.role == DiskPartition.ROLES.DB and sd_partition.sub_role == StorageDriverPartition.SUBROLE.MDS ][0] mds_nodes.append( { "host": service.storagerouter.ip, "port": service.ports[0], "db_directory": sdp.path, "scratch_directory": sdp.path, } ) # Generate the correct section in the Storage Driver's configuration storagedriver = [sd for sd in storagerouter.storagedrivers if sd.vpool_guid == vpool.guid][0] storagedriver_config = StorageDriverConfiguration( "storagedriver", vpool.guid, storagedriver.storagedriver_id ) storagedriver_config.load() storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes) storagedriver_config.save(client, reload_config=reconfigure) tries = 5 while tries > 0: try: root_client = SSHClient(storagerouter, username="******") root_client.dir_delete(directories=directories_to_clean, follow_symlinks=True) for dir_name in directories_to_clean: MDSServiceController._logger.debug("Recursively removed {0}".format(dir_name)) break except Exception: MDSServiceController._logger.debug("Waiting for the MDS service to go down...") time.sleep(5) tries -= 1 if tries == 0: raise except UnableToConnectException: if allow_offline is True: MDSServiceController._logger.info("Allowed offline node during mds service removal") else: raise
def update_status(storagedriver_id): """ Sets Storage Driver offline in case hypervisor management Center reports the hypervisor pmachine related to this Storage Driver as unavailable. :param storagedriver_id: ID of the storagedriver to update its status :type storagedriver_id: str :return: None """ pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id) storagedriver = StorageDriverList.get_by_storagedriver_id( storagedriver_id) storagerouter = storagedriver.storagerouter if pmachine.mgmtcenter: # Update status pmachine.invalidate_dynamics(['host_status']) host_status = pmachine.host_status else: # No management Center, cannot update status via api StorageDriverController._logger.info( 'Updating status of pmachine {0} using SSHClient'.format( pmachine.name)) path = StorageDriverConfiguration( 'storagedriver', storagedriver.vpool.guid, storagedriver.storagedriver_id).remote_path host_status = 'RUNNING' try: client = SSHClient(storagerouter, username='******') StorageDriverController._logger.info( 'SSHClient connected successfully to {0} at {1}'.format( pmachine.name, client.ip)) except UnableToConnectException as ex: StorageDriverController._logger.error( 'SSHClient connectivity check failed, assuming host {0} is halted. {1}' .format(pmachine.name, ex)) host_status = 'HALTED' else: try: with remote(client.ip, [LocalStorageRouterClient]) as rem: lsrc = rem.LocalStorageRouterClient(path) lsrc.server_revision() StorageDriverController._logger.info( 'LocalStorageRouterClient connected successfully to {0} at {1}' .format(pmachine.name, client.ip)) except (EOFError, RuntimeError, ClusterNotReachableException) as ex: StorageDriverController._logger.error( 'LocalStorageRouterClient check failed, assuming volumedriver on host {0} {1} is halted. {2}' .format(pmachine.name, client.ip, ex)) host_status = 'HALTED' if host_status != 'RUNNING': # Host is stopped storagedriver_client = StorageDriverClient.load( storagedriver.vpool) storagedriver_client.mark_node_offline( str(storagedriver.storagedriver_id)) StorageDriverController._logger.warning( 'Storagedriver {0} marked offline'.format( storagedriver.storagedriver_id))
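# Minimal sketch of the two-stage liveness probe implemented above: first an
# SSH connectivity check, then a volumedriver round-trip; the host only stays
# RUNNING when both succeed. Both probe callables are hypothetical stand-ins
# for SSHClient and LocalStorageRouterClient(path).server_revision().
def _probe_host(ssh_probe, voldrv_probe):
    try:
        ssh_probe()
    except Exception:
        return 'HALTED'  # host itself is unreachable
    try:
        voldrv_probe()
    except Exception:
        return 'HALTED'  # host is up, but the volumedriver does not respond
    return 'RUNNING'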
def migrate(previous_version): """ Migrates from any version to any version, running all migrations required If previous_version is for example 1 and this script is at version 3 it will execute two steps: - 1 > 2 - 2 > 3 @param previous_version: The previous version from which to start the migration. """ working_version = previous_version # Version 1 introduced: # - The datastore is still empty, add defaults if working_version < 1: from ovs.dal.hybrids.user import User from ovs.dal.hybrids.group import Group from ovs.dal.hybrids.role import Role from ovs.dal.hybrids.client import Client from ovs.dal.hybrids.failuredomain import FailureDomain from ovs.dal.hybrids.j_rolegroup import RoleGroup from ovs.dal.hybrids.j_roleclient import RoleClient from ovs.dal.hybrids.backendtype import BackendType from ovs.dal.hybrids.servicetype import ServiceType from ovs.dal.hybrids.branding import Branding from ovs.dal.lists.backendtypelist import BackendTypeList # Create groups admin_group = Group() admin_group.name = 'administrators' admin_group.description = 'Administrators' admin_group.save() viewers_group = Group() viewers_group.name = 'viewers' viewers_group.description = 'Viewers' viewers_group.save() # Create users admin = User() admin.username = '******' admin.password = hashlib.sha256('admin').hexdigest() admin.is_active = True admin.group = admin_group admin.save() # Create internal OAuth 2 clients admin_pw_client = Client() admin_pw_client.ovs_type = 'INTERNAL' admin_pw_client.grant_type = 'PASSWORD' admin_pw_client.user = admin admin_pw_client.save() admin_cc_client = Client() admin_cc_client.ovs_type = 'INTERNAL' admin_cc_client.grant_type = 'CLIENT_CREDENTIALS' admin_cc_client.client_secret = ''.join( random.choice(string.ascii_letters + string.digits + '|_=+*#@!/-[]{}<>.?,\'";:~') for _ in range(128)) admin_cc_client.user = admin admin_cc_client.save() # Create roles read_role = Role() read_role.code = 'read' read_role.name = 'Read' read_role.description = 'Can read objects' read_role.save() write_role = Role() write_role.code = 'write' write_role.name = 'Write' write_role.description = 'Can write objects' write_role.save() manage_role = Role() manage_role.code = 'manage' manage_role.name = 'Manage' manage_role.description = 'Can manage the system' manage_role.save() # Attach groups to roles mapping = [(admin_group, [read_role, write_role, manage_role]), (viewers_group, [read_role])] for setting in mapping: for role in setting[1]: rolegroup = RoleGroup() rolegroup.group = setting[0] rolegroup.role = role rolegroup.save() for user in setting[0].users: for role in setting[1]: for client in user.clients: roleclient = RoleClient() roleclient.client = client roleclient.role = role roleclient.save() # Add backends for backend_type_info in [('Ceph', 'ceph_s3'), ('Amazon', 'amazon_s3'), ('Swift', 'swift_s3'), ('Local', 'local'), ('Distributed', 'distributed'), ('ALBA', 'alba')]: code = backend_type_info[1] backend_type = BackendTypeList.get_backend_type_by_code(code) if backend_type is None: backend_type = BackendType() backend_type.name = backend_type_info[0] backend_type.code = code backend_type.save() # Add service types for service_type_info in [ ServiceType.SERVICE_TYPES.MD_SERVER, ServiceType.SERVICE_TYPES.ALBA_PROXY, ServiceType.SERVICE_TYPES.ARAKOON ]: service_type = ServiceType() service_type.name = service_type_info service_type.save() # Branding branding = Branding() branding.name = 'Default' branding.description = 'Default bootstrap theme' branding.css = 'bootstrap-default.min.css'
branding.productname = 'Open vStorage' branding.is_default = True branding.save() slate = Branding() slate.name = 'Slate' slate.description = 'Dark bootstrap theme' slate.css = 'bootstrap-slate.min.css' slate.productname = 'Open vStorage' slate.is_default = False slate.save() # Failure Domain failure_domain = FailureDomain() failure_domain.name = 'Default' failure_domain.save() # We're now at version 1 working_version = 1 # Version 2 introduced: # - new Descriptor format if working_version < 2: import imp from ovs.dal.helpers import Descriptor from ovs.extensions.storage.persistentfactory import PersistentFactory client = PersistentFactory.get_client() keys = client.prefix('ovs_data') for key in keys: data = client.get(key) modified = False for entry in data.keys(): if isinstance(data[entry], dict) and 'source' in data[ entry] and 'hybrids' in data[entry]['source']: filename = data[entry]['source'] if not filename.startswith('/'): filename = '/opt/OpenvStorage/ovs/dal/{0}'.format( filename) module = imp.load_source(data[entry]['name'], filename) cls = getattr(module, data[entry]['type']) new_data = Descriptor(cls, cached=False).descriptor if 'guid' in data[entry]: new_data['guid'] = data[entry]['guid'] data[entry] = new_data modified = True if modified is True: data['_version'] += 1 client.set(key, data) # We're now at version 2 working_version = 2 # Version 3 introduced: # - new Descriptor format if working_version < 3: import imp from ovs.dal.helpers import Descriptor from ovs.extensions.storage.persistentfactory import PersistentFactory client = PersistentFactory.get_client() keys = client.prefix('ovs_data') for key in keys: data = client.get(key) modified = False for entry in data.keys(): if isinstance(data[entry], dict) and 'source' in data[entry]: module = imp.load_source(data[entry]['name'], data[entry]['source']) cls = getattr(module, data[entry]['type']) new_data = Descriptor(cls, cached=False).descriptor if 'guid' in data[entry]: new_data['guid'] = data[entry]['guid'] data[entry] = new_data modified = True if modified is True: data['_version'] += 1 client.set(key, data) working_version = 3 # Version 4 introduced: # - Flexible SSD layout if working_version < 4: import os from ovs.dal.hybrids.diskpartition import DiskPartition from ovs.dal.hybrids.j_storagedriverpartition import StorageDriverPartition from ovs.dal.hybrids.servicetype import ServiceType from ovs.dal.lists.servicetypelist import ServiceTypeList from ovs.dal.lists.storagedriverlist import StorageDriverList from ovs.extensions.generic.remote import remote from ovs.extensions.generic.sshclient import SSHClient from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration for service in ServiceTypeList.get_by_name( ServiceType.SERVICE_TYPES.MD_SERVER).services: mds_service = service.mds_service storagedriver = None for current_storagedriver in service.storagerouter.storagedrivers: if current_storagedriver.vpool_guid == mds_service.vpool_guid: storagedriver = current_storagedriver break tasks = {} if storagedriver._data.get('mountpoint_md'): tasks['{0}/mds_{1}_{2}'.format( storagedriver._data.get('mountpoint_md'), storagedriver.vpool.name, mds_service.number)] = ( DiskPartition.ROLES.DB, StorageDriverPartition.SUBROLE.MDS) if storagedriver._data.get('mountpoint_temp'): tasks['{0}/mds_{1}_{2}'.format( storagedriver._data.get('mountpoint_temp'), storagedriver.vpool.name, mds_service.number)] = ( DiskPartition.ROLES.SCRUB, StorageDriverPartition.SUBROLE.MDS) for disk in service.storagerouter.disks: for 
partition in disk.partitions: for directory, (role, subrole) in tasks.iteritems(): with remote(storagedriver.storagerouter.ip, [os], username='******') as rem: stat_dir = directory while not rem.os.path.exists( stat_dir) and stat_dir != '/': stat_dir = stat_dir.rsplit('/', 1)[0] if not stat_dir: stat_dir = '/' inode = rem.os.stat(stat_dir).st_dev if partition.inode == inode: if role not in partition.roles: partition.roles.append(role) partition.save() number = 0 migrated = False for sd_partition in storagedriver.partitions: if sd_partition.role == role and sd_partition.sub_role == subrole: if sd_partition.mds_service == mds_service: migrated = True break if sd_partition.partition_guid == partition.guid: number = max( sd_partition.number, number) if migrated is False: sd_partition = StorageDriverPartition() sd_partition.role = role sd_partition.sub_role = subrole sd_partition.partition = partition sd_partition.storagedriver = storagedriver sd_partition.mds_service = mds_service sd_partition.size = None sd_partition.number = number + 1 sd_partition.save() client = SSHClient( storagedriver.storagerouter, username='******') path = sd_partition.path.rsplit('/', 1)[0] if path: client.dir_create(path) client.dir_chown(path, 'ovs', 'ovs') client.dir_create(directory) client.dir_chown(directory, 'ovs', 'ovs') client.symlink( {sd_partition.path: directory}) for storagedriver in StorageDriverList.get_storagedrivers(): migrated_objects = {} for disk in storagedriver.storagerouter.disks: for partition in disk.partitions: # Process all mountpoints that are unique and don't have a specified size for key, (role, sr_info) in { 'mountpoint_md': (DiskPartition.ROLES.DB, { 'metadata_{0}': StorageDriverPartition.SUBROLE.MD, 'tlogs_{0}': StorageDriverPartition.SUBROLE.TLOG }), 'mountpoint_fragmentcache': (DiskPartition.ROLES.WRITE, { 'fcache_{0}': StorageDriverPartition.SUBROLE.FCACHE }), 'mountpoint_foc': (DiskPartition.ROLES.WRITE, { 'fd_{0}': StorageDriverPartition.SUBROLE.FD, 'dtl_{0}': StorageDriverPartition.SUBROLE.DTL }), 'mountpoint_dtl': (DiskPartition.ROLES.WRITE, { 'fd_{0}': StorageDriverPartition.SUBROLE.FD, 'dtl_{0}': StorageDriverPartition.SUBROLE.DTL }), 'mountpoint_readcaches': (DiskPartition.ROLES.READ, { '': None }), 'mountpoint_writecaches': (DiskPartition.ROLES.WRITE, { 'sco_{0}': StorageDriverPartition.SUBROLE.SCO }) }.iteritems(): if key in storagedriver._data: is_list = isinstance(storagedriver._data[key], list) entries = storagedriver._data[ key][:] if is_list is True else [ storagedriver._data[key] ] for entry in entries: if not entry: if is_list: storagedriver._data[key].remove( entry) if len(storagedriver._data[key] ) == 0: del storagedriver._data[key] else: del storagedriver._data[key] else: with remote( storagedriver.storagerouter.ip, [os], username='******') as rem: inode = rem.os.stat(entry).st_dev if partition.inode == inode: if role not in partition.roles: partition.roles.append(role) partition.save() for folder, subrole in sr_info.iteritems( ): number = 0 migrated = False for sd_partition in storagedriver.partitions: if sd_partition.role == role and sd_partition.sub_role == subrole: if sd_partition.partition_guid == partition.guid: number = max( sd_partition. 
number, number) if migrated is False: sd_partition = StorageDriverPartition( ) sd_partition.role = role sd_partition.sub_role = subrole sd_partition.partition = partition sd_partition.storagedriver = storagedriver sd_partition.size = None sd_partition.number = number + 1 sd_partition.save() if folder: source = '{0}/{1}'.format( entry, folder.format( storagedriver. vpool.name)) else: source = entry client = SSHClient( storagedriver. storagerouter, username='******') path = sd_partition.path.rsplit( '/', 1)[0] if path: client.dir_create(path) client.dir_chown( path, 'ovs', 'ovs') client.symlink({ sd_partition.path: source }) migrated_objects[ source] = sd_partition if is_list: storagedriver._data[ key].remove(entry) if len(storagedriver._data[key] ) == 0: del storagedriver._data[ key] else: del storagedriver._data[key] storagedriver.save() if 'mountpoint_bfs' in storagedriver._data: storagedriver.mountpoint_dfs = storagedriver._data[ 'mountpoint_bfs'] if not storagedriver.mountpoint_dfs: storagedriver.mountpoint_dfs = None del storagedriver._data['mountpoint_bfs'] storagedriver.save() if 'mountpoint_temp' in storagedriver._data: del storagedriver._data['mountpoint_temp'] storagedriver.save() if migrated_objects: print 'Loading sizes' config = StorageDriverConfiguration( 'storagedriver', storagedriver.vpool_guid, storagedriver.storagedriver_id) config.load() for readcache in config.configuration.get( 'content_addressed_cache', {}).get('clustercache_mount_points', []): path = readcache.get('path', '').rsplit('/', 1)[0] size = int(readcache['size'].strip( 'KiB')) * 1024 if 'size' in readcache else None if path in migrated_objects: migrated_objects[path].size = long(size) if size is not None else None migrated_objects[path].save() for writecache in config.configuration.get( 'scocache', {}).get('scocache_mount_points', []): path = writecache.get('path', '') size = int(writecache['size'].strip( 'KiB')) * 1024 if 'size' in writecache else None if path in migrated_objects: migrated_objects[path].size = long(size) if size is not None else None migrated_objects[path].save() working_version = 4 # Version 5 introduced: # - Failure Domains if working_version < 5: import os from ovs.dal.hybrids.failuredomain import FailureDomain from ovs.dal.lists.failuredomainlist import FailureDomainList from ovs.dal.lists.storagerouterlist import StorageRouterList from ovs.extensions.generic.remote import remote from ovs.extensions.generic.sshclient import SSHClient failure_domains = FailureDomainList.get_failure_domains() if len(failure_domains) > 0: failure_domain = failure_domains[0] else: failure_domain = FailureDomain() failure_domain.name = 'Default' failure_domain.save() for storagerouter in StorageRouterList.get_storagerouters(): change = False if storagerouter.primary_failure_domain is None: storagerouter.primary_failure_domain = failure_domain change = True if storagerouter.rdma_capable is None: client = SSHClient(storagerouter, username='******') rdma_capable = False with remote(client.ip, [os], username='******') as rem: for root, dirs, files in rem.os.walk( '/sys/class/infiniband'): for directory in dirs: ports_dir = '/'.join( [root, directory, 'ports']) if not rem.os.path.exists(ports_dir): continue for sub_root, sub_dirs, _ in rem.os.walk( ports_dir): if sub_root != ports_dir: continue for sub_directory in sub_dirs: state_file = '/'.join( [sub_root, sub_directory, 'state']) if rem.os.path.exists(state_file): if 'ACTIVE' in client.run( 'cat {0}'.format( state_file)): rdma_capable = True storagerouter.rdma_capable = rdma_capable change = True if change is True:
storagerouter.save() working_version = 5 # Version 6 introduced: # - Distributed scrubbing if working_version < 6: from ovs.dal.hybrids.diskpartition import DiskPartition from ovs.dal.lists.storagedriverlist import StorageDriverList from ovs.extensions.generic.sshclient import SSHClient for storage_driver in StorageDriverList.get_storagedrivers(): root_client = SSHClient(storage_driver.storagerouter, username='******') for partition in storage_driver.partitions: if partition.role == DiskPartition.ROLES.SCRUB: old_path = partition.path partition.sub_role = None partition.save() partition.invalidate_dynamics(['folder', 'path']) if root_client.dir_exists(partition.path): continue # New directory already exists if '_mds_' in old_path: if root_client.dir_exists(old_path): root_client.symlink({partition.path: old_path}) if not root_client.dir_exists(partition.path): root_client.dir_create(partition.path) root_client.dir_chmod(partition.path, 0777) working_version = 6 # Version 7 introduced: # - vPool status if working_version < 7: from ovs.dal.hybrids import vpool reload(vpool) from ovs.dal.hybrids.vpool import VPool from ovs.dal.lists.vpoollist import VPoolList for _vpool in VPoolList.get_vpools(): vpool = VPool(_vpool.guid) if hasattr(vpool, 'status') and vpool.status is None: vpool.status = VPool.STATUSES.RUNNING vpool.save() working_version = 7 # Version 10 introduced: # - Reverse indexes are stored in persistent store # - Store more non-changing metadata on disk iso using a dynamic property if working_version < 10: from ovs.dal.helpers import HybridRunner, Descriptor from ovs.dal.datalist import DataList from ovs.extensions.storage.persistentfactory import PersistentFactory from ovs.extensions.storage.volatilefactory import VolatileFactory persistent = PersistentFactory.get_client() for prefix in ['ovs_listcache', 'ovs_reverseindex']: for key in persistent.prefix(prefix): persistent.delete(key) for key in persistent.prefix('ovs_data_'): persistent.set(key, persistent.get(key)) base_reverse_key = 'ovs_reverseindex_{0}_{1}|{2}|{3}' hybrid_structure = HybridRunner.get_hybrids() for class_descriptor in hybrid_structure.values(): cls = Descriptor().load(class_descriptor).get_object() all_objects = DataList(cls, { 'type': DataList.where_operator.AND, 'items': [] }) for item in all_objects: guid = item.guid for relation in item._relations: if relation.foreign_type is None: rcls = cls rclsname = rcls.__name__.lower() else: rcls = relation.foreign_type rclsname = rcls.__name__.lower() key = relation.name rguid = item._data[key]['guid'] if rguid is not None: reverse_key = base_reverse_key.format( rclsname, rguid, relation.foreign_key, guid) persistent.set(reverse_key, 0) volatile = VolatileFactory.get_client() try: volatile._client.flush_all() except: pass from ovs.dal.lists.vdisklist import VDiskList for vdisk in VDiskList.get_vdisks(): try: vdisk.metadata = { 'lba_size': vdisk.info['lba_size'], 'cluster_multiplier': vdisk.info['cluster_multiplier'] } vdisk.save() except: pass working_version = 10 # Version 11 introduced: # - ALBA accelerated ALBA, meaning different vpool.metadata information if working_version < 11: from ovs.dal.lists.vpoollist import VPoolList for vpool in VPoolList.get_vpools(): vpool.metadata = {'backend': vpool.metadata} if 'metadata' in vpool.metadata['backend']: vpool.metadata['backend'][ 'arakoon_config'] = vpool.metadata['backend'].pop( 'metadata') if 'backend_info' in vpool.metadata['backend']: vpool.metadata['backend']['backend_info'][ 'fragment_cache_on_read'] = True 
vpool.metadata['backend']['backend_info'][ 'fragment_cache_on_write'] = False vpool.save() working_version = 11 return working_version
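# Worked example of the version 11 metadata migration above, with invented
# values: the existing metadata dict is nested under a 'backend' key, the old
# 'metadata' entry is renamed to 'arakoon_config', and fragment cache defaults
# are added to backend_info.
old_metadata = {'backend_info': {'policies': []}, 'metadata': {'cluster': 'voldrv'}}
new_metadata = {'backend': old_metadata}
new_metadata['backend']['arakoon_config'] = new_metadata['backend'].pop('metadata')
new_metadata['backend']['backend_info']['fragment_cache_on_read'] = True
new_metadata['backend']['backend_info']['fragment_cache_on_write'] = False
assert 'metadata' not in new_metadata['backend']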
def mds_checkup(): """ Validates the current MDS setup/configuration and takes actions where required Actions: * Verify which StorageRouters are available * Make a mapping between each vPool and its StorageRouters * For each vPool make sure every StorageRouter has at least 1 MDS service with capacity available * For each vPool retrieve the optimal configuration and store it for each StorageDriver * For each vPool run ensure_safety for all vDisks :raises RuntimeError: When ensure_safety fails for any vDisk :return: None :rtype: NoneType """ MDSServiceController._logger.info('Started') # Verify StorageRouter availability root_client_cache = {} storagerouters = StorageRouterList.get_storagerouters() storagerouters.sort(key=lambda _sr: ExtensionsToolbox.advanced_sort( element=_sr.ip, separator='.')) offline_nodes = [] for storagerouter in storagerouters: try: root_client = SSHClient(endpoint=storagerouter, username='******') MDSServiceController._logger.debug( 'StorageRouter {0} - ONLINE'.format(storagerouter.name)) except UnableToConnectException: root_client = None offline_nodes.append(storagerouter) MDSServiceController._logger.error( 'StorageRouter {0} - OFFLINE'.format(storagerouter.name)) root_client_cache[storagerouter] = root_client # Create mapping per vPool and its StorageRouters mds_dict = collections.OrderedDict() for vpool in sorted(VPoolList.get_vpools(), key=lambda k: k.name): MDSServiceController._logger.info('vPool {0}'.format(vpool.name)) mds_dict[vpool] = {} # Loop all StorageDrivers and add StorageDriver to mapping for storagedriver in vpool.storagedrivers: storagerouter = storagedriver.storagerouter if storagerouter not in mds_dict[vpool]: mds_dict[vpool][storagerouter] = { 'client': root_client_cache.get(storagerouter), 'services': [], 'storagedriver': storagedriver } # Loop all MDS Services and append services to appropriate vPool / StorageRouter combo mds_services = vpool.mds_services mds_services.sort( key=lambda _mds_service: ExtensionsToolbox.advanced_sort( element=_mds_service.service.storagerouter.ip, separator='.')) for mds_service in mds_services: service = mds_service.service storagerouter = service.storagerouter if storagerouter not in mds_dict[vpool]: mds_dict[vpool][storagerouter] = { 'client': root_client_cache.get(storagerouter), 'services': [], 'storagedriver': None } MDSServiceController._logger.debug( 'vPool {0} - StorageRouter {1} - Service on port {2}'. 
format(vpool.name, storagerouter.name, service.ports[0])) mds_dict[vpool][storagerouter]['services'].append(mds_service) failures = [] for vpool, storagerouter_info in mds_dict.iteritems(): # Make sure there's at least 1 MDS on every StorageRouter that's not overloaded # Remove all MDS Services which have been manually marked for removal (by setting its capacity to 0) max_load = Configuration.get( '/ovs/vpools/{0}/mds_config|mds_maxload'.format(vpool.guid)) for storagerouter in sorted(storagerouter_info, key=lambda k: k.ip): total_load = 0.0 root_client = mds_dict[vpool][storagerouter]['client'] mds_services = mds_dict[vpool][storagerouter]['services'] for mds_service in list( sorted(mds_services, key=lambda k: k.number)): port = mds_service.service.ports[0] number = mds_service.number # Manual intervention required here in order for the MDS to be cleaned up # @TODO: Remove this and make a dynamic calculation to check which MDSes to remove if mds_service.capacity == 0 and len( mds_service.vdisks_guids) == 0: MDSServiceController._logger.warning( 'vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: Removing' .format(vpool.name, storagerouter.name, number, port)) try: MDSServiceController.remove_mds_service( mds_service=mds_service, reconfigure=True, allow_offline=root_client is None) except Exception: MDSServiceController._logger.exception( 'vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: Failed to remove' .format(vpool.name, storagerouter.name, number, port)) mds_services.remove(mds_service) else: _, next_load = MDSServiceController.get_mds_load( mds_service=mds_service) if next_load == float('inf'): total_load = sys.maxint * -1 # Cast to lowest possible value if any MDS service capacity is set to infinity else: total_load += next_load if next_load < max_load: MDSServiceController._logger.debug( 'vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: Capacity available - Load at {4}%' .format(vpool.name, storagerouter.name, number, port, next_load)) else: MDSServiceController._logger.debug( 'vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: No capacity available - Load at {4}%' .format(vpool.name, storagerouter.name, number, port, next_load)) if total_load >= max_load * len(mds_services): mds_services_to_add = int( math.ceil((total_load - max_load * len(mds_services)) / max_load)) MDSServiceController._logger.info( 'vPool {0} - StorageRouter {1} - Average load per service {2:.2f}% - Max load per service {3:.2f}% - {4} MDS service{5} will be added' .format(vpool.name, storagerouter.name, total_load / len(mds_services), max_load, mds_services_to_add, '' if mds_services_to_add == 1 else 's')) for _ in range(mds_services_to_add): MDSServiceController._logger.info( 'vPool {0} - StorageRouter {1} - Adding new MDS Service' .format(vpool.name, storagerouter.name)) try: mds_services.append( MDSServiceController.prepare_mds_service( storagerouter=storagerouter, vpool=vpool)) except Exception: MDSServiceController._logger.exception( 'vPool {0} - StorageRouter {1} - Failed to create new MDS Service' .format(vpool.name, storagerouter.name)) # After potentially having added new MDSes, retrieve the optimal configuration mds_config_set = {} try: mds_config_set = MDSServiceController.get_mds_storagedriver_config_set( vpool=vpool, offline_nodes=offline_nodes) MDSServiceController._logger.debug( 'vPool {0} - Optimal configuration {1}'.format( vpool.name, mds_config_set)) except (NotFoundException, RuntimeError): MDSServiceController._logger.exception( 'vPool {0} - 
Failed to retrieve the optimal configuration'. format(vpool.name)) # Apply the optimal MDS configuration per StorageDriver for storagerouter in sorted(storagerouter_info, key=lambda k: k.ip): root_client = mds_dict[vpool][storagerouter]['client'] storagedriver = mds_dict[vpool][storagerouter]['storagedriver'] if storagedriver is None: MDSServiceController._logger.critical( 'vPool {0} - StorageRouter {1} - No matching StorageDriver found' .format(vpool.name, storagerouter.name)) continue if storagerouter.guid not in mds_config_set: MDSServiceController._logger.critical( 'vPool {0} - StorageRouter {1} - Not marked as offline, but could not retrieve an optimal MDS config' .format(vpool.name, storagerouter.name)) continue if root_client is None: MDSServiceController._logger.debug( 'vPool {0} - StorageRouter {1} - Marked as offline, not setting optimal MDS configuration' .format(vpool.name, storagerouter.name)) continue storagedriver_config = StorageDriverConfiguration( vpool_guid=vpool.guid, storagedriver_id=storagedriver.storagedriver_id) if storagedriver_config.config_missing is False: optimal_mds_config = mds_config_set[storagerouter.guid] MDSServiceController._logger.debug( 'vPool {0} - StorageRouter {1} - Storing optimal MDS configuration: {2}' .format(vpool.name, storagerouter.name, optimal_mds_config)) # Filesystem section in StorageDriver configuration are all parameters used for vDisks created directly on the filesystem # So when a vDisk gets created on the filesystem, these MDSes will be assigned to them storagedriver_config.configure_filesystem( fs_metadata_backend_mds_nodes=optimal_mds_config) storagedriver_config.save(root_client) # Execute a safety check, making sure the master/slave configuration is optimal. MDSServiceController._logger.info( 'vPool {0} - Ensuring safety for all vDisks'.format( vpool.name)) for vdisk in vpool.vdisks: try: MDSServiceController.ensure_safety(vdisk_guid=vdisk.guid) except Exception: message = 'Ensure safety for vDisk {0} with guid {1} failed'.format( vdisk.name, vdisk.guid) MDSServiceController._logger.exception(message) failures.append(message) if len(failures) > 0: raise RuntimeError('\n - ' + '\n - '.join(failures)) MDSServiceController._logger.info('Finished')
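
# Editor's note: a minimal, self-contained sketch of the scale-out decision used in
# mds_checkup() above. The names mirror the locals in that function; the helper itself
# and the guard against an empty service list are illustrative additions, not original code.
import math

def _example_mds_services_to_add(total_load, max_load, service_count):
    # A StorageRouter is overloaded once the summed load reaches max_load * service_count;
    # every further max_load percent of overshoot warrants one extra MDS service.
    if service_count == 0 or total_load < max_load * service_count:
        return 0
    return int(math.ceil((total_load - max_load * service_count) / float(max_load)))

# Example: three services at a combined 290% load with a 75% cap per service:
# _example_mds_services_to_add(290.0, 75.0, 3) == 1, since ceil((290 - 225) / 75) == 1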
def migrate():
    """
    Executes async migrations. It doesn't matter too much when they are executed, as long as they get eventually executed.
    This code will typically contain:
        * "dangerous" migration code (it needs certain running services)
        * Migration code depending on a cluster-wide state
        * ...
    """
    MigrationController._logger.info('Preparing out of band migrations...')

    from ovs.dal.lists.storagedriverlist import StorageDriverList
    from ovs.dal.lists.storagerouterlist import StorageRouterList
    from ovs.dal.lists.vpoollist import VPoolList
    from ovs.extensions.generic.configuration import Configuration
    from ovs.extensions.generic.sshclient import SSHClient
    from ovs_extensions.generic.toolbox import ExtensionsToolbox
    from ovs_extensions.services.interfaces.systemd import Systemd
    from ovs.extensions.services.servicefactory import ServiceFactory
    from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
    from ovs.lib.generic import GenericController

    MigrationController._logger.info('Start out of band migrations...')
    service_manager = ServiceFactory.get_manager()

    sr_client_map = {}
    for storagerouter in StorageRouterList.get_storagerouters():
        sr_client_map[storagerouter.guid] = SSHClient(endpoint=storagerouter, username='******')

    #########################################################
    # Addition of 'ExecReload' for AlbaProxy SystemD services
    if ServiceFactory.get_service_type() == 'systemd':
        changed_clients = set()
        for storagedriver in StorageDriverList.get_storagedrivers():
            root_client = sr_client_map[storagedriver.storagerouter_guid]
            for alba_proxy in storagedriver.alba_proxies:
                service = alba_proxy.service
                service_name = 'ovs-{0}'.format(service.name)
                if not service_manager.has_service(name=service_name, client=root_client):
                    continue
                if 'ExecReload=' in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                    continue
                try:
                    service_manager.regenerate_service(name='ovs-albaproxy', client=root_client, target_name=service_name)
                    changed_clients.add(root_client)
                except:
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

    ##################################################################
    # Adjustment of open file descriptors for Arakoon services to 8192
    changed_clients = set()
    for storagerouter in StorageRouterList.get_storagerouters():
        root_client = sr_client_map[storagerouter.guid]
        for service_name in service_manager.list_services(client=root_client):
            if not service_name.startswith('ovs-arakoon-'):
                continue
            if ServiceFactory.get_service_type() == 'systemd':
                path = '/lib/systemd/system/{0}.service'.format(service_name)
                check = 'LimitNOFILE=8192'
            else:
                path = '/etc/init/{0}.conf'.format(service_name)
                check = 'limit nofile 8192 8192'
            if not root_client.file_exists(path):
                continue
            if check in root_client.file_read(path):
                continue
            try:
                service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name=service_name)
                changed_clients.add(root_client)
                ExtensionsToolbox.edit_version_file(client=root_client, package_name='arakoon', old_service_name=service_name)
            except:
                MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
    for root_client in changed_clients:
        root_client.run(['systemctl', 'daemon-reload'])

    #############################
    # Migrate to multiple proxies
    for storagedriver in StorageDriverList.get_storagedrivers():
        vpool = storagedriver.vpool
        root_client = sr_client_map[storagedriver.storagerouter_guid]
        for alba_proxy in storagedriver.alba_proxies:
            # Rename alba_proxy service in model
            service = alba_proxy.service
            old_service_name = 'albaproxy_{0}'.format(vpool.name)
            new_service_name = 'albaproxy_{0}_0'.format(vpool.name)
            if old_service_name != service.name:
                continue
            service.name = new_service_name
            service.save()

            if not service_manager.has_service(name=old_service_name, client=root_client):
                continue
            old_configuration_key = '/ovs/framework/hosts/{0}/services/{1}'.format(storagedriver.storagerouter.machine_id, old_service_name)
            if not Configuration.exists(key=old_configuration_key):
                continue

            # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service)
            ExtensionsToolbox.edit_version_file(client=root_client, package_name='alba', old_service_name=old_service_name, new_service_name=new_service_name)

            # Register new service and remove old service
            service_manager.add_service(name='ovs-albaproxy',
                                        client=root_client,
                                        params=Configuration.get(old_configuration_key),
                                        target_name='ovs-{0}'.format(new_service_name))

            # Update scrub proxy config
            proxy_config_key = '/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)
            proxy_config = None if Configuration.exists(key=proxy_config_key) is False else Configuration.get(proxy_config_key)
            if proxy_config is not None:
                fragment_cache = proxy_config.get('fragment_cache', ['none', {}])
                if fragment_cache[0] == 'alba' and fragment_cache[1].get('cache_on_write') is True:  # Accelerated ALBA configured
                    fragment_cache_scrub_info = copy.deepcopy(fragment_cache)
                    fragment_cache_scrub_info[1]['cache_on_read'] = False
                    proxy_scrub_config_key = '/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid)
                    proxy_scrub_config = None if Configuration.exists(key=proxy_scrub_config_key) is False else Configuration.get(proxy_scrub_config_key)
                    if proxy_scrub_config is not None and proxy_scrub_config['fragment_cache'] == ['none']:
                        proxy_scrub_config['fragment_cache'] = fragment_cache_scrub_info
                        Configuration.set(proxy_scrub_config_key, json.dumps(proxy_scrub_config, indent=4), raw=True)

        # Update 'backend_connection_manager' section
        changes = False
        storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id)
        storagedriver_config.load()
        if 'backend_connection_manager' not in storagedriver_config.configuration:
            continue

        current_config = storagedriver_config.configuration['backend_connection_manager']
        if current_config.get('backend_type') != 'MULTI':
            changes = True
            backend_connection_manager = {'backend_type': 'MULTI'}
            for index, proxy in enumerate(sorted(storagedriver.alba_proxies, key=lambda pr: pr.service.ports[0])):
                backend_connection_manager[str(index)] = copy.deepcopy(current_config)
                # noinspection PyUnresolvedReferences
                backend_connection_manager[str(index)]['alba_connection_use_rora'] = True
                # noinspection PyUnresolvedReferences
                backend_connection_manager[str(index)]['alba_connection_rora_manifest_cache_capacity'] = 5000
                # noinspection PyUnresolvedReferences
                for key, value in backend_connection_manager[str(index)].items():
                    if key.startswith('backend_interface'):
                        backend_connection_manager[key] = value
                        # noinspection PyUnresolvedReferences
                        del backend_connection_manager[str(index)][key]
            for key, value in {'backend_interface_retries_on_error': 5,
                               'backend_interface_retry_interval_secs': 1,
                               'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                if key not in backend_connection_manager:
                    backend_connection_manager[key] = value
        else:
            backend_connection_manager = current_config
            for value in backend_connection_manager.values():
                if isinstance(value, dict):
                    for key, val in value.items():
                        if key.startswith('backend_interface'):
                            backend_connection_manager[key] = val
                            changes = True
                            del value[key]
            for key, value in {'backend_interface_retries_on_error': 5,
                               'backend_interface_retry_interval_secs': 1,
                               'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                if key not in backend_connection_manager:
                    changes = True
                    backend_connection_manager[key] = value

        if changes is True:
            storagedriver_config.clear_backend_connection_manager()
            storagedriver_config.configure_backend_connection_manager(**backend_connection_manager)
            storagedriver_config.save(root_client)

            # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section)
            ExtensionsToolbox.edit_version_file(client=root_client, package_name='volumedriver', old_service_name='volumedriver_{0}'.format(vpool.name))
            if service_manager.ImplementationClass == Systemd:
                root_client.run(['systemctl', 'daemon-reload'])

    ########################################
    # Update metadata_store_bits information
    for vpool in VPoolList.get_vpools():
        bits = None
        for storagedriver in vpool.storagedrivers:
            key = '/ovs/framework/hosts/{0}/services/volumedriver_{1}'.format(storagedriver.storagerouter.machine_id, vpool.name)
            if Configuration.exists(key=key) and 'METADATASTORE_BITS' not in Configuration.get(key=key):
                if bits is None:
                    entries = service_manager.extract_from_service_file(name='ovs-volumedriver_{0}'.format(vpool.name),
                                                                        client=sr_client_map[storagedriver.storagerouter_guid],
                                                                        entries=['METADATASTORE_BITS='])
                    if len(entries) == 1:
                        bits = entries[0].split('=')[-1]
                        bits = int(bits) if bits.isdigit() else 5
                if bits is not None:
                    try:
                        content = Configuration.get(key=key)
                        content['METADATASTORE_BITS'] = bits
                        Configuration.set(key=key, value=content)
                    except:
                        MigrationController._logger.exception('Error updating volumedriver info for vPool {0} on StorageRouter {1}'.format(vpool.name, storagedriver.storagerouter.name))
        if bits is not None:
            vpool.metadata_store_bits = bits
            vpool.save()

    MigrationController._logger.info('Finished out of band migrations')
    GenericController.refresh_package_information()
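
# Editor's note: the 'backend_connection_manager' migration above hoists every
# 'backend_interface_*' setting out of the per-proxy sub-sections (keyed '0', '1', ...)
# to the top level of the section. A standalone sketch of that transformation on plain
# dicts (illustrative only; the real code operates on the loaded StorageDriver config):
def _example_hoist_backend_interface_keys(backend_connection_manager):
    for sub_section in list(backend_connection_manager.values()):
        if isinstance(sub_section, dict):
            for key in list(sub_section):  # copy the keys since we delete while iterating
                if key.startswith('backend_interface'):
                    backend_connection_manager[key] = sub_section.pop(key)
    return backend_connection_manager

# _example_hoist_backend_interface_keys({'backend_type': 'MULTI',
#                                        '0': {'alba_connection_host': '127.0.0.1',
#                                              'backend_interface_retries_on_error': 5}})
# == {'backend_type': 'MULTI',
#     'backend_interface_retries_on_error': 5,
#     '0': {'alba_connection_host': '127.0.0.1'}}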
def remove_mds_service(mds_service, reconfigure, allow_offline=False):
    """
    Removes an MDS service
    :param mds_service: The MDS service to remove
    :type mds_service: ovs.dal.hybrids.j_mdsservice.MDSService
    :param reconfigure: Indicates whether reconfiguration is required
    :type reconfigure: bool
    :param allow_offline: Indicates whether it's OK that the node for which mds services are cleaned is offline
    :type allow_offline: bool
    :raises RuntimeError: When vDisks are still present on the MDSService to be removed
                          When no StorageDriver is linked to the MDSService to be removed
    :raises UnableToConnectException: When the StorageRouter on which the MDSService resides is unreachable and the allow_offline flag is False
    :return: None
    :rtype: NoneType
    """
    if len(mds_service.vdisks_guids) > 0 and allow_offline is False:
        raise RuntimeError('Cannot remove MDSService that is still serving disks')
    if len(mds_service.storagedriver_partitions) == 0 or mds_service.storagedriver_partitions[0].storagedriver is None:
        raise RuntimeError('Failed to retrieve the linked StorageDriver to this MDS Service {0}'.format(mds_service.service.name))

    vpool = mds_service.vpool
    root_client = None
    storagerouter = mds_service.service.storagerouter
    storagedriver = mds_service.storagedriver_partitions[0].storagedriver
    MDSServiceController._logger.info('StorageRouter {0} - vPool {1}: Removing MDS junction service for port {2}'.format(storagerouter.name, vpool.name, mds_service.service.ports[0]))
    try:
        root_client = SSHClient(endpoint=storagerouter, username='******')
        MDSServiceController._logger.debug('StorageRouter {0} - vPool {1}: Established SSH connection'.format(storagerouter.name, vpool.name))
    except UnableToConnectException:
        if allow_offline is True:
            MDSServiceController._logger.warning('StorageRouter {0} - vPool {1}: Allowed offline node during MDS service removal'.format(storagerouter.name, vpool.name))
        else:
            MDSServiceController._logger.exception('StorageRouter {0} - vPool {1}: Failed to connect to StorageRouter'.format(storagerouter.name, vpool.name))
            raise

    # Reconfigure StorageDriver
    if reconfigure is True and root_client is not None:
        mds_nodes = []
        for sd_partition in storagedriver.partitions:
            if sd_partition.role == DiskPartition.ROLES.DB and sd_partition.sub_role == StorageDriverPartition.SUBROLE.MDS and sd_partition.mds_service != mds_service:
                service = sd_partition.mds_service.service
                mds_nodes.append({'host': service.storagerouter.ip,
                                  'port': service.ports[0],
                                  'db_directory': '{0}/db'.format(sd_partition.path),
                                  'scratch_directory': '{0}/scratch'.format(sd_partition.path)})

        # Generate the correct section in the StorageDriver's configuration
        MDSServiceController._logger.info('StorageRouter {0} - vPool {1}: Configuring StorageDriver with MDS nodes: {2}'.format(storagerouter.name, vpool.name, mds_nodes))
        storagedriver_config = StorageDriverConfiguration(vpool.guid, storagedriver.storagedriver_id)
        storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
        storagedriver_config.save(root_client)

    # Clean up model
    MDSServiceController._logger.info('StorageRouter {0} - vPool {1}: Cleaning model'.format(storagerouter.name, vpool.name))
    directories_to_clean = []
    for sd_partition in mds_service.storagedriver_partitions:
        directories_to_clean.append(sd_partition.path)
        sd_partition.delete()

    if allow_offline is True:  # Certain vDisks might still be attached to this offline MDS service --> Delete relations
        for junction in mds_service.vdisks:
            junction.delete()
    mds_service.delete()
    mds_service.service.delete()

    # Clean up file system
    if root_client is not None:
        MDSServiceController._logger.info('StorageRouter {0} - vPool {1}: Deleting directories from file system: {2}'.format(storagerouter.name, vpool.name, directories_to_clean))
        tries = 5
        while tries > 0:
            try:
                root_client.dir_delete(directories=directories_to_clean, follow_symlinks=True)
                for dir_name in directories_to_clean:
                    MDSServiceController._logger.debug('StorageRouter {0} - vPool {1}: Recursively removed directory: {2}'.format(storagerouter.name, vpool.name, dir_name))
                break
            except Exception:
                MDSServiceController._logger.warning('StorageRouter {0} - vPool {1}: Waiting for the MDS service to go down...'.format(storagerouter.name, vpool.name))
                time.sleep(5)
                tries -= 1
                if tries == 0:
                    MDSServiceController._logger.exception('StorageRouter {0} - vPool {1}: Deleting directories failed'.format(storagerouter.name, vpool.name))
                    raise
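
# Editor's note: the file-system cleanup above retries because the MDS process may still
# hold its directories while shutting down. The same retry pattern in isolation (the
# `action` callable stands in for root_client.dir_delete; illustrative only):
import time

def _example_retry(action, attempts=5, delay_seconds=5):
    for attempt in range(attempts):
        try:
            return action()
        except Exception:
            if attempt == attempts - 1:
                raise  # out of retries: surface the error, as remove_mds_service does
            time.sleep(delay_seconds)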
def mds_checkup_single(vpool_guid, mds_dict=None, offline_nodes=None):
    # type: (str, collections.OrderedDict, List[StorageRouter]) -> None
    """
    Validates the current MDS setup/configuration and takes actions where required
    Actions:
        * Verify which StorageRouters are available
        * Make a mapping between vPools and their StorageRouters
        * For each vPool make sure every StorageRouter has at least 1 MDS service with capacity available
        * For each vPool retrieve the optimal configuration and store it for each StorageDriver
        * For each vPool run an ensure safety for all vDisks
    :param vpool_guid: Guid of the VPool to do the checkup for
    :type vpool_guid: str
    :param mds_dict: OrderedDict containing all MDS related information
    :type mds_dict: collections.OrderedDict
    :param offline_nodes: Nodes that are marked as unreachable
    :type offline_nodes: List[StorageRouter]
    :raises ValueError: When only one of mds_dict and offline_nodes is provided
    :raises MDSCheckupEnsureSafetyFailures: When ensure safety has failed for any vDisk
    :return: None
    :rtype: NoneType
    """
    params_to_verify = [mds_dict, offline_nodes]
    vpool = VPool(vpool_guid)

    if any(p is not None for p in params_to_verify) and not all(p is not None for p in params_to_verify):
        raise ValueError('Both mds_dict and offline_nodes must be given instead of providing either one')
    if not mds_dict:
        mds_dict, offline_nodes = MDSServiceController._get_mds_information([vpool])

    ensure_safety_failures = []
    storagerouter_info = mds_dict[vpool]
    # Make sure there's at least 1 MDS on every StorageRouter that's not overloaded
    # Remove all MDS Services which have been manually marked for removal (by setting their capacity to 0)
    max_load = Configuration.get('/ovs/vpools/{0}/mds_config|mds_maxload'.format(vpool.guid))
    for storagerouter in sorted(storagerouter_info, key=lambda k: k.ip):
        total_load = 0.0
        root_client = mds_dict[vpool][storagerouter]['client']
        mds_services = mds_dict[vpool][storagerouter]['services']

        for mds_service in list(sorted(mds_services, key=lambda k: k.number)):
            port = mds_service.service.ports[0]
            number = mds_service.number

            # Manual intervention required here in order for the MDS to be cleaned up
            # @TODO: Remove this and make a dynamic calculation to check which MDSes to remove
            if mds_service.capacity == 0 and len(mds_service.vdisks_guids) == 0:
                MDSServiceController._logger.warning('vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: Removing'.format(vpool.name, storagerouter.name, number, port))
                try:
                    MDSServiceController.remove_mds_service(mds_service=mds_service, reconfigure=True, allow_offline=root_client is None)
                except Exception:
                    MDSServiceController._logger.exception('vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: Failed to remove'.format(vpool.name, storagerouter.name, number, port))
                mds_services.remove(mds_service)
            else:
                _, next_load = MDSServiceController.get_mds_load(mds_service=mds_service)
                if next_load == float('inf'):
                    total_load = sys.maxint * -1  # Cast to lowest possible value if any MDS service capacity is set to infinity
                else:
                    total_load += next_load

                if next_load < max_load:
                    MDSServiceController._logger.debug('vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: Capacity available - Load at {4}%'.format(vpool.name, storagerouter.name, number, port, next_load))
                else:
                    MDSServiceController._logger.debug('vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: No capacity available - Load at {4}%'.format(vpool.name, storagerouter.name, number, port, next_load))

        if total_load >= max_load * len(mds_services):
            mds_services_to_add = int(math.ceil((total_load - max_load * len(mds_services)) / max_load))
            MDSServiceController._logger.info('vPool {0} - StorageRouter {1} - Average load per service {2:.2f}% - Max load per service {3:.2f}% - {4} MDS service{5} will be added'.format(vpool.name, storagerouter.name, total_load / len(mds_services), max_load, mds_services_to_add, '' if mds_services_to_add == 1 else 's'))
            for _ in range(mds_services_to_add):
                MDSServiceController._logger.info('vPool {0} - StorageRouter {1} - Adding new MDS Service'.format(vpool.name, storagerouter.name))
                try:
                    mds_services.append(MDSServiceController.prepare_mds_service(storagerouter=storagerouter, vpool=vpool))
                except Exception:
                    MDSServiceController._logger.exception('vPool {0} - StorageRouter {1} - Failed to create new MDS Service'.format(vpool.name, storagerouter.name))

    # After potentially having added new MDSes, retrieve the optimal configuration
    mds_config_set = {}
    try:
        mds_config_set = MDSServiceController.get_mds_storagedriver_config_set(vpool=vpool, offline_nodes=offline_nodes)
        MDSServiceController._logger.debug('vPool {0} - Optimal configuration {1}'.format(vpool.name, mds_config_set))
    except (NotFoundException, RuntimeError):
        MDSServiceController._logger.exception('vPool {0} - Failed to retrieve the optimal configuration'.format(vpool.name))

    # Apply the optimal MDS configuration per StorageDriver
    for storagerouter in sorted(storagerouter_info, key=lambda k: k.ip):
        root_client = mds_dict[vpool][storagerouter]['client']
        storagedriver = mds_dict[vpool][storagerouter]['storagedriver']

        if storagedriver is None:
            MDSServiceController._logger.critical('vPool {0} - StorageRouter {1} - No matching StorageDriver found'.format(vpool.name, storagerouter.name))
            continue
        if storagerouter.guid not in mds_config_set:
            MDSServiceController._logger.critical('vPool {0} - StorageRouter {1} - Not marked as offline, but could not retrieve an optimal MDS config'.format(vpool.name, storagerouter.name))
            continue
        if root_client is None:
            MDSServiceController._logger.debug('vPool {0} - StorageRouter {1} - Marked as offline, not setting optimal MDS configuration'.format(vpool.name, storagerouter.name))
            continue

        storagedriver_config = StorageDriverConfiguration(vpool_guid=vpool.guid, storagedriver_id=storagedriver.storagedriver_id)
        if storagedriver_config.config_missing is False:
            optimal_mds_config = mds_config_set[storagerouter.guid]
            MDSServiceController._logger.debug('vPool {0} - StorageRouter {1} - Storing optimal MDS configuration: {2}'.format(vpool.name, storagerouter.name, optimal_mds_config))
            # The filesystem section in the StorageDriver configuration contains all parameters used for vDisks created directly on the filesystem
            # So when a vDisk gets created on the filesystem, these MDSes will be assigned to it
            storagedriver_config.configure_filesystem(fs_metadata_backend_mds_nodes=optimal_mds_config)
            storagedriver_config.save(root_client)

    # Execute a safety check, making sure the master/slave configuration is optimal.
    MDSServiceController._logger.info('vPool {0} - Ensuring safety for all vDisks'.format(vpool.name))
    for vdisk in vpool.vdisks:
        try:
            MDSServiceController.ensure_safety(vdisk_guid=vdisk.guid)
        except Exception:
            message = 'Ensure safety for vDisk {0} with guid {1} failed'.format(vdisk.name, vdisk.guid)
            MDSServiceController._logger.exception(message)
            ensure_safety_failures.append(message)
    if ensure_safety_failures:
        raise MDSCheckupEnsureSafetyFailures('\n - ' + '\n - '.join(ensure_safety_failures))
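
# Editor's note: mds_checkup_single() accepts mds_dict and offline_nodes only as a pair.
# A compact, standalone version of that all-or-nothing validation (illustrative only):
def _example_validate_paired_params(*params):
    given = [p is not None for p in params]
    if any(given) and not all(given):
        raise ValueError('Both mds_dict and offline_nodes must be given instead of providing either one')

# _example_validate_paired_params(None, None)  # OK: both omitted, they will be computed
# _example_validate_paired_params({}, None)    # raises ValueError: only one was provided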
def configure_storagedriver_service(self):
    """
    Configure the StorageDriver service
    :return: None
    :rtype: NoneType
    """
    def _generate_queue_urls():
        mq_user = Configuration.get('/ovs/framework/messagequeue|user')
        mq_protocol = Configuration.get('/ovs/framework/messagequeue|protocol')
        mq_password = Configuration.get('/ovs/framework/messagequeue|password')
        return [{'amqp_uri': '{0}://{1}:{2}@{3}:5672'.format(mq_protocol, mq_user, mq_password, sr.ip)} for sr in StorageRouterList.get_masters()]

    def _generate_config_file_system():
        config = {'fs_dtl_host': '',
                  'fs_enable_shm_interface': 0,
                  'fs_enable_network_interface': 1,
                  'fs_metadata_backend_arakoon_cluster_nodes': [],
                  'fs_metadata_backend_mds_nodes': [],
                  'fs_metadata_backend_type': 'MDS',
                  'fs_virtual_disk_format': 'raw',
                  'fs_raw_disk_suffix': '.raw',
                  'fs_file_event_rules': [{'fs_file_event_rule_calls': ['Rename'],
                                           'fs_file_event_rule_path_regex': '.*'}]}
        if self.dtl_mode == StorageDriverClient.FRAMEWORK_DTL_NO_SYNC:
            config['fs_dtl_config_mode'] = StorageDriverClient.VOLDRV_DTL_MANUAL_MODE
        else:
            config['fs_dtl_mode'] = StorageDriverClient.VPOOL_DTL_MODE_MAP[self.dtl_mode]
            config['fs_dtl_config_mode'] = StorageDriverClient.VOLDRV_DTL_AUTOMATIC_MODE
        return config

    def _generate_config_backend_connection_manager():
        config = {'backend_type': 'MULTI',
                  'backend_interface_retries_on_error': 5,
                  'backend_interface_retry_interval_secs': 1,
                  'backend_interface_retry_backoff_multiplier': 2.0}
        for index, proxy in enumerate(sorted(self.storagedriver.alba_proxies, key=lambda k: k.service.ports[0])):
            config[str(index)] = {'alba_connection_host': self.storagedriver.storage_ip,
                                  'alba_connection_port': proxy.service.ports[0],
                                  'alba_connection_preset': vpool.metadata['backend']['backend_info']['preset'],
                                  'alba_connection_timeout': 30,
                                  'alba_connection_use_rora': True,
                                  'alba_connection_transport': 'TCP',
                                  'alba_connection_rora_manifest_cache_capacity': 25000,
                                  'alba_connection_asd_connection_pool_capacity': 10,
                                  'alba_connection_rora_timeout_msecs': 50,
                                  'backend_type': 'ALBA'}
        return config

    if self.sr_installer is None:
        raise RuntimeError('No StorageRouterInstaller instance found')
    if len(self.write_caches) == 0:
        raise RuntimeError('The StorageDriverPartition junctions have not been created yet')

    vpool = self.vp_installer.vpool
    gap_configuration = StorageDriverController.calculate_trigger_and_backoff_gap(cache_size=self.sr_installer.smallest_write_partition_size)
    arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|voldrv'))
    arakoon_nodes = [{'host': node.ip,
                      'port': node.client_port,
                      'node_id': node.name} for node in ArakoonClusterConfig(cluster_id=arakoon_cluster_name).nodes]

    storagedriver_config = StorageDriverConfiguration(vpool.guid, self.storagedriver.storagedriver_id)
    storagedriver_config.configure_scocache(scocache_mount_points=self.write_caches,
                                            trigger_gap=ExtensionsToolbox.convert_byte_size_to_human_readable(size=gap_configuration['trigger']),
                                            backoff_gap=ExtensionsToolbox.convert_byte_size_to_human_readable(size=gap_configuration['backoff']))
    storagedriver_config.configure_file_driver(fd_cache_path=self.storagedriver_partition_file_driver.path,
                                               fd_extent_cache_capacity='1024',
                                               fd_namespace='fd-{0}-{1}'.format(vpool.name, vpool.guid))
    storagedriver_config.configure_volume_router(vrouter_id=self.storagedriver.storagedriver_id,
                                                 vrouter_redirect_timeout_ms='120000',
                                                 vrouter_keepalive_time_secs='15',
                                                 vrouter_keepalive_interval_secs='5',
                                                 vrouter_keepalive_retries='2',
                                                 vrouter_routing_retries=10,
                                                 vrouter_volume_read_threshold=0,
                                                 vrouter_volume_write_threshold=0,
                                                 vrouter_file_read_threshold=0,
                                                 vrouter_file_write_threshold=0,
                                                 vrouter_min_workers=4,
                                                 vrouter_max_workers=16,
                                                 vrouter_sco_multiplier=self.sco_size * 1024 / self.cluster_size,
                                                 vrouter_backend_sync_timeout_ms=60000,
                                                 vrouter_migrate_timeout_ms=60000,
                                                 vrouter_use_fencing=True)
    storagedriver_config.configure_volume_manager(tlog_path=self.storagedriver_partition_tlogs.path,
                                                  metadata_path=self.storagedriver_partition_metadata.path,
                                                  clean_interval=1,
                                                  dtl_throttle_usecs=4000,
                                                  default_cluster_size=self.cluster_size * 1024,
                                                  number_of_scos_in_tlog=self.tlog_multiplier,
                                                  non_disposable_scos_factor=float(self.write_buffer) / self.tlog_multiplier / self.sco_size)
    storagedriver_config.configure_event_publisher(events_amqp_routing_key=Configuration.get('/ovs/framework/messagequeue|queues.storagedriver'),
                                                   events_amqp_uris=_generate_queue_urls())
    storagedriver_config.configure_volume_registry(vregistry_arakoon_cluster_id=arakoon_cluster_name,
                                                   vregistry_arakoon_cluster_nodes=arakoon_nodes)
    storagedriver_config.configure_network_interface(network_max_neighbour_distance=StorageDriver.DISTANCES.FAR - 1)
    storagedriver_config.configure_threadpool_component(num_threads=16)
    storagedriver_config.configure_volume_router_cluster(vrouter_cluster_id=vpool.guid)
    storagedriver_config.configure_distributed_lock_store(dls_type='Arakoon',
                                                          dls_arakoon_cluster_id=arakoon_cluster_name,
                                                          dls_arakoon_cluster_nodes=arakoon_nodes)
    storagedriver_config.configure_content_addressed_cache(serialize_read_cache=False,
                                                           read_cache_serialization_path=[])
    storagedriver_config.configure_distributed_transaction_log(dtl_path=self.storagedriver_partition_dtl.path,  # Not used, but required
                                                               dtl_transport=StorageDriverClient.VPOOL_DTL_TRANSPORT_MAP[self.dtl_transport])
    storagedriver_config.configure_filesystem(**_generate_config_file_system())
    storagedriver_config.configure_backend_connection_manager(**_generate_config_backend_connection_manager())

    storagedriver_config.save(client=self.sr_installer.root_client)
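
# Editor's note: the volume-router and volume-manager numbers above all derive from a few
# vPool-level settings. A sketch of those relations with the same units as the code
# (sco_size and write_buffer in MiB, cluster_size in KiB); the default of 20 SCOs per
# tlog is an assumption for the example, not taken from this function:
def _example_derive_sco_settings(sco_size_mib, cluster_size_kib, write_buffer_mib, tlog_multiplier=20):
    sco_multiplier = sco_size_mib * 1024 // cluster_size_kib  # number of clusters per SCO
    non_disposable_scos_factor = float(write_buffer_mib) / tlog_multiplier / sco_size_mib
    return sco_multiplier, non_disposable_scos_factor

# With 4 MiB SCOs, 4 KiB clusters and a 128 MiB write buffer:
# _example_derive_sco_settings(4, 4, 128) == (1024, 1.6)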
def prepare_mds_service(client, storagerouter, vpool, fresh_only=True, reload_config=False):
    """
    Prepares an MDS service:
        * Creates the required configuration
        * Sets up the service files
    Assumes the StorageRouter and VPool are already configured with a StorageDriver and that all model-wise configuration regarding both is completed.
    """
    mdsservice_type = ServiceTypeList.get_by_name('MetadataServer')
    storagedriver = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid][0]

    # Fetch service sequence number
    service_number = -1
    for mds_service in vpool.mds_services:
        if mds_service.service.storagerouter_guid == storagerouter.guid:
            service_number = max(mds_service.number, service_number)

    if fresh_only is True and service_number >= 0:
        return None  # There are already one or more MDS services running, aborting
    service_number += 1

    # Find free port
    occupied_ports = []
    for service in mdsservice_type.services:
        if service.storagerouter_guid == storagerouter.guid:
            occupied_ports.append(service.ports[0])
    port = System.get_free_ports(Configuration.get('ovs.ports.mds'),
                                 exclude=occupied_ports,
                                 nr=1,
                                 client=client)[0]

    # Add service to the model
    service = DalService()
    service.name = 'metadataserver_{0}_{1}'.format(vpool.name, service_number)
    service.type = mdsservice_type
    service.storagerouter = storagerouter
    service.ports = [port]
    service.save()
    mds_service = MDSService()
    mds_service.service = service
    mds_service.vpool = vpool
    mds_service.number = service_number
    mds_service.save()

    mds_nodes = []
    for service in mdsservice_type.services:
        if service.storagerouter_guid == storagerouter.guid:
            mds_service = service.mds_service
            if mds_service.vpool_guid == vpool.guid:
                mds_nodes.append({'host': service.storagerouter.ip,
                                  'port': service.ports[0],
                                  'db_directory': '{0}/mds_{1}_{2}'.format(storagedriver.mountpoint_md, vpool.name, mds_service.number),
                                  'scratch_directory': '{0}/mds_{1}_{2}'.format(storagedriver.mountpoint_temp, vpool.name, mds_service.number)})

    # Generate the correct section in the Storage Driver's configuration
    storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name)
    storagedriver_config.load(client)
    storagedriver_config.clean()  # Clean out obsolete values
    storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
    storagedriver_config.save(client, reload_config=reload_config)
    return mds_service
def prepare_mds_service(storagerouter, vpool, fresh_only, reload_config):
    """
    Prepares an MDS service:
        * Creates the required configuration
        * Sets up the service files
    Assumes the StorageRouter and VPool are already configured with a StorageDriver and that all model-wise configuration regarding both is completed.
    :param storagerouter: StorageRouter on which the MDS service will be created
    :type storagerouter: StorageRouter
    :param vpool: The vPool for which the MDS service will be created
    :type vpool: VPool
    :param fresh_only: If True, a new MDS service is only created when no MDS services exist yet for this vPool on this StorageRouter
    :type fresh_only: bool
    :param reload_config: If True, the volumedriver's updated configuration will be reloaded
    :type reload_config: bool
    :return: Newly created service
    :rtype: MDSService
    """
    # Fetch service sequence number based on MDS services for current vPool and current storage router
    service_number = -1
    for mds_service in vpool.mds_services:
        if mds_service.service.storagerouter_guid == storagerouter.guid:
            service_number = max(mds_service.number, service_number)
    if fresh_only is True and service_number >= 0:
        return  # There is already 1 or more MDS services running, aborting

    # VALIDATIONS
    # 1. Find free port based on MDS services for all vPools on current storage router
    client = SSHClient(storagerouter)
    mdsservice_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER)
    occupied_ports = []
    for service in mdsservice_type.services:
        if service.storagerouter_guid == storagerouter.guid:
            occupied_ports.extend(service.ports)

    mds_port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|mds'.format(System.get_my_machine_id(client)))
    free_ports = System.get_free_ports(selected_range=mds_port_range,
                                       exclude=occupied_ports,
                                       nr=1,
                                       client=client)
    if not free_ports:
        raise RuntimeError('Failed to find an available port on storage router {0} within range {1}'.format(storagerouter.name, mds_port_range))

    # 2. Partition check
    db_partition = None
    for disk in storagerouter.disks:
        for partition in disk.partitions:
            if DiskPartition.ROLES.DB in partition.roles:
                db_partition = partition
                break
    if db_partition is None:
        raise RuntimeError('Could not find DB partition on storage router {0}'.format(storagerouter.name))

    # 3. Verify storage driver configured
    storagedrivers = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid]
    if not storagedrivers:
        raise RuntimeError('Expected to find a configured storagedriver for vpool {0} on storage router {1}'.format(vpool.name, storagerouter.name))
    storagedriver = storagedrivers[0]

    # MODEL UPDATES
    # 1. Service
    service_number += 1
    service = Service()
    service.name = 'metadataserver_{0}_{1}'.format(vpool.name, service_number)
    service.type = mdsservice_type
    service.ports = [free_ports[0]]
    service.storagerouter = storagerouter
    service.save()
    mds_service = MDSService()
    mds_service.vpool = vpool
    mds_service.number = service_number
    mds_service.service = service
    mds_service.save()

    # 2. Storage driver partitions
    from ovs.lib.storagedriver import StorageDriverController
    StorageDriverController.add_storagedriverpartition(storagedriver, {'size': None,
                                                                       'role': DiskPartition.ROLES.DB,
                                                                       'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                       'partition': db_partition,
                                                                       'mds_service': mds_service})

    # CONFIGURATIONS
    # 1. Volumedriver
    mds_nodes = []
    for service in mdsservice_type.services:
        if service.storagerouter_guid == storagerouter.guid:
            mds_service = service.mds_service
            if mds_service is not None:
                if mds_service.vpool_guid == vpool.guid:
                    sdp = [sd_partition for sd_partition in mds_service.storagedriver_partitions
                           if sd_partition.role == DiskPartition.ROLES.DB and sd_partition.sub_role == StorageDriverPartition.SUBROLE.MDS][0]
                    mds_nodes.append({'host': service.storagerouter.ip,
                                      'port': service.ports[0],
                                      'db_directory': sdp.path,
                                      'scratch_directory': sdp.path})

    # Generate the correct section in the Storage Driver's configuration
    storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id)
    storagedriver_config.load()
    storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
    storagedriver_config.save(client, reload_config=reload_config)
    return mds_service
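
# Editor's note: port selection above excludes every port already claimed by an MDS
# service on the same StorageRouter. A self-contained sketch of that selection logic
# (System.get_free_ports additionally probes the host, which this stand-in does not):
def _example_pick_mds_port(port_range, occupied_ports):
    for port in port_range:
        if port not in occupied_ports:
            return port
    raise RuntimeError('Failed to find an available port within range {0}'.format(port_range))

# _example_pick_mds_port(range(26300, 26310), occupied_ports=[26300, 26301]) == 26302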
def prepare_mds_service(client, storagerouter, vpool, fresh_only=True, start=False):
    """
    Prepares an MDS service:
        * Creates the required configuration
        * Sets up the service files
    Assumes the StorageRouter and VPool are already configured with a StorageDriver and that all model-wise configuration regarding both is completed.
    """
    mdsservice_type = ServiceTypeList.get_by_name('MetadataServer')
    storagedriver = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid][0]

    # Fetch service sequence number
    service_number = -1
    for mds_service in vpool.mds_services:
        if mds_service.service.storagerouter_guid == storagerouter.guid:
            service_number = max(mds_service.number, service_number)

    if fresh_only is True and service_number >= 0:
        return None  # There are already one or more MDS services running, aborting
    service_number += 1

    # Find free port
    occupied_ports = []
    for service in mdsservice_type.services:
        if service.storagerouter_guid == storagerouter.guid:
            occupied_ports.append(service.ports[0])
    port = System.get_free_ports(Configuration.get('ovs.ports.mds'),
                                 exclude=occupied_ports,
                                 nr=1,
                                 client=client)[0]

    # Add service to the model
    service = Service()
    service.name = 'metadataserver_{0}_{1}'.format(vpool.name, service_number)
    service.type = mdsservice_type
    service.storagerouter = storagerouter
    service.ports = [port]
    service.save()
    mds_service = MDSService()
    mds_service.service = service
    mds_service.vpool = vpool
    mds_service.number = service_number
    mds_service.save()

    # Prepare some directories
    scratch_dir = '{0}/mds_{1}_{2}'.format(storagedriver.mountpoint_temp, vpool.name, service_number)
    rocksdb_dir = '{0}/mds_{1}_{2}'.format(storagedriver.mountpoint_md, vpool.name, service_number)
    client.run('mkdir -p {0}'.format(scratch_dir))
    client.run('mkdir -p {0}'.format(rocksdb_dir))

    # Generate the configuration file
    metadataserver_config = StorageDriverConfiguration('metadataserver', vpool.name, number=service_number)
    metadataserver_config.load(client)
    metadataserver_config.clean()  # Clean out obsolete values
    if vpool.backend_type.code == 'alba':
        metadataserver_config.configure_backend_connection_manager(alba_connection_host='127.0.0.1',
                                                                   alba_connection_port=storagedriver.alba_proxy.service.ports[0],
                                                                   backend_type='ALBA')
    else:
        metadataserver_config.configure_backend_connection_manager(**vpool.metadata)
    metadataserver_config.configure_metadata_server(mds_address=storagerouter.ip,
                                                    mds_port=service.ports[0],
                                                    mds_scratch_dir=scratch_dir,
                                                    mds_rocksdb_path=rocksdb_dir)
    metadataserver_config.save(client)

    # Create system services
    params = {'<VPOOL_NAME>': vpool.name,
              '<SERVICE_NUMBER>': str(service_number)}
    template_dir = '/opt/OpenvStorage/config/templates/upstart'
    client.run('cp -f {0}/ovs-metadataserver.conf {0}/ovs-metadataserver_{1}_{2}.conf'.format(template_dir, vpool.name, service_number))
    service_script = """
from ovs.plugin.provider.service import Service
Service.add_service(package=('openvstorage', 'metadataserver'), name='metadataserver_{0}_{1}', command=None, stop_command=None, params={2})
""".format(vpool.name, service_number, params)
    System.exec_remote_python(client, service_script)

    if start is True:
        System.exec_remote_python(client, """
from ovs.plugin.provider.service import Service
Service.enable_service('{0}')
""".format(service.name))
        System.exec_remote_python(client, """
from ovs.plugin.provider.service import Service
Service.start_service('{0}')
""".format(service.name))
    return mds_service
def prepare_mds_service(cls, storagerouter, vpool):
    """
    Prepares an MDS service:
        * Creates the required configuration
        * Sets up the service files
    Assumes the StorageRouter and vPool are already configured with a StorageDriver and that all model-wise configurations regarding both have been completed.
    :param storagerouter: StorageRouter on which the MDS service will be created
    :type storagerouter: ovs.dal.hybrids.storagerouter.StorageRouter
    :param vpool: The vPool for which the MDS service will be created
    :type vpool: ovs.dal.hybrids.vpool.VPool
    :raises RuntimeError: vPool is not extended on StorageRouter
                          No ServiceType found for 'MetadataServer'
                          No free port is found for the new MDSService
                          No partition found on StorageRouter with DB role
    :return: Newly created junction service
    :rtype: ovs.dal.hybrids.j_mdsservice.MDSService
    """
    from ovs.lib.storagedriver import StorageDriverController  # Import here to prevent circular imports

    cls._logger.info('StorageRouter {0} - vPool {1}: Preparing MDS junction service'.format(storagerouter.name, vpool.name))

    mds_service = MDSService()
    with volatile_mutex(name='prepare_mds_{0}'.format(storagerouter.guid), wait=30):
        # VALIDATIONS
        # Verify passed StorageRouter is part of the vPool
        storagerouter.invalidate_dynamics(['vpools_guids'])
        if vpool.guid not in storagerouter.vpools_guids:
            raise RuntimeError('StorageRouter {0} is not part of vPool {1}'.format(storagerouter.name, vpool.name))

        # Verify ServiceType existence
        mds_service_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER)
        if mds_service_type is None:
            raise RuntimeError('No ServiceType found with name {0}'.format(ServiceType.SERVICE_TYPES.MD_SERVER))

        # Retrieve occupied ports for current StorageRouter and max MDSService number for current vPool/StorageRouter combo
        service_number = -1
        occupied_ports = []
        for service in mds_service_type.services:
            if service.storagerouter_guid == storagerouter.guid:
                occupied_ports.extend(service.ports)
                if service.mds_service.vpool_guid == vpool.guid:
                    service_number = max(service.mds_service.number, service_number)

        client = SSHClient(endpoint=storagerouter)
        mds_port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|mds'.format(System.get_my_machine_id(client)))
        free_ports = System.get_free_ports(selected_range=mds_port_range,
                                           exclude=occupied_ports,
                                           amount=1,
                                           client=client)
        if len(free_ports) != 1:
            raise RuntimeError('Failed to find an available port on StorageRouter {0} within range {1}'.format(storagerouter.name, mds_port_range))

        # Partition check
        db_partition = None
        for disk in storagerouter.disks:
            for partition in disk.partitions:
                if DiskPartition.ROLES.DB in partition.roles:
                    db_partition = partition
                    break
        if db_partition is None:
            raise RuntimeError('Could not find DB partition on StorageRouter {0}'.format(storagerouter.name))

        # Verify StorageDriver configured
        storagedrivers = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid]
        if len(storagedrivers) != 1:
            raise RuntimeError('Expected to find a configured StorageDriver for vPool {0} on StorageRouter {1}'.format(vpool.name, storagerouter.name))

        # MODEL UPDATES
        # Service and MDS service
        service_number += 1
        cls._logger.info('StorageRouter {0} - vPool {1}: Adding junction service with number {2}'.format(storagerouter.name, vpool.name, service_number))

        service = Service()
        service.name = 'metadataserver_{0}_{1}'.format(vpool.name, service_number)
        service.type = mds_service_type
        service.ports = free_ports
        service.storagerouter = storagerouter
        service.save()
        mds_service.vpool = vpool
        mds_service.number = service_number
        mds_service.service = service
        mds_service.save()

        # StorageDriver partitions
        cls._logger.info('StorageRouter {0} - vPool {1}: Adding StorageDriverPartition on partition with mount point {2}'.format(storagerouter.name, vpool.name, db_partition.mountpoint))
        storagedriver = storagedrivers[0]
        sdp = StorageDriverController.add_storagedriverpartition(storagedriver, {'size': None,
                                                                                 'role': DiskPartition.ROLES.DB,
                                                                                 'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                                 'partition': db_partition,
                                                                                 'mds_service': mds_service})

        # CONFIGURATIONS
        # Volumedriver
        mds_nodes = []
        for sd_partition in storagedriver.partitions:
            if sd_partition.role == DiskPartition.ROLES.DB and sd_partition.sub_role == StorageDriverPartition.SUBROLE.MDS and sd_partition.mds_service is not None:
                service = sd_partition.mds_service.service
                mds_nodes.append({'host': service.storagerouter.ip,
                                  'port': service.ports[0],
                                  'db_directory': '{0}/db'.format(sd_partition.path),
                                  'scratch_directory': '{0}/scratch'.format(sd_partition.path)})

        cls._logger.info('StorageRouter {0} - vPool {1}: Configuring StorageDriver with MDS nodes: {2}'.format(storagerouter.name, vpool.name, mds_nodes))
        # Generate the correct section in the StorageDriver's configuration
        try:
            storagedriver_config = StorageDriverConfiguration(vpool.guid, storagedriver.storagedriver_id)
            storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
            storagedriver_config.save(client)
        except Exception:
            cls._logger.exception('StorageRouter {0} - vPool {1}: Configuring StorageDriver failed. Reverting model changes'.format(storagerouter.name, vpool.name))
            # Clean up model changes if an error occurs
            sdp.delete()
            mds_service.delete()  # Must be removed before the service
            service.delete()
    return mds_service
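
# Editor's note: note the rollback order in the except-branch above: the
# StorageDriverPartition and the MDSService junction are deleted before the Service they
# reference. A generic sketch of that compensate-on-failure idiom (illustrative; the
# original logs and cleans up without re-raising, while this stricter variant re-raises):
def _example_run_with_rollback(action, rollbacks):
    try:
        return action()
    except Exception:
        for rollback in rollbacks:  # most-dependent objects first
            rollback()
        raise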
def start_services(self):
    """
    Start all services related to the Storagedriver
    :return: None
    :rtype: NoneType
    """
    if self.sr_installer is None:
        raise RuntimeError('No StorageRouterInstaller instance found')

    vpool = self.vp_installer.vpool
    root_client = self.sr_installer.root_client
    storagerouter = self.sr_installer.storagerouter
    alba_pkg_name, alba_version_cmd = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_ALBA)
    voldrv_pkg_name, voldrv_version_cmd = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_SD)

    # Add/start watcher volumedriver service
    if not self.service_manager.has_service(name=ServiceFactory.SERVICE_WATCHER_VOLDRV, client=root_client):
        self.service_manager.add_service(name=ServiceFactory.SERVICE_WATCHER_VOLDRV, client=root_client)
        self.service_manager.start_service(name=ServiceFactory.SERVICE_WATCHER_VOLDRV, client=root_client)

    # Add/start DTL service
    self.service_manager.add_service(name=self.SERVICE_TEMPLATE_DTL,
                                     params={'DTL_PATH': self.storagedriver_partition_dtl.path,
                                             'DTL_ADDRESS': self.storagedriver.storage_ip,
                                             'DTL_PORT': str(self.storagedriver.ports['dtl']),
                                             'DTL_TRANSPORT': StorageDriverClient.VPOOL_DTL_TRANSPORT_MAP[self.dtl_transport],
                                             'LOG_SINK': Logger.get_sink_path('storagedriver-dtl_{0}'.format(self.storagedriver.storagedriver_id)),
                                             'VOLDRV_PKG_NAME': voldrv_pkg_name,
                                             'VOLDRV_VERSION_CMD': voldrv_version_cmd},
                                     client=root_client,
                                     target_name=self.dtl_service)
    self.service_manager.start_service(name=self.dtl_service, client=root_client)

    # Add/start ALBA proxy services
    for proxy in self.storagedriver.alba_proxies:
        alba_proxy_service = 'ovs-{0}'.format(proxy.service.name)
        self.service_manager.add_service(name=self.SERVICE_TEMPLATE_PROXY,
                                         params={'VPOOL_NAME': vpool.name,
                                                 'LOG_SINK': Logger.get_sink_path(proxy.service.name),
                                                 'CONFIG_PATH': Configuration.get_configuration_path('/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, proxy.guid)),
                                                 'ALBA_PKG_NAME': alba_pkg_name,
                                                 'ALBA_VERSION_CMD': alba_version_cmd},
                                         client=root_client,
                                         target_name=alba_proxy_service)
        self.service_manager.start_service(name=alba_proxy_service, client=root_client)

    # Add/start StorageDriver service
    self.service_manager.add_service(name=self.SERVICE_TEMPLATE_SD,
                                     params={'KILL_TIMEOUT': '30',
                                             'VPOOL_NAME': vpool.name,
                                             'VPOOL_MOUNTPOINT': self.storagedriver.mountpoint,
                                             'CONFIG_PATH': StorageDriverConfiguration(vpool_guid=vpool.guid, storagedriver_id=self.storagedriver.storagedriver_id).remote_path,
                                             'OVS_UID': root_client.run(['id', '-u', 'ovs']).strip(),
                                             'OVS_GID': root_client.run(['id', '-g', 'ovs']).strip(),
                                             'LOG_SINK': Logger.get_sink_path('storagedriver_{0}'.format(self.storagedriver.storagedriver_id)),
                                             'VOLDRV_PKG_NAME': voldrv_pkg_name,
                                             'VOLDRV_VERSION_CMD': voldrv_version_cmd,
                                             'METADATASTORE_BITS': 5},
                                     client=root_client,
                                     target_name=self.sd_service)

    current_startup_counter = self.storagedriver.startup_counter
    self.service_manager.start_service(name=self.sd_service, client=root_client)

    tries = 60
    while self.storagedriver.startup_counter == current_startup_counter and tries > 0:
        self._logger.debug('Waiting for the StorageDriver to start up for vPool {0} on StorageRouter {1} ...'.format(vpool.name, storagerouter.name))
        if self.service_manager.get_service_status(name=self.sd_service, client=root_client) != 'active':
            raise RuntimeError('StorageDriver service failed to start (service not running)')
        tries -= 1
        time.sleep(60 - tries)
        self.storagedriver.discard()

    if self.storagedriver.startup_counter == current_startup_counter:
        raise RuntimeError('StorageDriver service failed to start (got no event)')
    self._logger.debug('StorageDriver running')
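
# Editor's note: the start-up wait above polls storagedriver.startup_counter with a sleep
# that grows by one second per attempt (tries counts down from 60, so the loop sleeps
# 60 - tries seconds: 1s, 2s, 3s, ...). The same back-off in isolation (illustrative):
def _example_wait_until(predicate, attempts=60):
    import time
    for attempt in range(1, attempts + 1):
        if predicate():
            return True
        time.sleep(attempt)  # increasing back-off: 1s, 2s, 3s, ...
    return False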
def remove_mds_service(mds_service, vpool, reconfigure, allow_offline=False):
    """
    Removes an MDS service
    :param mds_service: The MDS service to remove
    :param vpool: The vPool for which the MDS service will be removed
    :param reconfigure: Indicates whether reconfiguration is required
    :param allow_offline: Indicates whether it's OK that the node for which mds services are cleaned is offline
    """
    if len(mds_service.vdisks_guids) > 0 and allow_offline is False:
        raise RuntimeError('Cannot remove MDSService that is still serving disks')

    mdsservice_type = ServiceTypeList.get_by_name('MetadataServer')

    # Clean up model
    directories_to_clean = []
    for sd_partition in mds_service.storagedriver_partitions:
        directories_to_clean.append(sd_partition.path)
        sd_partition.delete()

    if allow_offline is True:  # Certain vdisks might still be attached to this offline MDS service --> Delete relations
        for junction in mds_service.vdisks:
            junction.delete()
    mds_service.delete()
    mds_service.service.delete()

    storagerouter = mds_service.service.storagerouter
    try:
        client = SSHClient(storagerouter)
        if reconfigure is True:
            # Generate new mds_nodes section
            mds_nodes = []
            for service in mdsservice_type.services:
                if service.storagerouter_guid == storagerouter.guid:
                    mds_service = service.mds_service
                    if mds_service.vpool_guid == vpool.guid:
                        sdp = [sd_partition.path for sd_partition in mds_service.storagedriver_partitions if sd_partition.role == DiskPartition.ROLES.DB]
                        mds_nodes.append({'host': service.storagerouter.ip,
                                          'port': service.ports[0],
                                          'db_directory': sdp[0],
                                          'scratch_directory': sdp[0]})

            # Generate the correct section in the Storage Driver's configuration
            storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name)
            storagedriver_config.load(client)
            storagedriver_config.clean()  # Clean out obsolete values
            storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
            storagedriver_config.save(client, reload_config=reconfigure)

        tries = 5
        while tries > 0:
            try:
                root_client = SSHClient(storagerouter, username='******')
                root_client.dir_delete(directories=directories_to_clean, follow_symlinks=True)
                for dir_name in directories_to_clean:
                    logger.debug('Recursively removed {0}'.format(dir_name))
                break
            except Exception:
                logger.debug('Waiting for the MDS service to go down...')
                time.sleep(5)
                tries -= 1
                if tries == 0:
                    raise
    except UnableToConnectException:
        if allow_offline is True:
            logger.info('Allowed offline node during mds service removal')
        else:
            raise
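
# Editor's note: in this legacy variant the remaining MDS services are re-advertised with
# db_directory and scratch_directory pointing at the same DB-partition path. The shape of
# one mds_nodes entry, with purely illustrative values:
_example_mds_node = {'host': '10.100.1.10',
                     'port': 26300,
                     'db_directory': '/mnt/db/vpool1_mds_1',
                     'scratch_directory': '/mnt/db/vpool1_mds_1'}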
def migrate(): """ Executes async migrations. It doesn't matter too much when they are executed, as long as they get eventually executed. This code will typically contain: * "dangerous" migration code (it needs certain running services) * Migration code depending on a cluster-wide state * ... * Successfully finishing a piece of migration code, should create an entry in /ovs/framework/migration in case it should not be executed again * Eg: /ovs/framework/migration|stats_monkey_integration: True """ MigrationController._logger.info('Preparing out of band migrations...') from ovs.dal.lists.servicetypelist import ServiceTypeList from ovs.dal.lists.storagedriverlist import StorageDriverList from ovs.dal.lists.storagerouterlist import StorageRouterList from ovs.dal.lists.vpoollist import VPoolList from ovs.extensions.db.arakooninstaller import ArakoonInstaller from ovs.extensions.generic.configuration import Configuration from ovs.extensions.generic.sshclient import SSHClient from ovs_extensions.generic.toolbox import ExtensionsToolbox from ovs.extensions.migration.migration.ovsmigrator import ExtensionMigrator from ovs.extensions.packages.packagefactory import PackageFactory from ovs_extensions.services.interfaces.systemd import Systemd from ovs.extensions.services.servicefactory import ServiceFactory from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration from ovs.lib.helpers.storagedriver.installer import StorageDriverInstaller MigrationController._logger.info('Start out of band migrations...') service_manager = ServiceFactory.get_manager() sr_client_map = {} for storagerouter in StorageRouterList.get_storagerouters(): sr_client_map[storagerouter.guid] = SSHClient(endpoint=storagerouter.ip, # Is triggered during post-update code too during which the ovs-watcher-framework service is still down and thus not refreshing the heartbeat --> use IP i/o StorageRouter username='******') ######################################################### # Addition of 'ExecReload' for AlbaProxy SystemD services if ServiceFactory.get_service_type() == 'systemd': changed_clients = set() for storagedriver in StorageDriverList.get_storagedrivers(): root_client = sr_client_map[storagedriver.storagerouter_guid] for alba_proxy in storagedriver.alba_proxies: service = alba_proxy.service service_name = 'ovs-{0}'.format(service.name) if not service_manager.has_service(name=service_name, client=root_client): continue if 'ExecReload=' in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)): continue try: service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY, client=root_client, target_name=service_name) changed_clients.add(root_client) except: MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name)) for root_client in changed_clients: root_client.run(['systemctl', 'daemon-reload']) ################################################################## # Adjustment of open file descriptors for Arakoon services to 8192 changed_clients = set() for storagerouter in StorageRouterList.get_storagerouters(): root_client = sr_client_map[storagerouter.guid] for service_name in service_manager.list_services(client=root_client): if not service_name.startswith('ovs-arakoon-'): continue if ServiceFactory.get_service_type() == 'systemd': path = '/lib/systemd/system/{0}.service'.format(service_name) check = 'LimitNOFILE=8192' else: path = '/etc/init/{0}.conf'.format(service_name) check = 'limit nofile 8192 8192' if not 
            if check in root_client.file_read(path):
                continue
            try:
                service_manager.regenerate_service(name='ovs-arakoon',
                                                   client=root_client,
                                                   target_name=service_name)
                changed_clients.add(root_client)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='arakoon',
                                                    old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, service_name))
            except Exception:
                MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
    for root_client in changed_clients:
        root_client.run(['systemctl', 'daemon-reload'])

    #############################
    # Migrate to multiple proxies
    for storagedriver in StorageDriverList.get_storagedrivers():
        vpool = storagedriver.vpool
        root_client = sr_client_map[storagedriver.storagerouter_guid]
        for alba_proxy in storagedriver.alba_proxies:
            # Rename the alba_proxy service in the model
            service = alba_proxy.service
            old_service_name = 'albaproxy_{0}'.format(vpool.name)
            new_service_name = 'albaproxy_{0}_0'.format(vpool.name)
            if old_service_name != service.name:
                continue
            service.name = new_service_name
            service.save()

            if not service_manager.has_service(name=old_service_name, client=root_client):
                continue
            old_configuration_key = '/ovs/framework/hosts/{0}/services/{1}'.format(storagedriver.storagerouter.machine_id, old_service_name)
            if not Configuration.exists(key=old_configuration_key):
                continue

            # Add '-reboot' to the alba_proxy services (because of the newly created services and the removal of the old service)
            ExtensionsToolbox.edit_version_file(client=root_client,
                                                package_name='alba',
                                                old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, old_service_name),
                                                new_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, new_service_name))

            # Register the new service and remove the old one
            service_manager.add_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY,
                                        client=root_client,
                                        params=Configuration.get(old_configuration_key),
                                        target_name='ovs-{0}'.format(new_service_name))

            # Update the scrub proxy config
            proxy_config_key = '/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)
            proxy_config = None if Configuration.exists(key=proxy_config_key) is False else Configuration.get(proxy_config_key)
            if proxy_config is not None:
                fragment_cache = proxy_config.get(StorageDriverConfiguration.CACHE_FRAGMENT, ['none', {}])
                if fragment_cache[0] == 'alba' and fragment_cache[1].get('cache_on_write') is True:  # Accelerated ALBA configured
                    fragment_cache_scrub_info = copy.deepcopy(fragment_cache)
                    fragment_cache_scrub_info[1]['cache_on_read'] = False
                    proxy_scrub_config_key = '/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid)
                    proxy_scrub_config = None if Configuration.exists(key=proxy_scrub_config_key) is False else Configuration.get(proxy_scrub_config_key)
                    if proxy_scrub_config is not None and proxy_scrub_config[StorageDriverConfiguration.CACHE_FRAGMENT] == ['none']:
                        proxy_scrub_config[StorageDriverConfiguration.CACHE_FRAGMENT] = fragment_cache_scrub_info
                        Configuration.set(key=proxy_scrub_config_key, value=proxy_scrub_config)

        # Update the 'backend_connection_manager' section
        changes = False
        storagedriver_config = StorageDriverConfiguration(vpool.guid, storagedriver.storagedriver_id)
        if 'backend_connection_manager' not in storagedriver_config.configuration:
            continue

        current_config = storagedriver_config.configuration['backend_connection_manager']
        if current_config.get('backend_type') != 'MULTI':
            changes = True
            backend_connection_manager = {'backend_type': 'MULTI'}
            for index, proxy in enumerate(sorted(storagedriver.alba_proxies, key=lambda pr: pr.service.ports[0])):
                backend_connection_manager[str(index)] = copy.deepcopy(current_config)
                # noinspection PyUnresolvedReferences
                backend_connection_manager[str(index)]['alba_connection_use_rora'] = True
                # noinspection PyUnresolvedReferences
                backend_connection_manager[str(index)]['alba_connection_rora_manifest_cache_capacity'] = 5000
                # noinspection PyUnresolvedReferences
                for key, value in backend_connection_manager[str(index)].items():
                    if key.startswith('backend_interface'):
                        backend_connection_manager[key] = value
                        # noinspection PyUnresolvedReferences
                        del backend_connection_manager[str(index)][key]
            for key, value in {'backend_interface_retries_on_error': 5,
                               'backend_interface_retry_interval_secs': 1,
                               'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                if key not in backend_connection_manager:
                    backend_connection_manager[key] = value
        else:
            backend_connection_manager = current_config
            for value in backend_connection_manager.values():
                if isinstance(value, dict):
                    for key, val in value.items():
                        if key.startswith('backend_interface'):
                            backend_connection_manager[key] = val
                            changes = True
                            del value[key]
            for key, value in {'backend_interface_retries_on_error': 5,
                               'backend_interface_retry_interval_secs': 1,
                               'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                if key not in backend_connection_manager:
                    changes = True
                    backend_connection_manager[key] = value

        if changes is True:
            storagedriver_config.clear_backend_connection_manager()
            storagedriver_config.configure_backend_connection_manager(**backend_connection_manager)
            storagedriver_config.save(root_client)

            # Add '-reboot' to the volumedriver services (because of the updated 'backend_connection_manager' section)
            ExtensionsToolbox.edit_version_file(client=root_client,
                                                package_name='volumedriver',
                                                old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, 'volumedriver_{0}'.format(vpool.name)))
            if service_manager.__class__ == Systemd:
                root_client.run(['systemctl', 'daemon-reload'])

    ########################################
    # Update metadata_store_bits information
    vpools = VPoolList.get_vpools()
    for vpool in vpools:
        bits = None
        for storagedriver in vpool.storagedrivers:
            key = '/ovs/framework/hosts/{0}/services/volumedriver_{1}'.format(storagedriver.storagerouter.machine_id, vpool.name)
            if Configuration.exists(key=key) and 'METADATASTORE_BITS' not in Configuration.get(key=key):
                if bits is None:
                    entries = service_manager.extract_from_service_file(name='ovs-volumedriver_{0}'.format(vpool.name),
                                                                        client=sr_client_map[storagedriver.storagerouter_guid],
                                                                        entries=['METADATASTORE_BITS='])
                    if len(entries) == 1:
                        bits = entries[0].split('=')[-1]
                        bits = int(bits) if bits.isdigit() else 5
                if bits is not None:
                    try:
                        content = Configuration.get(key=key)
                        content['METADATASTORE_BITS'] = bits
                        Configuration.set(key=key, value=content)
                    except Exception:
                        MigrationController._logger.exception('Error updating volumedriver info for vPool {0} on StorageRouter {1}'.format(vpool.name, storagedriver.storagerouter.name))
        if bits is not None:
            vpool.metadata_store_bits = bits
            vpool.save()

    #####################################
    # Update the vPool metadata structure
    def _update_metadata_structure(metadata):
        metadata = copy.deepcopy(metadata)
        cache_structure = {'read': False,
                           'write': False,
                           'is_backend': False,
                           'quota': None,
                           'backend_info': {'name': None,  # Will be filled in when 'is_backend' is True
                                            'backend_guid': None,
                                            'alba_backend_guid': None,
                                            'policies': None,
                                            'preset': None,
                                            'arakoon_config': None,
                                            'connection_info': {'client_id': None,
                                                                'client_secret': None,
                                                                'host': None,
                                                                'port': None,
                                                                'local': None}}}
        structure_map = {StorageDriverConfiguration.CACHE_BLOCK: {'read': 'block_cache_on_read',
                                                                  'write': 'block_cache_on_write',
                                                                  'quota': 'quota_bc',
                                                                  'backend_prefix': 'backend_bc_{0}'},
                         StorageDriverConfiguration.CACHE_FRAGMENT: {'read': 'fragment_cache_on_read',
                                                                     'write': 'fragment_cache_on_write',
                                                                     'quota': 'quota_fc',
                                                                     'backend_prefix': 'backend_aa_{0}'}}
        if 'arakoon_config' in metadata['backend']:  # The Arakoon config should be placed under the backend info
            metadata['backend']['backend_info']['arakoon_config'] = metadata['backend'].pop('arakoon_config')
        if 'connection_info' in metadata['backend']:  # The connection info should be placed under the backend info
            metadata['backend']['backend_info']['connection_info'] = metadata['backend'].pop('connection_info')
        if 'caching_info' not in metadata:  # 'caching_info' is the new key
            would_be_caching_info = {}
            metadata['caching_info'] = would_be_caching_info
            # Extract all caching data for every StorageRouter
            current_caching_info = metadata['backend'].pop('caching_info')  # Pop to mutate the metadata
            for storagerouter_guid in current_caching_info.iterkeys():
                current_cache_data = current_caching_info[storagerouter_guid]
                storagerouter_caching_info = {}
                would_be_caching_info[storagerouter_guid] = storagerouter_caching_info
                for cache_type, cache_type_mapping in structure_map.iteritems():
                    new_cache_structure = copy.deepcopy(cache_structure)
                    storagerouter_caching_info[cache_type] = new_cache_structure
                    for new_structure_key, old_structure_key in cache_type_mapping.iteritems():
                        if new_structure_key == 'backend_prefix':
                            # Get the possible backend-related info
                            metadata_key = old_structure_key.format(storagerouter_guid)
                            if metadata_key not in metadata:
                                continue
                            backend_data = metadata.pop(metadata_key)  # Pop to mutate the metadata
                            new_cache_structure['is_backend'] = True
                            # Copy over the old data
                            new_cache_structure['backend_info']['arakoon_config'] = backend_data['arakoon_config']
                            new_cache_structure['backend_info'].update(backend_data['backend_info'])
                            new_cache_structure['backend_info']['connection_info'].update(backend_data['connection_info'])
                        else:
                            new_cache_structure[new_structure_key] = current_cache_data.get(old_structure_key)
        return metadata

    vpools = VPoolList.get_vpools()
    for vpool in vpools:
        try:
            new_metadata = _update_metadata_structure(vpool.metadata)
            vpool.metadata = new_metadata
            vpool.save()
        except KeyError:
            MigrationController._logger.exception('Exceptions occurred when updating the metadata for vPool {0}'.format(vpool.name))

    ##############################################
    # Always use indent=4 during Configuration set
    def _resave_all_config_entries(config_path='/ovs'):
        """
        Recursive function which checks for every config management key whether it is a directory or not.
        If it is not a directory, we retrieve the config and save it again using the new indentation logic
        """
        for item in Configuration.list(config_path):
            new_path = config_path + '/' + item
            print new_path
            if Configuration.dir_exists(new_path) is True:
                _resave_all_config_entries(config_path=new_path)
            else:
                try:
                    _config = Configuration.get(new_path)
                    Configuration.set(new_path, _config)
                except Exception:
                    _config = Configuration.get(new_path, raw=True)
                    Configuration.set(new_path, _config, raw=True)

    if ExtensionMigrator.THIS_VERSION <= 13:
        # There is no way of checking whether this new indentation logic has been applied, so we only perform this for version 13 and lower
        MigrationController._logger.info('Re-saving every configuration setting with new indentation rules')
        _resave_all_config_entries()

    ############################
    # Update some default values
    def _update_manifest_cache_size(_proxy_config_key):
        updated = False
        manifest_cache_size = 500 * 1024 * 1024
        if Configuration.exists(key=_proxy_config_key):
            _proxy_config = Configuration.get(key=_proxy_config_key)
            for cache_type in [StorageDriverConfiguration.CACHE_BLOCK, StorageDriverConfiguration.CACHE_FRAGMENT]:
                if cache_type in _proxy_config and _proxy_config[cache_type][0] == 'alba':
                    if _proxy_config[cache_type][1]['manifest_cache_size'] != manifest_cache_size:
                        updated = True
                        _proxy_config[cache_type][1]['manifest_cache_size'] = manifest_cache_size
            if _proxy_config['manifest_cache_size'] != manifest_cache_size:
                updated = True
                _proxy_config['manifest_cache_size'] = manifest_cache_size
            if updated is True:
                Configuration.set(key=_proxy_config_key, value=_proxy_config)
        return updated

    for storagedriver in StorageDriverList.get_storagedrivers():
        try:
            vpool = storagedriver.vpool
            root_client = sr_client_map[storagedriver.storagerouter_guid]
            # The generic scrub proxy is deployed every time scrubbing kicks in, so there is no need to restart these services
            _update_manifest_cache_size('/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid))
            for alba_proxy in storagedriver.alba_proxies:
                if _update_manifest_cache_size('/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)) is True:
                    # Add '-reboot' to the alba_proxy services (because of the newly created services and the removal of the old service)
                    ExtensionsToolbox.edit_version_file(client=root_client,
                                                        package_name='alba',
                                                        old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, alba_proxy.service.name))

            # Update the 'backend_connection_manager' section
            changes = False
            storagedriver_config = StorageDriverConfiguration(vpool.guid, storagedriver.storagedriver_id)
            if 'backend_connection_manager' not in storagedriver_config.configuration:
                continue

            current_config = storagedriver_config.configuration['backend_connection_manager']
            for key, value in current_config.iteritems():
                if key.isdigit() is True:
                    if value.get('alba_connection_asd_connection_pool_capacity') != 10:
                        changes = True
                        value['alba_connection_asd_connection_pool_capacity'] = 10
                    if value.get('alba_connection_timeout') != 30:
                        changes = True
                        value['alba_connection_timeout'] = 30
                    if value.get('alba_connection_rora_manifest_cache_capacity') != 25000:
                        changes = True
                        value['alba_connection_rora_manifest_cache_capacity'] = 25000

            if changes is True:
                storagedriver_config.clear_backend_connection_manager()
                storagedriver_config.configure_backend_connection_manager(**current_config)
                storagedriver_config.save(root_client)

                # Add '-reboot' to the volumedriver services (because of the updated 'backend_connection_manager' section)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='volumedriver',
                                                    old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, 'volumedriver_{0}'.format(vpool.name)))
        except Exception:
            MigrationController._logger.exception('Updating default configuration values failed for StorageDriver {0}'.format(storagedriver.storagedriver_id))

    ####################################################
    # Adding proxy fail fast as env variable for proxies
    changed_clients = set()
    for storagerouter in StorageRouterList.get_storagerouters():
        root_client = sr_client_map[storagerouter.guid]
        for service_name in service_manager.list_services(client=root_client):
            if not service_name.startswith('ovs-albaproxy_'):
                continue
            if ServiceFactory.get_service_type() == 'systemd':
                path = '/lib/systemd/system/{0}.service'.format(service_name)
                check = 'Environment=ALBA_FAIL_FAST=true'
            else:
                path = '/etc/init/{0}.conf'.format(service_name)
                check = 'env ALBA_FAIL_FAST=true'
            if not root_client.file_exists(path):
                continue
            if check in root_client.file_read(path):
                continue
            try:
                service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY,
                                                   client=root_client,
                                                   target_name=service_name)
                changed_clients.add(root_client)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='alba',
                                                    old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, service_name))
            except Exception:
                MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
    for root_client in changed_clients:
        root_client.run(['systemctl', 'daemon-reload'])

    ######################################
    # Integration of stats monkey (2.10.2)
    if Configuration.get(key='/ovs/framework/migration|stats_monkey_integration', default=False) is False:
        try:
            # Copy the content of the old key into the new key
            old_stats_monkey_key = '/statsmonkey/statsmonkey'
            if Configuration.exists(key=old_stats_monkey_key) is True:
                Configuration.set(key='/ovs/framework/monitoring/stats_monkey',
                                  value=Configuration.get(key=old_stats_monkey_key))
                Configuration.delete(key=old_stats_monkey_key)

            # Make sure to disable the stats monkey by default, or take over the current schedule if it was configured manually before
            celery_key = '/ovs/framework/scheduling/celery'
            current_value = None
            scheduling_config = Configuration.get(key=celery_key, default={})
            if 'statsmonkey.run_all_stats' in scheduling_config:  # Old Celery task name of the stats monkey
                current_value = scheduling_config.pop('statsmonkey.run_all_stats')
            scheduling_config['ovs.stats_monkey.run_all'] = current_value
            scheduling_config['alba.stats_monkey.run_all'] = current_value
            Configuration.set(key=celery_key, value=scheduling_config)

            support_key = '/ovs/framework/support'
            support_config = Configuration.get(key=support_key)
            support_config['support_agent'] = support_config.pop('enabled', True)
            support_config['remote_access'] = support_config.pop('enablesupport', False)
            Configuration.set(key=support_key, value=support_config)

            # Make sure this never runs again once it finished, by setting this key to True
            Configuration.set(key='/ovs/framework/migration|stats_monkey_integration', value=True)
        except Exception:
            MigrationController._logger.exception('Integration of stats monkey failed')

    ######################################################
    # Write away cluster ID to a file for back-up purposes
    try:
        cluster_id = Configuration.get(key='/ovs/framework/cluster_id', default=None)
        with open(Configuration.CONFIG_STORE_LOCATION, 'r') as config_file:
            config = json.load(config_file)
        if cluster_id is not None and config.get('cluster_id', None) is None:
            config['cluster_id'] = cluster_id
            with open(Configuration.CONFIG_STORE_LOCATION, 'w') as config_file:
                json.dump(config, config_file, indent=4)
    except Exception:
        MigrationController._logger.exception('Writing cluster id to a file failed.')

    #########################################################
    # Additional string formatting in Arakoon services (2.11)
    try:
        if Configuration.get(key='/ovs/framework/migration|arakoon_service_update', default=False) is False:
            arakoon_service_names = [ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name)
                                     for cluster_name in Configuration.list(key='ovs/arakoon')]
            for storagerouter in StorageRouterList.get_masters():
                for service_name in arakoon_service_names:
                    config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(storagerouter.machine_id, service_name)
                    if Configuration.exists(key=config_key):
                        config = Configuration.get(key=config_key)
                        config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR
                        config['ARAKOON_PKG_NAME'] = PackageFactory.PKG_ARAKOON
                        config['ARAKOON_VERSION_CMD'] = PackageFactory.VERSION_CMD_ARAKOON
                        Configuration.set(key=config_key, value=config)
            # Make sure this never runs again once it finished, by setting this key to True
            Configuration.set(key='/ovs/framework/migration|arakoon_service_update', value=True)
    except Exception:
        MigrationController._logger.exception('Updating the string formatting for the Arakoon services failed')

    ############################################################
    # Additional string formatting in ALBA proxy services (2.11)
    changed_clients = set()
    try:
        if Configuration.get(key='/ovs/framework/migration|alba_proxy_service_update', default=False) is False:
            alba_pkg_name, alba_version_cmd = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_ALBA)
            for service in ServiceTypeList.get_by_name('AlbaProxy').services:
                root_client = sr_client_map[service.storagerouter_guid]
                config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(service.storagerouter.machine_id, service.name)
                if Configuration.exists(key=config_key):
                    config = Configuration.get(key=config_key)
                    config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR
                    config['ALBA_PKG_NAME'] = alba_pkg_name
                    config['ALBA_VERSION_CMD'] = alba_version_cmd
                    Configuration.set(key=config_key, value=config)
                    service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY,
                                                       client=root_client,
                                                       target_name='ovs-{0}'.format(service.name))
                    changed_clients.add(root_client)
            # Make sure this never runs again once it finished, by setting this key to True
            Configuration.set(key='/ovs/framework/migration|alba_proxy_service_update', value=True)
    except Exception:
        MigrationController._logger.exception('Updating the string formatting for the ALBA proxy services failed')

    ############################################################
    # Additional string formatting in DTL/VOLDRV services (2.11)
    try:
        if Configuration.get(key='/ovs/framework/migration|voldrv_service_update', default=False) is False:
            sd_pkg_name, sd_version_cmd = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_SD)
            for vpool in VPoolList.get_vpools():
                for storagedriver in vpool.storagedrivers:
                    root_client = sr_client_map[storagedriver.storagerouter_guid]
                    for entry in ['dtl', 'volumedriver']:
                        service_name = '{0}_{1}'.format(entry, vpool.name)
                        service_template = StorageDriverInstaller.SERVICE_TEMPLATE_DTL if entry == 'dtl' else StorageDriverInstaller.SERVICE_TEMPLATE_SD
                        config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(storagedriver.storagerouter.machine_id, service_name)
                        if Configuration.exists(key=config_key):
                            config = Configuration.get(key=config_key)
                            config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR
                            config['VOLDRV_PKG_NAME'] = sd_pkg_name
                            config['VOLDRV_VERSION_CMD'] = sd_version_cmd
                            Configuration.set(key=config_key, value=config)
                            service_manager.regenerate_service(name=service_template,
                                                               client=root_client,
                                                               target_name='ovs-{0}'.format(service_name))
                            changed_clients.add(root_client)
            # Make sure this never runs again once it finished, by setting this key to True
            Configuration.set(key='/ovs/framework/migration|voldrv_service_update', value=True)
    except Exception:
        MigrationController._logger.exception('Updating the string formatting for the DTL and volumedriver services failed')

    #######################################################
    # Storing actual package name in version files (2.11.0) (https://github.com/openvstorage/framework/issues/1876)
    if Configuration.get(key='/ovs/framework/migration|actual_package_name_in_version_file', default=False) is False:
        try:
            voldrv_pkg_name, _ = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_SD)
            for storagerouter in StorageRouterList.get_storagerouters():
                root_client = sr_client_map.get(storagerouter.guid)
                if root_client is None:
                    continue
                for file_name in root_client.file_list(directory=ServiceFactory.RUN_FILE_DIR):
                    if not file_name.endswith('.version'):
                        continue
                    file_path = '{0}/{1}'.format(ServiceFactory.RUN_FILE_DIR, file_name)
                    contents = root_client.file_read(filename=file_path)
                    regenerate = False
                    if voldrv_pkg_name == PackageFactory.PKG_VOLDRV_SERVER:
                        if 'volumedriver-server' in contents:
                            regenerate = True
                            contents = contents.replace('volumedriver-server', PackageFactory.PKG_VOLDRV_SERVER)
                            root_client.file_write(filename=file_path, contents=contents)
                    elif voldrv_pkg_name == PackageFactory.PKG_VOLDRV_SERVER_EE:
                        if 'volumedriver-server' in contents or PackageFactory.PKG_VOLDRV_SERVER in contents:
                            regenerate = True
                            contents = contents.replace('volumedriver-server', PackageFactory.PKG_VOLDRV_SERVER_EE)
                            contents = contents.replace(PackageFactory.PKG_VOLDRV_SERVER, PackageFactory.PKG_VOLDRV_SERVER_EE)
                            root_client.file_write(filename=file_path, contents=contents)
                    if regenerate is True:
                        service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_DTL if file_name.startswith('dtl') else StorageDriverInstaller.SERVICE_TEMPLATE_SD,
                                                           client=root_client,
                                                           target_name='ovs-{0}'.format(file_name.split('.')[0]))  # Leave out '.version'
                        changed_clients.add(root_client)
            Configuration.set(key='/ovs/framework/migration|actual_package_name_in_version_file', value=True)
        except Exception:
            MigrationController._logger.exception('Updating actual package name for version files failed')

    for root_client in changed_clients:
        try:
            root_client.run(['systemctl', 'daemon-reload'])
        except Exception:
            MigrationController._logger.exception('Executing command "systemctl daemon-reload" failed')

    #############################################################################################
    # Addition of 'Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50' for AlbaProxy SystemD services
    if ServiceFactory.get_service_type() == 'systemd':
        changed_clients = set()
        for storagedriver in StorageDriverList.get_storagedrivers():
            root_client = sr_client_map[storagedriver.storagerouter_guid]
            for alba_proxy in storagedriver.alba_proxies:
                service = alba_proxy.service
                service_name = 'ovs-{0}'.format(service.name)
                if not service_manager.has_service(name=service_name, client=root_client):
                    continue
continue if "Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50" in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)): continue try: service_manager.regenerate_service(name='ovs-albaproxy', client=root_client, target_name=service_name) changed_clients.add(root_client) except: MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name)) for root_client in changed_clients: root_client.run(['systemctl', 'daemon-reload']) ######################################################### # Addition of 'Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50' for Arakoon SystemD services if ServiceFactory.get_service_type() == 'systemd': changed_clients = set() for storagerouter in StorageRouterList.get_storagerouters(): root_client = sr_client_map[storagerouter.guid] for service_name in service_manager.list_services(client=root_client): if not service_name.startswith('ovs-arakoon-'): continue if not service_manager.has_service(name=service_name, client=root_client): continue if "Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50" in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)): continue try: service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name=service_name) changed_clients.add(root_client) except: MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name)) for root_client in changed_clients: root_client.run(['systemctl', 'daemon-reload']) MigrationController._logger.info('Finished out of band migrations')
def mds_checkup():
    """
    Validates the current MDS setup/configuration and takes actions where required
    """
    logger.info('MDS checkup - Started')
    mds_dict = {}
    for vpool in VPoolList.get_vpools():
        logger.info('MDS checkup - vPool {0}'.format(vpool.name))
        mds_dict[vpool] = {}
        for mds_service in vpool.mds_services:
            storagerouter = mds_service.service.storagerouter
            if storagerouter not in mds_dict[vpool]:
                mds_dict[vpool][storagerouter] = {'client': None,
                                                  'services': []}
                try:
                    client = SSHClient(storagerouter, username='******')
                    client.run('pwd')
                    mds_dict[vpool][storagerouter]['client'] = client
                    logger.info('MDS checkup - vPool {0} - Storage Router {1} - ONLINE'.format(vpool.name, storagerouter.name))
                except UnableToConnectException:
                    logger.info('MDS checkup - vPool {0} - Storage Router {1} - OFFLINE'.format(vpool.name, storagerouter.name))
            mds_dict[vpool][storagerouter]['services'].append(mds_service)

    failures = []
    max_load = EtcdConfiguration.get('/ovs/framework/storagedriver|mds_maxload')
    for vpool, storagerouter_info in mds_dict.iteritems():
        # 1. First, make sure there is at least one MDS on every StorageRouter that is not overloaded
        #    If there is not, create an extra MDS for that StorageRouter
        for storagerouter in storagerouter_info:
            client = mds_dict[vpool][storagerouter]['client']
            mds_services = mds_dict[vpool][storagerouter]['services']
            has_room = False
            for mds_service in mds_services[:]:
                if mds_service.capacity == 0 and len(mds_service.vdisks_guids) == 0:
                    logger.info('MDS checkup - Removing mds_service {0} for vPool {1}'.format(mds_service.number, vpool.name))
                    MDSServiceController.remove_mds_service(mds_service, vpool, reconfigure=True, allow_offline=client is None)
                    mds_services.remove(mds_service)
            for mds_service in mds_services:
                _, load = MDSServiceController.get_mds_load(mds_service)
                if load < max_load:
                    has_room = True
                    break
            logger.info('MDS checkup - vPool {0} - Storage Router {1} - Capacity available: {2}'.format(vpool.name, storagerouter.name, has_room))
            if has_room is False and client is not None:
                mds_service = MDSServiceController.prepare_mds_service(storagerouter=storagerouter,
                                                                       vpool=vpool,
                                                                       fresh_only=False,
                                                                       reload_config=True)
                if mds_service is None:
                    raise RuntimeError('Could not add MDS node')
                mds_services.append(mds_service)
        mds_config_set = MDSServiceController.get_mds_storagedriver_config_set(vpool, True)
        for storagerouter in storagerouter_info:
            client = mds_dict[vpool][storagerouter]['client']
            if client is None:
                logger.info('MDS checkup - vPool {0} - Storage Router {1} - Marked as offline, not setting default MDS configuration'.format(vpool.name, storagerouter.name))
                continue
            storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name)
            storagedriver_config.load(client)
            if storagedriver_config.is_new is False:
                logger.info('MDS checkup - vPool {0} - Storage Router {1} - Storing default MDS configuration: {2}'.format(vpool.name, storagerouter.name, mds_config_set[storagerouter.guid]))
                storagedriver_config.clean()  # Clean out obsolete values
                storagedriver_config.configure_filesystem(fs_metadata_backend_mds_nodes=mds_config_set[storagerouter.guid])
                storagedriver_config.save(client)
        # 2. Per vPool, execute a safety check, making sure the master/slave configuration is optimal.
        logger.info('MDS checkup - vPool {0} - Ensuring safety for all virtual disks'.format(vpool.name))
        for vdisk in vpool.vdisks:
            try:
                MDSServiceController.ensure_safety(vdisk)
            except Exception as ex:
                failures.append('Ensure safety for vDisk {0} with guid {1} failed with error: {2}'.format(vdisk.name, vdisk.guid, ex))
    if len(failures) > 0:
        raise Exception('\n - ' + '\n - '.join(failures))
    logger.info('MDS checkup - Finished')
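
# Illustrative sketch (not part of the original code): the reasoning behind the capacity
# check above. Assuming get_mds_load() expresses the load of an MDS service as the
# percentage of its capacity taken up by the vDisks it serves, a StorageRouter only gets an
# extra MDS once every existing MDS reports a load of at least mds_maxload. The helper
# below is hypothetical and assumes 'capacity' is a vDisk count:
#
#     def _example_mds_load(capacity, nr_of_vdisks):
#         if capacity == 0:
#             return 100.0  # No capacity means the service is always considered full
#         return 100.0 * nr_of_vdisks / capacity
#
#     _example_mds_load(capacity=100, nr_of_vdisks=80)  # 80.0; full once mds_maxload <= 80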