def test_arakoon_collapse(self):
    """
    Test the Arakoon collapse functionality.

    Builds two StorageRouters, creates one Arakoon cluster (or more) per supported
    cluster type, then runs GenericController.collapse_arakoon() and validates the
    emitted log messages for three scenarios:
      * internal clusters whose collapse command is mocked -> expected to succeed
      * internal clusters without a mocked command -> expected to fail
      * externally managed clusters -> expected to be skipped entirely
    """
    # Set up the test
    structure = DalHelper.build_dal_structure(structure={'storagerouters': [1, 2]})
    storagerouter_1 = structure['storagerouters'][1]
    storagerouter_2 = structure['storagerouters'][2]
    MockedSSHClient._run_returns[storagerouter_1.ip] = {}
    MockedSSHClient._run_returns[storagerouter_2.ip] = {}

    # Make sure we cover all Arakoon cluster types
    # 'internal' controls whether services are modeled on the StorageRouters,
    # 'success' controls whether the collapse command will be mocked (and thus succeed)
    clusters_to_create = {ServiceType.ARAKOON_CLUSTER_TYPES.SD: [{'name': 'unittest-voldrv', 'internal': True, 'success': True}],
                          ServiceType.ARAKOON_CLUSTER_TYPES.CFG: [{'name': 'unittest-cacc', 'internal': True, 'success': True}],
                          ServiceType.ARAKOON_CLUSTER_TYPES.FWK: [{'name': 'unittest-ovsdb', 'internal': True, 'success': False}],
                          ServiceType.ARAKOON_CLUSTER_TYPES.ABM: [{'name': 'unittest-cluster-1-abm', 'internal': True, 'success': False},
                                                                  {'name': 'unittest-random-abm-name', 'internal': False, 'success': True}],
                          ServiceType.ARAKOON_CLUSTER_TYPES.NSM: [{'name': 'unittest-cluster-1-nsm_0', 'internal': True, 'success': True}]}
    self.assertEqual(first=sorted(clusters_to_create.keys()),
                     second=sorted(ServiceType.ARAKOON_CLUSTER_TYPES.keys()),
                     msg='An Arakoon cluster type has been removed or added, please update this test accordingly')

    # Create all Arakoon clusters and related services
    failed_clusters = []
    external_clusters = []
    successful_clusters = []
    for cluster_type, cluster_infos in clusters_to_create.iteritems():
        # The CFG cluster is filesystem-based, its config is read from a file instead of the configuration store
        filesystem = cluster_type == ServiceType.ARAKOON_CLUSTER_TYPES.CFG
        for cluster_info in cluster_infos:
            internal = cluster_info['internal']
            cluster_name = cluster_info['name']

            base_dir = DalHelper.CLUSTER_DIR.format(cluster_name)
            arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
            arakoon_installer.create_cluster(cluster_type=cluster_type,
                                             ip=storagerouter_1.ip,
                                             base_dir=base_dir,
                                             internal=internal)
            arakoon_installer.start_cluster()
            arakoon_installer.extend_cluster(new_ip=storagerouter_2.ip,
                                             base_dir=base_dir)

            service_name = ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name)
            if cluster_type == ServiceType.ARAKOON_CLUSTER_TYPES.ABM:
                service_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ALBA_MGR)
            elif cluster_type == ServiceType.ARAKOON_CLUSTER_TYPES.NSM:
                service_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.NS_MGR)
            else:
                service_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)

            if internal is True:
                # Internal clusters get a service modeled on both StorageRouters
                DalHelper.create_service(service_name=service_name,
                                         service_type=service_type,
                                         storagerouter=storagerouter_1,
                                         ports=arakoon_installer.ports[storagerouter_1.ip])
                DalHelper.create_service(service_name=service_name,
                                         service_type=service_type,
                                         storagerouter=storagerouter_2,
                                         ports=arakoon_installer.ports[storagerouter_2.ip])
            else:
                # Externally managed clusters are not linked to a StorageRouter and are never collapsed
                DalHelper.create_service(service_name=service_name,
                                         service_type=service_type)
                external_clusters.append(cluster_name)
                continue

            if cluster_info['success'] is True:
                if filesystem is True:
                    config_path = ArakoonClusterConfig.CONFIG_FILE.format(cluster_name)
                else:
                    config_path = Configuration.get_configuration_path(ArakoonClusterConfig.CONFIG_KEY.format(cluster_name))
                # Mock the exact collapse commands so they return successfully on both nodes
                MockedSSHClient._run_returns[storagerouter_1.ip]['arakoon --collapse-local 1 2 -config {0}'.format(config_path)] = None
                MockedSSHClient._run_returns[storagerouter_2.ip]['arakoon --collapse-local 2 2 -config {0}'.format(config_path)] = None
                successful_clusters.append(cluster_name)
            else:  # For successful False clusters we don't emulate the collapse, thus making it fail
                failed_clusters.append(cluster_name)

    # Start collapse and make it fail for all clusters on StorageRouter 2
    SSHClient._raise_exceptions[storagerouter_2.ip] = {'users': ['ovs'],
                                                       'exception': UnableToConnectException('No route to host')}
    GenericController.collapse_arakoon()

    # Verify all log messages for each type of cluster
    generic_logs = Logger._logs.get('lib', {})
    for cluster_name in successful_clusters + failed_clusters + external_clusters:
        collect_msg = ('DEBUG', 'Collecting info for cluster {0}'.format(cluster_name))
        unreachable_msg = ('ERROR', 'Could not collapse any cluster on {0} (not reachable)'.format(storagerouter_2.name))
        end_collapse_msg = ('DEBUG', 'Collapsing cluster {0} on {1} completed'.format(cluster_name, storagerouter_1.ip))
        start_collapse_msg = ('DEBUG', 'Collapsing cluster {0} on {1}'.format(cluster_name, storagerouter_1.ip))
        failed_collapse_msg = ('ERROR', 'Collapsing cluster {0} on {1} failed'.format(cluster_name, storagerouter_1.ip))
        messages_to_validate = []
        if cluster_name in successful_clusters:
            assert_function = self.assertIn
            messages_to_validate.append(collect_msg)
            messages_to_validate.append(unreachable_msg)
            messages_to_validate.append(start_collapse_msg)
            messages_to_validate.append(end_collapse_msg)
        elif cluster_name in failed_clusters:
            assert_function = self.assertIn
            messages_to_validate.append(collect_msg)
            messages_to_validate.append(unreachable_msg)
            messages_to_validate.append(start_collapse_msg)
            messages_to_validate.append(failed_collapse_msg)
        else:
            # External clusters should never show up in the collapse logs
            assert_function = self.assertNotIn
            messages_to_validate.append(collect_msg)
            messages_to_validate.append(start_collapse_msg)
            messages_to_validate.append(end_collapse_msg)

        for severity, message in messages_to_validate:
            if assert_function == self.assertIn:
                assert_message = 'Expected to find log message: {0}'.format(message)
            else:
                assert_message = 'Did not expect to find log message: {0}'.format(message)
            assert_function(member=message,
                            container=generic_logs,
                            msg=assert_message)
            if assert_function == self.assertIn:
                # Also validate that the message was logged at the expected severity
                self.assertEqual(first=severity,
                                 second=generic_logs[message],
                                 msg='Log message {0} is of severity {1} expected {2}'.format(message, generic_logs[message], severity))

    # Collapse should always have a 'finished' message since each cluster should be attempted to be collapsed
    for general_message in ['Arakoon collapse started', 'Arakoon collapse finished']:
        self.assertIn(member=general_message,
                      container=generic_logs,
                      msg='Expected to find log message: {0}'.format(general_message))
def extend_arakoon(cluster_name, master_storagerouter_ip, storagerouter_ip, cluster_basedir,
                   service_type=ServiceType.ARAKOON_CLUSTER_TYPES.FWK, clustered_nodes=None):
    """
    Adds a external arakoon to a storagerouter

    :param cluster_name: name of the already existing arakoon cluster
    :type cluster_name: str
    :param master_storagerouter_ip: master ip address of the existing arakoon cluster e.g. 10.100.199.11
    :type master_storagerouter_ip: str
    :param storagerouter_ip: ip of a new storagerouter to extend to e.g. 10.100.199.12
    :type storagerouter_ip: str
    :param cluster_basedir: absolute path for the new arakoon cluster
    :type cluster_basedir: str
    :param service_type: type of plugin for arakoon (DEFAULT=ServiceType.ARAKOON_CLUSTER_TYPES.FWK)
        * FWK
        * ABM
        * NSM
    :type service_type: ovs.dal.hybrids.ServiceType.ARAKOON_CLUSTER_TYPES
    :param clustered_nodes: nodes who are available for the arakoon (including the to be extended_arakoon)
                            e.g. ['10.100.199.11', '10.100.199.12'] (DEFAULT=[])
    :type clustered_nodes: list
    :return: None
    :rtype: NoneType
    """
    if clustered_nodes is None:
        clustered_nodes = []
    client = SSHClient(storagerouter_ip, username='******')

    # create required directories
    if not client.dir_exists(cluster_basedir):
        client.dir_create(cluster_basedir)

    ArakoonSetup.LOGGER.info("Starting extending arakoon cluster with name `{0}`, master_ip `{1}`, slave_ip `{2}`, base_dir `{3}`"
                             .format(cluster_name, master_storagerouter_ip, storagerouter_ip, cluster_basedir))
    arakoon_installer = ArakoonInstaller(cluster_name)
    arakoon_installer.load()
    arakoon_installer.extend_cluster(new_ip=storagerouter_ip,
                                     base_dir=cluster_basedir,
                                     locked=False,
                                     log_sinks=Logger.get_sink_path('automation_lib_arakoon_server'),
                                     crash_log_sinks=Logger.get_sink_path('automation_lib_arakoon_server_crash'))

    # ABM and NSM clusters require the corresponding ALBA plugin to be symlinked into the cluster's db directory
    if service_type == ServiceType.ARAKOON_CLUSTER_TYPES.ABM:
        client.run(['ln', '-s', '/usr/lib/alba/albamgr_plugin.cmxs',
                    '{0}/arakoon/{1}/db'.format(cluster_basedir, cluster_name)])
    elif service_type == ServiceType.ARAKOON_CLUSTER_TYPES.NSM:
        client.run(['ln', '-s', '/usr/lib/alba/nsm_host_plugin.cmxs',
                    '{0}/arakoon/{1}/db'.format(cluster_basedir, cluster_name)])

    # checking if we need to restart the given nodes
    if len(clustered_nodes) != 0:
        # NOTE: the superfluous `cluster_name` argument previously passed to these
        # format() calls was dead (the template only contains {0}) and has been removed
        ArakoonSetup.LOGGER.info("Trying to restart all given nodes of arakoon: {0}".format(clustered_nodes))
        arakoon_installer.restart_cluster_after_extending(new_ip=storagerouter_ip)
        ArakoonSetup.LOGGER.info("Finished restarting all given nodes of arakoon: {0}".format(clustered_nodes))

    ArakoonSetup.LOGGER.info("Finished extending arakoon cluster with name `{0}`, master_ip `{1}`, slave_ip `{2}`, base_dir `{3}`"
                             .format(cluster_name, master_storagerouter_ip, storagerouter_ip, cluster_basedir))
def _voldrv_arakoon_checkup(create_cluster):
    """
    Ensure the 'voldrv' (StorageDriver) Arakoon cluster exists and is modeled.

    When no cluster is known yet and `create_cluster` is True, either claims an
    unused externally managed cluster or deploys a new internal one on a master
    StorageRouter with a DB role. Afterwards, an internal cluster is extended to
    every available master StorageRouter not yet part of it.

    :param create_cluster: whether a cluster may be created/claimed if none exists yet
    :type create_cluster: bool
    :return: None
    """
    def _add_service(service_storagerouter, arakoon_ports, service_name):
        """ Add a service to the storage router """
        new_service = Service()
        new_service.name = service_name
        new_service.type = service_type  # Closes over the outer 'service_type' (ARAKOON service type)
        new_service.ports = arakoon_ports
        new_service.storagerouter = service_storagerouter
        new_service.save()
        return new_service

    current_ips = []
    current_services = []
    service_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
    # Collect the services (and their IPs, for internal clusters) already modeled for the voldrv cluster
    cluster_name = Configuration.get('/ovs/framework/arakoon_clusters').get('voldrv')
    if cluster_name is not None:
        arakoon_service_name = ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name)
        for service in service_type.services:
            if service.name == arakoon_service_name:
                current_services.append(service)
                if service.is_internal is True:
                    current_ips.append(service.storagerouter.ip)

    all_sr_ips = [storagerouter.ip for storagerouter in StorageRouterList.get_slaves()]
    # Masters with a DB role partition are candidates to host/extend the cluster
    available_storagerouters = {}
    for storagerouter in StorageRouterList.get_masters():
        storagerouter.invalidate_dynamics(['partition_config'])
        if len(storagerouter.partition_config[DiskPartition.ROLES.DB]) > 0:
            available_storagerouters[storagerouter] = DiskPartition(storagerouter.partition_config[DiskPartition.ROLES.DB][0])
        all_sr_ips.append(storagerouter.ip)

    if create_cluster is True and len(current_services) == 0:  # Create new cluster
        metadata = ArakoonInstaller.get_unused_arakoon_metadata_and_claim(cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.SD)
        if metadata is None:  # No externally managed cluster found, we create 1 ourselves
            if not available_storagerouters:
                raise RuntimeError('Could not find any Storage Router with a DB role')

            storagerouter, partition = available_storagerouters.items()[0]
            arakoon_voldrv_cluster = 'voldrv'
            arakoon_installer = ArakoonInstaller(cluster_name=arakoon_voldrv_cluster)
            arakoon_installer.create_cluster(cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.SD,
                                             ip=storagerouter.ip,
                                             base_dir=partition.folder,
                                             log_sinks=LogHandler.get_sink_path('arakoon-server_{0}'.format(arakoon_voldrv_cluster)),
                                             crash_log_sinks=LogHandler.get_sink_path('arakoon-server-crash_{0}'.format(arakoon_voldrv_cluster)))
            arakoon_installer.start_cluster()
            ports = arakoon_installer.ports[storagerouter.ip]
            metadata = arakoon_installer.metadata
            current_ips.append(storagerouter.ip)
        else:
            # Externally managed cluster claimed: no local ports/StorageRouter to model
            ports = []
            storagerouter = None

        cluster_name = metadata['cluster_name']
        Configuration.set('/ovs/framework/arakoon_clusters|voldrv', cluster_name)
        StorageDriverController._logger.info('Claiming {0} managed arakoon cluster: {1}'.format('externally' if storagerouter is None else 'internally', cluster_name))
        StorageDriverController._configure_arakoon_to_volumedriver(cluster_name=cluster_name)
        current_services.append(_add_service(service_storagerouter=storagerouter,
                                             arakoon_ports=ports,
                                             service_name=ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name)))

    # Re-read; may have been set above or by another flow
    cluster_name = Configuration.get('/ovs/framework/arakoon_clusters').get('voldrv')
    if cluster_name is None:
        return
    metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)
    # Internal clusters are extended to every available master StorageRouter not yet participating
    if 0 < len(current_services) < len(available_storagerouters) and metadata['internal'] is True:
        for storagerouter, partition in available_storagerouters.iteritems():
            if storagerouter.ip in current_ips:
                continue
            arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
            arakoon_installer.load()
            arakoon_installer.extend_cluster(new_ip=storagerouter.ip,
                                             base_dir=partition.folder,
                                             log_sinks=LogHandler.get_sink_path('arakoon-server_{0}'.format(cluster_name)),
                                             crash_log_sinks=LogHandler.get_sink_path('arakoon-server-crash_{0}'.format(cluster_name)))
            _add_service(service_storagerouter=storagerouter,
                         arakoon_ports=arakoon_installer.ports[storagerouter.ip],
                         service_name=ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name))
            current_ips.append(storagerouter.ip)
            arakoon_installer.restart_cluster_after_extending(new_ip=storagerouter.ip)
        StorageDriverController._configure_arakoon_to_volumedriver(cluster_name=cluster_name)
def promote_node(cluster_ip, master_ip, ip_client_map, unique_id, configure_memcached, configure_rabbitmq):
    """
    Promotes a given node from EXTRA to MASTER.

    Joins the node to the Arakoon config and OVS DB clusters (when internally managed),
    configures memcached/RabbitMQ when requested, registers endpoints, (re)starts the
    relevant services and finally marks the promotion as completed in the configuration.

    :param cluster_ip: IP of the node being promoted
    :param master_ip: IP of an existing master node
    :param ip_client_map: mapping of node IP -> SSHClient for all relevant nodes
    :param unique_id: machine id of the StorageRouter being promoted
    :param configure_memcached: whether memcached must be configured on this node
    :param configure_rabbitmq: whether RabbitMQ must be configured on this node
    """
    from ovs.dal.lists.storagerouterlist import StorageRouterList
    from ovs.dal.lists.servicetypelist import ServiceTypeList
    from ovs.dal.lists.servicelist import ServiceList
    from ovs.dal.hybrids.service import Service

    Toolbox.log(logger=NodeTypeController._logger, messages='Promoting node', title=True)
    service_manager = ServiceFactory.get_manager()
    # Promotion requires all memcache nodes to be reachable (cache consistency across masters)
    if configure_memcached is True:
        if NodeTypeController._validate_local_memcache_servers(ip_client_map) is False:
            raise RuntimeError('Not all memcache nodes can be reached which is required for promoting a node.')

    target_client = ip_client_map[cluster_ip]
    machine_id = System.get_my_machine_id(target_client)
    node_name, _ = target_client.get_hostname()
    master_client = ip_client_map[master_ip]

    storagerouter = StorageRouterList.get_by_machine_id(unique_id)
    storagerouter.node_type = 'MASTER'
    storagerouter.save()

    # Only join the 'config' Arakoon cluster when the configuration store is not externally managed
    external_config = Configuration.get('/ovs/framework/external_config')
    if external_config is None:
        Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon configuration cluster')
        arakoon_installer = ArakoonInstaller(cluster_name='config')
        arakoon_installer.load(ip=master_ip)
        arakoon_installer.extend_cluster(new_ip=cluster_ip,
                                         base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
        arakoon_installer.restart_cluster_after_extending(new_ip=cluster_ip)
        service_manager.register_service(node_name=machine_id,
                                         service_metadata=arakoon_installer.service_metadata[cluster_ip])

    # Find other (arakoon) master nodes
    arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=arakoon_cluster_name)
    config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name)
    master_node_ips = [node.ip for node in config.nodes]
    if cluster_ip in master_node_ips:
        master_node_ips.remove(cluster_ip)
    if len(master_node_ips) == 0:
        raise RuntimeError('There should be at least one other master node')

    arakoon_ports = []
    # For an internally managed OVS DB cluster, the promoted node joins it
    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon OVS DB cluster')
        arakoon_installer = ArakoonInstaller(cluster_name=arakoon_cluster_name)
        arakoon_installer.load()
        arakoon_installer.extend_cluster(new_ip=cluster_ip,
                                         base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
        arakoon_installer.restart_cluster_after_extending(new_ip=cluster_ip)
        arakoon_ports = arakoon_installer.ports[cluster_ip]

    if configure_memcached is True:
        NodeTypeController.configure_memcached(client=target_client, logger=NodeTypeController._logger)
    NodeTypeController.add_services(client=target_client, node_type='master', logger=NodeTypeController._logger)

    Toolbox.log(logger=NodeTypeController._logger, messages='Update configurations')
    # Register this node's memcached/RabbitMQ endpoints if they're not known yet
    if configure_memcached is True:
        endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
        endpoint = '{0}:11211'.format(cluster_ip)
        if endpoint not in endpoints:
            endpoints.append(endpoint)
            Configuration.set('/ovs/framework/memcache|endpoints', endpoints)
    if configure_rabbitmq is True:
        endpoints = Configuration.get('/ovs/framework/messagequeue|endpoints')
        endpoint = '{0}:5672'.format(cluster_ip)
        if endpoint not in endpoints:
            endpoints.append(endpoint)
            Configuration.set('/ovs/framework/messagequeue|endpoints', endpoints)

    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting master node services')
        # Reset the factory stores so they reconnect with the extended cluster topology
        PersistentFactory.store = None
        VolatileFactory.store = None

        # Model the arakoon-ovsdb service for this node if not yet present
        # NOTE(review): the filter condition (external services OR services on this node) looks
        # intentional but is non-obvious -- verify against the modeling of arakoon-ovsdb services
        if 'arakoon-ovsdb' not in [s.name for s in ServiceList.get_services() if s.is_internal is False or s.storagerouter.ip == cluster_ip]:
            service = Service()
            service.name = 'arakoon-ovsdb'
            service.type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
            service.ports = arakoon_ports
            service.storagerouter = storagerouter
            service.save()

    if configure_rabbitmq is True:
        NodeTypeController.configure_rabbitmq(client=target_client, logger=NodeTypeController._logger)
        # Copy rabbitmq cookie
        rabbitmq_cookie_file = '/var/lib/rabbitmq/.erlang.cookie'

        Toolbox.log(logger=NodeTypeController._logger, messages='Copying RabbitMQ cookie')
        contents = master_client.file_read(rabbitmq_cookie_file)
        master_hostname, _ = master_client.get_hostname()
        target_client.dir_create(os.path.dirname(rabbitmq_cookie_file))
        target_client.file_write(rabbitmq_cookie_file, contents)
        target_client.file_chmod(rabbitmq_cookie_file, mode=0400)
        # Join this node's RabbitMQ to the master's cluster; sleeps give RabbitMQ time to settle
        target_client.run(['rabbitmq-server', '-detached'])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'stop_app'])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'join_cluster', 'rabbit@{0}'.format(master_hostname)])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'stop'])
        time.sleep(5)

        # Enable HA for the rabbitMQ queues
        ServiceFactory.change_service_state(target_client, 'rabbitmq-server', 'start', NodeTypeController._logger)
        NodeTypeController.check_rabbitmq_and_enable_ha_mode(client=target_client, logger=NodeTypeController._logger)
    NodeTypeController._configure_amqp_to_volumedriver()

    Toolbox.log(logger=NodeTypeController._logger, messages='Starting services')
    services = ['memcached', 'arakoon-ovsdb', 'rabbitmq-server']
    # arakoon-ovsdb was already restarted above when the cluster is internally managed
    if arakoon_metadata['internal'] is True:
        services.remove('arakoon-ovsdb')
    for service in services:
        if service_manager.has_service(service, client=target_client):
            ServiceFactory.change_service_state(target_client, service, 'start', NodeTypeController._logger)

    Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
    NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)

    # Hooks may change configuration; restart again if any promote hook ran
    if Toolbox.run_hooks(component='nodetype',
                         sub_component='promote',
                         logger=NodeTypeController._logger,
                         cluster_ip=cluster_ip,
                         master_ip=master_ip):
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)

    if NodeTypeController.avahi_installed(client=target_client, logger=NodeTypeController._logger) is True:
        NodeTypeController.configure_avahi(client=target_client, node_name=node_name, node_type='master', logger=NodeTypeController._logger)
    Configuration.set('/ovs/framework/hosts/{0}/type'.format(machine_id), 'MASTER')
    target_client.run(['chown', '-R', 'ovs:ovs', '/opt/OpenvStorage/config'])
    Configuration.set('/ovs/framework/hosts/{0}/promotecompleted'.format(machine_id), True)

    # Clean up any rollback marker from a previous failed promote/demote
    if target_client.file_exists('/tmp/ovs_rollback'):
        target_client.file_delete('/tmp/ovs_rollback')

    Toolbox.log(logger=NodeTypeController._logger, messages='Promote complete')
def ensure_nsm_clusters_load(cls, alba_backend, nsms_per_storagerouter=None, min_internal_nsms=1, external_nsm_cluster_names=None, version_str=None, ssh_clients=None):
    # type: (AlbaBackend, Optional[Dict[StorageRouter, int]], int, Optional[List[str]], Optional[str], Optional[Dict[StorageRouter, SSHClient]]) -> None
    """
    Ensure that all NSM clusters are not overloaded
    :param alba_backend: Alba Backend to ensure NSM Cluster load for
    :type alba_backend: AlbaBackend
    :param nsms_per_storagerouter: Amount of NSMs mapped by StorageRouter
    :type nsms_per_storagerouter: Dict[StorageRouter, int]
    :param min_internal_nsms: Minimum amount of NSM hosts that need to be provided
    :type min_internal_nsms: int
    :param external_nsm_cluster_names: Information about the additional clusters to claim (only for externally managed Arakoon clusters)
    :type external_nsm_cluster_names: list
    :param version_str: Alba version string
    :type version_str: str
    :param ssh_clients: SSHClients to use
    :type ssh_clients: Dict[StorageRouter, SSHClient]
    :return: None
    :rtype: NoneType
    """
    if ssh_clients is None:
        ssh_clients = {}
    if external_nsm_cluster_names is None:
        external_nsm_cluster_names = []
    nsms_per_storagerouter = nsms_per_storagerouter if nsms_per_storagerouter is not None else cls.get_nsms_per_storagerouter(alba_backend)
    version_str = version_str or AlbaArakoonInstaller.get_alba_version_string()
    nsm_loads = cls.get_nsm_loads(alba_backend)
    internal = AlbaArakoonInstaller.is_internally_managed(alba_backend)
    abm_cluster_name = alba_backend.abm_cluster.name

    safety = Configuration.get('/ovs/framework/plugins/alba/config|nsm.safety')
    maxload = Configuration.get('/ovs/framework/plugins/alba/config|nsm.maxload')

    overloaded = min(nsm_loads.values()) >= maxload
    if not overloaded:
        # At least 1 NSM is not overloaded yet
        cls._logger.debug('ALBA Backend {0} - NSM load OK'.format(alba_backend.name))
        if internal:
            # Load is OK: only top up to the configured minimum amount of internal NSMs
            nsms_to_add = max(0, min_internal_nsms - len(nsm_loads))
        else:
            nsms_to_add = len(external_nsm_cluster_names)
        if nsms_to_add == 0:
            return
    else:
        cls._logger.warning('ALBA Backend {0} - NSM load is NOT OK'.format(alba_backend.name))
        if internal:
            # When load is not OK, deploy at least 1 additional NSM
            nsms_to_add = max(1, min_internal_nsms - len(nsm_loads))
        else:
            # For externally managed clusters we only claim the specified clusters, if none provided, we just log it
            nsms_to_add = len(external_nsm_cluster_names)
            if nsms_to_add == 0:
                cls._logger.critical('ALBA Backend {0} - All NSM clusters are overloaded'.format(alba_backend.name))
                return

    # Deploy new (internal) or claim existing (external) NSM clusters
    cls._logger.debug('ALBA Backend {0} - Currently {1} NSM cluster{2}'.format(alba_backend.name, len(nsm_loads), '' if len(nsm_loads) == 1 else 's'))
    cls._logger.debug('ALBA Backend {0} - Trying to add {1} NSM cluster{2}'.format(alba_backend.name, nsms_to_add, '' if nsms_to_add == 1 else 's'))
    base_number = max(nsm_loads.keys()) + 1
    for index, number in enumerate(xrange(base_number, base_number + nsms_to_add)):
        if not internal:
            # External clusters: find a reachable master node to register the NSM through
            master_client = None
            if not ssh_clients:
                for storagerouter in StorageRouterList.get_masters():
                    try:
                        master_client = SSHClient(storagerouter)
                    except UnableToConnectException:
                        cls._logger.warning('StorageRouter {0} with IP {1} is not reachable'.format(storagerouter.name, storagerouter.ip))
            else:
                for storagerouter, ssh_client in ssh_clients.iteritems():
                    if storagerouter.node_type == 'MASTER':
                        master_client = ssh_client
            if not master_client:
                raise ValueError('Could not find an online master node')
            # @todo this might raise an indexerror?
            nsm_cluster_name = external_nsm_cluster_names[index]
            cls._logger.debug('ALBA Backend {0} - Claiming NSM cluster {1}'.format(alba_backend.name, nsm_cluster_name))
            metadata = ArakoonInstaller.get_unused_arakoon_metadata_and_claim(cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.NSM,
                                                                              cluster_name=nsm_cluster_name)
            if metadata is None:
                cls._logger.critical('ALBA Backend {0} - NSM cluster with name {1} could not be found'.format(alba_backend.name, nsm_cluster_name))
                continue
            cls._logger.debug('ALBA Backend {0} - Modeling services'.format(alba_backend.name))
            AlbaArakoonInstaller.model_arakoon_service(alba_backend=alba_backend,
                                                       cluster_name=nsm_cluster_name,
                                                       number=number)
            cls._logger.debug('ALBA Backend {0} - Registering NSM'.format(alba_backend.name))
            NSMInstaller.register_nsm(abm_name=abm_cluster_name,
                                      nsm_name=nsm_cluster_name,
                                      ip=master_client.ip)
            cls._logger.debug('ALBA Backend {0} - Extended cluster'.format(alba_backend.name))
        else:
            # Internal clusters
            nsm_cluster_name = '{0}-nsm_{1}'.format(alba_backend.name, number)
            cls._logger.debug('ALBA Backend {0} - Adding NSM cluster {1}'.format(alba_backend.name, nsm_cluster_name))

            # One of the NSM nodes is overloaded. This means the complete NSM is considered overloaded
            # Figure out which StorageRouters are the least occupied
            loads = sorted(nsms_per_storagerouter.values())[:safety]
            storagerouters = []
            for storagerouter, load in nsms_per_storagerouter.iteritems():
                if load in loads:
                    storagerouters.append(storagerouter)
                if len(storagerouters) == safety:
                    break
            # Creating a new NSM cluster
            for sub_index, storagerouter in enumerate(storagerouters):
                nsms_per_storagerouter[storagerouter] += 1
                partition = AlbaArakoonInstaller.get_db_partition(storagerouter)
                arakoon_installer = ArakoonInstaller(cluster_name=nsm_cluster_name)
                # @todo Use deploy and extend code. (Disable register nsm in those parts)
                if sub_index == 0:
                    arakoon_installer.create_cluster(cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.NSM,
                                                     ip=storagerouter.ip,
                                                     base_dir=partition.folder,
                                                     plugins={NSM_PLUGIN: version_str})
                else:
                    cls._logger.debug('ALBA Backend {0} - Extending NSM cluster {1}'.format(alba_backend.name, nsm_cluster_name))
                    arakoon_installer.load()
                    arakoon_installer.extend_cluster(new_ip=storagerouter.ip,
                                                     base_dir=partition.folder,
                                                     plugins={NSM_PLUGIN: version_str})
                cls._logger.debug('ALBA Backend {0} - Linking plugins'.format(alba_backend.name))
                # BUGFIX: previously this read 'SSHClient(StorageRouter)', passing the DAL *class*
                # instead of the StorageRouter *instance* when no client was cached
                ssh_client = ssh_clients.get(storagerouter) or SSHClient(storagerouter)
                AlbaArakoonInstaller.link_plugins(client=ssh_client,
                                                  data_dir=partition.folder,
                                                  plugins=[NSM_PLUGIN],
                                                  cluster_name=nsm_cluster_name)
                cls._logger.debug('ALBA Backend {0} - Modeling services'.format(alba_backend.name))
                AlbaArakoonInstaller.model_arakoon_service(alba_backend=alba_backend,
                                                           cluster_name=nsm_cluster_name,
                                                           ports=arakoon_installer.ports[storagerouter.ip],
                                                           storagerouter=storagerouter,
                                                           number=number)
                if sub_index == 0:
                    cls._logger.debug('ALBA Backend {0} - Starting cluster'.format(alba_backend.name))
                    arakoon_installer.start_cluster()
                else:
                    cls._logger.debug('ALBA Backend {0} - Restarting cluster'.format(alba_backend.name))
                    arakoon_installer.restart_cluster_after_extending(new_ip=storagerouter.ip)
            cls._logger.debug('ALBA Backend {0} - Registering NSM'.format(alba_backend.name))
            NSMInstaller.register_nsm(abm_name=abm_cluster_name,
                                      nsm_name=nsm_cluster_name,
                                      ip=storagerouters[0].ip)
            cls._logger.debug('ALBA Backend {0} - Added NSM cluster {1}'.format(alba_backend.name, nsm_cluster_name))
def test_nsm_checkup_external(self):
    """
    Validates whether the NSM checkup works for externally managed Arakoon clusters.

    Covers: argument validation of nsm_checkup, claiming unclaimed external clusters
    through add_cluster, overload detection (critical log, no changes), the 50-NSM
    cluster maximum, double-claim protection and claiming an additional NSM cluster.
    """
    Configuration.set('/ovs/framework/plugins/alba/config|nsm.safety', 1)
    Configuration.set('/ovs/framework/plugins/alba/config|nsm.maxload', 10)

    structure = DalHelper.build_dal_structure(structure={'storagerouters': [1, 2, 3]})
    alba_structure = AlbaDalHelper.build_dal_structure(structure={'alba_backends': [[1, 'LOCAL']]})

    alba_backend = alba_structure['alba_backends'][1]
    storagerouter_1 = structure['storagerouters'][1]
    storagerouter_2 = structure['storagerouters'][2]

    # Validate some logic for externally managed arakoons during NSM checkup
    with self.assertRaises(ValueError) as raise_info:
        AlbaArakoonController.nsm_checkup(external_nsm_cluster_names=['test'])  # No ALBA Backend specified
    self.assertEqual(first=str(raise_info.exception),
                     second='Additional NSMs can only be configured for a specific ALBA Backend')
    with self.assertRaises(ValueError) as raise_info:
        AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, min_internal_nsms=2, external_nsm_cluster_names=['test'])
    self.assertEqual(first=str(raise_info.exception),
                     second="'min_internal_nsms' and 'external_nsm_cluster_names' are mutually exclusive")
    with self.assertRaises(ValueError) as raise_info:
        # noinspection PyTypeChecker
        AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names={})  # NSM cluster names must be a list
    self.assertEqual(first=str(raise_info.exception),
                     second="'external_nsm_cluster_names' must be of type 'list'")
    with self.assertRaises(ValueError) as raise_info:
        AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names=['non-existing-cluster'])  # non-existing cluster names should raise
    self.assertEqual(first=str(raise_info.exception),
                     second="Arakoon cluster with name non-existing-cluster does not exist")

    # Create an external ABM and NSM Arakoon cluster
    external_abm_1 = 'backend_1-abm'
    external_nsm_1 = 'backend_1-nsm_0'
    external_nsm_2 = 'backend_1-nsm_1'
    for cluster_name, cluster_type in {external_abm_1: 'ABM', external_nsm_1: 'NSM', external_nsm_2: 'NSM'}.iteritems():
        arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
        arakoon_installer.create_cluster(cluster_type=cluster_type, ip=storagerouter_1.ip, base_dir='/tmp', internal=False)
        arakoon_installer.extend_cluster(new_ip=storagerouter_2.ip, base_dir='/tmp')
        arakoon_installer.start_cluster()
        # Unclaimed: 'in_use' must be False so the checkup can claim the cluster later
        arakoon_installer.unclaim_cluster()
        self.assertDictEqual(d1={'cluster_name': cluster_name, 'cluster_type': cluster_type, 'internal': False, 'in_use': False},
                             d2=arakoon_installer.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name))

    # Let the 'add_cluster` claim the externally managed clusters and model the services
    Logger._logs = {}
    AlbaController.add_cluster(alba_backend_guid=alba_backend.guid,
                               abm_cluster=external_abm_1,
                               nsm_clusters=[external_nsm_1])  # Only claim external_nsm_1
    for cluster_name, cluster_type in {external_abm_1: 'ABM', external_nsm_1: 'NSM', external_nsm_2: 'NSM'}.iteritems():
        arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
        self.assertDictEqual(d1={'cluster_name': cluster_name, 'cluster_type': cluster_type, 'internal': False, 'in_use': False if cluster_name == external_nsm_2 else True},
                             d2=arakoon_installer.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name))
    # The implicit NSM checkup ran by add_cluster should have reported a healthy load
    log_found = False
    for log_record in Logger._logs.get('lib', []):
        if 'NSM load OK' in log_record:
            log_found = True
            break
    self.assertTrue(expr=log_found)
    self.assertEqual(first=1, second=len(alba_backend.abm_cluster.abm_services))
    self.assertEqual(first=1, second=len(alba_backend.nsm_clusters))
    self.assertEqual(first=1, second=len(alba_backend.nsm_clusters[0].nsm_services))
    # Externally managed services are not bound to a StorageRouter
    self.assertIsNone(obj=alba_backend.abm_cluster.abm_services[0].service.storagerouter)
    self.assertIsNone(obj=alba_backend.nsm_clusters[0].nsm_services[0].service.storagerouter)
    self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [['update_abm_client_config'],
                                                                       ['add_nsm_host', 'backend_1-nsm_0'],
                                                                       ['update_maintenance_config', '--eviction-type-random'],
                                                                       ['update_maintenance_config', 'enable-auto-cleanup-deleted-namespaces-days']])

    # Add cluster already invokes a NSM checkup, so nothing should have changed
    VirtualAlbaBackend.run_log['backend_1-abm'] = []
    AlbaArakoonController.nsm_checkup()
    self.assertListEqual(list1=[], list2=VirtualAlbaBackend.run_log['backend_1-abm'])

    # Overload the only NSM and run NSM checkup. This should log a critical message, but change nothing
    VirtualAlbaBackend.data['backend_1-abm']['nsms'][0]['namespaces_count'] = 25
    Logger._logs = {}
    AlbaArakoonController.nsm_checkup()
    log_found = False
    for log_record in Logger._logs.get('lib', []):
        if 'All NSM clusters are overloaded' in log_record:
            log_found = True
            break
    self.assertTrue(expr=log_found)
    self.assertEqual(first=1, second=len(alba_backend.abm_cluster.abm_services))
    self.assertEqual(first=1, second=len(alba_backend.nsm_clusters))
    self.assertEqual(first=1, second=len(alba_backend.nsm_clusters[0].nsm_services))
    self.assertIsNone(obj=alba_backend.abm_cluster.abm_services[0].service.storagerouter)
    self.assertIsNone(obj=alba_backend.nsm_clusters[0].nsm_services[0].service.storagerouter)
    self.assertListEqual(list1=[], list2=VirtualAlbaBackend.run_log['backend_1-abm'])

    # Validate a maximum of 50 NSMs can be deployed
    current_nsms = [nsm_cluster.number for nsm_cluster in alba_backend.nsm_clusters]
    alba_structure = AlbaDalHelper.build_dal_structure(
        structure={'alba_nsm_clusters': [(1, 50)]},  # (<abackend_id>, <amount_of_nsm_clusters>)
        previous_structure=alba_structure
    )
    # Try to add 1 additional NSM
    with self.assertRaises(ValueError) as raise_info:
        AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names=[external_nsm_2])
    self.assertEqual(first=str(raise_info.exception),
                     second='The maximum of 50 NSM Arakoon clusters will be exceeded. Amount of clusters that can be deployed for this ALBA Backend: 0')

    # Remove the unused NSM clusters again
    for nsm_cluster in alba_structure['alba_nsm_clusters'][1][len(current_nsms):]:
        for nsm_service in nsm_cluster.nsm_services:
            nsm_service.delete()
            nsm_service.service.delete()
        nsm_cluster.delete()

    # Try to add a previously claimed NSM cluster
    with self.assertRaises(ValueError) as raise_info:
        AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names=[external_nsm_1])  # The provided cluster_name to claim has already been claimed
    self.assertEqual(first=str(raise_info.exception),
                     second='Some of the provided cluster_names have already been claimed before')

    # Add a 2nd NSM cluster
    AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names=[external_nsm_2])
    self.assertEqual(first=1, second=len(alba_backend.abm_cluster.abm_services))
    self.assertEqual(first=2, second=len(alba_backend.nsm_clusters))
    self.assertEqual(first=1, second=len(alba_backend.nsm_clusters[0].nsm_services))
    self.assertEqual(first=1, second=len(alba_backend.nsm_clusters[1].nsm_services))
    self.assertIsNone(obj=alba_backend.abm_cluster.abm_services[0].service.storagerouter)
    self.assertIsNone(obj=alba_backend.nsm_clusters[0].nsm_services[0].service.storagerouter)
    self.assertIsNone(obj=alba_backend.nsm_clusters[1].nsm_services[0].service.storagerouter)
    self.assertListEqual(list1=[['add_nsm_host', 'backend_1-nsm_1']], list2=VirtualAlbaBackend.run_log['backend_1-abm'])
    # After claiming, all three external clusters must be marked as in use
    for cluster_name, cluster_type in {external_abm_1: 'ABM', external_nsm_1: 'NSM', external_nsm_2: 'NSM'}.iteritems():
        arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
        self.assertDictEqual(d1={'cluster_name': cluster_name, 'cluster_type': cluster_type, 'internal': False, 'in_use': True},
                             d2=arakoon_installer.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name))