def extend_arakoon(cluster_name, master_storagerouter_ip, storagerouter_ip, cluster_basedir,
                   service_type=ServiceType.ARAKOON_CLUSTER_TYPES.FWK, clustered_nodes=None):
    """
    Adds an external arakoon to a storagerouter

    :param cluster_name: name of the already existing arakoon cluster
    :type cluster_name: str
    :param master_storagerouter_ip: master ip address of the existing arakoon cluster
                                    e.g. 10.100.199.11
    :type master_storagerouter_ip: str
    :param storagerouter_ip: ip of a new storagerouter to extend to
                             e.g. 10.100.199.12
    :type storagerouter_ip: str
    :param cluster_basedir: absolute path for the new arakoon cluster
    :type cluster_basedir: str
    :param service_type: type of plugin for arakoon (DEFAULT=ServiceType.ARAKOON_CLUSTER_TYPES.FWK)
        * FWK
        * ABM
        * NSM
    :type service_type: ovs.dal.hybrids.ServiceType.ARAKOON_CLUSTER_TYPES
    :param clustered_nodes: nodes who are available for the arakoon
                            (including the to be extended arakoon)
                            e.g. ['10.100.199.11', '10.100.199.12'] (DEFAULT=[])
    :type clustered_nodes: list
    :return: None
    :rtype: NoneType
    """
    if clustered_nodes is None:
        clustered_nodes = []
    client = SSHClient(storagerouter_ip, username='******')

    # Create the required directories on the node being extended to
    if not client.dir_exists(cluster_basedir):
        client.dir_create(cluster_basedir)

    ArakoonSetup.LOGGER.info("Starting extending arakoon cluster with name `{0}`, master_ip `{1}`, slave_ip `{2}`, base_dir `{3}`"
                             .format(cluster_name, master_storagerouter_ip, storagerouter_ip, cluster_basedir))
    arakoon_installer = ArakoonInstaller(cluster_name)
    arakoon_installer.load()
    arakoon_installer.extend_cluster(new_ip=storagerouter_ip,
                                     base_dir=cluster_basedir,
                                     locked=False,
                                     log_sinks=Logger.get_sink_path('automation_lib_arakoon_server'),
                                     crash_log_sinks=Logger.get_sink_path('automation_lib_arakoon_server_crash'))

    # ALBA-manager / NSM-host clusters need the matching alba plugin symlinked into the db dir
    if service_type == ServiceType.ARAKOON_CLUSTER_TYPES.ABM:
        client.run(['ln', '-s', '/usr/lib/alba/albamgr_plugin.cmxs',
                    '{0}/arakoon/{1}/db'.format(cluster_basedir, cluster_name)])
    elif service_type == ServiceType.ARAKOON_CLUSTER_TYPES.NSM:
        client.run(['ln', '-s', '/usr/lib/alba/nsm_host_plugin.cmxs',
                    '{0}/arakoon/{1}/db'.format(cluster_basedir, cluster_name)])

    # Checking if we need to restart the given nodes
    if len(clustered_nodes) != 0:
        # Fixed: the original passed `cluster_name` as an unused second format argument;
        # the message now actually renders it.
        ArakoonSetup.LOGGER.info("Trying to restart all given nodes {0} of arakoon cluster {1}"
                                 .format(clustered_nodes, cluster_name))
        arakoon_installer.restart_cluster_after_extending(new_ip=storagerouter_ip)
        ArakoonSetup.LOGGER.info("Finished restarting all given nodes {0} of arakoon cluster {1}"
                                 .format(clustered_nodes, cluster_name))

    ArakoonSetup.LOGGER.info("Finished extending arakoon cluster with name `{0}`, master_ip `{1}`, slave_ip `{2}`, base_dir `{3}`"
                             .format(cluster_name, master_storagerouter_ip, storagerouter_ip, cluster_basedir))
def promote_node(cluster_ip, master_ip, ip_client_map, unique_id, configure_memcached, configure_rabbitmq):
    """
    Promotes a given node to MASTER.

    Extends the configuration Arakoon cluster (when internally managed) and the OVS DB Arakoon
    cluster to the node, configures memcached and RabbitMQ (joining the existing RabbitMQ
    cluster by copying the Erlang cookie from the master), updates the framework endpoints in
    Configuration, runs 'promote' hooks and finally marks the node as MASTER.

    :param cluster_ip: IP of the node being promoted
    :param master_ip: IP of an existing master node used as reference (Arakoon load / RabbitMQ cookie)
    :param ip_client_map: mapping of node IP -> SSHClient; must contain cluster_ip and master_ip
    :param unique_id: machine id of the StorageRouter being promoted
    :param configure_memcached: whether memcached must be configured on this node
    :param configure_rabbitmq: whether RabbitMQ must be configured on this node
    :raises RuntimeError: when memcache nodes are unreachable or no other master node exists
    """
    # Local imports to avoid circular dependencies at module load time
    from ovs.dal.lists.storagerouterlist import StorageRouterList
    from ovs.dal.lists.servicetypelist import ServiceTypeList
    from ovs.dal.lists.servicelist import ServiceList
    from ovs.dal.hybrids.service import Service
    Toolbox.log(logger=NodeTypeController._logger, messages='Promoting node', title=True)
    service_manager = ServiceFactory.get_manager()
    if configure_memcached is True:
        # All memcache endpoints must be reachable before promoting, otherwise abort early
        if NodeTypeController._validate_local_memcache_servers(ip_client_map) is False:
            raise RuntimeError('Not all memcache nodes can be reached which is required for promoting a node.')

    target_client = ip_client_map[cluster_ip]
    machine_id = System.get_my_machine_id(target_client)
    node_name, _ = target_client.get_hostname()
    master_client = ip_client_map[master_ip]

    # Mark the StorageRouter as MASTER in the model up front
    storagerouter = StorageRouterList.get_by_machine_id(unique_id)
    storagerouter.node_type = 'MASTER'
    storagerouter.save()

    external_config = Configuration.get('/ovs/framework/external_config')
    if external_config is None:
        # Internally managed configuration cluster: extend the 'config' Arakoon to this node
        Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon configuration cluster')
        arakoon_installer = ArakoonInstaller(cluster_name='config')
        arakoon_installer.load(ip=master_ip)
        arakoon_installer.extend_cluster(new_ip=cluster_ip,
                                         base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
        arakoon_installer.restart_cluster_after_extending(new_ip=cluster_ip)
        service_manager.register_service(node_name=machine_id,
                                         service_metadata=arakoon_installer.service_metadata[cluster_ip])

    # Find other (arakoon) master nodes
    arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=arakoon_cluster_name)
    config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name)
    master_node_ips = [node.ip for node in config.nodes]
    if cluster_ip in master_node_ips:
        master_node_ips.remove(cluster_ip)
    if len(master_node_ips) == 0:
        raise RuntimeError('There should be at least one other master node')

    arakoon_ports = []
    if arakoon_metadata['internal'] is True:
        # Internally managed OVS DB cluster: extend it to this node and remember the ports
        # so the arakoon-ovsdb Service can be modeled below
        Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon OVS DB cluster')
        arakoon_installer = ArakoonInstaller(cluster_name=arakoon_cluster_name)
        arakoon_installer.load()
        arakoon_installer.extend_cluster(new_ip=cluster_ip,
                                         base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
        arakoon_installer.restart_cluster_after_extending(new_ip=cluster_ip)
        arakoon_ports = arakoon_installer.ports[cluster_ip]

    if configure_memcached is True:
        NodeTypeController.configure_memcached(client=target_client, logger=NodeTypeController._logger)
    NodeTypeController.add_services(client=target_client, node_type='master', logger=NodeTypeController._logger)

    Toolbox.log(logger=NodeTypeController._logger, messages='Update configurations')
    if configure_memcached is True:
        # Register this node's memcached endpoint if not yet present
        endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
        endpoint = '{0}:11211'.format(cluster_ip)
        if endpoint not in endpoints:
            endpoints.append(endpoint)
            Configuration.set('/ovs/framework/memcache|endpoints', endpoints)
    if configure_rabbitmq is True:
        # Register this node's AMQP endpoint if not yet present
        endpoints = Configuration.get('/ovs/framework/messagequeue|endpoints')
        endpoint = '{0}:5672'.format(cluster_ip)
        if endpoint not in endpoints:
            endpoints.append(endpoint)
            Configuration.set('/ovs/framework/messagequeue|endpoints', endpoints)

    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting master node services')
        # Reset cached store factories so they reconnect with the updated cluster layout
        PersistentFactory.store = None
        VolatileFactory.store = None
        # Model the arakoon-ovsdb service for this node if not yet present
        if 'arakoon-ovsdb' not in [s.name for s in ServiceList.get_services() if s.is_internal is False or s.storagerouter.ip == cluster_ip]:
            service = Service()
            service.name = 'arakoon-ovsdb'
            service.type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
            service.ports = arakoon_ports
            service.storagerouter = storagerouter
            service.save()

    if configure_rabbitmq is True:
        NodeTypeController.configure_rabbitmq(client=target_client, logger=NodeTypeController._logger)
        # Copy rabbitmq cookie: nodes must share the same Erlang cookie to join one RabbitMQ cluster
        rabbitmq_cookie_file = '/var/lib/rabbitmq/.erlang.cookie'
        Toolbox.log(logger=NodeTypeController._logger, messages='Copying RabbitMQ cookie')
        contents = master_client.file_read(rabbitmq_cookie_file)
        master_hostname, _ = master_client.get_hostname()
        target_client.dir_create(os.path.dirname(rabbitmq_cookie_file))
        target_client.file_write(rabbitmq_cookie_file, contents)
        target_client.file_chmod(rabbitmq_cookie_file, mode=0400)
        # Join the existing RabbitMQ cluster; sleeps give rabbit time to settle between steps
        target_client.run(['rabbitmq-server', '-detached'])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'stop_app'])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'join_cluster', 'rabbit@{0}'.format(master_hostname)])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'stop'])
        time.sleep(5)

        # Enable HA for the rabbitMQ queues
        ServiceFactory.change_service_state(target_client, 'rabbitmq-server', 'start', NodeTypeController._logger)
        NodeTypeController.check_rabbitmq_and_enable_ha_mode(client=target_client, logger=NodeTypeController._logger)
    NodeTypeController._configure_amqp_to_volumedriver()

    Toolbox.log(logger=NodeTypeController._logger, messages='Starting services')
    services = ['memcached', 'arakoon-ovsdb', 'rabbitmq-server']
    if arakoon_metadata['internal'] is True:
        # arakoon-ovsdb was already restarted by restart_cluster_after_extending above
        # when internally managed — presumably why it is skipped here; verify
        services.remove('arakoon-ovsdb')
    for service in services:
        if service_manager.has_service(service, client=target_client):
            ServiceFactory.change_service_state(target_client, service, 'start', NodeTypeController._logger)

    Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
    NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)

    # Run registered 'promote' hooks; a truthy result means a hook changed state and
    # another restart round is required
    if Toolbox.run_hooks(component='nodetype',
                         sub_component='promote',
                         logger=NodeTypeController._logger,
                         cluster_ip=cluster_ip,
                         master_ip=master_ip):
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)

    if NodeTypeController.avahi_installed(client=target_client, logger=NodeTypeController._logger) is True:
        NodeTypeController.configure_avahi(client=target_client, node_name=node_name, node_type='master', logger=NodeTypeController._logger)
    Configuration.set('/ovs/framework/hosts/{0}/type'.format(machine_id), 'MASTER')
    target_client.run(['chown', '-R', 'ovs:ovs', '/opt/OpenvStorage/config'])
    Configuration.set('/ovs/framework/hosts/{0}/promotecompleted'.format(machine_id), True)

    # A leftover rollback marker from a previous failed attempt is no longer relevant
    if target_client.file_exists('/tmp/ovs_rollback'):
        target_client.file_delete('/tmp/ovs_rollback')

    Toolbox.log(logger=NodeTypeController._logger, messages='Promote complete')
def _voldrv_arakoon_checkup(create_cluster):
    """
    Ensure the 'voldrv' Arakoon cluster exists and is modeled correctly.

    When `create_cluster` is True and no voldrv services are modeled yet, either claims an
    unused externally managed cluster or creates an internally managed one on a master
    StorageRouter with a DB role. Afterwards, an internally managed cluster is extended to
    every available master StorageRouter that does not run it yet.

    :param create_cluster: whether a new cluster may be created/claimed when none exists
    """
    def _add_service(service_storagerouter, arakoon_ports, service_name):
        """ Add a service to the storage router """
        new_service = Service()
        new_service.name = service_name
        new_service.type = service_type  # closes over the ARAKOON service type resolved below
        new_service.ports = arakoon_ports
        new_service.storagerouter = service_storagerouter
        new_service.save()
        return new_service

    current_ips = []        # IPs of StorageRouters already running an internal voldrv arakoon
    current_services = []   # modeled Service objects for the voldrv cluster
    service_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
    cluster_name = Configuration.get('/ovs/framework/arakoon_clusters').get('voldrv')
    if cluster_name is not None:
        # Collect the already-modeled services (and their IPs when internally managed)
        arakoon_service_name = ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name)
        for service in service_type.services:
            if service.name == arakoon_service_name:
                current_services.append(service)
                if service.is_internal is True:
                    current_ips.append(service.storagerouter.ip)

    all_sr_ips = [storagerouter.ip for storagerouter in StorageRouterList.get_slaves()]
    available_storagerouters = {}
    for storagerouter in StorageRouterList.get_masters():
        # Only masters with a DB-role partition are eligible to host the cluster
        storagerouter.invalidate_dynamics(['partition_config'])
        if len(storagerouter.partition_config[DiskPartition.ROLES.DB]) > 0:
            available_storagerouters[storagerouter] = DiskPartition(storagerouter.partition_config[DiskPartition.ROLES.DB][0])
        all_sr_ips.append(storagerouter.ip)

    if create_cluster is True and len(current_services) == 0:  # Create new cluster
        metadata = ArakoonInstaller.get_unused_arakoon_metadata_and_claim(cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.SD)
        if metadata is None:  # No externally managed cluster found, we create 1 ourselves
            if not available_storagerouters:
                raise RuntimeError('Could not find any Storage Router with a DB role')

            # NOTE: dict.items()[0] is Python 2 only; picks an arbitrary eligible master
            storagerouter, partition = available_storagerouters.items()[0]
            arakoon_voldrv_cluster = 'voldrv'
            arakoon_installer = ArakoonInstaller(cluster_name=arakoon_voldrv_cluster)
            arakoon_installer.create_cluster(cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.SD,
                                             ip=storagerouter.ip,
                                             base_dir=partition.folder,
                                             log_sinks=LogHandler.get_sink_path('arakoon-server_{0}'.format(arakoon_voldrv_cluster)),
                                             crash_log_sinks=LogHandler.get_sink_path('arakoon-server-crash_{0}'.format(arakoon_voldrv_cluster)))
            arakoon_installer.start_cluster()
            ports = arakoon_installer.ports[storagerouter.ip]
            metadata = arakoon_installer.metadata
            current_ips.append(storagerouter.ip)
        else:
            # Externally managed cluster claimed: no local ports / StorageRouter to model
            ports = []
            storagerouter = None

        cluster_name = metadata['cluster_name']
        Configuration.set('/ovs/framework/arakoon_clusters|voldrv', cluster_name)
        StorageDriverController._logger.info('Claiming {0} managed arakoon cluster: {1}'.format('externally' if storagerouter is None else 'internally', cluster_name))
        StorageDriverController._configure_arakoon_to_volumedriver(cluster_name=cluster_name)
        current_services.append(_add_service(service_storagerouter=storagerouter,
                                             arakoon_ports=ports,
                                             service_name=ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name)))

    cluster_name = Configuration.get('/ovs/framework/arakoon_clusters').get('voldrv')
    if cluster_name is None:
        return
    metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)
    # Internally managed cluster not yet present on every eligible master: extend it
    if 0 < len(current_services) < len(available_storagerouters) and metadata['internal'] is True:
        for storagerouter, partition in available_storagerouters.iteritems():
            if storagerouter.ip in current_ips:
                continue
            arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
            arakoon_installer.load()
            arakoon_installer.extend_cluster(new_ip=storagerouter.ip,
                                             base_dir=partition.folder,
                                             log_sinks=LogHandler.get_sink_path('arakoon-server_{0}'.format(cluster_name)),
                                             crash_log_sinks=LogHandler.get_sink_path('arakoon-server-crash_{0}'.format(cluster_name)))
            _add_service(service_storagerouter=storagerouter,
                         arakoon_ports=arakoon_installer.ports[storagerouter.ip],
                         service_name=ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name))
            current_ips.append(storagerouter.ip)
            arakoon_installer.restart_cluster_after_extending(new_ip=storagerouter.ip)
        StorageDriverController._configure_arakoon_to_volumedriver(cluster_name=cluster_name)
def ensure_nsm_clusters_load(cls, alba_backend, nsms_per_storagerouter=None, min_internal_nsms=1, external_nsm_cluster_names=None, version_str=None, ssh_clients=None):
    # type: (AlbaBackend, Optional[Dict[StorageRouter, int]], Optional[int], Optional[List[str]], Optional[str], Optional[Dict[StorageRouter, SSHClient]]) -> None
    """
    Ensure that all NSM clusters are not overloaded
    :param alba_backend: Alba Backend to ensure NSM Cluster load for
    :type alba_backend: AlbaBackend
    :param nsms_per_storagerouter: Amount of NSMs mapped by StorageRouter
    :type nsms_per_storagerouter: Dict[StorageRouter, int]
    :param min_internal_nsms: Minimum amount of NSM hosts that need to be provided
    :type min_internal_nsms: int
    :param external_nsm_cluster_names: Information about the additional clusters to claim (only for externally managed Arakoon clusters)
    :type external_nsm_cluster_names: list
    :param version_str: Alba version string
    :type version_str: str
    :param ssh_clients: SSHClients to use
    :type ssh_clients: Dict[StorageRouter, SSHClient]
    :return: None
    :rtype: NoneType
    """
    if ssh_clients is None:
        ssh_clients = {}
    if external_nsm_cluster_names is None:
        external_nsm_cluster_names = []
    nsms_per_storagerouter = nsms_per_storagerouter if nsms_per_storagerouter is not None else cls.get_nsms_per_storagerouter(alba_backend)
    version_str = version_str or AlbaArakoonInstaller.get_alba_version_string()
    nsm_loads = cls.get_nsm_loads(alba_backend)
    internal = AlbaArakoonInstaller.is_internally_managed(alba_backend)
    abm_cluster_name = alba_backend.abm_cluster.name
    safety = Configuration.get('/ovs/framework/plugins/alba/config|nsm.safety')
    maxload = Configuration.get('/ovs/framework/plugins/alba/config|nsm.maxload')

    overloaded = min(nsm_loads.values()) >= maxload
    if not overloaded:
        # At least 1 NSM is not overloaded yet
        AlbaArakoonController._logger.debug('ALBA Backend {0} - NSM load OK'.format(alba_backend.name))
        if internal:
            # Deploy NSMs only up to the requested minimum (may be 0 when already satisfied)
            nsms_to_add = max(0, min_internal_nsms - len(nsm_loads))
        else:
            nsms_to_add = len(external_nsm_cluster_names)
        if nsms_to_add == 0:
            return
    else:
        AlbaArakoonController._logger.warning('ALBA Backend {0} - NSM load is NOT OK'.format(alba_backend.name))
        if internal:
            # When load is not OK, deploy at least 1 additional NSM
            nsms_to_add = max(1, min_internal_nsms - len(nsm_loads))
        else:
            # For externally managed clusters we only claim the specified clusters, if none provided, we just log it
            nsms_to_add = len(external_nsm_cluster_names)
            if nsms_to_add == 0:
                cls._logger.critical('ALBA Backend {0} - All NSM clusters are overloaded'.format(alba_backend.name))
                return

    # Deploy new (internal) or claim existing (external) NSM clusters
    cls._logger.debug('ALBA Backend {0} - Currently {1} NSM cluster{2}'.format(alba_backend.name, len(nsm_loads), '' if len(nsm_loads) == 1 else 's'))
    AlbaArakoonController._logger.debug('ALBA Backend {0} - Trying to add {1} NSM cluster{2}'.format(alba_backend.name, nsms_to_add, '' if nsms_to_add == 1 else 's'))
    # NOTE(review): assumes at least one NSM already exists (max() on empty dict raises) — confirm
    base_number = max(nsm_loads.keys()) + 1
    for index, number in enumerate(xrange(base_number, base_number + nsms_to_add)):
        if not internal:
            # External clusters - find a reachable master to register the NSM through
            master_client = None
            if not ssh_clients:
                for storagerouter in StorageRouterList.get_masters():
                    try:
                        master_client = SSHClient(storagerouter)
                    except UnableToConnectException:
                        cls._logger.warning('StorageRouter {0} with IP {1} is not reachable'.format(storagerouter.name, storagerouter.ip))
            else:
                for storagerouter, ssh_client in ssh_clients.iteritems():
                    if storagerouter.node_type == 'MASTER':
                        master_client = ssh_client
            if not master_client:
                raise ValueError('Could not find an online master node')
            # @todo this might raise an indexerror?
            nsm_cluster_name = external_nsm_cluster_names[index]
            cls._logger.debug('ALBA Backend {0} - Claiming NSM cluster {1}'.format(alba_backend.name, nsm_cluster_name))
            metadata = ArakoonInstaller.get_unused_arakoon_metadata_and_claim(cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.NSM,
                                                                              cluster_name=nsm_cluster_name)
            if metadata is None:
                cls._logger.critical('ALBA Backend {0} - NSM cluster with name {1} could not be found'.format(alba_backend.name, nsm_cluster_name))
                continue
            cls._logger.debug('ALBA Backend {0} - Modeling services'.format(alba_backend.name))
            AlbaArakoonInstaller.model_arakoon_service(alba_backend=alba_backend,
                                                       cluster_name=nsm_cluster_name,
                                                       number=number)
            cls._logger.debug('ALBA Backend {0} - Registering NSM'.format(alba_backend.name))
            NSMInstaller.register_nsm(abm_name=abm_cluster_name,
                                      nsm_name=nsm_cluster_name,
                                      ip=master_client.ip)
            AlbaArakoonController._logger.debug('ALBA Backend {0} - Extended cluster'.format(alba_backend.name))
        else:
            # Internal clusters
            nsm_cluster_name = '{0}-nsm_{1}'.format(alba_backend.name, number)
            cls._logger.debug('ALBA Backend {0} - Adding NSM cluster {1}'.format(alba_backend.name, nsm_cluster_name))
            # One of the NSM nodes is overloaded. This means the complete NSM is considered overloaded
            # Figure out which StorageRouters are the least occupied
            loads = sorted(nsms_per_storagerouter.values())[:safety]
            storagerouters = []
            for storagerouter, load in nsms_per_storagerouter.iteritems():
                if load in loads:
                    storagerouters.append(storagerouter)
                if len(storagerouters) == safety:
                    break
            # Creating a new NSM cluster
            for sub_index, storagerouter in enumerate(storagerouters):
                nsms_per_storagerouter[storagerouter] += 1
                partition = AlbaArakoonInstaller.get_db_partition(storagerouter)
                arakoon_installer = ArakoonInstaller(cluster_name=nsm_cluster_name)
                # @todo Use deploy and extend code. (Disable register nsm in those parts)
                if sub_index == 0:
                    arakoon_installer.create_cluster(cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.NSM,
                                                     ip=storagerouter.ip,
                                                     base_dir=partition.folder,
                                                     plugins={NSM_PLUGIN: version_str})
                else:
                    cls._logger.debug('ALBA Backend {0} - Extending NSM cluster {1}'.format(alba_backend.name, nsm_cluster_name))
                    arakoon_installer.load()
                    arakoon_installer.extend_cluster(new_ip=storagerouter.ip,
                                                     base_dir=partition.folder,
                                                     plugins={NSM_PLUGIN: version_str})
                cls._logger.debug('ALBA Backend {0} - Linking plugins'.format(alba_backend.name))
                # Fixed: original passed the StorageRouter *class* to SSHClient in the fallback;
                # it must be the storagerouter instance from this loop iteration
                ssh_client = ssh_clients.get(storagerouter) or SSHClient(storagerouter)
                AlbaArakoonInstaller.link_plugins(client=ssh_client,
                                                  data_dir=partition.folder,
                                                  plugins=[NSM_PLUGIN],
                                                  cluster_name=nsm_cluster_name)
                cls._logger.debug('ALBA Backend {0} - Modeling services'.format(alba_backend.name))
                AlbaArakoonInstaller.model_arakoon_service(alba_backend=alba_backend,
                                                           cluster_name=nsm_cluster_name,
                                                           ports=arakoon_installer.ports[storagerouter.ip],
                                                           storagerouter=storagerouter,
                                                           number=number)
                if sub_index == 0:
                    cls._logger.debug('ALBA Backend {0} - Starting cluster'.format(alba_backend.name))
                    arakoon_installer.start_cluster()
                else:
                    AlbaArakoonController._logger.debug('ALBA Backend {0} - Restarting cluster'.format(alba_backend.name))
                    arakoon_installer.restart_cluster_after_extending(new_ip=storagerouter.ip)
            cls._logger.debug('ALBA Backend {0} - Registering NSM'.format(alba_backend.name))
            NSMInstaller.register_nsm(abm_name=abm_cluster_name,
                                      nsm_name=nsm_cluster_name,
                                      ip=storagerouters[0].ip)
            cls._logger.debug('ALBA Backend {0} - Added NSM cluster {1}'.format(alba_backend.name, nsm_cluster_name))