def remove_node(node_ip, silent=None):
    """
    Remove the node with specified IP from the cluster

    The removal runs in three phases: validations (no changes applied), confirmations
    (interactive questions unless silent == '--force-yes') and the actual removal.
    Every failure path logs the problem and terminates the process with exit code 1.

    :param node_ip: IP of the node to remove
    :type node_ip: str
    :param silent: If silent == '--force-yes' no question will be asked to confirm the removal
    :type silent: str
    :return: None
    """
    from ovs.dal.lists.storagerouterlist import StorageRouterList
    from ovs.lib.storagedriver import StorageDriverController
    from ovs.lib.vpool import VPoolController

    Toolbox.log(logger=NodeRemovalController._logger, messages='Remove node', boxed=True)
    Toolbox.log(logger=NodeRemovalController._logger,
                messages='WARNING: Some of these steps may take a very long time, please check the logs for more information\n\n')
    service_manager = ServiceFactory.get_manager()

    ###############
    # VALIDATIONS #
    ###############
    try:
        # Validate the type BEFORE calling a string method on it, otherwise a non-string
        # input fails with an AttributeError and this check can never trigger
        if not isinstance(node_ip, str):
            raise ValueError('Node IP must be a string')
        node_ip = node_ip.strip()
        if not re.match(SSHClient.IP_REGEX, node_ip):
            raise ValueError('Invalid IP {0} specified'.format(node_ip))

        storage_router_all = sorted(StorageRouterList.get_storagerouters(), key=lambda k: k.name)
        storage_router_masters = StorageRouterList.get_masters()
        storage_router_all_ips = set(storage_router.ip for storage_router in storage_router_all)
        storage_router_master_ips = set(storage_router.ip for storage_router in storage_router_masters)
        storage_router_to_remove = StorageRouterList.get_by_ip(node_ip)
        offline_reasons = {}  # Maps the IP of every unreachable node to a human readable reason
        if node_ip not in storage_router_all_ips:
            raise ValueError('Unknown IP specified\nKnown in model:\n - {0}\nSpecified for removal:\n - {1}'.format('\n - '.join(storage_router_all_ips), node_ip))

        if len(storage_router_all_ips) == 1:
            raise RuntimeError("Removing the only node is not possible")

        if node_ip in storage_router_master_ips and len(storage_router_master_ips) == 1:
            raise RuntimeError("Removing the only master node is not possible")

        if System.get_my_storagerouter() == storage_router_to_remove:
            raise RuntimeError('The node to be removed cannot be identical to the node on which the removal is initiated')

        Toolbox.log(logger=NodeRemovalController._logger, messages='Creating SSH connections to remaining master nodes')
        master_ip = None
        ip_client_map = {}
        storage_routers_offline = []
        storage_router_to_remove_online = True
        for storage_router in storage_router_all:
            try:
                client = SSHClient(storage_router, username='******', timeout=10)
            except (UnableToConnectException, NotAuthenticatedException, TimeOutException) as ex:
                if isinstance(ex, UnableToConnectException):
                    msg = 'Unable to connect'
                elif isinstance(ex, NotAuthenticatedException):
                    msg = 'Could not authenticate'
                else:  # Only TimeOutException is left, given the except clause above
                    msg = 'Connection timed out'
                Toolbox.log(logger=NodeRemovalController._logger,
                            messages=' * Node with IP {0:<15}- {1}'.format(storage_router.ip, msg))
                offline_reasons[storage_router.ip] = msg
                storage_routers_offline.append(storage_router)
                if storage_router == storage_router_to_remove:
                    storage_router_to_remove_online = False
                continue  # No client available for this node, nothing more to register

            Toolbox.log(logger=NodeRemovalController._logger,
                        messages=' * Node with IP {0:<15}- Successfully connected'.format(storage_router.ip))
            ip_client_map[storage_router.ip] = client
            if storage_router != storage_router_to_remove and storage_router.node_type == 'MASTER':
                master_ip = storage_router.ip

        if len(ip_client_map) == 0 or master_ip is None:
            raise RuntimeError('Could not connect to any master node in the cluster')

        storage_router_to_remove.invalidate_dynamics('vdisks_guids')
        if len(storage_router_to_remove.vdisks_guids) > 0:  # vDisks are supposed to be moved away manually before removing a node
            raise RuntimeError("Still vDisks attached to Storage Router {0}".format(storage_router_to_remove.name))

        internal_memcached = Toolbox.is_service_internally_managed(service='memcached')
        internal_rabbit_mq = Toolbox.is_service_internally_managed(service='rabbitmq')
        memcached_endpoints = Configuration.get(key='/ovs/framework/memcache|endpoints')
        rabbit_mq_endpoints = Configuration.get(key='/ovs/framework/messagequeue|endpoints')
        # Work on copies: the originals are being iterated while endpoints get dropped
        copy_memcached_endpoints = list(memcached_endpoints)
        copy_rabbit_mq_endpoints = list(rabbit_mq_endpoints)
        for endpoint in memcached_endpoints:
            if endpoint.startswith(storage_router_to_remove.ip):
                copy_memcached_endpoints.remove(endpoint)
        for endpoint in rabbit_mq_endpoints:
            if endpoint.startswith(storage_router_to_remove.ip):
                copy_rabbit_mq_endpoints.remove(endpoint)
        if len(copy_memcached_endpoints) == 0 and internal_memcached is True:
            raise RuntimeError('Removal of provided nodes will result in a complete removal of the memcached service')
        if len(copy_rabbit_mq_endpoints) == 0 and internal_rabbit_mq is True:
            raise RuntimeError('Removal of provided nodes will result in a complete removal of the messagequeue service')

        Toolbox.run_hooks(component='noderemoval',
                          sub_component='validate_removal',
                          logger=NodeRemovalController._logger,
                          cluster_ip=storage_router_to_remove.ip)
    except KeyboardInterrupt:
        Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages='Removal has been aborted during the validation step. No changes have been applied.',
                    boxed=True,
                    loglevel='warning')
        sys.exit(1)
    except Exception as exception:
        Toolbox.log(logger=NodeRemovalController._logger, messages=[str(exception)], boxed=True, loglevel='exception')
        sys.exit(1)

    #################
    # CONFIRMATIONS #
    #################
    try:
        interactive = silent != '--force-yes'
        remove_asd_manager = not interactive  # Remove ASD manager if non-interactive else ask
        if interactive is True:
            if len(storage_routers_offline) > 0:
                Toolbox.log(logger=NodeRemovalController._logger,
                            messages='Certain nodes appear to be offline. These will not fully removed and will cause issues if they are not really offline.')
                Toolbox.log(logger=NodeRemovalController._logger,
                            messages='Offline nodes: {0}'.format(''.join('\n * {0:<15}- {1}.'.format(ip, message) for ip, message in offline_reasons.items())))
                valid_node_info = Interactive.ask_yesno(message='Continue the removal with these being presumably offline?', default_value=False)
                if valid_node_info is False:
                    Toolbox.log(logger=NodeRemovalController._logger,
                                messages='Please validate the state of the nodes before removing.',
                                title=True)
                    sys.exit(1)
            proceed = Interactive.ask_yesno(message='Are you sure you want to remove node {0}?'.format(storage_router_to_remove.name), default_value=False)
            if proceed is False:
                Toolbox.log(logger=NodeRemovalController._logger, messages='Abort removal', title=True)
                sys.exit(1)

            remove_asd_manager = True
            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove, username='******')
                if service_manager.has_service(name='asd-manager', client=client):
                    remove_asd_manager = Interactive.ask_yesno(message='Do you also want to remove the ASD manager and related ASDs?', default_value=False)

            if remove_asd_manager is True or storage_router_to_remove_online is False:
                for fct in Toolbox.fetch_hooks('noderemoval', 'validate_asd_removal'):
                    validation_output = fct(storage_router_to_remove.ip)
                    if validation_output['confirm'] is True:
                        if Interactive.ask_yesno(message=validation_output['question'], default_value=False) is False:
                            remove_asd_manager = False
                            break
    except KeyboardInterrupt:
        Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages='Removal has been aborted during the confirmation step. No changes have been applied.',
                    boxed=True,
                    loglevel='warning')
        sys.exit(1)
    except Exception as exception:
        Toolbox.log(logger=NodeRemovalController._logger, messages=[str(exception)], boxed=True, loglevel='exception')
        sys.exit(1)

    ###########
    # REMOVAL #
    ###########
    try:
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages='Starting removal of node {0} - {1}'.format(storage_router_to_remove.name, storage_router_to_remove.ip))
        if storage_router_to_remove_online is False:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages=' Marking all Storage Drivers served by Storage Router {0} as offline'.format(storage_router_to_remove.ip))
            StorageDriverController.mark_offline(storagerouter_guid=storage_router_to_remove.guid)

        # Remove vPools
        Toolbox.log(logger=NodeRemovalController._logger, messages=' Removing vPools from node')
        storage_routers_offline_guids = [sr.guid for sr in storage_routers_offline if sr.guid != storage_router_to_remove.guid]
        for storage_driver in storage_router_to_remove.storagedrivers:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages=' Removing vPool {0} from node'.format(storage_driver.vpool.name))
            VPoolController.shrink_vpool(storagedriver_guid=storage_driver.guid,
                                         offline_storage_router_guids=storage_routers_offline_guids)

        # Demote if MASTER
        if storage_router_to_remove.node_type == 'MASTER':
            NodeTypeController.demote_node(cluster_ip=storage_router_to_remove.ip,
                                           master_ip=master_ip,
                                           ip_client_map=ip_client_map,
                                           unique_id=storage_router_to_remove.machine_id,
                                           unconfigure_memcached=internal_memcached,
                                           unconfigure_rabbitmq=internal_rabbit_mq,
                                           offline_nodes=storage_routers_offline)

        # Stop / remove services
        Toolbox.log(logger=NodeRemovalController._logger, messages='Stopping and removing services')
        if storage_router_to_remove_online is True:
            client = SSHClient(endpoint=storage_router_to_remove, username='******')
            NodeRemovalController.remove_services(client=client,
                                                  node_type=storage_router_to_remove.node_type.lower(),
                                                  logger=NodeRemovalController._logger)
            service = 'watcher-config'
            if service_manager.has_service(service, client=client):
                Toolbox.log(logger=NodeRemovalController._logger, messages='Removing service {0}'.format(service))
                service_manager.stop_service(service, client=client)
                service_manager.remove_service(service, client=client)

        Toolbox.run_hooks(component='noderemoval',
                          sub_component='remove',
                          logger=NodeRemovalController._logger,
                          cluster_ip=storage_router_to_remove.ip,
                          complete_removal=remove_asd_manager)

        # Clean up model
        Toolbox.log(logger=NodeRemovalController._logger, messages='Removing node from model')
        for service in storage_router_to_remove.services:
            service.delete()
        for disk in storage_router_to_remove.disks:
            for partition in disk.partitions:
                partition.delete()
            disk.delete()
        for j_domain in storage_router_to_remove.domains:
            j_domain.delete()
        Configuration.delete('/ovs/framework/hosts/{0}'.format(storage_router_to_remove.machine_id))

        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map,
                                                                   offline_node_ips=[node.ip for node in storage_routers_offline],
                                                                   logger=NodeRemovalController._logger)

        if storage_router_to_remove_online is True:
            client = SSHClient(endpoint=storage_router_to_remove, username='******')
            client.file_delete(filenames=[CACC_LOCATION])
            client.file_delete(filenames=[CONFIG_STORE_LOCATION])
        storage_router_to_remove.delete()
        Toolbox.log(logger=NodeRemovalController._logger, messages='Successfully removed node\n')
    except Exception as exception:
        Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages=['An unexpected error occurred:', str(exception)],
                    boxed=True,
                    loglevel='exception')
        sys.exit(1)
    except KeyboardInterrupt:
        # KeyboardInterrupt does not derive from Exception, so this clause is reachable despite its position
        Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages='This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.',
                    boxed=True,
                    loglevel='error')
        sys.exit(1)

    # The ASD manager can only be removed remotely when the node itself is reachable
    if remove_asd_manager is True and storage_router_to_remove_online is True:
        Toolbox.log(logger=NodeRemovalController._logger, messages='\nRemoving ASD Manager')
        with remote(storage_router_to_remove.ip, [os]) as rem:
            rem.os.system('asd-manager remove --force-yes')
    Toolbox.log(logger=NodeRemovalController._logger, messages='Remove nodes finished', title=True)
def remove_node(node_ip, silent=None):
    """
    Remove the node with specified IP from the cluster

    The removal runs in three phases: validations (no changes applied), confirmations
    (interactive questions unless silent == '--force-yes') and the actual removal.
    Failures during validation or removal are logged and terminate the process with exit code 1.

    :param node_ip: IP of the node to remove
    :type node_ip: str
    :param silent: If silent == '--force-yes' no question will be asked to confirm the removal
    :type silent: str
    :return: None
    """
    from ovs.lib.storagedriver import StorageDriverController
    from ovs.lib.storagerouter import StorageRouterController
    from ovs.dal.lists.storagerouterlist import StorageRouterList

    Toolbox.log(logger=NodeRemovalController._logger, messages="Remove node", boxed=True)
    Toolbox.log(
        logger=NodeRemovalController._logger,
        messages="WARNING: Some of these steps may take a very long time, please check the logs for more information\n\n",
    )

    ###############
    # VALIDATIONS #
    ###############
    try:
        # Validate the type BEFORE calling a string method on it, otherwise a non-string
        # input fails with an AttributeError and this check can never trigger
        if not isinstance(node_ip, str):
            raise ValueError("Node IP must be a string")
        node_ip = node_ip.strip()
        if not re.match(SSHClient.IP_REGEX, node_ip):
            raise ValueError("Invalid IP {0} specified".format(node_ip))

        storage_router_all = StorageRouterList.get_storagerouters()
        storage_router_masters = StorageRouterList.get_masters()
        storage_router_all_ips = set(storage_router.ip for storage_router in storage_router_all)
        storage_router_master_ips = set(storage_router.ip for storage_router in storage_router_masters)
        storage_router_to_remove = StorageRouterList.get_by_ip(node_ip)

        if node_ip not in storage_router_all_ips:
            raise ValueError(
                "Unknown IP specified\nKnown in model:\n - {0}\nSpecified for removal:\n - {1}".format(
                    "\n - ".join(storage_router_all_ips), node_ip
                )
            )

        if len(storage_router_all_ips) == 1:
            raise RuntimeError("Removing the only node is not possible")

        if node_ip in storage_router_master_ips and len(storage_router_master_ips) == 1:
            raise RuntimeError("Removing the only master node is not possible")

        if System.get_my_storagerouter() == storage_router_to_remove:
            raise RuntimeError(
                "The node to be removed cannot be identical to the node on which the removal is initiated"
            )

        Toolbox.log(
            logger=NodeRemovalController._logger, messages="Creating SSH connections to remaining master nodes"
        )
        master_ip = None
        ip_client_map = {}
        storage_routers_offline = []
        storage_router_to_remove_online = True
        for storage_router in storage_router_all:
            try:
                client = SSHClient(storage_router, username="******")
                # Running a trivial command verifies the connection is actually usable
                if client.run(["pwd"]):
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages=" Node with IP {0:<15} successfully connected to".format(storage_router.ip),
                    )
                    ip_client_map[storage_router.ip] = client
                    if storage_router != storage_router_to_remove and storage_router.node_type == "MASTER":
                        master_ip = storage_router.ip
            except UnableToConnectException:
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages=" Node with IP {0:<15} is unreachable".format(storage_router.ip),
                )
                storage_routers_offline.append(storage_router)
                if storage_router == storage_router_to_remove:
                    storage_router_to_remove_online = False

        if len(ip_client_map) == 0 or master_ip is None:
            raise RuntimeError("Could not connect to any master node in the cluster")

        storage_router_to_remove.invalidate_dynamics("vdisks_guids")
        # vDisks are supposed to be moved away manually before removing a node
        if len(storage_router_to_remove.vdisks_guids) > 0:
            raise RuntimeError("Still vDisks attached to Storage Router {0}".format(storage_router_to_remove.name))

        internal_memcached = Toolbox.is_service_internally_managed(service="memcached")
        internal_rabbit_mq = Toolbox.is_service_internally_managed(service="rabbitmq")
        memcached_endpoints = Configuration.get(key="/ovs/framework/memcache|endpoints")
        rabbit_mq_endpoints = Configuration.get(key="/ovs/framework/messagequeue|endpoints")
        # Work on copies: the originals are being iterated while endpoints get dropped
        copy_memcached_endpoints = list(memcached_endpoints)
        copy_rabbit_mq_endpoints = list(rabbit_mq_endpoints)
        for endpoint in memcached_endpoints:
            if endpoint.startswith(storage_router_to_remove.ip):
                copy_memcached_endpoints.remove(endpoint)
        for endpoint in rabbit_mq_endpoints:
            if endpoint.startswith(storage_router_to_remove.ip):
                copy_rabbit_mq_endpoints.remove(endpoint)
        if len(copy_memcached_endpoints) == 0 and internal_memcached is True:
            raise RuntimeError(
                "Removal of provided nodes will result in a complete removal of the memcached service"
            )
        if len(copy_rabbit_mq_endpoints) == 0 and internal_rabbit_mq is True:
            raise RuntimeError(
                "Removal of provided nodes will result in a complete removal of the messagequeue service"
            )
    except Exception as exception:
        Toolbox.log(
            logger=NodeRemovalController._logger, messages=[str(exception)], boxed=True, loglevel="exception"
        )
        sys.exit(1)

    #################
    # CONFIRMATIONS #
    #################
    interactive = silent != "--force-yes"
    remove_asd_manager = not interactive  # Remove ASD manager if non-interactive else ask
    if interactive is True:
        proceed = Interactive.ask_yesno(
            message="Are you sure you want to remove node {0}?".format(storage_router_to_remove.name),
            default_value=False,
        )
        if proceed is False:
            Toolbox.log(logger=NodeRemovalController._logger, messages="Abort removal", title=True)
            sys.exit(1)

        if storage_router_to_remove_online is True:
            client = SSHClient(endpoint=storage_router_to_remove, username="******")
            if ServiceManager.has_service(name="asd-manager", client=client):
                remove_asd_manager = Interactive.ask_yesno(
                    message="Do you also want to remove the ASD manager and related ASDs?", default_value=False
                )

        if remove_asd_manager is True or storage_router_to_remove_online is False:
            for function in Toolbox.fetch_hooks("setup", "validate_asd_removal"):
                validation_output = function(storage_router_to_remove.ip)
                if validation_output["confirm"] is True:
                    if Interactive.ask_yesno(message=validation_output["question"], default_value=False) is False:
                        remove_asd_manager = False
                        break

    ###########
    # REMOVAL #
    ###########
    try:
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages="Starting removal of node {0} - {1}".format(
                storage_router_to_remove.name, storage_router_to_remove.ip
            ),
        )
        if storage_router_to_remove_online is False:
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=" Marking all Storage Drivers served by Storage Router {0} as offline".format(
                    storage_router_to_remove.ip
                ),
            )
            StorageDriverController.mark_offline(storagerouter_guid=storage_router_to_remove.guid)

        # Remove vPools
        Toolbox.log(logger=NodeRemovalController._logger, messages=" Removing vPools from node")
        storage_routers_offline_guids = [
            sr.guid for sr in storage_routers_offline if sr.guid != storage_router_to_remove.guid
        ]
        for storage_driver in storage_router_to_remove.storagedrivers:
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=" Removing vPool {0} from node".format(storage_driver.vpool.name),
            )
            StorageRouterController.remove_storagedriver(
                storagedriver_guid=storage_driver.guid, offline_storage_router_guids=storage_routers_offline_guids
            )

        # Demote if MASTER
        if storage_router_to_remove.node_type == "MASTER":
            NodeTypeController.demote_node(
                cluster_ip=storage_router_to_remove.ip,
                master_ip=master_ip,
                ip_client_map=ip_client_map,
                unique_id=storage_router_to_remove.machine_id,
                unconfigure_memcached=internal_memcached,
                unconfigure_rabbitmq=internal_rabbit_mq,
                offline_nodes=storage_routers_offline,
            )

        # Stop / remove services
        Toolbox.log(logger=NodeRemovalController._logger, messages="Stopping and removing services")
        config_store = Configuration.get_store()
        if storage_router_to_remove_online is True:
            client = SSHClient(endpoint=storage_router_to_remove, username="******")
            NodeRemovalController.remove_services(
                client=client,
                node_type=storage_router_to_remove.node_type.lower(),
                logger=NodeRemovalController._logger,
            )
            service = "watcher-config"
            if ServiceManager.has_service(service, client=client):
                Toolbox.log(logger=NodeRemovalController._logger, messages="Removing service {0}".format(service))
                ServiceManager.stop_service(service, client=client)
                ServiceManager.remove_service(service, client=client)

            if config_store == "etcd":
                from ovs.extensions.db.etcd.installer import EtcdInstaller

                if Configuration.get(key="/ovs/framework/external_config") is None:
                    Toolbox.log(logger=NodeRemovalController._logger, messages=" Removing Etcd cluster")
                    try:
                        EtcdInstaller.stop("config", client)
                        EtcdInstaller.remove("config", client)
                    except Exception as ex:
                        # Best effort: a failing Etcd cleanup must not abort the node removal
                        Toolbox.log(
                            logger=NodeRemovalController._logger,
                            messages=["\nFailed to unconfigure Etcd", ex],
                            loglevel="exception",
                        )

                Toolbox.log(logger=NodeRemovalController._logger, messages="Removing Etcd proxy")
                EtcdInstaller.remove_proxy("config", client.ip)

        Toolbox.run_hooks(
            component="noderemoval",
            sub_component="remove",
            logger=NodeRemovalController._logger,
            cluster_ip=storage_router_to_remove.ip,
            complete_removal=remove_asd_manager,
        )

        # Clean up model
        Toolbox.log(logger=NodeRemovalController._logger, messages="Removing node from model")
        for service in storage_router_to_remove.services:
            service.delete()
        for disk in storage_router_to_remove.disks:
            for partition in disk.partitions:
                partition.delete()
            disk.delete()
        for j_domain in storage_router_to_remove.domains:
            j_domain.delete()
        Configuration.delete("/ovs/framework/hosts/{0}".format(storage_router_to_remove.machine_id))

        NodeTypeController.restart_framework_and_memcache_services(
            clients=ip_client_map,
            offline_node_ips=[node.ip for node in storage_routers_offline],
            logger=NodeRemovalController._logger,
        )

        if storage_router_to_remove_online is True:
            client = SSHClient(endpoint=storage_router_to_remove, username="******")
            if config_store == "arakoon":
                client.file_delete(filenames=[ArakoonConfiguration.CACC_LOCATION])
            client.file_delete(filenames=[Configuration.BOOTSTRAP_CONFIG_LOCATION])
        storage_router_to_remove.delete()
        Toolbox.log(logger=NodeRemovalController._logger, messages="Successfully removed node\n")
    except Exception as exception:
        Toolbox.log(logger=NodeRemovalController._logger, messages="\n")
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages=["An unexpected error occurred:", str(exception)],
            boxed=True,
            loglevel="exception",
        )
        sys.exit(1)
    except KeyboardInterrupt:
        # KeyboardInterrupt does not derive from Exception, so this clause is reachable despite its position
        Toolbox.log(logger=NodeRemovalController._logger, messages="\n")
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages="This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.",
            boxed=True,
            loglevel="error",
        )
        sys.exit(1)

    # Only contact the node for the ASD manager removal when it was reachable during validation;
    # opening a remote session to an offline node would fail after the removal already succeeded
    if remove_asd_manager is True and storage_router_to_remove_online is True:
        Toolbox.log(logger=NodeRemovalController._logger, messages="\nRemoving ASD Manager")
        with remote(storage_router_to_remove.ip, [os]) as rem:
            rem.os.system("asd-manager remove --force-yes")
    Toolbox.log(logger=NodeRemovalController._logger, messages="Remove nodes finished", title=True)
def demote_node(cluster_ip, master_ip, ip_client_map, unique_id, unconfigure_memcached, unconfigure_rabbitmq, offline_nodes=None):
    """
    Demotes a given node

    The StorageRouter is marked as 'EXTRA' in the model, the node leaves the internally managed
    Arakoon clusters, its memcached / RabbitMQ endpoints are removed from the configuration
    (when requested) and the master-only services are stopped and removed on the node.
    Most individual steps are best effort: their failures are logged and the demote continues.

    :param cluster_ip: IP of the node to demote
    :type cluster_ip: str
    :param master_ip: IP of a master node; its client is used to make RabbitMQ forget an offline node
    :type master_ip: str
    :param ip_client_map: Clients of the reachable nodes, keyed by the IP of the node
    :type ip_client_map: dict
    :param unique_id: Machine ID of the node to demote, used to look up its StorageRouter
    :type unique_id: str
    :param unconfigure_memcached: Remove the memcached endpoint of the node and stop the service
    :type unconfigure_memcached: bool
    :param unconfigure_rabbitmq: Remove the RabbitMQ endpoint of the node and unconfigure the service
    :type unconfigure_rabbitmq: bool
    :param offline_nodes: StorageRouters which are unreachable (defaults to no offline nodes)
    :type offline_nodes: list
    :raises RuntimeError: When memcached must be unconfigured but not all memcache nodes can be reached,
                          or when no other master node would be left
    :return: None
    """
    from ovs.dal.lists.storagerouterlist import StorageRouterList

    Toolbox.log(logger=NodeTypeController._logger, messages='Demoting node', title=True)
    service_manager = ServiceFactory.get_manager()
    if offline_nodes is None:
        offline_nodes = []

    if unconfigure_memcached is True and len(offline_nodes) == 0:
        if NodeTypeController._validate_local_memcache_servers(ip_client_map) is False:
            raise RuntimeError('Not all memcache nodes can be reached which is required for demoting a node.')

    # Find other (arakoon) master nodes
    arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=arakoon_cluster_name)
    config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name)
    master_node_ips = [node.ip for node in config.nodes]
    shrink = False
    if cluster_ip in master_node_ips:
        shrink = True
        master_node_ips.remove(cluster_ip)
    if len(master_node_ips) == 0:
        raise RuntimeError('There should be at least one other master node')

    storagerouter = StorageRouterList.get_by_machine_id(unique_id)
    storagerouter.node_type = 'EXTRA'
    storagerouter.save()

    offline_node_ips = [node.ip for node in offline_nodes]
    if arakoon_metadata['internal'] is True and shrink is True:
        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Leaving Arakoon {0} cluster'.format(arakoon_cluster_name))
        arakoon_installer = ArakoonInstaller(cluster_name=arakoon_cluster_name)
        arakoon_installer.load()
        arakoon_installer.shrink_cluster(removal_ip=cluster_ip, offline_nodes=offline_node_ips)
        arakoon_installer.restart_cluster_after_shrinking()

    try:
        external_config = Configuration.get('/ovs/framework/external_config')
        if external_config is None and shrink is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Arakoon config cluster')
            arakoon_installer = ArakoonInstaller(cluster_name='config')
            arakoon_installer.load(ip=master_node_ips[0])
            arakoon_installer.shrink_cluster(removal_ip=cluster_ip, offline_nodes=offline_node_ips)
            arakoon_installer.restart_cluster_after_shrinking()
    except Exception as ex:
        Toolbox.log(logger=NodeTypeController._logger,
                    messages=['\nFailed to leave configuration cluster', ex],
                    loglevel='exception')

    Toolbox.log(logger=NodeTypeController._logger, messages='Update configurations')
    try:
        if unconfigure_memcached is True:
            endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
            endpoint = '{0}:{1}'.format(cluster_ip, 11211)
            if endpoint in endpoints:
                endpoints.remove(endpoint)
            Configuration.set('/ovs/framework/memcache|endpoints', endpoints)
        if unconfigure_rabbitmq is True:
            endpoints = Configuration.get('/ovs/framework/messagequeue|endpoints')
            endpoint = '{0}:{1}'.format(cluster_ip, 5672)
            if endpoint in endpoints:
                endpoints.remove(endpoint)
            Configuration.set('/ovs/framework/messagequeue|endpoints', endpoints)
    except Exception as ex:
        Toolbox.log(logger=NodeTypeController._logger,
                    messages=['\nFailed to update configurations', ex],
                    loglevel='exception')

    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting master node services')
        # Reset the cached store clients so they are rebuilt after the cluster changed
        # NOTE(review): presumably these factories lazily re-create their store - confirm
        PersistentFactory.store = None
        VolatileFactory.store = None

        for service in storagerouter.services:
            if service.name == 'arakoon-ovsdb':
                service.delete()

    target_client = None
    if storagerouter in offline_nodes:
        # The node itself cannot be reached: only make the RabbitMQ cluster forget it via a master
        if unconfigure_rabbitmq is True:
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Removing/unconfiguring offline RabbitMQ node')
            client = ip_client_map[master_ip]
            try:
                client.run(['rabbitmqctl', 'forget_cluster_node', 'rabbit@{0}'.format(storagerouter.name)])
            except Exception as ex:
                Toolbox.log(logger=NodeTypeController._logger,
                            messages=['\nFailed to forget RabbitMQ cluster node', ex],
                            loglevel='exception')
    else:
        target_client = ip_client_map[cluster_ip]
        if unconfigure_rabbitmq is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Removing/unconfiguring RabbitMQ')
            try:
                if service_manager.has_service('rabbitmq-server', client=target_client):
                    ServiceFactory.change_service_state(target_client, 'rabbitmq-server', 'stop', NodeTypeController._logger)
                    target_client.run(['rabbitmq-server', '-detached'])
                    time.sleep(5)
                    target_client.run(['rabbitmqctl', 'stop_app'])
                    time.sleep(5)
                    target_client.run(['rabbitmqctl', 'reset'])
                    time.sleep(5)
                    target_client.run(['rabbitmqctl', 'stop'])
                    time.sleep(5)
                    target_client.file_unlink("/var/lib/rabbitmq/.erlang.cookie")
                    ServiceFactory.change_service_state(target_client, 'rabbitmq-server', 'stop', NodeTypeController._logger)  # To be sure
            except Exception as ex:
                Toolbox.log(logger=NodeTypeController._logger,
                            messages=['\nFailed to remove/unconfigure RabbitMQ', ex],
                            loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger, messages='Stopping services')
        services = ['memcached', 'rabbitmq-server']
        if unconfigure_rabbitmq is False:
            services.remove('rabbitmq-server')
        if unconfigure_memcached is False:
            services.remove('memcached')
        for service in services:
            if service_manager.has_service(service, client=target_client):
                Toolbox.log(logger=NodeTypeController._logger,
                            messages='Stopping service {0}'.format(service))
                try:
                    ServiceFactory.change_service_state(target_client, service, 'stop', NodeTypeController._logger)
                except Exception as ex:
                    # Include the service name: the original format string had no placeholder
                    Toolbox.log(logger=NodeTypeController._logger,
                                messages=['\nFailed to stop service {0}'.format(service), ex],
                                loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger, messages='Removing services')
        services = ['scheduled-tasks', 'webapp-api', 'volumerouter-consumer']
        for service in services:
            if service_manager.has_service(service, client=target_client):
                Toolbox.log(logger=NodeTypeController._logger,
                            messages='Removing service {0}'.format(service))
                try:
                    ServiceFactory.change_service_state(target_client, service, 'stop', NodeTypeController._logger)
                    service_manager.remove_service(service, client=target_client)
                except Exception as ex:
                    # Include the service name: the original format string had no placeholder
                    Toolbox.log(logger=NodeTypeController._logger,
                                messages=['\nFailed to remove service {0}'.format(service), ex],
                                loglevel='exception')

        # Re-add the workers service so it only listens on the queue of this node
        if service_manager.has_service('workers', client=target_client):
            service_manager.add_service(name='workers',
                                        client=target_client,
                                        params={'WORKER_QUEUE': '{0}'.format(unique_id)})

    try:
        NodeTypeController._configure_amqp_to_volumedriver()
    except Exception as ex:
        Toolbox.log(logger=NodeTypeController._logger,
                    messages=['\nFailed to configure AMQP to Storage Driver', ex],
                    loglevel='exception')

    Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
    NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map,
                                                               logger=NodeTypeController._logger,
                                                               offline_node_ips=offline_node_ips)

    # Restart once more when the demote hooks report that they ran
    if Toolbox.run_hooks(component='nodetype',
                         sub_component='demote',
                         logger=NodeTypeController._logger,
                         cluster_ip=cluster_ip,
                         master_ip=master_ip,
                         offline_node_ips=offline_node_ips):
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map,
                                                                   logger=NodeTypeController._logger,
                                                                   offline_node_ips=offline_node_ips)

    if storagerouter not in offline_nodes:
        target_client = ip_client_map[cluster_ip]
        node_name, _ = target_client.get_hostname()
        if NodeTypeController.avahi_installed(client=target_client, logger=NodeTypeController._logger) is True:
            NodeTypeController.configure_avahi(client=target_client,
                                               node_name=node_name,
                                               node_type='extra',
                                               logger=NodeTypeController._logger)
    Configuration.set('/ovs/framework/hosts/{0}/type'.format(storagerouter.machine_id), 'EXTRA')

    # Flag an existing rollback marker file on the (reachable) demoted node
    if target_client is not None and target_client.file_exists('/tmp/ovs_rollback'):
        target_client.file_write('/tmp/ovs_rollback', 'rollback')

    Toolbox.log(logger=NodeTypeController._logger, messages='Demote complete', title=True)
def promote_node(cluster_ip, master_ip, ip_client_map, unique_id, configure_memcached, configure_rabbitmq):
    """
    Promotes a given node to master

    The StorageRouter is flagged as 'MASTER' first; afterwards the node joins the Arakoon cluster(s),
    memcached/RabbitMQ are configured when requested, the endpoint lists in the configuration are extended
    and the framework services are (re)started.

    :param cluster_ip: IP of the node being promoted (used as key in ip_client_map and as new Arakoon member)
    :param master_ip: IP of an existing master node (used as key in ip_client_map; the RabbitMQ cookie is read from it)
    :param ip_client_map: Mapping of node IP to a client object for that node
    :param unique_id: Machine ID used to look up the StorageRouter being promoted
    :param configure_memcached: When True, memcached is validated, configured and registered as endpoint
    :param configure_rabbitmq: When True, RabbitMQ is configured, joined to the master's cluster and registered as endpoint
    :raises RuntimeError: When memcached must be configured but not all memcache nodes validate,
                          or when no other Arakoon master node is found
    :return: None
    """
    from ovs.dal.lists.storagerouterlist import StorageRouterList
    from ovs.dal.lists.servicetypelist import ServiceTypeList
    from ovs.dal.lists.servicelist import ServiceList
    from ovs.dal.hybrids.service import Service

    Toolbox.log(logger=NodeTypeController._logger, messages='Promoting node', title=True)
    service_manager = ServiceFactory.get_manager()
    if configure_memcached is True:
        if NodeTypeController._validate_local_memcache_servers(ip_client_map) is False:
            raise RuntimeError('Not all memcache nodes can be reached which is required for promoting a node.')

    target_client = ip_client_map[cluster_ip]
    machine_id = System.get_my_machine_id(target_client)
    node_name, _ = target_client.get_hostname()
    master_client = ip_client_map[master_ip]

    storagerouter = StorageRouterList.get_by_machine_id(unique_id)
    storagerouter.node_type = 'MASTER'
    storagerouter.save()

    external_config = Configuration.get('/ovs/framework/external_config')
    if external_config is None:
        # No external configuration store configured: extend the 'config' Arakoon cluster with this node
        Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon configuration cluster')
        arakoon_installer = ArakoonInstaller(cluster_name='config')
        arakoon_installer.load(ip=master_ip)
        arakoon_installer.extend_cluster(new_ip=cluster_ip,
                                         base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
        arakoon_installer.restart_cluster_after_extending(new_ip=cluster_ip)
        service_manager.register_service(node_name=machine_id,
                                         service_metadata=arakoon_installer.service_metadata[cluster_ip])

    # Find other (arakoon) master nodes
    arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=arakoon_cluster_name)
    config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name)
    master_node_ips = [node.ip for node in config.nodes]
    if cluster_ip in master_node_ips:
        master_node_ips.remove(cluster_ip)
    if len(master_node_ips) == 0:
        raise RuntimeError('There should be at least one other master node')

    arakoon_ports = []
    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon OVS DB cluster')
        arakoon_installer = ArakoonInstaller(cluster_name=arakoon_cluster_name)
        arakoon_installer.load()
        arakoon_installer.extend_cluster(new_ip=cluster_ip,
                                         base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
        arakoon_installer.restart_cluster_after_extending(new_ip=cluster_ip)
        arakoon_ports = arakoon_installer.ports[cluster_ip]

    if configure_memcached is True:
        NodeTypeController.configure_memcached(client=target_client, logger=NodeTypeController._logger)
    NodeTypeController.add_services(client=target_client, node_type='master', logger=NodeTypeController._logger)

    Toolbox.log(logger=NodeTypeController._logger, messages='Update configurations')
    if configure_memcached is True:
        endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
        endpoint = '{0}:11211'.format(cluster_ip)
        if endpoint not in endpoints:
            endpoints.append(endpoint)
            Configuration.set('/ovs/framework/memcache|endpoints', endpoints)
    if configure_rabbitmq is True:
        endpoints = Configuration.get('/ovs/framework/messagequeue|endpoints')
        endpoint = '{0}:5672'.format(cluster_ip)
        if endpoint not in endpoints:
            endpoints.append(endpoint)
            Configuration.set('/ovs/framework/messagequeue|endpoints', endpoints)

    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting master node services')
        # Reset the cached store handles now that the Arakoon cluster layout changed
        PersistentFactory.store = None
        VolatileFactory.store = None

        if 'arakoon-ovsdb' not in [s.name for s in ServiceList.get_services() if s.is_internal is False or s.storagerouter.ip == cluster_ip]:
            service = Service()
            service.name = 'arakoon-ovsdb'
            service.type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
            service.ports = arakoon_ports
            service.storagerouter = storagerouter
            service.save()

    if configure_rabbitmq is True:
        NodeTypeController.configure_rabbitmq(client=target_client, logger=NodeTypeController._logger)
        # Copy rabbitmq cookie
        rabbitmq_cookie_file = '/var/lib/rabbitmq/.erlang.cookie'

        Toolbox.log(logger=NodeTypeController._logger, messages='Copying RabbitMQ cookie')
        contents = master_client.file_read(rabbitmq_cookie_file)
        master_hostname, _ = master_client.get_hostname()
        target_client.dir_create(os.path.dirname(rabbitmq_cookie_file))
        target_client.file_write(rabbitmq_cookie_file, contents)
        # '0o400' is the octal literal accepted by both Python 2.6+ and Python 3 (same value as the legacy '0400')
        target_client.file_chmod(rabbitmq_cookie_file, mode=0o400)
        target_client.run(['rabbitmq-server', '-detached'])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'stop_app'])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'join_cluster', 'rabbit@{0}'.format(master_hostname)])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'stop'])
        time.sleep(5)

        # Enable HA for the rabbitMQ queues
        ServiceFactory.change_service_state(target_client, 'rabbitmq-server', 'start', NodeTypeController._logger)
        NodeTypeController.check_rabbitmq_and_enable_ha_mode(client=target_client, logger=NodeTypeController._logger)

    NodeTypeController._configure_amqp_to_volumedriver()

    Toolbox.log(logger=NodeTypeController._logger, messages='Starting services')
    services = ['memcached', 'arakoon-ovsdb', 'rabbitmq-server']
    if arakoon_metadata['internal'] is True:
        services.remove('arakoon-ovsdb')
    for service in services:
        if service_manager.has_service(service, client=target_client):
            ServiceFactory.change_service_state(target_client, service, 'start', NodeTypeController._logger)

    Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
    NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)

    # A second restart is only done when run_hooks returns a truthy value
    if Toolbox.run_hooks(component='nodetype',
                         sub_component='promote',
                         logger=NodeTypeController._logger,
                         cluster_ip=cluster_ip,
                         master_ip=master_ip):
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)

    if NodeTypeController.avahi_installed(client=target_client, logger=NodeTypeController._logger) is True:
        NodeTypeController.configure_avahi(client=target_client, node_name=node_name, node_type='master', logger=NodeTypeController._logger)
    Configuration.set('/ovs/framework/hosts/{0}/type'.format(machine_id), 'MASTER')
    target_client.run(['chown', '-R', 'ovs:ovs', '/opt/OpenvStorage/config'])
    Configuration.set('/ovs/framework/hosts/{0}/promotecompleted'.format(machine_id), True)

    if target_client.file_exists('/tmp/ovs_rollback'):
        target_client.file_delete('/tmp/ovs_rollback')

    Toolbox.log(logger=NodeTypeController._logger, messages='Promote complete')
def demote_node(cluster_ip, master_ip, ip_client_map, unique_id, unconfigure_memcached, unconfigure_rabbitmq, offline_nodes=None):
    """
    Demotes a given node from master to extra

    The StorageRouter is flagged as 'EXTRA' first; afterwards the node leaves the Arakoon/Etcd cluster(s),
    the endpoint lists in the configuration are shrunk, RabbitMQ/memcached are unconfigured when requested
    and the master-only services are removed.
    Failures while leaving the configuration cluster, updating the configuration, unconfiguring RabbitMQ,
    stopping/removing services or configuring AMQP are logged and do not abort the demotion.

    :param cluster_ip: IP of the node being demoted
    :param master_ip: IP of a remaining master node (used as key in ip_client_map and for the Etcd shrink)
    :param ip_client_map: Mapping of node IP to a client object for that node
    :param unique_id: Machine ID used to look up the StorageRouter being demoted
    :param unconfigure_memcached: When True, the memcached endpoint is removed and the service stopped
    :param unconfigure_rabbitmq: When True, the RabbitMQ endpoint is removed and the node leaves the RabbitMQ cluster
    :param offline_nodes: Nodes (objects exposing an 'ip' attribute) considered offline; defaults to no offline nodes
    :raises RuntimeError: When memcached must be unconfigured, no node is offline and not all memcache nodes validate,
                          or when no other Arakoon master node is found
    :return: None
    """
    from ovs.dal.lists.storagerouterlist import StorageRouterList

    Toolbox.log(logger=NodeTypeController._logger, messages='Demoting node', title=True)
    if offline_nodes is None:
        offline_nodes = []

    if unconfigure_memcached is True and len(offline_nodes) == 0:
        if NodeTypeController._validate_local_memcache_servers(ip_client_map) is False:
            raise RuntimeError('Not all memcache nodes can be reached which is required for demoting a node.')

    # Find other (arakoon) master nodes
    arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=arakoon_cluster_name)
    config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name, filesystem=False)
    config.load_config()
    master_node_ips = [node.ip for node in config.nodes]
    if cluster_ip in master_node_ips:
        master_node_ips.remove(cluster_ip)
    if len(master_node_ips) == 0:
        raise RuntimeError('There should be at least one other master node')

    storagerouter = StorageRouterList.get_by_machine_id(unique_id)
    storagerouter.node_type = 'EXTRA'
    storagerouter.save()

    offline_node_ips = [node.ip for node in offline_nodes]
    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Arakoon {0} cluster'.format(arakoon_cluster_name))
        ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                        remaining_node_ips=master_node_ips,
                                        cluster_name=arakoon_cluster_name,
                                        offline_nodes=offline_node_ips)

    try:
        external_config = Configuration.get('/ovs/framework/external_config')
        if external_config is None:
            config_store = Configuration.get_store()
            if config_store == 'arakoon':
                Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Arakoon config cluster')
                ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                                remaining_node_ips=master_node_ips,
                                                cluster_name='config',
                                                offline_nodes=offline_node_ips,
                                                filesystem=True)
            else:
                from ovs.extensions.db.etcd.installer import EtcdInstaller
                Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Etcd cluster')
                EtcdInstaller.shrink_cluster(master_ip, cluster_ip, 'config', offline_node_ips)
    except Exception as ex:
        Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to leave configuration cluster', ex], loglevel='exception')

    Toolbox.log(logger=NodeTypeController._logger, messages='Update configurations')
    try:
        if unconfigure_memcached is True:
            endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
            endpoint = '{0}:{1}'.format(cluster_ip, 11211)
            if endpoint in endpoints:
                endpoints.remove(endpoint)
                Configuration.set('/ovs/framework/memcache|endpoints', endpoints)
        if unconfigure_rabbitmq is True:
            endpoints = Configuration.get('/ovs/framework/messagequeue|endpoints')
            endpoint = '{0}:{1}'.format(cluster_ip, 5672)
            if endpoint in endpoints:
                endpoints.remove(endpoint)
                Configuration.set('/ovs/framework/messagequeue|endpoints', endpoints)
    except Exception as ex:
        Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to update configurations', ex], loglevel='exception')

    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting master node services')
        # Reset the cached store handles now that the Arakoon cluster layout changed
        PersistentFactory.store = None
        VolatileFactory.store = None

        for service in storagerouter.services:
            if service.name == 'arakoon-ovsdb':
                service.delete()

    # target_client stays None when the demoted node is offline: nothing can be executed on it
    target_client = None
    if storagerouter in offline_nodes:
        if unconfigure_rabbitmq is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Removing/unconfiguring offline RabbitMQ node')
            client = ip_client_map[master_ip]
            try:
                client.run(['rabbitmqctl', 'forget_cluster_node', 'rabbit@{0}'.format(storagerouter.name)])
            except Exception as ex:
                Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to forget RabbitMQ cluster node', ex], loglevel='exception')
    else:
        target_client = ip_client_map[cluster_ip]
        if unconfigure_rabbitmq is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Removing/unconfiguring RabbitMQ')
            try:
                if ServiceManager.has_service('rabbitmq-server', client=target_client):
                    Toolbox.change_service_state(target_client, 'rabbitmq-server', 'stop', NodeTypeController._logger)
                    target_client.run(['rabbitmq-server', '-detached'])
                    time.sleep(5)
                    target_client.run(['rabbitmqctl', 'stop_app'])
                    time.sleep(5)
                    target_client.run(['rabbitmqctl', 'reset'])
                    time.sleep(5)
                    target_client.run(['rabbitmqctl', 'stop'])
                    time.sleep(5)
                    target_client.file_unlink("/var/lib/rabbitmq/.erlang.cookie")
                    Toolbox.change_service_state(target_client, 'rabbitmq-server', 'stop', NodeTypeController._logger)  # To be sure
            except Exception as ex:
                Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to remove/unconfigure RabbitMQ', ex], loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger, messages='Stopping services')
        services = ['memcached', 'rabbitmq-server']
        if unconfigure_rabbitmq is False:
            services.remove('rabbitmq-server')
        if unconfigure_memcached is False:
            services.remove('memcached')
        for service in services:
            if ServiceManager.has_service(service, client=target_client):
                Toolbox.log(logger=NodeTypeController._logger, messages='Stopping service {0}'.format(service))
                try:
                    Toolbox.change_service_state(target_client, service, 'stop', NodeTypeController._logger)
                except Exception as ex:
                    # The placeholder makes the failing service name show up in the log
                    Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to stop service {0}'.format(service), ex], loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger, messages='Removing services')
        services = ['scheduled-tasks', 'webapp-api', 'volumerouter-consumer']
        for service in services:
            if ServiceManager.has_service(service, client=target_client):
                Toolbox.log(logger=NodeTypeController._logger, messages='Removing service {0}'.format(service))
                try:
                    Toolbox.change_service_state(target_client, service, 'stop', NodeTypeController._logger)
                    ServiceManager.remove_service(service, client=target_client)
                except Exception as ex:
                    # The placeholder makes the failing service name show up in the log
                    Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to remove service {0}'.format(service), ex], loglevel='exception')

        if ServiceManager.has_service('workers', client=target_client):
            ServiceManager.add_service(name='workers',
                                       client=target_client,
                                       params={'WORKER_QUEUE': '{0}'.format(unique_id)})

    try:
        NodeTypeController._configure_amqp_to_volumedriver()
    except Exception as ex:
        Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to configure AMQP to Storage Driver', ex], loglevel='exception')

    Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
    NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger, offline_node_ips=offline_node_ips)

    # A second restart is only done when run_hooks returns a truthy value
    if Toolbox.run_hooks(component='nodetype',
                         sub_component='demote',
                         logger=NodeTypeController._logger,
                         cluster_ip=cluster_ip,
                         master_ip=master_ip,
                         offline_node_ips=offline_node_ips):
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger, offline_node_ips=offline_node_ips)

    if storagerouter not in offline_nodes:
        target_client = ip_client_map[cluster_ip]
        node_name, _ = target_client.get_hostname()
        if NodeTypeController.avahi_installed(client=target_client, logger=NodeTypeController._logger) is True:
            NodeTypeController.configure_avahi(client=target_client, node_name=node_name, node_type='extra', logger=NodeTypeController._logger)
    Configuration.set('/ovs/framework/hosts/{0}/type'.format(storagerouter.machine_id), 'EXTRA')

    if target_client is not None and target_client.file_exists('/tmp/ovs_rollback'):
        target_client.file_write('/tmp/ovs_rollback', 'rollback')

    Toolbox.log(logger=NodeTypeController._logger, messages='Demote complete', title=True)
def promote_node(cluster_ip, master_ip, ip_client_map, unique_id, configure_memcached, configure_rabbitmq):
    """
    Promotes a given node to master

    Steps, in order: flag the StorageRouter as 'MASTER', join the configuration cluster (Arakoon or Etcd) when
    no external configuration is set, join the OVS DB Arakoon cluster when it is internally managed,
    configure memcached/RabbitMQ when requested, extend the endpoint lists, start and restart the services.

    :param cluster_ip: IP of the node being promoted (key in ip_client_map, new member of the clusters)
    :param master_ip: IP of an existing master node (key in ip_client_map; the RabbitMQ cookie is read from it)
    :param ip_client_map: Mapping of node IP to a client object for that node
    :param unique_id: Machine ID used to look up the StorageRouter being promoted
    :param configure_memcached: When True, memcached is validated, configured and registered as endpoint
    :param configure_rabbitmq: When True, RabbitMQ is configured, joined to the master's cluster and registered as endpoint
    :raises RuntimeError: When memcached must be configured but not all memcache nodes validate,
                          or when no other Arakoon master node is found
    :return: None
    """
    from ovs.dal.lists.storagerouterlist import StorageRouterList
    from ovs.dal.lists.servicetypelist import ServiceTypeList
    from ovs.dal.lists.servicelist import ServiceList
    from ovs.dal.hybrids.service import Service

    logger = NodeTypeController._logger

    def _log(message, **options):
        # Thin wrapper so every progress message goes through the controller's logger
        Toolbox.log(logger=logger, messages=message, **options)

    _log('Promoting node', title=True)
    if configure_memcached is True and NodeTypeController._validate_local_memcache_servers(ip_client_map) is False:
        raise RuntimeError('Not all memcache nodes can be reached which is required for promoting a node.')

    target_client = ip_client_map[cluster_ip]
    machine_id = System.get_my_machine_id(target_client)
    node_name, _ = target_client.get_hostname()
    master_client = ip_client_map[master_ip]

    storagerouter = StorageRouterList.get_by_machine_id(unique_id)
    storagerouter.node_type = 'MASTER'
    storagerouter.save()

    # Without an external configuration store, this node has to join the configuration cluster itself
    if Configuration.get('/ovs/framework/external_config') is None:
        if Configuration.get_store() == 'arakoon':
            _log('Joining Arakoon configuration cluster')
            config_base_dir = Configuration.get('/ovs/framework/paths|ovsdb')
            metadata = ArakoonInstaller.extend_cluster(master_ip=master_ip,
                                                       new_ip=cluster_ip,
                                                       cluster_name='config',
                                                       base_dir=config_base_dir,
                                                       ports=[26400, 26401],
                                                       filesystem=True)
            ArakoonInstaller.restart_cluster_add(cluster_name='config',
                                                 current_ips=metadata['ips'],
                                                 new_ip=cluster_ip,
                                                 filesystem=True)
            ServiceManager.register_service(node_name=machine_id,
                                            service_metadata=metadata['service_metadata'])
        else:
            from ovs.extensions.db.etcd.installer import EtcdInstaller
            _log('Joining Etcd cluster')
            EtcdInstaller.extend_cluster(master_ip, cluster_ip, 'config')

    # Find other (arakoon) master nodes
    arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=arakoon_cluster_name)
    cluster_config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name, filesystem=False)
    cluster_config.load_config()
    master_node_ips = [member.ip for member in cluster_config.nodes]
    try:
        master_node_ips.remove(cluster_ip)
    except ValueError:
        pass  # The promoted node is not part of the cluster yet
    if not master_node_ips:
        raise RuntimeError('There should be at least one other master node')

    arakoon_ports = []
    if arakoon_metadata['internal'] is True:
        _log('Joining Arakoon OVS DB cluster')
        ovsdb_base_dir = Configuration.get('/ovs/framework/paths|ovsdb')
        result = ArakoonInstaller.extend_cluster(master_ip=master_ip,
                                                 new_ip=cluster_ip,
                                                 cluster_name=arakoon_cluster_name,
                                                 base_dir=ovsdb_base_dir)
        ArakoonInstaller.restart_cluster_add(cluster_name=arakoon_cluster_name,
                                             current_ips=result['ips'],
                                             new_ip=cluster_ip,
                                             filesystem=False)
        arakoon_ports = [result['client_port'], result['messaging_port']]

    if configure_memcached is True:
        NodeTypeController.configure_memcached(client=target_client, logger=logger)
    NodeTypeController.add_services(client=target_client, node_type='master', logger=logger)

    _log('Update configurations')
    endpoint_updates = ((configure_memcached, '/ovs/framework/memcache|endpoints', '{0}:11211'),
                        (configure_rabbitmq, '/ovs/framework/messagequeue|endpoints', '{0}:5672'))
    for enabled, config_key, template in endpoint_updates:
        if enabled is not True:
            continue
        registered = Configuration.get(config_key)
        new_endpoint = template.format(cluster_ip)
        if new_endpoint not in registered:
            registered.append(new_endpoint)
            Configuration.set(config_key, registered)

    if arakoon_metadata['internal'] is True:
        _log('Restarting master node services')
        ArakoonInstaller.restart_cluster_add(cluster_name=arakoon_cluster_name,
                                             current_ips=master_node_ips,
                                             new_ip=cluster_ip,
                                             filesystem=False)
        # Reset the cached store handles now that the Arakoon cluster layout changed
        PersistentFactory.store = None
        VolatileFactory.store = None

        # Names of all external services plus the services modelled on the promoted node
        known_names = []
        for candidate in ServiceList.get_services():
            if candidate.is_internal is False or candidate.storagerouter.ip == cluster_ip:
                known_names.append(candidate.name)
        if 'arakoon-ovsdb' not in known_names:
            ovsdb_service = Service()
            ovsdb_service.name = 'arakoon-ovsdb'
            ovsdb_service.type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
            ovsdb_service.ports = arakoon_ports
            ovsdb_service.storagerouter = storagerouter
            ovsdb_service.save()

    if configure_rabbitmq is True:
        NodeTypeController.configure_rabbitmq(client=target_client, logger=logger)
        # Copy rabbitmq cookie
        rabbitmq_cookie_file = '/var/lib/rabbitmq/.erlang.cookie'

        _log('Copying Rabbit MQ cookie')
        cookie = master_client.file_read(rabbitmq_cookie_file)
        master_hostname, _ = master_client.get_hostname()
        target_client.dir_create(os.path.dirname(rabbitmq_cookie_file))
        target_client.file_write(rabbitmq_cookie_file, cookie)
        target_client.file_chmod(rabbitmq_cookie_file, mode=400)
        # Each command is followed by a 5 second pause before the next one is issued
        join_sequence = (['rabbitmq-server', '-detached'],
                         ['rabbitmqctl', 'stop_app'],
                         ['rabbitmqctl', 'join_cluster', 'rabbit@{0}'.format(master_hostname)],
                         ['rabbitmqctl', 'stop'])
        for command in join_sequence:
            target_client.run(command)
            time.sleep(5)

        # Enable HA for the rabbitMQ queues
        Toolbox.change_service_state(target_client, 'rabbitmq-server', 'start', logger)
        NodeTypeController.check_rabbitmq_and_enable_ha_mode(client=target_client, logger=logger)

    NodeTypeController._configure_amqp_to_volumedriver()

    _log('Starting services')
    startable = ['memcached', 'arakoon-ovsdb', 'rabbitmq-server', 'etcd-config']
    if arakoon_metadata['internal'] is True:
        startable = [name for name in startable if name != 'arakoon-ovsdb']
    for name in startable:
        if not ServiceManager.has_service(name, client=target_client):
            continue
        Toolbox.change_service_state(target_client, name, 'start', logger)

    _log('Restarting services')
    NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=logger)

    # A second restart is only done when run_hooks returns a truthy value
    hooks_result = Toolbox.run_hooks(component='nodetype',
                                     sub_component='promote',
                                     logger=logger,
                                     cluster_ip=cluster_ip,
                                     master_ip=master_ip)
    if hooks_result:
        _log('Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=logger)

    if NodeTypeController.avahi_installed(client=target_client, logger=logger) is True:
        NodeTypeController.configure_avahi(client=target_client, node_name=node_name, node_type='master', logger=logger)
    Configuration.set('/ovs/framework/hosts/{0}/type'.format(machine_id), 'MASTER')
    target_client.run(['chown', '-R', 'ovs:ovs', '/opt/OpenvStorage/config'])
    Configuration.set('/ovs/framework/hosts/{0}/promotecompleted'.format(machine_id), True)

    if target_client.file_exists('/tmp/ovs_rollback'):
        target_client.file_delete('/tmp/ovs_rollback')

    _log('Promote complete')