def log(logger, messages, title=False, boxed=False, loglevel='info', silent=False): """ Print a message on stdout and log to file :param logger: Logger object to use for the logging :type logger: ovs.extensions.generic.logger.Logger :param messages: Messages to print and log :type messages: str or list :param title: If True some extra chars will be pre- and appended :type title: bool :param boxed: Use the Interactive boxed message print option :type boxed: bool :param loglevel: level to log on :type loglevel: str :param silent: If set to True, the messages will only be logged to file :type silent: bool :return: None """ if type(messages) in (str, basestring, unicode): messages = [messages] if silent is False: if boxed is True: print Interactive.boxed_message(lines=messages) else: for message in messages: if title is True: message = '\n+++ {0} +++\n'.format(message) if loglevel in ['error', 'exception']: message = 'ERROR: {0}'.format(message) print message for message in messages: getattr(logger, loglevel)(message)
def ask_validate_password(ip, logger, username='******', previous=None): """ Asks a user to enter the password for a given user on a given ip and validates it If previous is provided, we first attempt to login using the previous password, if successful, we don't ask for a password :param ip: IP of the node on which we want to validate / ask the password :type ip: str :param logger: Logger object to use for the logging :type logger: ovs.extensions.generic.logger.Logger :param username: Username to login with :type username: str :param previous: Previously used password for another node in the cluster :type previous: str :return: None """ try: SSHClient(ip, username) return None except: pass if previous is not None: try: SSHClient(ip, username=username, password=previous) return previous except: pass node_string = 'this node' if ip == '127.0.0.1' else ip while True: try: password = Interactive.ask_password( 'Enter the {0} password for {1}'.format( username, node_string)) if password in ['', None]: continue SSHClient(ip, username=username, password=password) return password except KeyboardInterrupt: raise except UnableToConnectException: raise except: Toolbox.log( logger=logger, messages= 'Password invalid or could not connect to this node')
def remove_node(node_ip, silent=None): """ Remove the node with specified IP from the cluster :param node_ip: IP of the node to remove :type node_ip: str :param silent: If silent == '--force-yes' no question will be asked to confirm the removal :type silent: str :return: None """ from ovs.dal.lists.storagerouterlist import StorageRouterList from ovs.lib.storagedriver import StorageDriverController from ovs.lib.vpool import VPoolController Toolbox.log(logger=NodeRemovalController._logger, messages='Remove node', boxed=True) Toolbox.log( logger=NodeRemovalController._logger, messages= 'WARNING: Some of these steps may take a very long time, please check the logs for more information\n\n' ) service_manager = ServiceFactory.get_manager() ############### # VALIDATIONS # ############### try: node_ip = node_ip.strip() if not isinstance(node_ip, str): raise ValueError('Node IP must be a string') if not re.match(SSHClient.IP_REGEX, node_ip): raise ValueError('Invalid IP {0} specified'.format(node_ip)) storage_router_all = sorted(StorageRouterList.get_storagerouters(), key=lambda k: k.name) storage_router_masters = StorageRouterList.get_masters() storage_router_all_ips = set( [storage_router.ip for storage_router in storage_router_all]) storage_router_master_ips = set([ storage_router.ip for storage_router in storage_router_masters ]) storage_router_to_remove = StorageRouterList.get_by_ip(node_ip) offline_reasons = {} if node_ip not in storage_router_all_ips: raise ValueError( 'Unknown IP specified\nKnown in model:\n - {0}\nSpecified for removal:\n - {1}' .format('\n - '.join(storage_router_all_ips), node_ip)) if len(storage_router_all_ips) == 1: raise RuntimeError("Removing the only node is not possible") if node_ip in storage_router_master_ips and len( storage_router_master_ips) == 1: raise RuntimeError( "Removing the only master node is not possible") if System.get_my_storagerouter() == storage_router_to_remove: raise RuntimeError( 'The node to be removed cannot be identical to the node on which the removal is initiated' ) Toolbox.log( logger=NodeRemovalController._logger, messages='Creating SSH connections to remaining master nodes') master_ip = None ip_client_map = {} storage_routers_offline = [] storage_router_to_remove_online = True for storage_router in storage_router_all: try: client = SSHClient(storage_router, username='******', timeout=10) except (UnableToConnectException, NotAuthenticatedException, TimeOutException) as ex: if isinstance(ex, UnableToConnectException): msg = 'Unable to connect' elif isinstance(ex, NotAuthenticatedException): msg = 'Could not authenticate' elif isinstance(ex, TimeOutException): msg = 'Connection timed out' Toolbox.log( logger=NodeRemovalController._logger, messages=' * Node with IP {0:<15}- {1}'.format( storage_router.ip, msg)) offline_reasons[storage_router.ip] = msg storage_routers_offline.append(storage_router) if storage_router == storage_router_to_remove: storage_router_to_remove_online = False continue Toolbox.log( logger=NodeRemovalController._logger, messages=' * Node with IP {0:<15}- Successfully connected' .format(storage_router.ip)) ip_client_map[storage_router.ip] = client if storage_router != storage_router_to_remove and storage_router.node_type == 'MASTER': master_ip = storage_router.ip if len(ip_client_map) == 0 or master_ip is None: raise RuntimeError( 'Could not connect to any master node in the cluster') storage_router_to_remove.invalidate_dynamics('vdisks_guids') if len( storage_router_to_remove.vdisks_guids ) > 0: # vDisks are supposed to be moved away manually before removing a node raise RuntimeError( "Still vDisks attached to Storage Router {0}".format( storage_router_to_remove.name)) internal_memcached = Toolbox.is_service_internally_managed( service='memcached') internal_rabbit_mq = Toolbox.is_service_internally_managed( service='rabbitmq') memcached_endpoints = Configuration.get( key='/ovs/framework/memcache|endpoints') rabbit_mq_endpoints = Configuration.get( key='/ovs/framework/messagequeue|endpoints') copy_memcached_endpoints = list(memcached_endpoints) copy_rabbit_mq_endpoints = list(rabbit_mq_endpoints) for endpoint in memcached_endpoints: if endpoint.startswith(storage_router_to_remove.ip): copy_memcached_endpoints.remove(endpoint) for endpoint in rabbit_mq_endpoints: if endpoint.startswith(storage_router_to_remove.ip): copy_rabbit_mq_endpoints.remove(endpoint) if len(copy_memcached_endpoints ) == 0 and internal_memcached is True: raise RuntimeError( 'Removal of provided nodes will result in a complete removal of the memcached service' ) if len(copy_rabbit_mq_endpoints ) == 0 and internal_rabbit_mq is True: raise RuntimeError( 'Removal of provided nodes will result in a complete removal of the messagequeue service' ) Toolbox.run_hooks(component='noderemoval', sub_component='validate_removal', logger=NodeRemovalController._logger, cluster_ip=storage_router_to_remove.ip) except KeyboardInterrupt: Toolbox.log(logger=NodeRemovalController._logger, messages='\n') Toolbox.log( logger=NodeRemovalController._logger, messages= 'Removal has been aborted during the validation step. No changes have been applied.', boxed=True, loglevel='warning') sys.exit(1) except Exception as exception: Toolbox.log(logger=NodeRemovalController._logger, messages=[str(exception)], boxed=True, loglevel='exception') sys.exit(1) ################# # CONFIRMATIONS # ################# try: interactive = silent != '--force-yes' remove_asd_manager = not interactive # Remove ASD manager if non-interactive else ask if interactive is True: if len(storage_routers_offline) > 0: Toolbox.log( logger=NodeRemovalController._logger, messages= 'Certain nodes appear to be offline. These will not fully removed and will cause issues if they are not really offline.' ) Toolbox.log( logger=NodeRemovalController._logger, messages='Offline nodes: {0}'.format(''.join( ('\n * {0:<15}- {1}.'.format(ip, message) for ip, message in offline_reasons.iteritems())))) valid_node_info = Interactive.ask_yesno( message= 'Continue the removal with these being presumably offline?', default_value=False) if valid_node_info is False: Toolbox.log( logger=NodeRemovalController._logger, messages= 'Please validate the state of the nodes before removing.', title=True) sys.exit(1) proceed = Interactive.ask_yesno( message='Are you sure you want to remove node {0}?'.format( storage_router_to_remove.name), default_value=False) if proceed is False: Toolbox.log(logger=NodeRemovalController._logger, messages='Abort removal', title=True) sys.exit(1) remove_asd_manager = True if storage_router_to_remove_online is True: client = SSHClient(endpoint=storage_router_to_remove, username='******') if service_manager.has_service(name='asd-manager', client=client): remove_asd_manager = Interactive.ask_yesno( message= 'Do you also want to remove the ASD manager and related ASDs?', default_value=False) if remove_asd_manager is True or storage_router_to_remove_online is False: for fct in Toolbox.fetch_hooks('noderemoval', 'validate_asd_removal'): validation_output = fct(storage_router_to_remove.ip) if validation_output['confirm'] is True: if Interactive.ask_yesno( message=validation_output['question'], default_value=False) is False: remove_asd_manager = False break except KeyboardInterrupt: Toolbox.log(logger=NodeRemovalController._logger, messages='\n') Toolbox.log( logger=NodeRemovalController._logger, messages= 'Removal has been aborted during the confirmation step. No changes have been applied.', boxed=True, loglevel='warning') sys.exit(1) except Exception as exception: Toolbox.log(logger=NodeRemovalController._logger, messages=[str(exception)], boxed=True, loglevel='exception') sys.exit(1) ########### # REMOVAL # ########### try: Toolbox.log(logger=NodeRemovalController._logger, messages='Starting removal of node {0} - {1}'.format( storage_router_to_remove.name, storage_router_to_remove.ip)) if storage_router_to_remove_online is False: Toolbox.log( logger=NodeRemovalController._logger, messages= ' Marking all Storage Drivers served by Storage Router {0} as offline' .format(storage_router_to_remove.ip)) StorageDriverController.mark_offline( storagerouter_guid=storage_router_to_remove.guid) # Remove vPools Toolbox.log(logger=NodeRemovalController._logger, messages=' Removing vPools from node'.format( storage_router_to_remove.ip)) storage_routers_offline_guids = [ sr.guid for sr in storage_routers_offline if sr.guid != storage_router_to_remove.guid ] for storage_driver in storage_router_to_remove.storagedrivers: Toolbox.log(logger=NodeRemovalController._logger, messages=' Removing vPool {0} from node'.format( storage_driver.vpool.name)) VPoolController.shrink_vpool( storagedriver_guid=storage_driver.guid, offline_storage_router_guids=storage_routers_offline_guids) # Demote if MASTER if storage_router_to_remove.node_type == 'MASTER': NodeTypeController.demote_node( cluster_ip=storage_router_to_remove.ip, master_ip=master_ip, ip_client_map=ip_client_map, unique_id=storage_router_to_remove.machine_id, unconfigure_memcached=internal_memcached, unconfigure_rabbitmq=internal_rabbit_mq, offline_nodes=storage_routers_offline) # Stop / remove services Toolbox.log(logger=NodeRemovalController._logger, messages='Stopping and removing services') if storage_router_to_remove_online is True: client = SSHClient(endpoint=storage_router_to_remove, username='******') NodeRemovalController.remove_services( client=client, node_type=storage_router_to_remove.node_type.lower(), logger=NodeRemovalController._logger) service = 'watcher-config' if service_manager.has_service(service, client=client): Toolbox.log( logger=NodeRemovalController._logger, messages='Removing service {0}'.format(service)) service_manager.stop_service(service, client=client) service_manager.remove_service(service, client=client) Toolbox.run_hooks(component='noderemoval', sub_component='remove', logger=NodeRemovalController._logger, cluster_ip=storage_router_to_remove.ip, complete_removal=remove_asd_manager) # Clean up model Toolbox.log(logger=NodeRemovalController._logger, messages='Removing node from model') for service in storage_router_to_remove.services: service.delete() for disk in storage_router_to_remove.disks: for partition in disk.partitions: partition.delete() disk.delete() for j_domain in storage_router_to_remove.domains: j_domain.delete() Configuration.delete('/ovs/framework/hosts/{0}'.format( storage_router_to_remove.machine_id)) NodeTypeController.restart_framework_and_memcache_services( clients=ip_client_map, offline_node_ips=[node.ip for node in storage_routers_offline], logger=NodeRemovalController._logger) if storage_router_to_remove_online is True: client = SSHClient(endpoint=storage_router_to_remove, username='******') client.file_delete(filenames=[CACC_LOCATION]) client.file_delete(filenames=[CONFIG_STORE_LOCATION]) storage_router_to_remove.delete() Toolbox.log(logger=NodeRemovalController._logger, messages='Successfully removed node\n') except Exception as exception: Toolbox.log(logger=NodeRemovalController._logger, messages='\n') Toolbox.log( logger=NodeRemovalController._logger, messages=['An unexpected error occurred:', str(exception)], boxed=True, loglevel='exception') sys.exit(1) except KeyboardInterrupt: Toolbox.log(logger=NodeRemovalController._logger, messages='\n') Toolbox.log( logger=NodeRemovalController._logger, messages= 'This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.', boxed=True, loglevel='error') sys.exit(1) if remove_asd_manager is True and storage_router_to_remove_online is True: Toolbox.log(logger=NodeRemovalController._logger, messages='\nRemoving ASD Manager') with remote(storage_router_to_remove.ip, [os]) as rem: rem.os.system('asd-manager remove --force-yes') Toolbox.log(logger=NodeRemovalController._logger, messages='Remove nodes finished', title=True)
def setup(): """ Interactive setup part for initial asd manager configuration """ _print_and_log(message=Interactive.boxed_message(['ASD Manager setup'])) # Gather information ipaddresses = OSFactory.get_manager().get_ip_addresses() if not ipaddresses: _print_and_log( level='error', message='\n' + Interactive.boxed_message( ['Could not retrieve IP information on local node'])) sys.exit(1) validation_ip_addresses = copy.deepcopy(ipaddresses) local_client = SSHClient(endpoint='127.0.0.1', username='******') service_manager = ServiceFactory.get_manager() if service_manager.has_service(MANAGER_SERVICE, local_client): _print_and_log(level='error', message='\n' + Interactive.boxed_message( ['The ASD Manager is already installed.'])) sys.exit(1) config = _validate_and_retrieve_pre_config() interactive = len(config) == 0 ipmi_info = {'ip': None, 'username': None, 'pwd': None} if interactive is False: api_ip = config['api_ip'] api_port = config.get('api_port', 8500) asd_ips = config.get('asd_ips', []) asd_start_port = config.get('asd_start_port', 8600) configuration_store = config.get('configuration_store', 'arakoon') ipmi_info = config.get('ipmi', ipmi_info) else: api_ip = Interactive.ask_choice( choice_options=ipaddresses, question='Select the public IP address to be used for the API', sort_choices=True) api_port = Interactive.ask_integer( question="Select the port to be used for the API", min_value=1025, max_value=65535, default_value=8500) asd_ips = [] add_ips = True ipaddresses.append('All') while add_ips: current_ips = ' - Current selected IPs: {0}'.format(asd_ips) new_asd_ip = Interactive.ask_choice( choice_options=ipaddresses, question= "Select an IP address to be used for the ASDs or 'All' (All current and future interfaces: 0.0.0.0){0}" .format(current_ips if len(asd_ips) > 0 else ''), default_value='All') if new_asd_ip == 'All': ipaddresses.remove('All') asd_ips = [ ] # Empty list maps to all IPs - checked when configuring ASDs add_ips = False else: asd_ips.append(new_asd_ip) ipaddresses.remove(new_asd_ip) add_ips = Interactive.ask_yesno( "Do you want to add another IP?") asd_start_port = Interactive.ask_integer( question="Select the port to be used for the ASDs", min_value=1025, max_value=65435, default_value=8600) configuration_store = 'arakoon' message = 'Do you want to set IPMI configuration keys?' proceed = Interactive.ask_yesno(message=message, default_value=False) if proceed is True: ipmi_info['ip'] = Interactive.ask_string( message='Enter the IPMI IP address', regex_info={'regex': ExtensionsToolbox.regex_ip}) ipmi_info['username'] = Interactive.ask_string( message='Enter the IPMI username') ipmi_info['pwd'] = Interactive.ask_password( message='Enter the IPMI password') if api_ip not in validation_ip_addresses: _print_and_log( level='error', message='\n' + Interactive.boxed_message(lines=[ 'Invalid API IP {0} specified. Please choose from:'.format( api_ip) ] + [' * {0}'.format(ip) for ip in ipaddresses])) sys.exit(1) different_ips = set(asd_ips).difference(set(validation_ip_addresses)) if different_ips: _print_and_log( level='error', message='\n' + Interactive.boxed_message(lines=[ 'Invalid ASD IPs {0} specified. Please choose from:'.format( asd_ips) ] + [' * {0}'.format(ip) for ip in ipaddresses])) sys.exit(1) if api_port in range(asd_start_port, asd_start_port + 100): _print_and_log( level='error', message='\n' + Interactive.boxed_message( ['API port cannot be in the range of the ASD port + 100'])) sys.exit(1) if interactive is True: while not local_client.file_exists(CACC_LOCATION): _print_and_log( level='warning', message= ' - Please place a copy of the Arakoon\'s client configuration file at: {0}' .format(CACC_LOCATION)) Interactive.ask_continue() local_client.file_write(filename=CONFIG_STORE_LOCATION, contents=json.dumps( {'configuration_store': configuration_store}, indent=4)) node_id = Configuration.initialize( config={ 'api_ip': api_ip, 'asd_ips': asd_ips, 'api_port': api_port, 'asd_start_port': asd_start_port, 'ipmi': ipmi_info }) # Model settings _print_and_log(message=' - Store settings in DB') for code, value in { 'api_ip': api_ip, 'api_port': api_port, 'configuration_store': configuration_store, 'node_id': node_id }.iteritems(): setting = Setting() setting.code = code setting.value = value setting.save() # Deploy/start services _print_and_log(message=' - Deploying and starting services') service_manager.add_service(name=MANAGER_SERVICE, client=local_client) service_manager.add_service(name=WATCHER_SERVICE, client=local_client) _print_and_log(message=' - Starting watcher service') try: service_manager.start_service(name=WATCHER_SERVICE, client=local_client) except Exception: Configuration.uninitialize() _print_and_log(level='exception', message='\n' + Interactive.boxed_message(['Starting watcher failed'])) sys.exit(1) _print_and_log(message='\n' + Interactive.boxed_message(['ASD Manager setup completed']))
def _validate_and_retrieve_pre_config(): """ Validate whether the values in the pre-configuration file are valid :return: JSON contents """ if not os.path.exists(PRECONFIG_FILE): return {} with open(PRECONFIG_FILE) as pre_config: try: config = json.loads(pre_config.read()) except Exception: _print_and_log( level='exception', message='\n' + Interactive.boxed_message([ 'JSON contents could not be retrieved from file {0}'. format(PRECONFIG_FILE) ])) sys.exit(1) if 'asdmanager' not in config or not isinstance(config['asdmanager'], dict): _print_and_log( level='error', message='\n' + Interactive.boxed_message([ 'The ASD manager pre-configuration file must contain a "asdmanager" key with a dictionary as value' ])) sys.exit(1) errors = [] config = config['asdmanager'] actual_keys = config.keys() allowed_keys = [ 'api_ip', 'api_port', 'asd_ips', 'asd_start_port', 'configuration_store', 'ipmi' ] for key in actual_keys: if key not in allowed_keys: errors.append( 'Key {0} is not supported by the ASD manager'.format(key)) if len(errors) > 0: _print_and_log( level='error', message='\n' + Interactive.boxed_message([ 'Errors found while verifying pre-configuration:', ' - {0}'.format('\n - '.join(errors)), '', 'Allowed keys:\n' ' - {0}'.format('\n - '.join(allowed_keys)) ])) sys.exit(1) try: ExtensionsToolbox.verify_required_params( actual_params=config, required_params={ 'api_ip': (str, ExtensionsToolbox.regex_ip, True), 'asd_ips': (list, ExtensionsToolbox.regex_ip, False), 'api_port': (int, { 'min': 1025, 'max': 65535 }, False), 'asd_start_port': (int, { 'min': 1025, 'max': 65435 }, False), 'configuration_store': (str, ['arakoon'], False) }) if config.get('ipmi') is not None: ExtensionsToolbox.verify_required_params( actual_params=config.get('ipmi'), required_params={ 'ip': (str, ExtensionsToolbox.regex_ip, True), 'username': (str, None, True), 'pwd': (str, None, True) }) except RuntimeError: _print_and_log(message='\n' + Interactive.boxed_message([ 'The asd-manager pre-configuration file does not contain correct information' ]), level='exception') sys.exit(1) return config
def remove(silent=None): """ Interactive removal part for the ASD manager :param silent: If silent == '--force-yes' no question will be asked to confirm the removal :type silent: str :return: None :rtype: NoneType """ _print_and_log(message='\n' + Interactive.boxed_message(['ASD Manager removal'])) local_client = SSHClient(endpoint='127.0.0.1', username='******') if not local_client.file_exists( filename='{0}/main.db'.format(Setting.DATABASE_FOLDER)): _print_and_log(level='error', message='\n' + Interactive.boxed_message( ['The ASD Manager has already been removed'])) sys.exit(1) _print_and_log(message=' - Validating configuration management') try: Configuration.list(key='ovs') except: _print_and_log( level='exception', message='\n' + Interactive.boxed_message(['Could not connect to Arakoon'])) sys.exit(1) _print_and_log(message=' - Retrieving ASD information') all_asds = {} try: all_asds = ASDList.get_asds() except: _print_and_log(level='exception', message=' - Failed to retrieve the ASD information') interactive = silent != '--force-yes' if interactive is True: message = 'Are you sure you want to continue?' if len(all_asds) > 0: _print_and_log(message='\n\n+++ ALERT +++\n', level='warning') message = 'DATA LOSS possible if proceeding! Continue?' proceed = Interactive.ask_yesno(message=message, default_value=False) if proceed is False: _print_and_log(level='error', message='\n' + Interactive.boxed_message(['Abort removal'])) sys.exit(1) if len(all_asds) > 0: _print_and_log(message=' - Removing disks') for disk in DiskList.get_disks(): if disk.available is True: continue try: _print_and_log( message=' - Retrieving ASD information for disk {0}'. format(disk.name)) for asd in disk.asds: _print_and_log( message=' - Removing ASD {0}'.format(asd.name)) ASDController.remove_asd(asd) DiskController.clean_disk(disk) except Exception: _print_and_log(level='exception', message=' - Deleting ASDs failed') _print_and_log(message=' - Removing services') service_manager = ServiceFactory.get_manager() for service in MaintenanceController.get_services(): service_name = service _print_and_log( message=' - Removing service {0}'.format(service_name)) guid = None for alba_backend_guid in Configuration.list(key='/ovs/alba/backends'): for maintenance_service_name in Configuration.list( key='/ovs/alba/backends/{0}/maintenance/'.format( alba_backend_guid)): if maintenance_service_name == service_name: guid = alba_backend_guid break MaintenanceController.remove_maintenance_service( name=service_name, alba_backend_guid=guid) for service_name in [WATCHER_SERVICE, MANAGER_SERVICE]: if service_manager.has_service(name=service_name, client=local_client): _print_and_log( message=' - Removing service {0}'.format(service_name)) service_manager.stop_service(name=service_name, client=local_client) service_manager.remove_service(name=service_name, client=local_client) _print_and_log(message=' - Removing from configuration management') remaining_users = Configuration.uninitialize() if not remaining_users: local_client.file_delete(filenames=CACC_LOCATION) local_client.file_delete( filenames='{0}/main.db'.format(Setting.DATABASE_FOLDER)) _print_and_log( message='\n' + Interactive.boxed_message(['ASD Manager removal completed']))