def is_update_required(cls):
    # type: () -> bool
    """
    Determine whether one of the packages listed in cls.BINARIES needs to be installed or updated
    Runs cls.validate_binaries() first and then compares, through a local client, the installed version of every
    package with the candidate version reported by cls.PACKAGE_MANAGER
    :raises ValueError: When the number of candidate versions differs from the number of requested packages
    :return: True if a package is not installed yet or is older than its candidate version, False otherwise
    :rtype: bool
    """
    def _to_loose_version(raw_version):
        # Versions may already be LooseVersion instances; only wrap the ones that are not
        if isinstance(raw_version, LooseVersion):
            return raw_version
        return LooseVersion(raw_version)

    cls.validate_binaries()

    client = SSHClient('127.0.0.1', username='******')
    package_names = list(chain.from_iterable([binary[0] for binary in cls.BINARIES]))
    joined_names = ', '.join(package_names)

    cls.logger.info('Retrieving installed versions for {}'.format(joined_names))
    installed = cls.PACKAGE_MANAGER.get_installed_versions(client, package_names)
    cls.logger.info('Retrieving candidate versions for {}'.format(joined_names))
    candidates = cls.PACKAGE_MANAGER.get_candidate_versions(client, package_names)

    if len(candidates) != len(package_names):
        raise ValueError('Not all packages were accounted for. Required {}. Found: {}'.format(joined_names, ', '.join(candidates)))

    for name, candidate in candidates.iteritems():
        if name not in installed:
            cls.logger.info('{} is not yet installed. Update required'.format(name))
            return True
        candidate_version = _to_loose_version(candidate)
        current_version = _to_loose_version(installed[name])
        if current_version < candidate_version:
            cls.logger.info('{} can be updated'.format(name))
            return True
    return False
def model_devices(cls, ssh_client=None, name_alias_mapping=None, s3=False):
    # type: (Optional[SSHClient], Optional[AliasMapping], Optional[bool]) -> Tuple[List[Disk], AliasMapping]
    """
    Model all disks that are currently on this machine
    :param ssh_client: SSHClient instance. A local client is created when none is given
    :type ssh_client: SSHClient
    :param name_alias_mapping: The name to alias mapping (Optional). Retrieved from the system when not given
    :type name_alias_mapping: AliasMapping
    :param s3: Whether or not to account for AWS ec2 instances. When set, the freshly retrieved mapping is
    extended with the result of cls.map_s3_volumes()
    :type s3: bool
    :return: A list of modeled disks and the name to alias mapping that was used
    :rtype: Tuple[List[Disk], AliasMapping]
    """
    if not ssh_client:
        ssh_client = SSHClient('127.0.0.1', username='******')

    if not name_alias_mapping:
        name_alias_mapping = cls.retrieve_alias_mapping(ssh_client)
        if s3:
            s3_volume_mapping = cls.map_s3_volumes()
            name_alias_mapping.update(s3_volume_mapping)

    devices = cls._model_block_devices(ssh_client)
    cls.logger.info('Starting to iterate over disks')
    return cls._model_devices(ssh_client, name_alias_mapping, devices), name_alias_mapping
def get_local_root_client():
    # type: () -> SSHClient
    """
    Build a new SSHClient that targets the local machine (127.0.0.1)
    :return: The client for the local machine
    :rtype: SSHClient
    """
    local_endpoint = '127.0.0.1'
    return SSHClient(local_endpoint, username='******')
def install_package(cls, package):
    # type: (str) -> None
    """
    Install the given package on the local machine through cls.PACKAGE_MANAGER
    :param package: Name of the package to install
    :type package: str
    :return: None
    """
    client = SSHClient('127.0.0.1', username='******')
    package_manager = cls.PACKAGE_MANAGER
    package_manager.install(package, client)
def get_service_status(name):
    # type: (str) -> Union[Tuple[bool, str], None]
    """
    Look up the status of a service on the local machine
    :param name: Name of the service to check
    :type name: str
    :return: A tuple (is_active, status) where is_active is True when the status equals 'active',
    or None when the service manager does not know the service
    :rtype: tuple or NoneType
    """
    local_client = SSHClient(endpoint='127.0.0.1', username='******')
    manager = ServiceFactory.get_manager()
    if not manager.has_service(name=name, client=local_client):
        return None
    state = manager.get_service_status(name=name, client=local_client)
    return state == 'active', state
def test_text_cleanup(self):
    """
    Validate that SSHClient._clean_text returns the expected text for plain and unicode input
    Every entry maps a test number to a pair [text to clean, expected result]
    """
    tests = {
        0: ['foobar', 'foobar'],
        1: ['aàcçnñ', 'aaccnn'],
        2: ['foo\u201ebar', 'foo\u201ebar'],  # This isn't an actual unicode character, just the characters \, u, 2, 0, 1 and e
        3: [u'foobar', 'foobar'],
        4: [u'foo\u1234bar', 'foobar'],
        5: [u'foo\u201ebar', 'foo"bar'],
        6: [u'aàcçnñ', 'aaccnn']
    }
    # Run in key order so that the first failure is always the lowest test number.
    # The assertion is no longer wrapped in a bare `except: raise`, which only re-raised the error
    for test in sorted(tests):
        original, cleaned = tests[test]
        self.assertEqual(SSHClient._clean_text(original), cleaned)
def mountpoint_usable(cls, mountpoint, ssh_client=None):
    # type: (str, SSHClient) -> bool
    """
    Test whether a mountpoint can be written to
    A file named after the current timestamp is created under the mountpoint and removed again
    :param mountpoint: Mountpoint to test
    :type mountpoint: str
    :param ssh_client: Client to use. A local client is created when none is given
    :type ssh_client: SSHClient
    :return: True if both the creation and the removal succeeded, False when anything raised
    :rtype: bool
    """
    client = ssh_client if ssh_client else SSHClient('127.0.0.1', username='******')
    try:
        probe_file = '{0}/{1}'.format(mountpoint, str(time.time()))
        for command in ('touch', 'rm'):
            client.run([command, probe_file])
    except Exception:
        # Any failure (including building the path) means the mountpoint is reported as not usable
        return False
    return True
def retrieve_alias_mapping(cls, ssh_client=None):
    # type: (SSHClient) -> AliasMapping
    """
    Retrieve the alias mapping. Both ways
    Walks every directory under /dev/disk (except by-uuid and by-partuuid) and registers, per link target,
    the paths of the symlinks that point to it
    :param ssh_client: Client to use. A local client is created when none is given
    :type ssh_client: SSHClient
    :return: The AliasMapping
    :rtype: AliasMapping
    """
    if not ssh_client:
        ssh_client = SSHClient('127.0.0.1', username='******')

    ignored_types = ('by-uuid', 'by-partuuid')  # UUIDs can change after creating a filesystem on a partition
    mapping = AliasMapping()
    for link_type in ssh_client.dir_list(directory='/dev/disk'):
        if link_type in ignored_types:
            continue
        type_directory = '/dev/disk/{0}'.format(link_type)
        for entry in ssh_client.dir_list(directory=type_directory):
            alias_path = '{0}/{1}'.format(type_directory, entry)
            target = ssh_client.file_read_link(path=alias_path)
            if target not in mapping:
                mapping[target] = []
            mapping[target].append(alias_path)
    return mapping
class NBDManager(object):
    """
    Abstract implementation of the NBD Manager object to create, start, stop or remove NBD device services on this node.
    Subclasses have to implement `_get_configuration` and `_get_service_manager`
    """
    DEVICE_PATH = '/dev/{0}'
    SERVICE_NAME = 'ovs-{0}-{1}'
    NBD_SERVICE_NAME = '{0}_{1}'
    MINIMAL_BLOCK_SIZE = 32 * 1024
    OPT_CONFIG_PATH = '/etc/ovs_nbd/{0}'
    NODE_PATH = NBD_ID
    SERVICE_FILE_PATH = '/usr/lib/python2.7/dist-packages/ovs_extensions/config/{0}/'

    # Service file settings: to be overruled
    SERVICE_SCRIPT_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'nbdservice.py')  # Passed to the service as 'SCRIPT'
    WORKING_DIRECTORY = '/usr/lib/python2.7/dist-packages/ovs_extensions/'  # Working directory for the service
    MODULE_PATH = ''  # Empty for the extensions as the ovs_extensions package is found under dist-packages
    MANAGER_SERVICE_NAME = ''

    def __init__(self):
        # type: () -> None
        """
        Initialize the manager with its configuration, its service manager and a client for the local machine
        """
        self._configuration = self._get_configuration()
        self._service_manager = self._get_service_manager()
        self._client = SSHClient('127.0.0.1', username='******')

    @staticmethod
    def _get_configuration():
        """
        Return the configuration object to use. To be implemented by the subclass
        """
        raise NotImplementedError()

    @staticmethod
    def _get_service_manager():
        """
        Return the service manager to use. To be implemented by the subclass
        """
        raise NotImplementedError()

    def create_service(self, volume_uri, block_size=MINIMAL_BLOCK_SIZE):
        # type: (str, int) -> str
        """
        Create NBD service
        Stores the device settings in the configuration and registers a service for the first free nbd device
        :param volume_uri: tcp://user:pass@ip:port/volume-name
        :param block_size: block size in bytes
        :return: path /dev/nbdx
        :raises: RuntimeError if
            volume uri -ip:port does not match ip regex
                       -tcp does not match tcp connection regex
            block size is too small or no integer
            volumedriver-nbd package is not installed
        """
        # Unittests
        if os.environ.get('RUNNING_UNITTESTS') == 'True':
            node_id = 'unittest_guid'
        else:
            node_id = System.get_my_machine_id().strip()

        # Parameter verification
        cache = apt.Cache()
        try:
            # A package that is known to apt is not necessarily installed, so the flag itself has to be checked
            package_installed = cache['volumedriver-nbd'].is_installed
        except KeyError:
            package_installed = False
        if not package_installed:
            raise RuntimeError('Package volumedriver-nbd is not yet installed')
        if type(volume_uri) != str:
            raise RuntimeError('Invalid parameter: {0} should be of type `str`'.format(volume_uri))
        if type(block_size) != int or block_size < self.MINIMAL_BLOCK_SIZE:
            raise RuntimeError('Invalid parameter: {0} should be of type `int` and bigger then > {1}'.format(block_size, self.MINIMAL_BLOCK_SIZE))

        node_path = self.NODE_PATH.format(node_id)
        user_pass, ip_port = volume_uri.split('@')
        ip_port, vol_name = ip_port.split('/')

        ExtensionsToolbox.verify_required_params(required_params={'user_pass': (str, ExtensionsToolbox.regex_tcp_conn, True),
                                                                  'ip_port': (str, ExtensionsToolbox.regex_ip_port, True),
                                                                  'vol_name': (str, None, True)},
                                                 actual_params={'user_pass': user_pass,
                                                                'ip_port': ip_port,
                                                                'vol_name': vol_name},
                                                 verify_keys=True)

        nbd_number = self._find_first_free_device_number(node_path)
        config_path = os.path.join(node_path, nbd_number, 'config')

        # Set self._configuration keys and values in local config
        nbd_path = self.DEVICE_PATH.format(nbd_number)
        config_settings = {'volume_uri': volume_uri,
                           'nbd_path': nbd_path}
        if block_size > NBDManager.MINIMAL_BLOCK_SIZE:
            # The split size is only stored when it differs from the minimal block size
            config_settings['libovsvoldrv_request_split_size'] = block_size
        self._configuration.set(key=config_path, value=yaml.dump(config_settings, default_flow_style=False), raw=True)

        # Add service
        opt_config_path = self.OPT_CONFIG_PATH.format(nbd_number)
        if not self._client.file_exists(opt_config_path):
            self._client.file_create(opt_config_path)
        self._service_manager.add_service(name='nbd',
                                          client=self._client,
                                          params={'NODE_ID': str(node_id),
                                                  'NBDX': nbd_number,
                                                  'SCRIPT': self.SERVICE_SCRIPT_PATH,
                                                  'WD': self.WORKING_DIRECTORY,  # Module path and wd depend on the module the nbd service is called in eg. ISCSI manager
                                                  'MODULE_PATH': self.MODULE_PATH,
                                                  'MGR_SERVICE': self.MANAGER_SERVICE_NAME},
                                          target_name=self.SERVICE_NAME.format(nbd_number, vol_name),
                                          path=self.SERVICE_FILE_PATH)
        return nbd_path

    def _find_first_free_device_number(self, node_path):
        # type: (str) -> str
        """
        Find first device number that is not in use
        :param node_path: path on the node where devices can be found
        :return: nbdX, where X is the lowest number (starting at 0) not listed under node_path
        """
        if os.environ.get('RUNNING_UNITTESTS') == 'True':
            return 'nbd_unittest_number'

        device_number = 0
        if self._configuration.dir_exists(node_path):
            used_numbers = set(int(entry.lstrip('nbd')) for entry in self._configuration.list(node_path))
            while device_number in used_numbers:
                device_number += 1
        return 'nbd{0}'.format(device_number)

    def _get_service_file_path(self, nbd_path):
        # type: (str) -> str
        """
        Get the configuration path of the given nbd device
        :param nbd_path: /dev/nbdx
        :return: return the service config path
        :raises: RuntimeError when multiple or no paths are found
        """
        nbd_number = nbd_path.split('/')[-1]
        # Stripped for consistency with the other users of the machine ID in this class
        local_id = System.get_my_machine_id().strip()
        # Compare complete path components: a substring match would let 'nbd1' also match 'nbd10/config'
        paths = [path for path in self._configuration.list(self.NODE_PATH.format(local_id), recursive=True)
                 if path.endswith('config') and nbd_number in path.split('/')]

        if len(paths) > 1:
            raise RuntimeError('More then 1 path has been found for given nbd_path: {0}'.format(paths))
        if len(paths) == 0:
            raise RuntimeError('No configpath has been found for given device')
        return paths[0]

    def _get_vol_name(self, nbd_path):
        # type: (str) -> str
        """
        Parse volume name from config file specified for nbd_path
        :param nbd_path: /dev/nbdx
        :return: volume name
        """
        nbd_service_path = self._get_service_file_path(nbd_path)
        content = self._configuration.get(nbd_service_path, raw=True)
        # NOTE(review): yaml.load without an explicit Loader can construct arbitrary objects.
        # The content is written by create_service using yaml.dump; consider yaml.safe_load - confirm before changing
        content = yaml.load(content)
        vol_name = content.get('volume_uri').rsplit('/')[-1]
        return vol_name

    def destroy_device(self, nbd_path):
        # type: (str) -> None
        """
        Destroy NBD device with given path
        Stops and removes the service (when present), deletes the device's configuration and removes its file
        under /etc/ovs_nbd (a failing file removal is ignored)
        :param nbd_path: /dev/NBDX
        :return: None
        """
        nbd_number = nbd_path.split('/')[-1]
        vol_name = self._get_vol_name(nbd_path)
        service_name = self.SERVICE_NAME.format(nbd_number, vol_name)
        if self._service_manager.has_service(service_name, client=self._client):
            self._service_manager.stop_service(service_name, client=self._client)
            self._service_manager.remove_service(service_name, client=self._client)
        path_to_delete = str(os.path.join(self.NODE_PATH.format(System.get_my_machine_id().strip()), nbd_number))  # Casting to string as the DAL might have returned a unicode
        self._configuration.delete(path_to_delete)
        try:
            os.remove(self.OPT_CONFIG_PATH.format(nbd_number))
        except OSError:
            # Best effort: the file might not exist (anymore)
            pass

    def start_device(self, nbd_path):
        # type: (str) -> None
        """
        Start NBD device with given path
        Nothing happens when no service is registered for the device
        :param nbd_path: /dev/NBDX
        :return: None
        """
        nbd_number = nbd_path.rsplit('/')[-1]
        vol_name = self._get_vol_name(nbd_path)
        service_name = self.SERVICE_NAME.format(nbd_number, vol_name)
        if self._service_manager.has_service(service_name, self._client):
            self._service_manager.start_service(service_name, self._client)

    def stop_device(self, nbd_path):
        # type: (str) -> None
        """
        Stop the NBD device with the given /dev/nbdx path on current node
        Nothing happens when no service is registered for the device
        :param nbd_path: /dev/NBDX
        :return: None
        """
        nbd_number = nbd_path.split('/')[-1]
        vol_name = self._get_vol_name(nbd_path)
        service_name = self.SERVICE_NAME.format(nbd_number, vol_name)
        if self._service_manager.has_service(service_name, client=self._client):
            self._service_manager.stop_service(service_name, client=self._client)
def setup():
    """
    Interactive setup part for initial asd manager configuration
    The settings are taken from the pre-configuration when it is not empty, otherwise they are asked interactively.
    After validation the settings are registered in the configuration management and stored in the database,
    and the manager and watcher services are added (only the watcher service is started here)
    The process exits with status 1 when no IP could be retrieved, when the manager is already installed,
    when validation fails or when the watcher service could not be started
    :return: None
    :rtype: NoneType
    """
    def _exit_with_error(boxed_text):
        # Log the (boxed) error text and terminate the setup
        _print_and_log(level='error', message='\n' + boxed_text)
        sys.exit(1)

    _print_and_log(message=Interactive.boxed_message(['ASD Manager setup']))

    # Gather information
    ipaddresses = OSFactory.get_manager().get_ip_addresses()
    if not ipaddresses:
        _exit_with_error(Interactive.boxed_message(['Could not retrieve IP information on local node']))
    # Untouched copy: 'ipaddresses' itself is modified while asking for the ASD IPs
    validation_ip_addresses = copy.deepcopy(ipaddresses)

    local_client = SSHClient(endpoint='127.0.0.1', username='******')
    service_manager = ServiceFactory.get_manager()
    if service_manager.has_service(MANAGER_SERVICE, local_client):
        _exit_with_error(Interactive.boxed_message(['The ASD Manager is already installed.']))

    config = _validate_and_retrieve_pre_config()
    interactive = len(config) == 0
    ipmi_info = {'ip': None,
                 'username': None,
                 'pwd': None}

    if interactive:
        api_ip = Interactive.ask_choice(choice_options=ipaddresses,
                                        question='Select the public IP address to be used for the API',
                                        sort_choices=True)
        api_port = Interactive.ask_integer(question="Select the port to be used for the API",
                                           min_value=1025,
                                           max_value=65535,
                                           default_value=8500)
        asd_ips = []
        ipaddresses.append('All')
        while True:
            selection_suffix = ' - Current selected IPs: {0}'.format(asd_ips) if len(asd_ips) > 0 else ''
            chosen_ip = Interactive.ask_choice(choice_options=ipaddresses,
                                               question="Select an IP address to be used for the ASDs or 'All' (All current and future interfaces: 0.0.0.0){0}".format(selection_suffix),
                                               default_value='All')
            if chosen_ip == 'All':
                ipaddresses.remove('All')
                asd_ips = []  # Empty list maps to all IPs - checked when configuring ASDs
                break
            asd_ips.append(chosen_ip)
            ipaddresses.remove(chosen_ip)
            if not Interactive.ask_yesno("Do you want to add another IP?"):
                break
        asd_start_port = Interactive.ask_integer(question="Select the port to be used for the ASDs",
                                                 min_value=1025,
                                                 max_value=65435,
                                                 default_value=8600)
        configuration_store = 'arakoon'

        ipmi_question = 'Do you want to set IPMI configuration keys?'
        if Interactive.ask_yesno(message=ipmi_question, default_value=False) is True:
            ipmi_info['ip'] = Interactive.ask_string(message='Enter the IPMI IP address',
                                                     regex_info={'regex': ExtensionsToolbox.regex_ip})
            ipmi_info['username'] = Interactive.ask_string(message='Enter the IPMI username')
            ipmi_info['pwd'] = Interactive.ask_password(message='Enter the IPMI password')
    else:
        api_ip = config['api_ip']
        api_port = config.get('api_port', 8500)
        asd_ips = config.get('asd_ips', [])
        asd_start_port = config.get('asd_start_port', 8600)
        configuration_store = config.get('configuration_store', 'arakoon')
        ipmi_info = config.get('ipmi', ipmi_info)

    # Validate the gathered settings
    if api_ip not in validation_ip_addresses:
        _exit_with_error(Interactive.boxed_message(lines=['Invalid API IP {0} specified. Please choose from:'.format(api_ip)] + [' * {0}'.format(ip) for ip in ipaddresses]))
    different_ips = set(asd_ips).difference(set(validation_ip_addresses))
    if different_ips:
        _exit_with_error(Interactive.boxed_message(lines=['Invalid ASD IPs {0} specified. Please choose from:'.format(asd_ips)] + [' * {0}'.format(ip) for ip in ipaddresses]))
    if api_port in range(asd_start_port, asd_start_port + 100):
        _exit_with_error(Interactive.boxed_message(['API port cannot be in the range of the ASD port + 100']))

    if interactive:
        # Keep asking until the Arakoon client configuration file is present
        while not local_client.file_exists(CACC_LOCATION):
            _print_and_log(level='warning',
                           message=' - Please place a copy of the Arakoon\'s client configuration file at: {0}'.format(CACC_LOCATION))
            Interactive.ask_continue()

    store_contents = json.dumps({'configuration_store': configuration_store}, indent=4)
    local_client.file_write(filename=CONFIG_STORE_LOCATION, contents=store_contents)

    node_id = Configuration.initialize(config={'api_ip': api_ip,
                                               'asd_ips': asd_ips,
                                               'api_port': api_port,
                                               'asd_start_port': asd_start_port,
                                               'ipmi': ipmi_info})

    # Model settings
    _print_and_log(message=' - Store settings in DB')
    settings_to_store = {'api_ip': api_ip,
                         'api_port': api_port,
                         'configuration_store': configuration_store,
                         'node_id': node_id}
    for code, value in settings_to_store.iteritems():
        setting = Setting()
        setting.code = code
        setting.value = value
        setting.save()

    # Deploy/start services
    _print_and_log(message=' - Deploying and starting services')
    for service in (MANAGER_SERVICE, WATCHER_SERVICE):
        service_manager.add_service(name=service, client=local_client)
    _print_and_log(message=' - Starting watcher service')
    try:
        service_manager.start_service(name=WATCHER_SERVICE, client=local_client)
    except Exception:
        # Undo the registration in the configuration management before giving up
        Configuration.uninitialize()
        _print_and_log(level='exception', message='\n' + Interactive.boxed_message(['Starting watcher failed']))
        sys.exit(1)

    _print_and_log(message='\n' + Interactive.boxed_message(['ASD Manager setup completed']))
class ASDController(object):
    """
    ASD Controller class
    Groups the operations on ASDs of this node: creating, updating, removing and controlling their services
    """
    ASD_PREFIX = 'alba-asd'
    _logger = Logger('controllers')
    _local_client = SSHClient(endpoint='127.0.0.1', username='******')
    _service_manager = ServiceFactory.get_manager()

    @staticmethod
    def calculate_rocksdb_cache_size(is_ssd):
        """
        Calculate the cache size for the RocksDB
        :param is_ssd: Whether the ASD runs on an SSD
        :type is_ssd: bool
        :return: None for an SSD, 128 MiB (in bytes) otherwise
        :rtype: NoneType or int
        """
        if not is_ssd:
            return 128 * 1024 * 1024  # 128 MiB
        return None  # No cache size is required to be specified for ASDs

    @staticmethod
    def create_asd(disk):
        """
        Creates and starts an ASD on a given disk
        The size of the disk is divided equally over the ASDs already on the disk and the new one; the existing
        ASDs get their configuration updated and are signalled (SIGUSR1) to reload it
        :param disk: Disk on which to create an ASD
        :type disk: source.dal.objects.disk.Disk
        :raises RuntimeError: When the disk is missing or when no IP could be found on the local node
        :return: None
        :rtype: NoneType
        """
        local_client = ASDController._local_client
        service_manager = ASDController._service_manager

        # Validations
        if disk.state == 'MISSING':
            raise RuntimeError('Cannot create an ASD on missing disk {0}'.format(disk.name))

        _node_id = SettingList.get_setting_by_code(code='node_id').value
        network_location = ASD_NODE_CONFIG_NETWORK_LOCATION.format(_node_id)
        ipaddresses = Configuration.get('{0}|ips'.format(network_location))
        if len(ipaddresses) == 0:
            # Nothing configured: fall back to the IPs of the local node
            ipaddresses = OSFactory.get_manager().get_ip_addresses(client=local_client)
            if len(ipaddresses) == 0:
                raise RuntimeError('Could not find any IP on the local node')

        # Call here, because this potentially raises error, which should happen before actually making changes
        alba_pkg_name, alba_version_cmd = PackageFactory.get_package_and_version_cmd_for(component='alba')

        # Fetch disk information
        df_output = local_client.run(['df', '-B', '1', '--output=size', disk.mountpoint], timeout=5)
        disk_size = int(df_output.splitlines()[1])

        # Find out appropriate disk size
        asd_size = int(math.floor(disk_size / (len(disk.asds) + 1)))
        for existing_asd in disk.asds:
            if not existing_asd.has_config:
                continue
            existing_config = Configuration.get(existing_asd.config_key)
            existing_config['capacity'] = asd_size
            cache_size = ASDController.calculate_rocksdb_cache_size(is_ssd=disk.is_ssd)
            if cache_size:
                existing_config['rocksdb_block_cache_size'] = cache_size
            Configuration.set(existing_asd.config_key, existing_config)
            try:
                service_manager.send_signal(existing_asd.service_name, signal.SIGUSR1, local_client)
            except Exception as ex:
                ASDController._logger.info('Could not send signal to ASD for reloading the quota: {0}'.format(ex))

        # Collect the ports that are already taken by any ASD
        used_ports = []
        for known_asd in ASDList.get_asds():
            if not known_asd.has_config:
                continue
            known_config = Configuration.get(known_asd.config_key)
            used_ports.append(known_config['port'])
            if 'rora_port' in known_config:
                used_ports.append(known_config['rora_port'])

        # Prepare & start service
        ASDController._logger.info('Setting up service for disk {0}'.format(disk.name))
        id_characters = string.ascii_letters + string.digits
        asd_id = ''.join(random.choice(id_characters) for _ in range(32))
        homedir = '{0}/{1}'.format(disk.mountpoint, asd_id)
        base_port = Configuration.get('{0}|port'.format(network_location))

        asd_port = base_port
        rora_port = base_port + 1
        while asd_port in used_ports:
            asd_port += 1
        used_ports.append(asd_port)  # The rora port may not collide with the port chosen above
        while rora_port in used_ports:
            rora_port += 1

        asd_config = {'ips': ipaddresses,
                      'home': homedir,
                      'port': asd_port,
                      'asd_id': asd_id,
                      'node_id': _node_id,
                      'capacity': asd_size,
                      'multicast': None,
                      'transport': 'tcp',
                      'log_level': 'info'}
        cache_size = ASDController.calculate_rocksdb_cache_size(is_ssd=disk.is_ssd)
        if cache_size:
            asd_config['rocksdb_block_cache_size'] = cache_size
        if Configuration.get('/ovs/framework/rdma'):
            asd_config['rora_port'] = rora_port
            asd_config['rora_transport'] = 'rdma'

        extra_key = '{0}/extra'.format(ASD_NODE_CONFIG_LOCATION.format(_node_id))
        if Configuration.exists(extra_key):
            # Extra settings overrule the settings assembled above
            asd_config.update(Configuration.get(extra_key))

        asd = ASD()
        asd.disk = disk
        asd.port = asd_port
        asd.hosts = ipaddresses
        asd.asd_id = asd_id
        asd.folder = asd_id
        asd.save()

        Configuration.set(asd.config_key, asd_config)
        params = {'LOG_SINK': Logger.get_sink_path('alba-asd_{0}'.format(asd_id)),
                  'CONFIG_PATH': Configuration.get_configuration_path(asd.config_key),
                  'SERVICE_NAME': asd.service_name,
                  'ALBA_PKG_NAME': alba_pkg_name,
                  'ALBA_VERSION_CMD': alba_version_cmd}
        os.mkdir(homedir)
        local_client.run(['chown', '-R', 'alba:alba', homedir])
        service_manager.add_service(name=ASDController.ASD_PREFIX,
                                    client=local_client,
                                    params=params,
                                    target_name=asd.service_name)
        ASDController.start_asd(asd)

    @staticmethod
    def update_asd(asd, update_data):
        """
        Updates an ASD with the 'update_data' provided
        :param asd: ASD to update
        :type asd: source.dal.objects.asd.ASD
        :param update_data: Data to update
        :type update_data: dict
        :raises ValueError: - When ASD configuration key is not present
                            - When an unsupported key is passed in via 'update_data'
        :return: None
        :rtype: NoneType
        """
        # Maps a key of the configuration to the property of the ASD object holding the same information
        key_map = {'ips': 'hosts'}
        if not Configuration.exists(asd.config_key):
            raise ValueError('Failed to the configuration at location {0}'.format(asd.config_key))

        config = Configuration.get(asd.config_key)
        for key, value in update_data.iteritems():
            if key in key_map:
                setattr(asd, key_map[key], value)
                config[key] = value
            else:
                # Only updating IPs is supported for now
                raise ValueError('Unsupported property provided: {0}. Only IPs can be updated for now'.format(key))
        asd.save()
        Configuration.set(key=asd.config_key, value=config)

    @staticmethod
    def remove_asd(asd):
        """
        Remove an ASD
        Stops and removes its service (when present), deletes its data directory (failures are only logged),
        its configuration and finally the ASD object itself
        :param asd: ASD to remove
        :type asd: source.dal.objects.asd.ASD
        :return: None
        :rtype: NoneType
        """
        manager = ASDController._service_manager
        client = ASDController._local_client
        if manager.has_service(asd.service_name, client):
            manager.stop_service(asd.service_name, client)
            manager.remove_service(asd.service_name, client)
        try:
            client.dir_delete('{0}/{1}'.format(asd.disk.mountpoint, asd.asd_id))
        except Exception:
            ASDController._logger.exception('Could not clean ASD data')
        Configuration.delete(asd.config_key)
        asd.delete()

    @staticmethod
    def start_asd(asd):
        """
        Start an ASD. Nothing happens when the ASD has no service
        :param asd: ASD to start
        :type asd: source.dal.objects.asd.ASD
        :return: None
        :rtype: NoneType
        """
        manager = ASDController._service_manager
        client = ASDController._local_client
        if manager.has_service(asd.service_name, client):
            manager.start_service(asd.service_name, client)

    @staticmethod
    def stop_asd(asd):
        """
        Stop an ASD. Nothing happens when the ASD has no service
        :param asd: ASD to stop
        :type asd: source.dal.objects.asd.ASD
        :return: None
        :rtype: NoneType
        """
        manager = ASDController._service_manager
        client = ASDController._local_client
        if manager.has_service(asd.service_name, client):
            manager.stop_service(asd.service_name, client)

    @staticmethod
    def restart_asd(asd):
        """
        Restart an ASD. Nothing happens when the ASD has no service
        :param asd: ASD to restart
        :type asd: source.dal.objects.asd.ASD
        :return: None
        :rtype: NoneType
        """
        manager = ASDController._service_manager
        client = ASDController._local_client
        if manager.has_service(asd.service_name, client):
            manager.restart_service(asd.service_name, client)

    @staticmethod
    def list_asd_services():
        """
        Retrieve all ASD services
        :return: The names of the services on this node that start with the ASD service prefix
        :rtype: generator
        """
        for name in ASDController._service_manager.list_services(ASDController._local_client):
            if not name.startswith(ASD.ASD_SERVICE_PREFIX.format('')):
                continue
            yield name
def __init__(self):
    # type: () -> None
    """
    Set up the manager: fetch the configuration and the service manager and create a client for the local machine
    """
    configuration = self._get_configuration()
    service_manager = self._get_service_manager()
    local_client = SSHClient('127.0.0.1', username='******')

    self._configuration = configuration
    self._service_manager = service_manager
    self._client = local_client
class ASD(ASDBase):
    """
    Represents an ASD that has been deployed.
    """
    ASD_CONFIG = '/ovs/alba/asds/{0}/config'  # Configuration key of an ASD, formatted with the ASD ID
    ASD_SERVICE_PREFIX = 'alba-asd-{0}'  # Service name of an ASD, formatted with the ASD ID

    _local_client = SSHClient(endpoint='127.0.0.1', username='******')
    _service_manager = ServiceFactory.get_manager()

    _table = 'asd'
    _properties = [Property(name='port', property_type=int, unique=True, mandatory=True),
                   Property(name='hosts', property_type=list, unique=False, mandatory=True),
                   Property(name='asd_id', property_type=str, unique=True, mandatory=True),
                   Property(name='folder', property_type=str, unique=False, mandatory=False)]
    _relations = [['disk', Disk, 'asds']]
    _dynamics = ['service_name', 'config_key', 'has_config']

    def _service_name(self):
        """
        Name of the service of this ASD
        """
        return ASD.ASD_SERVICE_PREFIX.format(self.asd_id)

    def _config_key(self):
        """
        Key under which the configuration of this ASD is stored
        """
        return ASD.ASD_CONFIG.format(self.asd_id)

    def _has_config(self):
        """
        Whether the configuration key of this ASD exists
        """
        return Configuration.exists(self.config_key)

    def export(self):
        """
        Exports the ASD information to a dict structure
        The configuration of the ASD is extended with its properties ('hosts' is exported as 'ips') and with
        a 'state' and 'state_detail' describing the health of the disk and the service
        :raises RuntimeError: When the ASD has no configuration
        :return: Representation of the ASD as dict
        :rtype: dict
        """
        if not self.has_config:
            raise RuntimeError('No configuration found for ASD {0}'.format(self.asd_id))

        data = Configuration.get(self.config_key)
        for prop in self._properties:
            export_key = 'ips' if prop.name == 'hosts' else prop.name
            data[export_key] = getattr(self, prop.name)

        def _set_state(state, detail):
            # Register the state information in the exported data
            data.update({'state': state,
                         'state_detail': detail})

        if self.disk.state == 'MISSING':
            _set_state('error', 'missing')
            return data

        # List the ASD folder to detect I/O errors on the disk
        output, error = ASD._local_client.run(['ls', '{0}/{1}/'.format(self.disk.mountpoint, self.folder)],
                                              allow_nonzero=True,
                                              return_stderr=True)
        output += error
        if 'Input/output error' in output:
            _set_state('error', 'io_error')
        elif not ASD._service_manager.has_service(self.service_name, ASD._local_client):
            _set_state('error', 'service_failure')
        else:
            service_state = ASD._service_manager.get_service_status(self.service_name, ASD._local_client)
            if service_state == 'activating':
                _set_state('warning', 'service_activating')
            elif service_state == 'active':
                _set_state('ok', None)
            else:
                _set_state('error', 'service_failure')
        return data
class Disk(ASDBase):
    """
    Represents a disk on the system.
    """
    _local_client = SSHClient(endpoint='127.0.0.1', username='******')

    _table = 'disk'
    _properties = [Property(name='name', property_type=str, unique=True, mandatory=True),
                   Property(name='state', property_type=str, unique=False, mandatory=False),
                   Property(name='aliases', property_type=list, unique=True, mandatory=False),
                   Property(name='is_ssd', property_type=bool, unique=False, mandatory=False),
                   Property(name='model', property_type=str, unique=False, mandatory=False),
                   Property(name='size', property_type=int, unique=False, mandatory=True),
                   Property(name='serial', property_type=str, unique=True, mandatory=False),
                   Property(name='partitions', property_type=list, unique=False, mandatory=False)]
    _relations = []
    _dynamics = ['mountpoint', 'available', 'usable', 'status', 'usage', 'partition_aliases']

    def _mountpoint(self):
        """
        Mountpoint of the first partition that has one, None when no partition is mounted
        """
        return next((partition['mountpoint'] for partition in self.partitions if partition['mountpoint'] is not None), None)

    def _available(self):
        """
        Whether the disk is not mounted under /mnt/alba-asd/ (an unmounted disk counts as available)
        """
        if self.mountpoint is None:
            return True
        return not self.mountpoint.startswith('/mnt/alba-asd/')

    def _usable(self):
        """
        Whether the disk can be used: at most one mounted partition, which has to be mounted under /mnt/alba-asd/.
        Without any mounted partition, none of the partitions may hold swap, a raid member or an LVM member
        """
        mounted = [partition['mountpoint'] for partition in self.partitions if partition['mountpoint'] is not None]
        if len(mounted) > 1:
            return False  # Multiple mount points: Not supported
        if self.mountpoint is not None:
            # Only one mount point. Accept if it managed by us
            return self.mountpoint.startswith('/mnt/alba-asd/')
        # No mount point(s): Search for "forbidden" partition types
        forbidden_filesystems = ['swap', 'linux_raid_member', 'LVM2_member']
        return not any(partition['filesystem'] in forbidden_filesystems for partition in self.partitions)

    def _status(self):
        """
        State of the disk as dict with a 'state' key and, for errors, a 'detail' key
        """
        if self.mountpoint is not None:
            if self.state == 'MISSING':
                return {'state': 'error',
                        'detail': 'missing'}
            # List the mountpoint to detect I/O errors
            output, error = self._local_client.run(['ls', '{0}/'.format(self.mountpoint)],
                                                   allow_nonzero=True,
                                                   return_stderr=True,
                                                   timeout=5)
            output += error
            if 'Input/output error' in output:
                return {'state': 'error',
                        'detail': 'io_error'}
        state = 'empty' if len(self.asds) == 0 else 'ok'
        return {'state': state}

    def _usage(self):
        """
        Size, used and available bytes of the mountpoint as reported by df, an empty dict when not available
        """
        if self.mountpoint is None:
            return {}
        escaped_mountpoint = self.mountpoint.replace(r"'", r"'\''")
        command = "df -B 1 --output=size,used,avail '{0}' | tail -1 || true".format(escaped_mountpoint)
        df_info = self._local_client.run(command, allow_insecure=True, timeout=5).strip().splitlines()
        if len(df_info) != 1:
            return {}
        size, used, available = df_info[0].split()
        return {'size': int(size),
                'used': int(used),
                'available': int(available)}

    def _partition_aliases(self):
        """
        Aliases of all partitions of this disk, combined in a single list
        """
        all_aliases = []
        for partition in self.partitions:
            all_aliases.extend(partition['aliases'])
        return all_aliases

    def export(self):
        """
        Exports this Disk's information to a dict structure
        :return: Representation of the Disk as dict
        :rtype: dict
        """
        return {'size': self.size,
                'usage': self.usage,
                'state': self.status['state'],
                'device': '/dev/{0}'.format(self.name),
                'aliases': self.aliases,
                'node_id': SettingList.get_setting_by_code(code='node_id').value,
                'available': self.available,
                'mountpoint': self.mountpoint,
                'state_detail': self.status.get('detail', ''),
                'partition_amount': len(self.partitions),
                'partition_aliases': self.partition_aliases}
def remove(silent=None):
    """
    Interactive removal part for the ASD manager

    Removes the ASDs and cleans their disks, removes the maintenance, watcher and manager services,
    unregisters from the configuration management and finally deletes the local database.
    The process exits with code 1 when the manager was already removed, when the configuration
    management cannot be reached or when the user aborts.
    :param silent: If silent == '--force-yes' no question will be asked to confirm the removal
    :type silent: str
    :return: None
    :rtype: NoneType
    """
    _print_and_log(message='\n' + Interactive.boxed_message(['ASD Manager removal']))

    local_client = SSHClient(endpoint='127.0.0.1', username='******')
    if not local_client.file_exists(filename='{0}/main.db'.format(Setting.DATABASE_FOLDER)):
        _print_and_log(level='error',
                       message='\n' + Interactive.boxed_message(['The ASD Manager has already been removed']))
        sys.exit(1)

    _print_and_log(message=' - Validating configuration management')
    try:
        Configuration.list(key='ovs')
    except Exception:
        # Only real errors are reported as connection problems; SystemExit / KeyboardInterrupt propagate
        _print_and_log(level='exception',
                       message='\n' + Interactive.boxed_message(['Could not connect to Arakoon']))
        sys.exit(1)

    _print_and_log(message=' - Retrieving ASD information')
    all_asds = {}
    try:
        all_asds = ASDList.get_asds()
    except Exception:
        # Best effort: continue the removal without ASD information
        _print_and_log(level='exception', message=' - Failed to retrieve the ASD information')

    interactive = silent != '--force-yes'
    if interactive is True:
        message = 'Are you sure you want to continue?'
        if len(all_asds) > 0:
            _print_and_log(message='\n\n+++ ALERT +++\n', level='warning')
            message = 'DATA LOSS possible if proceeding! Continue?'

        proceed = Interactive.ask_yesno(message=message, default_value=False)
        if proceed is False:
            _print_and_log(level='error', message='\n' + Interactive.boxed_message(['Abort removal']))
            sys.exit(1)

    if len(all_asds) > 0:
        _print_and_log(message=' - Removing disks')
        for disk in DiskList.get_disks():
            if disk.available is True:
                # Disks which are not in use by the manager are left untouched
                continue
            try:
                _print_and_log(message=' - Retrieving ASD information for disk {0}'.format(disk.name))
                for asd in disk.asds:
                    _print_and_log(message=' - Removing ASD {0}'.format(asd.name))
                    ASDController.remove_asd(asd)
                DiskController.clean_disk(disk)
            except Exception:
                # A failure on one disk must not prevent the removal of the others
                _print_and_log(level='exception', message=' - Deleting ASDs failed')

    _print_and_log(message=' - Removing services')
    service_manager = ServiceFactory.get_manager()
    for service_name in MaintenanceController.get_services():
        _print_and_log(message=' - Removing service {0}'.format(service_name))
        # Look up the ALBA Backend the maintenance service belongs to, so its configuration can be removed too
        guid = None
        for alba_backend_guid in Configuration.list(key='/ovs/alba/backends'):
            for maintenance_service_name in Configuration.list(key='/ovs/alba/backends/{0}/maintenance/'.format(alba_backend_guid)):
                if maintenance_service_name == service_name:
                    guid = alba_backend_guid
                    break
        MaintenanceController.remove_maintenance_service(name=service_name, alba_backend_guid=guid)

    for service_name in [WATCHER_SERVICE, MANAGER_SERVICE]:
        if service_manager.has_service(name=service_name, client=local_client):
            _print_and_log(message=' - Removing service {0}'.format(service_name))
            service_manager.stop_service(name=service_name, client=local_client)
            service_manager.remove_service(name=service_name, client=local_client)

    _print_and_log(message=' - Removing from configuration management')
    remaining_users = Configuration.uninitialize()
    if not remaining_users:
        # The CACC file is only deleted when Configuration.uninitialize() reports no remaining users
        local_client.file_delete(filenames=CACC_LOCATION)

    local_client.file_delete(filenames='{0}/main.db'.format(Setting.DATABASE_FOLDER))
    _print_and_log(message='\n' + Interactive.boxed_message(['ASD Manager removal completed']))
class DiskController(object):
    """
    Disk helper methods: syncing the modeled disks with the devices found on the system and
    preparing, cleaning, remounting and locating disks
    """
    controllers = {}
    _local_client = SSHClient(endpoint='127.0.0.1', username='******')
    _logger = Logger('controllers')

    @staticmethod
    def sync_disks():
        # type: () -> None
        """
        Syncs the disks
        Changes made to this code should be reflected in the framework DiskController.sync_with_reality call.
        :return: None
        :rtype: NoneType
        """
        node_id = SettingList.get_setting_by_code(code='node_id').value
        s3 = Configuration.get(ASD_NODE_CONFIG_MAIN_LOCATION_S3.format(node_id), default=False)
        disks, name_alias_mapping = DiskTools.model_devices(s3=s3)
        disks_by_name = dict((disk.name, disk) for disk in disks)
        alias_name_mapping = name_alias_mapping.reverse_mapping()
        # Specific for the asd-manager: handle unique constraint exception
        DiskController._prepare_for_name_switch(disks)
        # Sync the model
        for disk in DiskList.get_disks():
            generic_disk_model = None  # type: GenericDisk
            for alias in disk.aliases:  # IBS wont have alias
                if alias in alias_name_mapping:
                    name = alias_name_mapping[alias].replace('/dev/', '')
                    if name in disks_by_name:
                        generic_disk_model = disks_by_name.pop(name)
                        break
            # Partitioned loop, nvme devices no longer show up in alias_name_mapping
            if generic_disk_model is None and disk.name in disks_by_name and disk.name.startswith(('fio', 'loop', 'nvme')):
                generic_disk_model = disks_by_name.pop(disk.name)

            if not generic_disk_model:
                # Remove disk / partitions if not reported by 'lsblk'
                DiskController._remove_disk_model(disk)
            else:
                # Update existing disks and their partitions
                DiskController._sync_disk_with_model(disk, generic_disk_model)
        # Create all disks and their partitions not yet modeled
        for generic_disk_model in disks_by_name.values():
            DiskController._model_disk(generic_disk_model)

    @classmethod
    def _remove_disk_model(cls, modeled_disk):
        # type: (Disk) -> None
        """
        Remove the modeled disk
        The disk is deleted when it has no ASDs, otherwise its state is changed to MISSING
        :param modeled_disk: The modeled disk
        :type modeled_disk: Disk
        :return: None
        :rtype: NoneType
        """
        cls._logger.info('Disk {0} - No longer found'.format(modeled_disk.name))
        if len(modeled_disk.asds) == 0:
            modeled_disk.delete()
            cls._logger.info('Disk {0} - Deleted (no ASDs)'.format(modeled_disk.name))
        else:
            if modeled_disk.state != 'MISSING':
                for partition in modeled_disk.partitions:
                    cls._logger.warning('Disk {0} - Partition with offset {1} - Updated status to MISSING'.format(modeled_disk.name, partition['offset']))
                modeled_disk.state = 'MISSING'
                modeled_disk.save()
                cls._logger.warning('Disk {0} - Updated status to MISSING'.format(modeled_disk.name))

    @classmethod
    def _sync_disk_with_model(cls, modeled_disk, generic_modeled_disk):
        # type: (Disk, GenericDisk) -> None
        """
        Sync a generic disk with the modeled disk
        :param modeled_disk: The modeled disk
        :type modeled_disk: Disk
        :param generic_modeled_disk: The generic modeled disk (returned by Disktools)
        :type generic_modeled_disk: GenericDisk
        :return: None
        :rtype NoneType
        """
        cls._logger.info('Disk {0} - Found, updating'.format(modeled_disk.name))
        cls._update_disk(modeled_disk, generic_modeled_disk)

    @classmethod
    def _model_disk(cls, generic_disk_model):
        # type: (GenericDisk) -> Disk
        """
        Models a disk
        :param generic_disk_model: The generic modeled disk (returned by Disktools)
        :type generic_disk_model: GenericDisk
        :return: The newly modeled disk
        :rtype: Disk
        """
        cls._logger.info('Disk {0} - Creating disk - {1}'.format(generic_disk_model.name, generic_disk_model.__dict__))
        disk = Disk()
        disk.name = generic_disk_model.name
        cls._update_disk(disk, generic_disk_model)
        return disk

    @staticmethod
    def _update_disk(modeled_disk, generic_disk_model):
        # type: (Disk, GenericDisk) -> None
        """
        Updates a disk
        Copies all properties from the generic modeled disk to the own model and saves it
        :param modeled_disk: The modeled disk
        :type modeled_disk: Disk
        :param generic_disk_model: The generic modeled disk (returned by Disktools)
        :type generic_disk_model: GenericDisk
        :return: None
        :rtype NoneType
        """
        for prop in ['state', 'aliases', 'is_ssd', 'model', 'size', 'name', 'serial', 'partitions']:
            if hasattr(generic_disk_model, prop):
                if prop == 'partitions':
                    # Update partition info: partitions are stored as plain dicts
                    partitions_as_dicts = [partition.__dict__ for partition in generic_disk_model.partitions]
                    modeled_disk.partitions = partitions_as_dicts
                else:
                    setattr(modeled_disk, prop, getattr(generic_disk_model, prop))
        modeled_disk.save()

    @classmethod
    def _prepare_for_name_switch(cls, generic_disks):
        # type: (List[GenericDisk]) -> None
        """
        This manager has a unique constraint on the disk name
        It could happen that a disk switched drive letter. To avoid any issues while syncing the disk, the name is temporarily changed
        :param generic_disks: List of the disks currently found by the system
        :type generic_disks: list
        :return: None
        :rtype: NoneType
        """
        # Check names to avoid a unique constraint exception
        for generic_disk in generic_disks:  # type: GenericDisk
            if len(generic_disk.aliases) >= 1:
                disk_alias = generic_disk.aliases[0]
                try:
                    disk = DiskList.get_by_alias(disk_alias)
                    # Compare the modeled name with the name currently reported by the system
                    if disk.name != generic_disk.name:
                        cls._logger.info('Disk with alias {0} its name has changed from {1} to {2},'
                                         ' changing disk names to circumvent unique constraints'.format(disk_alias, disk.name, generic_disk.name))
                        disk.name = str(uuid.uuid4())
                        disk.save()
                except ObjectNotFoundException:
                    # No disk with such an alias. Will be caught later in the sync disk by adding the left-over models
                    pass

    @classmethod
    def prepare_disk(cls, disk):
        """
        Prepare a disk for use with ALBA
        Creates a single GPT partition with an XFS filesystem, registers it in the fstab and mounts it
        :param disk: Disk object to prepare
        :type disk: source.dal.objects.disk.Disk
        :raises RuntimeError: When the disk is not usable or when the partition does not become ready
        :return: None
        """
        if disk.usable is False:
            raise RuntimeError('Cannot prepare disk {0}'.format(disk.name))
        cls._logger.info('Preparing disk {0}'.format(disk.name))

        # Create partition
        mountpoint = '/mnt/alba-asd/{0}'.format(''.join(random.choice(string.ascii_letters + string.digits) for _ in range(16)))
        alias = disk.aliases[0]
        cls._locate(device_alias=alias, start=False)
        if disk.mountpoint is not None:
            # Nothing to unmount when the disk has no mountpoint
            cls._local_client.run(['umount', disk.mountpoint], allow_nonzero=True)
        cls._local_client.run(['parted', alias, '-s', 'mklabel', 'gpt'])
        cls._local_client.run(['parted', alias, '-s', 'mkpart', alias.split('/')[-1], '2MB', '100%'])
        cls._local_client.run(['udevadm', 'settle'])  # Waits for all udev rules to have finished

        # Wait for partition to be ready by attempting to add filesystem
        counter = 0
        already_mounted = False
        while True:
            disk = Disk(disk.id)
            if len(disk.partitions) == 1:
                try:
                    cls._local_client.run(['mkfs.xfs', '-qf', disk.partition_aliases[0]])
                    break
                except CalledProcessError:
                    existing_mountpoint = disk.mountpoint
                    if existing_mountpoint:
                        # Only take over a real mountpoint, the generated one must not be replaced by None
                        mountpoint = existing_mountpoint
                        if mountpoint in cls._local_client.run(['mount']):
                            # Some OSes have auto-mount functionality making mkfs.xfs to fail when the mountpoint has already been mounted
                            # This can occur when the exact same partition gets created on the device
                            already_mounted = True
                            if mountpoint.startswith('/mnt/alba-asd'):
                                cls._local_client.run('rm -rf {0}/*'.format(mountpoint), allow_insecure=True)
                            cls._logger.warning('Device has already been used by ALBA, re-using mountpoint {0}'.format(mountpoint))
                            break
            cls._logger.info('Partition for disk {0} not ready yet'.format(disk.name))
            cls.sync_disks()
            time.sleep(0.2)
            counter += 1
            if counter > 10:
                raise RuntimeError('Partition for disk {0} not ready in 2 seconds'.format(disk.name))

        # Create mountpoint and mount
        cls._local_client.run(['mkdir', '-p', mountpoint])
        FSTab.add(partition_aliases=[disk.partition_aliases[0]], mountpoint=mountpoint)
        if already_mounted is False:
            cls._local_client.run(['mount', mountpoint])
        cls.sync_disks()
        cls._local_client.run(['chown', '-R', 'alba:alba', mountpoint])
        cls._logger.info('Prepare disk {0} complete'.format(disk.name))

    @classmethod
    def clean_disk(cls, disk):
        """
        Removes the given disk
        Removes the fstab entries, unmounts and deletes the mountpoint and re-labels the device
        :param disk: Disk object to clean
        :type disk: source.dal.objects.disk.Disk
        :raises RuntimeError: When the disk is not usable
        :return: None
        """
        if disk.usable is False:
            raise RuntimeError('Cannot clean disk {0}'.format(disk.name))
        cls._logger.info('Cleaning disk {0}'.format(disk.name))

        FSTab.remove(disk.partition_aliases)
        if disk.mountpoint is not None:
            umount_cmd = ['umount', disk.mountpoint]
            try:
                cls._local_client.run(umount_cmd)
                cls._local_client.dir_delete(disk.mountpoint)
            except Exception:
                cls._logger.exception('Failure to umount or delete the mountpoint')
                raise
        try:
            cls._local_client.run(['parted', disk.aliases[0], '-s', 'mklabel', 'gpt'])
        except CalledProcessError:
            # Wiping the partition is a nice-to-have and might fail when a disk is e.g. unavailable
            pass
        cls.sync_disks()
        cls._locate(device_alias=disk.aliases[0], start=True)
        cls._logger.info('Clean disk {0} complete'.format(disk.name))

    @classmethod
    def remount_disk(cls, disk):
        """
        Remount the disk
        Both the lazy unmount and the mount are allowed to fail
        :param disk: Disk object to remount
        :type disk: source.dal.objects.disk.Disk
        :raises RuntimeError: When the disk is not usable
        :return: None
        """
        if disk.usable is False:
            raise RuntimeError('Cannot remount disk {0}'.format(disk.name))

        cls._logger.info('Remounting disk {0}'.format(disk.name))
        cls._local_client.run(['umount', '-l', disk.mountpoint], timeout=10, allow_nonzero=True)
        cls._local_client.run(['mount', disk.mountpoint], timeout=10, allow_nonzero=True)
        cls._logger.info('Remounting disk {0} complete'.format(disk.name))

    @classmethod
    def scan_controllers(cls):
        """
        Scan the disk controller(s)
        When 'storcli64' is available, the WWN of every drive in JBOD state is mapped to its location
        and the result is stored in DiskController.controllers
        :return: None
        """
        cls._logger.info('Scanning controllers')
        controllers = {}
        has_storecli = cls._local_client.run(['which', 'storcli64'], allow_nonzero=True).strip() != ''
        if has_storecli is True:
            controller_info = json.loads(cls._local_client.run(['storcli64', '/call/eall/sall', 'show', 'all', 'J']))
            for controller in controller_info['Controllers']:
                if controller['Command Status']['Status'] == 'Failure':
                    continue
                data = controller['Response Data']
                # Keys look like 'Drive <location>' and 'Drive <location> - Detailed Information'
                drive_locations = set(drive.split(' ')[1] for drive in data.keys())
                for location in drive_locations:
                    if data['Drive {0}'.format(location)][0]['State'] == 'JBOD':
                        wwn = data['Drive {0} - Detailed Information'.format(location)]['Drive {0} Device attributes'.format(location)]['WWN']
                        controllers[wwn] = ('storcli64', location)
        cls.controllers = controllers
        cls._logger.info('Scan complete')

    @classmethod
    def _locate(cls, device_alias, start):
        """
        Locate the disk on the controller
        :param device_alias: Alias for the device (eg: '/dev/disk/by-path/pci-0000:03:00.0-sas-0x5000c29f4cf04566-lun-0' or 'pci-0000:03:00.0-sas-0x5000c29f4cf04566-lun-0')
        :type device_alias: str
        :param start: True to start locating, False otherwise
        :type start: bool
        :return: None
        """
        if cls.controllers == {}:
            # Nothing known yet (or nothing found previously): scan the controllers
            cls.scan_controllers()
        for wwn in cls.controllers:
            if device_alias and device_alias.endswith(wwn):
                controller_type, location = cls.controllers[wwn]
                if controller_type == 'storcli64':
                    cls._logger.info('Location {0} for {1}'.format('start' if start is True else 'stop', location))
                    cls._local_client.run(['storcli64', location, 'start' if start is True else 'stop', 'locate'])
class SDMUpdateController(object):
    """
    Update Controller class for SDM package
    Gathers update information, installs packages, restarts services and runs the out of band migrations
    """
    _local_client = SSHClient(endpoint='127.0.0.1', username='******')
    _logger = Logger(name='update', forced_target_type='file')
    _package_manager = PackageFactory.get_manager()
    _service_manager = ServiceFactory.get_manager()

    @classmethod
    def get_package_information(cls):
        """
        Retrieve the installed and candidate versions of all packages relevant for this repository
        (See PackageFactory.get_package_info)

        If installed version is lower than candidate version, this information is stored
        If installed version is equal or higher than candidate version we verify whether all relevant services have the correct binary active
        Whether a service has the correct binary version in use, we use the ServiceFactory.get_service_update_versions functionality

        In this function the services for each component / package combination are defined
        This service information consists out of:
            * Services to stop (before update) and start (after update of packages) -> 'services_stop_start'
            * Services to restart after update (post-update logic)                  -> 'services_post_update'
            * Down-times which will be caused due to service restarts               -> 'downtime'
            * Prerequisites that have not been met                                  -> 'prerequisites'

        The installed vs candidate version which is displayed always gives priority to the versions effectively installed on the system
        and not the versions as reported by the service files

        This combined information is then stored in the 'package_information' of the ALBA Node DAL object
        :return: Update information, keyed by component
        :rtype: dict
        """
        cls._logger.info('Refreshing update information')

        binaries = cls._package_manager.get_binary_versions(client=cls._local_client)
        update_info = {}
        package_info = PackageFactory.get_packages_to_update(client=cls._local_client)  # {'alba': {'openvstorage-sdm': {'installed': 'ee-1.6.1', 'candidate': 'ee-1.6.2'}}}
        cls._logger.debug('Binary versions found: {0}'.format(binaries))
        cls._logger.debug('Package info found: {0}'.format(package_info))
        for component, package_names in PackageFactory.get_package_info()['names'].iteritems():
            package_names = sorted(package_names)
            cls._logger.debug('Validating component {0} and related packages: {1}'.format(component, package_names))
            # Every component starts from a private copy of the default entry
            if component not in update_info:
                update_info[component] = copy.deepcopy(ServiceFactory.DEFAULT_UPDATE_ENTRY)
            svc_component_info = update_info[component]
            pkg_component_info = package_info.get(component, {})

            for package_name in package_names:
                cls._logger.debug('Validating package {0}'.format(package_name))
                # Only the ALBA packages have services (ASD and maintenance) to verify
                if package_name in [PackageFactory.PKG_ALBA, PackageFactory.PKG_ALBA_EE]:
                    for service_name in sorted(list(ASDController.list_asd_services())) + sorted(list(MaintenanceController.get_services())):
                        service_version = ServiceFactory.get_service_update_versions(client=cls._local_client, service_name=service_name, binary_versions=binaries)
                        cls._logger.debug('Service {0} has version: {1}'.format(service_name, service_version))
                        # If package_name in pkg_component_info --> update available (installed <--> candidate)
                        # If service_version is not None --> service is running an older binary version
                        if package_name in pkg_component_info or service_version is not None:
                            svc_component_info['services_post_update'][20].append(service_name)
                        if service_version is not None and package_name not in svc_component_info['packages']:
                            svc_component_info['packages'][package_name] = service_version

                # Extend the service information with the package information related to this repository for current ALBA Node
                if package_name in pkg_component_info and package_name not in svc_component_info['packages']:
                    cls._logger.debug('Adding package {0} because it has an update available'.format(package_name))
                    svc_component_info['packages'][package_name] = pkg_component_info[package_name]
        cls._logger.info('Refreshed update information')
        return update_info

    @classmethod
    def update(cls, package_name):
        """
        Update the package on the local node
        :param package_name: Name of the package to install
        :return: None
        :rtype: NoneType
        """
        cls._logger.info('Installing package {0}'.format(package_name))
        cls._package_manager.install(package_name=package_name, client=cls._local_client)
        cls._logger.info('Installed package {0}'.format(package_name))

    @classmethod
    def get_installed_version_for_package(cls, package_name):
        """
        Retrieve the currently installed package version
        :param package_name: Name of the package to retrieve the version for
        :type package_name: str
        :return: Version of the currently installed package, None when the package manager does not report the package
        :rtype: str
        """
        # NOTE(review): client=None is passed here instead of cls._local_client - confirm this is intended
        installed_version = cls._package_manager.get_installed_versions(client=None, package_names=[package_name])
        if package_name in installed_version:
            return str(installed_version[package_name])

    @classmethod
    def restart_services(cls, service_names):
        """
        Restart the services specified
        Services whose status is not 'active' are skipped, a failing restart is logged and does not abort the loop
        :param service_names: Names of the services to restart. When empty, all ASD and maintenance services are taken
        :type service_names: list[str]
        :return: None
        :rtype: NoneType
        """
        if len(service_names) == 0:
            # A new list is built, the list passed in by the caller is not modified
            service_names = [service_name for service_name in ASDController.list_asd_services()]
            service_names.extend([service_name for service_name in MaintenanceController.get_services()])

        for service_name in service_names:
            cls._logger.warning('Verifying whether service {0} needs to be restarted'.format(service_name))
            if cls._service_manager.get_service_status(service_name, cls._local_client) != 'active':
                cls._logger.warning('Found stopped service {0}. Will not start it.'.format(service_name))
                continue

            cls._logger.info('Restarting service {0}'.format(service_name))
            try:
                cls._service_manager.restart_service(service_name, cls._local_client)
            except CalledProcessError:
                cls._logger.exception('Failed to restart service {0}'.format(service_name))

    @classmethod
    def execute_migration_code(cls):
        # type: () -> None
        """
        Run some migration code after an update has been done
        The crucial part (modeling the required settings and removing the bootstrap file) re-raises on failure
        The non-crucial part collects its errors; the migration version is only saved when none occurred
        :return: None
        :rtype: NoneType
        """
        cls._logger.info('Starting out of band migrations for SDM nodes')

        ###########################
        # Start crucial migration #
        ###########################

        # Removal of bootstrap file and store API IP, API port and node ID in SQLite DB
        try:
            if cls._local_client.file_exists(BOOTSTRAP_FILE):
                cls._logger.info('Bootstrap file still exists. Retrieving node ID')
                with open(BOOTSTRAP_FILE) as bstr_file:
                    node_id = json.load(bstr_file)['node_id']
            else:
                node_id = SettingList.get_setting_by_code(code='node_id').value
        except Exception:
            cls._logger.exception('Unable to determine the node ID, cannot migrate')
            raise

        try:
            api_settings_map = {'api_ip': 'ip', 'api_port': 'port'}  # Map settings code to keys in the Config management
            required_settings = ['node_id', 'migration_version'] + api_settings_map.keys()
            for settings_code in required_settings:
                try:
                    _ = SettingList.get_setting_by_code(settings_code)
                except ObjectNotFoundException:
                    # The setting is not modeled yet: determine its value and create it
                    cls._logger.info('Missing required settings: {0}'.format(settings_code))
                    if settings_code == 'node_id':
                        value = node_id
                    elif settings_code in api_settings_map.keys():
                        # Information must be extracted from Configuration
                        main_config = Configuration.get(ASD_NODE_CONFIG_MAIN_LOCATION.format(node_id))
                        value = main_config[api_settings_map[settings_code]]
                    elif settings_code == 'migration_version':
                        # Introduce version for ASD Manager migration code
                        value = 0
                    else:
                        raise NotImplementedError('No action implemented for setting {0}'.format(settings_code))

                    cls._logger.info('Modeling Setting with code {0} and value {1}'.format(settings_code, value))
                    setting = Setting()
                    setting.code = settings_code
                    setting.value = value
                    setting.save()

            if cls._local_client.file_exists(BOOTSTRAP_FILE):
                cls._logger.info('Removing the bootstrap file')
                cls._local_client.file_delete(BOOTSTRAP_FILE)
        except Exception:
            cls._logger.exception('Error during migration of code settings. Unable to proceed')
            raise

        ###############################
        # Start non-crucial migration #
        ###############################

        errors = []
        migration_setting = SettingList.get_setting_by_code(code='migration_version')
        # Add installed package_name in version files and additional string replacements in service files
        try:
            if migration_setting.value < 1:
                cls._logger.info('Adding additional information to service files')
                edition = Configuration.get_edition()
                if edition == PackageFactory.EDITION_ENTERPRISE:
                    # Rewrite the package name in every version file which still refers to the non-enterprise package
                    for version_file_name in cls._local_client.file_list(directory=ServiceFactory.RUN_FILE_DIR):
                        version_file_path = '{0}/{1}'.format(ServiceFactory.RUN_FILE_DIR, version_file_name)
                        contents = cls._local_client.file_read(filename=version_file_path)
                        if '{0}='.format(PackageFactory.PKG_ALBA) in contents:
                            contents = contents.replace(PackageFactory.PKG_ALBA, PackageFactory.PKG_ALBA_EE)
                            cls._local_client.file_write(filename=version_file_path, contents=contents)

                    node_id = SettingList.get_setting_by_code(code='node_id').value
                    asd_services = list(ASDController.list_asd_services())
                    maint_services = list(MaintenanceController.get_services())
                    for service_name in asd_services + maint_services:
                        config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(node_id, service_name)
                        if Configuration.exists(key=config_key):
                            config = Configuration.get(key=config_key)
                            # Services which already carry the additional information are skipped
                            if 'RUN_FILE_DIR' in config:
                                continue
                            config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR
                            config['ALBA_PKG_NAME'] = PackageFactory.PKG_ALBA_EE
                            config['ALBA_VERSION_CMD'] = PackageFactory.VERSION_CMD_ALBA
                            Configuration.set(key=config_key, value=config)
                            cls._service_manager.regenerate_service(name=ASDController.ASD_PREFIX if service_name in asd_services else MaintenanceController.MAINTENANCE_PREFIX,
                                                                    client=cls._local_client,
                                                                    target_name=service_name)
        except Exception as ex:
            cls._logger.exception('Failed to regenerate the ASD and Maintenance services')
            errors.append(ex)

        try:
            if migration_setting.value < 2:
                # Register this component only when it is not yet part of the registered users
                if System.get_component_identifier() not in Configuration.get(Configuration.get_registration_key(), default=[]):
                    Configuration.register_usage(System.get_component_identifier())
        except Exception as ex:
            cls._logger.exception('Failed to register the asd-manager')
            errors.append(ex)

        if len(errors) == 0:
            cls._logger.info('No errors during non-crucial migration. Saving the migration setting')
            # Save migration settings when no errors occurred
            migration_setting = SettingList.get_setting_by_code(code='migration_version')
            migration_setting.value = 2
            migration_setting.save()

        cls._logger.info('Finished out of band migrations for SDM nodes')
def migrate(cls):
    # type: () -> None
    """
    Execute the migration logic.
    Runs under the 'package_update_pu' file mutex:
        * Determine the node ID (bootstrap file, else /etc/openvstorage_sdm_id)
        * Stop the asd-manager service when it is active
        * When the stored version is lower than cls.CURRENT_VERSION, run the migrations (errors are logged, not raised)
        * Store cls.CURRENT_VERSION and start the asd-manager service again when it is not active
    :return: None
    :rtype: NoneType
    """
    with file_mutex('package_update_pu'):
        local_client = SSHClient(endpoint='127.0.0.1', username='******')

        # Override the created openvstorage_sdm_id during package install, with currently available SDM ID
        if local_client.file_exists(BOOTSTRAP_FILE):
            with open(BOOTSTRAP_FILE) as bstr_file:
                node_id = json.load(bstr_file)['node_id']
            local_client.file_write(filename='/etc/openvstorage_sdm_id', contents=node_id + '\n')
        else:
            with open('/etc/openvstorage_sdm_id', 'r') as id_file:
                node_id = id_file.read().strip()

        # A missing versions key is treated as version 0
        key = '{0}/versions'.format(ASD_NODE_CONFIG_LOCATION.format(node_id))
        version = Configuration.get(key) if Configuration.exists(key) else 0

        asd_manager_service_name = 'asd-manager'
        if cls.service_manager.has_service(asd_manager_service_name, local_client) and cls.service_manager.get_service_status(asd_manager_service_name, local_client) == 'active':
            cls.logger.info('Stopping asd-manager service')
            cls.service_manager.stop_service(asd_manager_service_name, local_client)

        # @TODO: Move these migrations to alba_node.client.update_execute_migration_code()
        if version < cls.CURRENT_VERSION:
            try:
                # DB migrations
                from source.controllers.asd import ASDController
                from source.controllers.disk import DiskController
                from source.dal.asdbase import ASDBase
                from source.dal.lists.asdlist import ASDList
                from source.dal.lists.disklist import DiskList
                from source.dal.objects.asd import ASD

                if not local_client.file_exists('{0}/main.db'.format(ASDBase.DATABASE_FOLDER)):
                    local_client.dir_create([ASDBase.DATABASE_FOLDER])

                asd_map = dict((asd.asd_id, asd) for asd in ASDList.get_asds())
                DiskController.sync_disks()
                # Model an ASD for every entry found in the mountpoint of a usable disk
                for disk in DiskList.get_usable_disks():
                    if disk.state == 'MISSING' or disk.mountpoint is None:
                        continue
                    for asd_id in local_client.dir_list(disk.mountpoint):
                        if asd_id in asd_map:
                            asd = asd_map[asd_id]
                        else:
                            asd = ASD()

                        asd.disk = disk
                        asd.asd_id = asd_id
                        asd.folder = asd_id
                        # Only ASDs which have a configuration are saved
                        if asd.has_config:
                            if asd.port is None or asd.hosts is None:
                                config = Configuration.get(key=asd.config_key)
                                asd.port = config['port']
                                asd.hosts = config.get('ips', [])
                            asd.save()

                # Adjustment of open file descriptors for ASD/maintenance services to 8192
                asd_service_names = list(ASDController.list_asd_services())
                maintenance_service_names = list(MaintenanceController.get_services())
                for service_name in asd_service_names + maintenance_service_names:
                    if cls.service_manager.has_service(name=service_name, client=local_client):
                        if cls.service_manager.__class__ == Systemd:
                            path = '/lib/systemd/system/{0}.service'.format(service_name)
                            check = 'LimitNOFILE=8192'
                        else:
                            path = '/etc/init/{0}.conf'.format(service_name)
                            check = 'limit nofile 8192 8192'

                        # The service file only needs a rewrite when it exists and lacks the limit
                        restart_required = False
                        if os.path.exists(path):
                            with open(path, 'r') as system_file:
                                if check not in system_file.read():
                                    restart_required = True

                        if restart_required is False:
                            continue

                        configuration_key = ServiceFactory.SERVICE_CONFIG_KEY.format(node_id, service_name)
                        if Configuration.exists(configuration_key):
                            # Rewrite the service file
                            cls.service_manager.add_service(name=ASDController.ASD_PREFIX if service_name in asd_service_names else MaintenanceController.MAINTENANCE_PREFIX,
                                                            client=local_client,
                                                            params=Configuration.get(configuration_key),
                                                            target_name=service_name)

                            # Let the update know that the ASD / maintenance services need to be restarted
                            # Inside `if Configuration.exists`, because it is useless to report a restart if we haven't rewritten the service file
                            ExtensionsToolbox.edit_version_file(client=local_client,
                                                                package_name='alba',
                                                                old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, service_name))
                            if cls.service_manager.__class__ == Systemd:
                                local_client.run(['systemctl', 'daemon-reload'])

                # Version 3: Addition of 'ExecReload' for ASD/maintenance SystemD services
                if cls.service_manager.__class__ == Systemd:  # Upstart does not have functionality to reload a process' configuration
                    reload_daemon = False
                    asd_service_names = list(ASDController.list_asd_services())
                    maintenance_service_names = list(MaintenanceController.get_services())
                    for service_name in asd_service_names + maintenance_service_names:
                        if not cls.service_manager.has_service(name=service_name, client=local_client):
                            continue

                        path = '/lib/systemd/system/{0}.service'.format(service_name)
                        if os.path.exists(path):
                            with open(path, 'r') as system_file:
                                if 'ExecReload' not in system_file.read():
                                    reload_daemon = True
                                    configuration_key = ServiceFactory.SERVICE_CONFIG_KEY.format(node_id, service_name)
                                    if Configuration.exists(configuration_key):
                                        # No need to edit the service version file, since this change only requires a daemon-reload
                                        cls.service_manager.add_service(name=ASDController.ASD_PREFIX if service_name in asd_service_names else MaintenanceController.MAINTENANCE_PREFIX,
                                                                        client=local_client,
                                                                        params=Configuration.get(configuration_key),
                                                                        target_name=service_name)
                    # A single daemon-reload is done after all service files have been handled
                    if reload_daemon is True:
                        local_client.run(['systemctl', 'daemon-reload'])

                # Version 6: Introduction of Active Drive
                all_local_ips = OSFactory.get_manager().get_ip_addresses(client=local_client)
                for asd in ASDList.get_asds():
                    if asd.has_config:
                        asd_config = Configuration.get(asd.config_key)
                        if 'multicast' not in asd_config:
                            asd_config['multicast'] = None
                        # A missing or empty 'ips' entry falls back to all local IPs
                        if 'ips' in asd_config:
                            asd_ips = asd_config['ips'] or all_local_ips
                        else:
                            asd_ips = all_local_ips
                        asd.hosts = asd_ips
                        asd_config['ips'] = asd_ips
                        Configuration.set(asd.config_key, asd_config)
                        asd.save()

                # Version 7: Moving flask certificate files to config dir
                for file_name in ['passphrase', 'server.crt', 'server.csr', 'server.key']:
                    if local_client.file_exists('/opt/asd-manager/source/{0}'.format(file_name)):
                        local_client.file_move(source_file_name='/opt/asd-manager/source/{0}'.format(file_name),
                                               destination_file_name='/opt/asd-manager/config/{0}'.format(file_name))
            except:
                # NOTE(review): the bare except also catches SystemExit and KeyboardInterrupt; the error is only logged
                cls.logger.exception('Error while executing post-update code on node {0}'.format(node_id))
        # NOTE(review): nesting reconstructed - the version appears to be stored even when the migrations above failed; confirm
        Configuration.set(key, cls.CURRENT_VERSION)

        if cls.service_manager.has_service(asd_manager_service_name, local_client) and cls.service_manager.get_service_status(asd_manager_service_name, local_client) != 'active':
            cls.logger.info('Starting asd-manager service')
            cls.service_manager.start_service(asd_manager_service_name, local_client)

        cls.logger.info('Post-update logic executed')
class MaintenanceController(object):
    """
    Controller for the ALBA maintenance services on the local node

    Every method is static and goes through the class-level service manager
    and the class-level SSH client, both of which target 127.0.0.1.
    """
    # Configuration key template: {0} = ALBA Backend GUID, {1} = maintenance service name
    MAINTENANCE_KEY = '/ovs/alba/backends/{0}/maintenance/{1}'
    # Name prefix shared by all maintenance services; also passed as 'name' to add_service
    MAINTENANCE_PREFIX = 'alba-maintenance'
    _local_client = SSHClient(endpoint='127.0.0.1', username='******')
    _service_manager = ServiceFactory.get_manager()

    @staticmethod
    def get_services():
        """
        Lazily yield the names of the maintenance services known to the service manager
        A service qualifies when its name starts with MAINTENANCE_PREFIX
        :return: The names of the maintenance services found through the local client
        :rtype: generator
        """
        manager = MaintenanceController._service_manager
        client = MaintenanceController._local_client
        for candidate in manager.list_services(client):
            if not candidate.startswith(MaintenanceController.MAINTENANCE_PREFIX):
                continue
            yield candidate

    @staticmethod
    def add_maintenance_service(name, alba_backend_guid, abm_name, read_preferences=None):
        """
        Register (when not yet present) and start a maintenance service
        When the service manager does not know the service yet, its configuration is stored
        under MAINTENANCE_KEY and the service is added with MAINTENANCE_PREFIX as 'name'
        and the requested name as 'target_name'. The service is started in both cases.
        :param name: Name of the maintenance service to add
        :type name: str
        :param alba_backend_guid: GUID of the ALBA Backend the maintenance service runs for
        :type alba_backend_guid: str
        :param abm_name: Name of the ABM cluster
        :type abm_name: str
        :param read_preferences: ALBA Node IDs (LOCAL) or ALBA IDs of linked ALBA Backends (GLOBAL) which the
                                 maintenance service should prioritize for READ actions. None is stored as an empty list
        :type read_preferences: list[str]
        :return: None
        :rtype: NoneType
        """
        manager = MaintenanceController._service_manager
        client = MaintenanceController._local_client

        if manager.has_service(name, client) is False:
            alba_pkg_name, alba_version_cmd = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_ALBA)
            service_key = MaintenanceController.MAINTENANCE_KEY.format(alba_backend_guid, name)
            config_location = '{0}/config'.format(service_key)

            # Values handed to the service manager together with the service definition
            params = {'LOG_SINK': Logger.get_sink_path('alba_maintenance'),
                      'ALBA_CONFIG': Configuration.get_configuration_path(config_location),
                      'ALBA_PKG_NAME': alba_pkg_name,
                      'ALBA_VERSION_CMD': alba_version_cmd}

            # Configuration stored at config_location; ALBA_CONFIG above is derived from the same location
            abm_config_url = Configuration.get_configuration_path('/ovs/arakoon/{0}/config'.format(abm_name))
            maintenance_config = {'log_level': 'info',
                                  'albamgr_cfg_url': abm_config_url,
                                  'read_preference': [] if read_preferences is None else read_preferences,
                                  'multicast_discover_osds': False}
            Configuration.set(key=config_location, value=maintenance_config)

            manager.add_service(name=MaintenanceController.MAINTENANCE_PREFIX,
                                client=client,
                                params=params,
                                target_name=name)

        # Started regardless of whether the service was just added or already existed
        manager.start_service(name, client)

    @staticmethod
    def remove_maintenance_service(name, alba_backend_guid=None):
        """
        Stop and remove a maintenance service and optionally clean up its configuration
        :param name: Name of the service to remove
        :type name: str
        :param alba_backend_guid: GUID of the ALBA Backend the maintenance service belongs to
                                  When given, the configuration directory of the service is deleted if it exists
                                  Defaults to None for backwards compatibility
        :type alba_backend_guid: str
        :return: None
        :rtype: NoneType
        """
        manager = MaintenanceController._service_manager
        client = MaintenanceController._local_client

        if manager.has_service(name, client):
            manager.stop_service(name, client)
            manager.remove_service(name, client)

        # Without a backend GUID the configuration key cannot be built, so nothing is cleaned up
        if alba_backend_guid is None:
            return

        key = MaintenanceController.MAINTENANCE_KEY.format(alba_backend_guid, name)
        if Configuration.dir_exists(key=key):
            Configuration.delete(key=key)