def save(self, client=None, reload_config=True):
    """
    Saves the configuration to a given file, optionally a remote one
    :param client: If provided, save remote configuration
    :param reload_config: Reload the running Storage Driver configuration
    """
    self._validate()
    for key in self.configuration:
        contents = json.dumps(self.configuration[key], indent=4)
        EtcdConfiguration.set(self.path.format(key), contents, raw=True)
    if self.config_type == 'storagedriver' and reload_config is True:
        if len(self.dirty_entries) > 0:
            if client is None:
                self._logger.info('Applying local storagedriver configuration changes')
                changes = LSRClient(self.remote_path).update_configuration(self.remote_path)
            else:
                self._logger.info('Applying storagedriver configuration changes on {0}'.format(client.ip))
                with remote(client.ip, [LSRClient]) as rem:
                    changes = copy.deepcopy(rem.LocalStorageRouterClient(self.remote_path).update_configuration(self.remote_path))
            for change in changes:
                if change['param_name'] not in self.dirty_entries:
                    raise RuntimeError('Unexpected configuration change: {0}'.format(change['param_name']))
                self._logger.info('Changed {0} from "{1}" to "{2}"'.format(change['param_name'], change['old_value'], change['new_value']))
                self.dirty_entries.remove(change['param_name'])
            self._logger.info('Changes applied')
            if len(self.dirty_entries) > 0:
                self._logger.warning('Following changes were not applied: {0}'.format(', '.join(self.dirty_entries)))
        else:
            self._logger.debug('No need to apply changes, nothing changed')
    self.is_new = False
    self.dirty_entries = []
def is_host_configured_for_vpool(self, vpool_guid, ip):
    if (self._is_devstack is False and self._is_openstack is False) or self._cinder_installed is False or self._nova_installed is False:
        self._logger.warning('vPool configured: No OpenStack nor DevStack installation detected or Cinder and Nova plugins are not installed')
        return False

    # 1. Check cinder driver
    vpool = VPool(vpool_guid)
    with remote(ip, [RawConfigParser], 'root') as rem:
        cfg = rem.RawConfigParser()
        cfg.read([self._CINDER_CONF])
        if not cfg.has_section(vpool.name):
            self._logger.info('Section "{0}" was not found in cinder configuration file'.format(vpool.name))
            return False

        for key, value in {"volume_driver": "cinder.volume.drivers.openvstorage.OVSVolumeDriver",
                           "volume_backend_name": vpool.name,
                           "vpool_name": vpool.name}.iteritems():
            if cfg.get(vpool.name, key) != value:
                self._logger.info('Configuration parameter "{0}" does not contain the expected value "{1}"'.format(key, value))
                return False

        enabled_backends = []
        if cfg.has_option("DEFAULT", "enabled_backends"):
            enabled_backends = cfg.get("DEFAULT", "enabled_backends").split(", ")
        if vpool.name not in enabled_backends:
            self._logger.info('vPool {0} has not been added to the enabled backends property'.format(vpool.name))
            return False

    # 2. Check volume type
    if self.cinder_client and not [volume_type for volume_type in self.cinder_client.volume_types.list() if volume_type.name == vpool.name]:
        self._logger.info('Cinder client does not contain a volume of type "{0}"'.format(vpool.name))
        return False

    return True
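A short sketch of how the check above can gate (re)configuration. `mgmt` is a placeholder for an instance of the surrounding OpenStack management helper, and the guid and IP are illustrative values, not taken from the original code.

vpool_guid = '00000000-0000-0000-0000-000000000000'  # illustrative
if not mgmt.is_host_configured_for_vpool(vpool_guid, '10.100.1.5'):
    # configure_vpool_for_host is defined further down in this listing
    mgmt.configure_vpool_for_host(vpool_guid, '10.100.1.5')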
def unconfigure_vpool_for_host(self, vpool_guid, remove_volume_type, ip):
    if (self._is_devstack is False and self._is_openstack is False) or self._cinder_installed is False or self._nova_installed is False:
        self._logger.warning('Unconfigure vPool: No OpenStack nor DevStack installation detected or Cinder and Nova plugins are not installed')
        return

    vpool = VPool(vpool_guid)
    with remote(ip, [RawConfigParser, open], 'root') as rem:
        changed = False
        cfg = rem.RawConfigParser()
        cfg.read([self._CINDER_CONF])
        if cfg.has_section(vpool.name):
            changed = True
            cfg.remove_section(vpool.name)
        if cfg.has_option("DEFAULT", "enabled_backends"):
            enabled_backends = cfg.get("DEFAULT", "enabled_backends").split(", ")
            if vpool.name in enabled_backends:
                changed = True
                enabled_backends.remove(vpool.name)
                cfg.set("DEFAULT", "enabled_backends", ", ".join(enabled_backends))
        if changed is True:
            with rem.open(self._CINDER_CONF, "w") as fp:
                cfg.write(fp)

    if remove_volume_type and self.cinder_client:
        for volume_type in self.cinder_client.volume_types.list():
            if volume_type.name == vpool.name:
                try:
                    self.cinder_client.volume_types.delete(volume_type.id)
                except Exception as ex:
                    self._logger.info('Removing volume type from cinder failed with error: {0}'.format(ex))

    self._restart_processes()
def file_list(self, directory, abs_path=False, recursive=False):
    """
    List all files in directory
    WARNING: If executed recursively while not locally, this can take quite some time

    :param directory: Directory to list the files in
    :param abs_path: Return the absolute path of the files or only the file names
    :param recursive: Loop through the directories recursively
    :return: List of files in directory
    """
    all_files = []
    if self.is_local is True:
        for root, dirs, files in os.walk(directory):
            for file_name in files:
                if abs_path is True:
                    all_files.append('/'.join([root, file_name]))
                else:
                    all_files.append(file_name)
            if recursive is False:
                break
    else:
        with remote(self.ip, [os], 'root') as rem:
            for root, dirs, files in rem.os.walk(directory):
                for file_name in files:
                    if abs_path is True:
                        all_files.append('/'.join([root, file_name]))
                    else:
                        all_files.append(file_name)
                if recursive is False:
                    break
    return all_files
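A brief usage sketch of the listing helper above. `fs` stands in for an instance of the surrounding class (which provides is_local and ip), and the directory is illustrative.

log_files = fs.file_list('/var/log/upstart', abs_path=True, recursive=False)
for path in log_files:
    print path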
def execute_search_on_remote(since=None, until=None, search_locations=None, hosts=None, python_error=False,
                             mode='search', username='******', password=None, suppress_return=False, search_patterns=None):
    """
    Searches all hosts for entries between given dates. Can be used standalone on the execution machine
    :param since: Starting date
    :type since: str / Datetime
    :param until: End date
    :type until: str / Datetime
    :param search_locations: list of paths of files / servicenames that will be searched on all nodes
    :type search_locations: list of str
    :param hosts: Ip of the nodes
    :type hosts: list of str
    :param mode: Search mode
    :type mode: str
    :param python_error: Whether only python errors should be checked
    :type python_error: Boolean
    :param username: Username of the user to login
    :type username: str
    :param password: Password of the user to login
    :type password: str
    :param suppress_return: only write to file and not return contents
    :type suppress_return: Boolean
    :return: Output of a file as string
    """
    # Validate parameter
    if mode not in LogFileTimeParser.POSSIBLE_MODES:
        raise ValueError('Mode "{0}" is not supported. Possible modes are {1}'.format(mode, ', '.join(LogFileTimeParser.POSSIBLE_MODES)))
    # Clear cache
    open(LogFileTimeParser.FILE_PATH_REMOTE, 'w').close()
    since, until, search_locations, hosts = LogFileTimeParser._default_starting_values(since, until, search_locations, hosts)
    # Setup remote instances
    with remote(hosts, [LogFileTimeParser], username=username, password=password) as remotes:
        for host in hosts:
            results = ''
            if mode == 'search':
                # Execute search
                results = remotes[host].LogFileTimeParser.get_lines_between_timestamps(since=since, until=until,
                                                                                       search_locations=search_locations,
                                                                                       search_patterns=search_patterns,
                                                                                       host=host)
            elif mode == 'error-search':
                # Execute search
                results = remotes[host].LogFileTimeParser.search_for_errors(since=since, until=until,
                                                                            search_locations=search_locations,
                                                                            host=host, python_error=python_error)
            # Append output to cache
            with open(LogFileTimeParser.FILE_PATH_REMOTE, 'a') as output_file:
                output_file.write(str(results))
    if not suppress_return:
        with open(LogFileTimeParser.FILE_PATH_REMOTE, 'r') as output_file:
            return output_file.read()
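A minimal invocation sketch for the search above, assuming the method is exposed on LogFileTimeParser (the class whose constants it uses). The hosts, dates, credentials and log location are illustrative placeholders.

errors = LogFileTimeParser.execute_search_on_remote(since='2017-01-01 10:00:00',
                                                    until='2017-01-01 11:00:00',
                                                    search_locations=['/var/log/ovs/lib.log'],
                                                    hosts=['10.100.1.1', '10.100.1.2'],
                                                    mode='error-search',
                                                    python_error=True,
                                                    username='root',
                                                    password='secret')
print errors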
def check_file_is_link(file_path, host, username=None, password=None):
    """
    Check if a file on a node is a symlink
    :param host: Host node to check file system
    :param file_path: File to check eg. '/dev/disk/by-id/wwn-0x500003941b780823'
    :param username: Username used to login on host
    :param password: Password used to login on host
    :return: Boolean
    """
    if username is None:
        username = '******'
    with remote(host, [os], username=username, password=password, strict_host_key_checking=False) as rem:
        return rem.os.path.islink(file_path)
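A one-line usage sketch of the helper above; the host and password are illustrative, and the device path is the example given in its docstring.

is_link = check_file_is_link('/dev/disk/by-id/wwn-0x500003941b780823', host='10.100.1.2', password='secret')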
def retrieve_storagerouter_info_via_host(ip, password):
    """
    Retrieve the storagerouters from model
    """
    storagerouters = {}
    try:
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        with remote(ip_info=ip, modules=[StorageRouterList], username='******', password=password, strict_host_key_checking=False) as rem:
            for sr in rem.StorageRouterList.get_storagerouters():
                storagerouters[sr.name] = {'ip': sr.ip, 'type': sr.node_type.lower()}
    except Exception as ex:
        Toolbox.log(logger=NodeTypeController._logger, messages='Error loading storagerouters: {0}'.format(ex), loglevel='exception', silent=True)
    return storagerouters
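A short usage sketch, assuming the helper lives on NodeTypeController as its logger reference suggests; the IP and password are placeholders.

storagerouters = NodeTypeController.retrieve_storagerouter_info_via_host('10.100.1.2', password='secret')
for name, info in storagerouters.iteritems():
    print '{0}: {1} ({2})'.format(name, info['ip'], info['type'])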
def save(self, client=None, reload_config=True):
    """
    Saves the configuration to a given file, optionally a remote one
    :param client: If provided, save remote configuration
    :param reload_config: Reload the running Storage Driver configuration
    """
    Configuration.set(self.key, json.dumps(self.configuration, indent=4), raw=True)
    if self.config_type == 'storagedriver' and reload_config is True:
        if len(self.dirty_entries) > 0:
            if client is None:
                self._logger.info('Applying local storagedriver configuration changes')
                changes = LocalStorageRouterClient(self.remote_path).update_configuration(self.remote_path)
            else:
                self._logger.info('Applying storagedriver configuration changes on {0}'.format(client.ip))
                with remote(client.ip, [LocalStorageRouterClient]) as rem:
                    changes = copy.deepcopy(rem.LocalStorageRouterClient(self.remote_path).update_configuration(self.remote_path))
            for change in changes:
                if change['param_name'] not in self.dirty_entries:
                    raise RuntimeError('Unexpected configuration change: {0}'.format(change['param_name']))
                self._logger.info('Changed {0} from "{1}" to "{2}"'.format(change['param_name'], change['old_value'], change['new_value']))
                self.dirty_entries.remove(change['param_name'])
            self._logger.info('Changes applied')
            if len(self.dirty_entries) > 0:
                self._logger.warning('Following changes were not applied: {0}'.format(', '.join(self.dirty_entries)))
        else:
            self._logger.debug('No need to apply changes, nothing changed')
    self.is_new = False
    self.dirty_entries = []
def ovs_4509_validate_arakoon_collapse_test():
    """
    Validate arakoon collapse
    """
    node_ips = [sr.ip for sr in GeneralStorageRouter.get_storage_routers()]
    node_ips.sort()
    for node_ip in node_ips:
        root_client = SSHClient(node_ip, username='******')
        arakoon_clusters = []
        for service in ServiceList.get_services():
            if service.is_internal is True and service.storagerouter.ip == node_ip and \
                    service.type.name in (ServiceType.SERVICE_TYPES.ARAKOON,
                                          ServiceType.SERVICE_TYPES.NS_MGR,
                                          ServiceType.SERVICE_TYPES.ALBA_MGR):
                arakoon_clusters.append(service.name.replace('arakoon-', ''))

        for arakoon_cluster in arakoon_clusters:
            arakoon_config_path = Configuration.get_configuration_path('/ovs/arakoon/{0}/config'.format(arakoon_cluster))
            tlog_location = '/opt/OpenvStorage/db/arakoon/{0}/tlogs'.format(arakoon_cluster)

            # read_tlog_dir
            with remote(node_ip, [Configuration]) as rem:
                config_contents = rem.Configuration.get('/ovs/arakoon/{0}/config'.format(arakoon_cluster), raw=True)
            for line in config_contents.splitlines():
                if 'tlog_dir' in line:
                    tlog_location = line.split()[-1]

            nr_of_tlogs = TestArakoon.get_nr_of_tlogs_in_folder(root_client, tlog_location)
            old_headdb_timestamp = 0
            if root_client.file_exists('/'.join([tlog_location, 'head.db'])):
                old_headdb_timestamp = root_client.run(['stat', '--format=%Y', tlog_location + '/head.db'])
            if nr_of_tlogs <= 2:
                benchmark_command = ['arakoon', '--benchmark', '-n_clients', '1', '-max_n', '5_000', '-config', arakoon_config_path]
                root_client.run(benchmark_command)

            GenericController.collapse_arakoon()

            nr_of_tlogs = TestArakoon.get_nr_of_tlogs_in_folder(root_client, tlog_location)
            new_headdb_timestamp = root_client.run(['stat', '--format=%Y', tlog_location + '/head.db'])
            assert nr_of_tlogs <= 2, \
                'Arakoon collapse left {0} tlogs on the environment, expecting less than 2'.format(nr_of_tlogs)
            assert old_headdb_timestamp != new_headdb_timestamp, \
                'Timestamp of the head_db file was not changed in the process of collapsing tlogs'
def wait_for_cluster(cluster_name, sshclient):
    """
    Waits for an Arakoon cluster to be available (by sending a nop)
    :param cluster_name: Name of the cluster to wait on
    :type cluster_name: str
    :param sshclient: Client on which to wait for the cluster
    :type sshclient: SSHClient
    :return: True
    :rtype: Boolean
    """
    ArakoonInstaller._logger.debug('Waiting for cluster {0}'.format(cluster_name))
    from ovs.extensions.storage.persistentfactory import PersistentFactory
    with remote(sshclient.ip, [PersistentFactory], 'ovs') as rem:
        client = rem.PersistentFactory.get_client()
        client.nop()
    ArakoonInstaller._logger.debug('Waiting for cluster {0}: available'.format(cluster_name))
    return True
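A small usage sketch, assuming the method is exposed on ArakoonInstaller (its logger suggests so); the node IP and cluster name are illustrative.

client = SSHClient('10.100.1.2', username='ovs')    # illustrative node
ArakoonInstaller.wait_for_cluster('ovsdb', client)  # blocks until the cluster answers a nop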
def save(self, client=None):
    """
    Saves the configuration to a given file, optionally a remote one
    :param client: If provided, save remote configuration
    """
    Configuration.set(self.key, json.dumps(self.configuration, indent=4), raw=True)
    if self.config_type == 'storagedriver':
        if len(self.dirty_entries) > 0:
            reloaded = False
            changes = []
            if client is None:
                self._logger.info('Applying local storagedriver configuration changes')
                try:
                    changes = LocalStorageRouterClient(self.remote_path).update_configuration(self.remote_path)
                    reloaded = True
                except Exception as ex:
                    if 'ClusterNotReachableException' not in str(ex):
                        raise
            else:
                self._logger.info('Applying storagedriver configuration changes on {0}'.format(client.ip))
                try:
                    with remote(client.ip, [LocalStorageRouterClient]) as rem:
                        changes = copy.deepcopy(rem.LocalStorageRouterClient(self.remote_path).update_configuration(self.remote_path))
                    reloaded = True
                except Exception as ex:
                    if 'ClusterNotReachableException' not in str(ex):
                        raise
            if reloaded is True:
                for change in changes:
                    if change['param_name'] not in self.dirty_entries:
                        raise RuntimeError('Unexpected configuration change: {0}'.format(change['param_name']))
                    self._logger.info('Changed {0} from "{1}" to "{2}"'.format(change['param_name'], change['old_value'], change['new_value']))
                    self.dirty_entries.remove(change['param_name'])
                self._logger.info('Changes applied')
                if len(self.dirty_entries) > 0:
                    self._logger.warning('Following changes were not applied: {0}'.format(', '.join(self.dirty_entries)))
            else:
                self._logger.warning('Changes were not applied since storagedriver is unavailable')
        else:
            self._logger.debug('No need to apply changes, nothing changed')
    self.is_new = False
    self.dirty_entries = []
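A minimal sketch of how this save flow might be driven. The constructor arguments and load() call mirror the migration code elsewhere in this listing; configure_dtl_mode is a hypothetical setter standing in for whichever generated method marks an entry dirty, and the node IP is illustrative.

config = StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id)
config.load()
config.configure_dtl_mode('a_sync')  # hypothetical setter, marks 'dtl_mode' as a dirty entry
config.save(client=SSHClient('10.100.1.3', username='root'))  # applies the change on the remote node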
def configure_vpool_for_host(self, vpool_guid, ip):
    if (self._is_devstack is False and self._is_openstack is False) or self._cinder_installed is False or self._nova_installed is False:
        self._logger.warning('Configure vPool: No OpenStack nor DevStack installation detected or Cinder and Nova plugins are not installed')
        return

    vpool = VPool(vpool_guid)
    self._logger.debug('configure_vpool {0} started'.format(vpool.name))

    # 1. Configure Cinder driver
    with remote(ip, [RawConfigParser, open], 'root') as rem:
        changed = False
        cfg = rem.RawConfigParser()
        cfg.read([self._CINDER_CONF])
        if not cfg.has_section(vpool.name):
            changed = True
            cfg.add_section(vpool.name)
            cfg.set(vpool.name, "volume_driver", "cinder.volume.drivers.openvstorage.OVSVolumeDriver")
            cfg.set(vpool.name, "volume_backend_name", vpool.name)
            cfg.set(vpool.name, "vpool_name", vpool.name)
        enabled_backends = []
        if cfg.has_option("DEFAULT", "enabled_backends"):
            enabled_backends = cfg.get("DEFAULT", "enabled_backends").split(", ")
        if vpool.name not in enabled_backends:
            changed = True
            enabled_backends.append(vpool.name)
            cfg.set("DEFAULT", "enabled_backends", ", ".join(enabled_backends))
        if changed is True:
            with rem.open(self._CINDER_CONF, "w") as fp:
                cfg.write(fp)

    # 2. Create volume type
    if self.cinder_client and not [volume_type for volume_type in self.cinder_client.volume_types.list() if volume_type.name == vpool.name]:
        volume_type = self.cinder_client.volume_types.create(vpool.name)
        volume_type.set_keys(metadata={'volume_backend_name': vpool.name})

    # 3. Restart processes
    self.client = SSHClient(ip, username='******')
    self._restart_processes()
    self._logger.debug('configure_vpool {0} completed'.format(vpool.name))
def update_status(storagedriver_id):
    """
    Sets Storage Driver offline in case hypervisor management Center reports the hypervisor pmachine related to this Storage Driver as unavailable.
    :param storagedriver_id: ID of the storagedriver to update its status
    :type storagedriver_id: str
    :return: None
    """
    pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
    storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    storagerouter = storagedriver.storagerouter
    if pmachine.mgmtcenter:
        # Update status
        pmachine.invalidate_dynamics(['host_status'])
    else:
        # No management Center, cannot update status via api
        StorageDriverController._logger.info('Updating status of pmachine {0} using SSHClient'.format(pmachine.name))
        host_status = 'RUNNING'
        try:
            client = SSHClient(storagerouter, username='******')
            configuration_dir = EtcdConfiguration.get('/ovs/framework/paths|cfgdir')
            StorageDriverController._logger.info('SSHClient connected successfully to {0} at {1}'.format(pmachine.name, client.ip))
            with remote(client.ip, [LocalStorageRouterClient]) as rem:
                lsrc = rem.LocalStorageRouterClient('{0}/storagedriver/storagedriver/{1}.json'.format(configuration_dir, storagedriver.vpool.name))
                lsrc.server_revision()
                StorageDriverController._logger.info('LocalStorageRouterClient connected successfully to {0} at {1}'.format(pmachine.name, client.ip))
        except Exception as ex:
            StorageDriverController._logger.error('Connectivity check failed, assuming host {0} is halted. {1}'.format(pmachine.name, ex))
            host_status = 'HALTED'
        if host_status != 'RUNNING':
            # Host is stopped
            storagedriver_client = StorageDriverClient.load(storagedriver.vpool)
            storagedriver_client.mark_node_offline(str(storagedriver.storagedriver_id))
def migrate(previous_version):
    """
    Migrates from any version to any version, running all migrations required
    If previous_version is for example 0 and this script is at version 3 it will execute two steps:
      - 1 > 2
      - 2 > 3
    @param previous_version: The previous version from which to start the migration.
    """

    working_version = previous_version

    # Version 1 introduced:
    # - The datastore is still empty, add defaults
    if working_version < 1:
        from ovs.dal.hybrids.user import User
        from ovs.dal.hybrids.group import Group
        from ovs.dal.hybrids.role import Role
        from ovs.dal.hybrids.client import Client
        from ovs.dal.hybrids.failuredomain import FailureDomain
        from ovs.dal.hybrids.j_rolegroup import RoleGroup
        from ovs.dal.hybrids.j_roleclient import RoleClient
        from ovs.dal.hybrids.backendtype import BackendType
        from ovs.dal.hybrids.servicetype import ServiceType
        from ovs.dal.hybrids.branding import Branding
        from ovs.dal.lists.backendtypelist import BackendTypeList

        # Create groups
        admin_group = Group()
        admin_group.name = 'administrators'
        admin_group.description = 'Administrators'
        admin_group.save()
        viewers_group = Group()
        viewers_group.name = 'viewers'
        viewers_group.description = 'Viewers'
        viewers_group.save()

        # Create users
        admin = User()
        admin.username = '******'
        admin.password = hashlib.sha256('admin').hexdigest()
        admin.is_active = True
        admin.group = admin_group
        admin.save()

        # Create internal OAuth 2 clients
        admin_pw_client = Client()
        admin_pw_client.ovs_type = 'INTERNAL'
        admin_pw_client.grant_type = 'PASSWORD'
        admin_pw_client.user = admin
        admin_pw_client.save()
        admin_cc_client = Client()
        admin_cc_client.ovs_type = 'INTERNAL'
        admin_cc_client.grant_type = 'CLIENT_CREDENTIALS'
        admin_cc_client.client_secret = ''.join(random.choice(string.ascii_letters + string.digits + '|_=+*#@!/-[]{}<>.?,\'";:~')
                                                for _ in range(128))
        admin_cc_client.user = admin
        admin_cc_client.save()

        # Create roles
        read_role = Role()
        read_role.code = 'read'
        read_role.name = 'Read'
        read_role.description = 'Can read objects'
        read_role.save()
        write_role = Role()
        write_role.code = 'write'
        write_role.name = 'Write'
        write_role.description = 'Can write objects'
        write_role.save()
        manage_role = Role()
        manage_role.code = 'manage'
        manage_role.name = 'Manage'
        manage_role.description = 'Can manage the system'
        manage_role.save()

        # Attach groups to roles
        mapping = [(admin_group, [read_role, write_role, manage_role]),
                   (viewers_group, [read_role])]
        for setting in mapping:
            for role in setting[1]:
                rolegroup = RoleGroup()
                rolegroup.group = setting[0]
                rolegroup.role = role
                rolegroup.save()
            for user in setting[0].users:
                for role in setting[1]:
                    for client in user.clients:
                        roleclient = RoleClient()
                        roleclient.client = client
                        roleclient.role = role
                        roleclient.save()

        # Add backends
        for backend_type_info in [('Ceph', 'ceph_s3'), ('Amazon', 'amazon_s3'), ('Swift', 'swift_s3'),
                                  ('Local', 'local'), ('Distributed', 'distributed'), ('ALBA', 'alba')]:
            code = backend_type_info[1]
            backend_type = BackendTypeList.get_backend_type_by_code(code)
            if backend_type is None:
                backend_type = BackendType()
            backend_type.name = backend_type_info[0]
            backend_type.code = code
            backend_type.save()

        # Add service types
        for service_type_info in [ServiceType.SERVICE_TYPES.MD_SERVER, ServiceType.SERVICE_TYPES.ALBA_PROXY, ServiceType.SERVICE_TYPES.ARAKOON]:
            service_type = ServiceType()
            service_type.name = service_type_info
            service_type.save()

        # Branding
        branding = Branding()
        branding.name = 'Default'
        branding.description = 'Default bootstrap theme'
        branding.css = 'bootstrap-default.min.css'
        branding.productname = 'Open vStorage'
        branding.is_default = True
        branding.save()
        slate = Branding()
        slate.name = 'Slate'
        slate.description = 'Dark bootstrap theme'
        slate.css = 'bootstrap-slate.min.css'
        slate.productname = 'Open vStorage'
        slate.is_default = False
        slate.save()

        # Failure Domain
        failure_domain = FailureDomain()
        failure_domain.name = 'Default'
        failure_domain.save()

        # We're now at version 1
        working_version = 1

    # Version 2 introduced:
    # - new Descriptor format
    if working_version < 2:
        import imp
        from ovs.dal.helpers import Descriptor
        from ovs.extensions.storage.persistentfactory import PersistentFactory

        client = PersistentFactory.get_client()
        keys = client.prefix('ovs_data')
        for key in keys:
            data = client.get(key)
            modified = False
            for entry in data.keys():
                if isinstance(data[entry], dict) and 'source' in data[entry] and 'hybrids' in data[entry]['source']:
                    filename = data[entry]['source']
                    if not filename.startswith('/'):
                        filename = '/opt/OpenvStorage/ovs/dal/{0}'.format(filename)
                    module = imp.load_source(data[entry]['name'], filename)
                    cls = getattr(module, data[entry]['type'])
                    new_data = Descriptor(cls, cached=False).descriptor
                    if 'guid' in data[entry]:
                        new_data['guid'] = data[entry]['guid']
                    data[entry] = new_data
                    modified = True
            if modified is True:
                data['_version'] += 1
                client.set(key, data)

        # We're now at version 2
        working_version = 2

    # Version 3 introduced:
    # - new Descriptor format
    if working_version < 3:
        import imp
        from ovs.dal.helpers import Descriptor
        from ovs.extensions.storage.persistentfactory import PersistentFactory

        client = PersistentFactory.get_client()
        keys = client.prefix('ovs_data')
        for key in keys:
            data = client.get(key)
            modified = False
            for entry in data.keys():
                if isinstance(data[entry], dict) and 'source' in data[entry]:
                    module = imp.load_source(data[entry]['name'], data[entry]['source'])
                    cls = getattr(module, data[entry]['type'])
                    new_data = Descriptor(cls, cached=False).descriptor
                    if 'guid' in data[entry]:
                        new_data['guid'] = data[entry]['guid']
                    data[entry] = new_data
                    modified = True
            if modified is True:
                data['_version'] += 1
                client.set(key, data)

        working_version = 3

    # Version 4 introduced:
    # - Flexible SSD layout
    if working_version < 4:
        import os
        from ovs.dal.hybrids.diskpartition import DiskPartition
        from ovs.dal.hybrids.j_storagedriverpartition import StorageDriverPartition
        from ovs.dal.hybrids.servicetype import ServiceType
        from ovs.dal.lists.servicetypelist import ServiceTypeList
        from ovs.dal.lists.storagedriverlist import StorageDriverList
        from ovs.extensions.generic.remote import remote
        from ovs.extensions.generic.sshclient import SSHClient
        from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration

        for service in ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER).services:
            mds_service = service.mds_service
            storagedriver = None
            for current_storagedriver in service.storagerouter.storagedrivers:
                if current_storagedriver.vpool_guid == mds_service.vpool_guid:
                    storagedriver = current_storagedriver
                    break
            tasks = {}
            if storagedriver._data.get('mountpoint_md'):
                tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_md'),
                                               storagedriver.vpool.name,
                                               mds_service.number)] = (DiskPartition.ROLES.DB, StorageDriverPartition.SUBROLE.MDS)
            if storagedriver._data.get('mountpoint_temp'):
                tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_temp'),
                                               storagedriver.vpool.name,
                                               mds_service.number)] = (DiskPartition.ROLES.SCRUB, StorageDriverPartition.SUBROLE.MDS)
            for disk in service.storagerouter.disks:
                for partition in disk.partitions:
                    for directory, (role, subrole) in tasks.iteritems():
                        with remote(storagedriver.storagerouter.ip, [os], username='******') as rem:
                            stat_dir = directory
                            while not rem.os.path.exists(stat_dir) and stat_dir != '/':
                                stat_dir = stat_dir.rsplit('/', 1)[0]
                                if not stat_dir:
                                    stat_dir = '/'
                            inode = rem.os.stat(stat_dir).st_dev
                        if partition.inode == inode:
                            if role not in partition.roles:
                                partition.roles.append(role)
                                partition.save()
                            number = 0
                            migrated = False
                            for sd_partition in storagedriver.partitions:
                                if sd_partition.role == role and sd_partition.sub_role == subrole:
                                    if sd_partition.mds_service == mds_service:
                                        migrated = True
                                        break
                                    if sd_partition.partition_guid == partition.guid:
                                        number = max(sd_partition.number, number)
                            if migrated is False:
                                sd_partition = StorageDriverPartition()
                                sd_partition.role = role
                                sd_partition.sub_role = subrole
                                sd_partition.partition = partition
                                sd_partition.storagedriver = storagedriver
                                sd_partition.mds_service = mds_service
                                sd_partition.size = None
                                sd_partition.number = number + 1
                                sd_partition.save()
                                client = SSHClient(storagedriver.storagerouter, username='******')
                                path = sd_partition.path.rsplit('/', 1)[0]
                                if path:
                                    client.dir_create(path)
                                    client.dir_chown(path, 'ovs', 'ovs')
                                client.dir_create(directory)
                                client.dir_chown(directory, 'ovs', 'ovs')
                                client.symlink({sd_partition.path: directory})

        for storagedriver in StorageDriverList.get_storagedrivers():
            migrated_objects = {}
            for disk in storagedriver.storagerouter.disks:
                for partition in disk.partitions:
                    # Process all mountpoints that are unique and don't have a specified size
                    for key, (role, sr_info) in {'mountpoint_md': (DiskPartition.ROLES.DB, {'metadata_{0}': StorageDriverPartition.SUBROLE.MD,
                                                                                            'tlogs_{0}': StorageDriverPartition.SUBROLE.TLOG}),
                                                 'mountpoint_fragmentcache': (DiskPartition.ROLES.WRITE, {'fcache_{0}': StorageDriverPartition.SUBROLE.FCACHE}),
                                                 'mountpoint_foc': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                                                                'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
                                                 'mountpoint_dtl': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                                                                'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
                                                 'mountpoint_readcaches': (DiskPartition.ROLES.READ, {'': None}),
                                                 'mountpoint_writecaches': (DiskPartition.ROLES.WRITE, {'sco_{0}': StorageDriverPartition.SUBROLE.SCO})}.iteritems():
                        if key in storagedriver._data:
                            is_list = isinstance(storagedriver._data[key], list)
                            entries = storagedriver._data[key][:] if is_list is True else [storagedriver._data[key]]
                            for entry in entries:
                                if not entry:
                                    if is_list:
                                        storagedriver._data[key].remove(entry)
                                        if len(storagedriver._data[key]) == 0:
                                            del storagedriver._data[key]
                                    else:
                                        del storagedriver._data[key]
                                else:
                                    with remote(storagedriver.storagerouter.ip, [os], username='******') as rem:
                                        inode = rem.os.stat(entry).st_dev
                                    if partition.inode == inode:
                                        if role not in partition.roles:
                                            partition.roles.append(role)
                                            partition.save()
                                        for folder, subrole in sr_info.iteritems():
                                            number = 0
                                            migrated = False
                                            for sd_partition in storagedriver.partitions:
                                                if sd_partition.role == role and sd_partition.sub_role == subrole:
                                                    if sd_partition.partition_guid == partition.guid:
                                                        number = max(sd_partition.number, number)
                                            if migrated is False:
                                                sd_partition = StorageDriverPartition()
                                                sd_partition.role = role
                                                sd_partition.sub_role = subrole
                                                sd_partition.partition = partition
                                                sd_partition.storagedriver = storagedriver
                                                sd_partition.size = None
                                                sd_partition.number = number + 1
                                                sd_partition.save()
                                                if folder:
                                                    source = '{0}/{1}'.format(entry, folder.format(storagedriver.vpool.name))
                                                else:
                                                    source = entry
                                                client = SSHClient(storagedriver.storagerouter, username='******')
                                                path = sd_partition.path.rsplit('/', 1)[0]
                                                if path:
                                                    client.dir_create(path)
                                                    client.dir_chown(path, 'ovs', 'ovs')
                                                client.symlink({sd_partition.path: source})
                                                migrated_objects[source] = sd_partition
                                        if is_list:
                                            storagedriver._data[key].remove(entry)
                                            if len(storagedriver._data[key]) == 0:
                                                del storagedriver._data[key]
                                        else:
                                            del storagedriver._data[key]
                                        storagedriver.save()
            if 'mountpoint_bfs' in storagedriver._data:
                storagedriver.mountpoint_dfs = storagedriver._data['mountpoint_bfs']
                if not storagedriver.mountpoint_dfs:
                    storagedriver.mountpoint_dfs = None
                del storagedriver._data['mountpoint_bfs']
                storagedriver.save()
            if 'mountpoint_temp' in storagedriver._data:
                del storagedriver._data['mountpoint_temp']
                storagedriver.save()
            if migrated_objects:
                print 'Loading sizes'
                config = StorageDriverConfiguration('storagedriver', storagedriver.vpool_guid, storagedriver.storagedriver_id)
                config.load()
                for readcache in config.configuration.get('content_addressed_cache', {}).get('clustercache_mount_points', []):
                    path = readcache.get('path', '').rsplit('/', 1)[0]
                    size = int(readcache['size'].strip('KiB')) * 1024 if 'size' in readcache else None
                    if path in migrated_objects:
                        migrated_objects[path].size = long(size)
                        migrated_objects[path].save()
                for writecache in config.configuration.get('scocache', {}).get('scocache_mount_points', []):
                    path = writecache.get('path', '')
                    size = int(writecache['size'].strip('KiB')) * 1024 if 'size' in writecache else None
                    if path in migrated_objects:
                        migrated_objects[path].size = long(size)
                        migrated_objects[path].save()

        working_version = 4

    # Version 5 introduced:
    # - Failure Domains
    if working_version < 5:
        import os
        from ovs.dal.hybrids.failuredomain import FailureDomain
        from ovs.dal.lists.failuredomainlist import FailureDomainList
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.extensions.generic.remote import remote
        from ovs.extensions.generic.sshclient import SSHClient

        failure_domains = FailureDomainList.get_failure_domains()
        if len(failure_domains) > 0:
            failure_domain = failure_domains[0]
        else:
            failure_domain = FailureDomain()
            failure_domain.name = 'Default'
            failure_domain.save()
        for storagerouter in StorageRouterList.get_storagerouters():
            change = False
            if storagerouter.primary_failure_domain is None:
                storagerouter.primary_failure_domain = failure_domain
                change = True
            if storagerouter.rdma_capable is None:
                client = SSHClient(storagerouter, username='******')
                rdma_capable = False
                with remote(client.ip, [os], username='******') as rem:
                    for root, dirs, files in rem.os.walk('/sys/class/infiniband'):
                        for directory in dirs:
                            ports_dir = '/'.join([root, directory, 'ports'])
                            if not rem.os.path.exists(ports_dir):
                                continue
                            for sub_root, sub_dirs, _ in rem.os.walk(ports_dir):
                                if sub_root != ports_dir:
                                    continue
                                for sub_directory in sub_dirs:
                                    state_file = '/'.join([sub_root, sub_directory, 'state'])
                                    if rem.os.path.exists(state_file):
                                        if 'ACTIVE' in client.run('cat {0}'.format(state_file)):
                                            rdma_capable = True
                storagerouter.rdma_capable = rdma_capable
                change = True
            if change is True:
                storagerouter.save()

        working_version = 5

    # Version 6 introduced:
    # - Distributed scrubbing
    if working_version < 6:
        from ovs.dal.hybrids.diskpartition import DiskPartition
        from ovs.dal.lists.storagedriverlist import StorageDriverList
        from ovs.extensions.generic.sshclient import SSHClient

        for storage_driver in StorageDriverList.get_storagedrivers():
            root_client = SSHClient(storage_driver.storagerouter, username='******')
            for partition in storage_driver.partitions:
                if partition.role == DiskPartition.ROLES.SCRUB:
                    old_path = partition.path
                    partition.sub_role = None
                    partition.save()
                    partition.invalidate_dynamics(['folder', 'path'])
                    if root_client.dir_exists(partition.path):
                        continue  # New directory already exists
                    if '_mds_' in old_path:
                        if root_client.dir_exists(old_path):
                            root_client.symlink({partition.path: old_path})
                    if not root_client.dir_exists(partition.path):
                        root_client.dir_create(partition.path)
                    root_client.dir_chmod(partition.path, 0777)

        working_version = 6

    # Version 7 introduced:
    # - vPool status
    if working_version < 7:
        from ovs.dal.hybrids import vpool
        reload(vpool)
        from ovs.dal.hybrids.vpool import VPool
        from ovs.dal.lists.vpoollist import VPoolList

        for _vpool in VPoolList.get_vpools():
            vpool = VPool(_vpool.guid)
            if hasattr(vpool, 'status') and vpool.status is None:
                vpool.status = VPool.STATUSES.RUNNING
                vpool.save()

        working_version = 7

    # Version 10 introduced:
    # - Reverse indexes are stored in persistent store
    # - Store more non-changing metadata on disk iso using a dynamic property
    if working_version < 10:
        from ovs.dal.helpers import HybridRunner, Descriptor
        from ovs.dal.datalist import DataList
        from ovs.extensions.storage.persistentfactory import PersistentFactory
        from ovs.extensions.storage.volatilefactory import VolatileFactory

        persistent = PersistentFactory.get_client()
        for prefix in ['ovs_listcache', 'ovs_reverseindex']:
            for key in persistent.prefix(prefix):
                persistent.delete(key)
        for key in persistent.prefix('ovs_data_'):
            persistent.set(key, persistent.get(key))
        base_reverse_key = 'ovs_reverseindex_{0}_{1}|{2}|{3}'
        hybrid_structure = HybridRunner.get_hybrids()
        for class_descriptor in hybrid_structure.values():
            cls = Descriptor().load(class_descriptor).get_object()
            all_objects = DataList(cls, {'type': DataList.where_operator.AND,
                                         'items': []})
            for item in all_objects:
                guid = item.guid
                for relation in item._relations:
                    if relation.foreign_type is None:
                        rcls = cls
                        rclsname = rcls.__name__.lower()
                    else:
                        rcls = relation.foreign_type
                        rclsname = rcls.__name__.lower()
                    key = relation.name
                    rguid = item._data[key]['guid']
                    if rguid is not None:
                        reverse_key = base_reverse_key.format(rclsname, rguid, relation.foreign_key, guid)
                        persistent.set(reverse_key, 0)
        volatile = VolatileFactory.get_client()
        try:
            volatile._client.flush_all()
        except:
            pass

        from ovs.dal.lists.vdisklist import VDiskList
        for vdisk in VDiskList.get_vdisks():
            try:
                vdisk.metadata = {'lba_size': vdisk.info['lba_size'],
                                  'cluster_multiplier': vdisk.info['cluster_multiplier']}
                vdisk.save()
            except:
                pass

        working_version = 10

    # Version 11 introduced:
    # - ALBA accelerated ALBA, meaning different vpool.metadata information
    if working_version < 11:
        from ovs.dal.lists.vpoollist import VPoolList
        for vpool in VPoolList.get_vpools():
            vpool.metadata = {'backend': vpool.metadata}
            if 'metadata' in vpool.metadata['backend']:
                vpool.metadata['backend']['arakoon_config'] = vpool.metadata['backend'].pop('metadata')
            if 'backend_info' in vpool.metadata['backend']:
                vpool.metadata['backend']['backend_info']['fragment_cache_on_read'] = True
                vpool.metadata['backend']['backend_info']['fragment_cache_on_write'] = False
            vpool.save()
        working_version = 11

    return working_version
def configure_host(self, ip): if self._is_devstack is False and self._is_openstack is False or self._cinder_installed is False or self._nova_installed is False: self._logger.warning( 'Configure host: No OpenStack nor DevStack installation detected or Cinder and Nova plugins are not installed' ) return # 1. Get Driver code self._logger.info('*** Configuring host with IP {0} ***'.format(ip)) self._logger.info(' Copy driver code') remote_driver = "/opt/OpenvStorage/config/templates/cinder-volume-driver/{0}/openvstorage.py".format( self._stack_version) remote_version = '0.0.0' existing_version = '0.0.0' try: from cinder.volume.drivers import openvstorage if hasattr(openvstorage, 'OVSVolumeDriver'): existing_version = getattr(openvstorage.OVSVolumeDriver, 'VERSION', '0.0.0') except ImportError: pass for line in self.client.file_read(remote_driver).splitlines(): if 'VERSION = ' in line: remote_version = line.split('VERSION = ')[-1].strip().replace( "'", "").replace('"', "") break nova_base_path = self._get_base_path('nova') cinder_base_path = self._get_base_path('cinder') if self._is_devstack is True: local_driver = '{0}/volume/drivers/openvstorage.py'.format( cinder_base_path) else: local_driver = '{0}/cinder/volume/drivers/openvstorage.py'.format( self._driver_location) if remote_version > existing_version: self._logger.debug( 'Updating existing driver using {0} from version {1} to version {2}' .format(remote_driver, existing_version, remote_version)) self.client.run('cp -f {0} {1}'.format(remote_driver, local_driver)) else: self._logger.debug('Using driver {0} version {1}'.format( local_driver, existing_version)) # 2. Configure users and groups self._logger.info(' Add users to group ovs') users = ['libvirt-qemu', 'stack' ] if self._is_devstack is True else self._openstack_users for user in users: self.client.run('usermod -a -G ovs {0}'.format(user)) # 3. 
Apply patches self._logger.info(' Applying patches') if self._stack_version in ('liberty', 'mitaka', 'newton'): try: import os_brick cinder_brick_initiator_file = "{0}/initiator/connector.py".format( os.path.dirname(os_brick.__file__)) except ImportError: cinder_brick_initiator_file = '' if self._is_devstack is True: nova_volume_file = '{0}/virt/libvirt/volume/volume.py'.format( nova_base_path) else: nova_volume_file = '{0}/nova/virt/libvirt/volume/volume.py'.format( self._driver_location) else: cinder_brick_initiator_file = '{0}/cinder/brick/initiator/connector.py'.format( self._driver_location) if self._is_devstack is True: nova_volume_file = '{0}/virt/libvirt/volume.py'.format( nova_base_path) else: nova_volume_file = '{0}/nova/virt/libvirt/volume.py'.format( self._driver_location) if self._is_devstack is True: nova_driver_file = '{0}/virt/libvirt/driver.py'.format( nova_base_path) else: nova_driver_file = '{0}/nova/virt/libvirt/driver.py'.format( self._driver_location) self._logger.info(' Patching file {0}'.format(nova_volume_file)) file_contents = self.client.file_read(nova_volume_file) if 'class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver):' not in file_contents: file_contents += ''' class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver): def __init__(self, connection): super(LibvirtFileVolumeDriver, self).__init__(connection, is_block_dev=False) def get_config(self, connection_info, disk_info): conf = super(LibvirtFileVolumeDriver, self).get_config(connection_info, disk_info) conf.source_type = 'file' conf.source_path = connection_info['data']['device_path'] return conf ''' self.client.file_write(nova_volume_file, file_contents) self._logger.info(' Patching file {0}'.format(nova_driver_file)) file_contents = self.client.file_read(nova_driver_file) if self._stack_version in ('liberty', 'mitaka'): check_line = 'local=nova.virt.libvirt.volume.volume.LibvirtVolumeDriver' new_line = 'file=nova.virt.libvirt.volume.volume.LibvirtFileVolumeDriver' else: check_line = 'local=nova.virt.libvirt.volume.LibvirtVolumeDriver' new_line = 'file=nova.virt.libvirt.volume.LibvirtFileVolumeDriver' if new_line not in file_contents: for line in file_contents.splitlines(): if check_line in line: stripped_line = line.rstrip() whitespaces = len(stripped_line) - len( stripped_line.lstrip()) new_line = "{0}'{1}',\n".format(' ' * whitespaces, new_line) fc = file_contents[:file_contents. index(line)] + new_line + file_contents[ file_contents.index(line):] self.client.file_write(nova_driver_file, "".join(fc)) break if os.path.exists(cinder_brick_initiator_file): # fix brick/upload to glance self._logger.info( ' Patching file {0}'.format(cinder_brick_initiator_file)) if self._stack_version in ('liberty', 'mitaka', 'newton'): self.client.run( """sed -i 's/elif protocol == LOCAL:/elif protocol in [LOCAL, "FILE"]:/g' {0}""" .format(cinder_brick_initiator_file)) else: self.client.run( """sed -i 's/elif protocol == "LOCAL":/elif protocol in ["LOCAL", "FILE"]:/g' {0}""" .format(cinder_brick_initiator_file)) # 4. 
Configure messaging driver self._logger.info(' - Configure messaging driver') nova_messaging_driver = 'nova.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging' cinder_messaging_driver = 'cinder.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging' with remote(ip, [RawConfigParser, open], 'root') as rem: for config_file, driver in { self._NOVA_CONF: nova_messaging_driver, self._CINDER_CONF: cinder_messaging_driver }.iteritems(): changed = False cfg = rem.RawConfigParser() cfg.read([config_file]) if cfg.has_option("DEFAULT", "notification_driver"): if cfg.get("DEFAULT", "notification_driver") != driver: changed = True cfg.set("DEFAULT", "notification_driver", driver) else: changed = True cfg.set("DEFAULT", "notification_driver", driver) if cfg.has_option("DEFAULT", "notification_topics"): notification_topics = cfg.get( "DEFAULT", "notification_topics").split(",") if "notifications" not in notification_topics: notification_topics.append("notifications") changed = True cfg.set("DEFAULT", "notification_topics", ",".join(notification_topics)) else: changed = True cfg.set("DEFAULT", "notification_topics", "notifications") if config_file == self._NOVA_CONF: for param, value in { 'notify_on_any_change': 'True', 'notify_on_state_change': 'vm_and_task_state' }.iteritems(): if not cfg.has_option("DEFAULT", param): changed = True cfg.set("DEFAULT", param, value) if changed is True: with rem.open(config_file, "w") as fp: cfg.write(fp) # 5. Enable events consumer self._logger.info(' - Enabling events consumer service') service_name = 'openstack-events-consumer' if not ServiceManager.has_service(service_name, self.client): ServiceManager.add_service(service_name, self.client) ServiceManager.enable_service(service_name, self.client) ServiceManager.start_service(service_name, self.client)
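# Illustrative sketch of the change-tracking pattern used in step 4 above: a
# configuration file is only rewritten when a setting actually changed, so
# re-running configure_host() does not needlessly touch nova.conf/cinder.conf.
# The path in the commented usage line is a hypothetical example; the real code
# edits the remote files through the remote() wrapper.
from ConfigParser import RawConfigParser  # `configparser` on Python 3

def ensure_option(config_path, section, option, value):
    cfg = RawConfigParser()
    cfg.read([config_path])
    if not cfg.has_section(section) and section != 'DEFAULT':
        cfg.add_section(section)
    if cfg.has_option(section, option) and cfg.get(section, option) == value:
        return False  # Already configured, nothing written
    cfg.set(section, option, value)
    with open(config_path, 'w') as fp:
        cfg.write(fp)
    return True

# ensure_option('/tmp/nova.conf', 'DEFAULT', 'notification_driver', 'messaging')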
def remove_node(node_ip, silent=None): """ Remove the node with specified IP from the cluster :param node_ip: IP of the node to remove :type node_ip: str :param silent: If silent == '--force-yes' no question will be asked to confirm the removal :type silent: str :return: None """ from ovs.lib.storagedriver import StorageDriverController from ovs.lib.storagerouter import StorageRouterController from ovs.dal.lists.storagerouterlist import StorageRouterList Toolbox.log(logger=NodeRemovalController._logger, messages="Remove node", boxed=True) Toolbox.log( logger=NodeRemovalController._logger, messages="WARNING: Some of these steps may take a very long time, please check the logs for more information\n\n", ) ############### # VALIDATIONS # ############### try: node_ip = node_ip.strip() if not isinstance(node_ip, str): raise ValueError("Node IP must be a string") if not re.match(SSHClient.IP_REGEX, node_ip): raise ValueError("Invalid IP {0} specified".format(node_ip)) storage_router_all = StorageRouterList.get_storagerouters() storage_router_masters = StorageRouterList.get_masters() storage_router_all_ips = set([storage_router.ip for storage_router in storage_router_all]) storage_router_master_ips = set([storage_router.ip for storage_router in storage_router_masters]) storage_router_to_remove = StorageRouterList.get_by_ip(node_ip) if node_ip not in storage_router_all_ips: raise ValueError( "Unknown IP specified\nKnown in model:\n - {0}\nSpecified for removal:\n - {1}".format( "\n - ".join(storage_router_all_ips), node_ip ) ) if len(storage_router_all_ips) == 1: raise RuntimeError("Removing the only node is not possible") if node_ip in storage_router_master_ips and len(storage_router_master_ips) == 1: raise RuntimeError("Removing the only master node is not possible") if System.get_my_storagerouter() == storage_router_to_remove: raise RuntimeError( "The node to be removed cannot be identical to the node on which the removal is initiated" ) Toolbox.log( logger=NodeRemovalController._logger, messages="Creating SSH connections to remaining master nodes" ) master_ip = None ip_client_map = {} storage_routers_offline = [] storage_router_to_remove_online = True for storage_router in storage_router_all: try: client = SSHClient(storage_router, username="******") if client.run(["pwd"]): Toolbox.log( logger=NodeRemovalController._logger, messages=" Node with IP {0:<15} successfully connected to".format(storage_router.ip), ) ip_client_map[storage_router.ip] = client if storage_router != storage_router_to_remove and storage_router.node_type == "MASTER": master_ip = storage_router.ip except UnableToConnectException: Toolbox.log( logger=NodeRemovalController._logger, messages=" Node with IP {0:<15} is unreachable".format(storage_router.ip), ) storage_routers_offline.append(storage_router) if storage_router == storage_router_to_remove: storage_router_to_remove_online = False if len(ip_client_map) == 0 or master_ip is None: raise RuntimeError("Could not connect to any master node in the cluster") storage_router_to_remove.invalidate_dynamics("vdisks_guids") if ( len(storage_router_to_remove.vdisks_guids) > 0 ): # vDisks are supposed to be moved away manually before removing a node raise RuntimeError("Still vDisks attached to Storage Router {0}".format(storage_router_to_remove.name)) internal_memcached = Toolbox.is_service_internally_managed(service="memcached") internal_rabbit_mq = Toolbox.is_service_internally_managed(service="rabbitmq") memcached_endpoints = 
Configuration.get(key="/ovs/framework/memcache|endpoints") rabbit_mq_endpoints = Configuration.get(key="/ovs/framework/messagequeue|endpoints") copy_memcached_endpoints = list(memcached_endpoints) copy_rabbit_mq_endpoints = list(rabbit_mq_endpoints) for endpoint in memcached_endpoints: if endpoint.startswith(storage_router_to_remove.ip): copy_memcached_endpoints.remove(endpoint) for endpoint in rabbit_mq_endpoints: if endpoint.startswith(storage_router_to_remove.ip): copy_rabbit_mq_endpoints.remove(endpoint) if len(copy_memcached_endpoints) == 0 and internal_memcached is True: raise RuntimeError( "Removal of provided nodes will result in a complete removal of the memcached service" ) if len(copy_rabbit_mq_endpoints) == 0 and internal_rabbit_mq is True: raise RuntimeError( "Removal of provided nodes will result in a complete removal of the messagequeue service" ) except Exception as exception: Toolbox.log( logger=NodeRemovalController._logger, messages=[str(exception)], boxed=True, loglevel="exception" ) sys.exit(1) ################# # CONFIRMATIONS # ################# interactive = silent != "--force-yes" remove_asd_manager = not interactive # Remove ASD manager if non-interactive else ask if interactive is True: proceed = Interactive.ask_yesno( message="Are you sure you want to remove node {0}?".format(storage_router_to_remove.name), default_value=False, ) if proceed is False: Toolbox.log(logger=NodeRemovalController._logger, messages="Abort removal", title=True) sys.exit(1) if storage_router_to_remove_online is True: client = SSHClient(endpoint=storage_router_to_remove, username="******") if ServiceManager.has_service(name="asd-manager", client=client): remove_asd_manager = Interactive.ask_yesno( message="Do you also want to remove the ASD manager and related ASDs?", default_value=False ) if remove_asd_manager is True or storage_router_to_remove_online is False: for function in Toolbox.fetch_hooks("setup", "validate_asd_removal"): validation_output = function(storage_router_to_remove.ip) if validation_output["confirm"] is True: if Interactive.ask_yesno(message=validation_output["question"], default_value=False) is False: remove_asd_manager = False break ########### # REMOVAL # ########### try: Toolbox.log( logger=NodeRemovalController._logger, messages="Starting removal of node {0} - {1}".format( storage_router_to_remove.name, storage_router_to_remove.ip ), ) if storage_router_to_remove_online is False: Toolbox.log( logger=NodeRemovalController._logger, messages=" Marking all Storage Drivers served by Storage Router {0} as offline".format( storage_router_to_remove.ip ), ) StorageDriverController.mark_offline(storagerouter_guid=storage_router_to_remove.guid) # Remove vPools Toolbox.log( logger=NodeRemovalController._logger, messages=" Removing vPools from node".format(storage_router_to_remove.ip), ) storage_routers_offline_guids = [ sr.guid for sr in storage_routers_offline if sr.guid != storage_router_to_remove.guid ] for storage_driver in storage_router_to_remove.storagedrivers: Toolbox.log( logger=NodeRemovalController._logger, messages=" Removing vPool {0} from node".format(storage_driver.vpool.name), ) StorageRouterController.remove_storagedriver( storagedriver_guid=storage_driver.guid, offline_storage_router_guids=storage_routers_offline_guids ) # Demote if MASTER if storage_router_to_remove.node_type == "MASTER": NodeTypeController.demote_node( cluster_ip=storage_router_to_remove.ip, master_ip=master_ip, ip_client_map=ip_client_map, unique_id=storage_router_to_remove.machine_id, 
unconfigure_memcached=internal_memcached, unconfigure_rabbitmq=internal_rabbit_mq, offline_nodes=storage_routers_offline, ) # Stop / remove services Toolbox.log(logger=NodeRemovalController._logger, messages="Stopping and removing services") config_store = Configuration.get_store() if storage_router_to_remove_online is True: client = SSHClient(endpoint=storage_router_to_remove, username="******") NodeRemovalController.remove_services( client=client, node_type=storage_router_to_remove.node_type.lower(), logger=NodeRemovalController._logger, ) service = "watcher-config" if ServiceManager.has_service(service, client=client): Toolbox.log(logger=NodeRemovalController._logger, messages="Removing service {0}".format(service)) ServiceManager.stop_service(service, client=client) ServiceManager.remove_service(service, client=client) if config_store == "etcd": from ovs.extensions.db.etcd.installer import EtcdInstaller if Configuration.get(key="/ovs/framework/external_config") is None: Toolbox.log(logger=NodeRemovalController._logger, messages=" Removing Etcd cluster") try: EtcdInstaller.stop("config", client) EtcdInstaller.remove("config", client) except Exception as ex: Toolbox.log( logger=NodeRemovalController._logger, messages=["\nFailed to unconfigure Etcd", ex], loglevel="exception", ) Toolbox.log(logger=NodeRemovalController._logger, messages="Removing Etcd proxy") EtcdInstaller.remove_proxy("config", client.ip) Toolbox.run_hooks( component="noderemoval", sub_component="remove", logger=NodeRemovalController._logger, cluster_ip=storage_router_to_remove.ip, complete_removal=remove_asd_manager, ) # Clean up model Toolbox.log(logger=NodeRemovalController._logger, messages="Removing node from model") for service in storage_router_to_remove.services: service.delete() for disk in storage_router_to_remove.disks: for partition in disk.partitions: partition.delete() disk.delete() for j_domain in storage_router_to_remove.domains: j_domain.delete() Configuration.delete("/ovs/framework/hosts/{0}".format(storage_router_to_remove.machine_id)) NodeTypeController.restart_framework_and_memcache_services( clients=ip_client_map, offline_node_ips=[node.ip for node in storage_routers_offline], logger=NodeRemovalController._logger, ) if storage_router_to_remove_online is True: client = SSHClient(endpoint=storage_router_to_remove, username="******") if config_store == "arakoon": client.file_delete(filenames=[ArakoonConfiguration.CACC_LOCATION]) client.file_delete(filenames=[Configuration.BOOTSTRAP_CONFIG_LOCATION]) storage_router_to_remove.delete() Toolbox.log(logger=NodeRemovalController._logger, messages="Successfully removed node\n") except Exception as exception: Toolbox.log(logger=NodeRemovalController._logger, messages="\n") Toolbox.log( logger=NodeRemovalController._logger, messages=["An unexpected error occurred:", str(exception)], boxed=True, loglevel="exception", ) sys.exit(1) except KeyboardInterrupt: Toolbox.log(logger=NodeRemovalController._logger, messages="\n") Toolbox.log( logger=NodeRemovalController._logger, messages="This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.", boxed=True, loglevel="error", ) sys.exit(1) if remove_asd_manager is True: Toolbox.log(logger=NodeRemovalController._logger, messages="\nRemoving ASD Manager") with remote(storage_router_to_remove.ip, [os]) as rem: rem.os.system("asd-manager remove --force-yes") Toolbox.log(logger=NodeRemovalController._logger, messages="Remove nodes finished", title=True)
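# Illustrative sketch of the endpoint validation performed above: endpoints
# owned by the node being removed are filtered out, and the removal is refused
# when that would leave an internally managed service (memcached / rabbitmq)
# without any endpoint. The IPs below are example data only.
def remaining_endpoints(endpoints, node_ip):
    return [endpoint for endpoint in endpoints
            if not endpoint.startswith(node_ip)]

endpoints = ['10.0.0.1:11211', '10.0.0.2:11211']
assert remaining_endpoints(endpoints, '10.0.0.2') == ['10.0.0.1:11211']
assert remaining_endpoints(['10.0.0.2:11211'], '10.0.0.2') == []  # would abort the removal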
def cluster_registry_checkup(): """ Verify whether changes have occurred in the cluster registry for each vPool :return: Information whether changes occurred :rtype: dict """ changed_vpools = {} for vpool in VPoolList.get_vpools(): changed_vpools[vpool.guid] = {'changes': False, 'success': True} try: StorageDriverController._logger.info('Validating cluster registry settings for Vpool {0}'.format(vpool.guid)) current_configs = vpool.clusterregistry_client.get_node_configs() changes = len(current_configs) == 0 node_configs = [] for sd in vpool.storagedrivers: sd.invalidate_dynamics(['cluster_node_config']) new_config = sd.cluster_node_config node_configs.append(ClusterNodeConfig(**new_config)) if changes is False: current_node_configs = [config for config in current_configs if config.vrouter_id == sd.storagedriver_id] if len(current_node_configs) == 1: current_node_config = current_node_configs[0] for key in new_config: if getattr(current_node_config, key) != new_config[key]: changes = True break changed_vpools[vpool.guid]['changes'] = changes if changes is True: StorageDriverController._logger.info('Cluster registry settings for Vpool {0} needs to be updated'.format(vpool.guid)) available_storagedrivers = [] for sd in vpool.storagedrivers: storagerouter = sd.storagerouter try: SSHClient(storagerouter, username='******') with remote(storagerouter.ip, [LocalStorageRouterClient]) as rem: sd_key = '/ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, sd.storagedriver_id) if Configuration.exists(sd_key) is True: path = Configuration.get_configuration_path(sd_key) lsrc = rem.LocalStorageRouterClient(path) lsrc.server_revision() # 'Cheap' call to verify whether volumedriver is responsive available_storagedrivers.append(sd) except UnableToConnectException: StorageDriverController._logger.warning('StorageRouter {0} not available.'.format(storagerouter.name)) except Exception as ex: if 'ClusterNotReachableException' in str(ex): StorageDriverController._logger.warning('StorageDriver {0} on StorageRouter {1} not available.'.format( sd.guid, storagerouter.name )) else: StorageDriverController._logger.exception('Got exception when validating StorageDriver {0} on StorageRouter {1}.'.format( sd.guid, storagerouter.name )) StorageDriverController._logger.info('Updating cluster node configs for VPool {0}'.format(vpool.guid)) vpool.clusterregistry_client.set_node_configs(node_configs) for sd in available_storagedrivers: StorageDriverController._logger.info('Trigger config reload for StorageDriver {0}'.format(sd.guid)) vpool.storagedriver_client.update_cluster_node_configs(str(sd.storagedriver_id), req_timeout_secs=10) StorageDriverController._logger.info('Updating cluster node configs for Vpool {0} completed'.format(vpool.guid)) else: StorageDriverController._logger.info('Cluster registry settings for Vpool {0} is up to date'.format(vpool.guid)) except Exception as ex: StorageDriverController._logger.exception('Got exception when validating cluster registry settings for Vpool {0}.'.format(vpool.name)) changed_vpools[vpool.guid]['success'] = False changed_vpools[vpool.guid]['error'] = ex.message return changed_vpools
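# Illustrative sketch of the comparison done in cluster_registry_checkup above:
# the cluster registry is only rewritten when at least one field of a node
# config differs from the model (or when the registry is still empty). Plain
# dicts keyed on vrouter_id stand in for the ClusterNodeConfig objects here.
def registry_needs_update(current_configs, desired_configs):
    if len(current_configs) == 0:
        return True
    for vrouter_id, desired in desired_configs.items():
        current = current_configs.get(vrouter_id)
        if current is None:
            return True
        for key, value in desired.items():
            if current.get(key) != value:
                return True
    return False

assert registry_needs_update({'sd1': {'host': '10.0.0.1'}},
                             {'sd1': {'host': '10.0.0.2'}}) is True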
def is_host_configured(self, ip): if ( self._is_devstack is False and self._is_openstack is False ) or self._cinder_installed is False or self._nova_installed is False: self._logger.warning( 'Host configured: No OpenStack nor DevStack installation detected or Cinder and Nova plugins are not installed' ) return False # 1. Check driver code if self._is_devstack is True: if not self.client.file_exists(filename=self._devstack_driver): self._logger.info(' File "{0}" does not exist'.format( self._devstack_driver)) return False else: if not self.client.file_exists( filename='{0}/cinder/volume/drivers/openvstorage.py'. format(self._driver_location)): self._logger.info( ' File "{0}/cinder/volume/drivers/openvstorage.py" does not exist' .format(self._driver_location)) return False # 2. Check configured users ovs_id = self.client.run('id -u ovs') if not ovs_id: self._logger.info('Failed to determine the OVS user group ID') return False users = ['libvirt-qemu', 'stack' ] if self._is_devstack is True else self._openstack_users for user in users: if '{0}(ovs)'.format(ovs_id) not in self.client.run( 'id -a {0}'.format(user)): self._logger.info( 'User "{0}" is not part of the OVS user group') return False # 3. Check patches nova_base_path = self._get_base_path('nova') cinder_base_path = self._get_base_path('cinder') if self._stack_version in ('liberty', 'mitaka', 'newton'): try: import os_brick cinder_brick_initiator_file = "{0}/initiator/connector.py".format( os.path.dirname(os_brick.__file__)) except ImportError: cinder_brick_initiator_file = '' if self._is_devstack is True: nova_volume_file = '{0}/virt/libvirt/volume/volume.py'.format( nova_base_path) else: nova_volume_file = '{0}/nova/virt/libvirt/volume/volume.py'.format( self._driver_location) else: if self._is_devstack is True: nova_volume_file = '{0}/virt/libvirt/volume.py'.format( nova_base_path) else: nova_volume_file = '{0}/nova/virt/libvirt/volume.py'.format( self._driver_location) cinder_brick_initiator_file = '{0}/brick/initiator/connector.py'.format( cinder_base_path) if self._is_devstack is True: nova_driver_file = '{0}/virt/libvirt/driver.py'.format( nova_base_path) else: nova_driver_file = '{0}/nova/virt/libvirt/driver.py'.format( self._driver_location) file_contents = self.client.file_read(nova_volume_file) if 'class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver):' not in file_contents: self._logger.info('File "{0}" is not configured properly'.format( nova_volume_file)) return False if self._stack_version in ('liberty', 'mitaka'): check_line = 'file=nova.virt.libvirt.volume.volume.LibvirtFileVolumeDriver' else: check_line = 'file=nova.virt.libvirt.volume.LibvirtFileVolumeDriver' file_contents = self.client.file_read(nova_driver_file) if check_line not in file_contents: self._logger.info('File "{0}" is not configured properly'.format( nova_driver_file)) return False if os.path.exists(cinder_brick_initiator_file): file_contents = self.client.file_read(cinder_brick_initiator_file) if self._stack_version in ('liberty', 'mitaka', 'newton'): if 'elif protocol in [LOCAL, "FILE"]:' not in file_contents: self._logger.info( 'File "{0}" is not configured properly'.format( cinder_brick_initiator_file)) return False else: if 'elif protocol in ["LOCAL", "FILE"]:' not in file_contents: self._logger.info( 'File "{0}" is not configured properly'.format( cinder_brick_initiator_file)) return False # 4. 
Check messaging driver configuration nova_messaging_driver = 'nova.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging' cinder_messaging_driver = 'cinder.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging' host_configured = True with remote(ip, [RawConfigParser], 'root') as rem: for config_file, driver in { self._NOVA_CONF: nova_messaging_driver, self._CINDER_CONF: cinder_messaging_driver }.iteritems(): cfg = rem.RawConfigParser() cfg.read([config_file]) host_configured &= cfg.get("DEFAULT", "notification_driver") == driver host_configured &= "notifications" in cfg.get( "DEFAULT", "notification_topics") if config_file == self._NOVA_CONF: host_configured &= cfg.get( "DEFAULT", "notify_on_any_change") == "True" host_configured &= cfg.get( "DEFAULT", "notify_on_state_change") == "vm_and_task_state" if host_configured is False: self._logger.info( 'Nova and/or Cinder configuration files are not configured properly' ) return host_configured # 5. Check events consumer service service_name = 'ovs-openstack-events-consumer' if not (ServiceManager.has_service(service_name, self.client) and ServiceManager.get_service_status(service_name, self.client) is True): self._logger.info( 'Service "{0}" is not configured properly'.format( service_name)) return False return True
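# Illustrative sketch of the accumulation used in step 4 of is_host_configured:
# every expected option ANDs into a single boolean, so one mismatch marks the
# host as not configured without aborting the remaining checks. The option
# values below are example data, not a real nova.conf.
def messaging_configured(options, driver):
    configured = True
    configured &= options.get('notification_driver') == driver
    topics = options.get('notification_topics', '').split(',')
    configured &= 'notifications' in topics
    return bool(configured)

assert messaging_configured({'notification_driver': 'messaging',
                             'notification_topics': 'notifications,foo'},
                            'messaging') is True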
def update_framework(): """ Update the framework :return: None """ filemutex = file_mutex('system_update', wait=2) upgrade_file = '/etc/ready_for_upgrade' upgrade_ongoing_check_file = '/etc/upgrade_ongoing' ssh_clients = [] try: filemutex.acquire() UpdateController._log_message('+++ Starting framework update +++') from ovs.dal.lists.storagerouterlist import StorageRouterList UpdateController._log_message( 'Generating SSH client connections for each storage router') upgrade_file = '/etc/ready_for_upgrade' upgrade_ongoing_check_file = '/etc/upgrade_ongoing' storage_routers = StorageRouterList.get_storagerouters() ssh_clients = [] master_ips = [] extra_ips = [] for sr in storage_routers: ssh_clients.append(SSHClient(sr.ip, username='******')) if sr.node_type == 'MASTER': master_ips.append(sr.ip) elif sr.node_type == 'EXTRA': extra_ips.append(sr.ip) this_client = [ client for client in ssh_clients if client.is_local is True ][0] # Create locks UpdateController._log_message('Creating lock files', client_ip=this_client.ip) for client in ssh_clients: client.run( 'touch {0}'.format(upgrade_file) ) # Prevents manual install or upgrade individual packages client.run('touch {0}'.format(upgrade_ongoing_check_file) ) # Prevents clicking x times on 'Update' btn # Check requirements packages_to_update = set() all_services_to_restart = [] for client in ssh_clients: for function in Toolbox.fetch_hooks('update', 'metadata'): UpdateController._log_message( 'Executing function {0}'.format(function.__name__), client_ip=client.ip) output = function(client) for key, value in output.iteritems(): if key != 'framework': continue for package_info in value: packages_to_update.update(package_info['packages']) all_services_to_restart += package_info['services'] services_to_restart = [] for service in all_services_to_restart: if service not in services_to_restart: services_to_restart.append( service ) # Filter out duplicates maintaining the order of services (eg: watcher-framework before memcached) UpdateController._log_message( 'Services which will be restarted --> {0}'.format( ', '.join(services_to_restart))) UpdateController._log_message( 'Packages which will be installed --> {0}'.format( ', '.join(packages_to_update))) # Stop services if UpdateController._change_services_state( services=services_to_restart, ssh_clients=ssh_clients, action='stop') is False: UpdateController._log_message( 'Stopping all services on every node failed, cannot continue', client_ip=this_client.ip, severity='warning') UpdateController._remove_lock_files( [upgrade_file, upgrade_ongoing_check_file], ssh_clients) # Start services again if a service could not be stopped UpdateController._log_message( 'Attempting to start the services again', client_ip=this_client.ip) UpdateController._change_services_state( services=services_to_restart, ssh_clients=ssh_clients, action='start') UpdateController._log_message( 'Failed to stop all required services, aborting update', client_ip=this_client.ip, severity='error') return # Update packages failed_clients = [] for client in ssh_clients: PackageManager.update(client=client) try: UpdateController._log_message('Installing latest packages', client.ip) for package in packages_to_update: UpdateController._log_message( 'Installing {0}'.format(package), client.ip) PackageManager.install(package_name=package, client=client, force=True) UpdateController._log_message( 'Installed {0}'.format(package), client.ip) client.file_delete(upgrade_file) except subprocess.CalledProcessError as cpe: 
UpdateController._log_message( 'Upgrade failed with error: {0}'.format(cpe.output), client.ip, 'error') failed_clients.append(client) break if failed_clients: UpdateController._remove_lock_files( [upgrade_file, upgrade_ongoing_check_file], ssh_clients) UpdateController._log_message( 'Error occurred. Attempting to start all services again', client_ip=this_client.ip, severity='error') UpdateController._change_services_state( services=services_to_restart, ssh_clients=ssh_clients, action='start') UpdateController._log_message( 'Failed to upgrade following nodes:\n - {0}\nPlease check /var/log/ovs/lib.log on {1} for more information' .format( '\n - '.join([client.ip for client in failed_clients]), this_client.ip), this_client.ip, 'error') return # Migrate code for client in ssh_clients: try: UpdateController._log_message('Started code migration', client.ip) try: with remote(client.ip, [Migrator]) as rem: rem.Migrator.migrate(master_ips, extra_ips) except EOFError as eof: UpdateController._log_message( 'EOFError during code migration, retrying {0}'. format(eof), client.ip, 'warning') with remote(client.ip, [Migrator]) as rem: rem.Migrator.migrate(master_ips, extra_ips) UpdateController._log_message('Finished code migration', client.ip) except Exception as ex: UpdateController._remove_lock_files( [upgrade_ongoing_check_file], ssh_clients) UpdateController._log_message( 'Code migration failed with error: {0}'.format(ex), client.ip, 'error') return # Start services UpdateController._log_message('Starting services', client_ip=this_client.ip) model_services = [] if 'arakoon-ovsdb' in services_to_restart: model_services.append('arakoon-ovsdb') services_to_restart.remove('arakoon-ovsdb') if 'memcached' in services_to_restart: model_services.append('memcached') services_to_restart.remove('memcached') UpdateController._change_services_state(services=model_services, ssh_clients=ssh_clients, action='start') # Migrate model UpdateController._log_message('Started model migration', client_ip=this_client.ip) try: from ovs.dal.helpers import Migration with remote(ssh_clients[0].ip, [Migration]) as rem: rem.Migration.migrate() UpdateController._log_message('Finished model migration', client_ip=this_client.ip) except Exception as ex: UpdateController._remove_lock_files( [upgrade_ongoing_check_file], ssh_clients) UpdateController._log_message( 'An unexpected error occurred: {0}'.format(ex), client_ip=this_client.ip, severity='error') return # Post upgrade actions UpdateController._log_message('Executing post upgrade actions', client_ip=this_client.ip) for client in ssh_clients: with remote(client.ip, [Toolbox, SSHClient]) as rem: for function in rem.Toolbox.fetch_hooks( 'update', 'postupgrade'): UpdateController._log_message( 'Executing action {0}'.format(function.__name__), client_ip=client.ip) try: function(rem.SSHClient(client.ip, username='******')) UpdateController._log_message( 'Executing action {0} completed'.format( function.__name__), client_ip=client.ip) except Exception as ex: UpdateController._log_message( 'Post upgrade action failed with error: {0}'. 
format(ex), client.ip, 'error') # Start watcher and restart support-agent UpdateController._change_services_state( services=services_to_restart, ssh_clients=ssh_clients, action='start') UpdateController._change_services_state(services=['support-agent'], ssh_clients=ssh_clients, action='restart') UpdateController._remove_lock_files([upgrade_ongoing_check_file], ssh_clients) UpdateController._log_message('+++ Finished updating +++') except RuntimeError as rte: UpdateController._log_message( 'Error during framework update: {0}'.format(rte), severity='error') UpdateController._remove_lock_files( [upgrade_file, upgrade_ongoing_check_file], ssh_clients) except NoLockAvailableException: UpdateController._log_message( 'Another framework update is currently in progress!') except Exception as ex: UpdateController._log_message( 'Error during framework update: {0}'.format(ex), severity='error') UpdateController._remove_lock_files( [upgrade_file, upgrade_ongoing_check_file], ssh_clients) finally: filemutex.release()
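# Illustrative sketch of the locking used in update_framework above: a
# cluster-wide mutex guards the whole update, while per-node marker files block
# concurrent package operations; the markers are removed on every exit path.
# This sketch touches local paths only, whereas the real code runs the
# touch/delete over SSH on every storage router.
import os

UPGRADE_FILE = '/tmp/ready_for_upgrade.example'
ONGOING_FILE = '/tmp/upgrade_ongoing.example'

def with_upgrade_markers(paths, action):
    try:
        for path in paths:
            open(path, 'a').close()  # equivalent of 'touch'
        return action()
    finally:
        for path in paths:
            if os.path.exists(path):
                os.remove(path)

# with_upgrade_markers([UPGRADE_FILE, ONGOING_FILE], lambda: 'update packages here')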
def post_upgrade(client): """ Upgrade actions after the new packages have actually been installed :param client: SSHClient object :return: None """ # If we can reach Etcd with a valid config, and there's still an old config file present, delete it from ovs.extensions.db.etcd.configuration import EtcdConfiguration path = '/opt/OpenvStorage/config/ovs.json' if EtcdConfiguration.exists( '/ovs/framework/cluster_id') and client.file_exists(path): client.file_delete(path) # Migrate volumedriver & albaproxy configuration files import uuid from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration from ovs.dal.lists.storagedriverlist import StorageDriverList from ovs.extensions.generic.system import System with remote(client.ip, [StorageDriverConfiguration, os, open, json, System], username='******') as rem: configuration_dir = '{0}/storagedriver/storagedriver'.format( EtcdConfiguration.get('/ovs/framework/paths|cfgdir')) host_id = rem.System.get_my_machine_id() if rem.os.path.exists(configuration_dir): for storagedriver in StorageDriverList.get_storagedrivers_by_storagerouter( rem.System.get_my_storagerouter().guid): vpool = storagedriver.vpool if storagedriver.alba_proxy is not None: config_tree = '/ovs/vpools/{0}/proxies/{1}/config/{{0}}'.format( vpool.guid, storagedriver.alba_proxy.guid) # ABM config abm_config = '{0}/{1}_alba.cfg'.format( configuration_dir, vpool.name) if rem.os.path.exists(abm_config): with rem.open(abm_config) as config_file: EtcdConfiguration.set( config_tree.format('abm'), config_file.read(), raw=True) rem.os.remove(abm_config) # Albaproxy config alba_config = '{0}/{1}_alba.json'.format( configuration_dir, vpool.name) if rem.os.path.exists(alba_config): with rem.open(alba_config) as config_file: config = rem.json.load(config_file) del config['albamgr_cfg_file'] config[ 'albamgr_cfg_url'] = 'etcd://127.0.0.1:2379{0}'.format( config_tree.format('abm')) EtcdConfiguration.set( config_tree.format('main'), json.dumps(config, indent=4), raw=True) params = { 'VPOOL_NAME': vpool.name, 'VPOOL_GUID': vpool.guid, 'PROXY_ID': storagedriver.alba_proxy.guid } alba_proxy_service = 'ovs-albaproxy_{0}'.format( vpool.name) ServiceManager.add_service( name='ovs-albaproxy', params=params, client=client, target_name=alba_proxy_service) rem.os.remove(alba_config) # Volumedriver config current_file = '{0}/{1}.json'.format( configuration_dir, vpool.name) if rem.os.path.exists(current_file): readcache_size = 0 with rem.open(current_file) as config_file: config = rem.json.load(config_file) config['distributed_transaction_log'] = {} config['distributed_transaction_log'][ 'dtl_transport'] = config['failovercache'][ 'failovercache_transport'] config['distributed_transaction_log'][ 'dtl_path'] = config['failovercache'][ 'failovercache_path'] config['volume_manager'][ 'dtl_throttle_usecs'] = config['volume_manager'][ 'foc_throttle_usecs'] del config['failovercache'] del config['volume_manager']['foc_throttle_usecs'] sdc = rem.StorageDriverConfiguration( 'storagedriver', vpool.guid, storagedriver.storagedriver_id) sdc.configuration = config sdc.save(reload_config=False) for mountpoint in config['content_addressed_cache'][ 'clustercache_mount_points']: readcache_size += int(mountpoint['size'].replace( 'KiB', '')) params = { 'VPOOL_MOUNTPOINT': storagedriver.mountpoint, 'HYPERVISOR_TYPE': storagedriver.storagerouter.pmachine.hvtype, 'VPOOL_NAME': vpool.name, 'CONFIG_PATH': sdc.remote_path, 'UUID': str(uuid.uuid4()), 'OVS_UID': client.run('id -u ovs').strip(), 'OVS_GID': 
client.run('id -g ovs').strip(), 'KILL_TIMEOUT': str( int(readcache_size / 1024.0 / 1024.0 / 6.0 + 30)) } vmware_mode = EtcdConfiguration.get( '/ovs/framework/hosts/{0}/storagedriver|vmware_mode' .format(host_id)) dtl_service = 'ovs-dtl_{0}'.format(vpool.name) ServiceManager.add_service(name='ovs-dtl', params=params, client=client, target_name=dtl_service) if vpool.backend_type.code == 'alba': alba_proxy_service = 'ovs-albaproxy_{0}'.format( vpool.name) dependencies = [alba_proxy_service] else: dependencies = None if vmware_mode == 'ganesha': template_name = 'ovs-ganesha' else: template_name = 'ovs-volumedriver' voldrv_service = 'ovs-volumedriver_{0}'.format( vpool.name) ServiceManager.add_service( name=template_name, params=params, client=client, target_name=voldrv_service, additional_dependencies=dependencies) rem.os.remove(current_file) # Ganesha config, if available current_file = '{0}/{1}_ganesha.conf'.format( configuration_dir, vpool.name) if rem.os.path.exists(current_file): sdc = rem.StorageDriverConfiguration( 'storagedriver', vpool.guid, storagedriver.storagedriver_id) contents = '' for template in ['ganesha-core', 'ganesha-export']: contents += client.file_read( '/opt/OpenvStorage/config/templates/{0}.conf'. format(template)) params = { 'VPOOL_NAME': vpool.name, 'VPOOL_MOUNTPOINT': '/mnt/{0}'.format(vpool.name), 'CONFIG_PATH': sdc.remote_path, 'NFS_FILESYSTEM_ID': storagedriver.storagerouter.ip.split('.', 2)[-1] } for key, value in params.iteritems(): contents = contents.replace( '<{0}>'.format(key), value) client.file_write(current_file, contents)
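# Illustrative sketch of the KILL_TIMEOUT computation used in the volumedriver
# service parameters above: the read cache size is summed in KiB over the
# clustercache mount points, converted to GiB, and the timeout grows with it
# (roughly one sixth of the size in GiB on top of a 30 second floor).
def kill_timeout(clustercache_mount_points):
    readcache_kib = 0
    for mountpoint in clustercache_mount_points:
        readcache_kib += int(mountpoint['size'].replace('KiB', ''))
    return str(int(readcache_kib / 1024.0 / 1024.0 / 6.0 + 30))

assert kill_timeout([{'size': '10485760KiB'}]) == '31'  # 10 GiB read cache -> ~31s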
def execute_scrub_work(queue, vpool, scrub_info, error_messages): """ Executes scrub work for a given vDisk queue and vPool, based on scrub_info :param queue: a Queue with vDisk guids that need to be scrubbed (they should only be member of a single vPool) :type queue: Queue :param vpool: the vPool object of the vDisks :type vpool: VPool :param scrub_info: A dict containing scrub information: `scrub_path` with the path where to scrub and `storage_router` with the StorageRouter that needs to do the work :type scrub_info: dict :param error_messages: A list of error messages to be filled :type error_messages: list :return: a list of error messages :rtype: list """ def _verify_mds_config(current_vdisk): current_vdisk.invalidate_dynamics('info') vdisk_configs = current_vdisk.info['metadata_backend_config'] if len(vdisk_configs) == 0: raise RuntimeError('Could not load MDS configuration') return vdisk_configs client = None lock_time = 5 * 60 storagerouter = scrub_info['storage_router'] scrub_directory = '{0}/scrub_work_{1}_{2}'.format(scrub_info['scrub_path'], vpool.name, storagerouter.name) scrub_config_key = 'ovs/vpools/{0}/proxies/scrub/scrub_config_{1}'.format(vpool.guid, storagerouter.guid) backend_config_key = 'ovs/vpools/{0}/proxies/scrub/backend_config_{1}'.format(vpool.guid, storagerouter.guid) alba_proxy_service = 'ovs-albaproxy_{0}_{1}_scrub'.format(vpool.name, storagerouter.name) # Deploy a proxy try: with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time): ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service)) client = SSHClient(storagerouter, 'root') client.dir_create(scrub_directory) client.dir_chmod(scrub_directory, 0777) # Celery task executed by 'ovs' user and should be able to write in it if ServiceManager.has_service(name=alba_proxy_service, client=client) is True and ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True: ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Re-using existing proxy service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service)) scrub_config = Configuration.get(scrub_config_key) else: machine_id = System.get_my_machine_id(client) port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|storagedriver'.format(machine_id)) port = System.get_free_ports(selected_range=port_range, nr=1, client=client)[0] # Scrub config # {u'albamgr_cfg_url': u'arakoon://config/ovs/vpools/71e2f717-f270-4a41-bbb0-d4c8c084d43e/proxies/64759516-3471-4321-b912-fb424568fc5b/config/abm?ini=%2Fopt%2FOpenvStorage%2Fconfig%2Farakoon_cacc.ini', # u'fragment_cache': [u'none'], # u'ips': [u'127.0.0.1'], # u'log_level': u'info', # u'manifest_cache_size': 17179869184, # u'port': 0, # u'transport': u'tcp'} # Backend config # {u'alba_connection_host': u'10.100.193.155', # u'alba_connection_port': 26204, # u'alba_connection_preset': u'preset', # u'alba_connection_timeout': 15, # u'alba_connection_transport': u'TCP', # u'backend_interface_retries_on_error': 5, # u'backend_interface_retry_backoff_multiplier': 2.0, # u'backend_interface_retry_interval_secs': 1, # u'backend_type': u'ALBA'} scrub_config = Configuration.get('ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid)) scrub_config['port'] = port scrub_config['transport'] = 'tcp' Configuration.set(scrub_config_key, json.dumps(scrub_config, indent=4), raw=True) params = {'VPOOL_NAME': vpool.name, 'LOG_SINK': 
LogHandler.get_sink_path('alba_proxy'), 'CONFIG_PATH': Configuration.get_configuration_path(scrub_config_key)} ServiceManager.add_service(name='ovs-albaproxy', params=params, client=client, target_name=alba_proxy_service) ServiceManager.start_service(name=alba_proxy_service, client=client) ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deployed ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service)) backend_config = Configuration.get('ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, vpool.storagedrivers[0].storagedriver_id))['backend_connection_manager'] backend_config['alba_connection_host'] = '127.0.0.1' backend_config['alba_connection_port'] = scrub_config['port'] Configuration.set(backend_config_key, json.dumps({"backend_connection_manager": backend_config}, indent=4), raw=True) except Exception: message = 'Scrubber - vPool {0} - StorageRouter {1} - An error occurred deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service) error_messages.append(message) ScheduledTaskController._logger.exception(message) if client is not None and ServiceManager.has_service(name=alba_proxy_service, client=client) is True: if ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True: ServiceManager.stop_service(name=alba_proxy_service, client=client) ServiceManager.remove_service(name=alba_proxy_service, client=client) if Configuration.exists(scrub_config_key): Configuration.delete(scrub_config_key) try: # Empty the queue with vDisks to scrub with remote(storagerouter.ip, [VDisk]) as rem: while True: vdisk = None vdisk_guid = queue.get(False) try: # Check MDS master is local. Trigger MDS handover if necessary vdisk = rem.VDisk(vdisk_guid) ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Started scrubbing at location {3}'.format(vpool.name, storagerouter.name, vdisk.name, scrub_directory)) configs = _verify_mds_config(current_vdisk=vdisk) storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id) if configs[0].get('ip') != storagedriver.storagerouter.ip: ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - MDS master is not local, trigger handover'.format(vpool.name, storagerouter.name, vdisk.name)) MDSServiceController.ensure_safety(VDisk(vdisk_guid)) # Do not use a remote VDisk instance here configs = _verify_mds_config(current_vdisk=vdisk) if configs[0].get('ip') != storagedriver.storagerouter.ip: ScheduledTaskController._logger.warning('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Skipping because master MDS still not local'.format(vpool.name, storagerouter.name, vdisk.name)) continue # Do the actual scrubbing with vdisk.storagedriver_client.make_locked_client(str(vdisk.volume_id)) as locked_client: ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Retrieve and apply scrub work'.format(vpool.name, storagerouter.name, vdisk.name)) work_units = locked_client.get_scrubbing_workunits() for work_unit in work_units: res = locked_client.scrub(work_unit=work_unit, scratch_dir=scrub_directory, log_sinks=[LogHandler.get_sink_path('scrubber', allow_override=True)], backend_config=Configuration.get_configuration_path(backend_config_key)) locked_client.apply_scrubbing_result(scrubbing_work_result=res) if work_units: ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - {3} work units successfully 
applied'.format(vpool.name, storagerouter.name, vdisk.name, len(work_units))) else: ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - No scrubbing required'.format(vpool.name, storagerouter.name, vdisk.name)) except Exception: if vdisk is None: message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk with guid {2} could not be found'.format(vpool.name, storagerouter.name, vdisk_guid) else: message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Scrubbing failed'.format(vpool.name, storagerouter.name, vdisk.name) error_messages.append(message) ScheduledTaskController._logger.exception(message) except Empty: # Raised when all items have been fetched from the queue ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Queue completely processed'.format(vpool.name, storagerouter.name)) except Exception: message = 'Scrubber - vPool {0} - StorageRouter {1} - Scrubbing failed'.format(vpool.name, storagerouter.name) error_messages.append(message) ScheduledTaskController._logger.exception(message) # Delete the proxy again try: with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time): ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removing service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service)) client = SSHClient(storagerouter, 'root') client.dir_delete(scrub_directory) if ServiceManager.has_service(alba_proxy_service, client=client): ServiceManager.stop_service(alba_proxy_service, client=client) ServiceManager.remove_service(alba_proxy_service, client=client) if Configuration.exists(scrub_config_key): Configuration.delete(scrub_config_key) ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removed service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service)) except Exception: message = 'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2} failed'.format(vpool.name, storagerouter.name, alba_proxy_service) error_messages.append(message) ScheduledTaskController._logger.exception(message)
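# Illustrative sketch of the queue-draining pattern used in execute_scrub_work
# above: workers call get(False) until the shared queue raises Empty, so
# multiple scrub workers can consume the same vDisk guid queue without extra
# coordination, and per-item failures are collected instead of aborting the
# drain. The per-item handler is a hypothetical stand-in for the scrub call.
from Queue import Queue, Empty  # `queue` on Python 3

def drain(work_queue, handle_item, errors):
    try:
        while True:
            item = work_queue.get(False)
            try:
                handle_item(item)
            except Exception as ex:
                errors.append('{0}: {1}'.format(item, ex))
    except Empty:
        pass  # All items fetched, this worker is done

work = Queue()
for guid in ['vdisk-1', 'vdisk-2']:
    work.put(guid)
collected, errors = [], []
drain(work, collected.append, errors)
assert collected == ['vdisk-1', 'vdisk-2'] and errors == []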
def unconfigure_host(self, ip): if self._is_devstack is False and self._is_openstack is False or self._cinder_installed is False or self._nova_installed is False: self._logger.warning('Unconfigure host: No OpenStack nor DevStack installation detected or Cinder and Nova plugins are not installed') return # 1. Remove driver code self._logger.info('*** Unconfiguring host with IP {0} ***'.format(ip)) self._logger.info(' Removing driver code') if self._is_devstack is True: self.client.file_delete(self._devstack_driver) else: self.client.file_delete('{0}/cinder/volume/drivers/openvstorage.py'.format(self._driver_location)) # 2. Removing users from group self._logger.info(' Removing users from group ovs') for user in ['libvirt-qemu', 'stack'] if self._is_devstack is True else self._openstack_users: self.client.run('deluser {0} ovs'.format(user)) # 3. Revert patches self._logger.info(' Reverting patches') nova_base_path = self._get_base_path('nova') cinder_base_path = self._get_base_path('cinder') if self._is_devstack is True: nova_volume_file = '{0}/virt/libvirt/volume.py'.format(nova_base_path) nova_driver_file = '{0}/virt/libvirt/driver.py'.format(nova_base_path) cinder_brick_initiator_file = '{0}/brick/initiator/connector.py'.format(cinder_base_path) else: nova_volume_file = '{0}/nova/virt/libvirt/volume.py'.format(self._driver_location) nova_driver_file = '{0}/nova/virt/libvirt/driver.py'.format(self._driver_location) cinder_brick_initiator_file = '{0}/cinder/brick/initiator/connector.py'.format(self._driver_location) self._logger.info(' Reverting patched file: {0}'.format(nova_volume_file)) new_contents = [] skip_class = False for line in self.client.file_read(nova_volume_file).splitlines(): if line.startswith('class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver):'): skip_class = True continue if line.startswith('class'): skip_class = False if skip_class is False: new_contents.append(line) self.client.file_write(nova_volume_file, "".join(new_contents)) self._logger.info(' Reverting patched file: {0}'.format(nova_driver_file)) new_contents = [] for line in self.client.file_read(nova_driver_file).splitlines(): stripped_line = line.strip() if stripped_line.startswith("'file=nova.virt.libvirt.volume.LibvirtFileVolumeDriver'"): continue new_contents.append(line) self.client.file_write(nova_driver_file, "".join(new_contents)) if os.path.exists(cinder_brick_initiator_file): self._logger.info(' Reverting patched file: {0}'.format(cinder_brick_initiator_file)) self.client.run("""sed -i 's/elif protocol in ["LOCAL", "FILE"]:/elif protocol == "LOCAL":/g' {0}""".format(cinder_brick_initiator_file)) # 4. 
Unconfigure messaging driver self._logger.info(' Unconfiguring messaging driver') nova_messaging_driver = 'nova.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging' cinder_messaging_driver = 'cinder.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging' with remote(ip, [RawConfigParser, open], 'root') as rem: for config_file, driver in {self._NOVA_CONF: nova_messaging_driver, self._CINDER_CONF: cinder_messaging_driver}.iteritems(): cfg = rem.RawConfigParser() cfg.read([config_file]) if cfg.has_option("DEFAULT", "notification_driver"): cfg.remove_option("DEFAULT", "notification_driver") if cfg.has_option("DEFAULT", "notification_topics"): notification_topics = cfg.get("DEFAULT", "notification_topics").split(",") if "notifications" in notification_topics: notification_topics.remove("notifications") cfg.set("DEFAULT", "notification_topics", ",".join(notification_topics)) if config_file == self._NOVA_CONF: for param, value in {'notify_on_any_change': 'True', 'notify_on_state_change': 'vm_and_task_state'}.iteritems(): if cfg.has_option("DEFAULT", param): cfg.remove_option("DEFAULT", param) with rem.open(config_file, "w") as fp: cfg.write(fp) # 5. Disable events consumer self._logger.info(' Disabling events consumer') service_name = 'ovs-openstack-events-consumer' if ServiceManager.has_service(service_name, self.client): ServiceManager.stop_service(service_name, self.client) ServiceManager.disable_service(service_name, self.client) ServiceManager.remove_service(service_name, self.client)
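# Illustrative sketch of the patch revert in step 3 of unconfigure_host: lines
# belonging to the injected LibvirtFileVolumeDriver class are skipped until the
# next top-level `class` statement, and the remaining lines are written back.
# The sketch joins with newlines; the input below is example file content, not
# the real Nova source.
def strip_injected_class(file_contents, marker='class LibvirtFileVolumeDriver('):
    kept = []
    skip_class = False
    for line in file_contents.splitlines():
        if line.startswith(marker):
            skip_class = True
            continue
        if line.startswith('class '):
            skip_class = False
        if not skip_class:
            kept.append(line)
    return '\n'.join(kept)

patched = ('class LibvirtVolumeDriver(Base):\n    pass\n'
           'class LibvirtFileVolumeDriver(Base):\n    pass\n')
assert 'LibvirtFileVolumeDriver' not in strip_injected_class(patched)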
def update_status(storagedriver_id): """ Sets Storage Driver offline in case hypervisor management Center reports the hypervisor pmachine related to this Storage Driver as unavailable. :param storagedriver_id: ID of the storagedriver to update its status :type storagedriver_id: str :return: None """ pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id) storagedriver = StorageDriverList.get_by_storagedriver_id( storagedriver_id) storagerouter = storagedriver.storagerouter if pmachine.mgmtcenter: # Update status pmachine.invalidate_dynamics(['host_status']) host_status = pmachine.host_status else: # No management Center, cannot update status via api StorageDriverController._logger.info( 'Updating status of pmachine {0} using SSHClient'.format( pmachine.name)) path = StorageDriverConfiguration( 'storagedriver', storagedriver.vpool.guid, storagedriver.storagedriver_id).remote_path host_status = 'RUNNING' try: client = SSHClient(storagerouter, username='******') StorageDriverController._logger.info( 'SSHClient connected successfully to {0} at {1}'.format( pmachine.name, client.ip)) except UnableToConnectException as ex: StorageDriverController._logger.error( 'SSHClient connectivity check failed, assuming host {0} is halted. {1}' .format(pmachine.name, ex)) host_status = 'HALTED' else: try: with remote(client.ip, [LocalStorageRouterClient]) as rem: lsrc = rem.LocalStorageRouterClient(path) lsrc.server_revision() StorageDriverController._logger.info( 'LocalStorageRouterClient connected successfully to {0} at {1}' .format(pmachine.name, client.ip)) except (EOFError, RuntimeError, ClusterNotReachableException) as ex: StorageDriverController._logger.error( 'LocalStorageRouterClient check failed, assuming volumedriver on host {0} {1} is halted. {2}' .format(pmachine.name, client.ip, ex)) host_status = 'HALTED' if host_status != 'RUNNING': # Host is stopped storagedriver_client = StorageDriverClient.load( storagedriver.vpool) storagedriver_client.mark_node_offline( str(storagedriver.storagedriver_id)) StorageDriverController._logger.warning( 'Storagedriver {0} marked offline'.format( storagedriver.storagedriver_id))
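The status check above degrades in two steps: first an SSH connection to the StorageRouter, then a LocalStorageRouterClient.server_revision() call against the volumedriver; if either probe fails, host_status drops to 'HALTED' and the node is marked offline through the StorageDriverClient. As a minimal standalone sketch of that fallback idea, using plain TCP connects in place of the OVS clients (the ports and return values below are illustrative assumptions, not the actual probes):

import socket

def probe_host(ip, ssh_port=22, volumedriver_port=26203, timeout=5):
    # Sketch only: the real code uses SSHClient and LocalStorageRouterClient;
    # here a TCP connect to each (assumed) port stands in for those probes.
    for port in (ssh_port, volumedriver_port):
        try:
            sock = socket.create_connection((ip, port), timeout)
            sock.close()
        except (socket.error, socket.timeout):
            return 'HALTED'
    return 'RUNNING'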
def post_upgrade(client): """ Upgrade actions after the new packages have actually been installed :param client: SSHClient object :return: None """ # If we can reach Etcd with a valid config, and there's still an old config file present, delete it from ovs.extensions.db.etcd.configuration import EtcdConfiguration path = '/opt/OpenvStorage/config/ovs.json' if EtcdConfiguration.exists('/ovs/framework/cluster_id') and client.file_exists(path): client.file_delete(path) # Migrate volumedriver & albaproxy configuration files import uuid from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration from ovs.dal.lists.storagedriverlist import StorageDriverList from ovs.extensions.generic.system import System with remote(client.ip, [StorageDriverConfiguration, os, open, json, System], username='******') as rem: configuration_dir = '{0}/storagedriver/storagedriver'.format(EtcdConfiguration.get('/ovs/framework/paths|cfgdir')) host_id = rem.System.get_my_machine_id() if rem.os.path.exists(configuration_dir): for storagedriver in StorageDriverList.get_storagedrivers_by_storagerouter(rem.System.get_my_storagerouter().guid): vpool = storagedriver.vpool if storagedriver.alba_proxy is not None: config_tree = '/ovs/vpools/{0}/proxies/{1}/config/{{0}}'.format(vpool.guid, storagedriver.alba_proxy.guid) # ABM config abm_config = '{0}/{1}_alba.cfg'.format(configuration_dir, vpool.name) if rem.os.path.exists(abm_config): with rem.open(abm_config) as config_file: EtcdConfiguration.set(config_tree.format('abm'), config_file.read(), raw=True) rem.os.remove(abm_config) # Albaproxy config alba_config = '{0}/{1}_alba.json'.format(configuration_dir, vpool.name) if rem.os.path.exists(alba_config): with rem.open(alba_config) as config_file: config = rem.json.load(config_file) del config['albamgr_cfg_file'] config['albamgr_cfg_url'] = 'etcd://127.0.0.1:2379{0}'.format(config_tree.format('abm')) EtcdConfiguration.set(config_tree.format('main'), json.dumps(config, indent=4), raw=True) params = {'VPOOL_NAME': vpool.name, 'VPOOL_GUID': vpool.guid, 'PROXY_ID': storagedriver.alba_proxy.guid} alba_proxy_service = 'ovs-albaproxy_{0}'.format(vpool.name) ServiceManager.add_service(name='ovs-albaproxy', params=params, client=client, target_name=alba_proxy_service) rem.os.remove(alba_config) # Volumedriver config current_file = '{0}/{1}.json'.format(configuration_dir, vpool.name) if rem.os.path.exists(current_file): readcache_size = 0 with rem.open(current_file) as config_file: config = rem.json.load(config_file) config['distributed_transaction_log'] = {} config['distributed_transaction_log']['dtl_transport'] = config['failovercache']['failovercache_transport'] config['distributed_transaction_log']['dtl_path'] = config['failovercache']['failovercache_path'] config['volume_manager']['dtl_throttle_usecs'] = config['volume_manager']['foc_throttle_usecs'] del config['failovercache'] del config['volume_manager']['foc_throttle_usecs'] sdc = rem.StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id) sdc.configuration = config sdc.save(reload_config=False) for mountpoint in config['content_addressed_cache']['clustercache_mount_points']: readcache_size += int(mountpoint['size'].replace('KiB', '')) params = {'VPOOL_MOUNTPOINT': storagedriver.mountpoint, 'HYPERVISOR_TYPE': storagedriver.storagerouter.pmachine.hvtype, 'VPOOL_NAME': vpool.name, 'CONFIG_PATH': sdc.remote_path, 'UUID': str(uuid.uuid4()), 'OVS_UID': client.run('id -u ovs').strip(), 'OVS_GID': client.run('id -g ovs').strip(), 
'KILL_TIMEOUT': str(int(readcache_size / 1024.0 / 1024.0 / 6.0 + 30))} vmware_mode = EtcdConfiguration.get('/ovs/framework/hosts/{0}/storagedriver|vmware_mode'.format(host_id)) dtl_service = 'ovs-dtl_{0}'.format(vpool.name) ServiceManager.add_service(name='ovs-dtl', params=params, client=client, target_name=dtl_service) if vpool.backend_type.code == 'alba': alba_proxy_service = 'ovs-albaproxy_{0}'.format(vpool.name) dependencies = [alba_proxy_service] else: dependencies = None if vmware_mode == 'ganesha': template_name = 'ovs-ganesha' else: template_name = 'ovs-volumedriver' voldrv_service = 'ovs-volumedriver_{0}'.format(vpool.name) ServiceManager.add_service(name=template_name, params=params, client=client, target_name=voldrv_service, additional_dependencies=dependencies) rem.os.remove(current_file) # Ganesha config, if available current_file = '{0}/{1}_ganesha.conf'.format(configuration_dir, vpool.name) if rem.os.path.exists(current_file): sdc = rem.StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id) contents = '' for template in ['ganesha-core', 'ganesha-export']: contents += client.file_read('/opt/OpenvStorage/config/templates/{0}.conf'.format(template)) params = {'VPOOL_NAME': vpool.name, 'VPOOL_MOUNTPOINT': '/mnt/{0}'.format(vpool.name), 'CONFIG_PATH': sdc.remote_path, 'NFS_FILESYSTEM_ID': storagedriver.storagerouter.ip.split('.', 2)[-1]} for key, value in params.iteritems(): contents = contents.replace('<{0}>'.format(key), value) client.file_write(current_file, contents)
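The volumedriver part of this migration is essentially a key rename inside the JSON configuration: the legacy failovercache section becomes distributed_transaction_log, and foc_throttle_usecs becomes dtl_throttle_usecs under volume_manager. A small self-contained sketch of just that transformation on a plain dict (the surrounding Etcd writes and service registration are omitted):

import copy

def migrate_volumedriver_config(old_config):
    # Sketch of the rename performed above; operates on a copy of the dict.
    config = copy.deepcopy(old_config)
    foc = config.pop('failovercache')
    config['distributed_transaction_log'] = {'dtl_transport': foc['failovercache_transport'],
                                             'dtl_path': foc['failovercache_path']}
    config['volume_manager']['dtl_throttle_usecs'] = config['volume_manager'].pop('foc_throttle_usecs')
    return config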
def update_framework(): """ Update the framework :return: None """ filemutex = file_mutex('system_update', wait=2) upgrade_file = '/etc/ready_for_upgrade' upgrade_ongoing_check_file = '/etc/upgrade_ongoing' ssh_clients = [] try: filemutex.acquire() UpdateController._log_message('+++ Starting framework update +++') from ovs.dal.lists.storagerouterlist import StorageRouterList UpdateController._log_message('Generating SSH client connections for each storage router') upgrade_file = '/etc/ready_for_upgrade' upgrade_ongoing_check_file = '/etc/upgrade_ongoing' storage_routers = StorageRouterList.get_storagerouters() ssh_clients = [] master_ips = [] extra_ips = [] for sr in storage_routers: ssh_clients.append(SSHClient(sr.ip, username='******')) if sr.node_type == 'MASTER': master_ips.append(sr.ip) elif sr.node_type == 'EXTRA': extra_ips.append(sr.ip) this_client = [client for client in ssh_clients if client.is_local is True][0] # Create locks UpdateController._log_message('Creating lock files', client_ip=this_client.ip) for client in ssh_clients: client.run('touch {0}'.format(upgrade_file)) # Prevents manual install or upgrade individual packages client.run('touch {0}'.format(upgrade_ongoing_check_file)) # Prevents clicking x times on 'Update' btn # Check requirements packages_to_update = set() all_services_to_restart = [] for client in ssh_clients: for function in Toolbox.fetch_hooks('update', 'metadata'): UpdateController._log_message('Executing function {0}'.format(function.__name__), client_ip=client.ip) output = function(client) for key, value in output.iteritems(): if key != 'framework': continue for package_info in value: packages_to_update.update(package_info['packages']) all_services_to_restart += package_info['services'] services_to_restart = [] for service in all_services_to_restart: if service not in services_to_restart: services_to_restart.append(service) # Filter out duplicates maintaining the order of services (eg: watcher-framework before memcached) UpdateController._log_message('Services which will be restarted --> {0}'.format(', '.join(services_to_restart))) UpdateController._log_message('Packages which will be installed --> {0}'.format(', '.join(packages_to_update))) # Stop services if UpdateController._change_services_state(services=services_to_restart, ssh_clients=ssh_clients, action='stop') is False: UpdateController._log_message('Stopping all services on every node failed, cannot continue', client_ip=this_client.ip, severity='warning') UpdateController._remove_lock_files([upgrade_file, upgrade_ongoing_check_file], ssh_clients) # Start services again if a service could not be stopped UpdateController._log_message('Attempting to start the services again', client_ip=this_client.ip) UpdateController._change_services_state(services=services_to_restart, ssh_clients=ssh_clients, action='start') UpdateController._log_message('Failed to stop all required services, aborting update', client_ip=this_client.ip, severity='error') return # Update packages failed_clients = [] for client in ssh_clients: PackageManager.update(client=client) try: UpdateController._log_message('Installing latest packages', client.ip) for package in packages_to_update: UpdateController._log_message('Installing {0}'.format(package), client.ip) PackageManager.install(package_name=package, client=client, force=True) UpdateController._log_message('Installed {0}'.format(package), client.ip) client.file_delete(upgrade_file) except subprocess.CalledProcessError as cpe: UpdateController._log_message('Upgrade failed 
with error: {0}'.format(cpe.output), client.ip, 'error') failed_clients.append(client) break if failed_clients: UpdateController._remove_lock_files([upgrade_file, upgrade_ongoing_check_file], ssh_clients) UpdateController._log_message('Error occurred. Attempting to start all services again', client_ip=this_client.ip, severity='error') UpdateController._change_services_state(services=services_to_restart, ssh_clients=ssh_clients, action='start') UpdateController._log_message('Failed to upgrade following nodes:\n - {0}\nPlease check /var/log/ovs/lib.log on {1} for more information'.format('\n - '.join([client.ip for client in failed_clients]), this_client.ip), this_client.ip, 'error') return # Migrate code for client in ssh_clients: try: UpdateController._log_message('Started code migration', client.ip) try: with remote(client.ip, [Migrator]) as rem: rem.Migrator.migrate(master_ips, extra_ips) except EOFError as eof: UpdateController._log_message('EOFError during code migration, retrying {0}'.format(eof), client.ip, 'warning') with remote(client.ip, [Migrator]) as rem: rem.Migrator.migrate(master_ips, extra_ips) UpdateController._log_message('Finished code migration', client.ip) except Exception as ex: UpdateController._remove_lock_files([upgrade_ongoing_check_file], ssh_clients) UpdateController._log_message('Code migration failed with error: {0}'.format(ex), client.ip, 'error') return # Start services UpdateController._log_message('Starting services', client_ip=this_client.ip) model_services = [] if 'arakoon-ovsdb' in services_to_restart: model_services.append('arakoon-ovsdb') services_to_restart.remove('arakoon-ovsdb') if 'memcached' in services_to_restart: model_services.append('memcached') services_to_restart.remove('memcached') UpdateController._change_services_state(services=model_services, ssh_clients=ssh_clients, action='start') # Migrate model UpdateController._log_message('Started model migration', client_ip=this_client.ip) try: from ovs.dal.helpers import Migration with remote(ssh_clients[0].ip, [Migration]) as rem: rem.Migration.migrate() UpdateController._log_message('Finished model migration', client_ip=this_client.ip) except Exception as ex: UpdateController._remove_lock_files([upgrade_ongoing_check_file], ssh_clients) UpdateController._log_message('An unexpected error occurred: {0}'.format(ex), client_ip=this_client.ip, severity='error') return # Post upgrade actions UpdateController._log_message('Executing post upgrade actions', client_ip=this_client.ip) for client in ssh_clients: with remote(client.ip, [Toolbox, SSHClient]) as rem: for function in rem.Toolbox.fetch_hooks('update', 'postupgrade'): UpdateController._log_message('Executing action {0}'.format(function.__name__), client_ip=client.ip) try: function(rem.SSHClient(client.ip, username='******')) UpdateController._log_message('Executing action {0} completed'.format(function.__name__), client_ip=client.ip) except Exception as ex: UpdateController._log_message('Post upgrade action failed with error: {0}'.format(ex), client.ip, 'error') # Start watcher and restart support-agent UpdateController._change_services_state(services=services_to_restart, ssh_clients=ssh_clients, action='start') UpdateController._change_services_state(services=['support-agent'], ssh_clients=ssh_clients, action='restart') UpdateController._remove_lock_files([upgrade_ongoing_check_file], ssh_clients) UpdateController._log_message('+++ Finished updating +++') except RuntimeError as rte: UpdateController._log_message('Error during framework update: 
{0}'.format(rte), severity='error') UpdateController._remove_lock_files([upgrade_file, upgrade_ongoing_check_file], ssh_clients) except NoLockAvailableException: UpdateController._log_message('Another framework update is currently in progress!') except Exception as ex: UpdateController._log_message('Error during framework update: {0}'.format(ex), severity='error') UpdateController._remove_lock_files([upgrade_file, upgrade_ongoing_check_file], ssh_clients) finally: filemutex.release()
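One detail in the requirements step is easy to miss: the services gathered from the metadata hooks are de-duplicated while preserving order, so that for example watcher-framework is still stopped before memcached. The same idea as a small standalone helper (a sketch, not the code path used above):

def ordered_unique(items):
    # Keep only the first occurrence of every item, preserving order.
    seen = set()
    result = []
    for item in items:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result

# ordered_unique(['watcher-framework', 'memcached', 'watcher-framework'])
# returns ['watcher-framework', 'memcached']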
def configure_host(self, ip): if self._is_devstack is False and self._is_openstack is False or self._cinder_installed is False or self._nova_installed is False: self._logger.warning('Configure host: No OpenStack nor DevStack installation detected or Cinder and Nova plugins are not installed') return # 1. Get Driver code self._logger.info('*** Configuring host with IP {0} ***'.format(ip)) self._logger.info(' Copy driver code') remote_driver = "/opt/OpenvStorage/config/templates/cinder-volume-driver/{0}/openvstorage.py".format(self._stack_version) remote_version = '0.0.0' existing_version = '0.0.0' try: from cinder.volume.drivers import openvstorage if hasattr(openvstorage, 'OVSVolumeDriver'): existing_version = getattr(openvstorage.OVSVolumeDriver, 'VERSION', '0.0.0') except ImportError: pass for line in self.client.file_read(remote_driver).splitlines(): if 'VERSION = ' in line: remote_version = line.split('VERSION = ')[-1].strip().replace("'", "").replace('"', "") break nova_base_path = self._get_base_path('nova') cinder_base_path = self._get_base_path('cinder') if self._is_devstack is True: local_driver = '{0}/volume/drivers/openvstorage.py'.format(cinder_base_path) else: local_driver = '{0}/cinder/volume/drivers/openvstorage.py'.format(self._driver_location) if remote_version > existing_version: self._logger.debug('Updating existing driver using {0} from version {1} to version {2}'.format(remote_driver, existing_version, remote_version)) self.client.run('cp -f {0} {1}'.format(remote_driver, local_driver)) else: self._logger.debug('Using driver {0} version {1}'.format(local_driver, existing_version)) # 2. Configure users and groups self._logger.info(' Add users to group ovs') users = ['libvirt-qemu', 'stack'] if self._is_devstack is True else self._openstack_users for user in users: self.client.run('usermod -a -G ovs {0}'.format(user)) # 3. 
Apply patches self._logger.info(' Applying patches') if self._stack_version in ('liberty', 'mitaka', 'newton'): try: import os_brick cinder_brick_initiator_file = "{0}/initiator/connector.py".format(os.path.dirname(os_brick.__file__)) except ImportError: cinder_brick_initiator_file = '' if self._is_devstack is True: nova_volume_file = '{0}/virt/libvirt/volume/volume.py'.format(nova_base_path) else: nova_volume_file = '{0}/nova/virt/libvirt/volume/volume.py'.format(self._driver_location) else: cinder_brick_initiator_file = '{0}/cinder/brick/initiator/connector.py'.format(self._driver_location) if self._is_devstack is True: nova_volume_file = '{0}/virt/libvirt/volume.py'.format(nova_base_path) else: nova_volume_file = '{0}/nova/virt/libvirt/volume.py'.format(self._driver_location) if self._is_devstack is True: nova_driver_file = '{0}/virt/libvirt/driver.py'.format(nova_base_path) else: nova_driver_file = '{0}/nova/virt/libvirt/driver.py'.format(self._driver_location) self._logger.info(' Patching file {0}'.format(nova_volume_file)) file_contents = self.client.file_read(nova_volume_file) if 'class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver):' not in file_contents: file_contents += ''' class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver): def __init__(self, connection): super(LibvirtFileVolumeDriver, self).__init__(connection, is_block_dev=False) def get_config(self, connection_info, disk_info): conf = super(LibvirtFileVolumeDriver, self).get_config(connection_info, disk_info) conf.source_type = 'file' conf.source_path = connection_info['data']['device_path'] return conf ''' self.client.file_write(nova_volume_file, file_contents) self._logger.info(' Patching file {0}'.format(nova_driver_file)) file_contents = self.client.file_read(nova_driver_file) if self._stack_version in ('liberty', 'mitaka'): check_line = 'local=nova.virt.libvirt.volume.volume.LibvirtVolumeDriver' new_line = 'file=nova.virt.libvirt.volume.volume.LibvirtFileVolumeDriver' else: check_line = 'local=nova.virt.libvirt.volume.LibvirtVolumeDriver' new_line = 'file=nova.virt.libvirt.volume.LibvirtFileVolumeDriver' if new_line not in file_contents: for line in file_contents.splitlines(): if check_line in line: stripped_line = line.rstrip() whitespaces = len(stripped_line) - len(stripped_line.lstrip()) new_line = "{0}'{1}',\n".format(' ' * whitespaces, new_line) fc = file_contents[:file_contents.index(line)] + new_line + file_contents[file_contents.index(line):] self.client.file_write(nova_driver_file, "".join(fc)) break if os.path.exists(cinder_brick_initiator_file): # fix brick/upload to glance self._logger.info(' Patching file {0}'.format(cinder_brick_initiator_file)) if self._stack_version in ('liberty', 'mitaka', 'newton'): self.client.run("""sed -i 's/elif protocol == LOCAL:/elif protocol in [LOCAL, "FILE"]:/g' {0}""".format(cinder_brick_initiator_file)) else: self.client.run("""sed -i 's/elif protocol == "LOCAL":/elif protocol in ["LOCAL", "FILE"]:/g' {0}""".format(cinder_brick_initiator_file)) # 4. 
Configure messaging driver self._logger.info(' - Configure messaging driver') nova_messaging_driver = 'nova.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging' cinder_messaging_driver = 'cinder.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging' with remote(ip, [RawConfigParser, open], 'root') as rem: for config_file, driver in {self._NOVA_CONF: nova_messaging_driver, self._CINDER_CONF: cinder_messaging_driver}.iteritems(): changed = False cfg = rem.RawConfigParser() cfg.read([config_file]) if cfg.has_option("DEFAULT", "notification_driver"): if cfg.get("DEFAULT", "notification_driver") != driver: changed = True cfg.set("DEFAULT", "notification_driver", driver) else: changed = True cfg.set("DEFAULT", "notification_driver", driver) if cfg.has_option("DEFAULT", "notification_topics"): notification_topics = cfg.get("DEFAULT", "notification_topics").split(",") if "notifications" not in notification_topics: notification_topics.append("notifications") changed = True cfg.set("DEFAULT", "notification_topics", ",".join(notification_topics)) else: changed = True cfg.set("DEFAULT", "notification_topics", "notifications") if config_file == self._NOVA_CONF: for param, value in {'notify_on_any_change': 'True', 'notify_on_state_change': 'vm_and_task_state'}.iteritems(): if not cfg.has_option("DEFAULT", param): changed = True cfg.set("DEFAULT", param, value) if changed is True: with rem.open(config_file, "w") as fp: cfg.write(fp) # 5. Enable events consumer self._logger.info(' - Enabling events consumer service') service_name = 'openstack-events-consumer' if not ServiceManager.has_service(service_name, self.client): ServiceManager.add_service(service_name, self.client) ServiceManager.enable_service(service_name, self.client) ServiceManager.start_service(service_name, self.client)
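Step 4 only enforces two settings per configuration file: notification_driver must match the messaging driver for the detected release, and 'notifications' must be listed in notification_topics. The sketch below applies the same idempotent edit to a local file with the standard ConfigParser; the real code performs it on the remote host through the remote() wrapper, and config_path would be whatever self._NOVA_CONF or self._CINDER_CONF points to:

try:
    from configparser import RawConfigParser  # Python 3
except ImportError:
    from ConfigParser import RawConfigParser  # Python 2

def ensure_notification_settings(config_path, driver='messaging'):
    # Sketch: set the notification driver and make sure the 'notifications'
    # topic is present; only rewrite the file when something actually changed.
    cfg = RawConfigParser()
    cfg.read([config_path])
    changed = False
    if not cfg.has_option('DEFAULT', 'notification_driver') or cfg.get('DEFAULT', 'notification_driver') != driver:
        cfg.set('DEFAULT', 'notification_driver', driver)
        changed = True
    topics = cfg.get('DEFAULT', 'notification_topics').split(',') if cfg.has_option('DEFAULT', 'notification_topics') else []
    if 'notifications' not in topics:
        topics.append('notifications')
        cfg.set('DEFAULT', 'notification_topics', ','.join(topics))
        changed = True
    if changed:
        with open(config_path, 'w') as fp:
            cfg.write(fp)
    return changed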
def unconfigure_host(self, ip): if self._is_devstack is False and self._is_openstack is False or self._cinder_installed is False or self._nova_installed is False: self._logger.warning( 'Unconfigure host: No OpenStack nor DevStack installation detected or Cinder and Nova plugins are not installed' ) return # 1. Remove driver code self._logger.info('*** Unconfiguring host with IP {0} ***'.format(ip)) self._logger.info(' Removing driver code') if self._is_devstack is True: self.client.file_delete(self._devstack_driver) else: self.client.file_delete( '{0}/cinder/volume/drivers/openvstorage.py'.format( self._driver_location)) # 2. Removing users from group self._logger.info(' Removing users from group ovs') for user in ['libvirt-qemu', 'stack' ] if self._is_devstack is True else self._openstack_users: self.client.run('deluser {0} ovs'.format(user)) # 3. Revert patches self._logger.info(' Reverting patches') nova_base_path = self._get_base_path('nova') cinder_base_path = self._get_base_path('cinder') if self._is_devstack is True: nova_volume_file = '{0}/virt/libvirt/volume.py'.format( nova_base_path) nova_driver_file = '{0}/virt/libvirt/driver.py'.format( nova_base_path) cinder_brick_initiator_file = '{0}/brick/initiator/connector.py'.format( cinder_base_path) else: nova_volume_file = '{0}/nova/virt/libvirt/volume.py'.format( self._driver_location) nova_driver_file = '{0}/nova/virt/libvirt/driver.py'.format( self._driver_location) cinder_brick_initiator_file = '{0}/cinder/brick/initiator/connector.py'.format( self._driver_location) self._logger.info( ' Reverting patched file: {0}'.format(nova_volume_file)) new_contents = [] skip_class = False for line in self.client.file_read(nova_volume_file).splitlines(): if line.startswith( 'class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver):'): skip_class = True continue if line.startswith('class'): skip_class = False if skip_class is False: new_contents.append(line) self.client.file_write(nova_volume_file, "".join(new_contents)) self._logger.info( ' Reverting patched file: {0}'.format(nova_driver_file)) new_contents = [] for line in self.client.file_read(nova_driver_file).splitlines(): stripped_line = line.strip() if stripped_line.startswith( "'file=nova.virt.libvirt.volume.LibvirtFileVolumeDriver'"): continue new_contents.append(line) self.client.file_write(nova_driver_file, "".join(new_contents)) if os.path.exists(cinder_brick_initiator_file): self._logger.info(' Reverting patched file: {0}'.format( cinder_brick_initiator_file)) self.client.run( """sed -i 's/elif protocol in ["LOCAL", "FILE"]:/elif protocol == "LOCAL":/g' {0}""" .format(cinder_brick_initiator_file)) # 4. 
Unconfigure messaging driver self._logger.info(' Unconfiguring messaging driver') nova_messaging_driver = 'nova.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging' cinder_messaging_driver = 'cinder.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging' with remote(ip, [RawConfigParser, open], 'root') as rem: for config_file, driver in { self._NOVA_CONF: nova_messaging_driver, self._CINDER_CONF: cinder_messaging_driver }.iteritems(): cfg = rem.RawConfigParser() cfg.read([config_file]) if cfg.has_option("DEFAULT", "notification_driver"): cfg.remove_option("DEFAULT", "notification_driver") if cfg.has_option("DEFAULT", "notification_topics"): notification_topics = cfg.get( "DEFAULT", "notification_topics").split(",") if "notifications" in notification_topics: notification_topics.remove("notifications") cfg.set("DEFAULT", "notification_topics", ",".join(notification_topics)) if config_file == self._NOVA_CONF: for param, value in { 'notify_on_any_change': 'True', 'notify_on_state_change': 'vm_and_task_state' }.iteritems(): if cfg.has_option("DEFAULT", param): cfg.remove_option("DEFAULT", param) with rem.open(config_file, "w") as fp: cfg.write(fp) # 5. Disable events consumer self._logger.info(' Disabling events consumer') service_name = 'ovs-openstack-events-consumer' if ServiceManager.has_service(service_name, self.client): ServiceManager.stop_service(service_name, self.client) ServiceManager.disable_service(service_name, self.client) ServiceManager.remove_service(service_name, self.client)
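The trickiest part of the revert is removing the injected LibvirtFileVolumeDriver class from the nova volume module: lines are skipped from the class header until the next top-level class statement. A standalone sketch of that filter; note it uses splitlines(True) so line endings are preserved in the rewritten source:

def strip_injected_class(source, class_name='LibvirtFileVolumeDriver'):
    # Drop one class definition from module source: skip from its header
    # until the next top-level 'class' statement is reached.
    kept = []
    skipping = False
    for line in source.splitlines(True):  # True keeps the newline characters
        if line.startswith('class {0}('.format(class_name)):
            skipping = True
            continue
        if line.startswith('class '):
            skipping = False
        if not skipping:
            kept.append(line)
    return ''.join(kept)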
def is_host_configured(self, ip): if (self._is_devstack is False and self._is_openstack is False) or self._cinder_installed is False or self._nova_installed is False: self._logger.warning('Host configured: No OpenStack nor DevStack installation detected or Cinder and Nova plugins are not installed') return False # 1. Check driver code if self._is_devstack is True: if not self.client.file_exists(filename = self._devstack_driver): self._logger.info(' File "{0}" does not exist'.format(self._devstack_driver)) return False else: if not self.client.file_exists(filename = '{0}/cinder/volume/drivers/openvstorage.py'.format(self._driver_location)): self._logger.info(' File "{0}/cinder/volume/drivers/openvstorage.py" does not exist'.format(self._driver_location)) return False # 2. Check configured users ovs_id = self.client.run('id -u ovs') if not ovs_id: self._logger.info('Failed to determine the OVS user group ID') return False users = ['libvirt-qemu', 'stack'] if self._is_devstack is True else self._openstack_users for user in users: if '{0}(ovs)'.format(ovs_id) not in self.client.run('id -a {0}'.format(user)): self._logger.info('User "{0}" is not part of the OVS user group') return False # 3. Check patches nova_base_path = self._get_base_path('nova') cinder_base_path = self._get_base_path('cinder') if self._stack_version in ('liberty', 'mitaka', 'newton'): try: import os_brick cinder_brick_initiator_file = "{0}/initiator/connector.py".format(os.path.dirname(os_brick.__file__)) except ImportError: cinder_brick_initiator_file = '' if self._is_devstack is True: nova_volume_file = '{0}/virt/libvirt/volume/volume.py'.format(nova_base_path) else: nova_volume_file = '{0}/nova/virt/libvirt/volume/volume.py'.format(self._driver_location) else: if self._is_devstack is True: nova_volume_file = '{0}/virt/libvirt/volume.py'.format(nova_base_path) else: nova_volume_file = '{0}/nova/virt/libvirt/volume.py'.format(self._driver_location) cinder_brick_initiator_file = '{0}/brick/initiator/connector.py'.format(cinder_base_path) if self._is_devstack is True: nova_driver_file = '{0}/virt/libvirt/driver.py'.format(nova_base_path) else: nova_driver_file = '{0}/nova/virt/libvirt/driver.py'.format(self._driver_location) file_contents = self.client.file_read(nova_volume_file) if 'class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver):' not in file_contents: self._logger.info('File "{0}" is not configured properly'.format(nova_volume_file)) return False if self._stack_version in ('liberty', 'mitaka'): check_line = 'file=nova.virt.libvirt.volume.volume.LibvirtFileVolumeDriver' else: check_line = 'file=nova.virt.libvirt.volume.LibvirtFileVolumeDriver' file_contents = self.client.file_read(nova_driver_file) if check_line not in file_contents: self._logger.info('File "{0}" is not configured properly'.format(nova_driver_file)) return False if os.path.exists(cinder_brick_initiator_file): file_contents = self.client.file_read(cinder_brick_initiator_file) if self._stack_version in ('liberty', 'mitaka', 'newton'): if 'elif protocol in [LOCAL, "FILE"]:' not in file_contents: self._logger.info('File "{0}" is not configured properly'.format(cinder_brick_initiator_file)) return False else: if 'elif protocol in ["LOCAL", "FILE"]:' not in file_contents: self._logger.info('File "{0}" is not configured properly'.format(cinder_brick_initiator_file)) return False # 4. 
Check messaging driver configuration nova_messaging_driver = 'nova.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging' cinder_messaging_driver = 'cinder.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging' host_configured = True with remote(ip, [RawConfigParser], 'root') as rem: for config_file, driver in {self._NOVA_CONF: nova_messaging_driver, self._CINDER_CONF: cinder_messaging_driver}.iteritems(): cfg = rem.RawConfigParser() cfg.read([config_file]) host_configured &= cfg.get("DEFAULT", "notification_driver") == driver host_configured &= "notifications" in cfg.get("DEFAULT", "notification_topics") if config_file == self._NOVA_CONF: host_configured &= cfg.get("DEFAULT", "notify_on_any_change") == "True" host_configured &= cfg.get("DEFAULT", "notify_on_state_change") == "vm_and_task_state" if host_configured is False: self._logger.info('Nova and/or Cinder configuration files are not configured properly') return host_configured # 5. Check events consumer service service_name = 'ovs-openstack-events-consumer' if not (ServiceManager.has_service(service_name, self.client) and ServiceManager.get_service_status(service_name, self.client) is True): self._logger.info('Service "{0}" is not configured properly'.format(service_name)) return False return True
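The user check relies on the output of id: every OpenStack service account must list the ovs group among its groups. A local sketch of that membership test (the real check runs id -a over SSH and matches on the numeric ovs group id rather than the name):

import subprocess

def user_in_group(user, group='ovs'):
    # Sketch: look for the group name in the groups reported by 'id -a';
    # also returns False when the user does not exist.
    try:
        output = subprocess.check_output(['id', '-a', user])
    except (subprocess.CalledProcessError, OSError):
        return False
    if isinstance(output, bytes):
        output = output.decode('utf-8')
    return '({0})'.format(group) in output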
def execute_update(components): """ Update the specified components on all StorageRouters This is called upon by 'at' :return: None """ filemutex = file_mutex('system_update', wait=2) ssh_clients = [] services_stop_start = set() try: filemutex.acquire() UpdateController._logger.debug('+++ Starting update +++') from ovs.dal.lists.storagerouterlist import StorageRouterList # Create SSHClients to all nodes UpdateController._logger.debug('Generating SSH client connections for each storage router') storage_routers = StorageRouterList.get_storagerouters() master_ips = [] extra_ips = [] for sr in storage_routers: try: ssh_clients.append(SSHClient(sr.ip, username='******')) if sr.node_type == 'MASTER': master_ips.append(sr.ip) elif sr.node_type == 'EXTRA': extra_ips.append(sr.ip) except UnableToConnectException: raise Exception('Update is only allowed on systems where all nodes are online and fully functional') # Create locks for client in ssh_clients: UpdateController._logger.debug('{0}: Creating lock files'.format(client.ip)) client.run(['touch', UpdateController._update_file]) # Prevents manual install or update individual packages client.run(['touch', UpdateController._update_ongoing_file]) # Check requirements packages_to_update = {} services_post_update = set() update_information = UpdateController.get_update_information_all() for component, component_info in update_information.iteritems(): if component in components: UpdateController._logger.debug('Verifying update information for component: {0}'.format(component.upper())) Toolbox.verify_required_params(actual_params=component_info, required_params={'downtime': (list, None), 'packages': (dict, None), 'prerequisites': (list, None), 'services_stop_start': (set, None), 'services_post_update': (set, None)}) if len(component_info['prerequisites']) > 0: raise Exception('Update is only allowed when all prerequisites have been met') packages_to_update.update(component_info['packages']) services_stop_start.update(component_info['services_stop_start']) services_post_update.update(component_info['services_post_update']) if len(packages_to_update) > 0: UpdateController._logger.debug('Packages to be updated: {0}'.format(', '.join(sorted(packages_to_update.keys())))) if len(services_stop_start) > 0: UpdateController._logger.debug('Services to stop before package update: {0}'.format(', '.join(sorted(services_stop_start)))) if len(services_post_update) > 0: UpdateController._logger.debug('Services which will be restarted after update: {0}'.format(', '.join(sorted(services_post_update)))) # Stop services if UpdateController.change_services_state(services=services_stop_start, ssh_clients=ssh_clients, action='stop') is False: raise Exception('Stopping all services on every node failed, cannot continue') # Install packages # First install packages on all StorageRouters individually if packages_to_update: failures = False for client in ssh_clients: UpdateController._logger.debug('{0}: Installing packages'.format(client.ip)) for function in Toolbox.fetch_hooks('update', 'package_install_multi'): try: function(client=client, package_info=packages_to_update, components=components) except Exception as ex: UpdateController._logger.error('{0}: Package installation hook {1} failed with error: {2}'.format(client.ip, function.__name__, ex)) failures = True if set(components).difference({'framework', 'storagedriver'}): # Second install packages on all ALBA nodes for function in Toolbox.fetch_hooks('update', 'package_install_single'): try: 
function(package_info=packages_to_update, components=components) except Exception as ex: UpdateController._logger.exception('Package installation hook {0} failed with error: {1}'.format(function.__name__, ex)) failures = True if failures is True: raise Exception('Installing the packages failed on 1 or more nodes') # Remove update file for client in ssh_clients: client.file_delete(UpdateController._update_file) # Migrate code if 'framework' in components: failures = [] for client in ssh_clients: UpdateController._logger.debug('{0}: Verifying extensions code migration is required'.format(client.ip)) try: key = '/ovs/framework/hosts/{0}/versions'.format(System.get_my_machine_id(client=client)) old_versions = Configuration.get(key) if Configuration.exists(key) else {} try: with remote(client.ip, [Migrator]) as rem: rem.Migrator.migrate(master_ips, extra_ips) except EOFError as eof: UpdateController._logger.warning('{0}: EOFError during code migration, retrying {1}'.format(client.ip, eof)) with remote(client.ip, [Migrator]) as rem: rem.Migrator.migrate(master_ips, extra_ips) new_versions = Configuration.get(key) if Configuration.exists(key) else {} if old_versions != new_versions: UpdateController._logger.debug('{0}: Finished extensions code migration. Old versions: {1} --> New versions: {2}'.format(client.ip, old_versions, new_versions)) except Exception as ex: failures.append('{0}: {1}'.format(client.ip, str(ex))) if len(failures) > 0: raise Exception('Failed to run the extensions migrate code on all nodes. Errors found:\n\n{0}'.format('\n\n'.join(failures))) # Start memcached if 'memcached' in services_stop_start: services_stop_start.remove('memcached') UpdateController._logger.debug('Starting memcached') UpdateController.change_services_state(services=['memcached'], ssh_clients=ssh_clients, action='start') # Migrate model if 'framework' in components: UpdateController._logger.debug('Verifying DAL code migration is required') old_versions = PersistentFactory.get_client().get('ovs_model_version') if PersistentFactory.get_client().exists('ovs_model_version') else {} from ovs.dal.helpers import Migration with remote(ssh_clients[0].ip, [Migration]) as rem: rem.Migration.migrate() new_versions = PersistentFactory.get_client().get('ovs_model_version') if PersistentFactory.get_client().exists('ovs_model_version') else {} if old_versions != new_versions: UpdateController._logger.debug('Finished DAL code migration. 
Old versions: {0} --> New versions: {1}'.format(old_versions, new_versions)) # Post update actions for client in ssh_clients: UpdateController._logger.debug('{0}: Executing post-update actions'.format(client.ip)) for function in Toolbox.fetch_hooks('update', 'post_update_multi'): try: function(client=client, components=components) except Exception as ex: UpdateController._logger.exception('{0}: Post update hook {1} failed with error: {2}'.format(client.ip, function.__name__, ex)) for function in Toolbox.fetch_hooks('update', 'post_update_single'): try: function(components=components) except Exception as ex: UpdateController._logger.exception('Post update hook {0} failed with error: {1}'.format(function.__name__, ex)) # Start services UpdateController.change_services_state(services=services_stop_start, ssh_clients=ssh_clients, action='start') UpdateController._refresh_package_information() UpdateController._logger.debug('+++ Finished updating +++') except NoLockAvailableException: UpdateController._logger.debug('Another update is currently in progress!') except Exception as ex: UpdateController._logger.exception('Error during update: {0}'.format(ex)) if len(ssh_clients) > 0: UpdateController.change_services_state(services=services_stop_start, ssh_clients=ssh_clients, action='start') UpdateController._refresh_package_information() UpdateController._logger.error('Failed to update. Please check all the logs for more information') finally: filemutex.release() for ssh_client in ssh_clients: for file_name in [UpdateController._update_file, UpdateController._update_ongoing_file]: try: if ssh_client.file_exists(file_name): ssh_client.file_delete(file_name) except: UpdateController._logger.warning('[0}: Failed to remove lock file {1}'.format(ssh_client.ip, file_name))
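Both update entry points serialize themselves with file_mutex('system_update', wait=2) and release the lock in a finally block no matter how the update ends. A minimal standalone equivalent built on an advisory fcntl lock (the path below is a hypothetical stand-in for the real mutex file, and the real helper additionally supports a wait timeout and raises NoLockAvailableException when it expires):

import fcntl
import os

class FileLock(object):
    # Sketch of the file_mutex pattern: an exclusive advisory lock that other
    # processes on the same host block on until it is released.
    def __init__(self, path='/tmp/ovs_system_update.lock'):  # hypothetical path
        self.path = path
        self._fd = None

    def acquire(self):
        self._fd = os.open(self.path, os.O_CREAT | os.O_RDWR)
        fcntl.flock(self._fd, fcntl.LOCK_EX)

    def release(self):
        if self._fd is not None:
            fcntl.flock(self._fd, fcntl.LOCK_UN)
            os.close(self._fd)
            self._fd = None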
def sync_with_reality(storagerouter_guid=None): """ Syncs the Disks from all StorageRouters with the reality. :param storagerouter_guid: Guid of the Storage Router to synchronize """ storagerouters = [] if storagerouter_guid is not None: storagerouters.append(StorageRouter(storagerouter_guid)) else: storagerouters = StorageRouterList.get_storagerouters() for storagerouter in storagerouters: try: client = SSHClient(storagerouter, username='******') except UnableToConnectException: DiskController._logger.info('Could not connect to StorageRouter {0}, skipping'.format(storagerouter.ip)) continue configuration = {} # Gather mount data mount_mapping = {} mount_data = client.run('mount') for mount in mount_data.splitlines(): mount = mount.strip() match = re.search('(/dev/(.+?)) on (/.*?) type.*', mount) if match is not None: dev_name = match.groups()[0] uuid = client.run('blkid -o value -s UUID {0}'.format(dev_name)) if uuid: mount_mapping[uuid] = match.groups()[2] else: mount_mapping[match.groups()[1]] = match.groups()[2] # Gather raid information try: md_information = client.run('mdadm --detail /dev/md*', suppress_logging=True) except CalledProcessError: md_information = '' raid_members = [] for member in re.findall('(?: +[0-9]+){4} +[^/]+/dev/([a-z0-9]+)', md_information): raid_members.append(member) # Gather disk information with remote(storagerouter.ip, [Context, os]) as rem: context = rem.Context() devices = [device for device in context.list_devices(subsystem='block') if ('ID_TYPE' in device and device['ID_TYPE'] == 'disk') or ('DEVNAME' in device and ('loop' in device['DEVNAME'] or 'nvme' in device['DEVNAME'] or 'md' in device['DEVNAME']))] for device in devices: is_partition = device['DEVTYPE'] == 'partition' device_path = device['DEVNAME'] device_name = device_path.split('/')[-1] partition_id = None partition_name = None extended_partition_info = None if is_partition is True: partition_name = device['ID_FS_UUID'] if 'ID_FS_UUID' in device else device_name if 'ID_PART_ENTRY_NUMBER' in device: extended_partition_info = True partition_id = device['ID_PART_ENTRY_NUMBER'] if device_name.startswith('nvme') or device_name.startswith('loop'): device_name = device_name[:0 - int(len(partition_id)) - 1] elif device_name.startswith('md'): device_name = device_name[:device_name.index('p')] else: device_name = device_name[:0 - int(len(partition_id))] else: DiskController._logger.debug('Partition {0} has no partition metadata'.format(device_path)) extended_partition_info = False match = re.match('^(\D+?)(\d+)$', device_name) if match is None: DiskController._logger.debug('Could not handle disk/partition {0}'.format(device_path)) continue # Unable to handle this disk/partition partition_id = match.groups()[1] device_name = match.groups()[0] sectors = int(client.run('cat /sys/block/{0}/size'.format(device_name))) sector_size = int(client.run('cat /sys/block/{0}/queue/hw_sector_size'.format(device_name))) rotational = int(client.run('cat /sys/block/{0}/queue/rotational'.format(device_name))) if sectors == 0: continue if device_name in raid_members: continue if device_name not in configuration: configuration[device_name] = {'partitions': {}} path = None for path_type in ['by-id', 'by-uuid']: if path is not None: break if 'DEVLINKS' in device: for item in device['DEVLINKS'].split(' '): if path_type in item: path = item if path is None: path = device_path if is_partition is True: if 'ID_PART_ENTRY_TYPE' in device and device['ID_PART_ENTRY_TYPE'] == '0x5': continue # This is an extended partition, let's skip 
that one if extended_partition_info is True: offset = int(device['ID_PART_ENTRY_OFFSET']) * sector_size size = int(device['ID_PART_ENTRY_SIZE']) * sector_size else: match = re.match('^(\D+?)(\d+)$', device_path) if match is None: DiskController._logger.debug('Could not handle disk/partition {0}'.format(device_path)) continue # Unable to handle this disk/partition partitions_info = DiskTools.get_partitions_info(match.groups()[0]) if device_path in partitions_info: partition_info = partitions_info[device_path] offset = int(partition_info['start']) size = int(partition_info['size']) else: DiskController._logger.warning('Could not retrieve partition info for disk/partition {0}'.format(device_path)) continue configuration[device_name]['partitions'][partition_id] = {'offset': offset, 'size': size, 'path': path, 'state': 'OK'} partition_data = configuration[device_name]['partitions'][partition_id] if partition_name in mount_mapping: mountpoint = mount_mapping[partition_name] partition_data['mountpoint'] = mountpoint partition_data['inode'] = rem.os.stat(mountpoint).st_dev del mount_mapping[partition_name] try: client.run('touch {0}/{1}; rm {0}/{1}'.format(mountpoint, str(time.time()))) except CalledProcessError: partition_data['state'] = 'FAILURE' pass if 'ID_FS_TYPE' in device: partition_data['filesystem'] = device['ID_FS_TYPE'] else: configuration[device_name].update({'name': device_name, 'path': path, 'vendor': device['ID_VENDOR'] if 'ID_VENDOR' in device else None, 'model': device['ID_MODEL'] if 'ID_MODEL' in device else None, 'size': sector_size * sectors, 'is_ssd': rotational == 0, 'state': 'OK'}) for partition_name in mount_mapping: device_name = partition_name.split('/')[-1] match = re.search('^(\D+?)(\d+)$', device_name) if match is not None: device_name = match.groups()[0] partition_id = match.groups()[1] if device_name not in configuration: configuration[device_name] = {'partitions': {}, 'state': 'MISSING'} configuration[device_name]['partitions'][partition_id] = {'mountpoint': mount_mapping[partition_name], 'state': 'MISSING'} # Sync the model disk_names = [] for disk in storagerouter.disks: if disk.name not in configuration: for partition in disk.partitions: partition.delete() disk.delete() else: disk_names.append(disk.name) DiskController._update_disk(disk, configuration[disk.name]) partitions = [] partition_info = configuration[disk.name]['partitions'] for partition in disk.partitions: if partition.id not in partition_info: partition.delete() else: partitions.append(partition.id) DiskController._update_partition(partition, partition_info[partition.id]) for partition_id in partition_info: if partition_id not in partitions: DiskController._create_partition(partition_id, partition_info[partition_id], disk) for disk_name in configuration: if disk_name not in disk_names and configuration[disk_name]['state'] not in ['MISSING']: disk = Disk() disk.storagerouter = storagerouter disk.name = disk_name DiskController._update_disk(disk, configuration[disk_name]) partition_info = configuration[disk_name]['partitions'] for partition_id in partition_info: if partition_info[partition_id]['state'] not in ['MISSING']: DiskController._create_partition(partition_id, partition_info[partition_id], disk)
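Disk sizes in this sync are not taken from udev properties but recomputed from sysfs: the sector count multiplied by the hardware sector size. A tiny local sketch of that calculation (the real code cats these files over SSH on the StorageRouter):

def get_block_device_size(device_name):
    # Total size in bytes = sector count * hardware sector size, both exposed
    # by the kernel under /sys/block/<device>.
    with open('/sys/block/{0}/size'.format(device_name)) as f:
        sectors = int(f.read().strip())
    with open('/sys/block/{0}/queue/hw_sector_size'.format(device_name)) as f:
        sector_size = int(f.read().strip())
    return sectors * sector_size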
def execute_scrub_work(queue, vpool, scrub_info, error_messages): """ Executes scrub work for a given vDisk queue and vPool, based on scrub_info :param queue: a Queue with vDisk guids that need to be scrubbed (they should only be member of a single vPool) :type queue: Queue :param vpool: the vPool object of the vDisks :type vpool: VPool :param scrub_info: A dict containing scrub information: `scrub_path` with the path where to scrub and `storage_router` with the StorageRouter that needs to do the work :type scrub_info: dict :param error_messages: A list of error messages to be filled :type error_messages: list :return: a list of error messages :rtype: list """ def _verify_mds_config(current_vdisk): current_vdisk.invalidate_dynamics('info') vdisk_configs = current_vdisk.info['metadata_backend_config'] if len(vdisk_configs) == 0: raise RuntimeError('Could not load MDS configuration') return vdisk_configs client = None lock_time = 5 * 60 storagerouter = scrub_info['storage_router'] scrub_directory = '{0}/scrub_work_{1}_{2}'.format( scrub_info['scrub_path'], vpool.name, storagerouter.name) scrub_config_key = 'ovs/vpools/{0}/proxies/scrub/scrub_config_{1}'.format( vpool.guid, storagerouter.guid) backend_config_key = 'ovs/vpools/{0}/proxies/scrub/backend_config_{1}'.format( vpool.guid, storagerouter.guid) alba_proxy_service = 'ovs-albaproxy_{0}_{1}_scrub'.format( vpool.name, storagerouter.name) # Deploy a proxy try: with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time): ScheduledTaskController._logger.info( 'Scrubber - vPool {0} - StorageRouter {1} - Deploying ALBA proxy {2}' .format(vpool.name, storagerouter.name, alba_proxy_service)) client = SSHClient(storagerouter, 'root') client.dir_create(scrub_directory) client.dir_chmod( scrub_directory, 0777 ) # Celery task executed by 'ovs' user and should be able to write in it if ServiceManager.has_service( name=alba_proxy_service, client=client ) is True and ServiceManager.get_service_status( name=alba_proxy_service, client=client) is True: ScheduledTaskController._logger.info( 'Scrubber - vPool {0} - StorageRouter {1} - Re-using existing proxy service {2}' .format(vpool.name, storagerouter.name, alba_proxy_service)) scrub_config = Configuration.get(scrub_config_key) else: machine_id = System.get_my_machine_id(client) port_range = Configuration.get( '/ovs/framework/hosts/{0}/ports|storagedriver'.format( machine_id)) port = System.get_free_ports(selected_range=port_range, nr=1, client=client)[0] # Scrub config # {u'albamgr_cfg_url': u'arakoon://config/ovs/vpools/71e2f717-f270-4a41-bbb0-d4c8c084d43e/proxies/64759516-3471-4321-b912-fb424568fc5b/config/abm?ini=%2Fopt%2FOpenvStorage%2Fconfig%2Farakoon_cacc.ini', # u'fragment_cache': [u'none'], # u'ips': [u'127.0.0.1'], # u'log_level': u'info', # u'manifest_cache_size': 17179869184, # u'port': 0, # u'transport': u'tcp'} # Backend config # {u'alba_connection_host': u'10.100.193.155', # u'alba_connection_port': 26204, # u'alba_connection_preset': u'preset', # u'alba_connection_timeout': 15, # u'alba_connection_transport': u'TCP', # u'backend_interface_retries_on_error': 5, # u'backend_interface_retry_backoff_multiplier': 2.0, # u'backend_interface_retry_interval_secs': 1, # u'backend_type': u'ALBA'} scrub_config = Configuration.get( 'ovs/vpools/{0}/proxies/scrub/generic_scrub'.format( vpool.guid)) scrub_config['port'] = port scrub_config['transport'] = 'tcp' Configuration.set(scrub_config_key, json.dumps(scrub_config, indent=4), raw=True) params = { 'VPOOL_NAME': vpool.name, 'LOG_SINK': 
LogHandler.get_sink_path('alba_proxy'), 'CONFIG_PATH': Configuration.get_configuration_path(scrub_config_key) } ServiceManager.add_service(name='ovs-albaproxy', params=params, client=client, target_name=alba_proxy_service) ServiceManager.start_service(name=alba_proxy_service, client=client) ScheduledTaskController._logger.info( 'Scrubber - vPool {0} - StorageRouter {1} - Deployed ALBA proxy {2}' .format(vpool.name, storagerouter.name, alba_proxy_service)) backend_config = Configuration.get( 'ovs/vpools/{0}/hosts/{1}/config'.format( vpool.guid, vpool.storagedrivers[0].storagedriver_id ))['backend_connection_manager'] backend_config['alba_connection_host'] = '127.0.0.1' backend_config['alba_connection_port'] = scrub_config['port'] Configuration.set( backend_config_key, json.dumps({"backend_connection_manager": backend_config}, indent=4), raw=True) except Exception: message = 'Scrubber - vPool {0} - StorageRouter {1} - An error occurred deploying ALBA proxy {2}'.format( vpool.name, storagerouter.name, alba_proxy_service) error_messages.append(message) ScheduledTaskController._logger.exception(message) if client is not None and ServiceManager.has_service( name=alba_proxy_service, client=client) is True: if ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True: ServiceManager.stop_service(name=alba_proxy_service, client=client) ServiceManager.remove_service(name=alba_proxy_service, client=client) if Configuration.exists(scrub_config_key): Configuration.delete(scrub_config_key) try: # Empty the queue with vDisks to scrub with remote(storagerouter.ip, [VDisk]) as rem: while True: vdisk = None vdisk_guid = queue.get(False) try: # Check MDS master is local. Trigger MDS handover if necessary vdisk = rem.VDisk(vdisk_guid) ScheduledTaskController._logger.info( 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Started scrubbing at location {3}' .format(vpool.name, storagerouter.name, vdisk.name, scrub_directory)) configs = _verify_mds_config(current_vdisk=vdisk) storagedriver = StorageDriverList.get_by_storagedriver_id( vdisk.storagedriver_id) if configs[0].get( 'ip') != storagedriver.storagerouter.ip: ScheduledTaskController._logger.info( 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - MDS master is not local, trigger handover' .format(vpool.name, storagerouter.name, vdisk.name)) MDSServiceController.ensure_safety( VDisk(vdisk_guid) ) # Do not use a remote VDisk instance here configs = _verify_mds_config(current_vdisk=vdisk) if configs[0].get( 'ip') != storagedriver.storagerouter.ip: ScheduledTaskController._logger.warning( 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Skipping because master MDS still not local' .format(vpool.name, storagerouter.name, vdisk.name)) continue # Do the actual scrubbing with vdisk.storagedriver_client.make_locked_client( str(vdisk.volume_id)) as locked_client: ScheduledTaskController._logger.info( 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Retrieve and apply scrub work' .format(vpool.name, storagerouter.name, vdisk.name)) work_units = locked_client.get_scrubbing_workunits( ) for work_unit in work_units: res = locked_client.scrub( work_unit=work_unit, scratch_dir=scrub_directory, log_sinks=[ LogHandler.get_sink_path( 'scrubber', allow_override=True) ], backend_config=Configuration. 
get_configuration_path(backend_config_key)) locked_client.apply_scrubbing_result( scrubbing_work_result=res) if work_units: ScheduledTaskController._logger.info( 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - {3} work units successfully applied' .format(vpool.name, storagerouter.name, vdisk.name, len(work_units))) else: ScheduledTaskController._logger.info( 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - No scrubbing required' .format(vpool.name, storagerouter.name, vdisk.name)) except Exception: if vdisk is None: message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk with guid {2} could not be found'.format( vpool.name, storagerouter.name, vdisk_guid) else: message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Scrubbing failed'.format( vpool.name, storagerouter.name, vdisk.name) error_messages.append(message) ScheduledTaskController._logger.exception(message) except Empty: # Raised when all items have been fetched from the queue ScheduledTaskController._logger.info( 'Scrubber - vPool {0} - StorageRouter {1} - Queue completely processed' .format(vpool.name, storagerouter.name)) except Exception: message = 'Scrubber - vPool {0} - StorageRouter {1} - Scrubbing failed'.format( vpool.name, storagerouter.name) error_messages.append(message) ScheduledTaskController._logger.exception(message) # Delete the proxy again try: with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time): ScheduledTaskController._logger.info( 'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2}' .format(vpool.name, storagerouter.name, alba_proxy_service)) client = SSHClient(storagerouter, 'root') client.dir_delete(scrub_directory) if ServiceManager.has_service(alba_proxy_service, client=client): ServiceManager.stop_service(alba_proxy_service, client=client) ServiceManager.remove_service(alba_proxy_service, client=client) if Configuration.exists(scrub_config_key): Configuration.delete(scrub_config_key) ScheduledTaskController._logger.info( 'Scrubber - vPool {0} - StorageRouter {1} - Removed service {2}' .format(vpool.name, storagerouter.name, alba_proxy_service)) except Exception: message = 'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2} failed'.format( vpool.name, storagerouter.name, alba_proxy_service) error_messages.append(message) ScheduledTaskController._logger.exception(message)
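The scrub loop drains the shared queue without blocking: queue.get(False) is called until it raises Empty, which is the normal 'all work fetched' signal rather than an error, and a failure on one vDisk only appends to error_messages instead of aborting the whole run. The same pattern in isolation (a sketch; handle() is a hypothetical stand-in for the per-vDisk scrub work):

try:
    from queue import Empty  # Python 3
except ImportError:
    from Queue import Empty  # Python 2

def drain(work_queue, handle, errors):
    # Pop items without blocking; Empty means all work has been fetched.
    try:
        while True:
            item = work_queue.get(False)
            try:
                handle(item)
            except Exception as ex:
                errors.append('{0}: {1}'.format(item, ex))
    except Empty:
        pass
    return errors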