def get_stats_mds(cls):
    """
    Retrieve how many vDisks each MDS service is serving, whether as master or slave
    One measurement is produced for every service of the 'MetadataServer' service type
    :return: Tuple of which the first element is always False and the second element is the list of measurements
    :rtype: tuple
    :raises RuntimeError: When the 'MetadataServer' service type cannot be found in the model
    """
    if cls._config is None:
        cls.validate_and_retrieve_config()
    environment = cls._config['environment']

    mds_type = ServiceTypeList.get_by_name('MetadataServer')
    if mds_type is None:
        raise RuntimeError('MetadataServer service not found in the model')

    stats = []
    for service in mds_type.services:
        mds_service = service.mds_service

        # Single pass over the junctions: key True counts the masters, key False counts everything else
        role_counts = {True: 0, False: 0}
        for junction in mds_service.vdisks:
            role_counts[junction.is_master is True] += 1

        tags = {'vpool_name': mds_service.vpool.name,
                'mds_number': mds_service.number,
                'environment': environment,
                'storagerouter_name': service.storagerouter.name}

        load = MDSServiceController.get_mds_load(mds_service)[0]
        capacity = mds_service.capacity
        # A capacity of -1 is reported as the string 'infinite'
        fields = {'load': load,
                  'capacity': 'infinite' if capacity == -1 else capacity,
                  'masters': role_counts[True],
                  'slaves': role_counts[False]}

        stats.append({'tags': tags,
                      'fields': fields,
                      'measurement': 'mds'})
    return False, stats
def on_demote(cluster_ip, master_ip, offline_node_ips=None):
    """
    Handles the demote for the StorageDrivers
    When the demoted node hosts an internally managed 'arakoon-voldrv' service, the voldrv Arakoon cluster is
    shrunk, the modelled service is deleted and the volumedriver Arakoon configuration is refreshed
    :param cluster_ip: IP of the node to demote
    :type cluster_ip: str
    :param master_ip: IP of the master node (not used by this function)
    :type master_ip: str
    :param offline_node_ips: IPs of nodes which are offline
    :type offline_node_ips: list
    :return: None
    :raises RuntimeError: When the demoted node hosts the service and no other online node hosts one
    """
    _ = master_ip
    offline_ips = [] if offline_node_ips is None else offline_node_ips

    arakoon_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
    demoted_service = None
    surviving_ips = []
    for service in arakoon_type.services:
        # Externally managed arakoon cluster service does not have storage router
        if service.name != 'arakoon-voldrv' or service.is_internal is not True:
            continue
        node_ip = service.storagerouter.ip
        if node_ip == cluster_ip:
            demoted_service = service
        elif node_ip not in offline_ips:
            surviving_ips.append(node_ip)

    if demoted_service is None:
        # The demoted node is not part of the voldrv cluster, nothing to do
        return
    if len(surviving_ips) == 0:
        raise RuntimeError('Could not find any remaining arakoon nodes for the voldrv cluster')

    StorageDriverController._logger.debug('* Shrink StorageDriver cluster')
    cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|voldrv'))
    ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                    remaining_node_ips=surviving_ips,
                                    cluster_name=cluster_name,
                                    offline_nodes=offline_ips)
    demoted_service.delete()
    StorageDriverController._configure_arakoon_to_volumedriver(cluster_name=cluster_name)
def on_demote(cluster_ip, master_ip, offline_node_ips=None):
    """
    Handles the demote for the StorageDrivers
    When the demoted node hosts an 'arakoon-voldrv' service, the 'voldrv' Arakoon cluster is shrunk, the service
    is stopped and removed on the node (only when the node is online), the remaining nodes are restarted and the
    modelled service is deleted
    :param cluster_ip: IP of the node to demote
    :param master_ip: IP of the master node
    :param offline_node_ips: IPs of nodes which are offline
    :return: None
    """
    if offline_node_ips is None:
        offline_node_ips = []

    # No SSH connection is attempted towards a node which is known to be offline
    if cluster_ip in offline_node_ips:
        client = None
    else:
        client = SSHClient(cluster_ip, username='******')

    arakoon_type = ServiceTypeList.get_by_name('Arakoon')
    demoted_service = None
    remaining_ips = []
    for candidate in arakoon_type.services:
        if candidate.name != 'arakoon-voldrv':
            continue
        candidate_ip = candidate.storagerouter.ip
        if candidate_ip == cluster_ip:
            demoted_service = candidate
        elif candidate_ip not in offline_node_ips:
            remaining_ips.append(candidate_ip)

    if demoted_service is None:
        return

    # Parenthesised form prints exactly the same text with the Python 2 print statement
    print('* Shrink StorageDriver cluster')
    ArakoonInstaller.shrink_cluster(master_ip, cluster_ip, 'voldrv', offline_node_ips)
    if client is not None and ServiceManager.has_service(demoted_service.name, client=client) is True:
        ServiceManager.stop_service(demoted_service.name, client=client)
        ServiceManager.remove_service(demoted_service.name, client=client)
    ArakoonInstaller.restart_cluster_remove('voldrv', remaining_ips)
    demoted_service.delete()
    StorageDriverController._configure_arakoon_to_volumedriver(offline_node_ips)
def on_demote(cluster_ip, master_ip, offline_node_ips=None):
    """
    Handles the demote for the StorageDrivers
    Shrinks the 'voldrv' Arakoon cluster when the demoted node hosts an 'arakoon-voldrv' service, cleans up the
    service on the node itself (when it is reachable) and removes the service from the model
    :param cluster_ip: IP of the node to demote
    :param master_ip: IP of the master node (not used by this function)
    :param offline_node_ips: IPs of nodes which are offline
    :return: None
    """
    _ = master_ip
    if offline_node_ips is None:
        offline_node_ips = []

    node_is_online = cluster_ip not in offline_node_ips
    client = SSHClient(cluster_ip, username='******') if node_is_online else None

    voldrv_service = None
    remaining_ips = []
    for service in ServiceTypeList.get_by_name('Arakoon').services:
        if service.name == 'arakoon-voldrv':
            service_ip = service.storagerouter.ip
            if service_ip == cluster_ip:
                voldrv_service = service
            elif service_ip not in offline_node_ips:
                remaining_ips.append(service_ip)

    if voldrv_service is None:
        return

    # Parenthesised form prints exactly the same text with the Python 2 print statement
    print('* Shrink StorageDriver cluster')
    ArakoonInstaller.shrink_cluster(cluster_ip, 'voldrv', offline_node_ips)
    if client is not None and ServiceManager.has_service(voldrv_service.name, client=client) is True:
        ServiceManager.stop_service(voldrv_service.name, client=client)
        ServiceManager.remove_service(voldrv_service.name, client=client)
    ArakoonInstaller.restart_cluster_remove('voldrv', remaining_ips)
    voldrv_service.delete()
    StorageDriverController._configure_arakoon_to_volumedriver()
def on_demote(cluster_ip, master_ip):
    """
    Handles the demote for the StorageDrivers
    When the demoted node hosts an 'arakoon-voldrv' service, the 'voldrv' Arakoon cluster is shrunk, the service
    is stopped and removed on the node, the remaining nodes are restarted, the modelled service is deleted and
    the cluster is deployed towards every StorageRouter
    :param cluster_ip: IP of the node to demote
    :param master_ip: IP of the master node
    :return: None
    """
    client = SSHClient(cluster_ip, username='******')
    arakoon_type = ServiceTypeList.get_by_name('Arakoon')

    demoted_service = None
    remaining_ips = []
    for service in arakoon_type.services:
        if service.name != 'arakoon-voldrv':
            continue
        if service.storagerouter.ip == cluster_ip:
            demoted_service = service
            continue
        remaining_ips.append(service.storagerouter.ip)

    if demoted_service is None:
        return

    # Parenthesised form prints exactly the same text with the Python 2 print statement
    print('* Shrink StorageDriver cluster')
    ArakoonInstaller.shrink_cluster(master_ip, cluster_ip, 'voldrv')
    if ServiceManager.has_service(demoted_service.name, client=client) is True:
        ServiceManager.stop_service(demoted_service.name, client=client)
        ServiceManager.remove_service(demoted_service.name, client=client)
    ArakoonInstaller.restart_cluster_remove('voldrv', remaining_ips)
    demoted_service.delete()
    for storagerouter in StorageRouterList.get_storagerouters():
        ArakoonInstaller.deploy_to_slave(master_ip, storagerouter.ip, 'voldrv')
    StorageDriverController._configure_arakoon_to_volumedriver()
def _voldrv_arakoon_checkup(create_cluster):
    """
    Makes sure the 'voldrv' Arakoon cluster exists and is extended towards the master StorageRouters having a
    partition with the DB role
    - When requested and no 'arakoon-voldrv' service is modelled yet, a cluster is created on one of the
      available master StorageRouters
    - When fewer services are modelled than there are available master StorageRouters, the cluster is extended
      to every available master StorageRouter that does not host the service yet
    :param create_cluster: Only when True, a new cluster is created if none is modelled yet
    :return: None
    """
    def add_service(service_storagerouter, arakoon_result):
        """
        Model a new 'arakoon-voldrv' service on the given StorageRouter
        :param service_storagerouter: Storage Router to add the service to
        :type service_storagerouter: StorageRouter
        :param arakoon_result: Port information, read through the keys 'client_port' and 'messaging_port'
        :type arakoon_result: dict
        :return: The newly created and saved service
        """
        new_service = Service()
        # service_name and service_type are resolved from the enclosing function at call time
        new_service.name = service_name
        new_service.type = service_type
        new_service.ports = [arakoon_result['client_port'], arakoon_result['messaging_port']]
        new_service.storagerouter = service_storagerouter
        new_service.save()
        return new_service

    cluster_name = 'voldrv'
    service_name = 'arakoon-voldrv'
    service_type = ServiceTypeList.get_by_name('Arakoon')

    # Collect the currently modelled voldrv services and the IPs they are running on
    current_services = []
    current_ips = []
    for service in service_type.services:
        if service.name == service_name:
            current_services.append(service)
            current_ips.append(service.storagerouter.ip)

    # NOTE(review): all_sr_ips is filled below but never read anywhere in this function
    all_sr_ips = [storagerouter.ip for storagerouter in StorageRouterList.get_slaves()]
    # Maps every master StorageRouter having at least one DB partition to its first DB partition
    available_storagerouters = {}
    for storagerouter in StorageRouterList.get_masters():
        storagerouter.invalidate_dynamics(['partition_config'])
        if len(storagerouter.partition_config[DiskPartition.ROLES.DB]) > 0:
            available_storagerouters[storagerouter] = DiskPartition(storagerouter.partition_config[DiskPartition.ROLES.DB][0])
        all_sr_ips.append(storagerouter.ip)

    if create_cluster is True and len(current_services) == 0 and len(available_storagerouters) > 0:
        # No cluster modelled yet: create it on the first item of the (unordered) dict
        storagerouter, partition = available_storagerouters.items()[0]
        result = ArakoonInstaller.create_cluster(cluster_name=cluster_name,
                                                 ip=storagerouter.ip,
                                                 base_dir=partition.folder)
        current_services.append(add_service(storagerouter, result))
        # Here the new IP is appended to current_ips only after the restart call
        ArakoonInstaller.restart_cluster_add(cluster_name, current_ips, storagerouter.ip)
        current_ips.append(storagerouter.ip)
        StorageDriverController._configure_arakoon_to_volumedriver()

    if 0 < len(current_services) < len(available_storagerouters):
        for storagerouter, partition in available_storagerouters.iteritems():
            if storagerouter.ip in current_ips:
                continue
            result = ArakoonInstaller.extend_cluster(
                current_services[0].storagerouter.ip,
                storagerouter.ip,
                cluster_name,
                partition.folder
            )
            # The newly modelled service is not added to current_services, only its IP is tracked
            add_service(storagerouter, result)
            # Here, unlike in the create branch above, current_ips already contains the new IP at restart time
            current_ips.append(storagerouter.ip)
            ArakoonInstaller.restart_cluster_add(cluster_name, current_ips, storagerouter.ip)
        StorageDriverController._configure_arakoon_to_volumedriver()
def remove_mds_service(mds_service, vpool, reload_config):
    """
    Removes an MDS service
    The service is removed from the model, the metadata server section of the StorageDriver configuration is
    regenerated for the remaining MDS services and the directories of the removed service are deleted
    :param mds_service: The MDS service to remove
    :param vpool: The vPool for which the MDS service will be removed
    :param reload_config: If True, the volumedriver's updated configuration will be reloaded
    :return: None
    :raises RuntimeError: When the MDS service is still serving vDisks
    """
    if len(mds_service.vdisks_guids) > 0:
        raise RuntimeError('Cannot remove MDSService that is still serving disks')

    storagerouter = mds_service.service.storagerouter
    client = SSHClient(storagerouter)
    mdsservice_type = ServiceTypeList.get_by_name('MetadataServer')

    # Clean up model, remembering the paths which need to be removed from disk afterwards
    directories_to_clean = []
    for obsolete_partition in mds_service.storagedriver_partitions:
        directories_to_clean.append(obsolete_partition.path)
        obsolete_partition.delete()
    mds_service.delete()
    mds_service.service.delete()

    # Generate new mds_nodes section
    mds_nodes = []
    for service in mdsservice_type.services:
        if service.storagerouter_guid != storagerouter.guid:
            continue
        remaining_mds = service.mds_service
        if remaining_mds.vpool_guid != vpool.guid:
            continue
        db_paths = [sd_partition.path for sd_partition in remaining_mds.storagedriver_partitions
                    if sd_partition.role == DiskPartition.ROLES.DB]
        # The DB path serves as database directory as well as scratch directory
        mds_nodes.append({'host': service.storagerouter.ip,
                          'port': service.ports[0],
                          'db_directory': db_paths[0],
                          'scratch_directory': db_paths[0]})

    # Generate the correct section in the Storage Driver's configuration
    storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name)
    storagedriver_config.load(client)
    storagedriver_config.clean()  # Clean out obsolete values
    storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
    storagedriver_config.save(client, reload_config=reload_config)

    # 5 attempts in total, every failure is followed by a 5 second pause; the last failure is re-raised
    for attempts_left in range(4, -1, -1):
        try:
            root_client = SSHClient(storagerouter, username='******')
            root_client.dir_delete(directories=directories_to_clean,
                                   follow_symlinks=True)
            for dir_name in directories_to_clean:
                logger.debug('Recursively removed {0}'.format(dir_name))
        except Exception:
            time.sleep(5)
            logger.debug('Waiting for the MDS service to go down...')
            if attempts_left == 0:
                raise
        else:
            break
def on_extranode(cluster_ip, master_ip=None):
    """
    An extra node is added, make sure it has the voldrv arakoon client file if possible
    The first 'arakoon-voldrv' service found in the model is used as source for the deployment
    :param cluster_ip: IP of the node which was added
    :param master_ip: IP of the master node (passed in by the caller, not used by this function)
    :return: None
    """
    _ = master_ip  # The master_ip will be passed in by caller
    arakoon_type = ServiceTypeList.get_by_name('Arakoon')
    source_service = next((service for service in arakoon_type.services
                           if service.name == 'arakoon-voldrv'), None)
    if source_service is not None:
        ArakoonInstaller.deploy_to_slave(source_service.storagerouter.ip, cluster_ip, 'voldrv')
def get_services_by_name(name):
    """
    Retrieve all services for a certain type
    :param name: Name of the service type
    :return: Data-object list of Services
    :raises ValueError: When no service type with the specified name exists; the message lists the valid names
    """
    service_type_names = [service_type.name for service_type in GeneralService.get_service_types()]
    if name not in service_type_names:
        # Join the names: ', '.format(...) ignores its argument and always evaluates to ', '
        raise ValueError('Invalid Service Type name specified. Please choose from: {0}'.format(', '.join(service_type_names)))
    return ServiceTypeList.get_by_name(name).services
def remove_mds_service(mds_service, client, storagerouter, vpool, reload_config):
    """
    Removes an MDS service
    The service is removed from the model, the metadata server section of the StorageDriver configuration is
    regenerated for the remaining MDS services and the directories of the removed service are deleted
    (at most 5 attempts, failures are not raised)
    :param mds_service: The MDS service to remove
    :param client: Client used for loading/saving the configuration and for removing the directories
    :param storagerouter: The StorageRouter hosting the MDS service
    :param vpool: The vPool for which the MDS service will be removed
    :param reload_config: Passed on as 'reload_config' when saving the StorageDriver configuration
    :return: None
    :raises RuntimeError: When the MDS service is still serving vDisks
    """
    if len(mds_service.vdisks_guids) > 0:
        raise RuntimeError('Cannot remove MDSService that is still serving disks')

    mdsservice_type = ServiceTypeList.get_by_name('MetadataServer')
    storagedriver = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid][0]

    # Clean up model; the number is still required afterwards to locate the directories on disk
    removed_number = mds_service.number
    removed_service = mds_service.service
    mds_service.delete()
    removed_service.delete()

    # Generate new mds_nodes section
    mds_nodes = []
    for service in mdsservice_type.services:
        if service.storagerouter_guid != storagerouter.guid:
            continue
        remaining_mds = service.mds_service
        if remaining_mds.vpool_guid != vpool.guid:
            continue
        node = {'host': service.storagerouter.ip,
                'port': service.ports[0],
                'db_directory': '{0}/mds_{1}_{2}'.format(storagedriver.mountpoint_md, vpool.name, remaining_mds.number),
                'scratch_directory': '{0}/mds_{1}_{2}'.format(storagedriver.mountpoint_temp, vpool.name, remaining_mds.number)}
        mds_nodes.append(node)

    # Generate the correct section in the Storage Driver's configuration
    storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name)
    storagedriver_config.load(client)
    storagedriver_config.clean()  # Clean out obsolete values
    storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
    storagedriver_config.save(client, reload_config=reload_config)

    # Best effort cleanup: every failure is followed by a 5 second pause, after 5 failures we give up silently
    for _attempt in range(5):
        try:
            obsolete_directories = ['{0}/mds_{1}_{2}'.format(storagedriver.mountpoint_md, vpool.name, removed_number),
                                    '{0}/mds_{1}_{2}'.format(storagedriver.mountpoint_temp, vpool.name, removed_number)]
            client.dir_delete(obsolete_directories)
            logger.debug('MDS files cleaned up')
        except Exception:
            time.sleep(5)
            logger.debug('Waiting for the MDS service to go down...')
        else:
            break
def get_by_storagedriver(storagedriver_guid):
    """
    Returns the first MDSService found for the StorageDriver (via StorageRouter > Service and vPool)
    * The object relations are walked instead of using queries
    :param storagedriver_guid: Guid of the StorageDriver
    :return: The MDSService on the StorageDriver's StorageRouter serving the StorageDriver's vPool, None if there is none
    """
    md_server_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER)
    storagedriver = StorageDriver(storagedriver_guid)
    candidates = (service.mds_service
                  for service in storagedriver.storagerouter.services
                  if service.type_guid == md_server_type.guid
                  and service.mds_service.vpool_guid == storagedriver.vpool_guid)
    return next(candidates, None)
def get_by_storagedriver(storagedriver_guid):
    """
    Returns the first MDSService found for the StorageDriver (via StorageRouter > Service and vPool)
    * The object relations are walked instead of using queries
    :param storagedriver_guid: Guid of the StorageDriver
    :return: The matching MDSService, None when the StorageRouter hosts no MDS service for the StorageDriver's vPool
    """
    metadata_type = ServiceTypeList.get_by_name('MetadataServer')
    driver = StorageDriver(storagedriver_guid)
    for candidate in driver.storagerouter.services:
        # Only services of the MetadataServer type have a related MDS service which is of interest here
        if candidate.type_guid != metadata_type.guid:
            continue
        if candidate.mds_service.vpool_guid != driver.vpool_guid:
            continue
        return candidate.mds_service
    return None
def create(self):
    """
    Prepares a new Storagedriver for a given vPool and Storagerouter
    Free ports are reserved (4 for the StorageDriver itself plus 1 for every requested proxy), the StorageDriver
    is modelled, one service and one ALBA proxy is modelled per requested proxy and the resulting StorageDriver
    is stored on this instance as 'storagedriver'
    :return: None
    :rtype: NoneType
    :raises RuntimeError: When no StorageRouterInstaller instance has been set on this instance
    """
    if self.sr_installer is None:
        raise RuntimeError('No StorageRouterInstaller instance found')

    machine_id = System.get_my_machine_id(client=self.sr_installer.root_client)
    port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|storagedriver'.format(machine_id))
    storagerouter = self.sr_installer.storagerouter
    # NOTE(review): the extent of this mutex block was reconstructed - confirm that modelling happens while the mutex is held
    with volatile_mutex('add_vpool_get_free_ports_{0}'.format(machine_id), wait=30):
        # Ports already claimed in the model by StorageDrivers (and their proxies) on this StorageRouter
        model_ports_in_use = []
        for sd in StorageDriverList.get_storagedrivers():
            if sd.storagerouter_guid == storagerouter.guid:
                model_ports_in_use += sd.ports.values()
                for proxy in sd.alba_proxies:
                    model_ports_in_use.append(proxy.service.ports[0])
        ports = System.get_free_ports(selected_range=port_range,
                                      exclude=model_ports_in_use,
                                      amount=4 + self.sr_installer.requested_proxies,
                                      client=self.sr_installer.root_client)

        vpool = self.vp_installer.vpool
        vrouter_id = '{0}{1}'.format(vpool.name, machine_id)
        storagedriver = StorageDriver()
        # The name (and description) is the ID with underscores replaced by spaces
        storagedriver.name = vrouter_id.replace('_', ' ')
        # The first 4 reserved ports are for the StorageDriver itself
        storagedriver.ports = {'management': ports[0],
                               'xmlrpc': ports[1],
                               'dtl': ports[2],
                               'edge': ports[3]}
        storagedriver.vpool = vpool
        storagedriver.cluster_ip = Configuration.get('/ovs/framework/hosts/{0}/ip'.format(machine_id))
        storagedriver.storage_ip = self.storage_ip
        storagedriver.mountpoint = '/mnt/{0}'.format(vpool.name)
        storagedriver.description = storagedriver.name
        storagedriver.storagerouter = storagerouter
        storagedriver.storagedriver_id = vrouter_id
        storagedriver.save()

        # ALBA Proxies
        proxy_service_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ALBA_PROXY)
        for proxy_id in xrange(self.sr_installer.requested_proxies):
            service = Service()
            service.storagerouter = storagerouter
            # The ports following the first 4 are handed out to the proxies, one each
            service.ports = [ports[4 + proxy_id]]
            service.name = 'albaproxy_{0}_{1}'.format(vpool.name, proxy_id)
            service.type = proxy_service_type
            service.save()
            alba_proxy = AlbaProxy()
            alba_proxy.service = service
            alba_proxy.storagedriver = storagedriver
            alba_proxy.save()
    self.storagedriver = storagedriver
def on_demote(cluster_ip, master_ip, offline_node_ips=None):
    """
    Handles the demote for the StorageDrivers
    When the demoted node hosts an internally managed 'arakoon-voldrv' service, the voldrv Arakoon cluster is
    shrunk, the service is stopped and removed on the node (only when the node is online), the remaining nodes
    are restarted, the modelled service is deleted and the volumedriver Arakoon configuration is refreshed
    :param cluster_ip: IP of the node to demote
    :type cluster_ip: str
    :param master_ip: IP of the master node (not used by this function)
    :type master_ip: str
    :param offline_node_ips: IPs of nodes which are offline
    :type offline_node_ips: list
    :return: None
    """
    _ = master_ip
    if offline_node_ips is None:
        offline_node_ips = []

    # No SSH connection is attempted towards a node which is known to be offline
    if cluster_ip in offline_node_ips:
        client = None
    else:
        client = SSHClient(cluster_ip, username='******')

    arakoon_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
    demoted_service = None
    remaining_ips = []
    for service in arakoon_type.services:
        # Externally managed arakoon cluster service does not have storage router
        if service.name != 'arakoon-voldrv' or service.is_internal is not True:
            continue
        service_ip = service.storagerouter.ip
        if service_ip == cluster_ip:
            demoted_service = service
        elif service_ip not in offline_node_ips:
            remaining_ips.append(service_ip)

    if demoted_service is None:
        return

    StorageDriverController._logger.debug('* Shrink StorageDriver cluster')
    cluster_name = str(EtcdConfiguration.get('/ovs/framework/arakoon_clusters|voldrv'))
    ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                    cluster_name=cluster_name,
                                    offline_nodes=offline_node_ips)
    if client is not None and ServiceManager.has_service(demoted_service.name, client=client) is True:
        ServiceManager.stop_service(demoted_service.name, client=client)
        ServiceManager.remove_service(demoted_service.name, client=client)
    ArakoonInstaller.restart_cluster_remove(cluster_name, remaining_ips)
    demoted_service.delete()
    StorageDriverController._configure_arakoon_to_volumedriver(cluster_name=cluster_name)
def remove_mds_service(mds_service, client, storagerouter, vpool, reload_config):
    """
    Removes an MDS service
    The service and its StorageDriver partitions are removed from the model, the metadata server section of the
    StorageDriver configuration is regenerated for the remaining MDS services and the directories of the removed
    partitions are deleted (at most 5 attempts, failures are not raised)
    :param mds_service: The MDS service to remove
    :param client: Client used for loading/saving the configuration and for removing the directories
    :param storagerouter: The StorageRouter hosting the MDS service
    :param vpool: The vPool for which the MDS service will be removed
    :param reload_config: Passed on as 'reload_config' when saving the StorageDriver configuration
    :return: None
    :raises RuntimeError: When the MDS service is still serving vDisks
    """
    def _first_path(owner, role):
        """ Path of the first StorageDriver partition of the given MDS service having the given role """
        return [sd_partition.path for sd_partition in owner.storagedriver_partitions if sd_partition.role == role][0]

    if len(mds_service.vdisks_guids) > 0:
        raise RuntimeError('Cannot remove MDSService that is still serving disks')
    mdsservice_type = ServiceTypeList.get_by_name('MetadataServer')

    # Clean up model, remembering the paths which need to be removed from disk afterwards
    directories_to_clean = []
    for obsolete_partition in mds_service.storagedriver_partitions:
        directories_to_clean.append(obsolete_partition.path)
        obsolete_partition.delete()
    removed_service = mds_service.service
    mds_service.delete()
    removed_service.delete()

    # Generate new mds_nodes section
    mds_nodes = []
    for service in mdsservice_type.services:
        if service.storagerouter_guid != storagerouter.guid:
            continue
        remaining_mds = service.mds_service
        if remaining_mds.vpool_guid != vpool.guid:
            continue
        mds_nodes.append({'host': service.storagerouter.ip,
                          'port': service.ports[0],
                          'db_directory': _first_path(remaining_mds, DiskPartition.ROLES.DB),
                          'scratch_directory': _first_path(remaining_mds, DiskPartition.ROLES.SCRUB)})

    # Generate the correct section in the Storage Driver's configuration
    storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name)
    storagedriver_config.load(client)
    storagedriver_config.clean()  # Clean out obsolete values
    storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
    storagedriver_config.save(client, reload_config=reload_config)

    # Best effort cleanup: every failure is followed by a 5 second pause, after 5 failures we give up silently
    for _attempt in range(5):
        try:
            client.dir_delete(directories_to_clean)
            logger.debug('MDS files cleaned up')
        except Exception:
            time.sleep(5)
            logger.debug('Waiting for the MDS service to go down...')
        else:
            break
def _metadata_information(self):
    """
    Returns metadata information about the backend
    :return: Dictionary with key 'nsm_partition_guids', holding the guids of all partitions with the DB role on
             the StorageRouters hosting the '<backend name>-nsm_0' service
    :rtype: dict
    """
    from ovs.dal.hybrids.diskpartition import DiskPartition
    from ovs.dal.lists.servicetypelist import ServiceTypeList

    partition_guids = []
    expected_name = self.backend.name + "-nsm_0"
    nsm_type = ServiceTypeList.get_by_name('NamespaceManager')
    for service in nsm_type.services:
        if service.name != expected_name:
            continue
        partition_guids.extend(partition.guid
                               for disk in service.storagerouter.disks
                               for partition in disk.partitions
                               if DiskPartition.ROLES.DB in partition.roles)
    return {'nsm_partition_guids': partition_guids}
def _on_demote(cluster_ip, master_ip, offline_node_ips=None):
    """
    Handles the demote for the StorageDrivers
    When the demoted node hosts an internally managed 'arakoon-voldrv' service, the voldrv Arakoon cluster is
    shrunk and restarted, the modelled service is deleted and the volumedriver Arakoon configuration is refreshed
    :param cluster_ip: IP of the node to demote
    :type cluster_ip: str
    :param master_ip: IP of the master node (not used by this function)
    :type master_ip: str
    :param offline_node_ips: IPs of nodes which are offline
    :type offline_node_ips: list
    :return: None
    :raises RuntimeError: When the demoted node hosts the service and no other online node hosts one
    """
    _ = master_ip
    offline_ips = [] if offline_node_ips is None else offline_node_ips

    arakoon_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
    service_to_remove = None
    remaining_ips = []
    for service in arakoon_type.services:
        # Externally managed arakoon cluster services do not have StorageRouters
        if service.name != 'arakoon-voldrv' or service.is_internal is not True:
            continue
        if service.storagerouter.ip == cluster_ip:
            service_to_remove = service
            continue
        if service.storagerouter.ip not in offline_ips:
            remaining_ips.append(service.storagerouter.ip)

    if service_to_remove is None:
        # The demoted node is not part of the voldrv cluster, nothing to do
        return
    if len(remaining_ips) == 0:
        raise RuntimeError('Could not find any remaining arakoon nodes for the voldrv cluster')

    StorageDriverController._logger.debug('* Shrink StorageDriver cluster')
    cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|voldrv'))
    installer = ArakoonInstaller(cluster_name=cluster_name)
    installer.load()
    installer.shrink_cluster(removal_ip=cluster_ip,
                             offline_nodes=offline_ips)
    installer.restart_cluster_after_shrinking()
    service_to_remove.delete()
    StorageDriverController._configure_arakoon_to_volumedriver(cluster_name=cluster_name)
def on_demote(cluster_ip, master_ip, offline_node_ips=None):
    """
    Handles the demote for the StorageDrivers
    Shrinks the voldrv Arakoon cluster when the demoted node hosts an internally managed 'arakoon-voldrv'
    service, cleans up the service on the node itself (when it is reachable), restarts the remaining nodes and
    removes the service from the model
    :param cluster_ip: IP of the node to demote
    :type cluster_ip: str
    :param master_ip: IP of the master node (not used by this function)
    :type master_ip: str
    :param offline_node_ips: IPs of nodes which are offline
    :type offline_node_ips: list
    :return: None
    """
    _ = master_ip
    if offline_node_ips is None:
        offline_node_ips = []

    node_is_online = cluster_ip not in offline_node_ips
    client = SSHClient(cluster_ip, username='******') if node_is_online else None

    voldrv_service = None
    remaining_ips = []
    for service in ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON).services:
        # Externally managed arakoon cluster service does not have storage router
        if service.name == 'arakoon-voldrv' and service.is_internal is True:
            service_ip = service.storagerouter.ip
            if service_ip == cluster_ip:
                voldrv_service = service
            elif service_ip not in offline_node_ips:
                remaining_ips.append(service_ip)

    if voldrv_service is None:
        return

    StorageDriverController._logger.debug('* Shrink StorageDriver cluster')
    cluster_name = str(EtcdConfiguration.get('/ovs/framework/arakoon_clusters|voldrv'))
    ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                    cluster_name=cluster_name,
                                    offline_nodes=offline_node_ips)
    if client is not None and ServiceManager.has_service(voldrv_service.name, client=client) is True:
        ServiceManager.stop_service(voldrv_service.name, client=client)
        ServiceManager.remove_service(voldrv_service.name, client=client)
    ArakoonInstaller.restart_cluster_remove(cluster_name, remaining_ips)
    voldrv_service.delete()
    StorageDriverController._configure_arakoon_to_volumedriver(cluster_name=cluster_name)
def migrate(previous_version):
    """
    Migrates from any version to any version, running all migrations required
    If previous_version is for example 1 and this script is at version 3 it will execute two steps:
      - 1 > 2
      - 2 > 3
    Every step only runs when the working version is lower than the version the step introduces
    :param previous_version: The previous version from which to start the migration.
    :return: The version reached after running the required steps: 4 when previous_version is lower than 4,
             previous_version itself otherwise
    """

    working_version = previous_version

    # Version 1 introduced:
    # - The datastore is still empty, add defaults
    if working_version < 1:
        from ovs.dal.hybrids.user import User
        from ovs.dal.hybrids.group import Group
        from ovs.dal.hybrids.role import Role
        from ovs.dal.hybrids.client import Client
        from ovs.dal.hybrids.j_rolegroup import RoleGroup
        from ovs.dal.hybrids.j_roleclient import RoleClient
        from ovs.dal.hybrids.backendtype import BackendType
        from ovs.dal.hybrids.servicetype import ServiceType
        from ovs.dal.hybrids.branding import Branding
        from ovs.dal.lists.backendtypelist import BackendTypeList

        # Create groups
        admin_group = Group()
        admin_group.name = 'administrators'
        admin_group.description = 'Administrators'
        admin_group.save()
        viewers_group = Group()
        viewers_group.name = 'viewers'
        viewers_group.description = 'Viewers'
        viewers_group.save()

        # Create users
        admin = User()
        admin.username = '******'
        # The default password is stored as the hex digest of its (unsalted) SHA-256 hash
        admin.password = hashlib.sha256('admin').hexdigest()
        admin.is_active = True
        admin.group = admin_group
        admin.save()

        # Create internal OAuth 2 clients
        admin_pw_client = Client()
        admin_pw_client.ovs_type = 'INTERNAL'
        admin_pw_client.grant_type = 'PASSWORD'
        admin_pw_client.user = admin
        admin_pw_client.save()
        admin_cc_client = Client()
        admin_cc_client.ovs_type = 'INTERNAL'
        admin_cc_client.grant_type = 'CLIENT_CREDENTIALS'
        # 128 characters picked with random.choice
        # NOTE(review): the 'random' module is not meant for generating secrets - consider a cryptographically secure source
        admin_cc_client.client_secret = ''.join(random.choice(string.ascii_letters + string.digits + '|_=+*#@!/-[]{}<>.?,\'";:~') for _ in range(128))
        admin_cc_client.user = admin
        admin_cc_client.save()

        # Create roles
        read_role = Role()
        read_role.code = 'read'
        read_role.name = 'Read'
        read_role.description = 'Can read objects'
        read_role.save()
        write_role = Role()
        write_role.code = 'write'
        write_role.name = 'Write'
        write_role.description = 'Can write objects'
        write_role.save()
        manage_role = Role()
        manage_role.code = 'manage'
        manage_role.name = 'Manage'
        manage_role.description = 'Can manage the system'
        manage_role.save()

        # Attach groups to roles
        mapping = [
            (admin_group, [read_role, write_role, manage_role]),
            (viewers_group, [read_role])
        ]
        for setting in mapping:
            # setting[0] is the group, setting[1] the list of roles for that group
            for role in setting[1]:
                rolegroup = RoleGroup()
                rolegroup.group = setting[0]
                rolegroup.role = role
                rolegroup.save()
            # Every client of every user in the group gets the roles of the group as well
            for user in setting[0].users:
                for role in setting[1]:
                    for client in user.clients:
                        roleclient = RoleClient()
                        roleclient.client = client
                        roleclient.role = role
                        roleclient.save()

        # Add backends
        for backend_type_info in [('Ceph', 'ceph_s3'), ('Amazon', 'amazon_s3'), ('Swift', 'swift_s3'),
                                  ('Local', 'local'), ('Distributed', 'distributed'), ('ALBA', 'alba')]:
            code = backend_type_info[1]
            # Re-use the backend type when one with this code already exists
            backend_type = BackendTypeList.get_backend_type_by_code(code)
            if backend_type is None:
                backend_type = BackendType()
            backend_type.name = backend_type_info[0]
            backend_type.code = code
            backend_type.save()

        # Add service types
        for service_type_info in ['MetadataServer', 'AlbaProxy', 'Arakoon']:
            service_type = ServiceType()
            service_type.name = service_type_info
            service_type.save()

        # Brandings
        branding = Branding()
        branding.name = 'Default'
        branding.description = 'Default bootstrap theme'
        branding.css = 'bootstrap-default.min.css'
        branding.productname = 'Open vStorage'
        branding.is_default = True
        branding.save()
        slate = Branding()
        slate.name = 'Slate'
        slate.description = 'Dark bootstrap theme'
        slate.css = 'bootstrap-slate.min.css'
        slate.productname = 'Open vStorage'
        slate.is_default = False
        slate.save()

        # We're now at version 1
        working_version = 1

    # Version 2 introduced:
    # - new Descriptor format
    if working_version < 2:
        import imp
        from ovs.dal.helpers import Descriptor
        from ovs.extensions.storage.persistentfactory import PersistentFactory
        client = PersistentFactory.get_client()
        keys = client.prefix('ovs_data')
        for key in keys:
            data = client.get(key)
            modified = False
            for entry in data.keys():
                # Only dict entries carrying a 'source' which refers to the hybrids are regenerated in this step
                if isinstance(data[entry], dict) and 'source' in data[entry] and 'hybrids' in data[entry]['source']:
                    filename = data[entry]['source']
                    # Relative sources are resolved against the DAL directory
                    if not filename.startswith('/'):
                        filename = '/opt/OpenvStorage/ovs/dal/{0}'.format(filename)
                    module = imp.load_source(data[entry]['name'], filename)
                    cls = getattr(module, data[entry]['type'])
                    new_data = Descriptor(cls, cached=False).descriptor
                    # Keep the guid of the old descriptor when it had one
                    if 'guid' in data[entry]:
                        new_data['guid'] = data[entry]['guid']
                    data[entry] = new_data
                    modified = True
            # Only write back (with an increased '_version') when something was changed
            if modified is True:
                data['_version'] += 1
                client.set(key, data)

        # We're now at version 2
        working_version = 2

    # Version 3 introduced:
    # - new Descriptor format
    if working_version < 3:
        import imp
        from ovs.dal.helpers import Descriptor
        from ovs.extensions.storage.persistentfactory import PersistentFactory
        client = PersistentFactory.get_client()
        keys = client.prefix('ovs_data')
        for key in keys:
            data = client.get(key)
            modified = False
            for entry in data.keys():
                # Every dict entry still carrying a 'source' is regenerated; here the source is used as-is
                if isinstance(data[entry], dict) and 'source' in data[entry]:
                    module = imp.load_source(data[entry]['name'], data[entry]['source'])
                    cls = getattr(module, data[entry]['type'])
                    new_data = Descriptor(cls, cached=False).descriptor
                    if 'guid' in data[entry]:
                        new_data['guid'] = data[entry]['guid']
                    data[entry] = new_data
                    modified = True
            if modified is True:
                data['_version'] += 1
                client.set(key, data)

        working_version = 3

    # Version 4 introduced:
    # - Flexible SSD layout
    if working_version < 4:
        import os
        from ovs.dal.lists.storagedriverlist import StorageDriverList
        from ovs.dal.hybrids.j_storagedriverpartition import StorageDriverPartition
        from ovs.dal.hybrids.diskpartition import DiskPartition
        from ovs.dal.lists.servicetypelist import ServiceTypeList
        from ovs.extensions.generic.remote import Remote
        from ovs.extensions.generic.sshclient import SSHClient
        from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration

        # Part 1: model the directories of every MDS service as StorageDriverPartitions
        for service in ServiceTypeList.get_by_name('MetadataServer').services:
            mds_service = service.mds_service
            # Locate the StorageDriver on the service's StorageRouter serving the same vPool as the MDS service
            storagedriver = None
            for current_storagedriver in service.storagerouter.storagedrivers:
                if current_storagedriver.vpool_guid == mds_service.vpool_guid:
                    storagedriver = current_storagedriver
                    break
            # NOTE(review): storagedriver is still None when no match was found, in which case the next line raises an AttributeError
            # Maps every directory to migrate to the (role, sub role) it should get
            tasks = {}
            if storagedriver._data.get('mountpoint_md'):
                tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_md'),
                                               storagedriver.vpool.name,
                                               mds_service.number)] = (DiskPartition.ROLES.DB, StorageDriverPartition.SUBROLE.MDS)
            if storagedriver._data.get('mountpoint_temp'):
                tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_temp'),
                                               storagedriver.vpool.name,
                                               mds_service.number)] = (DiskPartition.ROLES.SCRUB, StorageDriverPartition.SUBROLE.MDS)
            for disk in service.storagerouter.disks:
                for partition in disk.partitions:
                    for directory, (role, subrole) in tasks.iteritems():
                        with Remote(storagedriver.storagerouter.ip, [os], username='******') as remote:
                            # Walk up to the closest existing parent directory (at most up to '/')
                            stat_dir = directory
                            while not remote.os.path.exists(stat_dir) and stat_dir != '/':
                                stat_dir = stat_dir.rsplit('/', 1)[0]
                                if not stat_dir:
                                    stat_dir = '/'
                            # The st_dev value of that directory is matched against the modelled partition.inode
                            inode = remote.os.stat(stat_dir).st_dev
                        if partition.inode == inode:
                            if role not in partition.roles:
                                partition.roles.append(role)
                                partition.save()
                            number = 0
                            migrated = False
                            for sd_partition in storagedriver.partitions:
                                if sd_partition.role == role and sd_partition.sub_role == subrole:
                                    # Already linked to this MDS service: nothing to migrate anymore
                                    if sd_partition.mds_service == mds_service:
                                        migrated = True
                                        break
                                    # Track the highest number in use on this partition for this role/sub role
                                    if sd_partition.partition_guid == partition.guid:
                                        number = max(sd_partition.number, number)
                            if migrated is False:
                                sd_partition = StorageDriverPartition()
                                sd_partition.role = role
                                sd_partition.sub_role = subrole
                                sd_partition.partition = partition
                                sd_partition.storagedriver = storagedriver
                                sd_partition.mds_service = mds_service
                                sd_partition.size = None
                                sd_partition.number = number + 1
                                sd_partition.save()
                                client = SSHClient(storagedriver.storagerouter, username='******')
                                # Make sure the parent of the new path and the original directory exist,
                                # then symlink the new path to the original directory
                                path = sd_partition.path.rsplit('/', 1)[0]
                                if path:
                                    client.dir_create(path)
                                    client.dir_chown(path, 'ovs', 'ovs')
                                client.dir_create(directory)
                                client.dir_chown(directory, 'ovs', 'ovs')
                                client.symlink({sd_partition.path: directory})

        # Part 2: convert the legacy mountpoint properties of every StorageDriver into StorageDriverPartitions
        for storagedriver in StorageDriverList.get_storagedrivers():
            # Maps the original location on disk to the StorageDriverPartition created for it
            migrated_objects = {}
            for disk in storagedriver.storagerouter.disks:
                for partition in disk.partitions:
                    # Process all mountpoints that are unique and don't have a specified size
                    # Structure: legacy key > (role, {folder template: sub role})
                    for key, (role, sr_info) in {'mountpoint_md': (DiskPartition.ROLES.DB, {'metadata_{0}': StorageDriverPartition.SUBROLE.MD,
                                                                                          'tlogs_{0}': StorageDriverPartition.SUBROLE.TLOG}),
                                                 'mountpoint_fragmentcache': (DiskPartition.ROLES.WRITE, {'fcache_{0}': StorageDriverPartition.SUBROLE.FCACHE}),
                                                 'mountpoint_foc': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                                                                'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
                                                 'mountpoint_dtl': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                                                                'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
                                                 'mountpoint_readcaches': (DiskPartition.ROLES.READ, {'': None}),
                                                 'mountpoint_writecaches': (DiskPartition.ROLES.WRITE, {'sco_{0}': StorageDriverPartition.SUBROLE.SCO})}.iteritems():
                        if key in storagedriver._data:
                            is_list = isinstance(storagedriver._data[key], list)
                            # Iterate over a copy, since entries are removed from the original while looping
                            entries = storagedriver._data[key][:] if is_list is True else [storagedriver._data[key]]
                            for entry in entries:
                                if not entry:
                                    # Empty values are simply dropped
                                    if is_list:
                                        storagedriver._data[key].remove(entry)
                                        if len(storagedriver._data[key]) == 0:
                                            del storagedriver._data[key]
                                    else:
                                        del storagedriver._data[key]
                                else:
                                    with Remote(storagedriver.storagerouter.ip, [os], username='******') as remote:
                                        inode = remote.os.stat(entry).st_dev
                                    if partition.inode == inode:
                                        if role not in partition.roles:
                                            partition.roles.append(role)
                                            partition.save()
                                        for folder, subrole in sr_info.iteritems():
                                            number = 0
                                            # NOTE(review): 'migrated' is never set to True in this loop, so a partition is always created below
                                            migrated = False
                                            for sd_partition in storagedriver.partitions:
                                                if sd_partition.role == role and sd_partition.sub_role == subrole:
                                                    if sd_partition.partition_guid == partition.guid:
                                                        number = max(sd_partition.number, number)
                                            if migrated is False:
                                                sd_partition = StorageDriverPartition()
                                                sd_partition.role = role
                                                sd_partition.sub_role = subrole
                                                sd_partition.partition = partition
                                                sd_partition.storagedriver = storagedriver
                                                sd_partition.size = None
                                                sd_partition.number = number + 1
                                                sd_partition.save()
                                                # An empty folder template means the entry itself is the source
                                                if folder:
                                                    source = '{0}/{1}'.format(entry, folder.format(storagedriver.vpool.name))
                                                else:
                                                    source = entry
                                                client = SSHClient(storagedriver.storagerouter, username='******')
                                                path = sd_partition.path.rsplit('/', 1)[0]
                                                if path:
                                                    client.dir_create(path)
                                                    client.dir_chown(path, 'ovs', 'ovs')
                                                client.symlink({sd_partition.path: source})
                                                migrated_objects[source] = sd_partition
                                        # The legacy value has been processed, remove it from the raw data
                                        if is_list:
                                            storagedriver._data[key].remove(entry)
                                            if len(storagedriver._data[key]) == 0:
                                                del storagedriver._data[key]
                                        else:
                                            del storagedriver._data[key]
                                        storagedriver.save()
            # 'mountpoint_bfs' is moved to 'mountpoint_dfs', storing None instead of an empty value
            if 'mountpoint_bfs' in storagedriver._data:
                storagedriver.mountpoint_dfs = storagedriver._data['mountpoint_bfs']
                if not storagedriver.mountpoint_dfs:
                    storagedriver.mountpoint_dfs = None
                del storagedriver._data['mountpoint_bfs']
                storagedriver.save()
            if 'mountpoint_temp' in storagedriver._data:
                del storagedriver._data['mountpoint_temp']
                storagedriver.save()
            if migrated_objects:
                print 'Loading sizes'
                config = StorageDriverConfiguration('storagedriver', storagedriver.vpool.name)
                config.load(SSHClient(storagedriver.storagerouter, username='******'))
                for readcache in config.configuration.get('content_addressed_cache', {}).get('clustercache_mount_points', []):
                    # For read caches the parent directory of the configured path is used as lookup key
                    path = readcache.get('path', '').rsplit('/', 1)[0]
                    # strip('KiB') removes leading/trailing 'K', 'i' and 'B' characters; the result is multiplied by 1024
                    size = int(readcache['size'].strip('KiB')) * 1024 if 'size' in readcache else None
                    if path in migrated_objects:
                        # NOTE(review): size is None when no 'size' is configured, in which case long(size) raises a TypeError
                        migrated_objects[path].size = long(size)
                        migrated_objects[path].save()
                for writecache in config.configuration.get('scocache', {}).get('scocache_mount_points', []):
                    path = writecache.get('path', '')
                    size = int(writecache['size'].strip('KiB')) * 1024 if 'size' in writecache else None
                    if path in migrated_objects:
                        # NOTE(review): same remark as for the read caches regarding long(None)
                        migrated_objects[path].size = long(size)
                        migrated_objects[path].save()

        working_version = 4

    return working_version
def test_arakoon_collapse(self):
    """
    Validate GenericController.collapse_arakoon for one or more Arakoon clusters of every cluster type

    The clusters are spread over 2 StorageRouters of which the 2nd one is made unreachable. For some clusters the
    collapse command is mocked (collapse succeeds), for others it is not (collapse fails) and one cluster is
    externally managed. Afterwards the presence and the severity of the logged messages is verified per cluster.
    """
    # Model 2 StorageRouters and start off without any mocked command results
    structure = DalHelper.build_dal_structure(structure={'storagerouters': [1, 2]})
    sr_1 = structure['storagerouters'][1]
    sr_2 = structure['storagerouters'][2]
    for sr in (sr_1, sr_2):
        MockedSSHClient._run_returns[sr.ip] = {}

    # Every Arakoon cluster type must be represented, the assert below guards this
    cluster_types = ServiceType.ARAKOON_CLUSTER_TYPES
    clusters_to_create = {cluster_types.SD: [{'name': 'unittest-voldrv', 'internal': True, 'success': True}],
                          cluster_types.CFG: [{'name': 'unittest-cacc', 'internal': True, 'success': True}],
                          cluster_types.FWK: [{'name': 'unittest-ovsdb', 'internal': True, 'success': False}],
                          cluster_types.ABM: [{'name': 'unittest-cluster-1-abm', 'internal': True, 'success': False},
                                              {'name': 'unittest-random-abm-name', 'internal': False, 'success': True}],
                          cluster_types.NSM: [{'name': 'unittest-cluster-1-nsm_0', 'internal': True, 'success': True}]}
    self.assertEqual(first=sorted(clusters_to_create.keys()),
                     second=sorted(ServiceType.ARAKOON_CLUSTER_TYPES.keys()),
                     msg='An Arakoon cluster type has been removed or added, please update this test accordingly')

    # ALBA manager and namespace manager clusters have a dedicated service type, all others use the Arakoon type
    service_type_names = {cluster_types.ABM: ServiceType.SERVICE_TYPES.ALBA_MGR,
                          cluster_types.NSM: ServiceType.SERVICE_TYPES.NS_MGR}

    # Create the clusters and their services, keeping track of the expected outcome of the collapse
    failed_clusters = []
    external_clusters = []
    successful_clusters = []
    for cluster_type, cluster_infos in clusters_to_create.iteritems():
        filesystem = cluster_type == ServiceType.ARAKOON_CLUSTER_TYPES.CFG
        for cluster_info in cluster_infos:
            internal = cluster_info['internal']
            cluster_name = cluster_info['name']

            base_dir = DalHelper.CLUSTER_DIR.format(cluster_name)
            arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
            arakoon_installer.create_cluster(cluster_type=cluster_type,
                                             ip=sr_1.ip,
                                             base_dir=base_dir,
                                             internal=internal)
            arakoon_installer.start_cluster()
            arakoon_installer.extend_cluster(new_ip=sr_2.ip, base_dir=base_dir)

            service_name = ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name)
            service_type_name = service_type_names.get(cluster_type, ServiceType.SERVICE_TYPES.ARAKOON)
            service_type = ServiceTypeList.get_by_name(service_type_name)

            if internal is not True:
                # Externally managed cluster: service without StorageRouter, no collapse expected
                DalHelper.create_service(service_name=service_name, service_type=service_type)
                external_clusters.append(cluster_name)
                continue

            for sr in (sr_1, sr_2):
                DalHelper.create_service(service_name=service_name,
                                         service_type=service_type,
                                         storagerouter=sr,
                                         ports=arakoon_installer.ports[sr.ip])

            if cluster_info['success'] is not True:
                # For successful False clusters we don't emulate the collapse, thus making it fail
                failed_clusters.append(cluster_name)
                continue

            if filesystem is True:
                config_path = ArakoonClusterConfig.CONFIG_FILE.format(cluster_name)
            else:
                config_key = ArakoonClusterConfig.CONFIG_KEY.format(cluster_name)
                config_path = Configuration.get_configuration_path(config_key)
            MockedSSHClient._run_returns[sr_1.ip]['arakoon --collapse-local 1 2 -config {0}'.format(config_path)] = None
            MockedSSHClient._run_returns[sr_2.ip]['arakoon --collapse-local 2 2 -config {0}'.format(config_path)] = None
            successful_clusters.append(cluster_name)

    # Start collapse and make it fail for all clusters on StorageRouter 2
    SSHClient._raise_exceptions[sr_2.ip] = {'users': ['ovs'],
                                            'exception': UnableToConnectException('No route to host')}
    GenericController.collapse_arakoon()

    # Verify all log messages for each type of cluster
    generic_logs = Logger._logs.get('lib', {})
    for cluster_name in successful_clusters + failed_clusters + external_clusters:
        collect = ('DEBUG', 'Collecting info for cluster {0}'.format(cluster_name))
        unreachable = ('ERROR', 'Could not collapse any cluster on {0} (not reachable)'.format(sr_2.name))
        collapse_end = ('DEBUG', 'Collapsing cluster {0} on {1} completed'.format(cluster_name, sr_1.ip))
        collapse_start = ('DEBUG', 'Collapsing cluster {0} on {1}'.format(cluster_name, sr_1.ip))
        collapse_failed = ('ERROR', 'Collapsing cluster {0} on {1} failed'.format(cluster_name, sr_1.ip))

        if cluster_name in successful_clusters:
            must_be_logged = True
            messages_to_validate = [collect, unreachable, collapse_start, collapse_end]
        elif cluster_name in failed_clusters:
            must_be_logged = True
            messages_to_validate = [collect, unreachable, collapse_start, collapse_failed]
        else:
            # External clusters are skipped altogether, so none of these messages may show up
            must_be_logged = False
            messages_to_validate = [collect, collapse_start, collapse_end]

        for severity, message in messages_to_validate:
            if must_be_logged is False:
                self.assertNotIn(member=message,
                                 container=generic_logs,
                                 msg='Did not expect to find log message: {0}'.format(message))
                continue
            self.assertIn(member=message,
                          container=generic_logs,
                          msg='Expected to find log message: {0}'.format(message))
            self.assertEqual(first=severity,
                             second=generic_logs[message],
                             msg='Log message {0} is of severity {1} expected {2}'.format(message, generic_logs[message], severity))

    # Collapse should always have a 'finished' message since each cluster should be attempted to be collapsed
    for general_message in ['Arakoon collapse started', 'Arakoon collapse finished']:
        self.assertIn(member=general_message,
                      container=generic_logs,
                      msg='Expected to find log message: {0}'.format(general_message))
def prepare_mds_service(client, storagerouter, vpool, fresh_only=True, reload_config=False):
    """
    Prepares an MDS service:
    * Creates the required configuration
    * Sets up the service files

    Assumes the StorageRouter and VPool are already configured with a StorageDriver and that all model-wise
    configuration regarding both is completed.
    :param client: Client handed to System.get_free_ports and used to load/save the StorageDriver configuration
    :param storagerouter: StorageRouter on which the MDS service will be modeled
    :param vpool: vPool for which the MDS service will be modeled
    :param fresh_only: When True, nothing is done if the vPool already has an MDS service on this StorageRouter
    :param reload_config: Passed on as 'reload_config' when saving the StorageDriver configuration
    :return: The newly modeled MDS service or None when fresh_only prevented the creation
    :raises RuntimeError: When the StorageRouter has no partition with the DB role or none with the SCRUB role
    :raises IndexError: When the vPool has no StorageDriver on the StorageRouter
    """
    from ovs.lib.storagedriver import StorageDriverController

    mdsservice_type = ServiceTypeList.get_by_name('MetadataServer')
    storagedriver = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid][0]

    # Fetch service sequence number
    service_number = -1
    for existing_mds_service in vpool.mds_services:
        if existing_mds_service.service.storagerouter_guid == storagerouter.guid:
            service_number = max(existing_mds_service.number, service_number)

    if fresh_only is True and service_number >= 0:
        return None  # There are already one or more MDS services running, aborting
    service_number += 1

    # Find free port
    occupied_ports = []
    for service in mdsservice_type.services:
        if service.storagerouter_guid == storagerouter.guid:
            occupied_ports.append(service.ports[0])
    port = System.get_free_ports(Configuration.get('ovs.ports.mds'), exclude=occupied_ports, nr=1, client=client)[0]

    # Locate the required partitions BEFORE touching the model, otherwise a missing partition would leave a
    # half-configured Service/MDSService behind
    scrub_partition = None
    db_partition = None
    for disk in storagerouter.disks:
        for partition in disk.partitions:
            if DiskPartition.ROLES.DB in partition.roles:
                db_partition = partition
            if DiskPartition.ROLES.SCRUB in partition.roles:
                scrub_partition = partition
    if scrub_partition is None or db_partition is None:
        raise RuntimeError('Could not find DB or SCRUB partition on StorageRouter {0}'.format(storagerouter.name))

    # Add service to the model
    service = DalService()
    service.name = 'metadataserver_{0}_{1}'.format(vpool.name, service_number)
    service.type = mdsservice_type
    service.storagerouter = storagerouter
    service.ports = [port]
    service.save()
    mds_service = MDSService()
    mds_service.service = service
    mds_service.vpool = vpool
    mds_service.number = service_number
    mds_service.save()

    StorageDriverController.add_storagedriverpartition(storagedriver, {'size': None,
                                                                       'role': DiskPartition.ROLES.DB,
                                                                       'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                       'partition': db_partition,
                                                                       'mds_service': mds_service})
    StorageDriverController.add_storagedriverpartition(storagedriver, {'size': None,
                                                                       'role': DiskPartition.ROLES.SCRUB,
                                                                       'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                       'partition': scrub_partition,
                                                                       'mds_service': mds_service})

    # Collect all MDS services of this vPool on this StorageRouter (including the new one).
    # A dedicated loop variable is used so the newly created 'mds_service' (the return value) is not overwritten
    mds_nodes = []
    for node_service in mdsservice_type.services:
        if node_service.storagerouter_guid != storagerouter.guid:
            continue
        node_mds_service = node_service.mds_service
        if node_mds_service.vpool_guid != vpool.guid:
            continue
        db_directory = [sd_partition.path for sd_partition in node_mds_service.storagedriver_partitions
                        if sd_partition.role == DiskPartition.ROLES.DB and sd_partition.sub_role == StorageDriverPartition.SUBROLE.MDS][0]
        scratch_directory = [sd_partition.path for sd_partition in node_mds_service.storagedriver_partitions
                             if sd_partition.role == DiskPartition.ROLES.SCRUB and sd_partition.sub_role == StorageDriverPartition.SUBROLE.MDS][0]
        mds_nodes.append({'host': node_service.storagerouter.ip,
                          'port': node_service.ports[0],
                          'db_directory': db_directory,
                          'scratch_directory': scratch_directory})

    # Generate the correct section in the Storage Driver's configuration
    storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name)
    storagedriver_config.load(client)
    storagedriver_config.clean()  # Clean out obsolete values
    storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
    storagedriver_config.save(client, reload_config=reload_config)

    return mds_service
def build_dal_structure(structure, previous_structure=None):
    """
    Builds a model structure
    Example:
        structure = DalHelper.build_dal_structure(
            {'vpools': [1],
             'domains': [],
             'storagerouters': [1],
             'storagedrivers': [(1, 1, 1)],  # (<id>, <vpool_id>, <storagerouter_id>)
             'mds_services': [(1, 1)],  # (<id>, <storagedriver_id>)
             'vdisks': [(1, 1, 1, 1)],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_id or None>)
             'storagerouter_domains': []}  # (<id>, <storagerouter_id>, <domain_id>, <backup>)
        )
    :param structure: Description of the objects to model, keyed by object type (see example above)
    :type structure: dict
    :param previous_structure: Result of an earlier call. Objects whose id is already present are not re-created.
                               The dicts inside it are extended in place with the newly created objects
    :type previous_structure: dict
    :return: All modeled objects, per object type a dict mapping the id on the object. 'services' is keyed by
             service name and 'service_types' by service type name
    :rtype: dict
    """
    Configuration.set(key=Configuration.EDITION_KEY, value=PackageFactory.EDITION_ENTERPRISE)

    # Continue on the objects of a previous run (if any)
    if previous_structure is None:
        previous_structure = {}
    vdisks = previous_structure.get('vdisks', {})
    vpools = previous_structure.get('vpools', {})
    domains = previous_structure.get('domains', {})
    services = previous_structure.get('services', {})
    mds_services = previous_structure.get('mds_services', {})
    storagerouters = previous_structure.get('storagerouters', {})
    storagedrivers = previous_structure.get('storagedrivers', {})
    storagerouter_domains = previous_structure.get('storagerouter_domains', {})

    # Make sure every known service type is modeled
    service_types = {}
    for service_type_name in ServiceType.SERVICE_TYPES.values():
        service_type = ServiceTypeList.get_by_name(service_type_name)
        if service_type is None:
            service_type = ServiceType()
            service_type.name = service_type_name
            service_type.save()
        service_types[service_type_name] = service_type
    srclients = {}
    for domain_id in structure.get('domains', []):
        if domain_id not in domains:
            domain = Domain()
            domain.name = 'domain_{0}'.format(domain_id)
            domain.save()
            domains[domain_id] = domain
    for vpool_id in structure.get('vpools', []):
        if vpool_id not in vpools:
            vpool = VPool()
            vpool.name = str(vpool_id)
            vpool.status = 'RUNNING'
            vpool.metadata = {'backend': {}, 'caching_info': {}}
            vpool.metadata_store_bits = 5
            vpool.save()
            vpools[vpool_id] = vpool
        else:
            vpool = vpools[vpool_id]
        # A client and the MDS/scrub configuration are (re)set for every requested vPool, new or not
        srclients[vpool_id] = StorageRouterClient(vpool.guid, None)
        Configuration.set('/ovs/vpools/{0}/mds_config|mds_tlogs'.format(vpool.guid), 100)
        Configuration.set('/ovs/vpools/{0}/mds_config|mds_safety'.format(vpool.guid), 2)
        Configuration.set('/ovs/vpools/{0}/mds_config|mds_maxload'.format(vpool.guid), 75)
        Configuration.set('/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid), json.dumps({}, indent=4), raw=True)
    for sr_id in structure.get('storagerouters', []):
        if sr_id not in storagerouters:
            storagerouter = StorageRouter()
            storagerouter.name = str(sr_id)
            storagerouter.ip = '10.0.0.{0}'.format(sr_id)
            storagerouter.rdma_capable = False
            storagerouter.node_type = 'MASTER'
            storagerouter.machine_id = str(sr_id)
            storagerouter.save()
            storagerouters[sr_id] = storagerouter
            # Every new StorageRouter gets 1 SSD disk with 1 partition carrying the DB and SCRUB roles
            disk = Disk()
            disk.storagerouter = storagerouter
            disk.state = 'OK'
            disk.name = '/dev/uda'
            disk.size = 1 * 1024**4
            disk.is_ssd = True
            disk.aliases = ['/dev/uda']
            disk.save()
            partition = DiskPartition()
            partition.offset = 0
            partition.size = disk.size
            partition.aliases = ['/dev/uda-1']
            partition.state = 'OK'
            partition.mountpoint = '/tmp/unittest/sr_{0}/disk_1/partition_1'.format(sr_id)
            partition.disk = disk
            partition.roles = [DiskPartition.ROLES.DB, DiskPartition.ROLES.SCRUB]
            partition.save()
        else:
            storagerouter = storagerouters[sr_id]

        # noinspection PyProtectedMember
        System._machine_id[storagerouter.ip] = str(sr_id)
        # Each StorageRouter gets its own block of 100 ports per port type, derived from its id
        mds_start = 10000 + 100 * (sr_id - 1)
        mds_end = 10000 + 100 * sr_id - 1
        arakoon_start = 20000 + 100 * (sr_id - 1)
        storagedriver_start = 30000 + 100 * (sr_id - 1)
        storagedriver_end = 30000 + 100 * sr_id - 1
        Configuration.initialize_host(host_id=sr_id,
                                      port_info={'mds': [mds_start, mds_end],
                                                 'arakoon': arakoon_start,
                                                 'storagedriver': [storagedriver_start, storagedriver_end]})

    for sd_id, vpool_id, sr_id in structure.get('storagedrivers', ()):
        if sd_id not in storagedrivers:
            storagedriver = StorageDriver()
            storagedriver.vpool = vpools[vpool_id]
            storagedriver.storagerouter = storagerouters[sr_id]
            storagedriver.name = str(sd_id)
            storagedriver.mountpoint = '/'
            storagedriver.cluster_ip = storagerouters[sr_id].ip
            storagedriver.storage_ip = '10.0.1.{0}'.format(sr_id)
            storagedriver.storagedriver_id = str(sd_id)
            storagedriver.ports = {'management': 1, 'xmlrpc': 2, 'dtl': 3, 'edge': 4}
            storagedriver.save()
            storagedrivers[sd_id] = storagedriver
            DalHelper.set_vpool_storage_driver_configuration(vpool=vpools[vpool_id], storagedriver=storagedriver)
    for mds_id, sd_id in structure.get('mds_services', ()):
        if mds_id not in mds_services:
            sd = storagedrivers[sd_id]
            s_id = '{0}-{1}'.format(sd.storagerouter.name, mds_id)
            service = Service()
            service.name = s_id
            service.storagerouter = sd.storagerouter
            service.ports = [mds_id]  # The MDS id doubles as the port of the service
            service.type = service_types['MetadataServer']
            service.save()
            services[s_id] = service
            mds_service = MDSService()
            mds_service.service = service
            mds_service.number = 0
            mds_service.capacity = 10
            mds_service.vpool = sd.vpool
            mds_service.save()
            mds_services[mds_id] = mds_service
            # The first partition of the first disk of the StorageRouter is used as DB partition for the MDS
            StorageDriverController.add_storagedriverpartition(sd, {'size': None,
                                                                    'role': DiskPartition.ROLES.DB,
                                                                    'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                    'partition': sd.storagerouter.disks[0].partitions[0],
                                                                    'mds_service': mds_service})
    for vdisk_id, storage_driver_id, vpool_id, mds_id in structure.get('vdisks', ()):
        if vdisk_id not in vdisks:
            vpool = vpools[vpool_id]
            devicename = 'vdisk_{0}'.format(vdisk_id)
            # Without an MDS id the volume is created with an empty list of MDS services
            mds_backend_config = DalHelper.generate_mds_metadata_backend_config([] if mds_id is None else [mds_services[mds_id]])
            volume_id = srclients[vpool_id].create_volume(devicename, mds_backend_config, 0, str(storage_driver_id))
            vdisk = VDisk()
            vdisk.name = str(vdisk_id)
            vdisk.devicename = devicename
            vdisk.volume_id = volume_id
            vdisk.vpool = vpool
            vdisk.size = 0
            vdisk.save()
            vdisk.reload_client('storagedriver')
            vdisks[vdisk_id] = vdisk
    for srd_id, sr_id, domain_id, backup in structure.get('storagerouter_domains', ()):
        if srd_id not in storagerouter_domains:
            sr_domain = StorageRouterDomain()
            sr_domain.backup = backup
            sr_domain.domain = domains[domain_id]
            sr_domain.storagerouter = storagerouters[sr_id]
            sr_domain.save()
            storagerouter_domains[srd_id] = sr_domain
    return {'vdisks': vdisks,
            'vpools': vpools,
            'domains': domains,
            'services': services,
            'mds_services': mds_services,
            'service_types': service_types,
            'storagerouters': storagerouters,
            'storagedrivers': storagedrivers,
            'storagerouter_domains': storagerouter_domains}
def get_partition_info(storagerouter_guid):
    """
    Collects, per disk partition role, the size and usage information of all partitions of a StorageRouter
    :param storagerouter_guid: Guid of the StorageRouter
    :type storagerouter_guid: str
    :return: Per role a list with 1 entry per partition having that role (ssd, guid, size, in_use, usable,
             available, mountpoint, storagerouter_guid)
    :rtype: dict
    """
    storagerouter = StorageRouter(storagerouter_guid)
    client = SSHClient(endpoint=storagerouter)
    md_server_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER)
    services_mds = md_server_type.services
    arakoon_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
    services_arakoon = []
    for arakoon_service in arakoon_type.services:
        if arakoon_service.name != 'arakoon-ovsdb' and arakoon_service.is_internal is True:
            services_arakoon.append(arakoon_service)
    logger = StorageRouterController._logger

    partitions = {role: [] for role in DiskPartition.ROLES}
    for disk in storagerouter.disks:
        for disk_partition in disk.partitions:
            claimed_space_by_fwk = 0
            used_space_by_system = 0
            available_space_by_system = 0

            # Space claimed in the model and space effectively used on disk by the StorageDriver partitions
            for sd_partition in disk_partition.storagedrivers:
                if sd_partition.size is not None:
                    claimed_space_by_fwk += sd_partition.size
                if not client.dir_exists(sd_partition.path):
                    continue
                try:
                    du_output = client.run(['du', '-B', '1', '-d', '0', sd_partition.path], timeout=5)
                    used_space_by_system += int(du_output.split('\t')[0])
                except Exception as ex:
                    logger.warning('Failed to get directory usage for {0}. {1}'.format(sd_partition.path, ex))

            # Available space as reported by 'df', the first alias for which this succeeds is used
            if disk_partition.mountpoint is not None:
                for alias in disk_partition.aliases:
                    logger.info('Verifying disk partition usage by checking path {0}'.format(alias))
                    disk_partition_device = client.file_read_link(path=alias)
                    try:
                        df_output = client.run(['df', '-B', '1', '--output=avail', disk_partition_device], timeout=5)
                        available_space_by_system = int(df_output.splitlines()[-1])
                        break
                    except Exception as ex:
                        logger.warning('Failed to get partition usage for {0}. {1}'.format(disk_partition.mountpoint, ex))

            for role in disk_partition.roles:
                if disk_partition.size is None:
                    size = 0
                else:
                    size = disk_partition.size

                if available_space_by_system > 0:
                    # Take available space reported by df then add back used by roles so that the only used space reported is space not managed by us
                    available = available_space_by_system + used_space_by_system - claimed_space_by_fwk
                else:
                    available = size - claimed_space_by_fwk  # Subtract size for roles which have already been claimed by other vpools (but not necessarily already been fully used)

                in_use = any(junction for junction in disk_partition.storagedrivers if junction.role == role)
                if role == DiskPartition.ROLES.DB:
                    # Internal Arakoon services (other than arakoon-ovsdb) and MDS services on this StorageRouter use the DB role too
                    arakoon_present = any(svc.storagerouter_guid == storagerouter_guid for svc in services_arakoon)
                    mds_present = any(svc.storagerouter_guid == storagerouter_guid for svc in services_mds)
                    in_use = in_use or arakoon_present or mds_present

                partition_info = {'ssd': disk.is_ssd,
                                  'guid': disk_partition.guid,
                                  'size': size,
                                  'in_use': in_use,
                                  'usable': True,  # Sizes smaller than 1GiB and smaller than 5% of largest WRITE partition will be un-usable
                                  'available': max(available, 0),
                                  'mountpoint': disk_partition.folder,  # Equals to mount point unless mount point is root ('/'), then we pre-pend mount point with '/mnt/storage'
                                  'storagerouter_guid': storagerouter_guid}
                partitions[role].append(partition_info)

    # Strip out WRITE caches which are smaller than 5% of largest write cache size and smaller than 1GiB
    write_partitions = partitions[DiskPartition.ROLES.WRITE]
    writecache_sizes = [write_partition['available'] for write_partition in write_partitions]
    if len(writecache_sizes) > 0:
        largest_write_cache = max(writecache_sizes)
    else:
        largest_write_cache = 0
    for write_partition, writecache_size in zip(write_partitions, writecache_sizes):
        if writecache_size < largest_write_cache * 5 / 100 or writecache_size < 1024 ** 3:
            write_partition['usable'] = False

    return partitions
def _voldrv_arakoon_checkup(create_cluster):
    """
    Creates the 'voldrv' Arakoon cluster when requested and not yet modeled, and extends an existing cluster to
    every master StorageRouter which has a DB partition but is not yet part of the cluster
    :param create_cluster: When True a new cluster is created if no 'arakoon-voldrv' service is modeled yet and
                           at least 1 master StorageRouter with a DB partition is available
    :type create_cluster: bool
    :return: None
    """
    def add_service(service_storagerouter, arakoon_result):
        """
        Add a service to the storage router
        :param service_storagerouter: Storage Router to add the service to
        :type service_storagerouter: StorageRouter
        :param arakoon_result: Port information
        :type arakoon_result: Dictionary
        :return: The newly created and added service
        """
        new_service = Service()
        new_service.name = service_name
        new_service.type = service_type
        new_service.ports = [arakoon_result['client_port'], arakoon_result['messaging_port']]
        new_service.storagerouter = service_storagerouter
        new_service.save()
        return new_service

    cluster_name = 'voldrv'
    service_name = 'arakoon-voldrv'
    service_type = ServiceTypeList.get_by_name('Arakoon')

    # Services (and the IPs they run on) currently making up the cluster according to the model
    current_services = []
    current_ips = []
    for service in service_type.services:
        if service.name == service_name:
            current_services.append(service)
            current_ips.append(service.storagerouter.ip)

    # Only master StorageRouters with a DB partition can host a node of the cluster
    available_storagerouters = {}
    for storagerouter in StorageRouterList.get_masters():
        storagerouter.invalidate_dynamics(['partition_config'])
        if len(storagerouter.partition_config[DiskPartition.ROLES.DB]) > 0:
            available_storagerouters[storagerouter] = DiskPartition(storagerouter.partition_config[DiskPartition.ROLES.DB][0])

    if create_cluster is True and len(current_services) == 0 and len(available_storagerouters) > 0:
        storagerouter, partition = available_storagerouters.items()[0]
        result = ArakoonInstaller.create_cluster(cluster_name=cluster_name,
                                                 ip=storagerouter.ip,
                                                 base_dir=partition.folder)
        current_services.append(add_service(storagerouter, result))
        ArakoonInstaller.restart_cluster_add(cluster_name, current_ips, storagerouter.ip)
        current_ips.append(storagerouter.ip)
        StorageDriverController._configure_arakoon_to_volumedriver()

    # Extend the cluster to the available StorageRouters which are not part of it yet
    if 0 < len(current_services) < len(available_storagerouters):
        for storagerouter, partition in available_storagerouters.iteritems():
            if storagerouter.ip in current_ips:
                continue
            result = ArakoonInstaller.extend_cluster(current_services[0].storagerouter.ip,
                                                     storagerouter.ip,
                                                     cluster_name,
                                                     partition.folder)
            add_service(storagerouter, result)
            current_ips.append(storagerouter.ip)
            ArakoonInstaller.restart_cluster_add(cluster_name, current_ips, storagerouter.ip)
        StorageDriverController._configure_arakoon_to_volumedriver()
def remove_mds_service(mds_service, vpool, reconfigure, allow_offline=False):
    """
    Removes an MDS service
    :param mds_service: The MDS service to remove
    :param vpool: The vPool for which the MDS service will be removed
    :param reconfigure: Indicates whether reconfiguration is required
    :param allow_offline: Indicates whether it's OK that the node for which mds services are cleaned is offline
    :return: None
    :raises RuntimeError: When the MDS service still serves vDisks and allow_offline is False
    :raises UnableToConnectException: When the node cannot be reached and allow_offline is False
    """
    if len(mds_service.vdisks_guids) > 0 and allow_offline is False:
        raise RuntimeError('Cannot remove MDSService that is still serving disks')

    mdsservice_type = ServiceTypeList.get_by_name('MetadataServer')

    # Resolve the related objects up front, they are still needed after the model objects have been deleted
    service = mds_service.service
    storagerouter = service.storagerouter

    # Clean up model
    directories_to_clean = []
    for sd_partition in mds_service.storagedriver_partitions:
        directories_to_clean.append(sd_partition.path)
        sd_partition.delete()

    if allow_offline is True:  # Certain vdisks might still be attached to this offline MDS service --> Delete relations
        for junction in mds_service.vdisks:
            junction.delete()

    mds_service.delete()
    service.delete()

    try:
        client = SSHClient(storagerouter)
        if reconfigure is True:
            # Generate new mds_nodes section based on the MDS services remaining for this vPool on the StorageRouter
            mds_nodes = []
            for remaining_service in mdsservice_type.services:
                if remaining_service.storagerouter_guid != storagerouter.guid:
                    continue
                remaining_mds_service = remaining_service.mds_service
                if remaining_mds_service.vpool_guid != vpool.guid:
                    continue
                sdp = [sd_partition.path for sd_partition in remaining_mds_service.storagedriver_partitions
                       if sd_partition.role == DiskPartition.ROLES.DB]
                mds_nodes.append({'host': remaining_service.storagerouter.ip,
                                  'port': remaining_service.ports[0],
                                  'db_directory': sdp[0],
                                  'scratch_directory': sdp[0]})

            # Generate the correct section in the Storage Driver's configuration
            storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name)
            storagedriver_config.load(client)
            storagedriver_config.clean()  # Clean out obsolete values
            storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
            storagedriver_config.save(client, reload_config=reconfigure)

        # Removing the directories can fail as long as the MDS service is still running, so retry a few times
        tries = 5
        while tries > 0:
            try:
                root_client = SSHClient(storagerouter, username='******')
                root_client.dir_delete(directories=directories_to_clean,
                                       follow_symlinks=True)
                for dir_name in directories_to_clean:
                    logger.debug('Recursively removed {0}'.format(dir_name))
                break
            except Exception:
                logger.debug('Waiting for the MDS service to go down...')
                tries -= 1
                if tries == 0:
                    raise  # Out of attempts, no point in waiting any longer
                time.sleep(5)
    except UnableToConnectException:
        if allow_offline is True:
            logger.info('Allowed offline node during mds service removal')
        else:
            raise
def get_service_types():
    """
    Fetch all service types via ServiceTypeList.get_servicetypes
    :return: Data-object list of ServiceTypes
    """
    service_types = ServiceTypeList.get_servicetypes()
    return service_types
def prepare_mds_service(client, storagerouter, vpool, fresh_only=True, start=False):
    """
    Prepares an MDS service:
    * Creates the required configuration
    * Sets up the service files

    Assumes the StorageRouter and VPool are already configured with a StorageDriver and that all model-wise
    configuration regarding both is completed.
    :param client: Client used to run commands and remote python on the node and to load/save the configuration
    :param storagerouter: StorageRouter on which the MDS service will be set up
    :param vpool: vPool for which the MDS service will be set up
    :param fresh_only: When True, nothing is done if the vPool already has an MDS service on this StorageRouter
    :param start: When True, the newly added service is enabled and started as well
    :return: The newly modeled MDS service or None when fresh_only prevented the creation
    """
    mdsservice_type = ServiceTypeList.get_by_name('MetadataServer')
    # Raises an IndexError when the vPool has no StorageDriver on this StorageRouter
    storagedriver = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid][0]

    # Fetch service sequence number
    service_number = -1
    for mds_service in vpool.mds_services:
        if mds_service.service.storagerouter_guid == storagerouter.guid:
            service_number = max(mds_service.number, service_number)

    if fresh_only is True and service_number >= 0:
        return None  # There are already one or more MDS services running, aborting
    service_number += 1

    # Find free port
    occupied_ports = []
    for service in mdsservice_type.services:
        if service.storagerouter_guid == storagerouter.guid:
            occupied_ports.append(service.ports[0])
    port = System.get_free_ports(Configuration.get('ovs.ports.mds'),
                                 exclude=occupied_ports, nr=1, client=client)[0]

    # Add service to the model
    service = Service()
    service.name = 'metadataserver_{0}_{1}'.format(vpool.name, service_number)
    service.type = mdsservice_type
    service.storagerouter = storagerouter
    service.ports = [port]
    service.save()
    mds_service = MDSService()
    mds_service.service = service
    mds_service.vpool = vpool
    mds_service.number = service_number
    mds_service.save()

    # Prepare some directories
    # NOTE(review): the vPool name ends up unquoted in these shell commands - confirm it is validated upstream
    scratch_dir = '{0}/mds_{1}_{2}'.format(storagedriver.mountpoint_temp, vpool.name, service_number)
    rocksdb_dir = '{0}/mds_{1}_{2}'.format(storagedriver.mountpoint_md, vpool.name, service_number)
    client.run('mkdir -p {0}'.format(scratch_dir))
    client.run('mkdir -p {0}'.format(rocksdb_dir))

    # Generate the configuration file
    metadataserver_config = StorageDriverConfiguration('metadataserver', vpool.name, number=service_number)
    metadataserver_config.load(client)
    metadataserver_config.clean()  # Clean out obsolete values
    if vpool.backend_type.code == 'alba':
        # ALBA backends are reached through the local ALBA proxy of the StorageDriver
        metadataserver_config.configure_backend_connection_manager(alba_connection_host='127.0.0.1',
                                                                   alba_connection_port=storagedriver.alba_proxy.service.ports[0],
                                                                   backend_type='ALBA')
    else:
        metadataserver_config.configure_backend_connection_manager(**vpool.metadata)
    metadataserver_config.configure_metadata_server(mds_address=storagerouter.ip,
                                                    mds_port=service.ports[0],
                                                    mds_scratch_dir=scratch_dir,
                                                    mds_rocksdb_path=rocksdb_dir)
    metadataserver_config.save(client)

    # Create system services
    params = {'<VPOOL_NAME>': vpool.name,
              '<SERVICE_NUMBER>': str(service_number)}
    template_dir = '/opt/OpenvStorage/config/templates/upstart'
    # Copy the generic template to a file dedicated to this vPool/service number
    client.run('cp -f {0}/ovs-metadataserver.conf {0}/ovs-metadataserver_{1}_{2}.conf'.format(template_dir, vpool.name, service_number))
    # The script below is executed on the node itself, 'params' is embedded as its python representation
    service_script = """
from ovs.plugin.provider.service import Service
Service.add_service(package=('openvstorage', 'metadataserver'), name='metadataserver_{0}_{1}', command=None, stop_command=None, params={2})
""".format(vpool.name, service_number, params)
    System.exec_remote_python(client, service_script)

    if start is True:
        System.exec_remote_python(client, """
from ovs.plugin.provider.service import Service
Service.enable_service('{0}')
""".format(service.name))
        System.exec_remote_python(client, """
from ovs.plugin.provider.service import Service
Service.start_service('{0}')
""".format(service.name))

    return mds_service
def promote_node(cluster_ip, master_ip, ip_client_map, unique_id, configure_memcached, configure_rabbitmq):
    """
    Promotes a given node

    The node is marked as MASTER in the model, joined to the configuration store and (when internally managed)
    the OVS DB arakoon cluster, optionally configured for memcached and RabbitMQ, after which services are
    (re)started and the promotion is flagged as completed in the configuration.

    :param cluster_ip: IP of the node being promoted; used as key into ip_client_map for the target client
    :param master_ip: IP of an existing master node; used as key into ip_client_map for the master client
    :param ip_client_map: Mapping of IP to client object, passed on to the service restart helpers as well
    :param unique_id: Machine ID used to look up the StorageRouter that gets promoted
    :param configure_memcached: If True, memcached is validated, configured and added to the endpoints
    :param configure_rabbitmq: If True, RabbitMQ is configured, joined to the master's cluster and added to the endpoints
    :return: None
    :raises RuntimeError: When memcache nodes are unreachable or when no other master node is found
    """
    # Imported inside the function; NOTE(review): presumably to avoid circular imports at module load - confirm
    from ovs.dal.lists.storagerouterlist import StorageRouterList
    from ovs.dal.lists.servicetypelist import ServiceTypeList
    from ovs.dal.lists.servicelist import ServiceList
    from ovs.dal.hybrids.service import Service

    Toolbox.log(logger=NodeTypeController._logger, messages='Promoting node', title=True)
    if configure_memcached is True:
        if NodeTypeController._validate_local_memcache_servers(ip_client_map) is False:
            raise RuntimeError('Not all memcache nodes can be reached which is required for promoting a node.')

    target_client = ip_client_map[cluster_ip]
    machine_id = System.get_my_machine_id(target_client)
    node_name, _ = target_client.get_hostname()
    master_client = ip_client_map[master_ip]

    # Flag the StorageRouter as master in the model before touching any cluster
    storagerouter = StorageRouterList.get_by_machine_id(unique_id)
    storagerouter.node_type = 'MASTER'
    storagerouter.save()

    # Only extend the configuration cluster when no external configuration store is registered
    external_config = Configuration.get('/ovs/framework/external_config')
    if external_config is None:
        config_store = Configuration.get_store()
        if config_store == 'arakoon':
            Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon configuration cluster')
            metadata = ArakoonInstaller.extend_cluster(master_ip=master_ip,
                                                       new_ip=cluster_ip,
                                                       cluster_name='config',
                                                       base_dir=Configuration.get('/ovs/framework/paths|ovsdb'),
                                                       ports=[26400, 26401],
                                                       filesystem=True)
            ArakoonInstaller.restart_cluster_add(cluster_name='config',
                                                 current_ips=metadata['ips'],
                                                 new_ip=cluster_ip,
                                                 filesystem=True)
            ServiceManager.register_service(node_name=machine_id,
                                            service_metadata=metadata['service_metadata'])
        else:
            from ovs.extensions.db.etcd.installer import EtcdInstaller
            Toolbox.log(logger=NodeTypeController._logger, messages='Joining Etcd cluster')
            EtcdInstaller.extend_cluster(master_ip, cluster_ip, 'config')

    # Find other (arakoon) master nodes
    arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=arakoon_cluster_name)
    config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name, filesystem=False)
    config.load_config()
    master_node_ips = [node.ip for node in config.nodes]
    if cluster_ip in master_node_ips:
        master_node_ips.remove(cluster_ip)
    if len(master_node_ips) == 0:
        raise RuntimeError('There should be at least one other master node')

    # Ports stay empty when the OVS DB cluster is not internally managed
    arakoon_ports = []
    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon OVS DB cluster')
        result = ArakoonInstaller.extend_cluster(master_ip=master_ip,
                                                 new_ip=cluster_ip,
                                                 cluster_name=arakoon_cluster_name,
                                                 base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
        ArakoonInstaller.restart_cluster_add(cluster_name=arakoon_cluster_name,
                                             current_ips=result['ips'],
                                             new_ip=cluster_ip,
                                             filesystem=False)
        arakoon_ports = [result['client_port'], result['messaging_port']]

    if configure_memcached is True:
        NodeTypeController.configure_memcached(client=target_client, logger=NodeTypeController._logger)
    NodeTypeController.add_services(client=target_client, node_type='master', logger=NodeTypeController._logger)

    Toolbox.log(logger=NodeTypeController._logger, messages='Update configurations')
    # Register the new node's endpoints; the configuration is only written when the endpoint was missing
    if configure_memcached is True:
        endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
        endpoint = '{0}:11211'.format(cluster_ip)
        if endpoint not in endpoints:
            endpoints.append(endpoint)
            Configuration.set('/ovs/framework/memcache|endpoints', endpoints)
    if configure_rabbitmq is True:
        endpoints = Configuration.get('/ovs/framework/messagequeue|endpoints')
        endpoint = '{0}:5672'.format(cluster_ip)
        if endpoint not in endpoints:
            endpoints.append(endpoint)
            Configuration.set('/ovs/framework/messagequeue|endpoints', endpoints)

    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting master node services')
        ArakoonInstaller.restart_cluster_add(cluster_name=arakoon_cluster_name,
                                             current_ips=master_node_ips,
                                             new_ip=cluster_ip,
                                             filesystem=False)
        # Drop the cached store references; NOTE(review): presumably so new clients see the extended cluster - confirm
        PersistentFactory.store = None
        VolatileFactory.store = None

        # Model the arakoon service for this node unless an external one or one on this IP is already modeled
        if 'arakoon-ovsdb' not in [s.name for s in ServiceList.get_services() if s.is_internal is False or s.storagerouter.ip == cluster_ip]:
            service = Service()
            service.name = 'arakoon-ovsdb'
            service.type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
            service.ports = arakoon_ports
            service.storagerouter = storagerouter
            service.save()

    if configure_rabbitmq is True:
        NodeTypeController.configure_rabbitmq(client=target_client, logger=NodeTypeController._logger)
        # Copy rabbitmq cookie
        rabbitmq_cookie_file = '/var/lib/rabbitmq/.erlang.cookie'

        Toolbox.log(logger=NodeTypeController._logger, messages='Copying Rabbit MQ cookie')
        contents = master_client.file_read(rabbitmq_cookie_file)
        master_hostname, _ = master_client.get_hostname()
        target_client.dir_create(os.path.dirname(rabbitmq_cookie_file))
        target_client.file_write(rabbitmq_cookie_file, contents)
        target_client.file_chmod(rabbitmq_cookie_file, mode=400)
        # Start the server detached, join the master's cluster and stop again; fixed pauses separate each step
        target_client.run(['rabbitmq-server', '-detached'])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'stop_app'])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'join_cluster', 'rabbit@{0}'.format(master_hostname)])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'stop'])
        time.sleep(5)

        # Enable HA for the rabbitMQ queues
        Toolbox.change_service_state(target_client, 'rabbitmq-server', 'start', NodeTypeController._logger)
        NodeTypeController.check_rabbitmq_and_enable_ha_mode(client=target_client, logger=NodeTypeController._logger)

    NodeTypeController._configure_amqp_to_volumedriver()

    Toolbox.log(logger=NodeTypeController._logger, messages='Starting services')
    services = ['memcached', 'arakoon-ovsdb', 'rabbitmq-server', 'etcd-config']
    # 'arakoon-ovsdb' is skipped here for an internally managed cluster
    if arakoon_metadata['internal'] is True:
        services.remove('arakoon-ovsdb')
    for service in services:
        # Only services that are actually present on the target node are started
        if ServiceManager.has_service(service, client=target_client):
            Toolbox.change_service_state(target_client, service, 'start', NodeTypeController._logger)

    Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
    NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)

    # A second restart round is done when run_hooks returns a truthy value
    if Toolbox.run_hooks(component='nodetype',
                         sub_component='promote',
                         logger=NodeTypeController._logger,
                         cluster_ip=cluster_ip,
                         master_ip=master_ip):
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)

    if NodeTypeController.avahi_installed(client=target_client, logger=NodeTypeController._logger) is True:
        NodeTypeController.configure_avahi(client=target_client, node_name=node_name, node_type='master', logger=NodeTypeController._logger)
    Configuration.set('/ovs/framework/hosts/{0}/type'.format(machine_id), 'MASTER')
    target_client.run(['chown', '-R', 'ovs:ovs', '/opt/OpenvStorage/config'])
    Configuration.set('/ovs/framework/hosts/{0}/promotecompleted'.format(machine_id), True)

    # Remove the rollback marker file when it is present on the target node
    if target_client.file_exists('/tmp/ovs_rollback'):
        target_client.file_delete('/tmp/ovs_rollback')

    Toolbox.log(logger=NodeTypeController._logger, messages='Promote complete')
def migrate(previous_version):
    """
    Migrates from a given version to the current version. It uses 'previous_version' to be smart
    wherever possible, but the code should be able to migrate any version towards the expected version.
    When this is not possible, the code can set a minimum version and raise when it is not met.

    A previous version of 0 only seeds the ALBA backend type and the ALBA related service types. Any other
    version below DALMigrator.THIS_VERSION runs the actual data migrations: unique constraints and indexes,
    the introduction of ABMCluster/NSMCluster and the 'Active Drive' changes on OSDs.

    :param previous_version: The previous version from which to start the migration
    :type previous_version: float
    :return: DALMigrator.THIS_VERSION
    """

    working_version = previous_version

    if working_version == 0:
        from ovs.dal.hybrids.servicetype import ServiceType
        # Initial version:
        # * Add any basic configuration or model entries

        # Add backends
        for backend_type_info in [('ALBA', 'alba')]:
            code = backend_type_info[1]
            backend_type = BackendTypeList.get_backend_type_by_code(code)
            if backend_type is None:
                backend_type = BackendType()
            # Name and code are (re)written for both new and already existing backend types
            backend_type.name = backend_type_info[0]
            backend_type.code = code
            backend_type.save()

        # Add service types
        for service_type_info in [ServiceType.SERVICE_TYPES.NS_MGR,
                                  ServiceType.SERVICE_TYPES.ALBA_MGR,
                                  ServiceType.SERVICE_TYPES.ALBA_S3_TRANSACTION]:
            service_type = ServiceType()
            service_type.name = service_type_info
            service_type.save()

    # From here on, all actual migration should happen to get to the expected state for THIS RELEASE
    elif working_version < DALMigrator.THIS_VERSION:
        import hashlib
        from ovs.dal.exceptions import ObjectNotFoundException
        from ovs.dal.helpers import HybridRunner, Descriptor
        from ovs.dal.hybrids.albaabmcluster import ABMCluster
        from ovs.dal.hybrids.albaosd import AlbaOSD
        from ovs.dal.hybrids.albansmcluster import NSMCluster
        from ovs.dal.hybrids.j_abmservice import ABMService
        from ovs.dal.hybrids.j_nsmservice import NSMService
        from ovs.dal.hybrids.service import Service
        from ovs.dal.hybrids.servicetype import ServiceType
        from ovs.dal.lists.albabackendlist import AlbaBackendList
        from ovs.dal.lists.albanodelist import AlbaNodeList
        from ovs.dal.lists.servicetypelist import ServiceTypeList
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.extensions.db.arakooninstaller import ArakoonClusterConfig, ArakoonInstaller
        from ovs.extensions.generic.configuration import Configuration, NotFoundException
        from ovs_extensions.generic.toolbox import ExtensionsToolbox
        from ovs.extensions.plugins.albacli import AlbaCLI
        from ovs.extensions.storage.persistentfactory import PersistentFactory

        # Migrate unique constraints & indexes
        client = PersistentFactory.get_client()
        hybrid_structure = HybridRunner.get_hybrids()
        for class_descriptor in hybrid_structure.values():
            cls = Descriptor().load(class_descriptor).get_object()
            classname = cls.__name__.lower()
            # Key templates: the class name is filled in now, the property name (and hash) placeholders remain
            unique_key = 'ovs_unique_{0}_{{0}}_'.format(classname)
            index_prefix = 'ovs_index_{0}|{{0}}|'.format(classname)
            index_key = 'ovs_index_{0}|{{0}}|{{1}}'.format(classname)
            uniques = []
            indexes = []
            # Only collect properties for which no unique/index keys are stored yet
            # noinspection PyProtectedMember
            for prop in cls._properties:
                if prop.unique is True and len([k for k in client.prefix(unique_key.format(prop.name))]) == 0:
                    uniques.append(prop.name)
                if prop.indexed is True and len([k for k in client.prefix(index_prefix.format(prop.name))]) == 0:
                    indexes.append(prop.name)
            if len(uniques) > 0 or len(indexes) > 0:
                prefix = 'ovs_data_{0}_'.format(classname)
                for key, data in client.prefix_entries(prefix):
                    # Unique key: sha1 of the stringified property value points to the object's data key
                    for property_name in uniques:
                        ukey = '{0}{1}'.format(unique_key.format(property_name), hashlib.sha1(str(data[property_name])).hexdigest())
                        client.set(ukey, key)
                    for property_name in indexes:
                        if property_name not in data:
                            continue  # This is the case when there's a new indexed property added.
                        ikey = index_key.format(property_name, hashlib.sha1(str(data[property_name])).hexdigest())
                        index = list(client.get_multi([ikey], must_exist=False))[0]
                        # The assert on the previously read value is part of the same transaction as the write;
                        # NOTE(review): presumably this guards against concurrent index updates - confirm
                        transaction = client.begin_transaction()
                        if index is None:
                            client.assert_value(ikey, None, transaction=transaction)
                            client.set(ikey, [key], transaction=transaction)
                        elif key not in index:
                            client.assert_value(ikey, index[:], transaction=transaction)
                            client.set(ikey, index + [key], transaction=transaction)
                        client.apply_transaction(transaction)

        #############################################
        # Introduction of ABMCluster and NSMCluster #
        #############################################
        # Verify presence of unchanged ALBA Backends
        alba_backends = AlbaBackendList.get_albabackends()
        changes_required = False
        for alba_backend in alba_backends:
            if alba_backend.abm_cluster is None or len(alba_backend.nsm_clusters) == 0:
                changes_required = True
                break

        if changes_required:
            # Retrieve ABM and NSM clusters
            abm_cluster_info = []
            nsm_cluster_info = []
            for cluster_name in Configuration.list('/ovs/arakoon'):
                try:
                    metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)
                    if metadata['cluster_type'] == ServiceType.ARAKOON_CLUSTER_TYPES.ABM:
                        abm_cluster_info.append(metadata)
                    elif metadata['cluster_type'] == ServiceType.ARAKOON_CLUSTER_TYPES.NSM:
                        nsm_cluster_info.append(metadata)
                except NotFoundException:
                    # Clusters without retrievable metadata are skipped
                    continue

            # Retrieve NSM Arakoon cluster information
            cluster_arakoon_map = {}
            for cluster_info in abm_cluster_info + nsm_cluster_info:
                cluster_name = cluster_info['cluster_name']
                arakoon_config = ArakoonClusterConfig(cluster_id=cluster_name)
                cluster_arakoon_map[cluster_name] = arakoon_config.export_dict()

            # Lookup tables: machine ID -> StorageRouter and ALBA ID -> ALBA Backend
            storagerouter_map = dict((storagerouter.machine_id, storagerouter) for storagerouter in StorageRouterList.get_storagerouters())
            alba_backend_id_map = dict((alba_backend.alba_id, alba_backend) for alba_backend in alba_backends)
            for cluster_info in abm_cluster_info:
                internal = cluster_info['internal']
                cluster_name = cluster_info['cluster_name']
                config_location = Configuration.get_configuration_path(key=ArakoonClusterConfig.CONFIG_KEY.format(cluster_name))
                try:
                    alba_id = AlbaCLI.run(command='get-alba-id', config=config_location, named_params={'attempts': 3})['id']
                    nsm_hosts = AlbaCLI.run(command='list-nsm-hosts', config=config_location, named_params={'attempts': 3})
                except RuntimeError:
                    # ABM clusters for which the ALBA CLI calls fail are skipped
                    continue

                alba_backend = alba_backend_id_map.get(alba_id)
                if alba_backend is None:  # ALBA Backend with ID not found in model
                    continue
                if alba_backend.abm_cluster is not None and len(alba_backend.nsm_clusters) > 0:  # Clusters already exist
                    continue

                # Create ABM Cluster
                if alba_backend.abm_cluster is None:
                    abm_cluster = ABMCluster()
                    abm_cluster.name = cluster_name
                    abm_cluster.alba_backend = alba_backend
                    abm_cluster.config_location = ArakoonClusterConfig.CONFIG_KEY.format(cluster_name)
                    abm_cluster.save()
                else:
                    abm_cluster = alba_backend.abm_cluster

                # Create ABM Services
                abm_arakoon_config = cluster_arakoon_map[cluster_name]
                # After dropping 'global', the remaining keys are treated as the arakoon node names
                abm_arakoon_config.pop('global')
                arakoon_nodes = abm_arakoon_config.keys()
                if internal is False:
                    services_to_create = 1
                else:
                    # Skip when a node name is not a known StorageRouter machine ID
                    if set(arakoon_nodes).difference(set(storagerouter_map.keys())):
                        continue
                    services_to_create = len(arakoon_nodes)
                for index in range(services_to_create):
                    service = Service()
                    service.name = 'arakoon-{0}-abm'.format(alba_backend.name)
                    service.type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ALBA_MGR)
                    if internal is True:
                        arakoon_node_config = abm_arakoon_config[arakoon_nodes[index]]
                        service.ports = [arakoon_node_config['client_port'], arakoon_node_config['messaging_port']]
                        service.storagerouter = storagerouter_map[arakoon_nodes[index]]
                    else:
                        # Non-internal clusters get a single service without ports or StorageRouter
                        service.ports = []
                        service.storagerouter = None
                    service.save()

                    abm_service = ABMService()
                    abm_service.service = service
                    abm_service.abm_cluster = abm_cluster
                    abm_service.save()

                # Create NSM Clusters
                for cluster_index, nsm_host in enumerate(sorted(nsm_hosts, key=lambda host: ExtensionsToolbox.advanced_sort(host['cluster_id'], '_'))):
                    nsm_cluster_name = nsm_host['cluster_id']
                    nsm_arakoon_config = cluster_arakoon_map.get(nsm_cluster_name)
                    if nsm_arakoon_config is None:
                        continue

                    # Internal clusters take their number from the name suffix, others from the sorted position
                    number = cluster_index if internal is False else int(nsm_cluster_name.split('_')[-1])
                    nsm_cluster = NSMCluster()
                    nsm_cluster.name = nsm_cluster_name
                    nsm_cluster.number = number
                    nsm_cluster.alba_backend = alba_backend
                    nsm_cluster.config_location = ArakoonClusterConfig.CONFIG_KEY.format(nsm_cluster_name)
                    nsm_cluster.save()

                    # Create NSM Services
                    nsm_arakoon_config.pop('global')
                    arakoon_nodes = nsm_arakoon_config.keys()
                    if internal is False:
                        services_to_create = 1
                    else:
                        if set(arakoon_nodes).difference(set(storagerouter_map.keys())):
                            continue
                        services_to_create = len(arakoon_nodes)
                    for service_index in range(services_to_create):
                        service = Service()
                        service.name = 'arakoon-{0}-nsm_{1}'.format(alba_backend.name, number)
                        service.type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.NS_MGR)
                        if internal is True:
                            arakoon_node_config = nsm_arakoon_config[arakoon_nodes[service_index]]
                            service.ports = [arakoon_node_config['client_port'], arakoon_node_config['messaging_port']]
                            service.storagerouter = storagerouter_map[arakoon_nodes[service_index]]
                        else:
                            service.ports = []
                            service.storagerouter = None
                        service.save()

                        nsm_service = NSMService()
                        nsm_service.service = service
                        nsm_service.nsm_cluster = nsm_cluster
                        nsm_service.save()

            # Clean up all junction services no longer linked to an ALBA Backend
            all_nsm_services = [service.nsm_service for service in ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.NS_MGR).services if service.nsm_service.nsm_cluster is None]
            all_abm_services = [service.abm_service for service in ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ALBA_MGR).services if service.abm_service.abm_cluster is None]
            # The junction object is deleted first, then the service it referred to
            for abm_service in all_abm_services:
                abm_service.delete()
                abm_service.service.delete()
            for nsm_service in all_nsm_services:
                nsm_service.delete()
                nsm_service.service.delete()

        ################################
        # Introduction of Active Drive #
        ################################
        # Update slot_id and Alba Node relation for all OSDs
        client = PersistentFactory.get_client()
        # Maps AlbaDisk guid -> list of AlbaOSD guids, built from the raw stored OSD data
        disk_osd_map = {}
        for key, data in client.prefix_entries('ovs_data_albaosd_'):
            alba_disk_guid = data.get('alba_disk', {}).get('guid')
            if alba_disk_guid is not None:
                if alba_disk_guid not in disk_osd_map:
                    disk_osd_map[alba_disk_guid] = []
                disk_osd_map[alba_disk_guid].append(key.replace('ovs_data_albaosd_', ''))
            try:
                value = client.get(key)
                value.pop('alba_disk', None)
                client.set(key=key, value=value)
            except Exception:
                pass  # We don't care if we would have any leftover AlbaDisk information in _data, but its cleaner not to

        alba_guid_node_map = dict((an.guid, an) for an in AlbaNodeList.get_albanodes())
        for key, data in client.prefix_entries('ovs_data_albadisk_'):
            alba_disk_guid = key.replace('ovs_data_albadisk_', '')
            alba_node_guid = data.get('alba_node', {}).get('guid')
            if alba_disk_guid in disk_osd_map and alba_node_guid in alba_guid_node_map and len(data.get('aliases', [])) > 0:
                # The slot ID is the last path component of the disk's first alias
                slot_id = data['aliases'][0].split('/')[-1]
                for osd_guid in disk_osd_map[alba_disk_guid]:
                    try:
                        osd = AlbaOSD(osd_guid)
                    except ObjectNotFoundException:
                        continue
                    osd.slot_id = slot_id
                    osd.alba_node = alba_guid_node_map[alba_node_guid]
                    osd.save()
            # The stored AlbaDisk entry is removed whether or not OSDs were updated from it
            client.delete(key=key, must_exist=False)

        # Remove unique constraints for AlbaNode IP
        for key in client.prefix('ovs_unique_albanode_ip_'):
            client.delete(key=key, must_exist=False)

        # Remove relation for all Alba Disks
        for key in client.prefix('ovs_reverseindex_albadisk_'):
            client.delete(key=key, must_exist=False)

        # Remove the relation between AlbaNode and AlbaDisk
        for key in client.prefix('ovs_reverseindex_albanode_'):
            if '|disks|' in key:
                client.delete(key=key, must_exist=False)

    return DALMigrator.THIS_VERSION
def prepare_mds_service(storagerouter, vpool, fresh_only, reload_config):
    """
    Prepares an MDS service:
    * Creates the required configuration
    * Sets up the service files

    Assumes the StorageRouter and VPool are already configured with a StorageDriver and that all model-wise
    configuration regarding both is completed.
    :param storagerouter: Storagerouter on which MDS service will be created
    :type storagerouter: StorageRouter
    :param vpool: The vPool for which the MDS service will be created
    :type vpool: VPool
    :param fresh_only: If True, a new MDS service is only created when none exists yet for this vpool on this
                       storagerouter; otherwise nothing is done and None is returned
    :type fresh_only: bool
    :param reload_config: If True, the volumedriver's updated configuration will be reloaded
    :type reload_config: bool
    :return: Newly created service, or None when fresh_only prevented the creation
    :rtype: MDSService
    :raises RuntimeError: When no free port, no DB partition or no configured storagedriver is found
    """
    # Fetch service sequence number based on MDS services for current vPool and current storage router
    service_number = -1
    for existing_mds_service in vpool.mds_services:
        if existing_mds_service.service.storagerouter_guid == storagerouter.guid:
            service_number = max(existing_mds_service.number, service_number)

    if fresh_only is True and service_number >= 0:
        return  # There is already 1 or more MDS services running, aborting

    # VALIDATIONS
    # 1. Find free port based on MDS services for all vPools on current storage router
    client = SSHClient(storagerouter)
    mdsservice_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER)
    occupied_ports = []
    for modeled_service in mdsservice_type.services:
        if modeled_service.storagerouter_guid == storagerouter.guid:
            occupied_ports.extend(modeled_service.ports)

    mds_port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|mds'.format(System.get_my_machine_id(client)))
    free_ports = System.get_free_ports(selected_range=mds_port_range,
                                       exclude=occupied_ports,
                                       nr=1,
                                       client=client)
    if not free_ports:
        raise RuntimeError('Failed to find an available port on storage router {0} within range {1}'.format(storagerouter.name, mds_port_range))

    # 2. Partition check
    # NOTE(review): the break only leaves the inner loop, so when several disks hold a DB partition the one of
    # the last such disk is used. Kept as-is so the selected partition does not change.
    db_partition = None
    for disk in storagerouter.disks:
        for partition in disk.partitions:
            if DiskPartition.ROLES.DB in partition.roles:
                db_partition = partition
                break
    if db_partition is None:
        raise RuntimeError('Could not find DB partition on storage router {0}'.format(storagerouter.name))

    # 3. Verify storage driver configured
    storagedrivers = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid]
    if not storagedrivers:
        raise RuntimeError('Expected to find a configured storagedriver for vpool {0} on storage router {1}'.format(vpool.name, storagerouter.name))
    storagedriver = storagedrivers[0]

    # MODEL UPDATES
    # 1. Service
    service_number += 1
    service = Service()
    service.name = 'metadataserver_{0}_{1}'.format(vpool.name, service_number)
    service.type = mdsservice_type
    service.ports = [free_ports[0]]
    service.storagerouter = storagerouter
    service.save()
    mds_service = MDSService()
    mds_service.vpool = vpool
    mds_service.number = service_number
    mds_service.service = service
    mds_service.save()

    # 2. Storage driver partitions
    from ovs.lib.storagedriver import StorageDriverController
    StorageDriverController.add_storagedriverpartition(storagedriver, {'size': None,
                                                                       'role': DiskPartition.ROLES.DB,
                                                                       'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                       'partition': db_partition,
                                                                       'mds_service': mds_service})

    # CONFIGURATIONS
    # 1. Volumedriver
    # Dedicated loop names are used here on purpose: re-using 'mds_service' would overwrite the newly created
    # service and make the function return whichever MDS service happened to be inspected last.
    mds_nodes = []
    for modeled_service in mdsservice_type.services:
        if modeled_service.storagerouter_guid != storagerouter.guid:
            continue
        modeled_mds_service = modeled_service.mds_service
        if modeled_mds_service is None or modeled_mds_service.vpool_guid != vpool.guid:
            continue
        sdp = [sd_partition for sd_partition in modeled_mds_service.storagedriver_partitions
               if sd_partition.role == DiskPartition.ROLES.DB and sd_partition.sub_role == StorageDriverPartition.SUBROLE.MDS][0]
        mds_nodes.append({'host': modeled_service.storagerouter.ip,
                          'port': modeled_service.ports[0],
                          'db_directory': sdp.path,
                          'scratch_directory': sdp.path})

    # Generate the correct section in the Storage Driver's configuration
    storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id)
    storagedriver_config.load()
    storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
    storagedriver_config.save(client, reload_config=reload_config)

    return mds_service
def _voldrv_arakoon_checkup(create_cluster):
    """
    Makes sure the 'voldrv' arakoon cluster is modeled and, when internally managed, spread over all master
    Storage Routers which have a DB role

    * When create_cluster is True and no 'arakoon-voldrv' service is modeled yet, an unused externally managed
      cluster is claimed or, if none is available, a new cluster is created on a Storage Router with a DB role
    * When the registered cluster is internally managed and runs on fewer nodes than there are available
      Storage Routers, it is extended to the missing ones

    :param create_cluster: If True, a cluster is claimed/created when no service is modeled yet
    :type create_cluster: bool
    :return: None
    :raises RuntimeError: When a cluster must be created but no Storage Router with a DB role is found
    """
    def add_service(service_storagerouter, arakoon_ports):
        """
        Add a service to the storage router
        :param service_storagerouter: Storage Router to add the service to
        :type service_storagerouter: StorageRouter
        :param arakoon_ports: Port information
        :type arakoon_ports: list
        :return: The newly created and added service
        :rtype: Service
        """
        new_service = Service()
        new_service.name = service_name
        new_service.type = service_type
        new_service.ports = arakoon_ports
        new_service.storagerouter = service_storagerouter
        new_service.save()
        return new_service

    service_name = 'arakoon-voldrv'
    service_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)

    current_ips = []
    current_services = []
    for service in service_type.services:
        if service.name == service_name:
            current_services.append(service)
            if service.is_internal is True:
                # Externally managed services are not linked to a Storage Router, so they contribute no IP
                current_ips.append(service.storagerouter.ip)

    # Master Storage Routers having a DB role, mapped to the first DB partition of each
    available_storagerouters = {}
    for storagerouter in StorageRouterList.get_masters():
        storagerouter.invalidate_dynamics(['partition_config'])
        if len(storagerouter.partition_config[DiskPartition.ROLES.DB]) > 0:
            available_storagerouters[storagerouter] = DiskPartition(storagerouter.partition_config[DiskPartition.ROLES.DB][0])

    if create_cluster is True and len(current_services) == 0:  # Create new cluster
        metadata = ArakoonInstaller.get_unused_arakoon_metadata_and_claim(cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.SD)
        if metadata is None:  # No externally managed cluster found, we create 1 ourselves
            if not available_storagerouters:
                raise RuntimeError('Could not find any Storage Router with a DB role')

            storagerouter, partition = available_storagerouters.items()[0]
            result = ArakoonInstaller.create_cluster(cluster_name='voldrv',
                                                     cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.SD,
                                                     ip=storagerouter.ip,
                                                     base_dir=partition.folder,
                                                     filesystem=False)
            ports = [result['client_port'], result['messaging_port']]
            metadata = result['metadata']
            ArakoonInstaller.restart_cluster_add(cluster_name='voldrv',
                                                 current_ips=current_ips,
                                                 new_ip=storagerouter.ip,
                                                 filesystem=False)
            ArakoonInstaller.claim_cluster(cluster_name='voldrv',
                                           master_ip=storagerouter.ip,
                                           filesystem=False,
                                           metadata=metadata)
            current_ips.append(storagerouter.ip)
        else:
            # Externally managed cluster: modeled without ports and without Storage Router
            ports = []
            storagerouter = None

        cluster_name = metadata['cluster_name']
        Configuration.set('/ovs/framework/arakoon_clusters|voldrv', cluster_name)
        StorageDriverController._logger.info('Claiming {0} managed arakoon cluster: {1}'.format('externally' if storagerouter is None else 'internally', cluster_name))
        StorageDriverController._configure_arakoon_to_volumedriver(cluster_name=cluster_name)
        current_services.append(add_service(service_storagerouter=storagerouter, arakoon_ports=ports))

    cluster_name = Configuration.get('/ovs/framework/arakoon_clusters').get('voldrv')
    if cluster_name is None:
        return
    metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)
    # Only an internally managed cluster is extended to the remaining available Storage Routers
    if 0 < len(current_services) < len(available_storagerouters) and metadata['internal'] is True:
        for storagerouter, partition in available_storagerouters.iteritems():
            if storagerouter.ip in current_ips:
                continue
            result = ArakoonInstaller.extend_cluster(master_ip=current_services[0].storagerouter.ip,
                                                     new_ip=storagerouter.ip,
                                                     cluster_name=cluster_name,
                                                     base_dir=partition.folder)
            add_service(storagerouter, [result['client_port'], result['messaging_port']])
            current_ips.append(storagerouter.ip)
            ArakoonInstaller.restart_cluster_add(cluster_name=cluster_name,
                                                 current_ips=current_ips,
                                                 new_ip=storagerouter.ip,
                                                 filesystem=False)
        StorageDriverController._configure_arakoon_to_volumedriver(cluster_name=cluster_name)
def build_service_structure(structure, previous_structure=None):
    """
    Builds an MDS service structure

    Every key of 'structure' lists what must exist in the model. Entries whose id is already present in
    'previous_structure' are reused, all others are created and saved.

    Example:
        structure = Helper.build_service_structure(
            {'vpools': [1],
             'domains': [],
             'storagerouters': [1],
             'storagedrivers': [(1, 1, 1)],  # (<id>, <vpool_id>, <storagerouter_id>)
             'mds_services': [(1, 1)],  # (<id>, <storagedriver_id>)
             'vdisks': [(1, 1, 1, 1)],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id or None>)
             'storagerouter_domains': []}  # (<id>, <storagerouter_id>, <domain_id>, <backup>)
        )

    :param structure: Description of the objects that should be available
    :param previous_structure: Result of an earlier call, used to extend an already built structure
    :return: Dict with the created/reused objects per kind, keyed by the ids used in 'structure'
             (services are keyed by their generated name), plus the MetadataServer service type
    """
    known = {} if previous_structure is None else previous_structure
    (vdisks, vpools, domains, services, mds_services,
     storagerouters, storagedrivers, storagerouter_domains) = [
        known.get(kind, {}) for kind in ("vdisks", "vpools", "domains", "services", "mds_services",
                                         "storagerouters", "storagedrivers", "storagerouter_domains")
    ]

    # Make sure the MetadataServer service type is modeled
    service_type = ServiceTypeList.get_by_name("MetadataServer")
    if service_type is None:
        service_type = ServiceType()
        service_type.name = "MetadataServer"
        service_type.save()

    # Domains
    for domain_id in structure.get("domains", []):
        if domain_id in domains:
            continue
        new_domain = Domain()
        new_domain.name = "domain_{0}".format(domain_id)
        new_domain.save()
        domains[domain_id] = new_domain

    # vPools, each with its own StorageRouterClient (also for reused vPools)
    srclients = {}
    for vpool_id in structure.get("vpools", []):
        if vpool_id in vpools:
            current_vpool = vpools[vpool_id]
        else:
            current_vpool = VPool()
            current_vpool.name = str(vpool_id)
            current_vpool.status = "RUNNING"
            current_vpool.save()
            vpools[vpool_id] = current_vpool
        srclients[vpool_id] = StorageRouterClient(current_vpool.guid, None)

    # StorageRouters, each new one gets a single disk holding one partition with the DB and SCRUB roles
    for sr_id in structure.get("storagerouters", []):
        if sr_id in storagerouters:
            continue
        new_router = StorageRouter()
        new_router.name = str(sr_id)
        new_router.ip = "10.0.0.{0}".format(sr_id)
        new_router.rdma_capable = False
        new_router.node_type = "MASTER"
        new_router.machine_id = str(sr_id)
        new_router.save()
        storagerouters[sr_id] = new_router

        new_disk = Disk()
        new_disk.storagerouter = new_router
        new_disk.state = "OK"
        new_disk.name = "/dev/uda"
        new_disk.size = 1 * 1024 ** 4
        new_disk.is_ssd = True
        new_disk.aliases = ["/dev/uda"]
        new_disk.save()

        new_partition = DiskPartition()
        new_partition.offset = 0
        new_partition.size = new_disk.size
        new_partition.aliases = ["/dev/uda-1"]
        new_partition.state = "OK"
        new_partition.mountpoint = "/tmp/unittest/sr_{0}/disk_1/partition_1".format(sr_id)
        new_partition.disk = new_disk
        new_partition.roles = [DiskPartition.ROLES.DB, DiskPartition.ROLES.SCRUB]
        new_partition.save()

    # StorageDrivers
    for sd_id, sd_vpool_id, sd_sr_id in structure.get("storagedrivers", ()):
        if sd_id in storagedrivers:
            continue
        owning_router = storagerouters[sd_sr_id]
        new_driver = StorageDriver()
        new_driver.vpool = vpools[sd_vpool_id]
        new_driver.storagerouter = owning_router
        new_driver.name = str(sd_id)
        new_driver.mountpoint = "/"
        new_driver.cluster_ip = owning_router.ip
        new_driver.storage_ip = "10.0.1.{0}".format(sd_sr_id)
        new_driver.storagedriver_id = str(sd_id)
        new_driver.ports = {"management": 1, "xmlrpc": 2, "dtl": 3, "edge": 4}
        new_driver.save()
        storagedrivers[sd_id] = new_driver
        Helper._set_vpool_storage_driver_configuration(vpool=vpools[sd_vpool_id], storagedriver=new_driver)

    # MDS services, with the backing service and the storagedriver partition
    for mds_id, mds_sd_id in structure.get("mds_services", ()):
        if mds_id in mds_services:
            continue
        driver = storagedrivers[mds_sd_id]
        service_key = "{0}-{1}".format(driver.storagerouter.name, mds_id)
        new_service = Service()
        new_service.name = service_key
        new_service.storagerouter = driver.storagerouter
        new_service.ports = [mds_id]
        new_service.type = service_type
        new_service.save()
        services[service_key] = new_service

        new_mds_service = MDSService()
        new_mds_service.service = new_service
        new_mds_service.number = 0
        new_mds_service.capacity = 10
        new_mds_service.vpool = driver.vpool
        new_mds_service.save()
        mds_services[mds_id] = new_mds_service

        partition_info = {"size": None,
                          "role": DiskPartition.ROLES.DB,
                          "sub_role": StorageDriverPartition.SUBROLE.MDS,
                          "partition": driver.storagerouter.disks[0].partitions[0],
                          "mds_service": new_mds_service}
        StorageDriverController.add_storagedriverpartition(driver, partition_info)

    # vDisks, a volume is created through the vPool's client before the vDisk is modeled
    for vdisk_id, storage_driver_id, vdisk_vpool_id, vdisk_mds_id in structure.get("vdisks", ()):
        if vdisk_id in vdisks:
            continue
        owning_vpool = vpools[vdisk_vpool_id]
        devicename = "vdisk_{0}".format(vdisk_id)
        if vdisk_mds_id is None:
            backing_mds_services = []
        else:
            backing_mds_services = [mds_services[vdisk_mds_id]]
        mds_backend_config = Helper._generate_mdsmetadatabackendconfig(backing_mds_services)
        volume_id = srclients[vdisk_vpool_id].create_volume(devicename, mds_backend_config, 0, str(storage_driver_id))
        new_vdisk = VDisk()
        new_vdisk.name = str(vdisk_id)
        new_vdisk.devicename = devicename
        new_vdisk.volume_id = volume_id
        new_vdisk.vpool = owning_vpool
        new_vdisk.size = 0
        new_vdisk.save()
        new_vdisk.reload_client("storagedriver")
        vdisks[vdisk_id] = new_vdisk

    # Links between StorageRouters and Domains
    for srd_id, srd_sr_id, srd_domain_id, backup in structure.get("storagerouter_domains", ()):
        if srd_id in storagerouter_domains:
            continue
        new_link = StorageRouterDomain()
        new_link.backup = backup
        new_link.domain = domains[srd_domain_id]
        new_link.storagerouter = storagerouters[srd_sr_id]
        new_link.save()
        storagerouter_domains[srd_id] = new_link

    result = {}
    result["vdisks"] = vdisks
    result["vpools"] = vpools
    result["domains"] = domains
    result["services"] = services
    result["service_type"] = service_type
    result["mds_services"] = mds_services
    result["storagerouters"] = storagerouters
    result["storagedrivers"] = storagedrivers
    result["storagerouter_domains"] = storagerouter_domains
    return result
def _voldrv_arakoon_checkup(create_cluster):
    """
    Checks the 'voldrv' Arakoon cluster and creates or extends it where required

    * When create_cluster is True, no 'arakoon-voldrv' service is modelled yet and at least one master
      StorageRouter has a DB partition, a new cluster is created on one of those masters
    * When at least one service exists but fewer services than eligible masters are modelled, the cluster
      is extended to every eligible master whose IP is not yet part of the cluster

    :param create_cluster: Only when exactly True a missing cluster may be created
    :return: None
    """
    cluster_name = 'voldrv'
    service_name = 'arakoon-voldrv'
    service_type = ServiceTypeList.get_by_name('Arakoon')

    def add_service(service_storagerouter, arakoon_result):
        # Model the Arakoon node described by arakoon_result as a Service on the given StorageRouter
        modelled_service = Service()
        modelled_service.name = service_name
        modelled_service.type = service_type
        modelled_service.ports = [arakoon_result['client_port'], arakoon_result['messaging_port']]
        modelled_service.storagerouter = service_storagerouter
        modelled_service.save()
        return modelled_service

    # Services already modelled for this cluster and the IPs they run on
    current_services = [svc for svc in service_type.services if svc.name == service_name]
    current_ips = [svc.storagerouter.ip for svc in current_services]

    # IPs of all nodes: slaves first, masters appended below
    all_sr_ips = [slave.ip for slave in StorageRouterList.get_slaves()]

    # Masters owning at least one DB partition are eligible to host a cluster node
    available_storagerouters = {}
    for master in StorageRouterList.get_masters():
        master.invalidate_dynamics(['partition_config'])
        db_entries = master.partition_config[DiskPartition.ROLES.DB]
        if len(db_entries) > 0:
            available_storagerouters[master] = DiskPartition(db_entries[0])
        all_sr_ips.append(master.ip)

    # Create the cluster if it does not exist yet
    if create_cluster is True and not current_services and available_storagerouters:
        first_sr, first_partition = available_storagerouters.items()[0]
        creation_result = ArakoonInstaller.create_cluster(cluster_name=cluster_name,
                                                          ip=first_sr.ip,
                                                          exclude_ports=ServiceList.get_ports_for_ip(first_sr.ip),
                                                          base_dir=first_partition.folder)
        current_services.append(add_service(first_sr, creation_result))
        for pending_ip in [ip for ip in all_sr_ips if ip not in current_ips]:
            ArakoonInstaller.deploy_to_slave(first_sr.ip, pending_ip, cluster_name)
        ArakoonInstaller.restart_cluster_add(cluster_name, current_ips, first_sr.ip)
        current_ips.append(first_sr.ip)
        StorageDriverController._configure_arakoon_to_volumedriver()

    # Extend the cluster to eligible masters which are not part of it yet
    if current_services and len(current_services) < len(available_storagerouters):
        distributed = False
        for candidate, candidate_partition in available_storagerouters.iteritems():
            if candidate.ip not in current_ips:
                extension_result = ArakoonInstaller.extend_cluster(current_services[0].storagerouter.ip,
                                                                   candidate.ip,
                                                                   cluster_name,
                                                                   ServiceList.get_ports_for_ip(candidate.ip),
                                                                   candidate_partition.folder)
                add_service(candidate, extension_result)
                current_ips.append(candidate.ip)
                if distributed is False:
                    # The deployment towards the remaining nodes is only done once, after the first extension
                    distributed = True
                    for pending_ip in [ip for ip in all_sr_ips if ip not in current_ips]:
                        ArakoonInstaller.deploy_to_slave(current_services[0].storagerouter.ip, pending_ip, cluster_name)
                ArakoonInstaller.restart_cluster_add(cluster_name, current_ips, candidate.ip)
        StorageDriverController._configure_arakoon_to_volumedriver()
def prepare_mds_service(storagerouter, vpool, fresh_only, reload_config):
    """
    Prepares an MDS service:
    * Creates the required configuration
    * Sets up the service files

    Assumes the StorageRouter and VPool are already configured with a StorageDriver and that all model-wise
    configuration regarding both is completed.

    :param storagerouter: Storagerouter on which MDS service will be created
    :param vpool: The vPool for which the MDS service will be created
    :param fresh_only: If True, a new MDS service is only created when no MDS service exists yet for this
                       vpool on this storagerouter; otherwise nothing is done
    :param reload_config: If True, the volumedriver's updated configuration will be reloaded
    :raises RuntimeError: No 'MetadataServer' service type is found in the model
                          No free port is found within the configured MDS port range
                          No partition with the DB role is found on the storagerouter
                          No storagedriver is configured for the vpool on the storagerouter
    :return: The newly created MDS service, or None when fresh_only is True and an MDS service already exists
    """
    # Fetch service sequence number based on MDS services for current vPool and current storage router
    service_number = -1
    for existing_mds in vpool.mds_services:
        if existing_mds.service.storagerouter_guid == storagerouter.guid:
            service_number = max(existing_mds.number, service_number)

    if fresh_only is True and service_number >= 0:
        return None  # There is already 1 or more MDS services running, aborting

    # VALIDATIONS
    # 1. Find free port based on MDS services for all vPools on current storage router
    client = SSHClient(storagerouter)
    mdsservice_type = ServiceTypeList.get_by_name('MetadataServer')
    if mdsservice_type is None:
        # Fail with a clear message instead of an AttributeError on the '.services' access below
        raise RuntimeError('MetadataServer service not found in the model')
    occupied_ports = []
    for existing_service in mdsservice_type.services:
        if existing_service.storagerouter_guid == storagerouter.guid:
            occupied_ports.extend(existing_service.ports)

    mds_port_range = client.config_read('ovs.ports.mds')
    free_ports = System.get_free_ports(selected_range=mds_port_range,
                                       exclude=occupied_ports,
                                       nr=1,
                                       client=client)
    if not free_ports:
        raise RuntimeError('Failed to find an available port on storage router {0} within range {1}'.format(storagerouter.name, mds_port_range))

    # 2. Partition check
    # NOTE: the 'break' only leaves the inner loop, so when several disks hold a DB partition,
    #       the first DB partition of the last such disk is the one that gets used
    db_partition = None
    for disk in storagerouter.disks:
        for partition in disk.partitions:
            if DiskPartition.ROLES.DB in partition.roles:
                db_partition = partition
                break
    if db_partition is None:
        raise RuntimeError('Could not find DB partition on storage router {0}'.format(storagerouter.name))

    # 3. Verify storage driver configured
    storagedrivers = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid]
    if not storagedrivers:
        raise RuntimeError('Expected to find a configured storagedriver for vpool {0} on storage router {1}'.format(vpool.name, storagerouter.name))

    # MODEL UPDATES
    # 1. Service
    service_number += 1
    service = Service()
    service.name = 'metadataserver_{0}_{1}'.format(vpool.name, service_number)
    service.type = mdsservice_type
    service.ports = [free_ports[0]]
    service.storagerouter = storagerouter
    service.save()
    mds_service = MDSService()
    mds_service.vpool = vpool
    mds_service.number = service_number
    mds_service.service = service
    mds_service.save()

    # 2. Storage driver partitions
    from ovs.lib.storagedriver import StorageDriverController
    sdp = StorageDriverController.add_storagedriverpartition(storagedrivers[0], {'size': None,
                                                                                 'role': DiskPartition.ROLES.DB,
                                                                                 'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                                 'partition': db_partition,
                                                                                 'mds_service': mds_service})

    # CONFIGURATIONS
    # 1. Volumedriver
    # Dedicated loop names are used on purpose: rebinding 'mds_service' (or 'service') in this loop would
    # make the function return an arbitrary MDS service of this storage router instead of the new one
    mds_nodes = []
    for node_service in mdsservice_type.services:
        if node_service.storagerouter_guid != storagerouter.guid:
            continue
        if node_service.mds_service.vpool_guid == vpool.guid:
            mds_nodes.append({'host': node_service.storagerouter.ip,
                              'port': node_service.ports[0],
                              'db_directory': sdp.path,
                              'scratch_directory': sdp.path})

    # Generate the correct section in the Storage Driver's configuration
    storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name)
    storagedriver_config.load(client)
    storagedriver_config.clean()  # Clean out obsolete values
    storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
    storagedriver_config.save(client, reload_config=reload_config)

    return mds_service
def promote_node(cluster_ip, master_ip, ip_client_map, unique_id, configure_memcached, configure_rabbitmq):
    """
    Promotes a given node to master

    The steps below are executed strictly in this order:
    * Mark the StorageRouter as 'MASTER' in the model
    * Extend the 'config' Arakoon cluster towards the node (only when no external config is set)
    * Extend the OVS DB Arakoon cluster towards the node (only when that cluster is internally managed)
    * Configure memcached / RabbitMQ on the node and register their endpoints in the configuration
    * Start and restart the relevant services, run the 'nodetype' / 'promote' hooks
    * Flag the promote as completed in the configuration and remove the rollback marker file

    :param cluster_ip: IP of the node to promote; key of its client in ip_client_map
    :param master_ip: IP of the node used as master; key of its client in ip_client_map
    :param ip_client_map: Mapping of IP to client object (presumably SSHClient instances - verify against caller)
    :param unique_id: Machine ID used to look up the StorageRouter which gets promoted
    :param configure_memcached: Only when exactly True memcached is validated, configured and its endpoint registered
    :param configure_rabbitmq: Only when exactly True RabbitMQ is configured, clustered and its endpoint registered
    :raises RuntimeError: The memcache validation fails (when configure_memcached is True)
                          No other node than cluster_ip is part of the OVS DB Arakoon cluster config
    :return: None
    """
    from ovs.dal.lists.storagerouterlist import StorageRouterList
    from ovs.dal.lists.servicetypelist import ServiceTypeList
    from ovs.dal.lists.servicelist import ServiceList
    from ovs.dal.hybrids.service import Service

    Toolbox.log(logger=NodeTypeController._logger,
                messages='Promoting node',
                title=True)
    service_manager = ServiceFactory.get_manager()
    # Validate up front, before anything is changed in the model or on the node
    if configure_memcached is True:
        if NodeTypeController._validate_local_memcache_servers(
                ip_client_map) is False:
            raise RuntimeError(
                'Not all memcache nodes can be reached which is required for promoting a node.'
            )

    target_client = ip_client_map[cluster_ip]
    machine_id = System.get_my_machine_id(target_client)
    node_name, _ = target_client.get_hostname()
    master_client = ip_client_map[master_ip]

    storagerouter = StorageRouterList.get_by_machine_id(unique_id)
    storagerouter.node_type = 'MASTER'
    storagerouter.save()

    # Without an external config, the 'config' Arakoon cluster is extended towards the new master
    external_config = Configuration.get('/ovs/framework/external_config')
    if external_config is None:
        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Joining Arakoon configuration cluster')
        arakoon_installer = ArakoonInstaller(cluster_name='config')
        arakoon_installer.load(ip=master_ip)
        arakoon_installer.extend_cluster(
            new_ip=cluster_ip,
            base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
        arakoon_installer.restart_cluster_after_extending(
            new_ip=cluster_ip)
        service_manager.register_service(
            node_name=machine_id,
            service_metadata=arakoon_installer.service_metadata[cluster_ip]
        )

    # Find other (arakoon) master nodes
    arakoon_cluster_name = str(
        Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(
        cluster_name=arakoon_cluster_name)
    config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name)
    master_node_ips = [node.ip for node in config.nodes]
    if cluster_ip in master_node_ips:
        master_node_ips.remove(cluster_ip)
    if len(master_node_ips) == 0:
        raise RuntimeError(
            'There should be at least one other master node')

    # Only an internally managed OVS DB cluster is extended; arakoon_ports stays empty otherwise
    arakoon_ports = []
    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Joining Arakoon OVS DB cluster')
        arakoon_installer = ArakoonInstaller(
            cluster_name=arakoon_cluster_name)
        arakoon_installer.load()
        arakoon_installer.extend_cluster(
            new_ip=cluster_ip,
            base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
        arakoon_installer.restart_cluster_after_extending(
            new_ip=cluster_ip)
        arakoon_ports = arakoon_installer.ports[cluster_ip]

    if configure_memcached is True:
        NodeTypeController.configure_memcached(
            client=target_client, logger=NodeTypeController._logger)
    NodeTypeController.add_services(client=target_client,
                                    node_type='master',
                                    logger=NodeTypeController._logger)

    # Register the new endpoints; the configuration is only written when the endpoint was not yet listed
    Toolbox.log(logger=NodeTypeController._logger,
                messages='Update configurations')
    if configure_memcached is True:
        endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
        endpoint = '{0}:11211'.format(cluster_ip)
        if endpoint not in endpoints:
            endpoints.append(endpoint)
            Configuration.set('/ovs/framework/memcache|endpoints',
                              endpoints)
    if configure_rabbitmq is True:
        endpoints = Configuration.get(
            '/ovs/framework/messagequeue|endpoints')
        endpoint = '{0}:5672'.format(cluster_ip)
        if endpoint not in endpoints:
            endpoints.append(endpoint)
            Configuration.set('/ovs/framework/messagequeue|endpoints',
                              endpoints)

    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Restarting master node services')
        # Reset the store references held by both factories (presumably forces new clients
        # against the extended cluster - verify against the factories)
        PersistentFactory.store = None
        VolatileFactory.store = None

        # Model the 'arakoon-ovsdb' service for this node unless a matching service is already modelled
        if 'arakoon-ovsdb' not in [
                s.name for s in ServiceList.get_services()
                if s.is_internal is False or s.storagerouter.ip == cluster_ip
        ]:
            service = Service()
            service.name = 'arakoon-ovsdb'
            service.type = ServiceTypeList.get_by_name(
                ServiceType.SERVICE_TYPES.ARAKOON)
            service.ports = arakoon_ports
            service.storagerouter = storagerouter
            service.save()

    if configure_rabbitmq is True:
        NodeTypeController.configure_rabbitmq(
            client=target_client, logger=NodeTypeController._logger)
        # Copy rabbitmq cookie
        rabbitmq_cookie_file = '/var/lib/rabbitmq/.erlang.cookie'

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Copying RabbitMQ cookie')
        contents = master_client.file_read(rabbitmq_cookie_file)
        master_hostname, _ = master_client.get_hostname()
        target_client.dir_create(os.path.dirname(rabbitmq_cookie_file))
        target_client.file_write(rabbitmq_cookie_file, contents)
        target_client.file_chmod(rabbitmq_cookie_file, mode=0400)
        # Join the RabbitMQ cluster of the master; every command is followed by a fixed 5 second pause
        target_client.run(['rabbitmq-server', '-detached'])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'stop_app'])
        time.sleep(5)
        target_client.run([
            'rabbitmqctl', 'join_cluster',
            'rabbit@{0}'.format(master_hostname)
        ])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'stop'])
        time.sleep(5)

        # Enable HA for the rabbitMQ queues
        ServiceFactory.change_service_state(target_client,
                                            'rabbitmq-server', 'start',
                                            NodeTypeController._logger)
        NodeTypeController.check_rabbitmq_and_enable_ha_mode(
            client=target_client, logger=NodeTypeController._logger)

    NodeTypeController._configure_amqp_to_volumedriver()

    Toolbox.log(logger=NodeTypeController._logger,
                messages='Starting services')
    services = ['memcached', 'arakoon-ovsdb', 'rabbitmq-server']
    # 'arakoon-ovsdb' is not started here when the OVS DB cluster is internally managed
    if arakoon_metadata['internal'] is True:
        services.remove('arakoon-ovsdb')
    for service in services:
        if service_manager.has_service(service, client=target_client):
            ServiceFactory.change_service_state(target_client, service,
                                                'start',
                                                NodeTypeController._logger)

    Toolbox.log(logger=NodeTypeController._logger,
                messages='Restarting services')
    NodeTypeController.restart_framework_and_memcache_services(
        clients=ip_client_map, logger=NodeTypeController._logger)

    # A truthy result of the hooks triggers a second restart of the services
    if Toolbox.run_hooks(component='nodetype',
                         sub_component='promote',
                         logger=NodeTypeController._logger,
                         cluster_ip=cluster_ip,
                         master_ip=master_ip):
        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(
            clients=ip_client_map, logger=NodeTypeController._logger)

    if NodeTypeController.avahi_installed(
            client=target_client,
            logger=NodeTypeController._logger) is True:
        NodeTypeController.configure_avahi(
            client=target_client,
            node_name=node_name,
            node_type='master',
            logger=NodeTypeController._logger)
    Configuration.set('/ovs/framework/hosts/{0}/type'.format(machine_id),
                      'MASTER')
    target_client.run(
        ['chown', '-R', 'ovs:ovs', '/opt/OpenvStorage/config'])
    Configuration.set(
        '/ovs/framework/hosts/{0}/promotecompleted'.format(machine_id),
        True)

    # Remove the rollback marker file, if present, now that the promote has finished
    if target_client.file_exists('/tmp/ovs_rollback'):
        target_client.file_delete('/tmp/ovs_rollback')

    Toolbox.log(logger=NodeTypeController._logger,
                messages='Promote complete')
def migrate(previous_version):
    """
    Migrates the model from the given version up to the latest version known by this script

    Every migration step is guarded by 'if working_version < N' and bumps working_version to N when done,
    so all steps with a number higher than previous_version are executed, in ascending order.
    If previous_version is for example 1 and the script would be at version 3, two steps are executed:
      - 1 > 2
      - 2 > 3
    The steps defined below are 1, 2, 3, 4, 5, 6, 7, 10 and 11.

    :param previous_version: The previous version from which to start the migration.
    :return: The version reached after running all required migrations
    """

    working_version = previous_version

    # Version 1 introduced:
    # - The datastore is still empty, add defaults
    if working_version < 1:
        from ovs.dal.hybrids.user import User
        from ovs.dal.hybrids.group import Group
        from ovs.dal.hybrids.role import Role
        from ovs.dal.hybrids.client import Client
        from ovs.dal.hybrids.failuredomain import FailureDomain
        from ovs.dal.hybrids.j_rolegroup import RoleGroup
        from ovs.dal.hybrids.j_roleclient import RoleClient
        from ovs.dal.hybrids.backendtype import BackendType
        from ovs.dal.hybrids.servicetype import ServiceType
        from ovs.dal.hybrids.branding import Branding
        from ovs.dal.lists.backendtypelist import BackendTypeList

        # Create groups
        admin_group = Group()
        admin_group.name = 'administrators'
        admin_group.description = 'Administrators'
        admin_group.save()
        viewers_group = Group()
        viewers_group.name = 'viewers'
        viewers_group.description = 'Viewers'
        viewers_group.save()

        # Create users
        # NOTE(review): the default password is stored as an unsalted SHA-256 hash of a fixed literal
        admin = User()
        admin.username = '******'
        admin.password = hashlib.sha256('admin').hexdigest()
        admin.is_active = True
        admin.group = admin_group
        admin.save()

        # Create internal OAuth 2 clients
        admin_pw_client = Client()
        admin_pw_client.ovs_type = 'INTERNAL'
        admin_pw_client.grant_type = 'PASSWORD'
        admin_pw_client.user = admin
        admin_pw_client.save()
        admin_cc_client = Client()
        admin_cc_client.ovs_type = 'INTERNAL'
        admin_cc_client.grant_type = 'CLIENT_CREDENTIALS'
        # Client secret: 128 characters picked from letters, digits and a set of punctuation characters
        # NOTE(review): if 'random' is the stdlib module, it is not meant for generating secrets - confirm
        admin_cc_client.client_secret = ''.join(random.choice(string.ascii_letters +
                                                              string.digits +
                                                              '|_=+*#@!/-[]{}<>.?,\'";:~')
                                                for _ in range(128))
        admin_cc_client.user = admin
        admin_cc_client.save()

        # Create roles
        read_role = Role()
        read_role.code = 'read'
        read_role.name = 'Read'
        read_role.description = 'Can read objects'
        read_role.save()
        write_role = Role()
        write_role.code = 'write'
        write_role.name = 'Write'
        write_role.description = 'Can write objects'
        write_role.save()
        manage_role = Role()
        manage_role.code = 'manage'
        manage_role.name = 'Manage'
        manage_role.description = 'Can manage the system'
        manage_role.save()

        # Attach groups to roles
        mapping = [
            (admin_group, [read_role, write_role, manage_role]),
            (viewers_group, [read_role])
        ]
        for setting in mapping:
            for role in setting[1]:
                rolegroup = RoleGroup()
                rolegroup.group = setting[0]
                rolegroup.role = role
                rolegroup.save()
            # Every client of every user in the group receives each role of the group as well
            for user in setting[0].users:
                for role in setting[1]:
                    for client in user.clients:
                        roleclient = RoleClient()
                        roleclient.client = client
                        roleclient.role = role
                        roleclient.save()

        # Add backends
        # An existing backend type with the same code is reused and updated rather than duplicated
        for backend_type_info in [('Ceph', 'ceph_s3'),
                                  ('Amazon', 'amazon_s3'),
                                  ('Swift', 'swift_s3'),
                                  ('Local', 'local'),
                                  ('Distributed', 'distributed'),
                                  ('ALBA', 'alba')]:
            code = backend_type_info[1]
            backend_type = BackendTypeList.get_backend_type_by_code(code)
            if backend_type is None:
                backend_type = BackendType()
            backend_type.name = backend_type_info[0]
            backend_type.code = code
            backend_type.save()

        # Add service types
        for service_type_info in [ServiceType.SERVICE_TYPES.MD_SERVER, ServiceType.SERVICE_TYPES.ALBA_PROXY, ServiceType.SERVICE_TYPES.ARAKOON]:
            service_type = ServiceType()
            service_type.name = service_type_info
            service_type.save()

        # Branding
        branding = Branding()
        branding.name = 'Default'
        branding.description = 'Default bootstrap theme'
        branding.css = 'bootstrap-default.min.css'
        branding.productname = 'Open vStorage'
        branding.is_default = True
        branding.save()
        slate = Branding()
        slate.name = 'Slate'
        slate.description = 'Dark bootstrap theme'
        slate.css = 'bootstrap-slate.min.css'
        slate.productname = 'Open vStorage'
        slate.is_default = False
        slate.save()

        # Failure Domain
        failure_domain = FailureDomain()
        failure_domain.name = 'Default'
        failure_domain.save()

        # We're now at version 1
        working_version = 1

    # Version 2 introduced:
    # - new Descriptor format
    if working_version < 2:
        import imp
        from ovs.dal.helpers import Descriptor
        from ovs.extensions.storage.persistentfactory import PersistentFactory
        client = PersistentFactory.get_client()
        keys = client.prefix('ovs_data')
        for key in keys:
            data = client.get(key)
            modified = False
            for entry in data.keys():
                # Only descriptor-like entries (dicts with a 'source' pointing into 'hybrids') are regenerated
                if isinstance(data[entry], dict) and 'source' in data[entry] and 'hybrids' in data[entry]['source']:
                    filename = data[entry]['source']
                    # Relative sources are resolved against the fixed DAL installation directory
                    if not filename.startswith('/'):
                        filename = '/opt/OpenvStorage/ovs/dal/{0}'.format(filename)
                    module = imp.load_source(data[entry]['name'], filename)
                    cls = getattr(module, data[entry]['type'])
                    new_data = Descriptor(cls, cached=False).descriptor
                    if 'guid' in data[entry]:
                        new_data['guid'] = data[entry]['guid']
                    data[entry] = new_data
                    modified = True
            # Only write back (with a bumped '_version') when at least one entry was regenerated
            if modified is True:
                data['_version'] += 1
                client.set(key, data)

        # We're now at version 2
        working_version = 2

    # Version 3 introduced:
    # - new Descriptor format
    if working_version < 3:
        import imp
        from ovs.dal.helpers import Descriptor
        from ovs.extensions.storage.persistentfactory import PersistentFactory
        client = PersistentFactory.get_client()
        keys = client.prefix('ovs_data')
        for key in keys:
            data = client.get(key)
            modified = False
            for entry in data.keys():
                # Same regeneration as version 2, but for every dict carrying a 'source', used as is
                if isinstance(data[entry], dict) and 'source' in data[entry]:
                    module = imp.load_source(data[entry]['name'], data[entry]['source'])
                    cls = getattr(module, data[entry]['type'])
                    new_data = Descriptor(cls, cached=False).descriptor
                    if 'guid' in data[entry]:
                        new_data['guid'] = data[entry]['guid']
                    data[entry] = new_data
                    modified = True
            if modified is True:
                data['_version'] += 1
                client.set(key, data)

        working_version = 3

    # Version 4 introduced:
    # - Flexible SSD layout
    if working_version < 4:
        import os
        from ovs.dal.hybrids.diskpartition import DiskPartition
        from ovs.dal.hybrids.j_storagedriverpartition import StorageDriverPartition
        from ovs.dal.hybrids.servicetype import ServiceType
        from ovs.dal.lists.servicetypelist import ServiceTypeList
        from ovs.dal.lists.storagedriverlist import StorageDriverList
        from ovs.extensions.generic.remote import remote
        from ovs.extensions.generic.sshclient import SSHClient
        from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
        # Part 1: model a StorageDriverPartition for the directories of every MDS service
        for service in ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER).services:
            mds_service = service.mds_service
            # Find the storagedriver serving the vPool of this MDS service on the same storagerouter
            # NOTE(review): when none matches, storagedriver stays None and the '_data' access below fails
            storagedriver = None
            for current_storagedriver in service.storagerouter.storagedrivers:
                if current_storagedriver.vpool_guid == mds_service.vpool_guid:
                    storagedriver = current_storagedriver
                    break
            # Directory to migrate -> (role, sub role) it is modelled with
            tasks = {}
            if storagedriver._data.get('mountpoint_md'):
                tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_md'),
                                               storagedriver.vpool.name,
                                               mds_service.number)] = (DiskPartition.ROLES.DB, StorageDriverPartition.SUBROLE.MDS)
            if storagedriver._data.get('mountpoint_temp'):
                tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_temp'),
                                               storagedriver.vpool.name,
                                               mds_service.number)] = (DiskPartition.ROLES.SCRUB, StorageDriverPartition.SUBROLE.MDS)
            for disk in service.storagerouter.disks:
                for partition in disk.partitions:
                    for directory, (role, subrole) in tasks.iteritems():
                        with remote(storagedriver.storagerouter.ip, [os], username='******') as rem:
                            # Walk up to the closest existing parent directory and read its st_dev
                            stat_dir = directory
                            while not rem.os.path.exists(stat_dir) and stat_dir != '/':
                                stat_dir = stat_dir.rsplit('/', 1)[0]
                                if not stat_dir:
                                    stat_dir = '/'
                            inode = rem.os.stat(stat_dir).st_dev
                        # The directory belongs to this partition when st_dev equals the modelled 'inode'
                        if partition.inode == inode:
                            if role not in partition.roles:
                                partition.roles.append(role)
                                partition.save()
                            number = 0
                            migrated = False
                            for sd_partition in storagedriver.partitions:
                                if sd_partition.role == role and sd_partition.sub_role == subrole:
                                    if sd_partition.mds_service == mds_service:
                                        migrated = True
                                        break
                                    if sd_partition.partition_guid == partition.guid:
                                        number = max(sd_partition.number, number)
                            if migrated is False:
                                sd_partition = StorageDriverPartition()
                                sd_partition.role = role
                                sd_partition.sub_role = subrole
                                sd_partition.partition = partition
                                sd_partition.storagedriver = storagedriver
                                sd_partition.mds_service = mds_service
                                sd_partition.size = None
                                sd_partition.number = number + 1
                                sd_partition.save()
                                # Create the parent of the new path and symlink the new path to the old directory
                                client = SSHClient(storagedriver.storagerouter, username='******')
                                path = sd_partition.path.rsplit('/', 1)[0]
                                if path:
                                    client.dir_create(path)
                                    client.dir_chown(path, 'ovs', 'ovs')
                                client.dir_create(directory)
                                client.dir_chown(directory, 'ovs', 'ovs')
                                client.symlink({sd_partition.path: directory})
        # Part 2: migrate the legacy 'mountpoint_*' values of every storagedriver into StorageDriverPartitions
        for storagedriver in StorageDriverList.get_storagedrivers():
            # Source directory -> StorageDriverPartition created for it; used below to load the sizes
            migrated_objects = {}
            for disk in storagedriver.storagerouter.disks:
                for partition in disk.partitions:
                    # Process all mountpoints that are unique and don't have a specified size
                    # Legacy key -> (role, {folder template: sub role})
                    for key, (role, sr_info) in {'mountpoint_md': (DiskPartition.ROLES.DB, {'metadata_{0}': StorageDriverPartition.SUBROLE.MD,
                                                                                            'tlogs_{0}': StorageDriverPartition.SUBROLE.TLOG}),
                                                 'mountpoint_fragmentcache': (DiskPartition.ROLES.WRITE, {'fcache_{0}': StorageDriverPartition.SUBROLE.FCACHE}),
                                                 'mountpoint_foc': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                                                                'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
                                                 'mountpoint_dtl': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                                                                'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
                                                 'mountpoint_readcaches': (DiskPartition.ROLES.READ, {'': None}),
                                                 'mountpoint_writecaches': (DiskPartition.ROLES.WRITE, {'sco_{0}': StorageDriverPartition.SUBROLE.SCO})}.iteritems():
                        if key in storagedriver._data:
                            is_list = isinstance(storagedriver._data[key], list)
                            # Iterate over a copy: entries are removed from the original while looping
                            entries = storagedriver._data[key][:] if is_list is True else [storagedriver._data[key]]
                            for entry in entries:
                                if not entry:
                                    # Empty values are simply dropped from the legacy data
                                    if is_list:
                                        storagedriver._data[key].remove(entry)
                                        if len(storagedriver._data[key]) == 0:
                                            del storagedriver._data[key]
                                    else:
                                        del storagedriver._data[key]
                                else:
                                    with remote(storagedriver.storagerouter.ip, [os], username='******') as rem:
                                        inode = rem.os.stat(entry).st_dev
                                    if partition.inode == inode:
                                        if role not in partition.roles:
                                            partition.roles.append(role)
                                            partition.save()
                                        for folder, subrole in sr_info.iteritems():
                                            number = 0
                                            # NOTE(review): 'migrated' is never set to True in this loop, so a new
                                            #               StorageDriverPartition is always created below
                                            migrated = False
                                            for sd_partition in storagedriver.partitions:
                                                if sd_partition.role == role and sd_partition.sub_role == subrole:
                                                    if sd_partition.partition_guid == partition.guid:
                                                        number = max(sd_partition.number, number)
                                            if migrated is False:
                                                sd_partition = StorageDriverPartition()
                                                sd_partition.role = role
                                                sd_partition.sub_role = subrole
                                                sd_partition.partition = partition
                                                sd_partition.storagedriver = storagedriver
                                                sd_partition.size = None
                                                sd_partition.number = number + 1
                                                sd_partition.save()
                                                if folder:
                                                    source = '{0}/{1}'.format(entry, folder.format(storagedriver.vpool.name))
                                                else:
                                                    source = entry
                                                client = SSHClient(storagedriver.storagerouter, username='******')
                                                path = sd_partition.path.rsplit('/', 1)[0]
                                                if path:
                                                    client.dir_create(path)
                                                    client.dir_chown(path, 'ovs', 'ovs')
                                                client.symlink({sd_partition.path: source})
                                                migrated_objects[source] = sd_partition
                                        # The entry has been migrated, remove it from the legacy data
                                        if is_list:
                                            storagedriver._data[key].remove(entry)
                                            if len(storagedriver._data[key]) == 0:
                                                del storagedriver._data[key]
                                        else:
                                            del storagedriver._data[key]
                                        storagedriver.save()
            # 'mountpoint_bfs' is renamed to 'mountpoint_dfs'; empty values become None
            if 'mountpoint_bfs' in storagedriver._data:
                storagedriver.mountpoint_dfs = storagedriver._data['mountpoint_bfs']
                if not storagedriver.mountpoint_dfs:
                    storagedriver.mountpoint_dfs = None
                del storagedriver._data['mountpoint_bfs']
                storagedriver.save()
            if 'mountpoint_temp' in storagedriver._data:
                del storagedriver._data['mountpoint_temp']
                storagedriver.save()
            if migrated_objects:
                print 'Loading sizes'
                config = StorageDriverConfiguration('storagedriver', storagedriver.vpool_guid, storagedriver.storagedriver_id)
                config.load()
                # NOTE(review): 'size' is None when the mount point has no 'size' key, in which case
                #               long(size) raises a TypeError for a migrated path
                for readcache in config.configuration.get('content_addressed_cache', {}).get('clustercache_mount_points', []):
                    # The read cache is matched on the parent directory of its configured path
                    path = readcache.get('path', '').rsplit('/', 1)[0]
                    size = int(readcache['size'].strip('KiB')) * 1024 if 'size' in readcache else None
                    if path in migrated_objects:
                        migrated_objects[path].size = long(size)
                        migrated_objects[path].save()
                for writecache in config.configuration.get('scocache', {}).get('scocache_mount_points', []):
                    path = writecache.get('path', '')
                    size = int(writecache['size'].strip('KiB')) * 1024 if 'size' in writecache else None
                    if path in migrated_objects:
                        migrated_objects[path].size = long(size)
                        migrated_objects[path].save()

        working_version = 4

    # Version 5 introduced:
    # - Failure Domains
    if working_version < 5:
        import os
        from ovs.dal.hybrids.failuredomain import FailureDomain
        from ovs.dal.lists.failuredomainlist import FailureDomainList
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.extensions.generic.remote import remote
        from ovs.extensions.generic.sshclient import SSHClient
        # Reuse the first existing failure domain, or create a 'Default' one
        failure_domains = FailureDomainList.get_failure_domains()
        if len(failure_domains) > 0:
            failure_domain = failure_domains[0]
        else:
            failure_domain = FailureDomain()
            failure_domain.name = 'Default'
            failure_domain.save()
        for storagerouter in StorageRouterList.get_storagerouters():
            change = False
            if storagerouter.primary_failure_domain is None:
                storagerouter.primary_failure_domain = failure_domain
                change = True
            if storagerouter.rdma_capable is None:
                client = SSHClient(storagerouter, username='******')
                rdma_capable = False
                # RDMA capable as soon as one '/sys/class/infiniband/*/ports/*/state' file contains 'ACTIVE'
                with remote(client.ip, [os], username='******') as rem:
                    for root, dirs, files in rem.os.walk('/sys/class/infiniband'):
                        for directory in dirs:
                            ports_dir = '/'.join([root, directory, 'ports'])
                            if not rem.os.path.exists(ports_dir):
                                continue
                            for sub_root, sub_dirs, _ in rem.os.walk(ports_dir):
                                # Only the direct children of the ports directory are inspected
                                if sub_root != ports_dir:
                                    continue
                                for sub_directory in sub_dirs:
                                    state_file = '/'.join([sub_root, sub_directory, 'state'])
                                    if rem.os.path.exists(state_file):
                                        if 'ACTIVE' in client.run('cat {0}'.format(state_file)):
                                            rdma_capable = True
                storagerouter.rdma_capable = rdma_capable
                change = True
            if change is True:
                storagerouter.save()

        working_version = 5

    # Version 6 introduced:
    # - Distributed scrubbing
    if working_version < 6:
        from ovs.dal.hybrids.diskpartition import DiskPartition
        from ovs.dal.lists.storagedriverlist import StorageDriverList
        from ovs.extensions.generic.sshclient import SSHClient
        for storage_driver in StorageDriverList.get_storagedrivers():
            root_client = SSHClient(storage_driver.storagerouter, username='******')
            for partition in storage_driver.partitions:
                if partition.role == DiskPartition.ROLES.SCRUB:
                    # Clearing the sub role changes the dynamic 'folder' / 'path', hence the invalidation
                    old_path = partition.path
                    partition.sub_role = None
                    partition.save()
                    partition.invalidate_dynamics(['folder', 'path'])
                    if root_client.dir_exists(partition.path):
                        continue  # New directory already exists
                    # Former MDS scrub directories are symlinked from the new path to the old one
                    if '_mds_' in old_path:
                        if root_client.dir_exists(old_path):
                            root_client.symlink({partition.path: old_path})
                    if not root_client.dir_exists(partition.path):
                        root_client.dir_create(partition.path)
                    root_client.dir_chmod(partition.path, 0777)

        working_version = 6

    # Version 7 introduced:
    # - vPool status
    if working_version < 7:
        # The vpool hybrid module is reloaded before VPool is imported (presumably to pick up the
        # new 'status' property - verify)
        from ovs.dal.hybrids import vpool
        reload(vpool)
        from ovs.dal.hybrids.vpool import VPool
        from ovs.dal.lists.vpoollist import VPoolList
        for _vpool in VPoolList.get_vpools():
            # Note that 'vpool' is rebound here from the module imported above to a VPool object
            vpool = VPool(_vpool.guid)
            if hasattr(vpool, 'status') and vpool.status is None:
                vpool.status = VPool.STATUSES.RUNNING
                vpool.save()

        working_version = 7

    # Version 10 introduced:
    # - Reverse indexes are stored in persistent store
    # - Store more non-changing metadata on disk iso using a dynamic property
    if working_version < 10:
        from ovs.dal.helpers import HybridRunner, Descriptor
        from ovs.dal.datalist import DataList
        from ovs.extensions.storage.persistentfactory import PersistentFactory
        from ovs.extensions.storage.volatilefactory import VolatileFactory
        persistent = PersistentFactory.get_client()
        # Drop all list caches and reverse indexes; the reverse indexes are rebuilt below
        for prefix in ['ovs_listcache', 'ovs_reverseindex']:
            for key in persistent.prefix(prefix):
                persistent.delete(key)
        # Every data key is read and written back unchanged
        for key in persistent.prefix('ovs_data_'):
            persistent.set(key, persistent.get(key))
        base_reverse_key = 'ovs_reverseindex_{0}_{1}|{2}|{3}'
        hybrid_structure = HybridRunner.get_hybrids()
        for class_descriptor in hybrid_structure.values():
            cls = Descriptor().load(class_descriptor).get_object()
            # A query without items, presumably matching every object of the class
            all_objects = DataList(cls, {'type': DataList.where_operator.AND,
                                         'items': []})
            for item in all_objects:
                guid = item.guid
                for relation in item._relations:
                    # A relation without foreign_type is indexed under the item's own class
                    if relation.foreign_type is None:
                        rcls = cls
                        rclsname = rcls.__name__.lower()
                    else:
                        rcls = relation.foreign_type
                        rclsname = rcls.__name__.lower()
                    key = relation.name
                    rguid = item._data[key]['guid']
                    if rguid is not None:
                        reverse_key = base_reverse_key.format(rclsname, rguid, relation.foreign_key, guid)
                        persistent.set(reverse_key, 0)
        volatile = VolatileFactory.get_client()
        # Best effort flush of the volatile store; any failure is silently ignored
        try:
            volatile._client.flush_all()
        except:
            pass
        from ovs.dal.lists.vdisklist import VDiskList
        for vdisk in VDiskList.get_vdisks():
            # Best effort as well: a vDisk for which this fails is skipped silently
            try:
                vdisk.metadata = {'lba_size': vdisk.info['lba_size'],
                                  'cluster_multiplier': vdisk.info['cluster_multiplier']}
                vdisk.save()
            except:
                pass

        working_version = 10

    # Version 11 introduced:
    # - ALBA accelerated ALBA, meaning different vpool.metadata information
    if working_version < 11:
        from ovs.dal.lists.vpoollist import VPoolList
        for vpool in VPoolList.get_vpools():
            # The existing metadata is nested under the 'backend' key
            vpool.metadata = {'backend': vpool.metadata}
            if 'metadata' in vpool.metadata['backend']:
                vpool.metadata['backend']['arakoon_config'] = vpool.metadata['backend'].pop('metadata')
            if 'backend_info' in vpool.metadata['backend']:
                vpool.metadata['backend']['backend_info']['fragment_cache_on_read'] = True
                vpool.metadata['backend']['backend_info']['fragment_cache_on_write'] = False
            vpool.save()
        working_version = 11

    return working_version
def prepare_mds_service(cls, storagerouter, vpool):
    """
    Prepares an MDS service:
        * Creates the required configuration
        * Sets up the service files
    Assumes the StorageRouter and vPool are already configured with a StorageDriver and that all model-wise configurations regarding both have been completed.

    When saving the StorageDriver configuration fails, the model changes made by this call are reverted and the error is logged.
    The exception is not propagated, so in that case the returned junction service has already been deleted from the model.

    :param storagerouter: StorageRouter on which the MDS service will be created
    :type storagerouter: ovs.dal.hybrids.storagerouter.StorageRouter
    :param vpool: The vPool for which the MDS service will be created
    :type vpool: ovs.dal.hybrids.vpool.VPool
    :raises RuntimeError: vPool is not extended on StorageRouter
                          No ServiceType found for 'MetadataServer'
                          No free port is found for the new MDSService
                          No partition found on StorageRouter with DB role
                          No single StorageDriver found for the vPool on the StorageRouter
    :return: Newly created junction service
    :rtype: ovs.dal.hybrids.j_mdsservice.MDSService
    """
    from ovs.lib.storagedriver import StorageDriverController  # Import here to prevent from circular imports

    cls._logger.info('StorageRouter {0} - vPool {1}: Preparing MDS junction service'.format(storagerouter.name, vpool.name))

    mds_service = MDSService()
    with volatile_mutex(name='prepare_mds_{0}'.format(storagerouter.guid), wait=30):
        # VALIDATIONS
        # Verify passed StorageRouter is part of the vPool
        storagerouter.invalidate_dynamics(['vpools_guids'])
        if vpool.guid not in storagerouter.vpools_guids:
            raise RuntimeError('StorageRouter {0} is not part of vPool {1}'.format(storagerouter.name, vpool.name))

        # Verify ServiceType existence
        mds_service_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER)
        if mds_service_type is None:
            raise RuntimeError('No ServiceType found with name {0}'.format(ServiceType.SERVICE_TYPES.MD_SERVER))

        # Retrieve occupied ports for current StorageRouter and max MDSService number for current vPool/StorageRouter combo
        service_number = -1
        occupied_ports = []
        for existing_service in mds_service_type.services:
            if existing_service.storagerouter_guid == storagerouter.guid:
                occupied_ports.extend(existing_service.ports)
                if existing_service.mds_service.vpool_guid == vpool.guid:
                    service_number = max(existing_service.mds_service.number, service_number)

        client = SSHClient(endpoint=storagerouter)
        mds_port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|mds'.format(System.get_my_machine_id(client)))
        free_ports = System.get_free_ports(selected_range=mds_port_range,
                                           exclude=occupied_ports,
                                           amount=1,
                                           client=client)
        if len(free_ports) != 1:
            raise RuntimeError('Failed to find an available port on StorageRouter {0} within range {1}'.format(storagerouter.name, mds_port_range))

        # Partition check
        # The break only leaves the inner loop: when several disks hold a DB partition, the one on the last such disk is used
        db_partition = None
        for disk in storagerouter.disks:
            for partition in disk.partitions:
                if DiskPartition.ROLES.DB in partition.roles:
                    db_partition = partition
                    break
        if db_partition is None:
            raise RuntimeError('Could not find DB partition on StorageRouter {0}'.format(storagerouter.name))

        # Verify StorageDriver configured
        storagedrivers = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid]
        if len(storagedrivers) != 1:
            raise RuntimeError('Expected to find a configured StorageDriver for vPool {0} on StorageRouter {1}'.format(vpool.name, storagerouter.name))

        # MODEL UPDATES
        # Service and MDS service
        service_number += 1
        cls._logger.info('StorageRouter {0} - vPool {1}: Adding junction service with number {2}'.format(storagerouter.name, vpool.name, service_number))

        service = Service()
        service.name = 'metadataserver_{0}_{1}'.format(vpool.name, service_number)
        service.type = mds_service_type
        service.ports = free_ports
        service.storagerouter = storagerouter
        service.save()

        mds_service.vpool = vpool
        mds_service.number = service_number
        mds_service.service = service
        mds_service.save()

        # StorageDriver partitions
        cls._logger.info('StorageRouter {0} - vPool {1}: Adding StorageDriverPartition on partition with mount point {2}'.format(storagerouter.name, vpool.name, db_partition.mountpoint))
        storagedriver = storagedrivers[0]
        sdp = StorageDriverController.add_storagedriverpartition(storagedriver, {'size': None,
                                                                                 'role': DiskPartition.ROLES.DB,
                                                                                 'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                                 'partition': db_partition,
                                                                                 'mds_service': mds_service})

        # CONFIGURATIONS
        # Volumedriver
        mds_nodes = []
        for sd_partition in storagedriver.partitions:
            if sd_partition.role == DiskPartition.ROLES.DB and sd_partition.sub_role == StorageDriverPartition.SUBROLE.MDS and sd_partition.mds_service is not None:
                # Use a dedicated name here: 'service' must keep pointing at the newly created Service,
                # otherwise the revert below could delete a Service belonging to another MDS
                node_service = sd_partition.mds_service.service
                mds_nodes.append({'host': node_service.storagerouter.ip,
                                  'port': node_service.ports[0],
                                  'db_directory': '{0}/db'.format(sd_partition.path),
                                  'scratch_directory': '{0}/scratch'.format(sd_partition.path)})

        cls._logger.info('StorageRouter {0} - vPool {1}: Configuring StorageDriver with MDS nodes: {2}'.format(storagerouter.name, vpool.name, mds_nodes))
        # Generate the correct section in the StorageDriver's configuration
        try:
            storagedriver_config = StorageDriverConfiguration(vpool.guid, storagedriver.storagedriver_id)
            storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
            storagedriver_config.save(client)
        except Exception:
            cls._logger.exception('StorageRouter {0} - vPool {1}: Configuring StorageDriver failed. Reverting model changes'.format(storagerouter.name, vpool.name))
            # Clean up model changes if error occurs
            sdp.delete()
            mds_service.delete()  # Must be removed before the service
            service.delete()
    return mds_service
def shrink_vpool(cls, storagedriver_guid, offline_storage_router_guids=None):
    """
    Removes a StorageDriver (if its the last StorageDriver for a vPool, the vPool is removed as well)
    :param storagedriver_guid: Guid of the StorageDriver to remove
    :type storagedriver_guid: str
    :param offline_storage_router_guids: Guids of StorageRouters which are offline and will be removed from cluster.
                                         WHETHER VPOOL WILL BE DELETED DEPENDS ON THIS
                                         Defaults to no offline StorageRouters
    :type offline_storage_router_guids: list
    :raises RuntimeError: The offline StorageRouters do not match the passed guids
                          Processes keep the vPool mount point occupied
                          No responsive StorageDriver could be found
                          Not all MDS services could be migrated away
                          1 or more errors occurred during the actual removal
    :return: None
    :rtype: NoneType
    """
    # TODO: Add logging
    # TODO: Unit test individual pieces of code
    if offline_storage_router_guids is None:
        offline_storage_router_guids = []

    # Validations
    storagedriver = StorageDriver(storagedriver_guid)
    storagerouter = storagedriver.storagerouter
    cls._logger.info('StorageDriver {0} - Deleting StorageDriver {1}'.format(storagedriver.guid, storagedriver.name))

    vp_installer = VPoolInstaller(name=storagedriver.vpool.name)
    vp_installer.validate(storagedriver=storagedriver)

    sd_installer = StorageDriverInstaller(vp_installer=vp_installer,
                                          storagedriver=storagedriver)

    cls._logger.info('StorageDriver {0} - Checking availability of related StorageRouters'.format(storagedriver.guid))
    sr_client_map = SSHClient.get_clients(endpoints=[sd.storagerouter for sd in vp_installer.vpool.storagedrivers],
                                          user_names=['root'])
    sr_installer = StorageRouterInstaller(root_client=sr_client_map.get(storagerouter, {}).get('root'),
                                          storagerouter=storagerouter,
                                          vp_installer=vp_installer,
                                          sd_installer=sd_installer)

    # The unreachable StorageRouters must be exactly the ones the caller declared offline
    offline_srs = sr_client_map.pop('offline')
    if sorted([sr.guid for sr in offline_srs]) != sorted(offline_storage_router_guids):
        raise RuntimeError('Not all StorageRouters are reachable')

    if storagerouter not in offline_srs:
        # Single quotes in the vPool name are escaped for the shell; '|| true' keeps a non-zero lsof exit code from failing the call
        mtpt_pids = sr_installer.root_client.run("lsof -t +D '/mnt/{0}' || true".format(vp_installer.name.replace(r"'", r"'\''")),
                                                 allow_insecure=True).splitlines()
        if len(mtpt_pids) > 0:
            raise RuntimeError('vPool cannot be deleted. Following processes keep the vPool mount point occupied: {0}'.format(', '.join(mtpt_pids)))

    # Retrieve reachable StorageDrivers
    reachable_storagedrivers = []
    for sd in vp_installer.vpool.storagedrivers:
        if sd.storagerouter not in sr_client_map:
            # StorageRouter is offline
            continue

        sd_key = '/ovs/vpools/{0}/hosts/{1}/config'.format(vp_installer.vpool.guid, sd.storagedriver_id)
        if Configuration.exists(sd_key) is True:
            path = Configuration.get_configuration_path(sd_key)
            with remote(sd.storagerouter.ip, [LocalStorageRouterClient]) as rem:
                try:
                    lsrc = rem.LocalStorageRouterClient(path)
                    lsrc.server_revision()  # 'Cheap' call to verify whether volumedriver is responsive
                    cls._logger.info('StorageDriver {0} - Responsive StorageDriver {1} on node with IP {2}'.format(storagedriver.guid, sd.name, sd.storagerouter.ip))
                    reachable_storagedrivers.append(sd)
                except Exception as exception:
                    # Connection failures only mean this StorageDriver is not responsive, anything else is unexpected
                    if not is_connection_failure(exception):
                        raise

    if len(reachable_storagedrivers) == 0:
        raise RuntimeError('Could not find any responsive node in the cluster')

    # Start removal
    if vp_installer.storagedriver_amount > 1:
        vp_installer.update_status(status=VPool.STATUSES.SHRINKING)
    else:
        vp_installer.update_status(status=VPool.STATUSES.DELETING)

    # Clean up stale vDisks
    cls._logger.info('StorageDriver {0} - Removing stale vDisks'.format(storagedriver.guid))
    VDiskController.remove_stale_vdisks(vpool=vp_installer.vpool)

    # Reconfigure the MDSes
    cls._logger.info('StorageDriver {0} - Reconfiguring MDSes'.format(storagedriver.guid))
    for vdisk_guid in storagerouter.vdisks_guids:
        try:
            MDSServiceController.ensure_safety(vdisk_guid=vdisk_guid,
                                               excluded_storagerouter_guids=[storagerouter.guid] + offline_storage_router_guids)
        except Exception:
            cls._logger.exception('StorageDriver {0} - vDisk {1} - Ensuring MDS safety failed'.format(storagedriver.guid, vdisk_guid))

    # Validate that all MDSes on current StorageRouter have been moved away
    # Ensure safety does not always throw an error, that's why we perform this check here instead of in the Exception clause of above code
    vdisks = []
    for mds in vp_installer.mds_services:
        for junction in mds.vdisks:
            vdisk = junction.vdisk
            if vdisk in vdisks:
                continue
            vdisks.append(vdisk)
            cls._logger.critical('StorageDriver {0} - vDisk {1} {2} - MDS Services have not been migrated away'.format(storagedriver.guid, vdisk.guid, vdisk.name))
    if len(vdisks) > 0:
        # Put back in RUNNING, so it can be used again. Errors keep on displaying in GUI now anyway
        vp_installer.update_status(status=VPool.STATUSES.RUNNING)
        raise RuntimeError('Not all MDS Services have been successfully migrated away')

    # Start with actual removal
    # The results are accumulated with '|=': starting from False, '&=' could never flip the flag to True and
    # every failure reported by the helpers below would be silently dropped
    # NOTE(review): this assumes each installer helper returns True when it ran into an error - confirm against the installers
    errors_found = False
    if storagerouter not in offline_srs:
        errors_found |= sd_installer.stop_services()

    errors_found |= vp_installer.configure_cluster_registry(exclude=[storagedriver], apply_on=reachable_storagedrivers)
    errors_found |= vp_installer.update_node_distance_map()
    errors_found |= vp_installer.remove_mds_services()
    errors_found |= sd_installer.clean_config_management()
    errors_found |= sd_installer.clean_model()

    if storagerouter not in offline_srs:
        errors_found |= sd_installer.clean_directories(mountpoints=StorageRouterController.get_mountpoints(client=sr_installer.root_client))

        try:
            DiskController.sync_with_reality(storagerouter_guid=storagerouter.guid)
        except Exception:
            cls._logger.exception('StorageDriver {0} - Synchronizing disks with reality failed'.format(storagedriver.guid))
            errors_found = True

    if vp_installer.storagedriver_amount > 1:
        # Update the vPool metadata and run DTL checkup
        vp_installer.vpool.metadata['caching_info'].pop(sr_installer.storagerouter.guid, None)
        vp_installer.vpool.save()

        try:
            VDiskController.dtl_checkup(vpool_guid=vp_installer.vpool.guid, ensure_single_timeout=600)
        except Exception:
            cls._logger.exception('StorageDriver {0} - DTL checkup failed for vPool {1} with guid {2}'.format(storagedriver.guid, vp_installer.name, vp_installer.vpool.guid))
    else:
        cls._logger.info('StorageDriver {0} - Removing vPool from model'.format(storagedriver.guid))
        # Clean up model
        try:
            vp_installer.vpool.delete()
        except Exception:
            errors_found = True
            cls._logger.exception('StorageDriver {0} - Cleaning up vPool from the model failed'.format(storagedriver.guid))
        Configuration.delete('/ovs/vpools/{0}'.format(vp_installer.vpool.guid))

    cls._logger.info('StorageDriver {0} - Running MDS checkup'.format(storagedriver.guid))
    try:
        MDSServiceController.mds_checkup()
    except Exception:
        cls._logger.exception('StorageDriver {0} - MDS checkup failed'.format(storagedriver.guid))

    # Update vPool status
    if errors_found is True:
        if vp_installer.storagedriver_amount > 1:
            vp_installer.update_status(status=VPool.STATUSES.FAILURE)
        raise RuntimeError('1 or more errors occurred while trying to remove the StorageDriver. Please check the logs for more information')

    if vp_installer.storagedriver_amount > 1:
        vp_installer.update_status(status=VPool.STATUSES.RUNNING)
    cls._logger.info('StorageDriver {0} - Deleted StorageDriver {1}'.format(storagedriver.guid, storagedriver.name))

    if len(VPoolList.get_vpools()) == 0:
        # Last vPool is gone: an internally managed voldrv Arakoon cluster is no longer needed
        cluster_name = ArakoonInstaller.get_cluster_name('voldrv')
        if ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)['internal'] is True:
            cls._logger.debug('StorageDriver {0} - Removing Arakoon cluster {1}'.format(storagedriver.guid, cluster_name))
            try:
                installer = ArakoonInstaller(cluster_name=cluster_name)
                installer.load()
                installer.delete_cluster()
            except Exception:
                cls._logger.exception('StorageDriver {0} - Delete voldrv Arakoon cluster failed'.format(storagedriver.guid))
            service_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
            service_name = ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name)
            for service in list(service_type.services):  # Copy, because services are deleted while looping
                if service.name == service_name:
                    service.delete()

    # Remove watcher volumedriver service if last StorageDriver on current StorageRouter
    if len(storagerouter.storagedrivers) == 0 and storagerouter not in offline_srs:  # ensure client is initialized for StorageRouter
        try:
            if cls._service_manager.has_service(ServiceFactory.SERVICE_WATCHER_VOLDRV, client=sr_installer.root_client):
                cls._service_manager.stop_service(ServiceFactory.SERVICE_WATCHER_VOLDRV, client=sr_installer.root_client)
                cls._service_manager.remove_service(ServiceFactory.SERVICE_WATCHER_VOLDRV, client=sr_installer.root_client)
        except Exception:
            cls._logger.exception('StorageDriver {0} - {1} service deletion failed'.format(storagedriver.guid, ServiceFactory.SERVICE_WATCHER_VOLDRV))
def _voldrv_arakoon_checkup(create_cluster):
    """
    Checks the Arakoon cluster registered under the 'voldrv' key and creates or extends it where needed
        * When requested and no service is modeled yet: claims an unused cluster or creates a new one
        * When the cluster is internally managed: extends it to the master StorageRouters with a DB role which are not part of it yet
    :param create_cluster: Create (or claim) a cluster when none is modeled yet
    :type create_cluster: bool
    :raises RuntimeError: A cluster must be created, but no master StorageRouter with a DB role is available
    :return: None
    """
    def _model_service(service_storagerouter, arakoon_ports, service_name):
        """
        Models a new Arakoon service and returns it
        """
        modeled = Service()
        modeled.name = service_name
        modeled.type = service_type
        modeled.ports = arakoon_ports
        modeled.storagerouter = service_storagerouter
        modeled.save()
        return modeled

    current_ips = []
    current_services = []
    service_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
    cluster_name = Configuration.get('/ovs/framework/arakoon_clusters').get('voldrv')
    if cluster_name is not None:
        # Collect the services (and the IPs of the internally managed ones) already modeled for this cluster
        arakoon_service_name = ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name)
        for modeled_service in service_type.services:
            if modeled_service.name != arakoon_service_name:
                continue
            current_services.append(modeled_service)
            if modeled_service.is_internal is True:
                current_ips.append(modeled_service.storagerouter.ip)

    all_sr_ips = []
    for slave in StorageRouterList.get_slaves():
        all_sr_ips.append(slave.ip)

    # Master StorageRouters having a DB role, mapped to the first partition carrying that role
    available_storagerouters = {}
    for master in StorageRouterList.get_masters():
        master.invalidate_dynamics(['partition_config'])
        if len(master.partition_config[DiskPartition.ROLES.DB]) > 0:
            available_storagerouters[master] = DiskPartition(master.partition_config[DiskPartition.ROLES.DB][0])
        all_sr_ips.append(master.ip)

    if create_cluster is True and len(current_services) == 0:
        # Create new cluster
        metadata = ArakoonInstaller.get_unused_arakoon_metadata_and_claim(cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.SD)
        if metadata is not None:
            # An unused cluster has been claimed, so there is no local StorageRouter or port to model
            ports = []
            storagerouter = None
        else:
            # No externally managed cluster found, we create 1 ourselves
            if len(available_storagerouters) == 0:
                raise RuntimeError('Could not find any Storage Router with a DB role')

            storagerouter, partition = available_storagerouters.items()[0]
            arakoon_voldrv_cluster = 'voldrv'
            arakoon_installer = ArakoonInstaller(cluster_name=arakoon_voldrv_cluster)
            arakoon_installer.create_cluster(cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.SD,
                                             ip=storagerouter.ip,
                                             base_dir=partition.folder,
                                             log_sinks=LogHandler.get_sink_path('arakoon-server_{0}'.format(arakoon_voldrv_cluster)),
                                             crash_log_sinks=LogHandler.get_sink_path('arakoon-server-crash_{0}'.format(arakoon_voldrv_cluster)))
            arakoon_installer.start_cluster()
            ports = arakoon_installer.ports[storagerouter.ip]
            metadata = arakoon_installer.metadata
            current_ips.append(storagerouter.ip)

        cluster_name = metadata['cluster_name']
        Configuration.set('/ovs/framework/arakoon_clusters|voldrv', cluster_name)
        if storagerouter is None:
            management = 'externally'
        else:
            management = 'internally'
        StorageDriverController._logger.info('Claiming {0} managed arakoon cluster: {1}'.format(management, cluster_name))
        StorageDriverController._configure_arakoon_to_volumedriver(cluster_name=cluster_name)
        created_service = _model_service(service_storagerouter=storagerouter,
                                         arakoon_ports=ports,
                                         service_name=ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name))
        current_services.append(created_service)

    cluster_name = Configuration.get('/ovs/framework/arakoon_clusters').get('voldrv')
    if cluster_name is None:
        return

    metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)
    if 0 < len(current_services) < len(available_storagerouters) and metadata['internal'] is True:
        # Extend the internally managed cluster to every eligible master which is not part of it yet
        for storagerouter, partition in available_storagerouters.iteritems():
            if storagerouter.ip not in current_ips:
                arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
                arakoon_installer.load()
                arakoon_installer.extend_cluster(new_ip=storagerouter.ip,
                                                 base_dir=partition.folder,
                                                 log_sinks=LogHandler.get_sink_path('arakoon-server_{0}'.format(cluster_name)),
                                                 crash_log_sinks=LogHandler.get_sink_path('arakoon-server-crash_{0}'.format(cluster_name)))
                _model_service(service_storagerouter=storagerouter,
                               arakoon_ports=arakoon_installer.ports[storagerouter.ip],
                               service_name=ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name))
                current_ips.append(storagerouter.ip)
                arakoon_installer.restart_cluster_after_extending(new_ip=storagerouter.ip)
        StorageDriverController._configure_arakoon_to_volumedriver(cluster_name=cluster_name)
def build_service_structure(structure, previous_structure=None):
    """
    Builds an MDS service structure in the model
    Objects whose id is already present in previous_structure are re-used instead of being created again

    Example:
        structure = Helper.build_service_structure(
            {'vpools': [1],
             'domains': [],
             'storagerouters': [1],
             'storagedrivers': [(1, 1, 1)],  # (<id>, <vpool_id>, <storagerouter_id>)
             'mds_services': [(1, 1)],  # (<id>, <storagedriver_id>)
             'vdisks': [(1, 1, 1, 1)],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id or None>)
             'storagerouter_domains': []}  # (<id>, <storagerouter_id>, <domain_id>, <backup>)
        )

    :param structure: Description of the objects to model, see the example above
    :type structure: dict
    :param previous_structure: Result of an earlier call, which will be extended
    :type previous_structure: dict
    :return: All modeled objects per type, indexed by the ids used in the structure, together with the MetadataServer service type
    :rtype: dict
    """
    known = {} if previous_structure is None else previous_structure
    vdisks = known.get('vdisks', {})
    vpools = known.get('vpools', {})
    domains = known.get('domains', {})
    services = known.get('services', {})
    mds_services = known.get('mds_services', {})
    storagerouters = known.get('storagerouters', {})
    storagedrivers = known.get('storagedrivers', {})
    storagerouter_domains = known.get('storagerouter_domains', {})

    # Make sure the MetadataServer service type is modeled
    service_type = ServiceTypeList.get_by_name('MetadataServer')
    if service_type is None:
        service_type = ServiceType()
        service_type.name = 'MetadataServer'
        service_type.save()

    # Domains
    for domain_id in structure.get('domains', []):
        if domain_id in domains:
            continue
        new_domain = Domain()
        new_domain.name = 'domain_{0}'.format(domain_id)
        new_domain.save()
        domains[domain_id] = new_domain

    # vPools, a StorageRouterClient is created for every listed vPool, new or not
    srclients = {}
    for vpool_id in structure.get('vpools', []):
        if vpool_id in vpools:
            current_vpool = vpools[vpool_id]
        else:
            current_vpool = VPool()
            current_vpool.name = str(vpool_id)
            current_vpool.status = 'RUNNING'
            current_vpool.save()
            vpools[vpool_id] = current_vpool
        srclients[vpool_id] = StorageRouterClient(current_vpool.guid, None)

    # StorageRouters, each new one gets a single disk holding a single partition with the DB and SCRUB roles
    for sr_id in structure.get('storagerouters', []):
        if sr_id in storagerouters:
            continue
        new_sr = StorageRouter()
        new_sr.name = str(sr_id)
        new_sr.ip = '10.0.0.{0}'.format(sr_id)
        new_sr.rdma_capable = False
        new_sr.node_type = 'MASTER'
        new_sr.machine_id = str(sr_id)
        new_sr.save()
        storagerouters[sr_id] = new_sr

        new_disk = Disk()
        new_disk.storagerouter = new_sr
        new_disk.state = 'OK'
        new_disk.name = '/dev/uda'
        new_disk.size = 1 * 1024 ** 4
        new_disk.is_ssd = True
        new_disk.aliases = ['/dev/uda']
        new_disk.save()

        new_partition = DiskPartition()
        new_partition.offset = 0
        new_partition.size = new_disk.size
        new_partition.aliases = ['/dev/uda-1']
        new_partition.state = 'OK'
        new_partition.mountpoint = '/tmp/unittest/sr_{0}/disk_1/partition_1'.format(sr_id)
        new_partition.disk = new_disk
        new_partition.roles = [DiskPartition.ROLES.DB, DiskPartition.ROLES.SCRUB]
        new_partition.save()

    # StorageDrivers
    for sd_id, vpool_id, sr_id in structure.get('storagedrivers', ()):
        if sd_id in storagedrivers:
            continue
        new_sd = StorageDriver()
        new_sd.vpool = vpools[vpool_id]
        new_sd.storagerouter = storagerouters[sr_id]
        new_sd.name = str(sd_id)
        new_sd.mountpoint = '/'
        new_sd.cluster_ip = storagerouters[sr_id].ip
        new_sd.storage_ip = '10.0.1.{0}'.format(sr_id)
        new_sd.storagedriver_id = str(sd_id)
        new_sd.ports = {'management': 1,
                        'xmlrpc': 2,
                        'dtl': 3,
                        'edge': 4}
        new_sd.save()
        storagedrivers[sd_id] = new_sd
        Helper._set_vpool_storage_driver_configuration(vpool=vpools[vpool_id], storagedriver=new_sd)

    # MDS services, each one backed by a Service and a StorageDriverPartition
    for mds_id, sd_id in structure.get('mds_services', ()):
        if mds_id in mds_services:
            continue
        owner_sd = storagedrivers[sd_id]
        s_id = '{0}-{1}'.format(owner_sd.storagerouter.name, mds_id)
        new_service = Service()
        new_service.name = s_id
        new_service.storagerouter = owner_sd.storagerouter
        new_service.ports = [mds_id]
        new_service.type = service_type
        new_service.save()
        services[s_id] = new_service

        new_mds = MDSService()
        new_mds.service = new_service
        new_mds.number = 0
        new_mds.capacity = 10
        new_mds.vpool = owner_sd.vpool
        new_mds.save()
        mds_services[mds_id] = new_mds

        StorageDriverController.add_storagedriverpartition(owner_sd, {'size': None,
                                                                      'role': DiskPartition.ROLES.DB,
                                                                      'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                      'partition': owner_sd.storagerouter.disks[0].partitions[0],
                                                                      'mds_service': new_mds})

    # vDisks, the volume is created through the StorageRouterClient of its vPool
    for vdisk_id, storage_driver_id, vpool_id, mds_id in structure.get('vdisks', ()):
        if vdisk_id in vdisks:
            continue
        owner_vpool = vpools[vpool_id]
        devicename = 'vdisk_{0}'.format(vdisk_id)
        if mds_id is None:
            backing_mds_services = []
        else:
            backing_mds_services = [mds_services[mds_id]]
        mds_backend_config = Helper._generate_mdsmetadatabackendconfig(backing_mds_services)
        volume_id = srclients[vpool_id].create_volume(devicename, mds_backend_config, 0, str(storage_driver_id))

        new_vdisk = VDisk()
        new_vdisk.name = str(vdisk_id)
        new_vdisk.devicename = devicename
        new_vdisk.volume_id = volume_id
        new_vdisk.vpool = owner_vpool
        new_vdisk.size = 0
        new_vdisk.save()
        new_vdisk.reload_client('storagedriver')
        vdisks[vdisk_id] = new_vdisk

    # Links between StorageRouters and Domains
    for srd_id, sr_id, domain_id, backup in structure.get('storagerouter_domains', ()):
        if srd_id in storagerouter_domains:
            continue
        new_link = StorageRouterDomain()
        new_link.backup = backup
        new_link.domain = domains[domain_id]
        new_link.storagerouter = storagerouters[sr_id]
        new_link.save()
        storagerouter_domains[srd_id] = new_link

    return {'vdisks': vdisks,
            'vpools': vpools,
            'domains': domains,
            'services': services,
            'service_type': service_type,
            'mds_services': mds_services,
            'storagerouters': storagerouters,
            'storagedrivers': storagedrivers,
            'storagerouter_domains': storagerouter_domains}
def prepare_mds_service(client, storagerouter, vpool, fresh_only=True, reload_config=False):
    """
    Prepares an MDS service:
    * Creates the required configuration
    * Sets up the service files

    Assumes the StorageRouter and VPool are already configured with a StorageDriver and that all model-wise
    configuration regarding both is completed.

    :param client: Client passed on to the free port lookup and used to load and save the StorageDriver configuration
    :param storagerouter: StorageRouter on which the MDS service will be created
    :param vpool: The vPool for which the MDS service will be created
    :param fresh_only: When True, nothing is created if the vPool already has an MDS service on the StorageRouter
    :param reload_config: Passed on when saving the StorageDriver configuration
    :return: The newly created MDS service, or None when fresh_only is True and an MDS service was already present
    """
    mdsservice_type = ServiceTypeList.get_by_name('MetadataServer')
    storagedriver = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid][0]

    # Fetch service sequence number
    service_number = -1
    for existing_mds in vpool.mds_services:
        if existing_mds.service.storagerouter_guid == storagerouter.guid:
            service_number = max(existing_mds.number, service_number)

    if fresh_only is True and service_number >= 0:
        return None  # There are already one or more MDS services running, aborting
    service_number += 1

    # Find free port
    occupied_ports = []
    for existing_service in mdsservice_type.services:
        if existing_service.storagerouter_guid == storagerouter.guid:
            occupied_ports.append(existing_service.ports[0])
    port = System.get_free_ports(Configuration.get('ovs.ports.mds'),
                                 exclude=occupied_ports, nr=1, client=client)[0]

    # Add service to the model
    service = DalService()
    service.name = 'metadataserver_{0}_{1}'.format(vpool.name, service_number)
    service.type = mdsservice_type
    service.storagerouter = storagerouter
    service.ports = [port]
    service.save()
    mds_service = MDSService()
    mds_service.service = service
    mds_service.vpool = vpool
    mds_service.number = service_number
    mds_service.save()

    # Collect all MDS services of this vPool on this StorageRouter
    # Dedicated loop names are used so 'mds_service' keeps pointing at the service created above:
    # re-using that name here made the function return whichever MDS service was iterated last
    mds_nodes = []
    for node_service in mdsservice_type.services:
        if node_service.storagerouter_guid == storagerouter.guid:
            node_mds = node_service.mds_service
            if node_mds.vpool_guid == vpool.guid:
                mds_nodes.append({'host': node_service.storagerouter.ip,
                                  'port': node_service.ports[0],
                                  'db_directory': '{0}/mds_{1}_{2}'.format(storagedriver.mountpoint_md,
                                                                           vpool.name,
                                                                           node_mds.number),
                                  'scratch_directory': '{0}/mds_{1}_{2}'.format(storagedriver.mountpoint_temp,
                                                                                vpool.name,
                                                                                node_mds.number)})

    # Generate the correct section in the Storage Driver's configuration
    storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name)
    storagedriver_config.load(client)
    storagedriver_config.clean()  # Clean out obsolete values
    storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
    storagedriver_config.save(client, reload_config=reload_config)

    return mds_service
def remove_mds_service(mds_service, vpool, reconfigure, allow_offline=False):
    """
    Removes an MDS service from the model, optionally reconfigures the StorageDriver and cleans up the MDS directories
    :param mds_service: The MDS service to remove
    :type mds_service: MDSService
    :param vpool: The vPool for which the MDS service will be removed
    :type vpool: VPool
    :param reconfigure: Indicates whether reconfiguration is required
    :type reconfigure: bool
    :param allow_offline: Indicates whether it's OK that the node for which mds services are cleaned is offline
    :type allow_offline: bool
    :raises RuntimeError: The MDS service still serves vDisks and allow_offline is False
    :raises UnableToConnectException: The node cannot be reached and allow_offline is False
    """
    serving_vdisks = len(mds_service.vdisks_guids) > 0
    if serving_vdisks and allow_offline is False:
        raise RuntimeError('Cannot remove MDSService that is still serving disks')

    mdsservice_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER)

    # Clean up model
    directories_to_clean = []
    for partition_to_drop in mds_service.storagedriver_partitions:
        directories_to_clean.append(partition_to_drop.path)
        partition_to_drop.delete()

    if allow_offline is True:
        # Certain vdisks might still be attached to this offline MDS service --> Delete relations
        for junction in mds_service.vdisks:
            junction.delete()

    mds_service.delete()
    mds_service.service.delete()

    storagerouter = mds_service.service.storagerouter
    try:
        client = SSHClient(storagerouter)
        if reconfigure is True:
            # Generate new mds_nodes section
            mds_nodes = []
            for service in mdsservice_type.services:
                if service.storagerouter_guid != storagerouter.guid:
                    continue
                remaining_mds = service.mds_service
                if remaining_mds.vpool_guid != vpool.guid:
                    continue
                mds_partitions = [candidate for candidate in remaining_mds.storagedriver_partitions
                                  if candidate.role == DiskPartition.ROLES.DB and candidate.sub_role == StorageDriverPartition.SUBROLE.MDS]
                sdp = mds_partitions[0]
                mds_nodes.append({'host': service.storagerouter.ip,
                                  'port': service.ports[0],
                                  'db_directory': sdp.path,
                                  'scratch_directory': sdp.path})

            # Generate the correct section in the Storage Driver's configuration
            vpool_storagedrivers = [sd for sd in storagerouter.storagedrivers if sd.vpool_guid == vpool.guid]
            storagedriver = vpool_storagedrivers[0]
            storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id)
            storagedriver_config.load()
            storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
            storagedriver_config.save(client, reload_config=reconfigure)

        # Removing the directories is attempted 5 times, the error of the last attempt is re-raised
        for attempts_left in range(4, -1, -1):
            try:
                root_client = SSHClient(storagerouter, username='******')
                root_client.dir_delete(directories=directories_to_clean,
                                       follow_symlinks=True)
                for dir_name in directories_to_clean:
                    MDSServiceController._logger.debug('Recursively removed {0}'.format(dir_name))
                break
            except Exception:
                MDSServiceController._logger.debug('Waiting for the MDS service to go down...')
                time.sleep(5)
                if attempts_left == 0:
                    raise
    except UnableToConnectException:
        if allow_offline is not True:
            raise
        MDSServiceController._logger.info('Allowed offline node during mds service removal')
def remove_mds_service(mds_service, vpool, reconfigure, allow_offline=False):
    """
    Removes an MDS service: cleans the model, optionally rewrites the StorageDriver configuration and deletes the directories
    :param mds_service: The MDS service to remove
    :type mds_service: MDSService
    :param vpool: The vPool for which the MDS service will be removed
    :type vpool: VPool
    :param reconfigure: Indicates whether reconfiguration is required
    :type reconfigure: bool
    :param allow_offline: Indicates whether it's OK that the node for which mds services are cleaned is offline
    :type allow_offline: bool
    :raises RuntimeError: The MDS service still serves vDisks and allow_offline is False
    :raises UnableToConnectException: The node cannot be reached and allow_offline is False
    """
    if len(mds_service.vdisks_guids) > 0:
        if allow_offline is False:
            raise RuntimeError("Cannot remove MDSService that is still serving disks")

    mdsservice_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER)

    # Clean up model, remembering the directories which have to be removed afterwards
    directories_to_clean = []
    for stale_partition in mds_service.storagedriver_partitions:
        directories_to_clean.append(stale_partition.path)
        stale_partition.delete()

    # Certain vdisks might still be attached to this offline MDS service --> Delete relations
    if allow_offline is True:
        for junction in mds_service.vdisks:
            junction.delete()

    mds_service.delete()
    mds_service.service.delete()

    storagerouter = mds_service.service.storagerouter
    try:
        client = SSHClient(storagerouter)
        if reconfigure is True:
            # Generate new mds_nodes section
            mds_nodes = []
            for service in mdsservice_type.services:
                if service.storagerouter_guid == storagerouter.guid:
                    other_mds = service.mds_service
                    if other_mds.vpool_guid == vpool.guid:
                        matching_partitions = [
                            sd_partition
                            for sd_partition in other_mds.storagedriver_partitions
                            if sd_partition.role == DiskPartition.ROLES.DB
                            and sd_partition.sub_role == StorageDriverPartition.SUBROLE.MDS
                        ]
                        sdp = matching_partitions[0]
                        node = {
                            "host": service.storagerouter.ip,
                            "port": service.ports[0],
                            "db_directory": sdp.path,
                            "scratch_directory": sdp.path,
                        }
                        mds_nodes.append(node)

            # Generate the correct section in the Storage Driver's configuration
            storagedriver = [sd for sd in storagerouter.storagedrivers if sd.vpool_guid == vpool.guid][0]
            storagedriver_config = StorageDriverConfiguration(
                "storagedriver", vpool.guid, storagedriver.storagedriver_id
            )
            storagedriver_config.load()
            storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
            storagedriver_config.save(client, reload_config=reconfigure)

        # Up to 5 attempts to remove the directories, the failure of the last attempt is propagated
        attempt = 0
        while attempt < 5:
            attempt += 1
            try:
                root_client = SSHClient(storagerouter, username="******")
                root_client.dir_delete(directories=directories_to_clean, follow_symlinks=True)
                for dir_name in directories_to_clean:
                    MDSServiceController._logger.debug("Recursively removed {0}".format(dir_name))
                break
            except Exception:
                MDSServiceController._logger.debug("Waiting for the MDS service to go down...")
                time.sleep(5)
                if attempt == 5:
                    raise
    except UnableToConnectException:
        if allow_offline is True:
            MDSServiceController._logger.info("Allowed offline node during mds service removal")
        else:
            raise
def migrate(previous_version):
    """
    Migrates from any version to any version, running all migrations required
    If previous_version is for example 0 and this script is at
    version 3 it will execute two steps:
      - 1 > 2
      - 2 > 3
    Every step is guarded by 'working_version < N', so a step only runs when the model is older than that step.
    When previous_version is already at (or beyond) the latest step, nothing is executed.

    @param previous_version: The previous version from which to start the migration.
    @return: The version the model is at after all applicable steps were executed
    """

    working_version = previous_version

    # Version 1 introduced:
    # - The datastore is still empty, add defaults
    if working_version < 1:
        from ovs.dal.hybrids.user import User
        from ovs.dal.hybrids.group import Group
        from ovs.dal.hybrids.role import Role
        from ovs.dal.hybrids.client import Client
        from ovs.dal.hybrids.failuredomain import FailureDomain
        from ovs.dal.hybrids.j_rolegroup import RoleGroup
        from ovs.dal.hybrids.j_roleclient import RoleClient
        from ovs.dal.hybrids.backendtype import BackendType
        from ovs.dal.hybrids.servicetype import ServiceType
        from ovs.dal.hybrids.branding import Branding
        from ovs.dal.lists.backendtypelist import BackendTypeList

        # Create groups
        admin_group = Group()
        admin_group.name = 'administrators'
        admin_group.description = 'Administrators'
        admin_group.save()
        viewers_group = Group()
        viewers_group.name = 'viewers'
        viewers_group.description = 'Viewers'
        viewers_group.save()

        # Create users
        admin = User()
        admin.username = '******'
        admin.password = hashlib.sha256('admin').hexdigest()
        admin.is_active = True
        admin.group = admin_group
        admin.save()

        # Create internal OAuth 2 clients
        admin_pw_client = Client()
        admin_pw_client.ovs_type = 'INTERNAL'
        admin_pw_client.grant_type = 'PASSWORD'
        admin_pw_client.user = admin
        admin_pw_client.save()
        admin_cc_client = Client()
        admin_cc_client.ovs_type = 'INTERNAL'
        admin_cc_client.grant_type = 'CLIENT_CREDENTIALS'
        # The secret is a credential: draw it from the OS entropy source instead of the default
        # (predictable) Mersenne Twister generator
        secure_random = random.SystemRandom()
        admin_cc_client.client_secret = ''.join(
            secure_random.choice(string.ascii_letters + string.digits + '|_=+*#@!/-[]{}<>.?,\'";:~')
            for _ in range(128))
        admin_cc_client.user = admin
        admin_cc_client.save()

        # Create roles
        read_role = Role()
        read_role.code = 'read'
        read_role.name = 'Read'
        read_role.description = 'Can read objects'
        read_role.save()
        write_role = Role()
        write_role.code = 'write'
        write_role.name = 'Write'
        write_role.description = 'Can write objects'
        write_role.save()
        manage_role = Role()
        manage_role.code = 'manage'
        manage_role.name = 'Manage'
        manage_role.description = 'Can manage the system'
        manage_role.save()

        # Attach groups to roles
        mapping = [(admin_group, [read_role, write_role, manage_role]),
                   (viewers_group, [read_role])]
        for setting in mapping:
            for role in setting[1]:
                rolegroup = RoleGroup()
                rolegroup.group = setting[0]
                rolegroup.role = role
                rolegroup.save()
            # Every client of every user in the group receives the roles of that group
            for user in setting[0].users:
                for role in setting[1]:
                    for client in user.clients:
                        roleclient = RoleClient()
                        roleclient.client = client
                        roleclient.role = role
                        roleclient.save()

        # Add backends (an already existing backend type with the same code is updated instead of duplicated)
        for backend_type_info in [('Ceph', 'ceph_s3'), ('Amazon', 'amazon_s3'), ('Swift', 'swift_s3'),
                                  ('Local', 'local'), ('Distributed', 'distributed'), ('ALBA', 'alba')]:
            code = backend_type_info[1]
            backend_type = BackendTypeList.get_backend_type_by_code(code)
            if backend_type is None:
                backend_type = BackendType()
            backend_type.name = backend_type_info[0]
            backend_type.code = code
            backend_type.save()

        # Add service types
        for service_type_info in [ServiceType.SERVICE_TYPES.MD_SERVER,
                                  ServiceType.SERVICE_TYPES.ALBA_PROXY,
                                  ServiceType.SERVICE_TYPES.ARAKOON]:
            service_type = ServiceType()
            service_type.name = service_type_info
            service_type.save()

        # Branding
        branding = Branding()
        branding.name = 'Default'
        branding.description = 'Default bootstrap theme'
        branding.css = 'bootstrap-default.min.css'
        branding.productname = 'Open vStorage'
        branding.is_default = True
        branding.save()
        slate = Branding()
        slate.name = 'Slate'
        slate.description = 'Dark bootstrap theme'
        slate.css = 'bootstrap-slate.min.css'
        slate.productname = 'Open vStorage'
        slate.is_default = False
        slate.save()

        # Failure Domain
        failure_domain = FailureDomain()
        failure_domain.name = 'Default'
        failure_domain.save()

        # We're now at version 1
        working_version = 1

    # Version 2 introduced:
    # - new Descriptor format
    if working_version < 2:
        import imp
        from ovs.dal.helpers import Descriptor
        from ovs.extensions.storage.persistentfactory import PersistentFactory

        client = PersistentFactory.get_client()
        keys = client.prefix('ovs_data')
        for key in keys:
            data = client.get(key)
            modified = False
            for entry in data.keys():
                if isinstance(data[entry], dict) and 'source' in data[entry] and 'hybrids' in data[entry]['source']:
                    filename = data[entry]['source']
                    if not filename.startswith('/'):
                        # Relative sources are resolved against the DAL directory
                        filename = '/opt/OpenvStorage/ovs/dal/{0}'.format(filename)
                    module = imp.load_source(data[entry]['name'], filename)
                    cls = getattr(module, data[entry]['type'])
                    new_data = Descriptor(cls, cached=False).descriptor
                    if 'guid' in data[entry]:
                        new_data['guid'] = data[entry]['guid']
                    data[entry] = new_data
                    modified = True
            if modified is True:
                data['_version'] += 1
                client.set(key, data)

        # We're now at version 2
        working_version = 2

    # Version 3 introduced:
    # - new Descriptor format
    if working_version < 3:
        import imp
        from ovs.dal.helpers import Descriptor
        from ovs.extensions.storage.persistentfactory import PersistentFactory

        client = PersistentFactory.get_client()
        keys = client.prefix('ovs_data')
        for key in keys:
            data = client.get(key)
            modified = False
            for entry in data.keys():
                if isinstance(data[entry], dict) and 'source' in data[entry]:
                    module = imp.load_source(data[entry]['name'], data[entry]['source'])
                    cls = getattr(module, data[entry]['type'])
                    new_data = Descriptor(cls, cached=False).descriptor
                    if 'guid' in data[entry]:
                        new_data['guid'] = data[entry]['guid']
                    data[entry] = new_data
                    modified = True
            if modified is True:
                data['_version'] += 1
                client.set(key, data)

        working_version = 3

    # Version 4 introduced:
    # - Flexible SSD layout
    if working_version < 4:
        import os
        from ovs.dal.hybrids.diskpartition import DiskPartition
        from ovs.dal.hybrids.j_storagedriverpartition import StorageDriverPartition
        from ovs.dal.hybrids.servicetype import ServiceType
        from ovs.dal.lists.servicetypelist import ServiceTypeList
        from ovs.dal.lists.storagedriverlist import StorageDriverList
        from ovs.extensions.generic.remote import remote
        from ovs.extensions.generic.sshclient import SSHClient
        from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration

        # Part 1: create StorageDriverPartitions for the directories used by the MDS services
        for service in ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER).services:
            mds_service = service.mds_service
            storagedriver = None
            for current_storagedriver in service.storagerouter.storagedrivers:
                if current_storagedriver.vpool_guid == mds_service.vpool_guid:
                    storagedriver = current_storagedriver
                    break
            # NOTE(review): storagedriver stays None when no StorageDriver of the MDS service's vPool is found on
            # the StorageRouter; the attribute access below then raises AttributeError. Behavior left unchanged.
            tasks = {}  # Maps an MDS directory onto the (role, sub_role) its partition has to receive
            if storagedriver._data.get('mountpoint_md'):
                tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_md'),
                                               storagedriver.vpool.name,
                                               mds_service.number)] = (DiskPartition.ROLES.DB,
                                                                       StorageDriverPartition.SUBROLE.MDS)
            if storagedriver._data.get('mountpoint_temp'):
                tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_temp'),
                                               storagedriver.vpool.name,
                                               mds_service.number)] = (DiskPartition.ROLES.SCRUB,
                                                                       StorageDriverPartition.SUBROLE.MDS)
            for disk in service.storagerouter.disks:
                for partition in disk.partitions:
                    for directory, (role, subrole) in tasks.iteritems():
                        with remote(storagedriver.storagerouter.ip, [os], username='******') as rem:
                            # Walk up until an existing directory is found, then read the device it resides on
                            stat_dir = directory
                            while not rem.os.path.exists(stat_dir) and stat_dir != '/':
                                stat_dir = stat_dir.rsplit('/', 1)[0]
                                if not stat_dir:
                                    stat_dir = '/'
                            inode = rem.os.stat(stat_dir).st_dev
                        if partition.inode == inode:
                            if role not in partition.roles:
                                partition.roles.append(role)
                                partition.save()
                            number = 0
                            migrated = False
                            for sd_partition in storagedriver.partitions:
                                if sd_partition.role == role and sd_partition.sub_role == subrole:
                                    if sd_partition.mds_service == mds_service:
                                        migrated = True
                                        break
                                    if sd_partition.partition_guid == partition.guid:
                                        number = max(sd_partition.number, number)
                            if migrated is False:
                                sd_partition = StorageDriverPartition()
                                sd_partition.role = role
                                sd_partition.sub_role = subrole
                                sd_partition.partition = partition
                                sd_partition.storagedriver = storagedriver
                                sd_partition.mds_service = mds_service
                                sd_partition.size = None
                                sd_partition.number = number + 1
                                sd_partition.save()
                                client = SSHClient(storagedriver.storagerouter, username='******')
                                path = sd_partition.path.rsplit('/', 1)[0]
                                if path:
                                    client.dir_create(path)
                                    client.dir_chown(path, 'ovs', 'ovs')
                                client.dir_create(directory)
                                client.dir_chown(directory, 'ovs', 'ovs')
                                # Link the new partition path onto the directory that was in use already
                                client.symlink({sd_partition.path: directory})

        # Part 2: convert the legacy mountpoint properties of every StorageDriver into StorageDriverPartitions
        # Legacy property --> (partition role, {folder template: sub_role}); the mapping is only read below
        mountpoint_mapping = {
            'mountpoint_md': (DiskPartition.ROLES.DB, {'metadata_{0}': StorageDriverPartition.SUBROLE.MD,
                                                       'tlogs_{0}': StorageDriverPartition.SUBROLE.TLOG}),
            'mountpoint_fragmentcache': (DiskPartition.ROLES.WRITE, {'fcache_{0}': StorageDriverPartition.SUBROLE.FCACHE}),
            'mountpoint_foc': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                           'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
            'mountpoint_dtl': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                           'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
            'mountpoint_readcaches': (DiskPartition.ROLES.READ, {'': None}),
            'mountpoint_writecaches': (DiskPartition.ROLES.WRITE, {'sco_{0}': StorageDriverPartition.SUBROLE.SCO})
        }
        for storagedriver in StorageDriverList.get_storagedrivers():
            migrated_objects = {}  # Source directory --> StorageDriverPartition created for it
            for disk in storagedriver.storagerouter.disks:
                for partition in disk.partitions:
                    # Process all mountpoints that are unique and don't have a specified size
                    for key, (role, sr_info) in mountpoint_mapping.iteritems():
                        if key in storagedriver._data:
                            is_list = isinstance(storagedriver._data[key], list)
                            # Iterate over a copy, since entries are removed from the original while looping
                            entries = storagedriver._data[key][:] if is_list is True else [storagedriver._data[key]]
                            for entry in entries:
                                if not entry:
                                    # Empty values carry no information, just drop them
                                    if is_list:
                                        storagedriver._data[key].remove(entry)
                                        if len(storagedriver._data[key]) == 0:
                                            del storagedriver._data[key]
                                    else:
                                        del storagedriver._data[key]
                                else:
                                    with remote(storagedriver.storagerouter.ip, [os], username='******') as rem:
                                        inode = rem.os.stat(entry).st_dev
                                    if partition.inode == inode:
                                        if role not in partition.roles:
                                            partition.roles.append(role)
                                            partition.save()
                                        for folder, subrole in sr_info.iteritems():
                                            number = 0
                                            migrated = False
                                            for sd_partition in storagedriver.partitions:
                                                if sd_partition.role == role and sd_partition.sub_role == subrole:
                                                    if sd_partition.partition_guid == partition.guid:
                                                        number = max(sd_partition.number, number)
                                            if migrated is False:
                                                sd_partition = StorageDriverPartition()
                                                sd_partition.role = role
                                                sd_partition.sub_role = subrole
                                                sd_partition.partition = partition
                                                sd_partition.storagedriver = storagedriver
                                                sd_partition.size = None
                                                sd_partition.number = number + 1
                                                sd_partition.save()
                                                if folder:
                                                    source = '{0}/{1}'.format(entry, folder.format(storagedriver.vpool.name))
                                                else:
                                                    source = entry
                                                client = SSHClient(storagedriver.storagerouter, username='******')
                                                path = sd_partition.path.rsplit('/', 1)[0]
                                                if path:
                                                    client.dir_create(path)
                                                    client.dir_chown(path, 'ovs', 'ovs')
                                                client.symlink({sd_partition.path: source})
                                                migrated_objects[source] = sd_partition
                                        # The legacy value has been converted, remove it from the StorageDriver
                                        if is_list:
                                            storagedriver._data[key].remove(entry)
                                            if len(storagedriver._data[key]) == 0:
                                                del storagedriver._data[key]
                                        else:
                                            del storagedriver._data[key]
                                        storagedriver.save()
            if 'mountpoint_bfs' in storagedriver._data:
                storagedriver.mountpoint_dfs = storagedriver._data['mountpoint_bfs']
                if not storagedriver.mountpoint_dfs:
                    storagedriver.mountpoint_dfs = None
                del storagedriver._data['mountpoint_bfs']
                storagedriver.save()
            if 'mountpoint_temp' in storagedriver._data:
                del storagedriver._data['mountpoint_temp']
                storagedriver.save()
            if migrated_objects:
                # Parenthesized form prints the same text on Python 2 and also parses on Python 3
                print('Loading sizes')
                config = StorageDriverConfiguration('storagedriver', storagedriver.vpool_guid, storagedriver.storagedriver_id)
                config.load()
                for readcache in config.configuration.get('content_addressed_cache', {}).get('clustercache_mount_points', []):
                    path = readcache.get('path', '').rsplit('/', 1)[0]
                    size = int(readcache['size'].strip('KiB')) * 1024 if 'size' in readcache else None
                    if path in migrated_objects:
                        # A mount point without 'size' keeps size None; long(None) would raise a TypeError
                        migrated_objects[path].size = long(size) if size is not None else None
                        migrated_objects[path].save()
                for writecache in config.configuration.get('scocache', {}).get('scocache_mount_points', []):
                    path = writecache.get('path', '')
                    size = int(writecache['size'].strip('KiB')) * 1024 if 'size' in writecache else None
                    if path in migrated_objects:
                        # A mount point without 'size' keeps size None; long(None) would raise a TypeError
                        migrated_objects[path].size = long(size) if size is not None else None
                        migrated_objects[path].save()

        working_version = 4

    # Version 5 introduced:
    # - Failure Domains
    if working_version < 5:
        import os
        from ovs.dal.hybrids.failuredomain import FailureDomain
        from ovs.dal.lists.failuredomainlist import FailureDomainList
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.extensions.generic.remote import remote
        from ovs.extensions.generic.sshclient import SSHClient

        # Re-use the first known failure domain, only create a default one when none exists
        failure_domains = FailureDomainList.get_failure_domains()
        if len(failure_domains) > 0:
            failure_domain = failure_domains[0]
        else:
            failure_domain = FailureDomain()
            failure_domain.name = 'Default'
            failure_domain.save()
        for storagerouter in StorageRouterList.get_storagerouters():
            change = False
            if storagerouter.primary_failure_domain is None:
                storagerouter.primary_failure_domain = failure_domain
                change = True
            if storagerouter.rdma_capable is None:
                client = SSHClient(storagerouter, username='******')
                rdma_capable = False
                with remote(client.ip, [os], username='******') as rem:
                    # RDMA capable as soon as 1 port 'state' file below /sys/class/infiniband contains 'ACTIVE'
                    for root, dirs, files in rem.os.walk('/sys/class/infiniband'):
                        for directory in dirs:
                            ports_dir = '/'.join([root, directory, 'ports'])
                            if not rem.os.path.exists(ports_dir):
                                continue
                            for sub_root, sub_dirs, _ in rem.os.walk(ports_dir):
                                if sub_root != ports_dir:
                                    continue  # Only the direct children of the ports directory are inspected
                                for sub_directory in sub_dirs:
                                    state_file = '/'.join([sub_root, sub_directory, 'state'])
                                    if rem.os.path.exists(state_file):
                                        if 'ACTIVE' in client.run('cat {0}'.format(state_file)):
                                            rdma_capable = True
                storagerouter.rdma_capable = rdma_capable
                change = True
            if change is True:
                storagerouter.save()

        working_version = 5

    # Version 6 introduced:
    # - Distributed scrubbing
    if working_version < 6:
        from ovs.dal.hybrids.diskpartition import DiskPartition
        from ovs.dal.lists.storagedriverlist import StorageDriverList
        from ovs.extensions.generic.sshclient import SSHClient

        for storage_driver in StorageDriverList.get_storagedrivers():
            root_client = SSHClient(storage_driver.storagerouter, username='******')
            for partition in storage_driver.partitions:
                if partition.role == DiskPartition.ROLES.SCRUB:
                    old_path = partition.path
                    partition.sub_role = None
                    partition.save()
                    # 'folder' and 'path' are dynamic properties; invalidate them so partition.path is recalculated
                    partition.invalidate_dynamics(['folder', 'path'])
                    if root_client.dir_exists(partition.path):
                        continue  # New directory already exists
                    if '_mds_' in old_path:
                        if root_client.dir_exists(old_path):
                            root_client.symlink({partition.path: old_path})
                    if not root_client.dir_exists(partition.path):
                        root_client.dir_create(partition.path)
                    # 0o777 is the same value as the former Python 2-only literal 0777
                    root_client.dir_chmod(partition.path, 0o777)

        working_version = 6

    # Version 7 introduced:
    # - vPool status
    if working_version < 7:
        from ovs.dal.hybrids import vpool
        reload(vpool)  # Reload the hybrid module before the VPool class is imported from it
        from ovs.dal.hybrids.vpool import VPool
        from ovs.dal.lists.vpoollist import VPoolList

        for _vpool in VPoolList.get_vpools():
            vpool = VPool(_vpool.guid)
            if hasattr(vpool, 'status') and vpool.status is None:
                vpool.status = VPool.STATUSES.RUNNING
                vpool.save()

        working_version = 7

    # Version 10 introduced:
    # - Reverse indexes are stored in persistent store
    # - Store more non-changing metadata on disk iso using a dynamic property
    if working_version < 10:
        from ovs.dal.helpers import HybridRunner, Descriptor
        from ovs.dal.datalist import DataList
        from ovs.extensions.storage.persistentfactory import PersistentFactory
        from ovs.extensions.storage.volatilefactory import VolatileFactory

        persistent = PersistentFactory.get_client()
        # Drop all cached lists and existing reverse indexes
        for prefix in ['ovs_listcache', 'ovs_reverseindex']:
            for key in persistent.prefix(prefix):
                persistent.delete(key)
        # Store every object again with the value that was just read
        for key in persistent.prefix('ovs_data_'):
            persistent.set(key, persistent.get(key))
        # Rebuild the reverse indexes: 1 key per relation of every object of every hybrid
        base_reverse_key = 'ovs_reverseindex_{0}_{1}|{2}|{3}'
        hybrid_structure = HybridRunner.get_hybrids()
        for class_descriptor in hybrid_structure.values():
            cls = Descriptor().load(class_descriptor).get_object()
            all_objects = DataList(cls, {'type': DataList.where_operator.AND,
                                         'items': []})
            for item in all_objects:
                guid = item.guid
                for relation in item._relations:
                    # A relation without foreign type refers to the class of the object itself
                    rcls = cls if relation.foreign_type is None else relation.foreign_type
                    rclsname = rcls.__name__.lower()
                    key = relation.name
                    rguid = item._data[key]['guid']
                    if rguid is not None:
                        reverse_key = base_reverse_key.format(rclsname, rguid, relation.foreign_key, guid)
                        persistent.set(reverse_key, 0)
        volatile = VolatileFactory.get_client()
        try:
            volatile._client.flush_all()
        except Exception:
            # Best effort: a failing flush does not abort the migration.
            # 'Exception' iso a bare except, so SystemExit and KeyboardInterrupt are no longer swallowed
            pass
        from ovs.dal.lists.vdisklist import VDiskList
        for vdisk in VDiskList.get_vdisks():
            try:
                vdisk.metadata = {'lba_size': vdisk.info['lba_size'],
                                  'cluster_multiplier': vdisk.info['cluster_multiplier']}
                vdisk.save()
            except Exception:
                # Best effort: a vDisk for which the metadata cannot be stored is skipped
                pass

        working_version = 10

    # Version 11 introduced:
    # - ALBA accelerated ALBA, meaning different vpool.metadata information
    if working_version < 11:
        from ovs.dal.lists.vpoollist import VPoolList

        for vpool in VPoolList.get_vpools():
            # The former metadata moves 1 level down, below the 'backend' key
            vpool.metadata = {'backend': vpool.metadata}
            if 'metadata' in vpool.metadata['backend']:
                vpool.metadata['backend']['arakoon_config'] = vpool.metadata['backend'].pop('metadata')
            if 'backend_info' in vpool.metadata['backend']:
                vpool.metadata['backend']['backend_info']['fragment_cache_on_read'] = True
                vpool.metadata['backend']['backend_info']['fragment_cache_on_write'] = False
            vpool.save()

        working_version = 11

    return working_version
def prepare_mds_service(storagerouter, vpool, fresh_only, reload_config):
    """
    Prepares an MDS service:
    * Creates the required configuration
    * Sets up the service files

    Assumes the StorageRouter and VPool are already configured with a StorageDriver and that all model-wise
    configuration regarding both is completed.

    :param storagerouter: Storagerouter on which MDS service will be created
    :type storagerouter: StorageRouter
    :param vpool: The vPool for which the MDS service will be created
    :type vpool: VPool
    :param fresh_only: If True, a new MDS service is only created when no MDS service exists yet for this vPool
                       on this StorageRouter
    :type fresh_only: bool
    :param reload_config: If True, the volumedriver's updated configuration will be reloaded
    :type reload_config: bool
    :raises RuntimeError: When no free port, no DB partition or no StorageDriver for the vPool is found on the
                          StorageRouter
    :return: Newly created MDS service, or None when fresh_only is True and an MDS service already exists
    :rtype: MDSService
    """
    # Fetch service sequence number based on MDS services for current vPool and current storage router
    service_number = -1
    for existing_mds_service in vpool.mds_services:
        if existing_mds_service.service.storagerouter_guid == storagerouter.guid:
            service_number = max(existing_mds_service.number, service_number)

    if fresh_only is True and service_number >= 0:
        return None  # There is already 1 or more MDS services running, aborting

    # VALIDATIONS
    # 1. Find free port based on MDS services for all vPools on current storage router
    client = SSHClient(storagerouter)
    mdsservice_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER)
    occupied_ports = []
    for existing_service in mdsservice_type.services:
        if existing_service.storagerouter_guid == storagerouter.guid:
            occupied_ports.extend(existing_service.ports)

    mds_port_range = Configuration.get(
        "/ovs/framework/hosts/{0}/ports|mds".format(System.get_my_machine_id(client))
    )
    free_ports = System.get_free_ports(selected_range=mds_port_range, exclude=occupied_ports, nr=1, client=client)
    if not free_ports:
        raise RuntimeError(
            "Failed to find an available port on storage router {0} within range {1}".format(
                storagerouter.name, mds_port_range
            )
        )

    # 2. Partition check
    db_partition = None
    for disk in storagerouter.disks:
        for partition in disk.partitions:
            if DiskPartition.ROLES.DB in partition.roles:
                db_partition = partition
                # NOTE(review): this only leaves the inner loop, so a DB partition on a later disk replaces
                # an earlier match. Selection behavior is intentionally left as it was.
                break
    if db_partition is None:
        raise RuntimeError("Could not find DB partition on storage router {0}".format(storagerouter.name))

    # 3. Verify storage driver configured
    storagedrivers = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid]
    if not storagedrivers:
        raise RuntimeError(
            "Expected to find a configured storagedriver for vpool {0} on storage router {1}".format(
                vpool.name, storagerouter.name
            )
        )
    storagedriver = storagedrivers[0]

    # MODEL UPDATES
    # 1. Service
    service_number += 1
    new_service = Service()
    new_service.name = "metadataserver_{0}_{1}".format(vpool.name, service_number)
    new_service.type = mdsservice_type
    new_service.ports = [free_ports[0]]
    new_service.storagerouter = storagerouter
    new_service.save()
    new_mds_service = MDSService()
    new_mds_service.vpool = vpool
    new_mds_service.number = service_number
    new_mds_service.service = new_service
    new_mds_service.save()

    # 2. Storage driver partitions
    from ovs.lib.storagedriver import StorageDriverController

    StorageDriverController.add_storagedriverpartition(
        storagedriver,
        {
            "size": None,
            "role": DiskPartition.ROLES.DB,
            "sub_role": StorageDriverPartition.SUBROLE.MDS,
            "partition": db_partition,
            "mds_service": new_mds_service,
        },
    )

    # CONFIGURATIONS
    # 1. Volumedriver
    # The loop uses its own names: re-using 'service' / 'mds_service' here made the function return whichever
    # MDS service was iterated last (possibly None or one of another vPool) instead of the newly created one
    mds_nodes = []
    for current_service in mdsservice_type.services:
        if current_service.storagerouter_guid != storagerouter.guid:
            continue
        current_mds_service = current_service.mds_service
        if current_mds_service is None or current_mds_service.vpool_guid != vpool.guid:
            continue
        sdp = [
            sd_partition
            for sd_partition in current_mds_service.storagedriver_partitions
            if sd_partition.role == DiskPartition.ROLES.DB
            and sd_partition.sub_role == StorageDriverPartition.SUBROLE.MDS
        ][0]
        mds_nodes.append(
            {
                "host": current_service.storagerouter.ip,
                "port": current_service.ports[0],
                "db_directory": sdp.path,
                "scratch_directory": sdp.path,
            }
        )

    # Generate the correct section in the Storage Driver's configuration
    storagedriver_config = StorageDriverConfiguration("storagedriver", vpool.guid, storagedriver.storagedriver_id)
    storagedriver_config.load()
    storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
    storagedriver_config.save(client, reload_config=reload_config)

    return new_mds_service
def migrate(): """ Executes async migrations. It doesn't matter too much when they are executed, as long as they get eventually executed. This code will typically contain: * "dangerous" migration code (it needs certain running services) * Migration code depending on a cluster-wide state * ... * Successfully finishing a piece of migration code, should create an entry in /ovs/framework/migration in case it should not be executed again * Eg: /ovs/framework/migration|stats_monkey_integration: True """ MigrationController._logger.info('Preparing out of band migrations...') from ovs.dal.lists.servicetypelist import ServiceTypeList from ovs.dal.lists.storagedriverlist import StorageDriverList from ovs.dal.lists.storagerouterlist import StorageRouterList from ovs.dal.lists.vpoollist import VPoolList from ovs.extensions.db.arakooninstaller import ArakoonInstaller from ovs.extensions.generic.configuration import Configuration from ovs.extensions.generic.sshclient import SSHClient from ovs_extensions.generic.toolbox import ExtensionsToolbox from ovs.extensions.migration.migration.ovsmigrator import ExtensionMigrator from ovs.extensions.packages.packagefactory import PackageFactory from ovs_extensions.services.interfaces.systemd import Systemd from ovs.extensions.services.servicefactory import ServiceFactory from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration from ovs.lib.helpers.storagedriver.installer import StorageDriverInstaller MigrationController._logger.info('Start out of band migrations...') service_manager = ServiceFactory.get_manager() sr_client_map = {} for storagerouter in StorageRouterList.get_storagerouters(): sr_client_map[storagerouter.guid] = SSHClient(endpoint=storagerouter.ip, # Is triggered during post-update code too during which the ovs-watcher-framework service is still down and thus not refreshing the heartbeat --> use IP i/o StorageRouter username='******') ######################################################### # 
Addition of 'ExecReload' for AlbaProxy SystemD services if ServiceFactory.get_service_type() == 'systemd': changed_clients = set() for storagedriver in StorageDriverList.get_storagedrivers(): root_client = sr_client_map[storagedriver.storagerouter_guid] for alba_proxy in storagedriver.alba_proxies: service = alba_proxy.service service_name = 'ovs-{0}'.format(service.name) if not service_manager.has_service(name=service_name, client=root_client): continue if 'ExecReload=' in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)): continue try: service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY, client=root_client, target_name=service_name) changed_clients.add(root_client) except: MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name)) for root_client in changed_clients: root_client.run(['systemctl', 'daemon-reload']) ################################################################## # Adjustment of open file descriptors for Arakoon services to 8192 changed_clients = set() for storagerouter in StorageRouterList.get_storagerouters(): root_client = sr_client_map[storagerouter.guid] for service_name in service_manager.list_services(client=root_client): if not service_name.startswith('ovs-arakoon-'): continue if ServiceFactory.get_service_type() == 'systemd': path = '/lib/systemd/system/{0}.service'.format(service_name) check = 'LimitNOFILE=8192' else: path = '/etc/init/{0}.conf'.format(service_name) check = 'limit nofile 8192 8192' if not root_client.file_exists(path): continue if check in root_client.file_read(path): continue try: service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name=service_name) changed_clients.add(root_client) ExtensionsToolbox.edit_version_file(client=root_client, package_name='arakoon', old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, service_name)) except: 
MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name)) for root_client in changed_clients: root_client.run(['systemctl', 'daemon-reload']) ############################# # Migrate to multiple proxies for storagedriver in StorageDriverList.get_storagedrivers(): vpool = storagedriver.vpool root_client = sr_client_map[storagedriver.storagerouter_guid] for alba_proxy in storagedriver.alba_proxies: # Rename alba_proxy service in model service = alba_proxy.service old_service_name = 'albaproxy_{0}'.format(vpool.name) new_service_name = 'albaproxy_{0}_0'.format(vpool.name) if old_service_name != service.name: continue service.name = new_service_name service.save() if not service_manager.has_service(name=old_service_name, client=root_client): continue old_configuration_key = '/ovs/framework/hosts/{0}/services/{1}'.format(storagedriver.storagerouter.machine_id, old_service_name) if not Configuration.exists(key=old_configuration_key): continue # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service) ExtensionsToolbox.edit_version_file(client=root_client, package_name='alba', old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, old_service_name), new_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, new_service_name)) # Register new service and remove old service service_manager.add_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY, client=root_client, params=Configuration.get(old_configuration_key), target_name='ovs-{0}'.format(new_service_name)) # Update scrub proxy config proxy_config_key = '/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid) proxy_config = None if Configuration.exists(key=proxy_config_key) is False else Configuration.get(proxy_config_key) if proxy_config is not None: fragment_cache = proxy_config.get(StorageDriverConfiguration.CACHE_FRAGMENT, ['none', {}]) if fragment_cache[0] == 'alba' and 
fragment_cache[1].get('cache_on_write') is True: # Accelerated ALBA configured fragment_cache_scrub_info = copy.deepcopy(fragment_cache) fragment_cache_scrub_info[1]['cache_on_read'] = False proxy_scrub_config_key = '/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid) proxy_scrub_config = None if Configuration.exists(key=proxy_scrub_config_key) is False else Configuration.get(proxy_scrub_config_key) if proxy_scrub_config is not None and proxy_scrub_config[StorageDriverConfiguration.CACHE_FRAGMENT] == ['none']: proxy_scrub_config[StorageDriverConfiguration.CACHE_FRAGMENT] = fragment_cache_scrub_info Configuration.set(key=proxy_scrub_config_key, value=proxy_scrub_config) # Update 'backend_connection_manager' section changes = False storagedriver_config = StorageDriverConfiguration(vpool.guid, storagedriver.storagedriver_id) if 'backend_connection_manager' not in storagedriver_config.configuration: continue current_config = storagedriver_config.configuration['backend_connection_manager'] if current_config.get('backend_type') != 'MULTI': changes = True backend_connection_manager = {'backend_type': 'MULTI'} for index, proxy in enumerate(sorted(storagedriver.alba_proxies, key=lambda pr: pr.service.ports[0])): backend_connection_manager[str(index)] = copy.deepcopy(current_config) # noinspection PyUnresolvedReferences backend_connection_manager[str(index)]['alba_connection_use_rora'] = True # noinspection PyUnresolvedReferences backend_connection_manager[str(index)]['alba_connection_rora_manifest_cache_capacity'] = 5000 # noinspection PyUnresolvedReferences for key, value in backend_connection_manager[str(index)].items(): if key.startswith('backend_interface'): backend_connection_manager[key] = value # noinspection PyUnresolvedReferences del backend_connection_manager[str(index)][key] for key, value in {'backend_interface_retries_on_error': 5, 'backend_interface_retry_interval_secs': 1, 'backend_interface_retry_backoff_multiplier': 2.0}.iteritems(): if key not 
in backend_connection_manager: backend_connection_manager[key] = value else: backend_connection_manager = current_config for value in backend_connection_manager.values(): if isinstance(value, dict): for key, val in value.items(): if key.startswith('backend_interface'): backend_connection_manager[key] = val changes = True del value[key] for key, value in {'backend_interface_retries_on_error': 5, 'backend_interface_retry_interval_secs': 1, 'backend_interface_retry_backoff_multiplier': 2.0}.iteritems(): if key not in backend_connection_manager: changes = True backend_connection_manager[key] = value if changes is True: storagedriver_config.clear_backend_connection_manager() storagedriver_config.configure_backend_connection_manager(**backend_connection_manager) storagedriver_config.save(root_client) # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section) ExtensionsToolbox.edit_version_file(client=root_client, package_name='volumedriver', old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, 'volumedriver_{0}'.format(vpool.name))) if service_manager.__class__ == Systemd: root_client.run(['systemctl', 'daemon-reload']) ######################################## # Update metadata_store_bits information vpools = VPoolList.get_vpools() for vpool in vpools: bits = None for storagedriver in vpool.storagedrivers: key = '/ovs/framework/hosts/{0}/services/volumedriver_{1}'.format(storagedriver.storagerouter.machine_id, vpool.name) if Configuration.exists(key=key) and 'METADATASTORE_BITS' not in Configuration.get(key=key): if bits is None: entries = service_manager.extract_from_service_file(name='ovs-volumedriver_{0}'.format(vpool.name), client=sr_client_map[storagedriver.storagerouter_guid], entries=['METADATASTORE_BITS=']) if len(entries) == 1: bits = entries[0].split('=')[-1] bits = int(bits) if bits.isdigit() else 5 if bits is not None: try: content = Configuration.get(key=key) content['METADATASTORE_BITS'] = bits 
Configuration.set(key=key, value=content) except: MigrationController._logger.exception('Error updating volumedriver info for vPool {0} on StorageRouter {1}'.format(vpool.name, storagedriver.storagerouter.name)) if bits is not None: vpool.metadata_store_bits = bits vpool.save() ##################################### # Update the vPool metadata structure def _update_metadata_structure(metadata): metadata = copy.deepcopy(metadata) cache_structure = {'read': False, 'write': False, 'is_backend': False, 'quota': None, 'backend_info': {'name': None, # Will be filled in when is_backend is true 'backend_guid': None, 'alba_backend_guid': None, 'policies': None, 'preset': None, 'arakoon_config': None, 'connection_info': {'client_id': None, 'client_secret': None, 'host': None, 'port': None, 'local': None}} } structure_map = {StorageDriverConfiguration.CACHE_BLOCK: {'read': 'block_cache_on_read', 'write': 'block_cache_on_write', 'quota': 'quota_bc', 'backend_prefix': 'backend_bc_{0}'}, StorageDriverConfiguration.CACHE_FRAGMENT: {'read': 'fragment_cache_on_read', 'write': 'fragment_cache_on_write', 'quota': 'quota_fc', 'backend_prefix': 'backend_aa_{0}'}} if 'arakoon_config' in metadata['backend']: # Arakoon config should be placed under the backend info metadata['backend']['backend_info']['arakoon_config'] = metadata['backend'].pop('arakoon_config') if 'connection_info' in metadata['backend']: # Connection info sohuld be placed under the backend info metadata['backend']['backend_info']['connection_info'] = metadata['backend'].pop('connection_info') if 'caching_info' not in metadata: # Caching info is the new key would_be_caching_info = {} metadata['caching_info'] = would_be_caching_info # Extract all caching data for every storagerouter current_caching_info = metadata['backend'].pop('caching_info') # Pop to mutate metadata for storagerouter_guid in current_caching_info.iterkeys(): current_cache_data = current_caching_info[storagerouter_guid] storagerouter_caching_info = {} 
would_be_caching_info[storagerouter_guid] = storagerouter_caching_info for cache_type, cache_type_mapping in structure_map.iteritems(): new_cache_structure = copy.deepcopy(cache_structure) storagerouter_caching_info[cache_type] = new_cache_structure for new_structure_key, old_structure_key in cache_type_mapping.iteritems(): if new_structure_key == 'backend_prefix': # Get possible backend related info metadata_key = old_structure_key.format(storagerouter_guid) if metadata_key not in metadata: continue backend_data = metadata.pop(metadata_key) # Pop to mutate metadata new_cache_structure['is_backend'] = True # Copy over the old data new_cache_structure['backend_info']['arakoon_config'] = backend_data['arakoon_config'] new_cache_structure['backend_info'].update(backend_data['backend_info']) new_cache_structure['backend_info']['connection_info'].update(backend_data['connection_info']) else: new_cache_structure[new_structure_key] = current_cache_data.get(old_structure_key) return metadata vpools = VPoolList.get_vpools() for vpool in vpools: try: new_metadata = _update_metadata_structure(vpool.metadata) vpool.metadata = new_metadata vpool.save() except KeyError: MigrationController._logger.exception('Exceptions occurred when updating the metadata for vPool {0}'.format(vpool.name)) ############################################## # Always use indent=4 during Configuration set def _resave_all_config_entries(config_path='/ovs'): """ Recursive functions which checks every config management key if its a directory or not. 
If not a directory, we retrieve the config and just save it again using the new indentation logic """ for item in Configuration.list(config_path): new_path = config_path + '/' + item print new_path if Configuration.dir_exists(new_path) is True: _resave_all_config_entries(config_path=new_path) else: try: _config = Configuration.get(new_path) Configuration.set(new_path, _config) except: _config = Configuration.get(new_path, raw=True) Configuration.set(new_path, _config, raw=True) if ExtensionMigrator.THIS_VERSION <= 13: # There is no way of checking whether this new indentation logic has been applied, so we only perform this for version 13 and lower MigrationController._logger.info('Re-saving every configuration setting with new indentation rules') _resave_all_config_entries() ############################ # Update some default values def _update_manifest_cache_size(_proxy_config_key): updated = False manifest_cache_size = 500 * 1024 * 1024 if Configuration.exists(key=_proxy_config_key): _proxy_config = Configuration.get(key=_proxy_config_key) for cache_type in [StorageDriverConfiguration.CACHE_BLOCK, StorageDriverConfiguration.CACHE_FRAGMENT]: if cache_type in _proxy_config and _proxy_config[cache_type][0] == 'alba': if _proxy_config[cache_type][1]['manifest_cache_size'] != manifest_cache_size: updated = True _proxy_config[cache_type][1]['manifest_cache_size'] = manifest_cache_size if _proxy_config['manifest_cache_size'] != manifest_cache_size: updated = True _proxy_config['manifest_cache_size'] = manifest_cache_size if updated is True: Configuration.set(key=_proxy_config_key, value=_proxy_config) return updated for storagedriver in StorageDriverList.get_storagedrivers(): try: vpool = storagedriver.vpool root_client = sr_client_map[storagedriver.storagerouter_guid] _update_manifest_cache_size('/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid)) # Generic scrub proxy is deployed every time scrubbing kicks in, so no need to restart these services for 
alba_proxy in storagedriver.alba_proxies: if _update_manifest_cache_size('/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)) is True: # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service) ExtensionsToolbox.edit_version_file(client=root_client, package_name='alba', old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, alba_proxy.service.name)) # Update 'backend_connection_manager' section changes = False storagedriver_config = StorageDriverConfiguration(vpool.guid, storagedriver.storagedriver_id) if 'backend_connection_manager' not in storagedriver_config.configuration: continue current_config = storagedriver_config.configuration['backend_connection_manager'] for key, value in current_config.iteritems(): if key.isdigit() is True: if value.get('alba_connection_asd_connection_pool_capacity') != 10: changes = True value['alba_connection_asd_connection_pool_capacity'] = 10 if value.get('alba_connection_timeout') != 30: changes = True value['alba_connection_timeout'] = 30 if value.get('alba_connection_rora_manifest_cache_capacity') != 25000: changes = True value['alba_connection_rora_manifest_cache_capacity'] = 25000 if changes is True: storagedriver_config.clear_backend_connection_manager() storagedriver_config.configure_backend_connection_manager(**current_config) storagedriver_config.save(root_client) # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section) ExtensionsToolbox.edit_version_file(client=root_client, package_name='volumedriver', old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, 'volumedriver_{0}'.format(vpool.name))) except Exception: MigrationController._logger.exception('Updating default configuration values failed for StorageDriver {0}'.format(storagedriver.storagedriver_id)) #################################################### # Adding proxy fail fast as env variable for proxies changed_clients = set() for 
storagerouter in StorageRouterList.get_storagerouters(): root_client = sr_client_map[storagerouter.guid] for service_name in service_manager.list_services(client=root_client): if not service_name.startswith('ovs-albaproxy_'): continue if ServiceFactory.get_service_type() == 'systemd': path = '/lib/systemd/system/{0}.service'.format(service_name) check = 'Environment=ALBA_FAIL_FAST=true' else: path = '/etc/init/{0}.conf'.format(service_name) check = 'env ALBA_FAIL_FAST=true' if not root_client.file_exists(path): continue if check in root_client.file_read(path): continue try: service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY, client=root_client, target_name=service_name) changed_clients.add(root_client) ExtensionsToolbox.edit_version_file(client=root_client, package_name='alba', old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, service_name)) except: MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name)) for root_client in changed_clients: root_client.run(['systemctl', 'daemon-reload']) ###################################### # Integration of stats monkey (2.10.2) if Configuration.get(key='/ovs/framework/migration|stats_monkey_integration', default=False) is False: try: # Get content of old key into new key old_stats_monkey_key = '/statsmonkey/statsmonkey' if Configuration.exists(key=old_stats_monkey_key) is True: Configuration.set(key='/ovs/framework/monitoring/stats_monkey', value=Configuration.get(key=old_stats_monkey_key)) Configuration.delete(key=old_stats_monkey_key) # Make sure to disable the stats monkey by default or take over the current schedule if it was configured manually before celery_key = '/ovs/framework/scheduling/celery' current_value = None scheduling_config = Configuration.get(key=celery_key, default={}) if 'statsmonkey.run_all_stats' in scheduling_config: # Old celery task name of the stats monkey current_value = 
scheduling_config.pop('statsmonkey.run_all_stats') scheduling_config['ovs.stats_monkey.run_all'] = current_value scheduling_config['alba.stats_monkey.run_all'] = current_value Configuration.set(key=celery_key, value=scheduling_config) support_key = '/ovs/framework/support' support_config = Configuration.get(key=support_key) support_config['support_agent'] = support_config.pop('enabled', True) support_config['remote_access'] = support_config.pop('enablesupport', False) Configuration.set(key=support_key, value=support_config) # Make sure once this finished, it never runs again by setting this key to True Configuration.set(key='/ovs/framework/migration|stats_monkey_integration', value=True) except Exception: MigrationController._logger.exception('Integration of stats monkey failed') ###################################################### # Write away cluster ID to a file for back-up purposes try: cluster_id = Configuration.get(key='/ovs/framework/cluster_id', default=None) with open(Configuration.CONFIG_STORE_LOCATION, 'r') as config_file: config = json.load(config_file) if cluster_id is not None and config.get('cluster_id', None) is None: config['cluster_id'] = cluster_id with open(Configuration.CONFIG_STORE_LOCATION, 'w') as config_file: json.dump(config, config_file, indent=4) except Exception: MigrationController._logger.exception('Writing cluster id to a file failed.') ######################################################### # Additional string formatting in Arakoon services (2.11) try: if Configuration.get(key='/ovs/framework/migration|arakoon_service_update', default=False) is False: arakoon_service_names = [ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name) for cluster_name in Configuration.list(key='ovs/arakoon')] for storagerouter in StorageRouterList.get_masters(): for service_name in arakoon_service_names: config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(storagerouter.machine_id, service_name) if 
Configuration.exists(key=config_key): config = Configuration.get(key=config_key) config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR config['ARAKOON_PKG_NAME'] = PackageFactory.PKG_ARAKOON config['ARAKOON_VERSION_CMD'] = PackageFactory.VERSION_CMD_ARAKOON Configuration.set(key=config_key, value=config) # Make sure once this finished, it never runs again by setting this key to True Configuration.set(key='/ovs/framework/migration|arakoon_service_update', value=True) except Exception: MigrationController._logger.exception('Updating the string formatting for the Arakoon services failed') ############################################################ # Additional string formatting in ALBA proxy services (2.11) changed_clients = set() try: if Configuration.get(key='/ovs/framework/migration|alba_proxy_service_update', default=False) is False: alba_pkg_name, alba_version_cmd = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_ALBA) for service in ServiceTypeList.get_by_name('AlbaProxy').services: root_client = sr_client_map[service.storagerouter_guid] config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(service.storagerouter.machine_id, service.name) if Configuration.exists(key=config_key): config = Configuration.get(key=config_key) config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR config['ALBA_PKG_NAME'] = alba_pkg_name config['ALBA_VERSION_CMD'] = alba_version_cmd Configuration.set(key=config_key, value=config) service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY, client=root_client, target_name='ovs-{0}'.format(service.name)) changed_clients.add(root_client) # Make sure once this finished, it never runs again by setting this key to True Configuration.set(key='/ovs/framework/migration|alba_proxy_service_update', value=True) except Exception: MigrationController._logger.exception('Updating the string formatting for the Arakoon services failed') ############################################################ # 
Additional string formatting in DTL/VOLDRV services (2.11) try: if Configuration.get(key='/ovs/framework/migration|voldrv_service_update', default=False) is False: sd_pkg_name, sd_version_cmd = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_SD) for vpool in VPoolList.get_vpools(): for storagedriver in vpool.storagedrivers: root_client = sr_client_map[storagedriver.storagerouter_guid] for entry in ['dtl', 'volumedriver']: service_name = '{0}_{1}'.format(entry, vpool.name) service_template = StorageDriverInstaller.SERVICE_TEMPLATE_DTL if entry == 'dtl' else StorageDriverInstaller.SERVICE_TEMPLATE_SD config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(storagedriver.storagerouter.machine_id, service_name) if Configuration.exists(key=config_key): config = Configuration.get(key=config_key) config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR config['VOLDRV_PKG_NAME'] = sd_pkg_name config['VOLDRV_VERSION_CMD'] = sd_version_cmd Configuration.set(key=config_key, value=config) service_manager.regenerate_service(name=service_template, client=root_client, target_name='ovs-{0}'.format(service_name)) changed_clients.add(root_client) # Make sure once this finished, it never runs again by setting this key to True Configuration.set(key='/ovs/framework/migration|voldrv_service_update', value=True) except Exception: MigrationController._logger.exception('Updating the string formatting for the Arakoon services failed') ####################################################### # Storing actual package name in version files (2.11.0) (https://github.com/openvstorage/framework/issues/1876) if Configuration.get(key='/ovs/framework/migration|actual_package_name_in_version_file', default=False) is False: try: voldrv_pkg_name, _ = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_SD) for storagerouter in StorageRouterList.get_storagerouters(): root_client = sr_client_map.get(storagerouter.guid) if root_client is None: continue for 
file_name in root_client.file_list(directory=ServiceFactory.RUN_FILE_DIR): if not file_name.endswith('.version'): continue file_path = '{0}/{1}'.format(ServiceFactory.RUN_FILE_DIR, file_name) contents = root_client.file_read(filename=file_path) regenerate = False if voldrv_pkg_name == PackageFactory.PKG_VOLDRV_SERVER: if 'volumedriver-server' in contents: regenerate = True contents = contents.replace('volumedriver-server', PackageFactory.PKG_VOLDRV_SERVER) root_client.file_write(filename=file_path, contents=contents) elif voldrv_pkg_name == PackageFactory.PKG_VOLDRV_SERVER_EE: if 'volumedriver-server' in contents or PackageFactory.PKG_VOLDRV_SERVER in contents: regenerate = True contents = contents.replace('volumedriver-server', PackageFactory.PKG_VOLDRV_SERVER_EE) contents = contents.replace(PackageFactory.PKG_VOLDRV_SERVER, PackageFactory.PKG_VOLDRV_SERVER_EE) root_client.file_write(filename=file_path, contents=contents) if regenerate is True: service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_DTL if file_name.startswith('dtl') else StorageDriverInstaller.SERVICE_TEMPLATE_SD, client=root_client, target_name='ovs-{0}'.format(file_name.split('.')[0])) # Leave out .version changed_clients.add(root_client) Configuration.set(key='/ovs/framework/migration|actual_package_name_in_version_file', value=True) except Exception: MigrationController._logger.exception('Updating actual package name for version files failed') for root_client in changed_clients: try: root_client.run(['systemctl', 'daemon-reload']) except Exception: MigrationController._logger.exception('Executing command "systemctl daemon-reload" failed') ######################################################### # Addition of 'Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50' for AlbaProxy SystemD services if ServiceFactory.get_service_type() == 'systemd': changed_clients = set() for storagedriver in StorageDriverList.get_storagedrivers(): root_client = 
sr_client_map[storagedriver.storagerouter_guid] for alba_proxy in storagedriver.alba_proxies: service = alba_proxy.service service_name = 'ovs-{0}'.format(service.name) if not service_manager.has_service(name=service_name, client=root_client): continue if "Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50" in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)): continue try: service_manager.regenerate_service(name='ovs-albaproxy', client=root_client, target_name=service_name) changed_clients.add(root_client) except: MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name)) for root_client in changed_clients: root_client.run(['systemctl', 'daemon-reload']) ######################################################### # Addition of 'Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50' for Arakoon SystemD services if ServiceFactory.get_service_type() == 'systemd': changed_clients = set() for storagerouter in StorageRouterList.get_storagerouters(): root_client = sr_client_map[storagerouter.guid] for service_name in service_manager.list_services(client=root_client): if not service_name.startswith('ovs-arakoon-'): continue if not service_manager.has_service(name=service_name, client=root_client): continue if "Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50" in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)): continue try: service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name=service_name) changed_clients.add(root_client) except: MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name)) for root_client in changed_clients: root_client.run(['systemctl', 'daemon-reload']) MigrationController._logger.info('Finished out of band migrations')
def build_dal_structure(structure, previous_structure=None):
    """
    Create the ALBA related DAL objects described by 'structure' (test helper)
    Objects already present in 'previous_structure' are reused; the dicts of 'previous_structure' are updated in place
    Example:
        structure = build_dal_structure({
            'alba_backends': [(1, 'LOCAL')],  # (ALBA Backend ID, attribute name on AlbaBackend.SCALINGS)
            'alba_abm_clusters': [1],  # ALBA Backend IDs
            'alba_nsm_clusters': [(1, 2)],  # (ALBA Backend ID, amount of NSM clusters)
            'alba_nodes': [1],  # ALBA Node IDs
            'alba_osds': [(1, 1, 1, 1)]  # (OSD ID, ALBA Backend ID, ALBA Node ID, slot ID)
        })
    :param structure: Description of the objects to create
    :type structure: dict
    :param previous_structure: Result of an earlier call which should be extended
    :type previous_structure: dict
    :raises ValueError: When an ABM or NSM cluster is requested for an unknown ALBA Backend ID
    :return: The modeled objects, grouped per kind and keyed by the IDs used in 'structure'
    :rtype: dict
    """
    known = {} if previous_structure is None else previous_structure
    alba_osds = known.get('alba_osds', {})
    alba_nodes = known.get('alba_nodes', {})
    backend_types = known.get('backend_types', {})
    service_types = known.get('service_types', {})
    alba_backends = known.get('alba_backends', {})
    alba_abm_clusters = known.get('alba_abm_clusters', {})
    alba_nsm_clusters = known.get('alba_nsm_clusters', {})

    # The single 'alba' BackendType is always registered under key 1
    if 1 not in backend_types:
        alba_type = BackendType()
        alba_type.code = 'alba'
        alba_type.name = 'ALBA'
        alba_type.save()
        backend_types[1] = alba_type

    # Both Arakoon related ServiceTypes are looked up in the model first and only created when absent
    for type_name in ('AlbaManager', 'NamespaceManager'):
        if type_name in service_types:
            continue
        found_type = ServiceTypeList.get_by_name(type_name)
        if found_type is None:
            found_type = ServiceType()
            found_type.name = type_name
            found_type.save()
        service_types[type_name] = found_type

    # ALBA Backends (each with its own Backend object)
    for ab_id, scaling in structure.get('alba_backends', ()):
        if ab_id in alba_backends:
            continue
        backend = Backend()
        backend.name = 'backend_{0}'.format(ab_id)
        backend.backend_type = backend_types[1]
        backend.save()
        new_alba_backend = AlbaBackend()
        new_alba_backend.backend = backend
        new_alba_backend.scaling = getattr(AlbaBackend.SCALINGS, scaling)
        new_alba_backend.alba_id = str(ab_id)
        new_alba_backend.save()
        alba_backends[ab_id] = new_alba_backend

    # ABM clusters: 1 cluster, 1 service and 1 junction per ALBA Backend
    for ab_id in structure.get('alba_abm_clusters', ()):
        if ab_id in alba_abm_clusters:
            continue
        if ab_id not in alba_backends:
            raise ValueError('Non-existing ALBA Backend ID provided')
        owner = alba_backends[ab_id]
        owner_name = owner.name
        abm_cluster = ABMCluster()
        abm_cluster.name = '{0}-abm'.format(owner_name)
        abm_cluster.alba_backend = owner
        abm_cluster.config_location = '/ovs/arakoon/{0}-abm/config'.format(owner_name)
        abm_cluster.save()
        abm_service = Service()
        abm_service.name = 'arakoon-{0}-abm'.format(owner_name)
        abm_service.type = service_types['AlbaManager']
        abm_service.ports = []
        abm_service.storagerouter = None
        abm_service.save()
        abm_junction = ABMService()
        abm_junction.service = abm_service
        abm_junction.abm_cluster = abm_cluster
        abm_junction.save()
        alba_abm_clusters[ab_id] = abm_cluster

    # NSM clusters: the list for a Backend is rebuilt whenever the requested amount differs from the known amount
    for ab_id, amount in structure.get('alba_nsm_clusters', ()):
        if ab_id in alba_nsm_clusters and amount == len(alba_nsm_clusters[ab_id]):
            continue
        if ab_id not in alba_backends:
            raise ValueError('Non-existing ALBA Backend ID provided')
        owner = alba_backends[ab_id]
        collected = []
        alba_nsm_clusters[ab_id] = collected
        modeled_by_number = {modeled.number: modeled for modeled in owner.nsm_clusters}
        for number in range(amount):
            if number in modeled_by_number:
                # Already modeled for this Backend, so reuse it
                nsm_cluster = modeled_by_number[number]
            else:
                nsm_cluster = NSMCluster()
                nsm_cluster.name = '{0}-nsm_{1}'.format(owner.name, number)
                nsm_cluster.number = number
                nsm_cluster.alba_backend = owner
                nsm_cluster.config_location = '/ovs/arakoon/{0}-nsm_{1}/config'.format(owner.name, number)
                nsm_cluster.save()
                nsm_service = Service()
                nsm_service.name = 'arakoon-{0}-nsm_{1}'.format(owner.name, number)
                nsm_service.type = service_types['NamespaceManager']
                nsm_service.ports = []
                nsm_service.storagerouter = None
                nsm_service.save()
                nsm_junction = NSMService()
                nsm_junction.service = nsm_service
                nsm_junction.nsm_cluster = nsm_cluster
                nsm_junction.save()
            collected.append(nsm_cluster)

    # ALBA Nodes; every newly created node gets a mocked 'get_metadata' answer
    for an_id in structure.get('alba_nodes', []):
        if an_id in alba_nodes:
            continue
        node_id_str = str(an_id)
        new_node = AlbaNode()
        new_node.ip = '10.1.0.{0}'.format(an_id)
        new_node.port = 8500
        new_node.username = node_id_str
        new_node.password = node_id_str
        new_node.node_id = 'node_{0}'.format(an_id)
        new_node.save()
        alba_nodes[an_id] = new_node
        mocked_metadata = {'get_metadata': {'_version': 3}}
        if new_node in ManagerClientMockup.test_results:
            ManagerClientMockup.test_results[new_node].update(mocked_metadata)
        else:
            ManagerClientMockup.test_results[new_node] = mocked_metadata

    # OSDs, linked to an earlier modeled ALBA Backend and ALBA Node
    for ao_id, ab_id, an_id, slot_id in structure.get('alba_osds', ()):
        if ao_id in alba_osds:
            continue
        new_osd = AlbaOSD()
        new_osd.osd_id = 'alba_osd_{0}'.format(ao_id)
        new_osd.osd_type = AlbaOSD.OSD_TYPES.ASD
        new_osd.alba_backend = alba_backends[ab_id]
        new_osd.alba_node = alba_nodes[an_id]
        new_osd.slot_id = 'alba_slot_{0}'.format(slot_id)
        new_osd.ips = ['127.0.0.{0}'.format(ao_id)]
        new_osd.port = 35000 + ao_id
        new_osd.save()
        alba_osds[ao_id] = new_osd

    return {'alba_osds': alba_osds,
            'alba_nodes': alba_nodes,
            'backend_types': backend_types,
            'service_types': service_types,
            'alba_backends': alba_backends,
            'alba_abm_clusters': alba_abm_clusters,
            'alba_nsm_clusters': alba_nsm_clusters}