def _dtl_status(self):
    """
    Retrieve the DTL status for a vDisk
    """
    sd_status = self.info.get('failover_mode', 'UNKNOWN').lower()
    if sd_status == '':
        sd_status = 'unknown'
    if sd_status != 'ok_standalone':
        return sd_status

    # Verify whether 'ok_standalone' is the correct status for this vDisk
    vpool_dtl = self.vpool.configuration['dtl_enabled']
    if self.has_manual_dtl is True or vpool_dtl is False:
        return sd_status

    domains = []
    possible_dtl_targets = set()
    for sr in StorageRouterList.get_storagerouters():
        if sr.guid == self.storagerouter_guid:
            domains = [junction.domain for junction in sr.domains]
        elif len(sr.storagedrivers) > 0:
            possible_dtl_targets.add(sr)

    if len(domains) > 0:
        possible_dtl_targets = set()
        for domain in domains:
            possible_dtl_targets.update(StorageRouterList.get_primary_storagerouters_for_domain(domain))

    if len(possible_dtl_targets) == 0:
        return sd_status
    return 'checkup_required'
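# Usage sketch (illustrative, assuming the usual DAL convention where the
# `_dtl_status` method above backs a dynamic `dtl_status` property on VDisk;
# `schedule_dtl_checkup` is a hypothetical helper, not part of this module):
#
#     vdisk = VDisk(vdisk_guid)
#     if vdisk.dtl_status == 'checkup_required':
#         # 'ok_standalone' was reported although DTL targets are available,
#         # so a checkup should reconfigure the DTL for this vDisk
#         schedule_dtl_checkup(vdisk)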
def get_mds_storagedriver_config_set(vpool, check_online=False):
    """
    Builds a configuration for all StorageRouters from a given VPool with the following goals:
        * Primary MDS is the local one
        * All slaves are on different hosts
        * Maximum `mds_safety` nodes are returned
    The configuration returned is the default configuration used by the volumedriver, of which in normal use-cases
    only the 1st entry is used, because at volume creation time the volumedriver needs to create 1 master MDS.
    During ensure_safety, we actually create/set the MDS slaves for each volume.

    :param vpool: vPool to get storagedriver configuration for
    :type vpool: VPool
    :param check_online: Check whether the storage routers are actually responsive
    :type check_online: bool
    :return: MDS configuration for a vPool
    :rtype: dict
    """
    mds_per_storagerouter = {}
    mds_per_load = {}
    for storagedriver in vpool.storagedrivers:
        storagerouter = storagedriver.storagerouter
        if check_online is True:
            try:
                SSHClient(storagerouter)
            except UnableToConnectException:
                continue
        mds_service, load = MDSServiceController.get_preferred_mds(storagerouter, vpool)
        if mds_service is None:
            raise RuntimeError('Could not find an MDS service')
        mds_per_storagerouter[storagerouter] = {'host': storagerouter.ip, 'port': mds_service.service.ports[0]}
        if load not in mds_per_load:
            mds_per_load[load] = []
        mds_per_load[load].append(storagerouter)

    safety = Configuration.get('/ovs/framework/storagedriver|mds_safety')
    config_set = {}
    for storagerouter, ip_info in mds_per_storagerouter.iteritems():
        config_set[storagerouter.guid] = [ip_info]
        for importance in ['primary', 'secondary']:
            domains = [junction.domain for junction in storagerouter.domains if junction.backup is (importance == 'secondary')]
            possible_storagerouters = set()
            for domain in domains:
                possible_storagerouters.update(StorageRouterList.get_primary_storagerouters_for_domain(domain))
            for load in sorted(mds_per_load):
                if len(config_set[storagerouter.guid]) >= safety:
                    break
                other_storagerouters = mds_per_load[load]
                random.shuffle(other_storagerouters)
                for other_storagerouter in other_storagerouters:
                    if len(config_set[storagerouter.guid]) >= safety:
                        break
                    if other_storagerouter != storagerouter and other_storagerouter in possible_storagerouters:
                        config_set[storagerouter.guid].append(mds_per_storagerouter[other_storagerouter])
    return config_set
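# Shape of the returned configuration (illustrative guids, IPs and ports;
# `mds_safety` assumed to be 2 here): one list per StorageRouter guid, with the
# local MDS always first and the remainder filled from the least-loaded MDSes
# within that router's primary and secondary domains:
#
#     {'sr_guid_1': [{'host': '10.100.1.1', 'port': 26300},   # local (master) MDS
#                    {'host': '10.100.1.2', 'port': 26300}],  # slave on another host
#      'sr_guid_2': [{'host': '10.100.1.2', 'port': 26300},
#                    {'host': '10.100.1.1', 'port': 26300}]}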
def ensure_safety(vdisk, excluded_storagerouters=None):
    """
    Ensures (or tries to ensure) the safety of a given vdisk (except hypervisor).
    Assumptions:
        * A local overloaded master is better than a non-local non-overloaded master
        * Prefer master/slave services to be on different hosts, a subsequent slave on the same node doesn't add safety
        * Don't actively overload services (e.g. configure an MDS as slave causing it to get overloaded)
        * Too much safety is not wanted (it adds load to nodes while not required)

    :param vdisk: vDisk to calculate a new safety for
    :type vdisk: VDisk
    :param excluded_storagerouters: StorageRouters to leave out of the calculation (e.g. when one is down or unavailable)
    :type excluded_storagerouters: list
    :return: None
    """
    def _add_suitable_nodes(_importance, _safety):
        if len(nodes) < _safety:
            for local_load in sorted(all_info_dict[_importance]['loads']):
                for local_service in all_info_dict[_importance]['loads'][local_load]:
                    if len(nodes) < _safety and local_service.storagerouter.ip not in nodes:
                        try:
                            SSHClient(local_service.storagerouter)
                            new_services.append(local_service)
                            nodes.add(local_service.storagerouter.ip)
                        except UnableToConnectException:
                            MDSServiceController._logger.debug('MDS safety: vDisk {0}: Skipping storagerouter with IP {1} as it is unreachable'.format(vdisk.guid, local_service.storagerouter.ip))
        return nodes, new_services

    MDSServiceController._logger.debug('MDS safety: vDisk {0}: Start checkup for virtual disk {1}'.format(vdisk.guid, vdisk.name))
    tlogs = Configuration.get('/ovs/framework/storagedriver|mds_tlogs')
    safety = Configuration.get('/ovs/framework/storagedriver|mds_safety')
    max_load = Configuration.get('/ovs/framework/storagedriver|mds_maxload')

    ######################
    # GATHER INFORMATION #
    ######################
    vdisk.reload_client('storagedriver')
    vdisk.reload_client('objectregistry')

    vdisk.invalidate_dynamics(['storagedriver_id', 'storagerouter_guid'])
    if vdisk.storagerouter_guid is None:
        raise SRCObjectNotFoundException('Cannot ensure MDS safety for vDisk {0} with guid {1} because vDisk is not attached to any Storage Router'.format(vdisk.name, vdisk.guid))

    if excluded_storagerouters is None:
        excluded_storagerouters = []

    # Sorted was added merely for unittests, because they rely on specific order of services and their ports
    # Default sorting behavior for relations used to be based on order in which relations were added
    # Now sorting is based on guid (DAL speedup changes)
    nodes = set()
    services = sorted([mds_service.service for mds_service in vdisk.vpool.mds_services
                       if mds_service.service.storagerouter not in excluded_storagerouters],
                      key=lambda k: k.ports)
    service_per_key = {}
    for service in services:
        nodes.add(service.storagerouter.ip)
        service_per_key['{0}:{1}'.format(service.storagerouter.ip, service.ports[0])] = service

    # Create a pool of StorageRouters being a part of the primary and secondary domains of this Storage Router
    vdisk_storagerouter = StorageRouter(vdisk.storagerouter_guid)
    primary_domains = [junction.domain for junction in vdisk_storagerouter.domains if junction.backup is False]
    secondary_domains = [junction.domain for junction in vdisk_storagerouter.domains if junction.backup is True]
    primary_storagerouters = set()
    secondary_storagerouters = set()
    for domain in primary_domains:
        primary_storagerouters.update(StorageRouterList.get_primary_storagerouters_for_domain(domain))
    for domain in secondary_domains:
        secondary_storagerouters.update(StorageRouterList.get_primary_storagerouters_for_domain(domain))

    # In case no domains have been configured
    if len(primary_storagerouters) == 0:
        primary_storagerouters = set(StorageRouterList.get_storagerouters())

    if vdisk_storagerouter not in primary_storagerouters or vdisk_storagerouter in secondary_storagerouters:
        raise ValueError('StorageRouter {0} for vDisk {1} should be part of the primary domains and NOT be part of the secondary domains'.format(vdisk_storagerouter.name, vdisk.name))

    # Remove all storagerouters from secondary which are present in primary
    secondary_storagerouters = secondary_storagerouters.difference(primary_storagerouters)

    ###################################
    # VERIFY RECONFIGURATION REQUIRED #
    ###################################
    vdisk.invalidate_dynamics(['info'])
    configs = vdisk.info['metadata_backend_config']  # Ordered MASTER, SLAVE (secondary domain of master)
    master_service = None
    reconfigure_reasons = []
    if len(configs) > 0:
        config = configs.pop(0)
        config_key = '{0}:{1}'.format(config['ip'], config['port'])
        master_service = service_per_key.get(config_key)
        if master_service is None:
            reconfigure_reasons.append('Master ({0}:{1}) cannot be used anymore'.format(config['ip'], config['port']))
    slave_services = []
    for config in configs:
        config_key = '{0}:{1}'.format(config['ip'], config['port'])
        if config_key in service_per_key:
            slave_services.append(service_per_key[config_key])
        else:
            reconfigure_reasons.append('Slave ({0}:{1}) cannot be used anymore'.format(config['ip'], config['port']))

    # If MDS already in use, take current load, else take next load
    all_info_dict = {'primary': {'used': [], 'loads': {}, 'available': []},
                     'secondary': {'used': [], 'loads': {}, 'available': []}}
    services_load = {}
    for service in services:
        importance = None
        if service.storagerouter in primary_storagerouters:
            importance = 'primary'
        elif service.storagerouter in secondary_storagerouters:
            importance = 'secondary'

        loads = MDSServiceController.get_mds_load(service.mds_service)
        if service == master_service or service in slave_services:  # Service is still in use
            load = loads[0]
            if importance is not None:
                all_info_dict[importance]['used'].append(service)
            else:
                reconfigure_reasons.append('Service {0} cannot be used anymore because storagerouter with IP {1} is not part of the domains'.format(service.name, service.storagerouter.ip))
        else:  # Service is not in use, but available
            load = loads[1]
        services_load[service] = load

        if importance is not None:
            all_info_dict[importance]['available'].append(service)
            if load <= max_load:
                if load not in all_info_dict[importance]['loads']:
                    all_info_dict[importance]['loads'][load] = []
                all_info_dict[importance]['loads'][load].append(service)

    service_nodes = []
    if master_service is not None:
        service_nodes.append(master_service.storagerouter.ip)
    for service in slave_services:
        ip = service.storagerouter.ip
        if ip in service_nodes:
            reconfigure_reasons.append('Multiple MDS services on the same node')
        else:
            service_nodes.append(ip)

    if len(service_nodes) > safety:
        reconfigure_reasons.append('Too much safety')
    if len(service_nodes) < safety and len(service_nodes) < len(nodes):
        reconfigure_reasons.append('Not enough safety')
    if master_service is not None and services_load[master_service] > max_load:
        reconfigure_reasons.append('Master overloaded')
    if master_service is not None and master_service.storagerouter_guid != vdisk.storagerouter_guid:
        reconfigure_reasons.append('Master is not local')
    if any(service for service in slave_services if services_load[service] > max_load):
        reconfigure_reasons.append('One or more slaves overloaded')

    # Check whether reconfiguration is required based upon the domains
    recommended_primary = math.ceil(safety / 2.0) if len(secondary_storagerouters) > 0 else safety
    recommended_secondary = safety - recommended_primary

    if master_service is not None and master_service not in all_info_dict['primary']['used']:
        # Master service not present in primary domain
        reconfigure_reasons.append('Master service not in primary domain')

    primary_services_used = len(all_info_dict['primary']['used'])
    primary_services_available = len(all_info_dict['primary']['available'])
    if primary_services_used < recommended_primary and primary_services_used < primary_services_available:
        # More services can be used in primary domain
        reconfigure_reasons.append('Not enough services in use in primary domain')
    if primary_services_used > recommended_primary:
        # Too many services in primary domain
        reconfigure_reasons.append('Too many services in use in primary domain')

    # More services can be used in secondary domain
    secondary_services_used = len(all_info_dict['secondary']['used'])
    secondary_services_available = len(all_info_dict['secondary']['available'])
    if secondary_services_used < recommended_secondary and secondary_services_used < secondary_services_available:
        reconfigure_reasons.append('Not enough services in use in secondary domain')
    if secondary_services_used > recommended_secondary:
        # Too many services in secondary domain
        reconfigure_reasons.append('Too many services in use in secondary domain')

    # If secondary domain present, check order in which the slave services are configured
    secondary = False
    for slave_service in slave_services:
        if secondary is True and slave_service in all_info_dict['primary']['used']:
            reconfigure_reasons.append('A slave in secondary domain has priority over a slave in primary domain')
            break
        if slave_service in all_info_dict['secondary']['used']:
            secondary = True

    if not reconfigure_reasons:
        MDSServiceController._logger.debug('MDS safety: vDisk {0}: No reconfiguration required'.format(vdisk.guid))
        MDSServiceController.sync_vdisk_to_reality(vdisk)
        return

    MDSServiceController._logger.debug('MDS safety: vDisk {0}: Reconfiguration required. Reasons:'.format(vdisk.guid))
    for reason in reconfigure_reasons:
        MDSServiceController._logger.debug('MDS safety: vDisk {0}: * {1}'.format(vdisk.guid, reason))

    ############################
    # CREATE NEW CONFIGURATION #
    ############################

    # Check whether the master (if available) is non-local to the vdisk and/or is overloaded
    new_services = []
    master_ok = master_service is not None
    if master_ok is True:
        master_ok = master_service.storagerouter_guid == vdisk.storagerouter_guid and services_load[master_service] <= max_load

    previous_master = None
    if master_ok:
        # Add this master to the fresh configuration
        new_services.append(master_service)
    else:
        # Try to find the best non-overloaded LOCAL MDS slave to make master
        candidate_master_service = None
        candidate_master_load = 0
        local_mds = None
        local_mds_load = 0
        for service in all_info_dict['primary']['available']:
            load = services_load[service]
            if load <= max_load and service.storagerouter_guid == vdisk.storagerouter_guid:
                if local_mds is None or local_mds_load > load:
                    # This service is a non-overloaded local MDS
                    local_mds = service
                    local_mds_load = load
                if service in slave_services:
                    if candidate_master_service is None or candidate_master_load > load:
                        # This service is a non-overloaded local slave
                        candidate_master_service = service
                        candidate_master_load = load

        if candidate_master_service is not None:
            # A non-overloaded local slave was found
            client = MetadataServerClient.load(candidate_master_service)
            try:
                amount_of_tlogs = client.catch_up(str(vdisk.volume_id), True)
            except RuntimeError as ex:
                if 'Namespace does not exist' in ex.message:
                    client.create_namespace(str(vdisk.volume_id))
                    amount_of_tlogs = client.catch_up(str(vdisk.volume_id), True)
                else:
                    raise
            if amount_of_tlogs < tlogs:
                # Almost there. Catching up right now, and continue as soon as it's up-to-date
                start = time.time()
                client.catch_up(str(vdisk.volume_id), False)
                MDSServiceController._logger.debug('MDS safety: vDisk {0}: Catchup took {1}s'.format(vdisk.guid, round(time.time() - start, 2)))

                # It's up to date, so add it as a new master
                new_services.append(candidate_master_service)
                if master_service is not None:
                    # The current master (if available) is now candidate to become one of the slaves
                    slave_services.append(master_service)
                    previous_master = master_service
            else:
                # It's not up to date, keep the previous master (if available) and give the local slave some more time to catch up
                if master_service is not None:
                    new_services.append(master_service)
                new_services.append(candidate_master_service)
            if candidate_master_service in slave_services:
                slave_services.remove(candidate_master_service)
        else:
            # No non-overloaded local slave was found. Keep the current master (if available) and add a local MDS (if available) as slave
            if master_service is not None:
                new_services.append(master_service)
            if local_mds is not None:
                new_services.append(local_mds)
                if local_mds in slave_services:
                    slave_services.remove(local_mds)

    # At this point, there might (or might not) be a (new) master, and a (catching up) slave. The rest of the non-local
    # MDS nodes must now be added to the configuration until the safety is reached. There's always one extra
    # slave recycled to make sure there's always an (almost) up-to-date slave ready for failover
    nodes = set(service.storagerouter.ip for service in new_services)

    # Recycle slave for faster failover
    secondary_node_count = 0
    service_to_recycle = None
    if len(nodes) < safety:
        if recommended_primary > 1:  # If primary is 1, we only have the master in primary
            # Try to recycle a slave which is in the primary domain
            for load in sorted(all_info_dict['primary']['loads']):
                for service in all_info_dict['primary']['loads'][load]:
                    if service_to_recycle is None and service in slave_services and service.storagerouter.ip not in nodes:
                        try:
                            SSHClient(service.storagerouter)
                            service_to_recycle = service
                        except UnableToConnectException:
                            MDSServiceController._logger.debug('MDS safety: vDisk {0}: Skipping storagerouter with IP {1} as it is unreachable'.format(vdisk.guid, service.storagerouter.ip))
        # Try to recycle a slave which is in the secondary domain if none was found in primary
        if service_to_recycle is None and len(secondary_storagerouters) > 0:
            for load in sorted(all_info_dict['secondary']['loads']):
                for service in all_info_dict['secondary']['loads'][load]:
                    if service_to_recycle is None and service in slave_services and service.storagerouter.ip not in nodes:
                        try:
                            SSHClient(service.storagerouter)
                            service_to_recycle = service
                            secondary_node_count = 1  # We do not want to configure the secondary slave BEFORE the primary slaves
                        except UnableToConnectException:
                            MDSServiceController._logger.debug('MDS safety: vDisk {0}: Skipping storagerouter with IP {1} as it is unreachable'.format(vdisk.guid, service.storagerouter.ip))

    if service_to_recycle is not None:
        slave_services.remove(service_to_recycle)
        if secondary_node_count == 0:
            # Add the recycled service immediately because it's in the primary domain
            new_services.append(service_to_recycle)
            nodes.add(service_to_recycle.storagerouter.ip)

    # Add extra (new) slaves until primary safety reached
    nodes, new_services = _add_suitable_nodes(_importance='primary', _safety=recommended_primary)

    # Add recycled secondary slave after primary slaves have been added
    if secondary_node_count == 1:
        new_services.append(service_to_recycle)
        nodes.add(service_to_recycle.storagerouter.ip)

    # Add extra (new) slaves until secondary safety reached
    if len(secondary_storagerouters) > 0:
        nodes, new_services = _add_suitable_nodes(_importance='secondary', _safety=safety)
        # Add extra slaves from primary domain in case no suitable nodes were found in secondary domain
        if len(nodes) < safety:
            nodes, new_services = _add_suitable_nodes(_importance='primary', _safety=safety)

    # Build the new configuration and update the vdisk
    configs_no_ex_master = []
    configs_all = []
    for service in new_services:
        client = MetadataServerClient.load(service)
        client.create_namespace(str(vdisk.volume_id))
        # noinspection PyArgumentList
        config = MDSNodeConfig(address=str(service.storagerouter.ip), port=service.ports[0])
        if previous_master != service:
            configs_no_ex_master.append(config)
        configs_all.append(config)
    try:
        if len(configs_no_ex_master) != len(configs_all):
            vdisk.storagedriver_client.update_metadata_backend_config(volume_id=str(vdisk.volume_id),
                                                                      metadata_backend_config=MDSMetaDataBackendConfig(configs_no_ex_master))
        vdisk.storagedriver_client.update_metadata_backend_config(volume_id=str(vdisk.volume_id),
                                                                  metadata_backend_config=MDSMetaDataBackendConfig(configs_all))
    except Exception:
        MDSServiceController._logger.exception('MDS safety: vDisk {0}: Failed to update the metadata backend configuration'.format(vdisk.guid))
        raise Exception('MDS configuration for volume {0} with guid {1} could not be changed'.format(vdisk.name, vdisk.guid))

    for service in new_services[1:]:
        client = MetadataServerClient.load(service)
        client.set_role(str(vdisk.volume_id), MetadataServerClient.MDS_ROLE.SLAVE)

    MDSServiceController.sync_vdisk_to_reality(vdisk)
    MDSServiceController._logger.debug('MDS safety: vDisk {0}: Completed'.format(vdisk.guid))
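# Worked example of the safety split computed in ensure_safety (numbers assumed
# for illustration): with mds_safety = 3 and at least one secondary domain
# configured, recommended_primary = math.ceil(3 / 2.0) = 2 and
# recommended_secondary = 3 - 2 = 1, so the target layout is a local master plus
# one slave in the primary domain, followed by one slave in the secondary
# domain. Without secondary domains, all 3 services come from the primary domain.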
def get_primary_and_secondary_storagerouters(self):
    # type: () -> Tuple[List[StorageRouter], List[StorageRouter]]
    """
    Retrieve the primary and secondary storagerouters for MDS deployment
    :return: Both primary and secondary storagerouters
    :rtype: Tuple[List[StorageRouter], List[StorageRouter]]
    """
    # Create a pool of StorageRouters being a part of the primary and secondary domains of this StorageRouter
    vdisk = self.vdisk
    vdisk_storagerouter = StorageRouter(vdisk.storagerouter_guid)
    primary_domains = [junction.domain for junction in vdisk_storagerouter.domains if junction.backup is False]
    secondary_domains = [junction.domain for junction in vdisk_storagerouter.domains if junction.backup is True]
    primary_storagerouters = set()
    secondary_storagerouters = set()
    for domain in primary_domains:
        primary_storagerouters.update(StorageRouterList.get_primary_storagerouters_for_domain(domain))
    for domain in secondary_domains:
        secondary_storagerouters.update(StorageRouterList.get_primary_storagerouters_for_domain(domain))

    # In case no domains have been configured
    if len(primary_storagerouters) == 0:
        primary_storagerouters = set(StorageRouterList.get_storagerouters())

    # Remove all excluded StorageRouters from primary StorageRouters
    primary_storagerouters = primary_storagerouters.difference(self.excluded_storagerouters)

    # Remove all StorageRouters from secondary which are present in primary, as well as all excluded ones
    secondary_storagerouters = secondary_storagerouters.difference(primary_storagerouters)
    secondary_storagerouters = secondary_storagerouters.difference(self.excluded_storagerouters)

    # Make sure to only use the StorageRouters related to the current vDisk's vPool
    related_storagerouters = [sd.storagerouter for sd in vdisk.vpool.storagedrivers if sd.storagerouter is not None]
    primary_storagerouters = list(primary_storagerouters.intersection(related_storagerouters))
    secondary_storagerouters = list(secondary_storagerouters.intersection(related_storagerouters))

    if vdisk_storagerouter not in primary_storagerouters:
        raise RuntimeError('Host of vDisk {0} ({1}) should be part of the primary domains'.format(vdisk.name, vdisk_storagerouter.name))

    primary_storagerouters.sort(key=lambda sr: ExtensionsToolbox.advanced_sort(element=sr.ip, separator='.'))
    secondary_storagerouters.sort(key=lambda sr: ExtensionsToolbox.advanced_sort(element=sr.ip, separator='.'))
    for primary_storagerouter in primary_storagerouters:
        self._logger.debug('vDisk {0} - Primary StorageRouter {1} with IP {2}'.format(vdisk.guid, primary_storagerouter.name, primary_storagerouter.ip))
    for secondary_storagerouter in secondary_storagerouters:
        self._logger.debug('vDisk {0} - Secondary StorageRouter {1} with IP {2}'.format(vdisk.guid, secondary_storagerouter.name, secondary_storagerouter.ip))
    for excluded_storagerouter in self.excluded_storagerouters:
        self._logger.debug('vDisk {0} - Excluded StorageRouter {1} with IP {2}'.format(vdisk.guid, excluded_storagerouter.name, excluded_storagerouter.ip))
    return primary_storagerouters, secondary_storagerouters
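# Usage sketch (hypothetical: assumes the method lives on a safety-checkup
# helper exposing `vdisk` and `excluded_storagerouters`; the class name below
# is illustrative, not part of this module):
#
#     ensurer = MDSEnsureSafety(vdisk, excluded_storagerouters=[unreachable_sr])
#     primary_srs, secondary_srs = ensurer.get_primary_and_secondary_storagerouters()
#     # primary_srs always contains the vDisk's own StorageRouter; both lists are
#     # limited to the vDisk's vPool and sorted by IP for deterministic iteration.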