def list_rse_attributes(rse):
    """
    Return all attributes defined for the given RSE.

    :param rse: The RSE name.
    :returns: Dictionary of all RSE attributes (delegates to the core RSE module).
    """
    attributes = rse_module.list_rse_attributes(rse=rse)
    return attributes
def __init__(self, account, rses, weight, copies, session=None):
    """
    Initialize the RSE Selector.

    :param account: Account owning the rule.
    :param rses: List of rse dictionaries.
    :param weight: Weighting to use.
    :param copies: Number of copies to create.
    :param session: DB Session in use.
    :raises: InvalidRuleWeight, InsufficientAccountLimit, InsufficientTargetRSEs
    """
    self.account = account
    self.rses = []  # [{'rse_id':, 'weight':, 'staging_area'}]
    self.copies = copies
    if weight is not None:
        # Weighted selection: only RSEs that define the requested weight
        # attribute take part; others are silently skipped.
        for rse in rses:
            # NOTE(review): passes rse=None and resolves via rse_id — confirm
            # this matches the signature of the list_rse_attributes in scope here.
            attributes = list_rse_attributes(rse=None, rse_id=rse['id'], session=session)
            if weight not in attributes:
                continue  # The RSE does not have the required weight set, therefore it is ignored
            try:
                self.rses.append({'rse_id': rse['id'], 'weight': float(attributes[weight]), 'staging_area': rse['staging_area']})
            except ValueError:
                raise InvalidRuleWeight('The RSE with id \'%s\' has a non-number specified for the weight \'%s\'' % (rse['id'], weight))
    else:
        # No weighting requested: every RSE participates with equal weight 1.
        for rse in rses:
            self.rses.append({'rse_id': rse['id'], 'weight': 1, 'staging_area': rse['staging_area']})
    if len(self.rses) < self.copies:
        raise InsufficientTargetRSEs('Target RSE set not sufficient for number of copies. (%s copies requested, RSE set size %s)' % (self.copies, len(self.rses)))
    for rse in self.rses:
        # TODO: Add RSE-space-left here!
        # Quota left = the account's limit on this RSE minus its current usage in bytes.
        rse['quota_left'] = get_account_limit(account=account, rse_id=rse['rse_id'], session=session)\
            - get_counter(rse_id=rse['rse_id'], account=account, session=session)['bytes']
    # Drop RSEs where the account has no quota left.
    self.rses = [rse for rse in self.rses if rse['quota_left'] > 0]
    if len(self.rses) < self.copies:
        raise InsufficientAccountLimit('There is insufficient quota on any of the target RSE\'s to fullfill the operation.')
def perm_delete_global_account_limit(issuer, kwargs):
    """
    Checks if an account can delete a global account limit.

    :param issuer: Account identifier which issues the command.
    :param kwargs: List of arguments for the action.
    :returns: True if account is allowed, otherwise False
    """
    # Root and admin-flagged accounts may always delete the limit.
    if _is_root(issuer) or has_account_attribute(account=issuer, key='admin'):
        return True
    # Collect the countries for which the issuer holds the country-admin role.
    admin_in_country = {
        kv['key'].partition('-')[2]
        for kv in list_account_attributes(account=issuer)
        if kv['key'].startswith('country-') and kv['value'] == 'admin'
    }
    if not admin_in_country:
        return False
    # The issuer must administrate every country touched by the RSE expression.
    resolved_rse_countries = set()
    for rse in parse_expression(kwargs['rse_exp']):
        resolved_rse_countries.add(list_rse_attributes(rse_id=rse['rse_id']).get('country'))
    return resolved_rse_countries.issubset(admin_in_country)
def perm_add_rule(issuer, kwargs, session=None):
    """
    Checks if an account can add a replication rule.

    :param issuer: Account identifier which issues the command.
    :param kwargs: List of arguments for the action.
    :param session: The DB session to use
    :returns: True if account is allowed, otherwise False
    """
    rses = parse_expression(kwargs['rse_expression'], filter_={'vo': issuer.vo}, session=session)

    # Keep while sync is running so it can make rules on all RSEs
    if _is_root(issuer) and repr(kwargs['account']).startswith('sync_'):
        return True
    # repr() always returns str, so the former isinstance(repr(issuer), basestring)
    # guard was redundant on Python 2 and a NameError on Python 3; dropped.
    if repr(issuer).startswith('sync_'):
        return True

    # Anyone can use _Temp RSEs if a lifetime is set and under a month
    all_temp = True
    for rse in rses:
        rse_attr = list_rse_attributes(rse_id=rse['id'], session=session)
        rse_type = rse_attr.get('cms_type', None)
        if rse_type not in ['temp']:
            all_temp = False

    if all_temp and kwargs['lifetime'] is not None and kwargs['lifetime'] < 31 * 24 * 60 * 60:
        return True

    # Owners may create rules on their own account unless the rule is locked.
    if kwargs['account'] == issuer and not kwargs['locked']:
        return True
    if _is_root(issuer) or has_account_attribute(account=issuer, key='admin', session=session):
        return True
    return False
def perm_delete_local_account_limit(issuer, kwargs, session=None):
    """
    Checks if an account can delete an account limit.

    :param issuer: Account identifier which issues the command.
    :param kwargs: List of arguments for the action.
    :param session: The DB session to use
    :returns: True if account is allowed, otherwise False
    """
    if _is_root(issuer) or has_account_attribute(account=issuer, key='admin', session=session):
        return True
    # Countries for which the issuer holds the country-admin role.
    admin_in_country = [
        kv['key'].partition('-')[2]
        for kv in list_account_attributes(account=issuer, session=session)
        if kv['key'].startswith('country-') and kv['value'] == 'admin'
    ]
    if not admin_in_country:
        return False
    # A country admin may delete limits for RSEs located in their countries.
    rse_country = list_rse_attributes(rse_id=kwargs['rse_id'], session=session).get('country')
    return rse_country in admin_in_country
def get_rse_attributes(rse_id, session=None):
    """
    List rse attributes, with a cache region in front of the database lookup.

    :param rse_id: The RSE id.
    :param session: The database session in use.

    :returns: A dictionary with RSE attributes for a RSE, or None when the
              lookup failed (best-effort: failures are logged, not raised).
    """
    result = REGION.get(rse_id)
    if isinstance(result, NoValue):
        try:
            result = None  # returned if the lookup below raises
            result = rse_core.list_rse_attributes(rse_id=rse_id, session=session)
            REGION.set(rse_id, result)
        except Exception:  # narrowed from bare except: do not swallow SystemExit/KeyboardInterrupt
            # Lazy %-args so formatting only happens when the record is emitted.
            logging.warning("Failed to get RSE %s attributes, error: %s", rse_id, traceback.format_exc())
    return result
def perm_set_local_account_limit(issuer, kwargs):
    """
    Checks if an account can set an account limit.

    :param issuer: Account identifier which issues the command.
    :param kwargs: List of arguments for the action.
    :returns: True if account is allowed, otherwise False
    """
    if _is_root(issuer) or has_account_attribute(account=issuer, key='admin'):
        return True
    # Country admins may set limits on RSEs located in their countries.
    admin_in_country = [
        kv['key'].partition('-')[2]
        for kv in list_account_attributes(account=issuer)
        if kv['key'].startswith('country-') and kv['value'] == 'admin'
    ]
    rse_attr = list_rse_attributes(rse_id=kwargs['rse_id'])
    if admin_in_country and rse_attr.get('country') in admin_in_country:
        return True
    # Accounts listed in the RSE's 'quota_approvers' attribute are also allowed.
    quota_approvers = rse_attr.get('quota_approvers', None)
    return bool(quota_approvers and issuer.external in quota_approvers.split(','))
def perm_approve_rule(issuer, kwargs):
    """
    Checks if an issuer can approve a replication rule.

    :param issuer: Account identifier which issues the command.
    :param kwargs: List of arguments for the action.
    :returns: True if account is allowed to call the API call, otherwise False
    """
    if _is_root(issuer) or has_account_attribute(account=issuer, key='admin'):
        return True
    rule = get_rule(rule_id=kwargs['rule_id'])
    # An account listed in the 'rule_approvers' attribute of any RSE matched
    # by the rule's expression may approve the rule.
    for rse in parse_expression(rule['rse_expression']):
        approvers = list_rse_attributes(rse_id=rse['id']).get('rule_approvers', None)
        if approvers and issuer.external in approvers.split(','):
            return True
    return False
def perm_update_replicas_states(issuer, kwargs):
    """
    Checks if an account can delete replicas.

    :param issuer: Account identifier which issues the command.
    :param kwargs: List of arguments for the action.
    :returns: True if account is allowed, otherwise False
    """
    rse = str(kwargs.get('rse', ''))

    # Collect the physics groups in which the issuer has an admin/user role
    # (account attributes of the form 'group-<name>').
    phys_group = []
    for kv in list_account_attributes(account=issuer):
        if kv['key'].startswith('group-') and kv['value'] in ['admin', 'user']:
            phys_group.append(kv['key'].partition('-')[2])
    rse_attr = list_rse_attributes(rse=rse)
    if phys_group:
        # Group accounts are only allowed on the GROUPDISK of their own group.
        # NOTE(review): when the RSE is not a matching GROUPDISK this branch
        # falls through and the function implicitly returns None (falsy) —
        # confirm that an explicit False is not expected here.
        if rse_attr.get('type', '') == 'GROUPDISK':
            if rse_attr.get('physgroup', '') in phys_group:
                return True
    else:
        # Non-group accounts: allowed on scratch-like RSE types, or when the
        # issuer is root or carries the 'admin' account attribute.
        return rse_attr.get('type', '') in ['SCRATCHDISK', 'MOCK', 'LOCALGROUPDISK', 'TEST']\
            or issuer == 'root'\
            or has_account_attribute(account=issuer, key='admin')
def perm_get_global_account_usage(issuer, kwargs, session=None):
    """
    Checks if an account can get the account usage of an account.

    :param issuer: Account identifier which issues the command.
    :param kwargs: List of arguments for the action.
    :param session: The DB session to use
    :returns: True if account is allowed, otherwise False
    """
    is_privileged = _is_root(issuer) \
        or has_account_attribute(account=issuer, key='admin', session=session) \
        or kwargs.get('account') == issuer
    if is_privileged:
        return True
    # Otherwise the issuer must be a country admin for every country covered
    # by the RSE expression.
    admin_in_country = {
        kv['key'].partition('-')[2]
        for kv in list_account_attributes(account=issuer, session=session)
        if kv['key'].startswith('country-') and kv['value'] == 'admin'
    }
    resolved_rse_countries = {
        list_rse_attributes(rse_id=rse['rse_id'], session=session).get('country')
        for rse in parse_expression(kwargs['rse_exp'], filter_={'vo': issuer.vo}, session=session)
    }
    return resolved_rse_countries.issubset(admin_in_country)
def __init__(self, path=''):
    """
    Load the link model from *path* (falling back to built-in defaults when
    the file cannot be opened) and build RSE <-> site lookup tables.

    :param path: Path to a JSON file holding the model parameters.
    """
    try:
        # Context manager ensures the file handle is closed; the original
        # json.load(open(path, 'r')) leaked it.
        with open(path, 'r') as model_file:
            self.model = json.load(model_file)
    except IOError:
        # Default single-link model used when no model file is available.
        self.model = {
            "STANDARD_LINK": {
                "r2": 0.0,
                "rmse": 0.0,
                "rate": 23399639.38262837,
                "datalen": 0.0,
                "overhead": 12.025538382153206,
                "diskrw": 12046990.897099394
            }
        }
    # Create RSE to site name dictionaries (keyed both by RSE name and id).
    self._rse2site = {}
    self._rseid2site = {}
    self._site2rses = {}
    self._site2rseids = {}
    for rse in list_rses():
        if rse['deleted'] is False:
            attribs = list_rse_attributes(rse['rse'])
            try:
                self._rse2site[rse['rse']] = attribs['site']
                self._rseid2site[rse['id']] = attribs['site']
            except KeyError:
                # RSEs without a 'site' attribute cannot be mapped; skip them.
                logging.warning('(T3CModel rse2site mapper) No site for ' + rse['rse'])
                continue
            if attribs['site'] not in self._site2rses:  # idiomatic membership test (was .keys())
                self._site2rses[attribs['site']] = []
                self._site2rseids[attribs['site']] = []
            self._site2rses[attribs['site']].append(rse['rse'])
            self._site2rseids[attribs['site']].append(rse['id'])
def import_data(data, session=None):
    """
    Import data to add and update records in Rucio.

    Handles, in order: RSEs (create or update), their protocols, limits,
    transfer limits and attributes, then inter-RSE distances.

    :param data: data to be imported as dictionary.
    :param session: database session in use.
    """
    # RSEs
    rses = data.get('rses')
    if rses:
        for rse in rses:
            # Detach the nested protocol list before passing 'rse' to update_rse.
            protocols = rse.get('protocols')
            if protocols:
                protocols = protocols.get('protocols')
                del rse['protocols']
            rse_name = rse['rse']
            del rse['rse']
            if not rse_module.rse_exists(rse_name, session=session):
                rse_module.add_rse(rse_name, deterministic=rse.get('deterministic'), volatile=rse.get('volatile'),
                                   city=rse.get('city'), region_code=rse.get('region_code'),
                                   country_name=rse.get('country_name'), staging_area=rse.get('staging_area'),
                                   continent=rse.get('continent'), time_zone=rse.get('time_zone'),
                                   ISP=rse.get('ISP'), rse_type=rse.get('rse_type'), latitude=rse.get('latitude'),
                                   longitude=rse.get('longitude'), ASN=rse.get('ASN'),
                                   availability=rse.get('availability'), session=session)
            else:
                rse_module.update_rse(rse_name, rse, session=session)

            # Protocols: update the ones matching on (scheme, hostname, port), add the rest.
            if protocols:
                old_protocols = rse_module.get_rse_protocols(rse=rse_name, session=session)
                for protocol in protocols:
                    scheme = protocol.get('scheme')
                    hostname = protocol.get('hostname')
                    port = protocol.get('port')
                    intersection = [old_protocol for old_protocol in old_protocols['protocols']
                                    if old_protocol['scheme'] == scheme and old_protocol['hostname'] == hostname and old_protocol['port'] == port]
                    if intersection:
                        # The identifying keys must not appear in the update payload.
                        del protocol['scheme']
                        del protocol['hostname']
                        del protocol['port']
                        rse_module.update_protocols(rse=rse_name, scheme=scheme, data=protocol, hostname=hostname, port=port, session=session)
                    else:
                        rse_module.add_protocol(rse=rse_name, parameter=protocol, session=session)

            # Limits: delete-then-set to overwrite existing values.
            limits = rse.get('limits')
            if limits:
                old_limits = rse_module.get_rse_limits(rse=rse_name, session=session)
                for limit in limits:
                    if limit in old_limits:
                        rse_module.delete_rse_limit(rse=rse_name, name=limit, session=session)
                    rse_module.set_rse_limits(rse=rse_name, name=limit, value=limits[limit], session=session)

            # Transfer limits: replace existing per-activity limits.
            transfer_limits = rse.get('transfer_limits')
            if transfer_limits:
                for limit in transfer_limits:
                    old_transfer_limits = rse_module.get_rse_transfer_limits(rse=rse_name, activity=limit, session=session)
                    if limit in old_transfer_limits:
                        rse_module.delete_rse_transfer_limits(rse=rse_name, activity=limit, session=session)
                    # NOTE(review): .items()[0] only works on Python 2 (dict views
                    # are not indexable on Python 3) — confirm intended runtime.
                    max_transfers = transfer_limits[limit].items()[0][1]['max_transfers']
                    rse_module.set_rse_transfer_limits(rse=rse_name, activity=limit, max_transfers=max_transfers, session=session)

            # Attributes: delete-then-add to overwrite existing values.
            attributes = rse.get('attributes')
            if attributes:
                old_attributes = rse_module.list_rse_attributes(rse=rse_name, session=session)
                for attr in attributes:
                    if attr in old_attributes:
                        rse_module.del_rse_attribute(rse=rse_name, key=attr, session=session)
                    rse_module.add_rse_attribute(rse=rse_name, key=attr, value=attributes[attr], session=session)

    # Distances: update existing pairs, add missing ones.
    distances = data.get('distances')
    if distances:
        for src_rse_name in distances:
            src = rse_module.get_rse_id(src_rse_name, session=session)
            for dest_rse_name in distances[src_rse_name]:
                dest = rse_module.get_rse_id(dest_rse_name, session=session)
                distance = distances[src_rse_name][dest_rse_name]
                # Ids are recomputed from the names above; drop the imported ones.
                del distance['src_rse_id']
                del distance['dest_rse_id']
                old_distance = distance_module.get_distances(src_rse_id=src, dest_rse_id=dest, session=session)
                if old_distance:
                    distance_module.update_distances(src_rse_id=src, dest_rse_id=dest, parameters=distance, session=session)
                else:
                    distance_module.add_distance(src_rse_id=src, dest_rse_id=dest, ranking=distance.get('ranking'),
                                                 agis_distance=distance.get('agis_distance'),
                                                 geoip_distance=distance.get('geoip_distance'),
                                                 active=distance.get('active'), submitted=distance.get('submitted'),
                                                 transfer_speed=distance.get('transfer_speed'),
                                                 finished=distance.get('finished'), failed=distance.get('failed'),
                                                 session=session)
def __check_rse_usage(rse, rse_id, prepend_str):
    """
    Internal method to check RSE usage and limits.

    Results are cached per RSE id in REGION under 'rse_usage_<id>'.

    :param rse: the rse name.
    :param rse_id: the rse id.
    :param prepend_str: prefix used for the log messages.

    :returns: max_being_deleted_files, needed_free_space, used, free.
    """
    result = REGION.get('rse_usage_%s' % rse_id)
    if result is NO_VALUE:
        max_being_deleted_files, needed_free_space, used, free, obsolete = None, None, None, None, None

        # Get RSE limits
        # NOTE(review): with 'and', this condition reduces to just 'not limits'
        # (the 'not in' checks only matter when limits is empty, where they are
        # always True) — 'or' may have been intended; confirm.
        limits = get_rse_limits(rse_id=rse_id)
        if not limits and 'MinFreeSpace' not in limits and 'MaxBeingDeletedFiles' not in limits:
            result = (max_being_deleted_files, needed_free_space, used, free)
            REGION.set('rse_usage_%s' % rse_id, result)
            return result

        min_free_space = limits.get('MinFreeSpace')
        max_being_deleted_files = limits.get('MaxBeingDeletedFiles')

        # Check from which sources to get used and total spaces
        # Default is storage
        attributes = list_rse_attributes(rse_id=rse_id)
        source_for_total_space = attributes.get('sourceForTotalSpace', 'storage')
        source_for_used_space = attributes.get('sourceForUsedSpace', 'storage')
        greedy = attributes.get('greedyDeletion', False)
        logging.debug('%s RSE: %s, source_for_total_space: %s, source_for_used_space: %s',
                      prepend_str, rse, source_for_total_space, source_for_used_space)

        # First of all check if greedy mode is enabled for this RSE
        # (greedy: pretend a huge amount of space is needed so everything eligible is deleted).
        if greedy:
            result = (max_being_deleted_files, 1000000000000, used, free)
            REGION.set('rse_usage_%s' % rse_id, result)
            return result

        # Get total, used and obsolete space
        rse_usage = get_rse_usage(rse_id=rse_id)
        usage = [entry for entry in rse_usage if entry['source'] == 'obsolete']
        for var in usage:
            # Take the first 'obsolete' entry only.
            obsolete = var['used']
            break
        usage = [entry for entry in rse_usage if entry['source'] == source_for_total_space]

        # If no information is available about disk space, do nothing except if there are replicas with Epoch tombstone
        if not usage:
            if not obsolete:
                result = (max_being_deleted_files, needed_free_space, used, free)
                REGION.set('rse_usage_%s' % rse_id, result)
                return result
            result = (max_being_deleted_files, obsolete, used, free)
            REGION.set('rse_usage_%s' % rse_id, result)
            return result

        # Extract the total and used space (first matching entry only).
        for var in usage:
            total, used = var['total'], var['used']
            break

        if source_for_total_space != source_for_used_space:
            # Used space comes from a different source than total space.
            usage = [entry for entry in rse_usage if entry['source'] == source_for_used_space]
            if not usage:
                result = (max_being_deleted_files, needed_free_space, None, free)
                REGION.set('rse_usage_%s' % rse_id, result)
                return result
            for var in usage:
                used = var['used']
                break
        free = total - used
        if min_free_space:
            needed_free_space = min_free_space - free

        # If needed_free_space negative, nothing to delete except if some Epoch tombstoned replicas
        # NOTE(review): when min_free_space is unset, needed_free_space is still
        # None here and 'None <= 0' raises TypeError on Python 3 — confirm.
        if needed_free_space <= 0:
            # '0 or obsolete' evaluates to obsolete when it is truthy, else 0.
            needed_free_space = 0 or obsolete

        result = (max_being_deleted_files, needed_free_space, used, free)
        REGION.set('rse_usage_%s' % rse_id, result)
        return result

    logging.debug('%s Using cached value for RSE usage on RSE %s', prepend_str, rse)
    return result
def __init__(self, account, rses, weight, copies, ignore_account_limit=False, session=None):
    """
    Initialize the RSE Selector.

    :param account: Account owning the rule.
    :param rses: List of rse dictionaries.
    :param weight: Weighting to use.
    :param copies: Number of copies to create.
    :param ignore_account_limit: Flag if the quota should be ignored.
    :param session: DB Session in use.
    :raises: InvalidRuleWeight, InsufficientAccountLimit, InsufficientTargetRSEs
    """
    self.account = account
    self.rses = []  # [{'rse_id':, 'weight':, 'staging_area'}]
    self.copies = copies
    if weight is not None:
        # Weighted selection: only RSEs that define the weight attribute qualify.
        for rse in rses:
            attributes = list_rse_attributes(rse_id=rse['id'], session=session)
            # Bit 2 of the availability mask encodes write availability (default 7 = all on).
            availability_write = True if rse.get('availability', 7) & 2 else False
            if weight not in attributes:
                continue  # The RSE does not have the required weight set, therefore it is ignored
            try:
                self.rses.append({'rse_id': rse['id'],
                                  'weight': float(attributes[weight]),
                                  'mock_rse': attributes.get('mock', False),
                                  'availability_write': availability_write,
                                  'staging_area': rse['staging_area']})
            except ValueError:
                raise InvalidRuleWeight('The RSE \'%s\' has a non-number specified for the weight \'%s\'' % (rse['rse'], weight))
    else:
        # No weighting requested: all RSEs participate with equal weight 1.
        for rse in rses:
            mock_rse = has_rse_attribute(rse['id'], 'mock', session=session)
            availability_write = True if rse.get('availability', 7) & 2 else False
            self.rses.append({'rse_id': rse['id'],
                              'weight': 1,
                              'mock_rse': mock_rse,
                              'availability_write': availability_write,
                              'staging_area': rse['staging_area']})

    if len(self.rses) < self.copies:
        raise InsufficientTargetRSEs('Target RSE set not sufficient for number of copies. (%s copies requested, RSE set size %s)' % (self.copies, len(self.rses)))

    rses_with_enough_quota = []
    if has_account_attribute(account=account, key='admin', session=session) or ignore_account_limit:
        # Admin accounts (or an explicit override) bypass all quota/space checks.
        for rse in self.rses:
            rse['quota_left'] = float('inf')
            rse['space_left'] = float('inf')
            rses_with_enough_quota.append(rse)
    else:
        global_quota_limit = get_global_account_limits(account=account, session=session)
        # Per-RSE usage of the account, used to resolve global quota expressions.
        all_rse_usages = {usage['rse_id']: usage['bytes']
                          for usage in get_all_rse_usages_per_account(account=account, session=session)}
        for rse in self.rses:
            if rse['mock_rse']:
                # Mock RSEs are exempt from quota accounting.
                rse['quota_left'] = float('inf')
                rse['space_left'] = float('inf')
                rses_with_enough_quota.append(rse)
            else:
                # check local quota
                local_quota_left = None
                quota_limit = get_local_account_limit(account=account, rse_id=rse['rse_id'], session=session)
                if quota_limit is None:
                    # No local limit configured means no quota at all.
                    local_quota_left = 0
                else:
                    local_quota_left = quota_limit - get_usage(rse_id=rse['rse_id'], account=account, session=session)['bytes']

                # check global quota
                rse['global_quota_left'] = {}
                all_global_quota_enough = True
                for rse_expression, limit in global_quota_limit.items():
                    if rse['rse_id'] in limit['resolved_rse_ids']:
                        quota_limit = limit['limit']
                        global_quota_left = None
                        if quota_limit is None:
                            global_quota_left = 0
                        else:
                            # Sum the account's usage over every RSE covered by the expression.
                            rse_expression_usage = 0
                            for rse_id in limit['resolved_rse_ids']:
                                rse_expression_usage += all_rse_usages.get(rse_id, 0)
                            global_quota_left = quota_limit - rse_expression_usage
                        if global_quota_left <= 0:
                            all_global_quota_enough = False
                            break
                        else:
                            rse['global_quota_left'][rse_expression] = global_quota_left
                if local_quota_left > 0 and all_global_quota_enough:
                    rse['quota_left'] = local_quota_left
                    # Physical space left on the RSE, bounded by MaxSpaceAvailable when set.
                    space_limit = get_rse_limits(name='MaxSpaceAvailable', rse_id=rse['rse_id'], session=session).get('MaxSpaceAvailable')
                    if space_limit is None or space_limit < 0:
                        rse['space_left'] = float('inf')
                    else:
                        rse['space_left'] = space_limit - get_rse_counter(rse_id=rse['rse_id'], session=session)['bytes']
                    rses_with_enough_quota.append(rse)

    self.rses = rses_with_enough_quota
    if len(self.rses) < self.copies:
        raise InsufficientAccountLimit('There is insufficient quota on any of the target RSE\'s to fullfill the operation.')
def import_rses(rses, vo='def', session=None):
    """
    Import a dictionary of RSEs: create missing ones, update existing ones,
    sync their protocols, limits and attributes, and soft-delete every RSE
    that is absent from the import data.

    :param rses: Dictionary mapping RSE name to its properties.
    :param vo: The VO the RSEs belong to.
    :param session: database session in use.
    """
    new_rses = []
    for rse_name in rses:
        rse = rses[rse_name]
        # Accept rse_type as a plain string and convert it to the enum.
        if isinstance(rse.get('rse_type'), string_types):
            rse['rse_type'] = RSEType.from_string(str(rse['rse_type']))
        try:
            rse_id = rse_module.get_rse_id(rse=rse_name, vo=vo, session=session)
        except RSENotFound:
            # Unknown RSE: create it with all supplied properties.
            rse_id = rse_module.add_rse(rse=rse_name, vo=vo, deterministic=rse.get('deterministic'),
                                        volatile=rse.get('volatile'), city=rse.get('city'),
                                        region_code=rse.get('region_code'), country_name=rse.get('country_name'),
                                        staging_area=rse.get('staging_area'), continent=rse.get('continent'),
                                        time_zone=rse.get('time_zone'), ISP=rse.get('ISP'),
                                        rse_type=rse.get('rse_type'), latitude=rse.get('latitude'),
                                        longitude=rse.get('longitude'), ASN=rse.get('ASN'),
                                        availability=rse.get('availability'), session=session)
        else:
            rse_module.update_rse(rse_id=rse_id, parameters=rse, session=session)
        new_rses.append(rse_id)

        # Protocols
        new_protocols = rse.get('protocols')
        if new_protocols:
            # update existing, add missing and remove left over protocols;
            # protocols are identified by the (scheme, hostname, port) triple.
            old_protocols = [{'scheme': protocol['scheme'], 'hostname': protocol['hostname'], 'port': protocol['port']}
                             for protocol in rse_module.get_rse_protocols(rse_id=rse_id, session=session)['protocols']]
            missing_protocols = [new_protocol for new_protocol in new_protocols
                                 if {'scheme': new_protocol['scheme'], 'hostname': new_protocol['hostname'], 'port': new_protocol['port']} not in old_protocols]
            outdated_protocols = [new_protocol for new_protocol in new_protocols
                                  if {'scheme': new_protocol['scheme'], 'hostname': new_protocol['hostname'], 'port': new_protocol['port']} in old_protocols]
            new_protocols = [{'scheme': protocol['scheme'], 'hostname': protocol['hostname'], 'port': protocol['port']} for protocol in new_protocols]
            to_be_removed_protocols = [old_protocol for old_protocol in old_protocols if old_protocol not in new_protocols]
            for protocol in outdated_protocols:
                scheme = protocol['scheme']
                port = protocol['port']
                hostname = protocol['hostname']
                # The identifying keys must not appear in the update payload.
                del protocol['scheme']
                del protocol['hostname']
                del protocol['port']
                rse_module.update_protocols(rse_id=rse_id, scheme=scheme, data=protocol, hostname=hostname, port=port, session=session)
            for protocol in missing_protocols:
                rse_module.add_protocol(rse_id=rse_id, parameter=protocol, session=session)
            for protocol in to_be_removed_protocols:
                scheme = protocol['scheme']
                port = protocol['port']
                hostname = protocol['hostname']
                rse_module.del_protocols(rse_id=rse_id, scheme=scheme, port=port, hostname=hostname, session=session)

        # Limits: delete-then-set to overwrite existing values.
        old_limits = rse_module.get_rse_limits(rse_id=rse_id, session=session)
        for limit_name in ['MaxBeingDeletedFiles', 'MinFreeSpace']:
            limit = rse.get(limit_name)
            if limit:
                if limit_name in old_limits:
                    rse_module.delete_rse_limit(rse_id=rse_id, name=limit_name, session=session)
                rse_module.set_rse_limits(rse_id=rse_id, name=limit_name, value=limit, session=session)

        # Attributes: top-level lfn2pfn/verify_checksum fields are folded in;
        # existing attributes are deleted and re-added with the new value.
        attributes = rse.get('attributes', {})
        attributes['lfn2pfn_algorithm'] = rse.get('lfn2pfn_algorithm')
        attributes['verify_checksum'] = rse.get('verify_checksum')
        old_attributes = rse_module.list_rse_attributes(rse_id=rse_id, session=session)
        for attr in attributes:
            value = attributes[attr]
            if value is not None:
                if attr in old_attributes:
                    rse_module.del_rse_attribute(rse_id=rse_id, key=attr, session=session)
                rse_module.add_rse_attribute(rse_id=rse_id, key=attr, value=value, session=session)

    # set deleted flag to RSEs that are missing in the import data
    old_rses = [old_rse['id'] for old_rse in rse_module.list_rses(session=session)]
    for old_rse in old_rses:
        if old_rse not in new_rses:
            try:
                rse_module.del_rse(rse_id=old_rse, session=session)
            except RSEOperationNotSupported:
                # RSEs that cannot be deleted (e.g. still holding data) are left as-is.
                pass
def get_transfer(rse, req, scheme, mock):
    """
    Build the transfer dictionary for a request, resolving sources,
    destinations, staging parameters and the external FTS host.

    :param rse: Destination RSE dictionary (expects 'rse' and 'id' keys).
    :param req: The request dictionary.
    :param scheme: Protocol scheme to use.
    :param mock: If True, rewrite source URLs to the mock scheme.
    :returns: A transfer dictionary, or None when the request must be skipped.
    """
    src_spacetoken = None

    ts = time.time()
    sources, metadata = get_sources(rse, scheme, req)
    record_timer('daemons.conveyor.submitter.get_sources', (time.time() - ts) * 1000)
    logging.debug('Sources for request %s: %s' % (req['request_id'], sources))
    if sources is None:
        logging.error("Request %s DID %s:%s RSE %s failed to get sources" % (req['request_id'], req['scope'], req['name'], rse['rse']))
        return None
    filesize = metadata['filesize']
    md5 = metadata['md5']
    adler32 = metadata['adler32']

    ts = time.time()
    destinations, dest_spacetoken = get_destinations(rse, scheme, req, sources)
    record_timer('daemons.conveyor.submitter.get_destinations', (time.time() - ts) * 1000)
    logging.debug('Destinations for request %s: %s' % (req['request_id'], destinations))
    if destinations is None:
        logging.error("Request %s DID %s:%s RSE %s failed to get destinations" % (req['request_id'], req['scope'], req['name'], rse['rse']))
        return None

    # Come up with mock sources if necessary
    if mock:
        tmp_sources = []
        for s in sources:
            tmp_sources.append((s[0], ':'.join(['mock'] + s[1].split(':')[1:])))
        sources = tmp_sources

    tmp_metadata = {'request_id': req['request_id'],
                    'scope': req['scope'],
                    'name': req['name'],
                    'activity': req['activity'],
                    'src_rse': sources[0][0],
                    'dst_rse': rse['rse'],
                    'dest_rse_id': req['dest_rse_id'],
                    'filesize': filesize,
                    'md5': md5,
                    'adler32': adler32}
    if 'previous_attempt_id' in req and req['previous_attempt_id']:
        tmp_metadata['previous_attempt_id'] = req['previous_attempt_id']

    # Extend the metadata dictionary with request attributes
    copy_pin_lifetime, overwrite, bring_online = -1, True, None
    if req['request_type'] == RequestType.STAGEIN:
        if req['attributes']:
            # Attributes may arrive as a dict or as a JSON string.
            if type(req['attributes']) is dict:
                attr = json.loads(json.dumps(req['attributes']))
            else:
                attr = json.loads(str(req['attributes']))
            copy_pin_lifetime = attr.get('lifetime')
        overwrite = False
        bring_online = 21000

    # if the source for transfer is a tape rse, set bring_online
    if req['request_type'] == RequestType.TRANSFER\
       and rse_core.get_rse(sources[0][0]).rse_type == RSEType.TAPE:
        bring_online = 21000

    # never overwrite on tape destinations
    if req['request_type'] == RequestType.TRANSFER\
       and rse_core.get_rse(None, rse_id=req['dest_rse_id']).rse_type == RSEType.TAPE:
        overwrite = False

    # exclude destination replica from source
    source_surls = [s[1] for s in sources]
    # BUGFIX: the original compared source_surls.sort() == destinations.sort();
    # list.sort() returns None, so that condition was always True and the
    # 'Forcing destination to source' branch was unreachable. Compare sorted
    # copies instead (also avoids mutating the lists as a side effect).
    if req['request_type'] == RequestType.STAGEIN and sorted(source_surls) == sorted(destinations):
        logging.debug('STAGING REQUEST %s - Will not try to ignore equivalent sources' % req['request_id'])
    elif req['request_type'] == RequestType.STAGEIN:
        logging.debug('STAGING REQUEST %s - Forcing destination to source' % req['request_id'])
        destinations = source_surls
    else:
        # new_sources aliases source_surls, so removals below shrink both.
        new_sources = source_surls
        for source_surl in source_surls:
            if source_surl in destinations:
                logging.info('Excluding source %s for request %s' % (source_surl, req['request_id']))
                new_sources.remove(source_surl)

        # make sure we only use one source when bring_online is needed
        if bring_online and len(new_sources) > 1:
            source_surls = [new_sources[0]]
            logging.info('Only using first source %s for bring_online request %s' % (source_surls, req['request_id']))

    if not source_surls:
        logging.error('All sources excluded - SKIP REQUEST %s' % req['request_id'])
        return None

    # Sources are properly set, so now we can finally force the source RSE to the destination RSE for STAGEIN
    if req['request_type'] == RequestType.STAGEIN:
        tmp_metadata['dst_rse'] = sources[0][0]

    # get external host: staging areas take the FTS attribute from the source RSE.
    if rse_core.get_rse(rse['rse'])['staging_area'] or rse['rse'].endswith("STAGING"):
        rse_attr = rse_core.list_rse_attributes(sources[0][0])
    else:
        rse_attr = rse_core.list_rse_attributes(rse['rse'], rse['id'])
    fts_hosts = rse_attr.get('fts', None)
    retry_count = req['retry_count']
    if not retry_count:
        retry_count = 0
    if not fts_hosts:
        logging.error('Destination RSE %s FTS attribute not defined - SKIP REQUEST %s' % (rse['rse'], req['request_id']))
        return None
    fts_list = fts_hosts.split(",")
    # BUGFIX: use floor division — plain '/' yields a float on Python 3 and
    # raises TypeError when used as a list index.
    external_host = fts_list[retry_count // len(fts_list)]

    transfer = {'request_id': req['request_id'],
                'src_urls': source_surls,
                'dest_urls': destinations,
                'filesize': filesize,
                'md5': md5,
                'adler32': adler32,
                'src_spacetoken': src_spacetoken,
                'dest_spacetoken': dest_spacetoken,
                'activity': req['activity'],
                'overwrite': overwrite,
                'bring_online': bring_online,
                'copy_pin_lifetime': copy_pin_lifetime,
                'external_host': external_host,
                'file_metadata': tmp_metadata}
    return transfer
def import_rses(rses, rse_sync_method='edit', attr_sync_method='edit', protocol_sync_method='edit', vo='def', session=None):
    """
    Import a dictionary of RSEs, honouring per-category sync modes.

    Sync modes: 'append' only adds new entries, 'edit' adds and updates,
    'hard' additionally removes entries absent from the import data.

    :param rses: Dictionary mapping RSE name to its properties.
    :param rse_sync_method: Sync mode for the RSEs themselves.
    :param attr_sync_method: Sync mode for RSE attributes.
    :param protocol_sync_method: Sync mode for RSE protocols.
    :param vo: The VO the RSEs belong to.
    :param session: database session in use.
    """
    new_rses = []
    for rse_name in rses:
        rse = rses[rse_name]
        # Accept rse_type as a plain string and convert it to the enum.
        if isinstance(rse.get('rse_type'), string_types):
            rse['rse_type'] = RSEType(rse['rse_type'])

        if rse_module.rse_exists(rse_name, vo=vo, include_deleted=False, session=session):
            # RSE exists and is active
            rse_id = rse_module.get_rse_id(rse=rse_name, vo=vo, session=session)
            # Only properties declared mutable may be updated.
            selected_rse_properties = {key: rse[key] for key in rse if key in rse_module.MUTABLE_RSE_PROPERTIES}
            rse_module.update_rse(rse_id=rse_id, parameters=selected_rse_properties, session=session)
        elif rse_module.rse_exists(rse_name, vo=vo, include_deleted=True, session=session):
            # RSE exists but in deleted state
            # Should only modify the RSE if importer is configured for edit or hard sync
            if rse_sync_method in ['edit', 'hard']:
                rse_id = rse_module.get_rse_id(rse=rse_name, vo=vo, include_deleted=True, session=session)
                rse_module.restore_rse(rse_id, session=session)
                selected_rse_properties = {key: rse[key] for key in rse if key in rse_module.MUTABLE_RSE_PROPERTIES}
                rse_module.update_rse(rse_id=rse_id, parameters=selected_rse_properties, session=session)
            else:
                # Config is in RSE append only mode, should not modify the disabled RSE
                continue
        else:
            # Unknown RSE: create it with all supplied properties.
            rse_id = rse_module.add_rse(rse=rse_name, vo=vo, deterministic=rse.get('deterministic'),
                                        volatile=rse.get('volatile'), city=rse.get('city'),
                                        region_code=rse.get('region_code'), country_name=rse.get('country_name'),
                                        staging_area=rse.get('staging_area'), continent=rse.get('continent'),
                                        time_zone=rse.get('time_zone'), ISP=rse.get('ISP'),
                                        rse_type=rse.get('rse_type'), latitude=rse.get('latitude'),
                                        longitude=rse.get('longitude'), ASN=rse.get('ASN'),
                                        availability=rse.get('availability'), session=session)

        new_rses.append(rse_id)

        # Protocols
        new_protocols = rse.get('protocols')
        if new_protocols:
            # update existing, add missing and remove left over protocols;
            # protocols are identified by the (scheme, hostname, port) triple.
            old_protocols = [{'scheme': protocol['scheme'], 'hostname': protocol['hostname'], 'port': protocol['port']}
                             for protocol in rse_module.get_rse_protocols(rse_id=rse_id, session=session)['protocols']]
            missing_protocols = [new_protocol for new_protocol in new_protocols
                                 if {'scheme': new_protocol['scheme'], 'hostname': new_protocol['hostname'], 'port': new_protocol['port']} not in old_protocols]
            outdated_protocols = [new_protocol for new_protocol in new_protocols
                                  if {'scheme': new_protocol['scheme'], 'hostname': new_protocol['hostname'], 'port': new_protocol['port']} in old_protocols]
            new_protocols = [{'scheme': protocol['scheme'], 'hostname': protocol['hostname'], 'port': protocol['port']} for protocol in new_protocols]
            to_be_removed_protocols = [old_protocol for old_protocol in old_protocols if old_protocol not in new_protocols]

            if protocol_sync_method == 'append':
                # Append mode never touches existing protocols.
                outdated_protocols = []

            for protocol in outdated_protocols:
                scheme = protocol['scheme']
                port = protocol['port']
                hostname = protocol['hostname']
                # The identifying keys must not appear in the update payload.
                del protocol['scheme']
                del protocol['hostname']
                del protocol['port']
                rse_module.update_protocols(rse_id=rse_id, scheme=scheme, data=protocol, hostname=hostname, port=port, session=session)

            for protocol in missing_protocols:
                rse_module.add_protocol(rse_id=rse_id, parameter=protocol, session=session)

            if protocol_sync_method == 'hard':
                # Hard mode removes protocols absent from the import data.
                for protocol in to_be_removed_protocols:
                    scheme = protocol['scheme']
                    port = protocol['port']
                    hostname = protocol['hostname']
                    rse_module.del_protocols(rse_id=rse_id, scheme=scheme, port=port, hostname=hostname, session=session)

        # Limits: delete-then-set to overwrite existing values.
        old_limits = rse_module.get_rse_limits(rse_id=rse_id, session=session)
        for limit_name in ['MaxBeingDeletedFiles', 'MinFreeSpace']:
            limit = rse.get(limit_name)
            if limit:
                if limit_name in old_limits:
                    rse_module.delete_rse_limits(rse_id=rse_id, name=limit_name, session=session)
                rse_module.set_rse_limits(rse_id=rse_id, name=limit_name, value=limit, session=session)

        # Attributes: top-level lfn2pfn/verify_checksum fields are folded in.
        attributes = rse.get('attributes', {})
        attributes['lfn2pfn_algorithm'] = rse.get('lfn2pfn_algorithm')
        attributes['verify_checksum'] = rse.get('verify_checksum')
        old_attributes = rse_module.list_rse_attributes(rse_id=rse_id, session=session)
        # Attributes present in the DB but absent from the import data.
        missing_attributes = [attribute for attribute in old_attributes if attribute not in attributes]

        for attr in attributes:
            value = attributes[attr]
            if value is not None:
                if attr in old_attributes:
                    # Append mode never overwrites an existing attribute.
                    if attr_sync_method not in ['append']:
                        rse_module.del_rse_attribute(rse_id=rse_id, key=attr, session=session)
                        rse_module.add_rse_attribute(rse_id=rse_id, key=attr, value=value, session=session)
                else:
                    rse_module.add_rse_attribute(rse_id=rse_id, key=attr, value=value, session=session)

        if attr_sync_method == 'hard':
            # Hard mode removes leftover attributes, except the RSE's own name key.
            for attr in missing_attributes:
                if attr != rse_name:
                    rse_module.del_rse_attribute(rse_id=rse_id, key=attr, session=session)

    # set deleted flag to RSEs that are missing in the import data
    old_rses = [old_rse['id'] for old_rse in rse_module.list_rses(session=session)]
    if rse_sync_method == 'hard':
        for old_rse in old_rses:
            if old_rse not in new_rses:
                try:
                    rse_module.del_rse(rse_id=old_rse, session=session)
                except RSEOperationNotSupported:
                    # RSEs that cannot be deleted (e.g. still holding data) are left as-is.
                    pass
def test_importer_rest(self):
    """ IMPORTER (REST): test import.

    Posts three import payloads through the REST importer and verifies
    RSE creation, protocol sync, attributes, limits, distances and RSE
    deletion against the imported data.
    """
    mw = []
    headers1 = {'X-Rucio-Account': 'root', 'X-Rucio-Username': '******', 'X-Rucio-Password': '******'}
    headers1.update(self.vo_header)
    r1 = TestApp(auth_app.wsgifunc(*mw)).get('/userpass', headers=headers1, expect_errors=True)
    token = str(r1.header('X-Rucio-Auth-Token'))
    headers2 = {'X-Rucio-Type': 'user', 'X-Rucio-Account': 'root', 'X-Rucio-Auth-Token': str(token)}
    r2 = TestApp(import_app.wsgifunc(*mw)).post('/', headers=headers2, expect_errors=True, params=render_json(**self.data1))
    assert_equal(r2.status, 201, r2.body)

    # RSE that did not exist before the import
    check_rse(self.new_rse, self.data1['rses'])
    check_protocols(self.new_rse, self.data1['rses'])
    new_rse_id = get_rse_id(rse=self.new_rse, **self.vo)
    protocols = get_rse_protocols(self.old_rse_id_1)
    protocols = [{'hostname': protocol['hostname'], 'scheme': protocol['scheme'], 'port': protocol['port']} for protocol in protocols['protocols']]
    # BUGFIX: the key was misspelled 'hostename', so the candidate dict could
    # never match any entry of `protocols` and the assertion was vacuously
    # true. With the correct key it actually verifies that the protocol
    # removed by the hard sync is gone.
    assert_true({'hostname': 'hostname3', 'port': 1000, 'scheme': 'scheme3'} not in protocols)
    attributes = list_rse_attributes(rse_id=new_rse_id)
    assert_equal(attributes['attr1'], 'test')
    limits = get_rse_limits(rse_id=new_rse_id)
    assert_equal(limits['MinFreeSpace'], 20000)

    # RSE 1 that already existed before
    check_rse(self.old_rse_1, self.data1['rses'])
    check_protocols(self.old_rse_1, self.data1['rses'])
    attributes = list_rse_attributes(rse_id=self.old_rse_id_1)
    assert_equal(attributes['attr1'], 'test1')
    assert_equal(attributes['attr2'], 'test2')
    limits = get_rse_limits(rse_id=self.old_rse_id_1)
    assert_equal(limits['MaxBeingDeletedFiles'], 1000)
    assert_equal(limits['MinFreeSpace'], 10000)

    # Distances between the pre-existing RSEs
    distance = get_distances(self.old_rse_id_1, self.old_rse_id_2)[0]
    assert_equal(distance['ranking'], 10)
    distance = get_distances(self.old_rse_id_1, self.old_rse_id_3)[0]
    assert_equal(distance['ranking'], 4)

    # RSE absent from the import data must have been deleted (hard sync)
    with assert_raises(RSENotFound):
        get_rse(rse_id=self.old_rse_id_4)

    # The remaining payloads must import cleanly as well
    r2 = TestApp(import_app.wsgifunc(*mw)).post('/', headers=headers2, expect_errors=True, params=render_json(**self.data2))
    assert_equal(r2.status, 201)
    r2 = TestApp(import_app.wsgifunc(*mw)).post('/', headers=headers2, expect_errors=True, params=render_json(**self.data3))
    assert_equal(r2.status, 201)
def test_importer_client(self):
    """ IMPORTER (CLIENT): test import.

    Imports three payloads via the ImportClient and verifies the resulting
    RSE settings, protocols, attributes, limits, transfer limits and
    distances.
    """
    import_client = ImportClient()
    import_client.import_data(data=self.data1)

    # RSE that had not existed before the import
    rse = get_rse(self.new_rse)
    assert_equal(rse['availability'], 5)
    assert_equal(rse['city'], 'NewCity')
    assert_equal(rse['rse_type'], RSEType.TAPE)
    # Reduce each protocol dict to the identifying triple for membership tests
    protocols = [{'hostname': protocol['hostname'], 'scheme': protocol['scheme'], 'port': protocol['port']} for protocol in get_rse_protocols(self.new_rse)['protocols']]
    assert_true({'scheme': 'scheme', 'hostname': 'hostname', 'port': 1000} in protocols)
    attributes = list_rse_attributes(rse=self.new_rse)
    assert_equal(attributes['attr1'], 'test')
    limits = get_rse_limits(rse=self.new_rse)
    assert_equal(limits['limit1'], 0)
    transfer_limits = get_rse_transfer_limits(rse=self.new_rse)
    assert_equal(transfer_limits['activity1'][get_rse_id(self.new_rse)]['max_transfers'], 1)

    # RSE 1 that already exists: imported values must overwrite/extend it
    rse = get_rse(self.old_rse_1)
    assert_equal(rse['rse'], self.old_rse_1)
    # Include impl/prefix here to check the full protocol update
    protocols = [{'hostname': protocol['hostname'], 'scheme': protocol['scheme'], 'port': protocol['port'], 'impl': protocol['impl'], 'prefix': protocol['prefix']} for protocol in get_rse_protocols(self.old_rse_1)['protocols']]
    assert_true({'scheme': 'scheme1', 'hostname': 'hostname1', 'port': 1000, 'prefix': 'prefix', 'impl': 'impl1'} in protocols)
    assert_true({'scheme': 'scheme2', 'hostname': 'hostname2', 'port': 1001, 'impl': 'impl', 'prefix': ''} in protocols)
    attributes = list_rse_attributes(rse=self.old_rse_1)
    assert_equal(attributes['attr1'], 'test1')
    assert_equal(attributes['attr2'], 'test2')
    limits = get_rse_limits(rse=self.old_rse_1)
    assert_equal(limits['limit1'], 0)
    assert_equal(limits['limit2'], 2)
    transfer_limits = get_rse_transfer_limits(rse=self.old_rse_1)
    assert_equal(transfer_limits['activity1'][get_rse_id(self.old_rse_1)]['max_transfers'], 1)
    assert_equal(transfer_limits['activity2'][get_rse_id(self.old_rse_1)]['max_transfers'], 2)

    # Distances
    distance = get_distances(self.old_rse_id_1, self.old_rse_id_2)[0]
    assert_equal(distance['ranking'], 10)
    distance = get_distances(self.old_rse_id_1, self.old_rse_id_3)[0]
    assert_equal(distance['ranking'], 4)

    # Remaining payloads must import without raising
    import_client.import_data(data=self.data2)
    import_client.import_data(data=self.data3)
def place(self, did):
    """
    Decide whether and where to place an additional replica of a DID.

    :param did: (scope, name) tuple of the dataset to consider.
    :returns: Decision dict. Contains 'error_reason' when no placement is
              made; otherwise 'destination_rse' and 'rse_ratios'.
    """
    self.__update_penalties()
    decision = {'did': ':'.join(did)}
    # Only data* and mc* scopes are eligible for placement
    if (not did[0].startswith('data')) and (not did[0].startswith('mc')):
        decision['error_reason'] = 'not a data or mc dataset'
        return decision

    try:
        meta = get_did(did[0], did[1])
    except DataIdentifierNotFound:
        decision['error_reason'] = 'did does not exist'
        return decision
    # Normalize missing metadata so the %d formatting below cannot fail
    if meta['length'] is None:
        meta['length'] = 0
    if meta['bytes'] is None:
        meta['bytes'] = 0
    logging.debug('got %s:%s, num_files: %d, bytes: %d' % (did[0], did[1], meta['length'], meta['bytes']))

    decision['length'] = meta['length']
    decision['bytes'] = meta['bytes']

    last_accesses = self._dc.get_did(did)
    self._dc.add_did(did)
    decision['last_accesses'] = last_accesses

    pop = get_popularity(did)
    decision['popularity'] = pop or 0.0

    # BUGFIX: get_popularity() may return None; comparing None with a float
    # raises TypeError on Python 3. Use the same normalization as the
    # 'popularity' field above (None -> 0.0, i.e. "not popular").
    if (last_accesses < 5) and ((pop or 0.0) < 10.0):
        decision['error_reason'] = 'did not popular enough'
        return decision

    # BUGFIX: work on a copy — the original aliased self._rses, so the
    # remove() below permanently shrank the shared candidate list across
    # successive place() calls.
    free_rses = list(self._rses)
    available_reps = []
    reps = list_dataset_replicas(did[0], did[1])
    num_reps = 0
    for rep in reps:
        rse_attr = list_rse_attributes(rep['rse'])
        # Only DATADISK endpoints are placement candidates
        if 'type' not in rse_attr:
            continue
        if rse_attr['type'] != 'DATADISK':
            continue
        if rep['state'] == ReplicaState.AVAILABLE:
            if rep['rse'] in free_rses:
                free_rses.remove(rep['rse'])
            available_reps.append(rep['rse'])
            num_reps += 1

    decision['replica_rses'] = available_reps
    decision['num_replicas'] = num_reps
    if num_reps >= 5:
        decision['error_reason'] = 'more than 4 replicas already exist'
        return decision

    # Rank remaining RSEs: free-space percentage, divided by the current
    # penalty so recently-chosen sites are deprioritized
    rse_ratios = {}
    space_info = self._fsc.get_rse_space()
    for rse in free_rses:
        rse_space = space_info[rse]
        penalty = self._penalties[rse]
        rse_ratios[rse] = float(rse_space['free']) / float(rse_space['total']) * 100.0 / penalty

    sorted_rses = sorted(rse_ratios.items(), key=itemgetter(1), reverse=True)
    decision['destination_rse'] = sorted_rses[0][0]
    decision['rse_ratios'] = sorted_rses
    # Penalize the winner so the next decision prefers a different site
    self._penalties[sorted_rses[0][0]] = 10.0

    return decision
def place(self, did):
    """
    Decide a source/destination pair for replicating a DID across T2 sites.

    :param did: (scope, name) tuple of the dataset to consider.
    :returns: Decision dict. Contains 'error_reason' when no placement is
              made; otherwise 'source_rse' and 'destination_rse'.
    """
    self.__update_penalties()
    # Drop expired entries from the per-hour rate-limiting windows
    self._added_bytes.trim()
    self._added_files.trim()

    decision = self.check_did(did)
    if 'error_reason' in decision:
        return decision

    meta = get_did(did[0], did[1])

    # available_reps maps source RSE -> {destination RSE -> candidate metrics}
    available_reps = {}
    reps = list_dataset_replicas(did[0], did[1])
    num_reps = 0
    space_info = self._fsc.get_rse_space()
    max_mbps = 0.0
    for rep in reps:
        rse_attr = list_rse_attributes(rep['rse'])
        src_rse = rep['rse']
        if 'site' not in rse_attr:
            continue
        src_site = rse_attr['site']
        src_rse_info = get_rse(src_rse)
        # Only DATADISK endpoints are considered as sources
        if 'type' not in rse_attr:
            continue
        if rse_attr['type'] != 'DATADISK':
            continue
        # availability bit 4 = read allowed on the source
        if src_rse_info['availability'] & 4 == 0:
            continue
        if rep['state'] == ReplicaState.AVAILABLE:
            if rep['available_length'] == 0:
                continue
            # Try the network-metric providers in preference order; keep the
            # first one that has data for this source site
            net_metrics = {}
            net_metrics_type = None
            for metric_type in ('fts', 'fax', 'perfsonar', 'dashb'):
                net_metrics_type = metric_type
                net_metrics = self._nmc.getMbps(src_site, metric_type)
                if net_metrics:
                    break
            if len(net_metrics) == 0:
                continue
            available_reps[src_rse] = {}
            for dst_site, mbps in net_metrics.items():
                if src_site == dst_site:
                    continue
                if dst_site in self._sites:
                    # Track the global maximum for bandwidth normalization later
                    if mbps > max_mbps:
                        max_mbps = mbps
                    dst_rse = self._sites[dst_site]['rse']
                    dst_rse_info = get_rse(dst_rse)
                    # availability bit 2 = write allowed on the destination
                    if dst_rse_info['availability'] & 2 == 0:
                        continue
                    # Enforce the per-RSE hourly ingestion budget
                    site_added_bytes = sum(self._added_bytes.get_series(dst_rse))
                    site_added_files = sum(self._added_files.get_series(dst_rse))
                    if ((site_added_bytes + meta['bytes']) > self._max_bytes_hour_rse):
                        continue
                    if ((site_added_files + meta['length']) > self._max_files_hour_rse):
                        continue
                    # Skip links that already have queued transfers
                    queued = self._nmc.getQueuedFiles(src_site, dst_site)
                    # logging.debug('queued %s -> %s: %d' % (src_site, dst_site, queued))
                    if queued > 0:
                        continue
                    rse_space = space_info.get(dst_rse, {'free': 0, 'total': 1})
                    # Lazily initialize penalties at 100.0 (no penalty)
                    if src_rse not in self._src_penalties:
                        self._src_penalties[src_rse] = 100.0
                    src_penalty = self._src_penalties[src_rse]
                    if dst_rse not in self._dst_penalties:
                        self._dst_penalties[dst_rse] = 100.0
                    dst_penalty = self._dst_penalties[dst_rse]
                    free_space = float(rse_space['free']) / float(rse_space['total']) * 100.0
                    available_reps[src_rse][dst_rse] = {'free_space': free_space, 'src_penalty': src_penalty, 'dst_penalty': dst_penalty, 'mbps': float(mbps), 'metrics_type': net_metrics_type}
            num_reps += 1

    # decision['replica_rses'] = available_reps
    decision['num_replicas'] = num_reps

    if num_reps >= 5:
        decision['error_reason'] = 'more than 4 replicas already exist'
        return decision

    src_dst_ratios = []

    if max_mbps == 0.0:
        decision['error_reason'] = 'could not find enough network metrics'
        return decision

    # Score each candidate link: free space + normalized bandwidth, scaled by
    # both penalties. Destinations that already hold a replica (i.e. appear
    # as a source) are skipped.
    for src, dsts in available_reps.items():
        for dst, metrics in dsts.items():
            if dst in available_reps:
                continue
            bdw = (metrics['mbps'] / max_mbps) * 100.0
            src_penalty = self._src_penalties[src]
            dst_penalty = self._dst_penalties[dst]
            ratio = ((metrics['free_space'] / 4.0) + bdw) * src_penalty * dst_penalty
            src_dst_ratios.append((src, dst, ratio))

    if len(src_dst_ratios) == 0:
        decision['error_reason'] = 'found no suitable src/dst for replication'
        return decision

    sorted_ratios = sorted(src_dst_ratios, key=itemgetter(2), reverse=True)
    logging.debug(sorted_ratios)
    destination_rse = sorted_ratios[0][1]
    source_rse = sorted_ratios[0][0]
    decision['destination_rse'] = destination_rse
    decision['source_rse'] = source_rse
    # decision['rse_ratios'] = src_dst_ratios

    # Penalize the chosen pair and record the decision in the rate windows
    self._dst_penalties[destination_rse] = 10.0
    self._src_penalties[source_rse] = 10.0
    self._added_cache.add_dataset(':'.join(did))
    self._added_bytes.add_point(destination_rse, meta['bytes'])
    self._added_files.add_point(destination_rse, meta['length'])
    self._added_bytes.add_point('total', meta['bytes'])
    self._added_files.add_point('total', meta['length'])

    return decision
def test_importer_rest(self):
    """ IMPORTER (REST): test import.

    Posts three import payloads through the REST importer and verifies RSE
    settings, protocols, attributes, limits, transfer limits and distances.
    """
    mw = []
    headers1 = {'X-Rucio-Account': 'root', 'X-Rucio-Username': '******', 'X-Rucio-Password': '******'}
    r1 = TestApp(auth_app.wsgifunc(*mw)).get('/userpass', headers=headers1, expect_errors=True)
    token = str(r1.header('X-Rucio-Auth-Token'))
    headers2 = {'X-Rucio-Type': 'user', 'X-Rucio-Account': 'root', 'X-Rucio-Auth-Token': str(token)}
    r2 = TestApp(import_app.wsgifunc(*mw)).post('/', headers=headers2, expect_errors=True, params=render_json(**self.data1))
    assert_equal(r2.status, 201)

    # RSE that not existed before
    rse = get_rse(self.new_rse)
    assert_equal(rse['availability'], 5)
    assert_equal(rse['city'], 'NewCity')
    assert_equal(rse['rse_type'], RSEType.TAPE)
    # Reduce each protocol dict to the identifying triple for membership tests
    protocols = [{'hostname': protocol['hostname'], 'scheme': protocol['scheme'], 'port': protocol['port']} for protocol in get_rse_protocols(self.new_rse)['protocols']]
    assert_true({'scheme': 'scheme', 'hostname': 'hostname', 'port': 1000} in protocols)
    attributes = list_rse_attributes(rse=self.new_rse)
    assert_equal(attributes['attr1'], 'test')
    limits = get_rse_limits(rse=self.new_rse)
    assert_equal(limits['limit1'], 0)
    transfer_limits = get_rse_transfer_limits(rse=self.new_rse)
    assert_equal(transfer_limits['activity1'][get_rse_id(self.new_rse)]['max_transfers'], 1)

    # RSE 1 that already existed before
    rse = get_rse(self.old_rse_1)
    assert_equal(rse['rse'], self.old_rse_1)
    # Include impl/prefix here to check the full protocol update
    protocols = [{'hostname': protocol['hostname'], 'scheme': protocol['scheme'], 'port': protocol['port'], 'impl': protocol['impl'], 'prefix': protocol['prefix']} for protocol in get_rse_protocols(self.old_rse_1)['protocols']]
    assert_true({'scheme': 'scheme1', 'hostname': 'hostname1', 'port': 1000, 'prefix': 'prefix', 'impl': 'impl1'} in protocols)
    assert_true({'scheme': 'scheme2', 'hostname': 'hostname2', 'port': 1001, 'impl': 'impl', 'prefix': ''} in protocols)
    attributes = list_rse_attributes(rse=self.old_rse_1)
    assert_equal(attributes['attr1'], 'test1')
    assert_equal(attributes['attr2'], 'test2')
    limits = get_rse_limits(rse=self.old_rse_1)
    assert_equal(limits['limit1'], 0)
    assert_equal(limits['limit2'], 2)
    transfer_limits = get_rse_transfer_limits(rse=self.old_rse_1)
    assert_equal(transfer_limits['activity1'][get_rse_id(self.old_rse_1)]['max_transfers'], 1)
    assert_equal(transfer_limits['activity2'][get_rse_id(self.old_rse_1)]['max_transfers'], 2)

    # Distances
    distance = get_distances(self.old_rse_id_1, self.old_rse_id_2)[0]
    assert_equal(distance['ranking'], 10)
    distance = get_distances(self.old_rse_id_1, self.old_rse_id_3)[0]
    assert_equal(distance['ranking'], 4)

    # The remaining payloads must import cleanly as well
    r2 = TestApp(import_app.wsgifunc(*mw)).post('/', headers=headers2, expect_errors=True, params=render_json(**self.data2))
    assert_equal(r2.status, 201)
    r2 = TestApp(import_app.wsgifunc(*mw)).post('/', headers=headers2, expect_errors=True, params=render_json(**self.data3))
    assert_equal(r2.status, 201)
def rebalance_rse(rse, max_bytes=1E9, max_files=None, dry_run=False, exclude_expression=None, comment=None, force_expression=None, mode=None, priority=3, source_replica_expression=None, session=None):
    """
    Rebalance data from an RSE

    NOTE: this variant uses Python 2 print statements for its report.

    :param rse:                        RSE to rebalance data from.
    :param max_bytes:                  Maximum amount of bytes to rebalance.
    :param max_files:                  Maximum amount of files to rebalance.
    :param dry_run:                    Only run in dry-run mode.
    :param exclude_expression:         Exclude this rse_expression from being target_rses.
    :param comment:                    Comment to set on the new rules.
    :param force_expression:           Force a specific rse_expression as target.
    :param mode:                       BB8 mode to execute (None=normal, 'decomission'=Decomission mode)
    :param priority:                   Priority of the new created rules.
    :param source_replica_expression:  Source replica expression of the new created rules.
    :param session:                    The database session.
    :returns:                          List of rebalanced datasets.
    """
    rebalanced_bytes = 0
    rebalanced_files = 0
    rebalanced_datasets = []
    rse_attributes = list_rse_attributes(rse=rse, session=session)

    print '***************************'
    print 'BB8 - Execution Summary'
    print 'Mode: %s' % ('STANDARD' if mode is None else mode.upper())
    print 'Dry Run: %s' % (dry_run)
    print '***************************'
    print 'scope:name rule_id bytes(Gb) target_rse child_rule_id'

    for scope, name, rule_id, rse_expression, subscription_id, bytes, length in list_rebalance_rule_candidates(rse=rse, mode=mode):
        # Subscription-driven rules are skipped when a target is forced
        if force_expression is not None and subscription_id is not None:
            continue
        # Skip candidates that would exceed the byte/file budgets
        if rebalanced_bytes + bytes > max_bytes:
            continue
        if max_files:
            if rebalanced_files + length > max_files:
                continue
        try:
            # RSEs that already hold locks for this dataset are excluded as targets
            other_rses = [r['rse_id'] for r in get_dataset_locks(scope, name, session=session)]

            # Select the target RSE for this rule
            try:
                target_rse_exp = select_target_rse(current_rse=rse, rse_expression=rse_expression, subscription_id=subscription_id, rse_attributes=rse_attributes, other_rses=other_rses, exclude_expression=exclude_expression, force_expression=force_expression, session=session)
                # Rebalance this rule
                if not dry_run:
                    child_rule_id = rebalance_rule(parent_rule_id=rule_id, activity='Data rebalancing', rse_expression=target_rse_exp, priority=priority, source_replica_expression=source_replica_expression, comment=comment)
                else:
                    child_rule_id = ''
            except (InsufficientTargetRSEs, DuplicateRule, RuleNotFound, InsufficientAccountLimit):
                # No suitable target / already ruled: skip this candidate
                continue
            print '%s:%s %s %d %s %s' % (scope, name, str(rule_id), int(bytes / 1E9), target_rse_exp, child_rule_id)
            # NOTE(review): string sniffing for 'Concurrent' presumably detects a
            # concurrency error message returned instead of a rule id — confirm
            if 'Concurrent' in str(child_rule_id):
                print str(child_rule_id)
                continue
            rebalanced_bytes += bytes
            rebalanced_files += length
            rebalanced_datasets.append((scope, name, bytes, length, target_rse_exp, rule_id, child_rule_id))
        except Exception as error:
            # Unexpected errors abort the whole rebalancing run
            print 'Exception %s occured while rebalancing %s:%s, rule_id: %s!' % (str(error), scope, name, str(rule_id))
            raise error

    print 'BB8 is rebalancing %d Gb of data (%d rules)' % (int(rebalanced_bytes / 1E9), len(rebalanced_datasets))
    return rebalanced_datasets
def rebalance_rse(rse_id, max_bytes=1E9, max_files=None, dry_run=False, exclude_expression=None, comment=None, force_expression=None, mode=None, priority=3, source_replica_expression='*\\bb8-enabled=false', session=None, logger=logging.log):
    """
    Rebalance data from an RSE

    :param rse_id:                     RSE to rebalance data from.
    :param max_bytes:                  Maximum amount of bytes to rebalance.
    :param max_files:                  Maximum amount of files to rebalance.
    :param dry_run:                    Only run in dry-run mode.
    :param exclude_expression:         Exclude this rse_expression from being target_rses.
    :param comment:                    Comment to set on the new rules.
    :param force_expression:           Force a specific rse_expression as target.
    :param mode:                       BB8 mode to execute (None=normal, 'decomission'=Decomission mode)
    :param priority:                   Priority of the new created rules.
    :param source_replica_expression:  Source replica expression of the new created rules.
    :param session:                    The database session.
    :param logger:                     Logger.
    :returns:                          List of rebalanced datasets.
    """
    rebalanced_bytes = 0
    rebalanced_files = 0
    rebalanced_datasets = []

    rse_attributes = list_rse_attributes(rse_id=rse_id, session=session)
    src_rse = get_rse_name(rse_id=rse_id)

    logger(logging.INFO, '***************************')
    logger(logging.INFO, 'BB8 - Execution Summary')
    logger(logging.INFO, 'Mode: %s' % ('STANDARD' if mode is None else mode.upper()))
    logger(logging.INFO, 'Dry Run: %s' % (dry_run))
    logger(logging.INFO, '***************************')

    for scope, name, rule_id, rse_expression, subscription_id, bytes_, length, fsize in list_rebalance_rule_candidates(rse_id=rse_id, mode=mode):
        # Subscription-driven rules are skipped when a target is forced
        if force_expression is not None and subscription_id is not None:
            continue
        # Skip candidates that would exceed the byte/file budgets
        if rebalanced_bytes + bytes_ > max_bytes:
            continue
        if max_files:
            if rebalanced_files + length > max_files:
                continue
        try:
            rule = get_rule(rule_id=rule_id)
            # RSEs that already hold locks for this dataset are excluded as targets
            other_rses = [r['rse_id'] for r in get_dataset_locks(scope, name, session=session)]

            # Select the target RSE for this rule
            try:
                target_rse_exp = select_target_rse(parent_rule=rule, current_rse_id=rse_id, rse_expression=rse_expression, subscription_id=subscription_id, rse_attributes=rse_attributes, other_rses=other_rses, exclude_expression=exclude_expression, force_expression=force_expression, session=session)
                # Rebalance this rule
                if not dry_run:
                    child_rule_id = rebalance_rule(parent_rule=rule, activity='Data rebalancing', rse_expression=target_rse_exp, priority=priority, source_replica_expression=source_replica_expression, comment=comment)
                else:
                    child_rule_id = ''
            except (InsufficientTargetRSEs, DuplicateRule, RuleNotFound, InsufficientAccountLimit) as err:
                # No suitable target / already ruled: log and skip this candidate
                logger(logging.ERROR, str(err))
                continue
            # rebalance_rule() signals "rule already exists on target" with None
            if child_rule_id is None:
                logger(logging.WARNING, 'A rule for %s:%s already exists on %s. It cannot be rebalanced', scope, name, target_rse_exp)
                continue
            logger(logging.INFO, 'Rebalancing %s:%s rule %s (%f GB) from %s to %s. New rule %s', scope, name, str(rule_id), bytes_ / 1E9, rule['rse_expression'], target_rse_exp, child_rule_id)
            rebalanced_bytes += bytes_
            rebalanced_files += length
            rebalanced_datasets.append((scope, name, bytes_, length, target_rse_exp, rule_id, child_rule_id))
        except Exception as error:
            # NOTE(review): unlike the older variant, unexpected errors are
            # logged and the loop continues with the next candidate
            logger(logging.ERROR, 'Exception %s occured while rebalancing %s:%s, rule_id: %s!', str(error), scope, name, str(rule_id))

    logger(logging.INFO, 'BB8 is rebalancing %d GB of data (%d rules) from %s', rebalanced_bytes / 1E9, len(rebalanced_datasets), src_rse)
    return rebalanced_datasets
def define_eol(scope, name, rses, session=None):
    """
    ATLAS policy for rules on SCRATCHDISK

    Computes the end-of-life timestamp for a rule from the ATLAS lifetime
    policy, or returns None when no policy applies (non-ATLAS VO, managed
    group/local space, unknown DID, or no matching policy entry).

    :param scope: Scope of the DID.
    :param name: Name of the DID.
    :param rses: List of RSEs.
    :param session: The database session in use.
    """
    vo_name = get_vo()
    if vo_name != 'atlas':
        return None

    # Check if on ATLAS managed space
    if [rse for rse in rses if list_rse_attributes(rse=None, rse_id=rse['id'], session=session).get('type') in ['LOCALGROUPDISK', 'LOCALGROUPTAPE', 'GROUPDISK', 'GROUPTAPE']]:
        return None
    # Now check the lifetime policy
    try:
        did = session.query(models.DataIdentifier).filter(models.DataIdentifier.scope == scope, models.DataIdentifier.name == name).one()
    except NoResultFound:
        return None
    policy_dict = get_lifetime_policy()
    # Policy category is derived from the scope prefix
    did_type = 'other'
    if scope.startswith('mc'):
        did_type = 'mc'
    elif scope.startswith('data'):
        did_type = 'data'
    elif scope.startswith('valid'):
        did_type = 'valid'
    else:
        did_type = 'other'
    for policy in policy_dict[did_type]:
        # First pass: skip the policy when any 'exclude' pattern matches
        if 'exclude' in policy:
            to_exclude = False
            for key in policy['exclude']:
                meta_key = None
                if key not in ['datatype', 'project', ]:
                    # Policy keys map onto differently-named DID metadata fields
                    if key == 'stream':
                        meta_key = 'stream_name'
                    elif key == 'tags':
                        meta_key = 'version'
                else:
                    meta_key = key
                values = policy['exclude'][key]
                for value in values:
                    # Policy patterns use SQL-style '%' wildcards; translate to regex
                    value = value.replace('%', '.*')
                    if meta_key and did[meta_key] and value and re.match(value, did[meta_key]):
                        to_exclude = True
                        break
                if to_exclude:
                    break
            if to_exclude:
                continue
        # Second pass: the policy applies only if every 'include' key matches
        if 'include' in policy:
            match_policy = True
            for key in policy['include']:
                meta_key = None
                if key not in ['datatype', 'project', ]:
                    if key == 'stream':
                        meta_key = 'stream_name'
                    elif key == 'tags':
                        meta_key = 'version'
                    else:
                        # Unknown include keys are ignored rather than failing the match
                        continue
                else:
                    meta_key = key
                values = policy['include'][key]
                to_keep = False
                for value in values:
                    value = value.replace('%', '.*')
                    if meta_key and did[meta_key] and value and re.match(value, did[meta_key]):
                        to_keep = True
                        break
                match_policy = match_policy and to_keep
                if not to_keep:
                    match_policy = False
                    break
            if match_policy:
                # 'age'/'extension' are expressed in months; >= 12 months are
                # converted via whole years (365 d) plus remaining months (30 d)
                if int(policy['age']) >= 12:
                    years = int(int(policy['age']) / 12)
                    months = int(policy['age']) - years * 12
                    lifetime_value = 365 * years + 30 * months
                else:
                    lifetime_value = int(policy['age']) * 30
                if int(policy['extension']) >= 12:
                    years = int(int(policy['extension']) / 12)
                    months = int(policy['extension']) - years * 12
                    extension = 365 * years + 30 * months
                else:
                    extension = int(policy['extension']) * 30

                default_eol_at = did.created_at + timedelta(days=lifetime_value)
                # Still within the base lifetime: keep the default EOL.
                # Otherwise extend from the last access, but never move the
                # EOL earlier than the default.
                if default_eol_at > datetime.utcnow():
                    eol_at = default_eol_at
                elif did.accessed_at:
                    eol_at = did.accessed_at + timedelta(days=extension)
                    if eol_at < default_eol_at:
                        eol_at = default_eol_at
                else:
                    eol_at = default_eol_at
                return eol_at
    return None
else: return False if response['new_state']: request.set_request_state(response['request_id'], response['new_state'], session=session) if response['new_state'] == RequestState.DONE: rse_name = response['dst_rse'] rse_update_name = rse_name req = request.get_request(response['request_id'], session=session) if req['request_type'] == RequestType.STAGEIN: rse_update_name = rse_core.list_rse_attributes(response['dst_rse'], session=session)['staging_buffer'] logging.debug('OVERRIDE REPLICA DID %s:%s RSE %s TO %s' % (response['scope'], response['name'], response['dst_rse'], rse_update_name)) try: tss = time.time() logging.debug('UPDATE REPLICA STATE DID %s:%s RSE %s' % (response['scope'], response['name'], rse_update_name)) # make sure we do not leave the transaction try: # try quickly replica.update_replicas_states([{'rse': rse_update_name, 'scope': response['scope'], 'name': response['name'], 'state': ReplicaState.AVAILABLE}], nowait=False, session=session)
def __check_rse_usage(rse: str, rse_id: str, greedy: bool = False, logger: 'Callable' = logging.log) -> 'Tuple[int, bool]':
    """
    Internal method to check RSE usage and limits.

    The result is cached per RSE in REGION under 'rse_usage_<rse_id>'; every
    computed value is written to the cache before being returned.

    :param rse: The RSE name.
    :param rse_id: The RSE id.
    :param greedy: If True, needed_free_space will be set to 1TB regardless of actual rse usage.
    :param logger: Logger callable (level, message, *args).
    :returns: needed_free_space, only_delete_obsolete.
    """
    result = REGION.get('rse_usage_%s' % rse_id)
    if result is NO_VALUE:
        needed_free_space, used, free, obsolete = 0, 0, 0, 0

        # First of all check if greedy mode is enabled for this RSE or generally
        # NOTE(review): a string attribute value like 'False' would be truthy
        # here and enable greedy mode — confirm attribute value types
        attributes = list_rse_attributes(rse_id=rse_id)
        rse_attr_greedy = attributes.get('greedyDeletion', False)
        if greedy or rse_attr_greedy:
            result = (1000000000000, False)
            REGION.set('rse_usage_%s' % rse_id, result)
            return result

        # Get RSE limits
        limits = get_rse_limits(rse_id=rse_id)
        min_free_space = limits.get('MinFreeSpace', 0)

        # Check from which sources to get used and total spaces
        # Default is storage
        source_for_total_space = attributes.get('source_for_total_space', 'storage')
        source_for_used_space = attributes.get('source_for_used_space', 'storage')

        logger(logging.DEBUG, 'RSE: %s, source_for_total_space: %s, source_for_used_space: %s', rse, source_for_total_space, source_for_used_space)

        # Get total, used and obsolete space
        rse_usage = get_rse_usage(rse_id=rse_id)
        usage = [entry for entry in rse_usage if entry['source'] == 'obsolete']
        # Take the first matching entry, if any
        for var in usage:
            obsolete = var['used']
            break
        usage = [entry for entry in rse_usage if entry['source'] == source_for_total_space]

        # If no information is available about disk space, do nothing except if there are replicas with Epoch tombstone
        if not usage:
            if not obsolete:
                result = (needed_free_space, False)
                REGION.set('rse_usage_%s' % rse_id, result)
                return result
            result = (obsolete, True)
            REGION.set('rse_usage_%s' % rse_id, result)
            return result

        # Extract the total and used space
        for var in usage:
            total, used = var['total'], var['used']
            break

        # When used space comes from a different source, re-read it from there
        if source_for_total_space != source_for_used_space:
            usage = [entry for entry in rse_usage if entry['source'] == source_for_used_space]
            if not usage:
                result = (needed_free_space, False)
                REGION.set('rse_usage_%s' % rse_id, result)
                return result
            for var in usage:
                used = var['used']
                break
        free = total - used
        if min_free_space:
            needed_free_space = min_free_space - free

        # If needed_free_space negative, nothing to delete except if some Epoch tombstoned replicas
        if needed_free_space <= 0:
            result = (obsolete, True)
        else:
            result = (needed_free_space, False)

        REGION.set('rse_usage_%s' % rse_id, result)
        return result

    # Cached value from a previous call
    return result
def __init__(self, account, rses, weight, copies, ignore_account_limit=False, session=None):
    """
    Initialize the RSE Selector.

    :param account:               Account owning the rule.
    :param rses:                  List of rse dictionaries.
    :param weight:                Weighting to use.
    :param copies:                Number of copies to create.
    :param ignore_account_limit:  Flag if the quota should be ignored.
    :param session:               DB Session in use.
    :raises:                      InvalidRuleWeight, InsufficientAccountLimit, InsufficientTargetRSEs
    """
    self.account = account
    self.rses = []  # [{'rse_id':, 'weight':, 'staging_area'}]
    self.copies = copies
    if weight is not None:
        # Weighted selection: only RSEs that define the weight attribute qualify
        for rse in rses:
            attributes = list_rse_attributes(rse=None, rse_id=rse['id'], session=session)
            # availability bit 2 = write allowed; default 7 = fully available
            availability_write = True if rse.get('availability', 7) & 2 else False
            if weight not in attributes:
                continue  # The RSE does not have the required weight set, therefore it is ignored
            try:
                self.rses.append({'rse_id': rse['id'], 'weight': float(attributes[weight]), 'mock_rse': attributes.get('mock', False), 'availability_write': availability_write, 'staging_area': rse['staging_area']})
            except ValueError:
                raise InvalidRuleWeight('The RSE with id \'%s\' has a non-number specified for the weight \'%s\'' % (rse['id'], weight))
    else:
        # Unweighted selection: every RSE qualifies with weight 1
        for rse in rses:
            mock_rse = has_rse_attribute(rse['id'], 'mock', session=session)
            availability_write = True if rse.get('availability', 7) & 2 else False
            self.rses.append({'rse_id': rse['id'], 'weight': 1, 'mock_rse': mock_rse, 'availability_write': availability_write, 'staging_area': rse['staging_area']})

    if len(self.rses) < self.copies:
        raise InsufficientTargetRSEs('Target RSE set not sufficient for number of copies. (%s copies requested, RSE set size %s)' % (self.copies, len(self.rses)))

    # Admin accounts and explicit overrides bypass quota accounting entirely
    if has_account_attribute(account=account, key='admin', session=session) or ignore_account_limit:
        for rse in self.rses:
            rse['quota_left'] = float('inf')
    else:
        for rse in self.rses:
            if rse['mock_rse']:
                rse['quota_left'] = float('inf')
            else:
                # TODO: Add RSE-space-left here!
                # No account limit set means no quota on this RSE
                limit = get_account_limit(account=account, rse_id=rse['rse_id'], session=session)
                if limit is None:
                    rse['quota_left'] = 0
                else:
                    # Remaining quota = limit minus bytes already counted for the account
                    rse['quota_left'] = limit - get_counter(rse_id=rse['rse_id'], account=account, session=session)['bytes']

    # Drop RSEs without remaining quota and re-check feasibility
    self.rses = [rse for rse in self.rses if rse['quota_left'] > 0]

    if len(self.rses) < self.copies:
        raise InsufficientAccountLimit('There is insufficient quota on any of the target RSE\'s to fullfill the operation.')