def predict_by_rule_id(self, rule_id, session=None):
    """
    Make a prediction based on the rule id.

    :param rule_id: Some rule id.
    :returns: Number of seconds the rule is going to take until it is complete.
    """
    requests = session.query(models.Request).filter(models.Request.rule_id == rule_id).all()
    transfers = []
    for req in requests:
        try:
            transfers.append({'src': get_rse_name(req.source_rse_id),
                              'dst': get_rse_name(req.dest_rse_id),
                              'activity': req.activity,
                              'size': req.bytes})
        except RSENotFound:
            continue

    results = self.predict(transfers)
    max_q = 0
    max_n = 0
    if len(transfers) == 0:
        logging.warning('Can\'t calculate TTC for the transfers. Probably no sources selected yet.')
        return -1, -1
    for transfer in results:
        if transfer['qtime'] > max_q:
            max_q = transfer['qtime']
        if transfer['ntime'] > max_n:
            max_n = transfer['ntime']
    return max_q, max_n

def export_data(issuer):
    """
    Export data from Rucio.

    :param issuer: the issuer.
    """
    kwargs = {'issuer': issuer}
    if not permission.has_permission(issuer=issuer, action='export', kwargs=kwargs):
        raise exception.AccessDenied('Account %s can not export data' % issuer)

    data = exporter.export_data()
    rses = {}
    distances = {}

    for rse_id in data['rses']:
        rse = data['rses'][rse_id]
        rses[get_rse_name(rse_id=rse_id)] = rse
    data['rses'] = rses

    for src_id in data['distances']:
        dests = data['distances'][src_id]
        src = get_rse_name(rse_id=src_id)
        distances[src] = {}
        for dest_id in dests:
            dest = get_rse_name(rse_id=dest_id)
            distances[src][dest] = dests[dest_id]
    data['distances'] = distances
    return data

def export_data(issuer, vo='def'):
    """
    Export data from Rucio.

    :param issuer: the issuer.
    :param vo: the VO of the issuer.
    """
    kwargs = {'issuer': issuer}
    if not permission.has_permission(issuer=issuer, vo=vo, action='export', kwargs=kwargs):
        raise exception.AccessDenied('Account %s can not export data' % issuer)

    data = exporter.export_data(vo=vo)
    rses = {}
    distances = {}

    for rse_id in data['rses']:
        rse = data['rses'][rse_id]
        rses[get_rse_name(rse_id=rse_id)] = rse
    data['rses'] = rses

    for src_id, tmp in data['distances'].items():
        src = get_rse_name(rse_id=src_id)
        distances[src] = {}
        for dst_id, dists in tmp.items():
            dst = get_rse_name(rse_id=dst_id)
            distances[src][dst] = dists
    data['distances'] = distances
    return data

def export_data(issuer, distance=True, vo='def', session=None):
    """
    Export data from Rucio.

    :param issuer: the issuer.
    :param distance: To enable the reporting of distance.
    :param vo: the VO of the issuer.
    :param session: The database session in use.
    """
    kwargs = {'issuer': issuer}
    if not permission.has_permission(issuer=issuer, vo=vo, action='export', kwargs=kwargs, session=session):
        raise exception.AccessDenied('Account %s can not export data' % issuer)

    data = exporter.export_data(distance=distance, vo=vo, session=session)
    rses = {}
    distances = {}

    for rse_id in data['rses']:
        rse = data['rses'][rse_id]
        rses[get_rse_name(rse_id=rse_id, session=session)] = rse
    data['rses'] = rses

    if distance:
        for src_id in data['distances']:
            dests = data['distances'][src_id]
            src = get_rse_name(rse_id=src_id, session=session)
            distances[src] = {}
            for dest_id in dests:
                dest = get_rse_name(rse_id=dest_id, session=session)
                distances[src][dest] = dests[dest_id]
        data['distances'] = distances
    return data

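# The export_data() variants above differ mainly in multi-VO and session handling,
# but all of them re-key the exported 'rses' and 'distances' mappings from internal
# RSE ids to RSE names. A runnable sketch of that re-keying step with plain dicts;
# the get_rse_name() stub below stands in for the real database lookup, and the
# ids/names are invented.
def _sketch_rekey_export():
    def get_rse_name(rse_id):
        return {'a1': 'SITE_A_DATADISK', 'b2': 'SITE_B_DATADISK'}[rse_id]

    data = {'rses': {'a1': {'rse_type': 'DISK'}, 'b2': {'rse_type': 'DISK'}},
            'distances': {'a1': {'b2': {'ranking': 1}}}}

    data['rses'] = {get_rse_name(rse_id): rse for rse_id, rse in data['rses'].items()}
    data['distances'] = {get_rse_name(src_id): {get_rse_name(dst_id): dist for dst_id, dist in dests.items()}
                         for src_id, dests in data['distances'].items()}
    return data  # both mappings are now keyed by RSE name
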
def select_target_rse(parent_rule, current_rse_id, rse_expression, subscription_id, rse_attributes, other_rses=[],
                      exclude_expression=None, force_expression=None, session=None):
    """
    Select a new target RSE for a rebalanced rule.

    :param parent_rule: Rule that is rebalanced.
    :param current_rse_id: RSE of the source.
    :param rse_expression: RSE Expression of the source rule.
    :param subscription_id: Subscription ID of the source rule.
    :param rse_attributes: The attributes of the source rse.
    :param other_rses: Other RSEs with existing dataset replicas.
    :param exclude_expression: Exclude this rse_expression from being target_rses.
    :param force_expression: Force a specific rse_expression as target.
    :param session: The DB Session.
    :returns: New RSE expression.
    """
    if rse_attributes['type'] != 'DATADISK' and force_expression is None:
        print('WARNING: dest RSE(s) has to be provided with --force-expression for rebalancing of non-datadisk RSES.')
        raise InsufficientTargetRSEs

    current_rse = get_rse_name(rse_id=current_rse_id)
    current_rse_expr = current_rse
    # if parent rule has a vo, enforce it
    vo = parent_rule['scope'].vo
    if exclude_expression:
        target_rse = '(%s)\\%s' % (exclude_expression, current_rse_expr)
    else:
        target_rse = current_rse_expr

    rses = parse_expression(expression=rse_expression, filter={'vo': vo}, session=session)
    # TODO: dest rse selection should be configurable; there might be cases when tier is not defined,
    # or the concept of DATADISKS is not present.
    # if subscription_id:
    #     pass
    #     # get_subscription_by_id(subscription_id, session)

    if force_expression is not None:
        if parent_rule['grouping'] != RuleGrouping.NONE:
            rses = parse_expression(expression='(%s)\\%s' % (force_expression, target_rse),
                                    filter={'vo': vo, 'availability_write': True}, session=session)
        else:
            # in order to avoid replication of the part of a distributed dataset not present at the
            # rebalanced rse -> rses in force_expression
            # this will be extended with the development of delayed rules
            rses = parse_expression(expression='((%s)|(%s))\\%s' % (force_expression, rse_expression, target_rse),
                                    filter={'vo': vo, 'availability_write': True}, session=session)
    elif len(rses) > 1:
        # Just define the RSE Expression without the current_rse
        return '(%s)\\%s' % (rse_expression, target_rse)
    else:
        if rse_attributes['tier'] is True or int(rse_attributes['tier']) == 1:
            # Tier 1 should go to another Tier 1
            expression = '(tier=1&type=DATADISK)\\{}'.format(target_rse)
        elif int(rse_attributes['tier']) == 2:
            # Tier 2 should go to another Tier 2
            expression = '(tier=2&type=DATADISK)\\{}'.format(target_rse)
        elif int(rse_attributes['tier']) == 3:
            # Tier 3 will go to Tier 2, since we don't have enough t3s
            expression = '((tier=2&type=DATADISK)\\datapolicynucleus=1)\\{}'.format(target_rse)
        rses = parse_expression(expression=expression, filter={'vo': vo, 'availability_write': True}, session=session)

    rseselector = RSESelector(account=InternalAccount('ddmadmin', vo=vo), rses=rses, weight='freespace', copies=1,
                              ignore_account_limit=True, session=session)
    return get_rse_name([rse_id for rse_id, _, _ in rseselector.select_rse(size=0, preferred_rse_ids=[],
                                                                           blocklist=other_rses)][0], session=session)

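# Hedged sketch of how the expression strings above are composed before being
# handed to parse_expression(): '\' is the set-minus operator of Rucio RSE
# expressions, so the current RSE is always excluded from the target.
# The RSE name and attribute expressions below are invented.
def _sketch_target_expression(current_rse='SITE_A_DATADISK',
                              exclude_expression='bb8-enabled=False',
                              force_expression='tier=2&type=DATADISK'):
    target_rse = '(%s)\\%s' % (exclude_expression, current_rse) if exclude_expression else current_rse
    # e.g. '(bb8-enabled=False)\SITE_A_DATADISK'
    forced = '(%s)\\%s' % (force_expression, target_rse)
    # e.g. '(tier=2&type=DATADISK)\(bb8-enabled=False)\SITE_A_DATADISK'
    return target_rse, forced
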
def get_account_usage(account, rse_id=None, session=None):
    """
    Read the account usage and connect it with (if available) the account limits of the account.

    :param account: The account to read.
    :param rse_id: The rse_id to read (If none, get all).
    :param session: Database session in use.
    :returns: List of dicts {'rse', 'bytes', 'files', 'bytes_limit', 'bytes_remaining'}
    """
    if not rse_id:
        # All RSEs
        limits = get_account_limits(account=account, session=session)
        counters = session.query(models.AccountUsage).filter_by(account=account).all()
    else:
        # One RSE
        limits = get_account_limits(account=account, rse_ids=[rse_id], session=session)
        counters = session.query(models.AccountUsage).filter_by(account=account, rse_id=rse_id).all()

    result_list = []
    for counter in counters:
        if counter.bytes > 0 or counter.files > 0 or rse_id in limits.keys():
            result_list.append({'rse': get_rse_name(rse_id=counter.rse_id, session=session),
                                'bytes': counter.bytes,
                                'files': counter.files,
                                'bytes_limit': limits.get(counter.rse_id, 0),
                                'bytes_remaining': limits.get(counter.rse_id, 0) - counter.bytes})
    return result_list

def delete_volatile_replicas(rse_id, replicas, session=None):
    """
    Bulk delete volatile replicas.

    :param rse_id: the rse id.
    :param replicas: the list of volatile replicas.
    :param session: The database session in use.
    :returns: True if successful.
    """
    # first check that the rse is a volatile one
    try:
        session.query(models.RSE.id).filter_by(id=rse_id, volatile=True).one()
    except NoResultFound:
        raise exception.UnsupportedOperation('No volatile rse found for %s !'
                                             % get_rse_name(rse_id=rse_id, session=session))

    conditions = []
    for replica in replicas:
        conditions.append(and_(models.RSEFileAssociation.scope == replica['scope'],
                               models.RSEFileAssociation.name == replica['name']))

    if conditions:
        session.query(models.RSEFileAssociation).\
            filter(models.RSEFileAssociation.rse_id == rse_id).\
            filter(or_(*conditions)).\
            delete(synchronize_session=False)

def declare_bad_file_replicas(pfns, reason, issuer):
    """
    Declare a list of bad replicas.

    :param pfns: The list of PFNs.
    :param reason: The reason of the loss.
    :param issuer: The issuer account.
    """
    kwargs = {}
    if not permission.has_permission(issuer=issuer, action='declare_bad_file_replicas', kwargs=kwargs):
        raise exception.AccessDenied('Account %s can not declare bad replicas' % (issuer))

    issuer = InternalAccount(issuer)
    replicas = replica.declare_bad_file_replicas(pfns=pfns, reason=reason, issuer=issuer, status=BadFilesStatus.BAD)
    for k in list(replicas):
        try:
            rse = get_rse_name(rse_id=k)
            replicas[rse] = replicas.pop(k)
        except exception.RSENotFound:
            pass
    return replicas

def get_account_usage(account, rse_id=None, session=None):
    """
    Read the account usage and connect it with (if available) the account limits of the account.

    :param account: The account to read.
    :param rse_id: The rse_id to read (If none, get all).
    :param session: Database session in use.
    :returns: List of dicts {'rse', 'bytes', 'files', 'bytes_limit', 'bytes_remaining'}
    """
    if not rse_id:
        # All RSEs
        limits = get_account_limits(account=account, session=session)
        counters = session.query(models.AccountCounter).filter_by(account=account).all()
    else:
        # One RSE
        limits = get_account_limits(account=account, rse_ids=[rse_id], session=session)
        counters = session.query(models.AccountCounter).filter_by(account=account, rse_id=rse_id).all()

    result_list = []
    for counter in counters:
        if counter.bytes > 0 or counter.files > 0 or rse_id in limits.keys():
            result_list.append({'rse': get_rse_name(rse_id=counter.rse_id, session=session),
                                'bytes': counter.bytes,
                                'files': counter.files,
                                'bytes_limit': limits.get(counter.rse_id, float("Inf")),
                                'bytes_remaining': limits.get(counter.rse_id, float("Inf")) - counter.bytes})
    return result_list

def get_replica_locks_for_rule_id(rule_id, session=None):
    """
    Get the active replica locks for a rule_id.

    :param rule_id: Filter on rule_id.
    :param session: The db session.
    :return: List of dicts {'scope':, 'name':, 'rse': ..., 'state': ...}
    :raises: NoResultFound
    """
    locks = []

    query = session.query(models.ReplicaLock).filter_by(rule_id=rule_id)

    for row in query:
        locks.append({'scope': row.scope,
                      'name': row.name,
                      'rse_id': row.rse_id,
                      'rse': get_rse_name(rse_id=row.rse_id, session=session),
                      'state': row.state,
                      'rule_id': row.rule_id})

    return locks

def get_dataset_locks_by_rse_id(rse_id, session=None):
    """
    Get the dataset locks of an RSE.

    :param rse_id: RSE id to get the locks from.
    :param session: The db session.
    :return: List of dicts {'rse_id': ..., 'state': ...}
    """
    query = session.query(models.DatasetLock.rse_id,
                          models.DatasetLock.scope,
                          models.DatasetLock.name,
                          models.DatasetLock.rule_id,
                          models.DatasetLock.account,
                          models.DatasetLock.state,
                          models.DatasetLock.length,
                          models.DatasetLock.bytes,
                          models.DatasetLock.accessed_at).filter_by(rse_id=rse_id).\
        with_hint(models.DatasetLock, "index(DATASET_LOCKS DATASET_LOCKS_RSE_ID_IDX)", 'oracle')

    for rse_id, scope, name, rule_id, account, state, length, bytes, accessed_at in query.yield_per(500):
        yield {'rse_id': rse_id,
               'rse': get_rse_name(rse_id, session=session),
               'scope': scope,
               'name': name,
               'rule_id': rule_id,
               'account': account,
               'state': state,
               'length': length,
               'bytes': bytes,
               'accessed_at': accessed_at}

def test_export_rest(self):
    """ EXPORT (REST): Test the export of data."""
    mw = []
    headers1 = {'X-Rucio-Account': 'root', 'X-Rucio-Username': '******', 'X-Rucio-Password': '******'}
    r1 = TestApp(auth_app.wsgifunc(*mw)).get('/userpass', headers=headers1, expect_errors=True)
    token = str(r1.header('X-Rucio-Auth-Token'))

    headers2 = {'X-Rucio-Type': 'user', 'X-Rucio-Account': 'root', 'X-Rucio-Auth-Token': str(token)}
    r2 = TestApp(export_app.wsgifunc(*mw)).get('/', headers=headers2, expect_errors=True)

    rses = export_rses()
    sanitised = {}
    for rse_id in rses:
        sanitised[get_rse_name(rse_id=rse_id)] = rses[rse_id]
    rses = sanitised

    assert_equal(r2.status, 200)
    assert_equal(parse_response(r2.body),
                 parse_response(render_json(**{'rses': rses, 'distances': self.distances})))

def get_dataset_locks(scope, name, session=None):
    """
    Get the dataset locks of a dataset

    :param scope: Scope of the dataset.
    :param name: Name of the dataset.
    :param session: The db session.
    :return: List of dicts {'rse_id': ..., 'state': ...}
    """
    query = session.query(models.DatasetLock.rse_id,
                          models.DatasetLock.scope,
                          models.DatasetLock.name,
                          models.DatasetLock.rule_id,
                          models.DatasetLock.account,
                          models.DatasetLock.state,
                          models.DatasetLock.length,
                          models.DatasetLock.bytes,
                          models.DatasetLock.accessed_at).filter_by(scope=scope, name=name)

    rse_names = {}
    for rse_id, scope, name, rule_id, account, state, length, bytes, accessed_at in query.yield_per(500):
        if rse_id not in rse_names:
            rse_names[rse_id] = get_rse_name(rse_id, session=session)
        yield {'rse_id': rse_id,
               'rse': rse_names[rse_id],
               'scope': scope,
               'name': name,
               'rule_id': rule_id,
               'account': account,
               'state': state,
               'length': length,
               'bytes': bytes,
               'accessed_at': accessed_at}

def add_distance(src_rse_id, dest_rse_id, ranking=None, agis_distance=None, geoip_distance=None,
                 active=None, submitted=None, finished=None, failed=None, transfer_speed=None, session=None):
    """
    Add a src-dest distance.

    :param src_rse_id: The source RSE ID.
    :param dest_rse_id: The destination RSE ID.
    :param ranking: Ranking as an integer.
    :param agis_distance: AGIS Distance as an integer.
    :param geoip_distance: GEOIP Distance as an integer.
    :param active: Active FTS transfers as an integer.
    :param submitted: Submitted FTS transfers as an integer.
    :param finished: Finished FTS transfers as an integer.
    :param failed: Failed FTS transfers as an integer.
    :param transfer_speed: FTS transfer speed as an integer.
    :param session: The database session to use.
    """
    try:
        new_distance = Distance(src_rse_id=src_rse_id, dest_rse_id=dest_rse_id, ranking=ranking,
                                agis_distance=agis_distance, geoip_distance=geoip_distance,
                                active=active, submitted=submitted, finished=finished, failed=failed,
                                transfer_speed=transfer_speed)
        new_distance.save(session=session)
    except IntegrityError:
        raise exception.Duplicate('Distance from %s to %s already exists!'
                                  % (get_rse_name(rse_id=src_rse_id, session=session),
                                     get_rse_name(rse_id=dest_rse_id, session=session)))
    except DatabaseError as error:
        raise exception.RucioException(error.args)

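# Hedged usage sketch for add_distance() above: the import paths are assumptions,
# the RSE ids are invented, and a configured Rucio database session is required
# for this to actually run.
from rucio.common.exception import Duplicate
from rucio.core.distance import add_distance  # assumed import path


def _sketch_add_distance(src_rse_id, dest_rse_id):
    try:
        add_distance(src_rse_id=src_rse_id, dest_rse_id=dest_rse_id, ranking=1, geoip_distance=350)
    except Duplicate:
        pass  # this src/dst pair already has a distance entry
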
def __dump_url(rse_id, logger=logging.log):
    """
    Get the potential URLs of the RSE dump over the last week.

    :param rse_id: RSE where the dump is released.
    :param logger: Logger.
    """
    rse = get_rse_name(rse_id=rse_id)
    vo = get_rse_vo(rse_id=rse_id)

    # get the date of the most recent dump
    today = date.today()
    dump_dates = []
    dump_production_day = config_get('bb8', 'dump_production_day', raise_exception=False, default=None)
    if dump_production_day is None:
        for idx in range(0, 7):
            dump_date = today - timedelta(idx)
            dump_dates.append(dump_date.strftime('%d-%m-%Y'))
    else:
        weekdays = {'Sunday': 6, 'Monday': 0, 'Tuesday': 1, 'Wednesday': 2, 'Thursday': 3, 'Friday': 4, 'Saturday': 5}
        if dump_production_day not in weekdays:
            logger(logging.WARNING, 'ERROR: please set the day of a dump creation in bb8 config correctly, e.g. Monday')
            return False
        today_idx = (today.weekday() - weekdays[dump_production_day]) % 7
        dump_date = today - timedelta(today_idx)
        dump_dates = [dump_date.strftime('%d-%m-%Y')]

    # getting structure (template) of url location of a dump
    url_template_str = config_get('bb8', 'dump_url_template', raise_exception=False,
                                  default='http://rucio-analytix.cern.ch:8080/LOCKS/GetFileFromHDFS?date=${date}&rse=${rse}')
    url_template = Template(url_template_str)

    # populating url template
    urls = []
    for d in dump_dates:
        url = url_template.substitute({'date': d, 'rse': rse, 'vo': vo})
        urls.append(url)

    return urls

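# Runnable sketch of the date arithmetic and URL templating used by __dump_url()
# above: find the most recent occurrence of the configured production day and
# substitute it into the dump URL template (template and RSE name invented).
def _sketch_dump_url(dump_production_day='Monday', rse='SITE_A_DATADISK'):
    from datetime import date, timedelta
    from string import Template

    weekdays = {'Sunday': 6, 'Monday': 0, 'Tuesday': 1, 'Wednesday': 2, 'Thursday': 3, 'Friday': 4, 'Saturday': 5}
    today = date.today()
    days_back = (today.weekday() - weekdays[dump_production_day]) % 7
    dump_date = today - timedelta(days_back)

    template = Template('http://dumps.example.invalid/GetFileFromHDFS?date=${date}&rse=${rse}')
    return template.substitute({'date': dump_date.strftime('%d-%m-%Y'), 'rse': rse})
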
def __check_rse_usage(rse_id):
    """
    Internal method to check RSE usage and limits.

    :param rse_id: the rse id.
    :returns: max_being_deleted_files, needed_free_space, used, free.
    """
    max_being_deleted_files, needed_free_space, used, free = None, None, None, None
    rse = get_rse_name(rse_id=rse_id)

    # Get RSE limits
    limits = rse_core.get_rse_limits(rse_id=rse_id)
    if not limits and 'MinFreeSpace' not in limits and 'MaxBeingDeletedFiles' not in limits:
        return max_being_deleted_files, needed_free_space, used, free

    min_free_space = limits.get('MinFreeSpace')
    max_being_deleted_files = limits.get('MaxBeingDeletedFiles')

    # Check from which sources to get used and total spaces
    # Default is storage
    source_for_total_space, source_for_used_space = 'storage', 'storage'
    values = get_rse_attribute(rse_id=rse_id, key='source_for_total_space')
    if values:
        source_for_total_space = values[0]
    values = get_rse_attribute(rse_id=rse_id, key='source_for_used_space')
    if values:
        source_for_used_space = values[0]

    logging.debug('RSE: %(rse)s, source_for_total_space: %(source_for_total_space)s, '
                  'source_for_used_space: %(source_for_used_space)s' % locals())

    # Get total and used space
    usage = rse_core.get_rse_usage(rse_id=rse_id, source=source_for_total_space)
    if not usage:
        return max_being_deleted_files, needed_free_space, used, free
    for var in usage:
        total, used = var['total'], var['used']
        break

    if source_for_total_space != source_for_used_space:
        usage = rse_core.get_rse_usage(rse_id=rse_id, source=source_for_used_space)
        if not usage:
            return max_being_deleted_files, needed_free_space, None, free
        for var in usage:
            used = var['used']
            break

    free = total - used
    if min_free_space:
        needed_free_space = min_free_space - free

    return max_being_deleted_files, needed_free_space, used, free

def __dump_url(rse_id, logger=logging.log):
    """
    Get the potential URLs of the RSE dump over the last week.

    :param rse_id: RSE where the dump is released.
    :param logger: Logger.
    """
    rse = get_rse_name(rse_id=rse_id)
    vo = get_rse_vo(rse_id=rse_id)

    # get the date of the most recent dump
    today = date.today()
    dump_dates = []
    dump_production_day = config_get("bb8", "dump_production_day", raise_exception=False, default=None)
    if dump_production_day is None:
        for idx in range(0, 7):
            dump_date = today - timedelta(idx)
            dump_dates.append(dump_date.strftime("%d-%m-%Y"))
    else:
        weekdays = {
            "Sunday": 6,
            "Monday": 0,
            "Tuesday": 1,
            "Wednesday": 2,
            "Thursday": 3,
            "Friday": 4,
            "Saturday": 5,
        }
        if dump_production_day not in weekdays:
            logger(logging.WARNING,
                   "ERROR: please set the day of a dump creation in bb8 config correctly, e.g. Monday")
            return False
        today_idx = (today.weekday() - weekdays[dump_production_day]) % 7
        dump_date = today - timedelta(today_idx)
        dump_dates = [dump_date.strftime("%d-%m-%Y")]

    # getting structure (template) of url location of a dump
    url_template_str = config_get("bb8", "dump_url_template", raise_exception=False)
    url_template = Template(url_template_str)

    # populating url template
    urls = []
    for d in dump_dates:
        url = url_template.substitute({"date": d, "rse": rse, "vo": vo})
        urls.append(url)

    return urls

def get_distance(source, destination, issuer):
    """
    Get distances between rses.

    :param source: The source RSE.
    :param destination: The destination RSE.
    :param issuer: The issuer account.
    :returns distance: List of dictionaries.
    """
    distances = distance_module.get_distances(src_rse_id=rse_module.get_rse_id(source),
                                              dest_rse_id=rse_module.get_rse_id(destination))
    for d in distances:
        if 'src_rse_id' in d and d['src_rse_id'] is not None:
            d['src_rse'] = rse_module.get_rse_name(rse_id=d['src_rse_id'])
        if 'dest_rse_id' in d and d['dest_rse_id'] is not None:
            d['dest_rse'] = rse_module.get_rse_name(rse_id=d['dest_rse_id'])
    return distances

def get_account_limits(account):
    """
    Lists the limitation names/values for the specified account name.

    REST API: http://<host>:<port>/rucio/account/<account>/limits

    :param account: The account name.
    :returns: The account limits.
    """
    rse_instead_id = {}
    for elem in account_limit_core.get_account_limits(account=account).items():
        rse_instead_id[get_rse_name(elem[0])] = elem[1]

    return rse_instead_id

def declare_bad_file_replicas(pfns, reason, issuer, vo='def', session=None):
    """
    Declare a list of bad replicas.

    :param pfns: Either a list of PFNs (string) or a list of replicas {'scope': <scope>, 'name': <name>, 'rse': <rse>}.
    :param reason: The reason of the loss.
    :param issuer: The issuer account.
    :param vo: The VO to act on.
    :param session: The database session in use.
    """
    kwargs = {}
    rse_map = {}
    if not permission.has_permission(issuer=issuer, vo=vo, action='declare_bad_file_replicas', kwargs=kwargs, session=session):
        raise exception.AccessDenied('Account %s can not declare bad replicas' % (issuer))

    issuer = InternalAccount(issuer, vo=vo)

    type_ = type(pfns[0]) if len(pfns) > 0 else None
    for pfn in pfns:
        if not isinstance(pfn, type_):
            raise exception.InvalidType('The PFNs must be either a list of string or list of dict')
        if type_ == dict:
            rse = pfn['rse']
            if rse not in rse_map:
                rse_id = get_rse_id(rse=rse, vo=vo, session=session)
                rse_map[rse] = rse_id
            pfn['rse_id'] = rse_map[rse]
            pfn['scope'] = InternalScope(pfn['scope'], vo=vo)

    replicas = replica.declare_bad_file_replicas(pfns=pfns, reason=reason, issuer=issuer,
                                                 status=BadFilesStatus.BAD, session=session)
    for k in list(replicas):
        try:
            rse = get_rse_name(rse_id=k, session=session)
            replicas[rse] = replicas.pop(k)
        except exception.RSENotFound:
            pass
    return replicas

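# The two input forms accepted by declare_bad_file_replicas() above; all values
# are illustrative only. With the dict form, 'rse' is resolved to an 'rse_id'
# and 'scope' is converted to an InternalScope before the core call.
pfns_as_strings = ['root://xrootd.example.invalid:1094//data/mock/file.test.0001']
pfns_as_dicts = [{'scope': 'mock', 'name': 'file.test.0001', 'rse': 'SITE_A_DATADISK'}]
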
def get_source_rse(request_id, scope, name, src_url, session=None):
    """
    Resolve the source RSE name and id for a given request and source URL.
    """
    try:
        if not request_id:
            return None, None

        sources = request_core.get_sources(request_id, session=session)
        for source in sources:
            if source['url'] == src_url:
                src_rse_id = source['rse_id']
                src_rse_name = rse_core.get_rse_name(src_rse_id, session=session)
                logging.debug("Found rse name %s for %s" % (src_rse_name, src_url))
                return src_rse_name, src_rse_id
        # cannot find matched surl
        logging.warn('Cannot get correct RSE for source url: %s' % (src_url))
        return None, None
    except:
        logging.error('Cannot get correct RSE for source url: %s(%s)' % (src_url, traceback.format_exc()))
        return None, None

def get_replica_locks_for_rule_id_per_rse(rule_id, session=None):
    """
    Get the active replica locks for a rule_id per rse.

    :param rule_id: Filter on rule_id.
    :param session: The db session.
    :return: List of dicts {'rse_id':, 'rse':}
    :raises: NoResultFound
    """
    locks = []

    query = session.query(models.ReplicaLock.rse_id).filter_by(rule_id=rule_id).group_by(models.ReplicaLock.rse_id)

    for row in query:
        locks.append({'rse_id': row.rse_id,
                      'rse': get_rse_name(rse_id=row.rse_id, session=session)})

    return locks

def get_local_account_limits(account, vo='def'):
    """
    Lists the limitation names/values for the specified account name.

    REST API: http://<host>:<port>/rucio/account/<account>/limits

    :param account: The account name.
    :param vo: The VO to act on.
    :returns: The account limits.
    """
    account = InternalAccount(account, vo=vo)

    rse_instead_id = {}
    for elem in account_limit_core.get_local_account_limits(account=account).items():
        rse_instead_id[get_rse_name(rse_id=elem[0])] = elem[1]

    return rse_instead_id

def api_update_return_dict(dictionary):
    """
    Ensure that rse is in a dictionary returned from core

    :param dictionary: The dictionary to edit
    :returns dictionary: The edited dictionary
    """
    if not isinstance(dictionary, dict):
        return dictionary

    from rucio.core.rse import get_rse_name  # Here to avoid circular dependency

    copied = False  # Avoid side effects from pass by object
    if 'rse_id' in dictionary.keys():
        if 'rse' not in dictionary.keys():
            if not copied:
                dictionary = dictionary.copy()
                copied = True
            dictionary['rse'] = get_rse_name(rse_id=dictionary['rse_id'])

    return dictionary

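# Runnable sketch of the copy-on-write behaviour of api_update_return_dict()
# above: the caller's dict stays untouched when the 'rse' key has to be added
# (get_rse_name() is stubbed out here and the values are invented).
def _sketch_api_update_return_dict():
    def get_rse_name(rse_id):
        return 'MOCK_RSE_%s' % rse_id

    def api_update_return_dict(dictionary):
        if not isinstance(dictionary, dict):
            return dictionary
        if 'rse_id' in dictionary and 'rse' not in dictionary:
            dictionary = dictionary.copy()  # avoid side effects on the caller's object
            dictionary['rse'] = get_rse_name(rse_id=dictionary['rse_id'])
        return dictionary

    original = {'rse_id': 42, 'bytes': 100}
    updated = api_update_return_dict(original)
    # original is still {'rse_id': 42, 'bytes': 100};
    # updated is {'rse_id': 42, 'bytes': 100, 'rse': 'MOCK_RSE_42'}
    return original, updated
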
def declare_suspicious_file_replicas(pfns, reason, issuer, vo='def', session=None):
    """
    Declare a list of suspicious replicas.

    :param pfns: The list of PFNs.
    :param reason: The reason of the loss.
    :param issuer: The issuer account.
    :param vo: The VO to act on.
    :param session: The database session in use.
    """
    kwargs = {}
    if not permission.has_permission(issuer=issuer, vo=vo, action='declare_suspicious_file_replicas', kwargs=kwargs, session=session):
        raise exception.AccessDenied('Account %s can not declare suspicious replicas' % (issuer))

    issuer = InternalAccount(issuer, vo=vo)

    replicas = replica.declare_bad_file_replicas(pfns=pfns, reason=reason, issuer=issuer,
                                                 status=BadFilesStatus.SUSPICIOUS, session=session)
    for k in list(replicas):
        try:
            rse = get_rse_name(rse_id=k, session=session)
            replicas[rse] = replicas.pop(k)
        except exception.RSENotFound:
            pass
    return replicas

def list_bad_replicas(limit=10000, worker_number=None, total_workers=None, session=None):
    """
    List bad file replicas.

    :param limit: The maximum number of replicas returned.
    :param worker_number: The worker number used to partition the work.
    :param total_workers: The total number of workers.
    :param session: The database session in use.
    :returns: a list of dictionary {'scope' scope, 'name': name, 'rse_id': rse_id, 'rse': rse}.
    """
    if session.bind.dialect.name == 'oracle':
        # The filter(text...) is needed, otherwise SQLA uses bind variables and the index is not used.
        query = session.query(models.RSEFileAssociation.scope,
                              models.RSEFileAssociation.name,
                              models.RSEFileAssociation.rse_id).\
            with_hint(models.RSEFileAssociation, "+ index(replicas REPLICAS_STATE_IDX)", 'oracle').\
            filter(text("CASE WHEN (%s.replicas.state != 'A') THEN %s.replicas.rse_id END IS NOT NULL" % (default_schema_name, default_schema_name))).\
            filter(models.RSEFileAssociation.state == ReplicaState.BAD)
    else:
        query = session.query(models.RSEFileAssociation.scope,
                              models.RSEFileAssociation.name,
                              models.RSEFileAssociation.rse_id).\
            filter(models.RSEFileAssociation.state == ReplicaState.BAD)

    if worker_number and total_workers and total_workers - 1 > 0:
        if session.bind.dialect.name == 'oracle':
            bindparams = [bindparam('worker_number', worker_number - 1), bindparam('total_workers', total_workers - 1)]
            query = query.filter(text('ORA_HASH(name, :total_workers) = :worker_number', bindparams=bindparams))
        elif session.bind.dialect.name == 'mysql':
            query = query.filter('mod(md5(name), %s) = %s' % (total_workers - 1, worker_number - 1))
        elif session.bind.dialect.name == 'postgresql':
            query = query.filter('mod(abs((\'x\'||md5(name))::bit(32)::int), %s) = %s' % (total_workers - 1, worker_number - 1))

    query = query.limit(limit)
    rows = []
    rse_map = {}
    for scope, name, rse_id in query.yield_per(1000):
        if rse_id not in rse_map:
            rse_map[rse_id] = get_rse_name(rse_id=rse_id, session=session)
        d = {'scope': scope, 'name': name, 'rse_id': rse_id, 'rse': rse_map[rse_id]}
        rows.append(d)
    return rows

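# Runnable sketch of the hash-based work partitioning idea used above (done in
# SQL there with ORA_HASH or md5): each worker only processes names whose hash
# falls into its bucket, so the workers cover disjoint slices of the table.
def _sketch_partition(names, worker_number, total_workers):
    import hashlib
    return [name for name in names
            if int(hashlib.md5(name.encode()).hexdigest(), 16) % total_workers == worker_number]
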
def list_replicas(dids, schemes=None, unavailable=False, request_id=None,
                  ignore_availability=True, all_states=False, rse_expression=None,
                  client_location=None, domain=None, signature_lifetime=None,
                  resolve_archives=True, resolve_parents=False, updated_after=None,
                  issuer=None):
    """
    List file replicas for a list of data identifiers.

    :param dids: The list of data identifiers (DIDs).
    :param schemes: A list of schemes to filter the replicas. (e.g. file, http, ...)
    :param unavailable: Also include unavailable replicas in the list.
    :param request_id: ID associated with the request for debugging.
    :param all_states: Return all replicas whatever state they are in. Adds an extra 'states' entry in the result dictionary.
    :param rse_expression: The RSE expression to restrict replicas on a set of RSEs.
    :param client_location: Client location dictionary for PFN modification {'ip', 'fqdn', 'site'}
    :param domain: The network domain for the call, either None, 'wan' or 'lan'. Compatibility fallback: None falls back to 'wan'.
    :param signature_lifetime: If supported, in seconds, restrict the lifetime of the signed PFN.
    :param resolve_archives: When set to True, find archives which contain the replicas.
    :param resolve_parents: When set to True, find all parent datasets which contain the replicas.
    :param updated_after: datetime object (UTC time), only return replicas updated after this time
    :param issuer: The issuer account.
    """
    validate_schema(name='r_dids', obj=dids)

    # Allow selected authenticated users to retrieve signed URLs.
    # Unauthenticated users, or permission-less users will get the raw URL without the signature.
    sign_urls = False
    if permission.has_permission(issuer=issuer, action='get_signed_url', kwargs={}):
        sign_urls = True

    for d in dids:
        d['scope'] = InternalScope(d['scope'])

    replicas = replica.list_replicas(dids=dids, schemes=schemes, unavailable=unavailable,
                                     request_id=request_id,
                                     ignore_availability=ignore_availability,
                                     all_states=all_states, rse_expression=rse_expression,
                                     client_location=client_location, domain=domain,
                                     sign_urls=sign_urls, signature_lifetime=signature_lifetime,
                                     resolve_archives=resolve_archives,
                                     resolve_parents=resolve_parents,
                                     updated_after=updated_after)

    for rep in replicas:
        # 'rses' and 'states' use rse_id as the key. This needs updating to be rse.
        keys = ['rses', 'states']
        for k in keys:
            old_dict = rep.get(k, None)
            if old_dict is not None:
                new_dict = {}
                for rse_id in old_dict:
                    rse = get_rse_name(rse_id=rse_id) if rse_id is not None else None
                    new_dict[rse] = old_dict[rse_id]
                rep[k] = new_dict
        rep['scope'] = rep['scope'].external
        if 'parents' in rep:
            new_parents = []
            for p in rep['parents']:
                scope, name = p.split(':')
                scope = InternalScope(scope, fromExternal=False).external
                new_parents.append('{}:{}'.format(scope, name))
            rep['parents'] = new_parents
        yield rep

def reaper(rses=[], worker_number=1, total_workers=1, chunk_size=100, once=False, scheme=None):
    """
    Main loop to select and delete files.

    :param rses: List of RSEs the reaper should work against. If empty, it considers all RSEs.
    :param worker_number: The worker number.
    :param total_workers: The total number of workers.
    :param chunk_size: the size of chunk for deletion.
    :param once: If True, only runs one iteration of the main loop.
    :param scheme: Force the reaper to use a particular protocol, e.g., mock.
    """
    logging.info('Starting Dark Reaper %s-%s: Will work on RSEs: %s', worker_number, total_workers, str(rses))

    pid = os.getpid()
    thread = threading.current_thread()
    hostname = socket.gethostname()
    executable = ' '.join(sys.argv)
    hash_executable = hashlib.sha256(sys.argv[0] + ''.join(rses)).hexdigest()
    sanity_check(executable=None, hostname=hostname)

    while not GRACEFUL_STOP.is_set():
        try:
            # heartbeat
            heartbeat = live(executable=executable, hostname=hostname, pid=pid, thread=thread,
                             hash_executable=hash_executable)
            logging.info('Dark Reaper({0[worker_number]}/{0[total_workers]}): Live gives {0[heartbeat]}'.format(locals()))

            nothing_to_do = True

            random.shuffle(rses)
            for rse_id in rses:
                rse = rse_core.get_rse_name(rse_id=rse_id)
                replicas = list_quarantined_replicas(rse_id=rse_id, limit=chunk_size,
                                                     worker_number=worker_number, total_workers=total_workers)

                rse_info = rsemgr.get_rse_info(rse_id=rse_id)
                prot = rsemgr.create_protocol(rse_info, 'delete', scheme=scheme)
                deleted_replicas = []
                try:
                    prot.connect()
                    for replica in replicas:
                        nothing_to_do = False
                        try:
                            pfn = str(rsemgr.lfns2pfns(rse_settings=rse_info,
                                                       lfns=[{'scope': replica['scope'].external,
                                                              'name': replica['name'],
                                                              'path': replica['path']}],
                                                       operation='delete',
                                                       scheme=scheme).values()[0])
                            logging.info('Dark Reaper %s-%s: Deletion ATTEMPT of %s:%s as %s on %s',
                                         worker_number, total_workers, replica['scope'], replica['name'], pfn, rse)
                            start = time.time()
                            prot.delete(pfn)
                            duration = time.time() - start
                            logging.info('Dark Reaper %s-%s: Deletion SUCCESS of %s:%s as %s on %s in %s seconds',
                                         worker_number, total_workers, replica['scope'], replica['name'], pfn, rse, duration)
                            add_message('deletion-done', {'scope': replica['scope'].external,
                                                          'name': replica['name'],
                                                          'rse': rse,
                                                          'rse_id': rse_id,
                                                          'file-size': replica.get('bytes') or 0,
                                                          'bytes': replica.get('bytes') or 0,
                                                          'url': pfn,
                                                          'duration': duration,
                                                          'protocol': prot.attributes['scheme']})
                            deleted_replicas.append(replica)
                        except SourceNotFound:
                            err_msg = 'Dark Reaper %s-%s: Deletion NOTFOUND of %s:%s as %s on %s' % (
                                worker_number, total_workers, replica['scope'], replica['name'], pfn, rse)
                            logging.warning(err_msg)
                            deleted_replicas.append(replica)
                        except (ServiceUnavailable, RSEAccessDenied, ResourceTemporaryUnavailable) as error:
                            err_msg = 'Dark Reaper %s-%s: Deletion NOACCESS of %s:%s as %s on %s: %s' % (
                                worker_number, total_workers, replica['scope'], replica['name'], pfn, rse, str(error))
                            logging.warning(err_msg)
                            add_message('deletion-failed', {'scope': replica['scope'].external,
                                                            'name': replica['name'],
                                                            'rse': rse,
                                                            'rse_id': rse_id,
                                                            'file-size': replica['bytes'] or 0,
                                                            'bytes': replica['bytes'] or 0,
                                                            'url': pfn,
                                                            'reason': str(error),
                                                            'protocol': prot.attributes['scheme']})
                except:
                    logging.critical(traceback.format_exc())
                finally:
                    prot.close()

                delete_quarantined_replicas(rse_id=rse_id, replicas=deleted_replicas)

                if once:
                    break

            if once:
                break

            if nothing_to_do:
                logging.info('Dark Reaper %s-%s: Nothing to do. I will sleep for 60s', worker_number, total_workers)
                time.sleep(60)

        except DatabaseException as error:
            logging.warning('Reaper: %s', str(error))
        except:
            logging.critical(traceback.format_exc())

    die(executable=executable, hostname=hostname, pid=pid, thread=thread, hash_executable=hash_executable)
    logging.info('Graceful stop requested')
    logging.info('Graceful stop done')
    return

def __handle_requests(reqs, suspicious_patterns, retry_protocol_mismatches, prepend_str=''):
    """
    Used by finisher to handle terminated requests.

    :param reqs: List of requests.
    :param suspicious_patterns: List of suspicious patterns.
    :param retry_protocol_mismatches: Boolean to retry the transfer in case of protocol mismatch.
    :param prepend_str: String to prepend to logging.
    """
    failed_during_submission = [RequestState.SUBMITTING, RequestState.SUBMISSION_FAILED, RequestState.LOST]
    failed_no_submission_attempts = [RequestState.NO_SOURCES, RequestState.ONLY_TAPE_SOURCES, RequestState.MISMATCH_SCHEME]
    undeterministic_rses = __get_undeterministic_rses()
    rses_info, protocols = {}, {}
    replicas = {}
    for req in reqs:
        try:
            replica = {'scope': req['scope'], 'name': req['name'], 'rse_id': req['dest_rse_id'],
                       'bytes': req['bytes'], 'adler32': req['adler32'], 'request_id': req['request_id']}

            replica['pfn'] = req['dest_url']
            replica['request_type'] = req['request_type']
            replica['error_message'] = None

            if req['request_type'] not in replicas:
                replicas[req['request_type']] = {}
            if req['rule_id'] not in replicas[req['request_type']]:
                replicas[req['request_type']][req['rule_id']] = []

            if req['state'] == RequestState.DONE:
                replica['state'] = ReplicaState.AVAILABLE
                replica['archived'] = False

                # for TAPE, replica path is needed
                if req['request_type'] in (RequestType.TRANSFER, RequestType.STAGEIN) and req['dest_rse_id'] in undeterministic_rses:
                    if req['dest_rse_id'] not in rses_info:
                        dest_rse = get_rse_name(rse_id=req['dest_rse_id'])
                        rses_info[req['dest_rse_id']] = rsemanager.get_rse_info(dest_rse)
                    pfn = req['dest_url']
                    scheme = urlparse(pfn).scheme
                    dest_rse_id_scheme = '%s_%s' % (req['dest_rse_id'], scheme)
                    if dest_rse_id_scheme not in protocols:
                        protocols[dest_rse_id_scheme] = rsemanager.create_protocol(rses_info[req['dest_rse_id']], 'write', scheme)
                    path = protocols[dest_rse_id_scheme].parse_pfns([pfn])[pfn]['path']
                    replica['path'] = os.path.join(path, os.path.basename(pfn))

                # replica should not be added to replicas until all info are filled
                replicas[req['request_type']][req['rule_id']].append(replica)

            # Standard failure from the transfer tool
            elif req['state'] == RequestState.FAILED:
                __check_suspicious_files(req, suspicious_patterns)
                tss = time.time()
                try:
                    if request_core.should_retry_request(req, retry_protocol_mismatches):
                        new_req = request_core.requeue_and_archive(req, retry_protocol_mismatches)
                        # should_retry_request and requeue_and_archive are not in one session,
                        # another process can requeue_and_archive and this one will return None.
                        record_timer('daemons.conveyor.common.update_request_state.request-requeue_and_archive', (time.time() - tss) * 1000)
                        logging.warn(prepend_str + 'REQUEUED DID %s:%s REQUEST %s AS %s TRY %s' % (req['scope'], req['name'],
                                                                                                   req['request_id'], new_req['request_id'],
                                                                                                   new_req['retry_count']))
                    else:
                        # No new_req is returned if should_retry_request returns False
                        logging.warn('%s EXCEEDED SUBMITTING DID %s:%s REQUEST %s in state %s', prepend_str, req['scope'], req['name'], req['request_id'], req['state'])
                        replica['state'] = ReplicaState.UNAVAILABLE
                        replica['archived'] = False
                        replica['error_message'] = req['err_msg'] if req['err_msg'] else request_core.get_transfer_error(req['state'])
                        replicas[req['request_type']][req['rule_id']].append(replica)
                except RequestNotFound:
                    logging.warn('%s Cannot find request %s anymore', prepend_str, req['request_id'])

            # All other failures
            elif req['state'] in failed_during_submission or req['state'] in failed_no_submission_attempts:
                if req['state'] in failed_during_submission and req['updated_at'] > (datetime.datetime.utcnow() - datetime.timedelta(minutes=120)):
                    # To prevent race conditions
                    continue
                try:
                    tss = time.time()
                    if request_core.should_retry_request(req, retry_protocol_mismatches):
                        new_req = request_core.requeue_and_archive(req, retry_protocol_mismatches)
                        record_timer('daemons.conveyor.common.update_request_state.request-requeue_and_archive', (time.time() - tss) * 1000)
                        logging.warn(prepend_str + 'REQUEUED SUBMITTING DID %s:%s REQUEST %s AS %s TRY %s' % (req['scope'], req['name'],
                                                                                                              req['request_id'], new_req['request_id'],
                                                                                                              new_req['retry_count']))
                    else:
                        # No new_req is returned if should_retry_request returns False
                        logging.warn('%s EXCEEDED SUBMITTING DID %s:%s REQUEST %s in state %s', prepend_str, req['scope'], req['name'], req['request_id'], req['state'])
                        replica['state'] = ReplicaState.UNAVAILABLE
                        replica['archived'] = False
                        replica['error_message'] = req['err_msg'] if req['err_msg'] else request_core.get_transfer_error(req['state'])
                        replicas[req['request_type']][req['rule_id']].append(replica)
                except RequestNotFound:
                    logging.warn('%s Cannot find request %s anymore', prepend_str, req['request_id'])

        except Exception as error:
            logging.error(prepend_str + "Something unexpected happened when handling request %s(%s:%s) at %s: %s" % (req['request_id'],
                                                                                                                      req['scope'],
                                                                                                                      req['name'],
                                                                                                                      req['dest_rse_id'],
                                                                                                                      str(error)))

    __handle_terminated_replicas(replicas, prepend_str)

def GET(self):
    """
    List requests for a given source and destination RSE or site.

    HTTP Success:
        200 OK

    HTTP Error:
        401 Unauthorized
        404 Request Not Found
        406 Not Acceptable
    """
    header('Content-Type', 'application/x-json-stream')

    params = parse_qs(ctx.query[1:])
    src_rse = params.get('src_rse', [None])[0]
    dst_rse = params.get('dst_rse', [None])[0]
    src_site = params.get('src_site', [None])[0]
    dst_site = params.get('dst_site', [None])[0]
    request_states = params.get('request_states', [None])[0]

    if not request_states:
        raise generate_http_error(400, 'MissingParameter', 'Request state is missing')
    if src_rse and not dst_rse:
        raise generate_http_error(400, 'MissingParameter', 'Destination RSE is missing')
    elif dst_rse and not src_rse:
        raise generate_http_error(400, 'MissingParameter', 'Source RSE is missing')
    elif src_site and not dst_site:
        raise generate_http_error(400, 'MissingParameter', 'Destination site is missing')
    elif dst_site and not src_site:
        raise generate_http_error(400, 'MissingParameter', 'Source site is missing')

    try:
        states = [RequestState.from_string(state) for state in request_states.split(',')]
    except ValueError:
        raise generate_http_error(400, 'Invalid', 'Request state value is invalid')

    src_rses = []
    dst_rses = []
    if src_site:
        src_rses = get_rses_with_attribute_value(key='site', value=src_site, lookup_key='site', vo=ctx.env.get('vo'))
        if not src_rses:
            raise generate_http_error(404, 'NotFound', 'Could not resolve site name %s to RSE' % src_site)
        src_rses = [get_rse_name(rse['rse_id']) for rse in src_rses]
        dst_rses = get_rses_with_attribute_value(key='site', value=dst_site, lookup_key='site', vo=ctx.env.get('vo'))
        if not dst_rses:
            raise generate_http_error(404, 'NotFound', 'Could not resolve site name %s to RSE' % dst_site)
        dst_rses = [get_rse_name(rse['rse_id']) for rse in dst_rses]
    else:
        dst_rses = [dst_rse]
        src_rses = [src_rse]

    for result in request.list_requests(src_rses, dst_rses, states, issuer=ctx.env.get('issuer'), vo=ctx.env.get('vo')):
        del result['_sa_instance_state']
        yield render_json(**result) + '\n'

def queue_requests(requests, session=None):
    """
    Submit transfer or deletion requests on destination RSEs for data identifiers.

    :param requests: List of dictionaries containing request metadata.
    :param session: Database session to use.
    :returns: List of Request-IDs as 32 character hex strings.
    """
    record_counter('core.request.queue_requests')

    try:
        for req in requests:
            if isinstance(req['attributes'], (str, unicode)):
                req['attributes'] = json.loads(req['attributes'])
            if isinstance(req['attributes'], (str, unicode)):
                req['attributes'] = json.loads(req['attributes'])

            # do not insert duplicate transfer requests
            if req['request_type'] == RequestType.TRANSFER:
                if get_request_by_did(req['scope'], req['name'], None,
                                      rse_id=req['dest_rse_id'],
                                      request_type=RequestType.TRANSFER):
                    continue

            new_request = models.Request(request_type=req['request_type'],
                                         scope=req['scope'],
                                         name=req['name'],
                                         dest_rse_id=req['dest_rse_id'],
                                         attributes=json.dumps(req['attributes']),
                                         state=RequestState.QUEUED,
                                         rule_id=req['rule_id'],
                                         activity=req['attributes']['activity'],
                                         bytes=req['attributes']['bytes'],
                                         md5=req['attributes']['md5'],
                                         adler32=req['attributes']['adler32'])
            if 'previous_attempt_id' in req and 'retry_count' in req:
                new_request = models.Request(id=req['request_id'],
                                             request_type=req['request_type'],
                                             scope=req['scope'],
                                             name=req['name'],
                                             dest_rse_id=req['dest_rse_id'],
                                             attributes=json.dumps(req['attributes']),
                                             state=RequestState.QUEUED,
                                             previous_attempt_id=req['previous_attempt_id'],
                                             retry_count=req['retry_count'],
                                             rule_id=req['rule_id'],
                                             activity=req['attributes']['activity'],
                                             bytes=req['attributes']['bytes'],
                                             md5=req['attributes']['md5'],
                                             adler32=req['attributes']['adler32'])
            new_request.save(session=session, flush=False)
        session.flush()
    except IntegrityError:
        logging.warn('Request TYPE %s for DID %s:%s at RSE %s exists - ignoring'
                     % (req['request_type'], req['scope'], req['name'], get_rse_name(req['dest_rse_id'])))
        raise

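# Sketch of the request dictionary shape consumed by queue_requests() above; the
# field names follow the attribute accesses in the function, the values are
# invented. 'attributes' may arrive JSON-encoded and is decoded before use.
import json

example_request = {
    'request_type': 'TRANSFER',            # RequestType.TRANSFER in the real code
    'scope': 'mock',
    'name': 'file.test.0001',
    'dest_rse_id': '9c9f1b5e4a0b4f6aa7b3c1d2e3f40516',   # invented id
    'rule_id': '1a2b3c4d5e6f47081920a1b2c3d4e5f6',       # invented id
    'attributes': json.dumps({'activity': 'User Subscriptions',
                              'bytes': 10485760,
                              'md5': '0123456789abcdef0123456789abcdef',
                              'adler32': '01234567'}),
}
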
def get(self):
    """
    List requests for a given source and destination RSE or site.

    .. :quickref: RequestsGet; list requests

    :reqheader Content-Type: application/x-json-stream
    :status 200: Request found.
    :status 404: Request not found.
    :status 406: Not Acceptable.
    """
    src_rse = f_request.get('src_rse')
    dst_rse = f_request.get('dst_rse')
    src_site = f_request.get('src_site')
    dst_site = f_request.get('dst_site')
    request_states = f_request.get('request_states')

    if not request_states:
        return generate_http_error_flask(400, 'MissingParameter', 'Request state is missing')
    if src_rse and not dst_rse:
        return generate_http_error_flask(400, 'MissingParameter', 'Destination RSE is missing')
    elif dst_rse and not src_rse:
        return generate_http_error_flask(400, 'MissingParameter', 'Source RSE is missing')
    elif src_site and not dst_site:
        return generate_http_error_flask(400, 'MissingParameter', 'Destination site is missing')
    elif dst_site and not src_site:
        return generate_http_error_flask(400, 'MissingParameter', 'Source site is missing')

    try:
        states = [RequestState.from_string(state) for state in request_states.split(',')]
    except ValueError:
        return generate_http_error_flask(400, 'Invalid', 'Request state value is invalid')

    src_rses = []
    dst_rses = []
    if src_site:
        src_rses = get_rses_with_attribute_value(key='site', value=src_site, lookup_key='site')
        if not src_rses:
            return generate_http_error_flask(404, 'NotFound', 'Could not resolve site name %s to RSE' % src_site)
        src_rses = [get_rse_name(rse['rse_id']) for rse in src_rses]
        dst_rses = get_rses_with_attribute_value(key='site', value=dst_site, lookup_key='site')
        if not dst_rses:
            return generate_http_error_flask(404, 'NotFound', 'Could not resolve site name %s to RSE' % dst_site)
        dst_rses = [get_rse_name(rse['rse_id']) for rse in dst_rses]
    else:
        dst_rses = [dst_rse]
        src_rses = [src_rse]

    results = []
    for result in request.list_requests(src_rses, dst_rses, states, issuer=f_request.environ.get('issuer')):
        result = result.to_dict()
        del result['_sa_instance_state']
        results.append(result)
    return json.dumps(results, cls=APIEncoder)

def select_target_rse(current_rse, rse_expression, subscription_id, rse_attributes, other_rses=[],
                      exclude_expression=None, force_expression=None, session=None):
    """
    Select a new target RSE for a rebalanced rule.

    :param current_rse: RSE of the source.
    :param rse_expression: RSE Expression of the source rule.
    :param subscription_id: Subscription ID of the source rule.
    :param rse_attributes: The attributes of the source rse.
    :param other_rses: Other RSEs with existing dataset replicas.
    :param exclude_expression: Exclude this rse_expression from being target_rses.
    :param force_expression: Force a specific rse_expression as target.
    :param session: The DB Session
    :returns: New RSE expression
    """
    if exclude_expression:
        target_rse = '(%s)\\%s' % (exclude_expression, current_rse)
    else:
        target_rse = current_rse

    rses = parse_expression(expression=rse_expression, session=session)
    # if subscription_id:
    #     pass
    #     # get_subscription_by_id(subscription_id, session)
    if force_expression is not None:
        rses = parse_expression(expression='(%s)\\%s' % (force_expression, target_rse),
                                filter={'availability_write': True}, session=session)
    elif len(rses) > 1:
        # Just define the RSE Expression without the current_rse
        return '(%s)\\%s' % (rse_expression, target_rse)
    elif rse_attributes['tier'] is True or rse_attributes['tier'] == '1':
        # Tier 1 should go to another Tier 1
        rses = parse_expression(expression='(tier=1&type=DATADISK)\\%s' % target_rse,
                                filter={'availability_write': True}, session=session)
    elif rse_attributes['tier'] == 2 or rse_attributes['tier'] == '2':
        # Tier 2 should go to another Tier 2
        rses = parse_expression(expression='(tier=2&type=DATADISK)\\%s' % target_rse,
                                filter={'availability_write': True}, session=session)

    rseselector = RSESelector(account='ddmadmin', rses=rses, weight='freespace', copies=1,
                              ignore_account_limit=True, session=session)
    return get_rse_name([rse_id for rse_id, _, _ in rseselector.select_rse(size=0, preferred_rse_ids=[],
                                                                           blacklist=other_rses)][0], session=session)

                                                      response['name'],
                                                      rse_update_name,
                                                      traceback.format_exc()))
                    raise

                record_timer('daemons.conveyor.common.update_request_state.lock-failed_transfer', (time.time() - tss) * 1000)
            else:
                logging.warn('REQUEUED DID %s:%s REQUEST %s AS %s TRY %s' % (response['scope'],
                                                                             response['name'],
                                                                             response['request_id'],
                                                                             new_req['request_id'],
                                                                             new_req['retry_count']))
        elif response['new_state'] == RequestState.LOST:
            req = request.get_request(response['request_id'])
            rse_name = rse_core.get_rse_name(rse_id=req['dest_rse_id'], session=session)
            rse_update_name = rse_name

            if req['request_type'] == RequestType.STAGEIN:
                rse_update_name = rse_core.list_rse_attributes(response['dst_rse'], session=session)['staging_buffer']
                logging.debug('OVERRIDE REPLICA DID %s:%s RSE %s TO %s' % (response['scope'],
                                                                           response['name'],
                                                                           response['dst_rse'],
                                                                           rse_update_name))

            response['scope'] = req['scope']
            response['name'] = req['name']
            response['dest_rse_id'] = rse_core.get_rse_id(rse=rse_update_name, session=session)
            response['dst_rse'] = rse_name

            add_monitor_message(response, session=session)
            request.archive_request(response['request_id'], session=session)

            logging.error('LOST DID %s:%s REQUEST %s' % (response['scope'],