def list_expired_temporary_dids(rse_id, limit, worker_number=None, total_workers=None, session=None):
    """
    List expired temporary DIDs.

    :param rse_id: the rse id.
    :param limit: The maximum number of replicas returned.
    :param worker_number: id of the executing worker.
    :param total_workers: Number of total workers.
    :param session: The database session in use.

    :returns: a list of dictionary replica.
    """
    is_none = None
    # select only DIDs whose expired_at is set (i.e. already expired) on the given RSE
    query = session.query(models.TemporaryDataIdentifier.scope,
                          models.TemporaryDataIdentifier.name,
                          models.TemporaryDataIdentifier.path,
                          models.TemporaryDataIdentifier.bytes).\
        with_hint(models.TemporaryDataIdentifier, "INDEX(tmp_dids TMP_DIDS_EXPIRED_AT_IDX)", 'oracle').\
        filter(case([(models.TemporaryDataIdentifier.expired_at != is_none, models.TemporaryDataIdentifier.rse_id), ]) == rse_id)

    query = filter_thread_work(session=session, query=query, total_threads=total_workers,
                               thread_id=worker_number, hash_variable='name')

    return [{'path': path,
             'rse_id': rse_id,
             'scope': scope,
             'name': name,
             'bytes': bytes_}
            for scope, name, path, bytes_ in query.limit(limit)]
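
# Note (assumption, defined elsewhere in rucio.db.sqla): `filter_thread_work` is
# used throughout this module to partition rows across workers. Conceptually it
# appends a hash-based predicate so that each worker sees a disjoint slice of the
# table, roughly (Oracle-style sketch):
#
#     ORA_HASH(<hash_variable>, total_threads - 1) = thread_id
#
# Running total_threads workers with thread_id 0 .. total_threads - 1 therefore
# covers every row exactly once.
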
def list_quarantined_replicas(rse_id, limit, worker_number=None, total_workers=None, session=None):
    """
    List RSE Quarantined File replicas.

    :param rse_id: the rse id.
    :param limit: The maximum number of replicas returned.
    :param worker_number: id of the executing worker.
    :param total_workers: Number of total workers.
    :param session: The database session in use.

    :returns: two lists :
              - The first one contains quarantined replicas actually registered in the replicas table
              - The second one contains real "dark" files
    """
    replicas_clause = []
    quarantined_replicas = {}
    real_replicas = []
    dark_replicas = []
    query = session.query(models.QuarantinedReplica.path,
                          models.QuarantinedReplica.bytes,
                          models.QuarantinedReplica.scope,
                          models.QuarantinedReplica.name,
                          models.QuarantinedReplica.created_at).\
        filter(models.QuarantinedReplica.rse_id == rse_id)

    query = filter_thread_work(session=session, query=query, total_threads=total_workers,
                               thread_id=worker_number, hash_variable='path')

    for path, bytes_, scope, name, created_at in query.limit(limit):
        if (scope, name) not in quarantined_replicas:
            quarantined_replicas[(scope, name)] = []
            replicas_clause.append(and_(models.RSEFileAssociation.scope == scope,
                                        models.RSEFileAssociation.name == name))
        quarantined_replicas[(scope, name)].append((path, bytes_, created_at))

    for chunk in chunks(replicas_clause, 20):
        query = session.query(models.RSEFileAssociation.scope,
                              models.RSEFileAssociation.name).\
            filter(models.RSEFileAssociation.rse_id == rse_id).\
            filter(or_(*chunk))
        for scope, name in query.all():
            reps = quarantined_replicas.pop((scope, name))
            real_replicas.extend([{'scope': scope,
                                   'name': name,
                                   'rse_id': rse_id,
                                   'path': rep[0],
                                   'bytes': rep[1],
                                   'created_at': rep[2]}
                                  for rep in reps])

    for key, value in quarantined_replicas.items():
        dark_replicas.extend([{'scope': key[0],
                               'name': key[1],
                               'rse_id': rse_id,
                               'path': rep[0],
                               'bytes': rep[1],
                               'created_at': rep[2]}
                              for rep in value])

    return real_replicas, dark_replicas
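
# Usage sketch (illustrative): only the "dark" files are unknown to the replicas
# table; the first list still has catalogue entries and must go through the normal
# deletion workflow. `chunks` above is assumed to slice a list into fixed-size
# pieces; `session` is assumed to be an open database session.
#
#     real, dark = list_quarantined_replicas(rse_id=rse_id, limit=100, session=session)
#     for replica in dark:
#         print('dark file:', replica['path'], replica['bytes'])
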
def list_quarantined_replicas(rse_id, limit, worker_number=None, total_workers=None, session=None):
    """
    List RSE Quarantined File replicas.

    :param rse_id: the rse id.
    :param limit: The maximum number of replicas returned.
    :param worker_number: id of the executing worker.
    :param total_workers: Number of total workers.
    :param session: The database session in use.

    :returns: a list of dictionary replica.
    """
    query = session.query(models.QuarantinedReplica.path,
                          models.QuarantinedReplica.bytes,
                          models.QuarantinedReplica.scope,
                          models.QuarantinedReplica.name,
                          models.QuarantinedReplica.created_at).\
        filter(models.QuarantinedReplica.rse_id == rse_id)

    # do not delete valid replicas
    stmt = exists(select([1]).prefix_with("/*+ index(REPLICAS REPLICAS_PK) */", dialect='oracle')).\
        where(and_(models.RSEFileAssociation.scope == models.QuarantinedReplica.scope,
                   models.RSEFileAssociation.name == models.QuarantinedReplica.name,
                   models.RSEFileAssociation.rse_id == models.QuarantinedReplica.rse_id))
    query = query.filter(not_(stmt))
    query = filter_thread_work(session=session, query=query, total_threads=total_workers,
                               thread_id=worker_number, hash_variable='path')
    return [{'path': path,
             'rse_id': rse_id,
             'created_at': created_at,
             'scope': scope,
             'name': name,
             'bytes': bytes_}
            for path, bytes_, scope, name, created_at in query.limit(limit)]
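
# Note: this variant pushes the "is it still a valid replica?" check into the
# database as a NOT EXISTS anti-join instead of the two-pass dictionary approach
# above. The generated SQL is roughly (sketch, Oracle index hint omitted):
#
#     SELECT path, bytes, scope, name, created_at FROM quarantined_replicas q
#     WHERE q.rse_id = :rse_id
#       AND NOT EXISTS (SELECT 1 FROM replicas r
#                       WHERE r.scope = q.scope AND r.name = q.name
#                         AND r.rse_id = q.rse_id)
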
def get_updated_rse_counters(total_workers, worker_number, session=None):
    """
    Get updated rse_counters.

    :param total_workers: Number of total workers.
    :param worker_number: id of the executing worker.
    :param session: Database session in use.
    :returns: List of rse_ids whose rse_counters need to be updated.
    """
    query = session.query(models.UpdatedRSECounter.rse_id).\
        distinct(models.UpdatedRSECounter.rse_id)

    query = filter_thread_work(session=session, query=query, total_threads=total_workers,
                               thread_id=worker_number, hash_variable='rse_id')
    results = query.all()
    return [result.rse_id for result in results]
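
# Usage sketch (illustrative): a counter daemon would typically rebuild each
# returned counter; `update_rse_counter` is assumed to be the companion function
# that consumes one rse_id.
#
#     for rse_id in get_updated_rse_counters(total_workers=4, worker_number=0,
#                                            session=session):
#         update_rse_counter(rse_id=rse_id, session=session)
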
def delete_expired_tokens(total_workers, worker_number, limit=1000, session=None):
    """
    Delete expired tokens.

    :param total_workers: Number of total workers.
    :param worker_number: id of the executing worker.
    :param limit: Maximum number of tokens to delete.
    :param session: Database session in use.

    :returns: number of deleted rows
    """
    # get expired tokens
    try:
        # delete all expired tokens except tokens which have a refresh token that is still valid
        query = session.query(models.Token.token).filter(and_(models.Token.expired_at <= datetime.datetime.utcnow()))\
                       .filter(or_(models.Token.refresh_expired_at.__eq__(None),
                                   models.Token.refresh_expired_at <= datetime.datetime.utcnow()))\
                       .order_by(models.Token.expired_at)

        query = filter_thread_work(session=session, query=query, total_threads=total_workers,
                                   thread_id=worker_number, hash_variable='token')

        # limiting the number of tokens deleted at once
        filtered_tokens_query = query.limit(limit)
        # remove expired tokens
        deleted_tokens = 0
        filtered_bunches = query_bunches(filtered_tokens_query, 10)
        for items in filtered_bunches:
            deleted_tokens += session.query(models.Token.token)\
                                     .filter(models.Token.token.in_(items))\
                                     .with_for_update(skip_locked=True)\
                                     .delete(synchronize_session='fetch')
    except Exception as error:
        raise RucioException(error.args)
    return deleted_tokens
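
# Illustrative sketch (assumption): `query_bunches` is not defined in this module;
# it is expected to materialise the scalar results of a query in fixed-size
# batches, along the lines of:
#
#     def query_bunches(query, bunch_size):
#         rows = [row[0] for row in query.all()]
#         return [rows[i:i + bunch_size] for i in range(0, len(rows), bunch_size)]
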
def get_tokens_for_refresh(total_workers, worker_number, refreshrate=3600, limit=1000, session=None):
    """
    Get tokens which expired or will expire before (now + refreshrate)
    next run of this function and which have a valid refresh token.

    :param total_workers: Number of total workers.
    :param worker_number: id of the executing worker.
    :param refreshrate: Length of the refresh window in seconds.
    :param limit: Maximum number of tokens to refresh per call.
    :param session: Database session in use.

    :return: filtered_tokens, list of tokens eligible for refresh. Throws an Exception otherwise.
    """
    try:
        # get tokens for refresh that expire in the next <refreshrate> seconds
        expiration_future = datetime.datetime.utcnow() + datetime.timedelta(seconds=refreshrate)
        query = session.query(models.Token.token).filter(and_(models.Token.refresh == true(),
                                                              models.Token.refresh_expired_at > datetime.datetime.utcnow(),
                                                              models.Token.expired_at < expiration_future))\
                       .order_by(models.Token.expired_at)

        query = filter_thread_work(session=session, query=query, total_threads=total_workers,
                                   thread_id=worker_number, hash_variable='token')

        # limiting the number of tokens for refresh
        filtered_tokens_query = query.limit(limit)
        filtered_tokens = []
        filtered_bunches = query_bunches(filtered_tokens_query, 10)
        for items in filtered_bunches:
            filtered_tokens += session.query(models.Token)\
                                      .filter(models.Token.token.in_(items))\
                                      .with_for_update(skip_locked=True)\
                                      .all()
    except Exception as error:
        raise RucioException(error.args)
    return filtered_tokens
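
# Usage sketch (illustrative): an OIDC refresh daemon loop; `refresh_token` here
# stands in for whatever companion function exchanges the refresh token for a new
# access token (an assumption, not defined in this module).
#
#     for token in get_tokens_for_refresh(total_workers=1, worker_number=0,
#                                         refreshrate=3600, session=session):
#         refresh_token(token, session=session)
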
def get_updated_account_counters(total_workers, worker_number, session=None):
    """
    Get updated account_counters.

    :param total_workers: Number of total workers.
    :param worker_number: id of the executing worker.
    :param session: Database session in use.
    :returns: List of (account, rse_id) pairs whose account_counters need to be updated.
    """
    query = session.query(models.UpdatedAccountCounter.account, models.UpdatedAccountCounter.rse_id).\
        distinct(models.UpdatedAccountCounter.account, models.UpdatedAccountCounter.rse_id)

    if session.bind.dialect.name == 'oracle':
        hash_variable = 'CONCAT(account, rse_id)'
    else:
        hash_variable = 'concat(account, rse_id)'

    query = filter_thread_work(session=session, query=query, total_threads=total_workers,
                               thread_id=worker_number, hash_variable=hash_variable)

    return query.all()
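
# Usage sketch (illustrative): each returned (account, rse_id) pair identifies one
# account counter to rebuild; `update_account_counter` is an assumed companion
# function, mirroring the rse_counter example above.
#
#     for account, rse_id in get_updated_account_counters(total_workers=4,
#                                                         worker_number=0,
#                                                         session=session):
#         update_account_counter(account=account, rse_id=rse_id, session=session)
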
def retrieve_messages(bulk=1000, thread=None, total_threads=None, event_type=None, lock=False, session=None):
    """
    Retrieve up to $bulk messages.

    :param bulk: Number of messages as an integer.
    :param thread: Identifier of the caller thread as an integer.
    :param total_threads: Maximum number of threads as an integer.
    :param event_type: Return only specified event_type. If None, returns everything except email.
    :param lock: Select exclusively some rows.
    :param session: The database session to use.

    :returns messages: List of dictionaries {id, created_at, event_type, payload, services}
    """
    messages = []
    try:
        subquery = session.query(Message.id)
        subquery = filter_thread_work(session=session, query=subquery, total_threads=total_threads, thread_id=thread)
        if event_type:
            subquery = subquery.filter_by(event_type=event_type)
        else:
            subquery = subquery.filter(Message.event_type != 'email')

        # Step 1:
        # MySQL does not support limits in nested queries, limit on the outer query instead.
        # This is not as performant, but the best we can get from MySQL.
        if session.bind.dialect.name == 'mysql':
            subquery = subquery.order_by(Message.created_at)
        else:
            subquery = subquery.order_by(Message.created_at).limit(bulk)

        query = session.query(Message.id,
                              Message.created_at,
                              Message.event_type,
                              Message.payload,
                              Message.services)\
            .filter(Message.id.in_(subquery))\
            .with_for_update(nowait=True)

        # Step 2:
        # MySQL does not support limits in nested queries, limit on the outer query instead.
        # This is not as performant, but the best we can get from MySQL.
        if session.bind.dialect.name == 'mysql':
            query = query.limit(bulk)

        # Step 3:
        # Assemble message object
        for id, created_at, event_type, payload, services in query:
            message = {'id': id,
                       'created_at': created_at,
                       'event_type': event_type,
                       'services': services}

            # Only switch SQL context when necessary
            if payload == 'nolimit':
                nolimit_query = session.query(Message.payload_nolimit).filter(Message.id == id).one()[0]
                message['payload'] = json.loads(str(nolimit_query))
            else:
                message['payload'] = json.loads(str(payload))

            messages.append(message)
        return messages
    except IntegrityError as e:
        raise RucioException(e.args)
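
# Usage sketch (illustrative): a hermes-style delivery loop. Messages are fetched
# with a row lock, delivered, and then removed. `deliver` and `delete_messages`
# are assumptions standing in for the actual delivery and cleanup companions.
#
#     msgs = retrieve_messages(bulk=500, thread=0, total_threads=4, session=session)
#     delivered = [{'id': m['id']} for m in msgs if deliver(m)]
#     delete_messages(delivered, session=session)
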
def get_files_and_replica_locks_of_dataset(scope, name, nowait=False, restrict_rses=None, only_stuck=False,
                                           total_threads=None, thread_id=None,
                                           session=None):
    """
    Get all the files of a dataset and, if existing, all locks of the file.

    :param scope: Scope of the dataset
    :param name: Name of the dataset
    :param nowait: Nowait parameter for the FOR UPDATE statement
    :param restrict_rses: Possible RSE_ids to filter on.
    :param only_stuck: If true, only get STUCK locks.
    :param total_threads: Total threads
    :param thread_id: This thread
    :param session: The db session.

    :return: Dictionary with keys: (scope, name) and as value: [LockObject]
    :raises: NoResultFound
    """
    locks = {}
    if session.bind.dialect.name == 'postgresql':
        content_query = session.query(models.DataIdentifierAssociation.child_scope,
                                      models.DataIdentifierAssociation.child_name).\
            with_hint(models.DataIdentifierAssociation,
                      "INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)",
                      'oracle').\
            filter(models.DataIdentifierAssociation.scope == scope,
                   models.DataIdentifierAssociation.name == name)

        if total_threads and total_threads > 1:
            content_query = filter_thread_work(session=session, query=content_query, total_threads=total_threads,
                                               thread_id=thread_id, hash_variable='child_name')

        for child_scope, child_name in content_query.yield_per(1000):
            locks[(child_scope, child_name)] = []

        query = session.query(models.DataIdentifierAssociation.child_scope,
                              models.DataIdentifierAssociation.child_name,
                              models.ReplicaLock).\
            with_hint(models.DataIdentifierAssociation,
                      "INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)",
                      'oracle').\
            filter(and_(models.DataIdentifierAssociation.child_scope == models.ReplicaLock.scope,
                        models.DataIdentifierAssociation.child_name == models.ReplicaLock.name))\
            .filter(models.DataIdentifierAssociation.scope == scope,
                    models.DataIdentifierAssociation.name == name)

        if restrict_rses is not None:
            rse_clause = []
            for rse_id in restrict_rses:
                rse_clause.append(models.ReplicaLock.rse_id == rse_id)
            if rse_clause:
                query = session.query(models.DataIdentifierAssociation.child_scope,
                                      models.DataIdentifierAssociation.child_name,
                                      models.ReplicaLock).\
                    with_hint(models.DataIdentifierAssociation,
                              "INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)",
                              'oracle').\
                    filter(and_(models.DataIdentifierAssociation.child_scope == models.ReplicaLock.scope,
                                models.DataIdentifierAssociation.child_name == models.ReplicaLock.name,
                                or_(*rse_clause)))\
                    .filter(models.DataIdentifierAssociation.scope == scope,
                            models.DataIdentifierAssociation.name == name)
    else:
        query = session.query(models.DataIdentifierAssociation.child_scope,
                              models.DataIdentifierAssociation.child_name,
                              models.ReplicaLock).\
            with_hint(models.DataIdentifierAssociation,
                      "INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)",
                      'oracle').\
            outerjoin(models.ReplicaLock,
                      and_(models.DataIdentifierAssociation.child_scope == models.ReplicaLock.scope,
                           models.DataIdentifierAssociation.child_name == models.ReplicaLock.name))\
            .filter(models.DataIdentifierAssociation.scope == scope,
                    models.DataIdentifierAssociation.name == name)

        if restrict_rses is not None:
            rse_clause = []
            for rse_id in restrict_rses:
                rse_clause.append(models.ReplicaLock.rse_id == rse_id)
            if rse_clause:
                query = session.query(models.DataIdentifierAssociation.child_scope,
                                      models.DataIdentifierAssociation.child_name,
                                      models.ReplicaLock).\
                    with_hint(models.DataIdentifierAssociation,
                              "INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)",
                              'oracle').\
                    outerjoin(models.ReplicaLock,
                              and_(models.DataIdentifierAssociation.child_scope == models.ReplicaLock.scope,
                                   models.DataIdentifierAssociation.child_name == models.ReplicaLock.name,
                                   or_(*rse_clause)))\
                    .filter(models.DataIdentifierAssociation.scope == scope,
                            models.DataIdentifierAssociation.name == name)

    if only_stuck:
        query = query.filter(models.ReplicaLock.state == LockState.STUCK)

    if total_threads and total_threads > 1:
        query = filter_thread_work(session=session, query=query, total_threads=total_threads,
                                   thread_id=thread_id, hash_variable='child_name')

    query = query.with_for_update(nowait=nowait, of=models.ReplicaLock.state)

    for child_scope, child_name, lock in query:
        if (child_scope, child_name) not in locks:
            if lock is None:
                locks[(child_scope, child_name)] = []
            else:
                locks[(child_scope, child_name)] = [lock]
        else:
            locks[(child_scope, child_name)].append(lock)
    return locks
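
# Usage sketch (illustrative): a judge-style evaluator inspecting the locks of
# every file in a dataset; files without any lock map to an empty list.
#
#     locks = get_files_and_replica_locks_of_dataset(scope=scope, name=name,
#                                                    nowait=True, session=session)
#     for (file_scope, file_name), file_locks in locks.items():
#         if not file_locks:
#             print('no locks for', file_scope, file_name)
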