Example #1
def list_expired_temporary_dids(rse_id, limit, worker_number=None, total_workers=None,
                                session=None):
    """
    List expired temporary DIDs.

    :param rse_id: the rse id.
    :param limit: The maximum number of replicas returned.
    :param worker_number:      id of the executing worker.
    :param total_workers:      Number of total workers.
    :param session: The database session in use.

    :returns: a list of replica dictionaries.
    """
    query = session.query(models.TemporaryDataIdentifier.scope,
                          models.TemporaryDataIdentifier.name,
                          models.TemporaryDataIdentifier.path,
                          models.TemporaryDataIdentifier.bytes).\
        with_hint(models.TemporaryDataIdentifier, "INDEX(tmp_dids TMP_DIDS_EXPIRED_AT_IDX)", 'oracle').\
        filter(case([(models.TemporaryDataIdentifier.expired_at.isnot(None), models.TemporaryDataIdentifier.rse_id), ]) == rse_id)

    query = filter_thread_work(session=session, query=query, total_threads=total_workers, thread_id=worker_number, hash_variable='name')

    return [{'path': path,
             'rse_id': rse_id,
             'scope': scope,
             'name': name,
             'bytes': bytes_}
            for scope, name, path, bytes_ in query.limit(limit)]
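All of these examples split work the same way: filter_thread_work hashes a column (here 'name') so that each worker only sees the rows in its own bucket. Below is a minimal plain-Python sketch of that bucketing idea; the md5 hashing and the belongs_to_worker helper are assumptions made for illustration, not Rucio's actual implementation.

# Illustration only: plain-Python analogue of the work splitting that
# filter_thread_work expresses in SQL (hashing details are assumed).
from hashlib import md5


def belongs_to_worker(value, worker_number, total_workers):
    """Return True if `value` hashes into this worker's bucket."""
    bucket = int(md5(value.encode()).hexdigest(), 16) % total_workers
    return bucket == worker_number


names = ['file_%03d' % i for i in range(10)]
for worker in range(3):
    # Each name lands in exactly one worker's partition.
    print(worker, [n for n in names if belongs_to_worker(n, worker, 3)])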
Example #2
def list_quarantined_replicas(rse_id, limit, worker_number=None, total_workers=None, session=None):
    """
    List RSE Quarantined File replicas.

    :param rse_id: the rse id.
    :param limit: The maximum number of replicas returned.
    :param worker_number:      id of the executing worker.
    :param total_workers:      Number of total workers.
    :param session: The database session in use.

    :returns: two lists:
              - The first one contains quarantined replicas actually registered in the replicas table
              - The second one contains real "dark" files
    """

    replicas_clause = []
    quarantined_replicas = {}
    real_replicas = []
    dark_replicas = []
    query = session.query(models.QuarantinedReplica.path,
                          models.QuarantinedReplica.bytes,
                          models.QuarantinedReplica.scope,
                          models.QuarantinedReplica.name,
                          models.QuarantinedReplica.created_at).\
        filter(models.QuarantinedReplica.rse_id == rse_id)
    query = filter_thread_work(session=session, query=query, total_threads=total_workers, thread_id=worker_number, hash_variable='path')

    for path, bytes_, scope, name, created_at in query.limit(limit):
        if (scope, name) not in quarantined_replicas:
            quarantined_replicas[(scope, name)] = []
            replicas_clause.append(and_(models.RSEFileAssociation.scope == scope,
                                        models.RSEFileAssociation.name == name))
        quarantined_replicas[(scope, name)].append((path, bytes_, created_at))

    for chunk in chunks(replicas_clause, 20):
        query = session.query(models.RSEFileAssociation.scope,
                              models.RSEFileAssociation.name).\
            filter(models.RSEFileAssociation.rse_id == rse_id).\
            filter(or_(*chunk))

        for scope, name in query.all():
            reps = quarantined_replicas.pop((scope, name))
            real_replicas.extend([{'scope': scope,
                                   'name': name,
                                   'rse_id': rse_id,
                                   'path': rep[0],
                                   'bytes': rep[1],
                                   'created_at': rep[2]}
                                  for rep in reps])

    for key, value in quarantined_replicas.items():
        dark_replicas.extend([{'scope': key[0],
                               'name': key[1],
                               'rse_id': rse_id,
                               'path': rep[0],
                               'bytes': rep[1],
                               'created_at': rep[2]}
                              for rep in value])

    return real_replicas, dark_replicas
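The replica lookup above is issued in chunks of 20 OR-ed clauses to keep each statement small. The real chunks() helper comes from Rucio's common utilities; the stand-in below only mirrors the idea.

# Stand-in for the chunks() helper used above, shown to make the batching
# of OR-clauses explicit.
def chunks(items, size):
    """Yield successive slices of `items` with at most `size` elements."""
    for i in range(0, len(items), size):
        yield items[i:i + size]


clauses = list(range(47))
print([len(chunk) for chunk in chunks(clauses, 20)])   # -> [20, 20, 7]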
Example #3
def list_quarantined_replicas(rse_id, limit, worker_number=None, total_workers=None, session=None):
    """
    List RSE Quarantined File replicas.

    :param rse_id: the rse id.
    :param limit: The maximum number of replicas returned.
    :param worker_number:      id of the executing worker.
    :param total_workers:      Number of total workers.
    :param session: The database session in use.

    :returns: a list of replica dictionaries.
    """

    query = session.query(models.QuarantinedReplica.path,
                          models.QuarantinedReplica.bytes,
                          models.QuarantinedReplica.scope,
                          models.QuarantinedReplica.name,
                          models.QuarantinedReplica.created_at).\
        filter(models.QuarantinedReplica.rse_id == rse_id)

    # do not delete valid replicas
    stmt = exists(select([1]).prefix_with("/*+ index(REPLICAS REPLICAS_PK) */", dialect='oracle')).\
        where(and_(models.RSEFileAssociation.scope == models.QuarantinedReplica.scope,
                   models.RSEFileAssociation.name == models.QuarantinedReplica.name,
                   models.RSEFileAssociation.rse_id == models.QuarantinedReplica.rse_id))
    query = query.filter(not_(stmt))
    query = filter_thread_work(session=session, query=query, total_threads=total_workers, thread_id=worker_number, hash_variable='path')
    return [{'path': path,
             'rse_id': rse_id,
             'created_at': created_at,
             'scope': scope,
             'name': name,
             'bytes': bytes_}
            for path, bytes_, scope, name, created_at in query.limit(limit)]
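The not_(exists(...)) filter above is an anti-join: quarantined rows that still match an entry in the replicas table are kept out of the result, so valid replicas are never returned for deletion. The following self-contained sketch reproduces the pattern on throwaway SQLite tables; the table and column names are invented, and the 1.x-style select([...]) API is assumed to match the examples on this page.

# Anti-join sketch on disposable SQLite tables (names invented for illustration).
from sqlalchemy import (Column, Integer, MetaData, String, Table,
                        and_, create_engine, exists, not_, select)

engine = create_engine('sqlite://')
meta = MetaData()
quarantined = Table('quarantined', meta, Column('name', String), Column('rse_id', Integer))
replicas = Table('replicas', meta, Column('name', String), Column('rse_id', Integer))
meta.create_all(engine)

with engine.begin() as conn:
    conn.execute(quarantined.insert(), [{'name': 'a', 'rse_id': 1}, {'name': 'b', 'rse_id': 1}])
    conn.execute(replicas.insert(), [{'name': 'a', 'rse_id': 1}])   # 'a' is still a valid replica
    stmt = select([quarantined.c.name]).where(
        not_(exists(select([1]).where(and_(replicas.c.name == quarantined.c.name,
                                           replicas.c.rse_id == quarantined.c.rse_id)))))
    print(conn.execute(stmt).fetchall())   # -> [('b',)]: only the "dark" file remains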
Example #4
def get_updated_rse_counters(total_workers, worker_number, session=None):
    """
    Get updated rse_counters.

    :param total_workers:      Number of total workers.
    :param worker_number:      id of the executing worker.
    :param session:            Database session in use.
    :returns:                  List of rse_ids whose rse_counters need to be updated.
    """
    query = session.query(models.UpdatedRSECounter.rse_id).\
        distinct(models.UpdatedRSECounter.rse_id)

    query = filter_thread_work(session=session, query=query, total_threads=total_workers, thread_id=worker_number, hash_variable='rse_id')
    results = query.all()
    return [result.rse_id for result in results]
Example #5
def delete_expired_tokens(total_workers,
                          worker_number,
                          limit=1000,
                          session=None):
    """
    Delete expired tokens.

    :param total_workers:      Number of total workers.
    :param worker_number:      id of the executing worker.
    :param limit:              Maximum number of tokens to delete.
    :param session:            Database session in use.

    :returns: number of deleted rows
    """

    # get expired tokens
    try:
        # delete all expired tokens except tokens which have refresh token that is still valid
        query = session.query(models.Token.token).filter(models.Token.expired_at <= datetime.datetime.utcnow())\
                                                 .filter(or_(models.Token.refresh_expired_at.is_(None),
                                                             models.Token.refresh_expired_at <= datetime.datetime.utcnow()))\
                                                 .order_by(models.Token.expired_at)

        query = filter_thread_work(session=session,
                                   query=query,
                                   total_threads=total_workers,
                                   thread_id=worker_number,
                                   hash_variable='token')

        # limiting the number of tokens deleted at once
        filtered_tokens_query = query.limit(limit)
        # remove expired tokens
        deleted_tokens = 0
        filtered_bunches = query_bunches(filtered_tokens_query, 10)
        for items in filtered_bunches:
            deleted_tokens += session.query(models.Token.token)\
                                     .filter(models.Token.token.in_(items))\
                                     .with_for_update(skip_locked=True)\
                                     .delete(synchronize_session='fetch')

    except Exception as error:
        raise RucioException(error.args)

    return deleted_tokens
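The two chained filters encode the deletion rule: a token is removed only when it has expired and has no refresh token that is still valid. Restated as an illustrative plain-Python predicate (not part of Rucio):

import datetime


def is_deletable(expired_at, refresh_expired_at, now=None):
    """A token is deletable when it has expired and its refresh token
    (if any) has expired as well."""
    now = now or datetime.datetime.utcnow()
    return expired_at <= now and (refresh_expired_at is None or refresh_expired_at <= now)


now = datetime.datetime.utcnow()
hour = datetime.timedelta(hours=1)
print(is_deletable(now - hour, None))        # True:  expired, no refresh token
print(is_deletable(now - hour, now + hour))  # False: refresh token still valid
print(is_deletable(now - hour, now - hour))  # True:  token and refresh token both expired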
Example #6
def get_tokens_for_refresh(total_workers,
                           worker_number,
                           refreshrate=3600,
                           limit=1000,
                           session=None):
    """
    Get tokens which have expired or will expire before (now + refreshrate),
    i.e. before the next run of this function, and which have a valid refresh token.

    :param total_workers:      Number of total workers.
    :param worker_number:      id of the executing worker.
    :param refreshrate:        Tokens expiring within this many seconds are selected.
    :param limit:              Maximum number of tokens to refresh per call.
    :param session:            Database session in use.

    :return: filtered_tokens, list of token objects eligible for refresh. Raises a RucioException on error.
    """
    try:
        # get tokens for refresh that expire in the next <refreshrate> seconds
        expiration_future = datetime.datetime.utcnow() + datetime.timedelta(
            seconds=refreshrate)
        query = session.query(models.Token.token).filter(and_(models.Token.refresh == true(),
                                                              models.Token.refresh_expired_at > datetime.datetime.utcnow(),
                                                              models.Token.expired_at < expiration_future))\
                                                 .order_by(models.Token.expired_at)
        query = filter_thread_work(session=session,
                                   query=query,
                                   total_threads=total_workers,
                                   thread_id=worker_number,
                                   hash_variable='token')

        # limiting the number of tokens for refresh
        filtered_tokens_query = query.limit(limit)
        filtered_tokens = []
        filtered_bunches = query_bunches(filtered_tokens_query, 10)
        for items in filtered_bunches:
            filtered_tokens += session.query(models.Token).filter(
                models.Token.token.in_(items)).with_for_update(
                    skip_locked=True).all()

    except Exception as error:
        raise RucioException(error.args)

    return filtered_tokens
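Analogously, the selection window above picks tokens whose refresh token is still valid and whose access token expires within the next refreshrate seconds. An illustrative plain-Python restatement (not part of Rucio):

import datetime


def needs_refresh(has_refresh, refresh_expired_at, expired_at, refreshrate=3600, now=None):
    """Token qualifies when its refresh token is valid and the access token
    expires (or has expired) within the next `refreshrate` seconds."""
    now = now or datetime.datetime.utcnow()
    return (has_refresh
            and refresh_expired_at > now
            and expired_at < now + datetime.timedelta(seconds=refreshrate))


now = datetime.datetime.utcnow()
print(needs_refresh(True, now + datetime.timedelta(days=1), now + datetime.timedelta(minutes=10)))  # True
print(needs_refresh(True, now + datetime.timedelta(days=1), now + datetime.timedelta(hours=2)))     # False: expires too far ahead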
Example #7
def get_updated_account_counters(total_workers, worker_number, session=None):
    """
    Get updated account counters.

    :param total_workers:      Number of total workers.
    :param worker_number:      id of the executing worker.
    :param session:            Database session in use.
    :returns:                  List of (account, rse_id) pairs whose account_counters need to be updated.
    """
    query = session.query(models.UpdatedAccountCounter.account, models.UpdatedAccountCounter.rse_id).\
        distinct(models.UpdatedAccountCounter.account, models.UpdatedAccountCounter.rse_id)

    if session.bind.dialect.name == 'oracle':
        hash_variable = 'CONCAT(account, rse_id)'
    else:
        hash_variable = 'concat(account, rse_id)'

    query = filter_thread_work(session=session, query=query, total_threads=total_workers, thread_id=worker_number, hash_variable=hash_variable)

    return query.all()
Example #8
def retrieve_messages(bulk=1000,
                      thread=None,
                      total_threads=None,
                      event_type=None,
                      lock=False,
                      session=None):
    """
    Retrieve up to $bulk messages.

    :param bulk: Number of messages as an integer.
    :param thread: Identifier of the caller thread as an integer.
    :param total_threads: Maximum number of threads as an integer.
    :param event_type: Return only specified event_type. If None, returns everything except email.
    :param lock: Select exclusively some rows.
    :param session: The database session to use.

    :returns messages: List of dictionaries {id, created_at, event_type, payload, services}
    """
    messages = []
    try:
        subquery = session.query(Message.id)
        subquery = filter_thread_work(session=session,
                                      query=subquery,
                                      total_threads=total_threads,
                                      thread_id=thread)
        if event_type:
            subquery = subquery.filter_by(event_type=event_type)
        else:
            subquery = subquery.filter(Message.event_type != 'email')

        # Step 1:
        # MySQL does not support limits in nested queries, limit on the outer query instead.
        # This is not as performant, but the best we can get from MySQL.
        if session.bind.dialect.name == 'mysql':
            subquery = subquery.order_by(Message.created_at)
        else:
            subquery = subquery.order_by(Message.created_at).limit(bulk)

        query = session.query(Message.id,
                              Message.created_at,
                              Message.event_type,
                              Message.payload,
                              Message.services)\
            .filter(Message.id.in_(subquery))\
            .with_for_update(nowait=True)

        # Step 2:
        # MySQL does not support limits in nested queries, limit on the outer query instead.
        # This is not as performant, but the best we can get from MySQL.
        if session.bind.dialect.name == 'mysql':
            query = query.limit(bulk)

        # Step 3:
        # Assemble message object
        for id, created_at, event_type, payload, services in query:
            message = {
                'id': id,
                'created_at': created_at,
                'event_type': event_type,
                'services': services
            }

            # Only switch SQL context when necessary
            if payload == 'nolimit':
                nolimit_query = session.query(
                    Message.payload_nolimit).filter(Message.id == id).one()[0]
                message['payload'] = json.loads(str(nolimit_query))
            else:
                message['payload'] = json.loads(str(payload))

            messages.append(message)

        return messages

    except IntegrityError as e:
        raise RucioException(e.args)
Example #9
def get_files_and_replica_locks_of_dataset(scope, name, nowait=False, restrict_rses=None, only_stuck=False,
                                           total_threads=None, thread_id=None,
                                           session=None):
    """
    Get all the files of a dataset and, if they exist, all locks on each file.

    :param scope:          Scope of the dataset
    :param name:           Name of the dataset
    :param nowait:         Nowait parameter for the FOR UPDATE statement
    :param restrict_rses:  Possible RSE_ids to filter on.
    :param only_stuck:     If true, only get STUCK locks.
    :param total_threads:  Total threads
    :param thread_id:      This thread
    :param session:        The db session.
    :return:               Dictionary with keys: (scope, name)
                           and as value: [LockObject]
    :raises:               NoResultFound
    """
    locks = {}
    if session.bind.dialect.name == 'postgresql':
        content_query = session.query(models.DataIdentifierAssociation.child_scope,
                                      models.DataIdentifierAssociation.child_name).\
            with_hint(models.DataIdentifierAssociation,
                      "INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)",
                      'oracle').\
            filter(models.DataIdentifierAssociation.scope == scope,
                   models.DataIdentifierAssociation.name == name)

        if total_threads and total_threads > 1:
            content_query = filter_thread_work(session=session, query=content_query, total_threads=total_threads,
                                               thread_id=thread_id, hash_variable='child_name')

        for child_scope, child_name in content_query.yield_per(1000):
            locks[(child_scope, child_name)] = []

        query = session.query(models.DataIdentifierAssociation.child_scope,
                              models.DataIdentifierAssociation.child_name,
                              models.ReplicaLock).\
            with_hint(models.DataIdentifierAssociation,
                      "INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)",
                      'oracle').\
            filter(and_(models.DataIdentifierAssociation.child_scope == models.ReplicaLock.scope,
                        models.DataIdentifierAssociation.child_name == models.ReplicaLock.name))\
            .filter(models.DataIdentifierAssociation.scope == scope,
                    models.DataIdentifierAssociation.name == name)

        if restrict_rses is not None:
            rse_clause = []
            for rse_id in restrict_rses:
                rse_clause.append(models.ReplicaLock.rse_id == rse_id)
            if rse_clause:
                query = session.query(models.DataIdentifierAssociation.child_scope,
                                      models.DataIdentifierAssociation.child_name,
                                      models.ReplicaLock).\
                    with_hint(models.DataIdentifierAssociation, "INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)", 'oracle').\
                    filter(and_(models.DataIdentifierAssociation.child_scope == models.ReplicaLock.scope,
                                models.DataIdentifierAssociation.child_name == models.ReplicaLock.name,
                                or_(*rse_clause)))\
                    .filter(models.DataIdentifierAssociation.scope == scope,
                            models.DataIdentifierAssociation.name == name)
    else:
        query = session.query(models.DataIdentifierAssociation.child_scope,
                              models.DataIdentifierAssociation.child_name,
                              models.ReplicaLock).\
            with_hint(models.DataIdentifierAssociation, "INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)", 'oracle').\
            outerjoin(models.ReplicaLock,
                      and_(models.DataIdentifierAssociation.child_scope == models.ReplicaLock.scope,
                           models.DataIdentifierAssociation.child_name == models.ReplicaLock.name))\
            .filter(models.DataIdentifierAssociation.scope == scope, models.DataIdentifierAssociation.name == name)

        if restrict_rses is not None:
            rse_clause = []
            for rse_id in restrict_rses:
                rse_clause.append(models.ReplicaLock.rse_id == rse_id)
            if rse_clause:
                query = session.query(models.DataIdentifierAssociation.child_scope,
                                      models.DataIdentifierAssociation.child_name,
                                      models.ReplicaLock).\
                    with_hint(models.DataIdentifierAssociation, "INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)", 'oracle').\
                    outerjoin(models.ReplicaLock,
                              and_(models.DataIdentifierAssociation.child_scope == models.ReplicaLock.scope,
                                   models.DataIdentifierAssociation.child_name == models.ReplicaLock.name,
                                   or_(*rse_clause)))\
                    .filter(models.DataIdentifierAssociation.scope == scope,
                            models.DataIdentifierAssociation.name == name)

    if only_stuck:
        query = query.filter(models.ReplicaLock.state == LockState.STUCK)

    if total_threads and total_threads > 1:
        query = filter_thread_work(session=session, query=query, total_threads=total_threads,
                                   thread_id=thread_id, hash_variable='child_name')

    query = query.with_for_update(nowait=nowait, of=models.ReplicaLock.state)

    for child_scope, child_name, lock in query:
        if (child_scope, child_name) not in locks:
            if lock is None:
                locks[(child_scope, child_name)] = []
            else:
                locks[(child_scope, child_name)] = [lock]
        else:
            locks[(child_scope, child_name)].append(lock)

    return locks
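The final loop turns the outer-join rows into the per-file dictionary, where files without any lock map to an empty list. A simplified, self-contained sketch of that grouping step with invented sample rows:

# Simplified grouping sketch; rows mimic the (child_scope, child_name, lock)
# tuples produced by the outer join, with None when a file has no lock.
rows = [('user', 'file_a', 'lock_1'),
        ('user', 'file_a', 'lock_2'),
        ('user', 'file_b', None)]        # invented sample data

locks = {}
for child_scope, child_name, lock in rows:
    locks.setdefault((child_scope, child_name), [])
    if lock is not None:
        locks[(child_scope, child_name)].append(lock)

print(locks)   # {('user', 'file_a'): ['lock_1', 'lock_2'], ('user', 'file_b'): []}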