Ejemplo n.º 1
0
def test_two_sources_one_destination(db_session, vo, file, mock_request):
    """Check that the preparer selects the source with the smaller distance ranking.

    A second source RSE is generated whose setup function registers a
    distance of ranking 2 towards the request's destination and adds a
    replica of ``file`` on it. The test asserts that the request's original
    source has ranking 5, runs the preparer once and expects the request to
    be QUEUED with the generated RSE as its source.
    """

    def setup(rse):
        # Passed to GeneratedRSE as setup_func; prepares the distance and the replica
        add_distance(rse.rse_id, mock_request.dest_rse_id, ranking=2, session=rse.db_session)
        add_replicas(rse_id=rse.rse_id, files=[file], account=mock_request.account, session=rse.db_session)

    with GeneratedRSE(vo=vo, db_session=db_session, setup_func=setup) as source2_rse:
        try:
            src1_distance, src2_distance = (
                get_distances(
                    src_rse_id=src_rse,
                    dest_rse_id=mock_request.dest_rse_id,
                    session=db_session,
                )
                for src_rse in (mock_request.source_rse_id, source2_rse.rse_id)
            )

            assert src1_distance and len(src1_distance) == 1 and src1_distance[0]['ranking'] == 5
            assert src2_distance and len(src2_distance) == 1 and src2_distance[0]['ranking'] == 2

            preparer.run_once(session=db_session, logger=print)
            db_session.commit()

            updated_mock_request = (
                db_session.query(models.Request).filter_by(id=mock_request.id).one()
            )  # type: models.Request

            assert updated_mock_request.state == RequestState.QUEUED
            assert updated_mock_request.source_rse_id == source2_rse.rse_id  # distance 2 < 5
        finally:
            # Remove the extra replica even when an assertion or the preparer fails,
            # so a failing run does not leave it behind.
            delete_replicas(rse_id=source2_rse.rse_id, files=[file], session=db_session)
Ejemplo n.º 2
0
def delete_replicas(rse, files, issuer, ignore_availability=False):
    """
    Delete a batch of file replicas from one RSE on behalf of an account.

    :param rse: The RSE name.
    :param files: The list of files. The 'scope' entry of every file is
                  replaced in place by an InternalScope.
    :param issuer: The issuer account.
    :param ignore_availability: Ignore the RSE blacklisting. Reset to False
                                when the issuer lacks the
                                'skip_availability_check' permission.

    :raises AccessDenied: If the issuer may not delete replicas on the RSE.
    """
    validate_schema(name='r_dids', obj=files)

    rse_id = get_rse_id(rse=rse)
    permission_kwargs = {'rse': rse, 'rse_id': rse_id}

    may_delete = permission.has_permission(issuer=issuer,
                                           action='delete_replicas',
                                           kwargs=permission_kwargs)
    if not may_delete:
        message = 'Account %s can not delete file replicas on %s' % (issuer, rse)
        raise exception.AccessDenied(message)

    may_skip_check = permission.has_permission(issuer=issuer,
                                               action='skip_availability_check',
                                               kwargs=permission_kwargs)
    if not may_skip_check:
        # The caller's request to skip the availability check is not honoured
        ignore_availability = False

    # The core layer is called with InternalScope objects instead of the raw scope values
    for entry in files:
        entry['scope'] = InternalScope(entry['scope'])

    replica.delete_replicas(rse_id=rse_id, files=files, ignore_availability=ignore_availability)
Ejemplo n.º 3
0
    def cleanup(self, session=None):
        """
        Remove the transfers, rules and replicas that reference the DIDs
        recorded in ``self.created_dids``. Does nothing when that list is empty.

        :param session: The database session in use.
        """
        if not self.created_dids:
            return

        # Cleanup Transfers: sources first, then requests
        for model in (models.Source, models.Request):
            matches_created_did = or_(and_(model.scope == did['scope'],
                                           model.name == did['name'])
                                      for did in self.created_dids)
            session.query(model).filter(matches_created_did).delete(synchronize_session=False)

        # Cleanup Locks Rules
        rule_matches_did = or_(and_(models.ReplicationRule.scope == did['scope'],
                                    models.ReplicationRule.name == did['name'])
                               for did in self.created_dids)
        for (rule_id,) in session.query(models.ReplicationRule.id).filter(rule_matches_did):
            rule_core.delete_rule(rule_id, session=session)

        # Cleanup Replicas and Parent Datasets
        found = list(replica_core.list_replicas(self.created_dids, all_states=True, session=session))
        files_per_rse = {}
        for entry in found:
            for rse_id in entry['rses']:
                if rse_id not in files_per_rse:
                    files_per_rse[rse_id] = []
                files_per_rse[rse_id].append({'scope': entry['scope'], 'name': entry['name']})
        for rse_id in files_per_rse:
            replica_core.delete_replicas(rse_id=rse_id, files=files_per_rse[rse_id], session=session)
Ejemplo n.º 4
0
def test_two_sources_one_destination(db_session, vo, file, source_rse,
                                     source2_rse, mock_request):
    """Check that the preparer selects the source with the smaller distance ranking.

    A replica of ``file`` is added on the second source RSE. The test asserts
    rankings of 5 and 2 for the two sources, runs the preparer once and
    expects the request to be QUEUED with the second source selected. The
    extra replica is removed again in all cases.
    """
    add_replicas(rse_id=source2_rse['id'], files=[file], account=mock_request.account, session=db_session)
    try:
        candidate_ids = (source_rse['id'], source2_rse['id'])
        src1_distance, src2_distance = [
            get_distances(src_rse_id=candidate, dest_rse_id=mock_request.dest_rse_id, session=db_session)
            for candidate in candidate_ids
        ]

        assert src1_distance and len(src1_distance) == 1 and src1_distance[0]['ranking'] == 5
        assert src2_distance and len(src2_distance) == 1 and src2_distance[0]['ranking'] == 2

        preparer.run_once(session=db_session)
        db_session.commit()

        request_query = db_session.query(models.Request).filter_by(id=mock_request.id)
        updated_mock_request = request_query.one()  # type: models.Request

        assert updated_mock_request.state == RequestState.QUEUED
        # distance 2 < 5
        assert updated_mock_request.source_rse_id == source2_rse['id']

    finally:
        # Undo the add_replicas call above
        delete_replicas(rse_id=source2_rse['id'], files=[file], session=db_session)
        db_session.commit()
Ejemplo n.º 5
0
 def teardown(self):
     """RucioCache (Func): Clean necessary rse and dids """
     # Best-effort cleanup: the replica on the non-volatile RSE is deleted first,
     # then both RSEs are removed as 'root'. Any failure is printed, not raised.
     try:
         replica.delete_replicas(self.rse_exist_novolatile, self.file_replica_on_novolatile)
         rse.del_rse(self.rse_exist_volatile, 'root')
         rse.del_rse(self.rse_exist_novolatile, 'root')
     except Exception as e:  # 'as' form is valid on Python 2.6+ and Python 3
         print(e)
Ejemplo n.º 6
0
 def teardown(self):
     """RucioCache (Func): Clean necessary rse and dids """
     # Best-effort cleanup: the replica on the non-volatile RSE is deleted first,
     # then both RSEs are removed as 'root'. Any failure is printed, not raised.
     try:
         replica.delete_replicas(self.rse_exist_novolatile,
                                 self.file_replica_on_novolatile)
         rse.del_rse(self.rse_exist_volatile, 'root')
         rse.del_rse(self.rse_exist_novolatile, 'root')
     except Exception as e:  # 'as' form is valid on Python 2.6+ and Python 3
         print(e)
Ejemplo n.º 7
0
def test_archive_on_dataset_level(rse_factory, did_factory, root_account):
    """Check how the ``is_archive`` metadata of datasets follows their content.

    Two datasets receive a plain file (flag must stay unset), then an archive
    with constituents (flag must become True on both, regardless of whether
    the archive was attached before or after it got its constituents), and
    finally the archive replica is deleted (flag must be unset again).
    """
    _rse_name, rse_id = rse_factory.make_xroot_rse()

    def register_file_replica(did):
        # Adds a 500 byte FILE replica of the given DID on the test RSE
        add_replicas(rse_id=rse_id,
                     files=[{**did, 'bytes': 500, 'type': 'FILE', 'adler32': 'beefbeef'}],
                     account=root_account)

    def is_archive(dataset):
        return get_metadata(**dataset)['is_archive']

    dataset1 = did_factory.make_dataset()
    dataset2 = did_factory.make_dataset()
    container = did_factory.make_container()
    attach_dids(dids=[dataset1, dataset2], account=root_account, **container)

    # Add a random file to the datasets to avoid dataset deletion when the archive is deleted
    a_file = did_factory.random_did()
    register_file_replica(a_file)
    for dataset in (dataset1, dataset2):
        attach_dids(dids=[a_file], account=root_account, **dataset)
    # adding a non-archive file should not set is_archive=True
    assert not is_archive(dataset1)

    # Create an archive and its constituents, attach the archive to datasets
    archive = did_factory.random_did(name_prefix='archive', name_suffix='.zip')
    register_file_replica(archive)
    constituents = [did_factory.random_did() for _ in range(2)]
    # Add archive to one dataset _before_ attaching files to the archive (before is_archive is set on the archive did)
    attach_dids(dids=[archive], account=root_account, **dataset1)
    constituent_files = [{**c, 'bytes': 200, 'adler32': 'ababbaba'} for c in constituents]
    attach_dids(dids=constituent_files, account=root_account, **archive)
    # Attach to another dataset _after_ attaching files to the archive
    attach_dids(dids=[archive], account=root_account, **dataset2)

    # Both datasets must have is_archive = True
    assert is_archive(dataset1) is True
    assert is_archive(dataset2) is True

    # Delete the archive, the datasets must now have is_archive == false
    delete_replicas(rse_id=rse_id, files=[archive])

    assert not is_archive(dataset1)
    assert not is_archive(dataset2)
Ejemplo n.º 8
0
 def __cleanup_replicas(self, session=None):
     """
     Delete every replica, and every bad-replica entry, located on one of
     the RSEs recorded in ``self.created_rses``.

     :param session: The database session in use.
     """
     # Cleanup Replicas and Parent Datasets
     association = models.RSEFileAssociation
     replica_rows = session.query(association.scope, association.name, association.rse_id). \
         filter(association.rse_id.in_(self.created_rses))
     files_per_rse = {}
     for scope, name, rse_id in replica_rows:
         if rse_id not in files_per_rse:
             files_per_rse[rse_id] = []
         files_per_rse[rse_id].append({'scope': scope, 'name': name})
     # One bulk deletion per RSE
     for rse_id in files_per_rse:
         replica_core.delete_replicas(rse_id=rse_id, files=files_per_rse[rse_id], session=session)
     # Cleanup BadReplicas
     on_created_rse = models.BadReplicas.rse_id.in_(self.created_rses)
     session.query(models.BadReplicas).filter(on_created_rse).delete(synchronize_session=False)
Ejemplo n.º 9
0
    def test_abacus_collection_replica(self):
        """ ABACUS (COLLECTION REPLICA): Test update of collection replica. """

        def list_dataset_replicas():
            # All dataset replicas the client reports for the dataset under test
            return list(self.replica_client.list_dataset_replicas(self.scope, self.dataset))

        def add_dataset_rule():
            self.rule_client.add_replication_rule([{'scope': self.scope, 'name': self.dataset}], 1, self.rse, lifetime=-1)

        # Upload two generated files, attach them to a new dataset and put a rule on it
        self.files = [{'did_scope': self.scope, 'did_name': 'file_' + generate_uuid(), 'path': file_generator(size=self.file_sizes), 'rse': self.rse, 'lifetime': -1}
                      for _ in range(2)]
        self.did_client.add_did(self.scope, self.dataset, DIDType.DATASET, lifetime=-1)
        self.upload_client.upload(self.files)
        attached = [{'name': uploaded['did_name'], 'scope': uploaded['did_scope']} for uploaded in self.files]
        self.did_client.attach_dids(scope=self.scope, name=self.dataset, dids=attached)
        add_dataset_rule()
        # The local copies are not needed once uploaded
        for uploaded in self.files:
            os.remove(uploaded['path'])

        # Check dataset replica after rule creation - initial data
        dataset_replica = list_dataset_replicas()[0]
        assert_equal(dataset_replica['bytes'], 0)
        assert_equal(dataset_replica['length'], 0)
        assert_equal(dataset_replica['available_bytes'], 0)
        assert_equal(dataset_replica['available_length'], 0)
        assert_equal(str(dataset_replica['state']), 'UNAVAILABLE')

        # Run Abacus
        collection_replica.run(once=True)

        # Check dataset replica after abacus - abacus should update the collection_replica table from updated_col_rep
        dataset_replica = list_dataset_replicas()[0]
        assert_equal(dataset_replica['bytes'], len(self.files) * self.file_sizes)
        assert_equal(dataset_replica['length'], len(self.files))
        assert_equal(dataset_replica['available_bytes'], len(self.files) * self.file_sizes)
        assert_equal(dataset_replica['available_length'], len(self.files))
        assert_equal(str(dataset_replica['state']), 'AVAILABLE')

        # Delete one file -> collection replica should be unavailable
        cleaner.run(once=True)
        first_file = {'name': self.files[0]['did_name'], 'scope': InternalScope(self.files[0]['did_scope'])}
        delete_replicas(rse_id=self.rse_id, files=[first_file])
        add_dataset_rule()
        collection_replica.run(once=True)
        dataset_replica = list_dataset_replicas()[0]
        assert_equal(dataset_replica['length'], len(self.files))
        assert_equal(dataset_replica['bytes'], len(self.files) * self.file_sizes)
        assert_equal(dataset_replica['available_length'], len(self.files) - 1)
        assert_equal(dataset_replica['available_bytes'], (len(self.files) - 1) * self.file_sizes)
        assert_equal(str(dataset_replica['state']), 'UNAVAILABLE')

        # Delete all files -> collection replica should be deleted
        cleaner.run(once=True)
        reaper.run(once=True, rses=[self.rse], greedy=True)
        add_dataset_rule()
        collection_replica.run(once=True)
        remaining = list_dataset_replicas()
        assert_equal(len(remaining), 0)
Ejemplo n.º 10
0
    def test_delete_replicas_from_datasets(self):
        """ REPLICA (CORE): Delete replicas from dataset """
        # The same five files are attached to two datasets; only the first
        # dataset is closed before the replicas are deleted.
        tmp_scope = 'mock'
        nbfiles = 5
        closed_dsn = 'dsn_%s' % generate_uuid()
        open_dsn = 'dsn_%s' % generate_uuid()
        files1 = []
        for _ in range(nbfiles):
            files1.append({'scope': tmp_scope,
                           'name': 'file_%s' % generate_uuid(),
                           'bytes': 1,
                           'adler32': '0cc737eb',
                           'meta': {'events': 10}})

        for dsn in (closed_dsn, open_dsn):
            add_did(scope=tmp_scope, name=dsn, type=DIDType.DATASET, account='root')

        attach_dids(scope=tmp_scope, name=closed_dsn, rse='MOCK', dids=files1, account='root')
        attach_dids(scope=tmp_scope, name=open_dsn, dids=files1, account='root')

        set_status(scope=tmp_scope, name=closed_dsn, open=False)

        delete_replicas(rse='MOCK', files=files1)

        # The closed dataset must be gone
        with assert_raises(DataIdentifierNotFound):
            get_did(scope=tmp_scope, name=closed_dsn)

        # The open dataset must still exist, without any files
        get_did(scope=tmp_scope, name=open_dsn)

        remaining_files = list(list_files(scope=tmp_scope, name=open_dsn))
        assert_equal(remaining_files, [])
Ejemplo n.º 11
0
    def test_delete_replicas(self):
        """ REPLICA (CORE): Delete replicas """
        tmp_scope = 'mock'
        nbfiles = 5

        def new_files():
            # A fresh batch of nbfiles file descriptions with unique names
            return [{'scope': tmp_scope,
                     'name': 'file_%s' % generate_uuid(),
                     'bytes': 1,
                     'adler32': '0cc737eb',
                     'meta': {'events': 10}} for _ in range(nbfiles)]

        # files1 only has replicas on MOCK
        files1 = new_files()
        add_replicas(rse='MOCK', files=files1, account='root', ignore_availability=True)

        # files2 has replicas on MOCK and on MOCK3
        files2 = new_files()
        for rse_name in ('MOCK', 'MOCK3'):
            add_replicas(rse=rse_name, files=files2, account='root', ignore_availability=True)

        delete_replicas(rse='MOCK', files=files1 + files2)

        # Files whose replica was only on MOCK must no longer be found
        for did in files1:
            with assert_raises(DataIdentifierNotFound):
                print(get_did(scope=did['scope'], name=did['name']))

        # Files with a remaining replica on MOCK3 must still be found
        for did in files2:
            get_did(scope=did['scope'], name=did['name'])
Ejemplo n.º 12
0
def delete_replicas(rse,
                    files,
                    issuer,
                    ignore_availability=False,
                    vo='def',
                    session=None):
    """
    Delete a batch of file replicas from one RSE on behalf of an account.

    :param rse: The RSE name.
    :param files: The list of files. The 'scope' entry of every file is
                  replaced in place by an InternalScope.
    :param issuer: The issuer account.
    :param ignore_availability: Ignore blocked RSEs. Reset to False when the
                                issuer lacks the 'skip_availability_check'
                                permission.
    :param vo: The VO to act on.
    :param session: The database session in use.

    :raises AccessDenied: If the issuer may not delete replicas on the RSE.
    """
    validate_schema(name='r_dids', obj=files, vo=vo)

    rse_id = get_rse_id(rse=rse, vo=vo, session=session)
    permission_kwargs = {'rse': rse, 'rse_id': rse_id}

    may_delete = permission.has_permission(issuer=issuer, vo=vo, action='delete_replicas',
                                           kwargs=permission_kwargs, session=session)
    if not may_delete:
        message = 'Account %s can not delete file replicas on %s' % (issuer, rse)
        raise exception.AccessDenied(message)

    may_skip_check = permission.has_permission(issuer=issuer, vo=vo, action='skip_availability_check',
                                               kwargs=permission_kwargs, session=session)
    if not may_skip_check:
        # The caller's request to skip the availability check is not honoured
        ignore_availability = False

    # The core layer is called with InternalScope objects instead of the raw scope values
    for entry in files:
        entry['scope'] = InternalScope(entry['scope'], vo=vo)

    replica.delete_replicas(rse_id=rse_id, files=files, ignore_availability=ignore_availability, session=session)
Ejemplo n.º 13
0
def delete_replicas(rse, files, issuer, ignore_availability=False):
    """
    Delete a batch of file replicas from one RSE on behalf of an account.

    :param rse: The RSE name.
    :param files: The list of files.
    :param issuer: The issuer account.
    :param ignore_availability: Ignore the RSE blacklisting. Reset to False
                                when the issuer lacks the
                                'skip_availability_check' permission.

    :raises AccessDenied: If the issuer may not delete replicas on the RSE.
    """
    validate_schema(name='r_dids', obj=files)

    permission_kwargs = {'rse': rse}

    may_delete = permission.has_permission(issuer=issuer,
                                           action='delete_replicas',
                                           kwargs=permission_kwargs)
    if not may_delete:
        message = 'Account %s can not delete file replicas on %s' % (issuer, rse)
        raise exception.AccessDenied(message)

    may_skip_check = permission.has_permission(issuer=issuer,
                                               action='skip_availability_check',
                                               kwargs=permission_kwargs)
    if not may_skip_check:
        # The caller's request to skip the availability check is not honoured
        ignore_availability = False

    replica.delete_replicas(rse=rse,
                            files=files,
                            ignore_availability=ignore_availability)
Ejemplo n.º 14
0
def mock_request(db_session, vo, source_rse, dest_rse, file):
    """Yield a PREPARING request for ``file`` towards ``dest_rse``.

    Before yielding, a replica of the file is added on ``source_rse`` under
    the root account of ``vo`` and the request is saved and committed. After
    the yield, the request and the replica are deleted and the session is
    committed again.
    """
    root = InternalAccount('root', vo=vo)

    add_replicas(rse_id=source_rse['id'], files=[file], account=root, session=db_session)

    request_fields = {
        'state': RequestState.PREPARING,
        'scope': file['scope'],
        'name': file['name'],
        'dest_rse_id': dest_rse['id'],
        'account': root,
    }
    request = models.Request(**request_fields)
    request.save(session=db_session)
    db_session.commit()

    yield request

    # Teardown: drop the request first, then the replica it referred to
    request.delete(session=db_session)
    delete_replicas(rse_id=source_rse['id'], files=[file], session=db_session)
    db_session.commit()
Ejemplo n.º 15
0
 def __cleanup_replicas(self, session=None):
     """
     Delete every replica, and every bad-replica entry, of the DIDs
     recorded in ``self.created_dids``.

     :param session: The database session in use.
     """
     # Cleanup Replicas and Parent Datasets
     # The filter must constrain the table being selected from. Filtering on
     # models.ReplicationRule here would cross-join the replicas table with the
     # rules table and leave the selected replica rows unconstrained.
     query = session.query(models.RSEFileAssociation.scope, models.RSEFileAssociation.name, models.RSEFileAssociation.rse_id). \
         filter(or_(and_(models.RSEFileAssociation.scope == did['scope'],
                         models.RSEFileAssociation.name == did['name'])
                    for did in self.created_dids))
     # Group the files per RSE so that one bulk deletion is issued per RSE
     dids_by_rse = {}
     for scope, name, rse_id in query:
         dids_by_rse.setdefault(rse_id, []).append({
             'scope': scope,
             'name': name
         })
     for rse_id, dids in dids_by_rse.items():
         replica_core.delete_replicas(rse_id=rse_id,
                                      files=dids,
                                      session=session)
     # Cleanup BadReplicas
     session.query(models.BadReplicas).filter(
         or_(
             and_(models.BadReplicas.scope == did['scope'],
                  models.BadReplicas.name == did['name'])
             for did in self.created_dids)).delete(
                 synchronize_session=False)
Ejemplo n.º 16
0
    def cleanup(self, session=None):
        """
        Remove everything that references the RSEs recorded in
        ``self.created_rses``: transfers, rules holding locks on them,
        replicas, RSE-related table rows and distances, and finally the RSEs
        themselves via ``rse_core.del_rse``. Does nothing when the list is empty.

        :param session: The database session in use.
        """
        if not self.created_rses:
            return

        # Cleanup Transfers
        source_on_rse = or_(models.Source.dest_rse_id.in_(self.created_rses),
                            models.Source.rse_id.in_(self.created_rses))
        session.query(models.Source).filter(source_on_rse).delete(synchronize_session=False)
        request_on_rse = or_(models.Request.dest_rse_id.in_(self.created_rses),
                             models.Request.source_rse_id.in_(self.created_rses))
        session.query(models.Request).filter(request_on_rse).delete(synchronize_session=False)

        # Cleanup Locks and Rules
        locked_rule_ids = session.query(models.ReplicationRule.id). \
            join(models.ReplicaLock, models.ReplicationRule.id == models.ReplicaLock.rule_id). \
            filter(models.ReplicaLock.rse_id.in_(self.created_rses)).distinct()
        for (rule_id,) in locked_rule_ids:
            rule_core.delete_rule(rule_id, session=session)

        # Cleanup Replicas and Parent Datasets
        association = models.RSEFileAssociation
        replica_rows = session.query(association.scope, association.name, association.rse_id). \
            filter(association.rse_id.in_(self.created_rses))
        files_per_rse = {}
        for scope, name, rse_id in replica_rows:
            if rse_id not in files_per_rse:
                files_per_rse[rse_id] = []
            files_per_rse[rse_id].append({'scope': scope, 'name': name})
        for rse_id in files_per_rse:
            replica_core.delete_replicas(rse_id=rse_id, files=files_per_rse[rse_id], session=session)

        # Cleanup RSEs
        rse_tables = (models.RSEAttrAssociation, models.RSEProtocols, models.UpdatedRSECounter,
                      models.RSEUsage, models.RSELimit, models.RSETransferLimit, models.RSEQoSAssociation)
        for table in rse_tables:
            session.query(table).filter(table.rse_id.in_(self.created_rses)).delete(synchronize_session=False)

        distance_touches_rse = or_(models.Distance.src_rse_id.in_(self.created_rses),
                                   models.Distance.dest_rse_id.in_(self.created_rses))
        session.query(models.Distance).filter(distance_touches_rse).delete(synchronize_session=False)
        for rse_id in self.created_rses:
            # Only archive RSE instead of deleting. Account handling code doesn't expect RSEs to ever be deleted.
            # So running test in parallel results in some tests failing on foreign key errors.
            rse_core.del_rse(rse_id, session=session)
Ejemplo n.º 17
0
def reaper(rses,
           include_rses,
           exclude_rses,
           vos=None,
           chunk_size=100,
           once=False,
           greedy=False,
           scheme=None,
           delay_seconds=0,
           sleep_time=60,
           auto_exclude_threshold=100,
           auto_exclude_timeout=600):
    """
    Main loop to select and delete files.

    :param rses:                   List of RSEs the reaper should work against. If empty, it considers all RSEs.
    :param include_rses:           RSE expression to include RSEs.
    :param exclude_rses:           RSE expression to exclude RSEs from the Reaper.
    :param vos:                    VOs on which to look for RSEs. Only used in multi-VO mode.
                                   If None, we either use all VOs if run from "def", or the current VO otherwise.
    :param chunk_size:             The size of chunk for deletion.
    :param once:                   If True, only runs one iteration of the main loop.
    :param greedy:                 If True, delete right away replicas with tombstone.
    :param scheme:                 Force the reaper to use a particular protocol, e.g., mock.
    :param delay_seconds:          The delay to query replicas in BEING_DELETED state.
    :param sleep_time:             Time between two cycles.
    :param auto_exclude_threshold: Number of service unavailable exceptions after which the RSE gets temporarily excluded.
    :param auto_exclude_timeout:   Timeout for temporarily excluded RSEs.
    """
    hostname = socket.getfqdn()
    executable = 'reaper'
    pid = os.getpid()
    hb_thread = threading.current_thread()
    sanity_check(executable=executable, hostname=hostname)
    heart_beat = live(executable, hostname, pid, hb_thread)
    prepend_str = 'reaper[%i/%i] ' % (heart_beat['assign_thread'],
                                      heart_beat['nr_threads'])
    logger = formatted_logger(logging.log, prepend_str + '%s')

    logger(logging.INFO, 'Reaper starting')

    if not once:
        GRACEFUL_STOP.wait(
            10
        )  # To prevent running on the same partition if all the reapers restart at the same time
    heart_beat = live(executable, hostname, pid, hb_thread)
    prepend_str = 'reaper[%i/%i] ' % (heart_beat['assign_thread'],
                                      heart_beat['nr_threads'])
    logger = formatted_logger(logging.log, prepend_str + '%s')
    logger(logging.INFO, 'Reaper started')

    while not GRACEFUL_STOP.is_set():
        # try to get auto exclude parameters from the config table. Otherwise use CLI parameters.
        try:
            auto_exclude_threshold = config_get('reaper',
                                                'auto_exclude_threshold',
                                                default=auto_exclude_threshold)
            auto_exclude_timeout = config_get('reaper',
                                              'auto_exclude_timeout',
                                              default=auto_exclude_timeout)
        except (NoOptionError, NoSectionError, RuntimeError):
            pass

        # Check if there is a Judge Evaluator backlog
        try:
            max_evaluator_backlog_count = config_get(
                'reaper', 'max_evaluator_backlog_count')
        except (NoOptionError, NoSectionError, RuntimeError):
            max_evaluator_backlog_count = None
        try:
            max_evaluator_backlog_duration = config_get(
                'reaper', 'max_evaluator_backlog_duration')
        except (NoOptionError, NoSectionError, RuntimeError):
            max_evaluator_backlog_duration = None
        if max_evaluator_backlog_count or max_evaluator_backlog_duration:
            backlog = get_evaluation_backlog()
            if max_evaluator_backlog_count and \
               backlog[0] and \
               max_evaluator_backlog_duration and \
               backlog[1] and \
               backlog[0] > max_evaluator_backlog_count and \
               backlog[1] < datetime.utcnow() - timedelta(minutes=max_evaluator_backlog_duration):
                logger(
                    logging.ERROR,
                    'Reaper: Judge evaluator backlog count and duration hit, stopping operation'
                )
                GRACEFUL_STOP.wait(30)
                continue
            elif max_evaluator_backlog_count and backlog[
                    0] and backlog[0] > max_evaluator_backlog_count:
                logger(
                    logging.ERROR,
                    'Reaper: Judge evaluator backlog count hit, stopping operation'
                )
                GRACEFUL_STOP.wait(30)
                continue
            elif max_evaluator_backlog_duration and backlog[
                    1] and backlog[1] < datetime.utcnow() - timedelta(
                        minutes=max_evaluator_backlog_duration):
                logger(
                    logging.ERROR,
                    'Reaper: Judge evaluator backlog duration hit, stopping operation'
                )
                GRACEFUL_STOP.wait(30)
                continue

        rses_to_process = get_rses_to_process(rses, include_rses, exclude_rses,
                                              vos)
        if not rses_to_process:
            logger(logging.ERROR,
                   'Reaper: No RSEs found. Will sleep for 30 seconds')
            GRACEFUL_STOP.wait(30)
            continue
        start_time = time.time()
        try:
            staging_areas = []
            dict_rses = {}
            heart_beat = live(executable,
                              hostname,
                              pid,
                              hb_thread,
                              older_than=3600)
            prepend_str = 'reaper[%i/%i] ' % (heart_beat['assign_thread'],
                                              heart_beat['nr_threads'])
            logger = formatted_logger(logging.log, prepend_str + '%s')
            tot_needed_free_space = 0
            for rse in rses_to_process:
                # Check if the RSE is a staging area
                if rse['staging_area']:
                    staging_areas.append(rse['rse'])
                # Check if RSE is blocklisted
                if rse['availability'] % 2 == 0:
                    logger(logging.DEBUG, 'RSE %s is blocklisted for delete',
                           rse['rse'])
                    continue
                needed_free_space, only_delete_obsolete = __check_rse_usage(
                    rse['rse'], rse['id'], greedy=greedy, logger=logger)
                if needed_free_space:
                    dict_rses[(rse['rse'], rse['id'])] = [
                        needed_free_space, only_delete_obsolete
                    ]
                    tot_needed_free_space += needed_free_space
                elif only_delete_obsolete:
                    dict_rses[(rse['rse'], rse['id'])] = [
                        needed_free_space, only_delete_obsolete
                    ]
                else:
                    logger(logging.DEBUG, 'Nothing to delete on %s',
                           rse['rse'])

            # Ordering the RSEs based on the needed free space
            sorted_dict_rses = OrderedDict(
                sorted(dict_rses.items(), key=lambda x: x[1][0], reverse=True))
            logger(logging.DEBUG,
                   'List of RSEs to process ordered by needed space desc: %s',
                   str(sorted_dict_rses))

            # Get the mapping between the RSE and the hostname used for deletion. The dictionary has RSE as key and (hostanme, rse_info) as value
            rses_hostname_mapping = get_rses_to_hostname_mapping()
            # logger(logging.DEBUG, '%s Mapping RSEs to hostnames used for deletion : %s', prepend_str, str(rses_hostname_mapping))

            list_rses_mult = []

            # Loop over the RSEs. rse_key = (rse, rse_id) and fill list_rses_mult that contains all RSEs to process with different multiplicity
            for rse_key in dict_rses:
                rse_name, rse_id = rse_key
                # The length of the deletion queue scales inversily with the number of workers
                # The ceil increase the weight of the RSE with small amount of files to delete
                if tot_needed_free_space:
                    max_workers = ceil(dict_rses[rse_key][0] /
                                       tot_needed_free_space * 1000 /
                                       heart_beat['nr_threads'])
                else:
                    max_workers = 1

                list_rses_mult.extend([
                    (rse_name, rse_id, dict_rses[rse_key][0],
                     dict_rses[rse_key][1]) for _ in range(int(max_workers))
                ])
            random.shuffle(list_rses_mult)

            paused_rses = []
            for rse_name, rse_id, needed_free_space, max_being_deleted_files in list_rses_mult:
                result = REGION.get('pause_deletion_%s' % rse_id,
                                    expiration_time=120)
                if result is not NO_VALUE:
                    paused_rses.append(rse_name)
                    logger(
                        logging.DEBUG,
                        'Not enough replicas to delete on %s during the previous cycle. Deletion paused for a while',
                        rse_name)
                    continue
                result = REGION.get('temporary_exclude_%s' % rse_id,
                                    expiration_time=auto_exclude_timeout)
                if result is not NO_VALUE:
                    logger(
                        logging.WARNING,
                        'Too many failed attempts for %s in last cycle. RSE is temporarly excluded.',
                        rse_name)
                    labels = {'rse': rse_name}
                    EXCLUDED_RSE_GAUGE.labels(**labels).set(1)
                    continue
                labels = {'rse': rse_name}
                EXCLUDED_RSE_GAUGE.labels(**labels).set(0)
                percent = 0
                if tot_needed_free_space:
                    percent = needed_free_space / tot_needed_free_space * 100
                logger(
                    logging.DEBUG,
                    'Working on %s. Percentage of the total space needed %.2f',
                    rse_name, percent)
                try:
                    rse_hostname, rse_info = rses_hostname_mapping[rse_id]
                except KeyError:
                    logger(logging.DEBUG, "Hostname lookup for %s failed.",
                           rse_name)
                    REGION.set('pause_deletion_%s' % rse_id, True)
                    continue
                rse_hostname_key = '%s,%s' % (rse_id, rse_hostname)
                payload_cnt = list_payload_counts(executable,
                                                  older_than=600,
                                                  hash_executable=None,
                                                  session=None)
                # logger(logging.DEBUG, '%s Payload count : %s', prepend_str, str(payload_cnt))
                tot_threads_for_hostname = 0
                tot_threads_for_rse = 0
                for key in payload_cnt:
                    if key and key.find(',') > -1:
                        if key.split(',')[1] == rse_hostname:
                            tot_threads_for_hostname += payload_cnt[key]
                        if key.split(',')[0] == str(rse_id):
                            tot_threads_for_rse += payload_cnt[key]
                max_deletion_thread = get_max_deletion_threads_by_hostname(
                    rse_hostname)
                if rse_hostname_key in payload_cnt and tot_threads_for_hostname >= max_deletion_thread:
                    logger(
                        logging.DEBUG,
                        'Too many deletion threads for %s on RSE %s. Back off',
                        rse_hostname, rse_name)
                    # Might need to reschedule a try on this RSE later in the same cycle
                    continue
                logger(
                    logging.INFO,
                    'Nb workers on %s smaller than the limit (current %i vs max %i). Starting new worker on RSE %s',
                    rse_hostname, tot_threads_for_hostname,
                    max_deletion_thread, rse_name)
                live(executable,
                     hostname,
                     pid,
                     hb_thread,
                     older_than=600,
                     hash_executable=None,
                     payload=rse_hostname_key,
                     session=None)
                logger(logging.DEBUG, 'Total deletion workers for %s : %i',
                       rse_hostname, tot_threads_for_hostname + 1)
                # List and mark BEING_DELETED the files to delete
                del_start_time = time.time()
                only_delete_obsolete = dict_rses[(rse_name, rse_id)][1]
                try:
                    with monitor.record_timer_block(
                            'reaper.list_unlocked_replicas'):
                        if only_delete_obsolete:
                            logger(
                                logging.DEBUG,
                                'Will run list_and_mark_unlocked_replicas on %s. No space needed, will only delete EPOCH tombstoned replicas',
                                rse_name)
                        replicas = list_and_mark_unlocked_replicas(
                            limit=chunk_size,
                            bytes_=needed_free_space,
                            rse_id=rse_id,
                            delay_seconds=delay_seconds,
                            only_delete_obsolete=only_delete_obsolete,
                            session=None)
                    logger(
                        logging.DEBUG,
                        'list_and_mark_unlocked_replicas on %s for %s bytes in %s seconds: %s replicas',
                        rse_name, needed_free_space,
                        time.time() - del_start_time, len(replicas))
                    if len(replicas) < chunk_size:
                        logger(
                            logging.DEBUG,
                            'Not enough replicas to delete on %s (%s requested vs %s returned). Will skip any new attempts on this RSE until next cycle',
                            rse_name, chunk_size, len(replicas))
                        REGION.set('pause_deletion_%s' % rse_id, True)

                except (DatabaseException, IntegrityError,
                        DatabaseError) as error:
                    logger(logging.ERROR, '%s', str(error))
                    continue
                except Exception:
                    logger(logging.CRITICAL, 'Exception', exc_info=True)
                # Physical  deletion will take place there
                try:
                    prot = rsemgr.create_protocol(rse_info,
                                                  'delete',
                                                  scheme=scheme,
                                                  logger=logger)
                    for file_replicas in chunks(replicas, chunk_size):
                        # Refresh heartbeat
                        live(executable,
                             hostname,
                             pid,
                             hb_thread,
                             older_than=600,
                             hash_executable=None,
                             payload=rse_hostname_key,
                             session=None)
                        del_start_time = time.time()
                        for replica in file_replicas:
                            try:
                                replica['pfn'] = str(
                                    list(
                                        rsemgr.lfns2pfns(
                                            rse_settings=rse_info,
                                            lfns=[{
                                                'scope':
                                                replica['scope'].external,
                                                'name': replica['name'],
                                                'path': replica['path']
                                            }],
                                            operation='delete',
                                            scheme=scheme).values())[0])
                            except (ReplicaUnAvailable,
                                    ReplicaNotFound) as error:
                                logger(
                                    logging.WARNING,
                                    'Failed get pfn UNAVAILABLE replica %s:%s on %s with error %s',
                                    replica['scope'], replica['name'],
                                    rse_name, str(error))
                                replica['pfn'] = None

                            except Exception:
                                logger(logging.CRITICAL,
                                       'Exception',
                                       exc_info=True)

                        deleted_files = delete_from_storage(
                            file_replicas,
                            prot,
                            rse_info,
                            staging_areas,
                            auto_exclude_threshold,
                            logger=logger)
                        logger(logging.INFO,
                               '%i files processed in %s seconds',
                               len(file_replicas),
                               time.time() - del_start_time)

                        # Then finally delete the replicas
                        del_start = time.time()
                        with monitor.record_timer_block(
                                'reaper.delete_replicas'):
                            delete_replicas(rse_id=rse_id, files=deleted_files)
                        logger(
                            logging.DEBUG,
                            'delete_replicas successed on %s : %s replicas in %s seconds',
                            rse_name, len(deleted_files),
                            time.time() - del_start)
                        DELETION_COUNTER.inc(len(deleted_files))
                except Exception:
                    logger(logging.CRITICAL, 'Exception', exc_info=True)

            if paused_rses:
                logger(logging.INFO,
                       'Deletion paused for a while for following RSEs: %s',
                       ', '.join(paused_rses))

            if once:
                break

            daemon_sleep(start_time=start_time,
                         sleep_time=sleep_time,
                         graceful_stop=GRACEFUL_STOP,
                         logger=logger)

        except DatabaseException as error:
            logger(logging.WARNING, 'Reaper:  %s', str(error))
        except Exception:
            logger(logging.CRITICAL, 'Exception', exc_info=True)
        finally:
            if once:
                break

    die(executable=executable, hostname=hostname, pid=pid, thread=hb_thread)
    logger(logging.INFO, 'Graceful stop requested')
    logger(logging.INFO, 'Graceful stop done')
    return
Ejemplo n.º 18
0
def reaper(rses, worker_number=0, child_number=0, total_children=1, chunk_size=100,
           once=False, greedy=False, scheme=None, delay_seconds=0):
    """
    Main loop to select and delete files.

    For every RSE the loop lists unlocked replicas, marks them BEING_DELETED,
    removes them from storage through the RSE 'delete' protocol and finally
    removes the successfully processed replicas from the catalogue.

    :param rses: List of RSEs the reaper should work against. If empty, it considers all RSEs.
    :param worker_number: The worker number.
    :param child_number: The child number.
    :param total_children: The total number of children created per worker.
    :param chunk_size: the size of chunk for deletion.
    :param once: If True, only runs one iteration of the main loop.
    :param greedy: If True, delete right away replicas with tombstone.
    :param scheme: Force the reaper to use a particular protocol, e.g., mock.
    :param delay_seconds: Forwarded unchanged to list_unlocked_replicas as ``delay_seconds``.
    """
    rse_names = [rse['rse'] for rse in rses]
    logging.info('Starting Reaper: Worker %s, child %s will work on RSEs: %s',
                 worker_number, child_number, ', '.join(rse_names))

    pid = os.getpid()
    thread = threading.current_thread()
    hostname = socket.gethostname()
    executable = ' '.join(sys.argv)
    # Generate a hash just for the subset of RSEs
    hash_executable = hashlib.sha256((sys.argv[0] + ''.join(rse_names)).encode()).hexdigest()
    sanity_check(executable=None, hostname=hostname)

    # Maps rse id -> datetime before which the RSE is skipped.
    nothing_to_do = {}
    while not GRACEFUL_STOP.is_set():
        try:
            # heartbeat
            live(executable=executable, hostname=hostname, pid=pid, thread=thread, hash_executable=hash_executable)
            checkpoint_time = datetime.datetime.now()

            for rse in sort_rses(rses):
                try:
                    # Refresh the heartbeat at most once per minute while iterating over the RSEs.
                    if checkpoint_time + datetime.timedelta(minutes=1) < datetime.datetime.now():
                        live(executable=executable, hostname=hostname, pid=pid, thread=thread, hash_executable=hash_executable)
                        checkpoint_time = datetime.datetime.now()

                    if rse['id'] in nothing_to_do and nothing_to_do[rse['id']] > datetime.datetime.now():
                        continue
                    logging.info('Reaper %s-%s: Running on RSE %s %s', worker_number, child_number,
                                 rse['rse'], nothing_to_do.get(rse['id']))

                    rse_info = rsemgr.get_rse_info(rse_id=rse['id'])
                    rse_protocol = rse_core.get_rse_protocols(rse_id=rse['id'])

                    if not rse_protocol['availability_delete']:
                        logging.info('Reaper %s-%s: RSE %s is not available for deletion', worker_number, child_number, rse_info['rse'])
                        nothing_to_do[rse['id']] = datetime.datetime.now() + datetime.timedelta(minutes=30)
                        continue

                    # Temporary hack to force gfal for deletion
                    for protocol in rse_info['protocols']:
                        if protocol['impl'] == 'rucio.rse.protocols.srm.Default' or protocol['impl'] == 'rucio.rse.protocols.gsiftp.Default':
                            protocol['impl'] = 'rucio.rse.protocols.gfal.Default'

                    needed_free_space, max_being_deleted_files = None, 100
                    needed_free_space_per_child = None
                    if not greedy:
                        max_being_deleted_files, needed_free_space, used, free = __check_rse_usage(rse_id=rse['id'])
                        # Log the RSE name, like every other message here, rather than the whole rse dict.
                        logging.info('Reaper %s-%s: Space usage for RSE %s - max_being_deleted_files: %s, needed_free_space: %s, used: %s, free: %s',
                                     worker_number, child_number, rse['rse'], max_being_deleted_files, needed_free_space, used, free)
                        if needed_free_space <= 0:
                            needed_free_space, needed_free_space_per_child = 0, 0
                            logging.info('Reaper %s-%s: free space is above minimum limit for %s', worker_number, child_number, rse['rse'])
                        else:
                            # Split the space to free evenly between the children.
                            if total_children and total_children > 0:
                                needed_free_space_per_child = needed_free_space / float(total_children)

                    start = time.time()
                    with monitor.record_timer_block('reaper.list_unlocked_replicas'):
                        replicas = list_unlocked_replicas(rse_id=rse['id'],
                                                          bytes=needed_free_space_per_child,
                                                          limit=max_being_deleted_files,
                                                          worker_number=child_number,
                                                          total_workers=total_children,
                                                          delay_seconds=delay_seconds)
                    logging.debug('Reaper %s-%s: list_unlocked_replicas on %s for %s bytes in %s seconds: %s replicas', worker_number, child_number, rse['rse'], needed_free_space_per_child, time.time() - start, len(replicas))

                    if not replicas:
                        nothing_to_do[rse['id']] = datetime.datetime.now() + datetime.timedelta(minutes=30)
                        logging.info('Reaper %s-%s: No replicas to delete %s. The next check will occur at %s',
                                     worker_number, child_number, rse['rse'],
                                     nothing_to_do[rse['id']])
                        continue

                    prot = rsemgr.create_protocol(rse_info, 'delete', scheme=scheme)
                    for files in chunks(replicas, chunk_size):
                        logging.debug('Reaper %s-%s: Running on : %s', worker_number, child_number, str(files))
                        try:
                            # Mark the whole chunk as BEING_DELETED before touching the storage.
                            update_replicas_states(replicas=[dict(list(replica.items()) + [('state', ReplicaState.BEING_DELETED), ('rse_id', rse['id'])]) for replica in files], nowait=True)
                            for replica in files:
                                try:
                                    replica['pfn'] = str(list(rsemgr.lfns2pfns(rse_settings=rse_info,
                                                                               lfns=[{'scope': replica['scope'].external, 'name': replica['name'], 'path': replica['path']}],
                                                                               operation='delete', scheme=scheme).values())[0])
                                except (ReplicaUnAvailable, ReplicaNotFound) as error:
                                    err_msg = 'Failed to get pfn UNAVAILABLE replica %s:%s on %s with error %s' % (replica['scope'], replica['name'], rse['rse'], str(error))
                                    logging.warning('Reaper %s-%s: %s', worker_number, child_number, err_msg)
                                    # A missing pfn skips the physical deletion below; the catalogue entry is still removed.
                                    replica['pfn'] = None

                            monitor.record_counter(counters='reaper.deletion.being_deleted', delta=len(files))

                            try:
                                deleted_files = []
                                prot.connect()
                                for replica in files:
                                    try:
                                        deletion_dict = {'scope': replica['scope'].external,
                                                         'name': replica['name'],
                                                         'rse': rse_info['rse'],
                                                         'rse_id': rse_info['id'],
                                                         'file-size': replica['bytes'],
                                                         'bytes': replica['bytes'],
                                                         'url': replica['pfn'],
                                                         'protocol': prot.attributes['scheme']}
                                        if replica['scope'].vo != 'def':
                                            deletion_dict['vo'] = replica['scope'].vo
                                        logging.info('Reaper %s-%s: Deletion ATTEMPT of %s:%s as %s on %s', worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'])
                                        start = time.time()
                                        if rse['staging_area'] or rse['rse'].endswith("STAGING"):
                                            logging.warning('Reaper %s-%s: Deletion STAGING of %s:%s as %s on %s, will only delete the catalog and not do physical deletion',
                                                            worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'])
                                        else:
                                            if replica['pfn']:
                                                pfn = replica['pfn']
                                                # sign the URL if necessary
                                                if prot.attributes['scheme'] == 'https' and rse_info['sign_url'] is not None:
                                                    pfn = get_signed_url(rse['id'], rse_info['sign_url'], 'delete', pfn)
                                                prot.delete(pfn)
                                            else:
                                                logging.warning('Reaper %s-%s: Deletion UNAVAILABLE of %s:%s as %s on %s', worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'])
                                        # Measure once so the timer metric and the message report the same duration.
                                        duration = time.time() - start
                                        monitor.record_timer('daemons.reaper.delete.%s.%s' % (prot.attributes['scheme'], rse['rse']), duration * 1000)

                                        deleted_files.append({'scope': replica['scope'], 'name': replica['name']})

                                        deletion_dict['duration'] = duration
                                        add_message('deletion-done', deletion_dict)
                                        logging.info('Reaper %s-%s: Deletion SUCCESS of %s:%s as %s on %s in %s seconds', worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], duration)
                                    except SourceNotFound:
                                        err_msg = 'Deletion NOTFOUND of %s:%s as %s on %s' % (replica['scope'], replica['name'], replica['pfn'], rse['rse'])
                                        logging.warning(err_msg)
                                        # The file is already gone from storage, so the catalogue entry is removed as well.
                                        deleted_files.append({'scope': replica['scope'], 'name': replica['name']})
                                        if replica['state'] == ReplicaState.AVAILABLE:
                                            deletion_dict['reason'] = str(err_msg)
                                            add_message('deletion-failed', deletion_dict)
                                    except (ServiceUnavailable, RSEAccessDenied, ResourceTemporaryUnavailable) as error:
                                        logging.warning('Reaper %s-%s: Deletion NOACCESS of %s:%s as %s on %s: %s', worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(error))
                                        deletion_dict['reason'] = str(error)
                                        add_message('deletion-failed', deletion_dict)
                                    except Exception as error:
                                        logging.critical('Reaper %s-%s: Deletion CRITICAL of %s:%s as %s on %s: %s', worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(traceback.format_exc()))
                                        deletion_dict['reason'] = str(error)
                                        add_message('deletion-failed', deletion_dict)
                                    except:
                                        # Catch-all for non-Exception errors: log and move on to the next replica.
                                        logging.critical('Reaper %s-%s: Deletion CRITICAL of %s:%s as %s on %s: %s', worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(traceback.format_exc()))
                            except (ServiceUnavailable, RSEAccessDenied, ResourceTemporaryUnavailable) as error:
                                # Reached when one of these errors escapes the per-replica handling (e.g. from prot.connect()).
                                for replica in files:
                                    logging.warning('Reaper %s-%s: Deletion NOACCESS of %s:%s as %s on %s: %s', worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(error))
                                    payload = {'scope': replica['scope'].external,
                                               'name': replica['name'],
                                               'rse': rse_info['rse'],
                                               'rse_id': rse_info['id'],
                                               'file-size': replica['bytes'],
                                               'bytes': replica['bytes'],
                                               'url': replica['pfn'],
                                               'reason': str(error),
                                               'protocol': prot.attributes['scheme']}
                                    if replica['scope'].vo != 'def':
                                        # The VO belongs in the payload that is sent; deletion_dict may be unbound or stale here.
                                        payload['vo'] = replica['scope'].vo
                                    add_message('deletion-failed', payload)
                                    # Only the first replica of the chunk is reported: the loop exits after one iteration.
                                    break
                            finally:
                                prot.close()
                            start = time.time()
                            with monitor.record_timer_block('reaper.delete_replicas'):
                                delete_replicas(rse_id=rse['id'], files=deleted_files)
                            logging.debug('Reaper %s-%s: delete_replicas successes %s %s %s', worker_number, child_number, rse['rse'], len(deleted_files), time.time() - start)
                            monitor.record_counter(counters='reaper.deletion.done', delta=len(deleted_files))

                        except DatabaseException as error:
                            logging.warning('Reaper %s-%s: DatabaseException %s', worker_number, child_number, str(error))
                        except UnsupportedOperation as error:
                            logging.warning('Reaper %s-%s: UnsupportedOperation %s', worker_number, child_number, str(error))
                        except:
                            # Catch-all: log the traceback and continue with the next chunk.
                            logging.critical(traceback.format_exc())

                except RSENotFound as error:
                    logging.warning('Reaper %s-%s: RSE not found %s', worker_number, child_number, str(error))

                except:
                    # Catch-all: log the traceback and continue with the next RSE.
                    logging.critical(traceback.format_exc())

            if once:
                break

            time.sleep(1)

        except DatabaseException as error:
            logging.warning('Reaper:  %s', str(error))
        except:
            # Catch-all: log the traceback and start the next cycle.
            logging.critical(traceback.format_exc())

    die(executable=executable, hostname=hostname, pid=pid, thread=thread, hash_executable=hash_executable)
    logging.info('Graceful stop requested')
    logging.info('Graceful stop done')
    return
Ejemplo n.º 19
0
 def teardown(req):
     """Call ``delete_replicas`` for ``file`` on ``source_rse`` using ``req.db_session``."""
     source_rse_id = source_rse['id']
     files_to_remove = [file]
     delete_replicas(rse_id=source_rse_id, files=files_to_remove, session=req.db_session)
Ejemplo n.º 20
0
def reaper(rses, worker_number=1, child_number=1, total_children=1, chunk_size=100, once=False, greedy=False, scheme=None, exclude_rses=None, delay_seconds=0):
    """
    Main loop to select and delete files.

    :param rses: List of RSEs the reaper should work against. If empty, it considers all RSEs.
    :param worker_number: The worker number.
    :param child_number: The child number.
    :param total_children: The total number of children created per worker.
    :param chunk_size: the size of chunk for deletion.
    :param once: If True, only runs one iteration of the main loop.
    :param greedy: If True, delete right away replicas with tombstone.
    :param scheme: Force the reaper to use a particular protocol, e.g., mock.
    :param exclude_rses: RSE expression to exclude RSEs from the Reaper.
    """
    logging.info('Starting reaper: worker %(worker_number)s, child %(child_number)s' % locals())
    while not graceful_stop.is_set():
        try:
            max_deleting_rate = 0

            for rse in rses:
                deleting_rate = 0
                rse_info = rsemgr.get_rse_info(rse['rse'])
                rse_protocol = rse_core.get_rse_protocols(rse['rse'])

                if not rse_protocol['availability_delete']:
                    logging.info('Reaper %s-%s: RSE %s is not available for deletion' % (worker_number, child_number, rse_info['rse']))
                    continue

                # Temporary hack to force gfal for deletion
                for protocol in rse_info['protocols']:
                    if protocol['impl'] == 'rucio.rse.protocols.srm.Default' or protocol['impl'] == 'rucio.rse.protocols.gsiftp.Default':
                        protocol['impl'] = 'rucio.rse.protocols.gfal.Default'

                logging.info('Reaper %s-%s: Running on RSE %s' % (worker_number, child_number, rse_info['rse']))
                try:
                    needed_free_space, max_being_deleted_files = None, 10000
                    if not greedy:
                        max_being_deleted_files, needed_free_space, used, free = __check_rse_usage(rse=rse['rse'], rse_id=rse['id'])
                        logging.info('Reaper %(worker_number)s-%(child_number)s: Space usage for RSE %(rse)s - max_being_deleted_files: %(max_being_deleted_files)s, needed_free_space: %(needed_free_space)s, used: %(used)s, free: %(free)s' % locals())
                        if needed_free_space <= 0:
                            needed_free_space = 0
                            logging.info('Reaper %s-%s: free space is above minimum limit for %s' % (worker_number, child_number, rse['rse']))

                    s = time.time()
                    with monitor.record_timer_block('reaper.list_unlocked_replicas'):
                        replicas = list_unlocked_replicas(rse=rse['rse'], bytes=needed_free_space, limit=max_being_deleted_files, worker_number=child_number, total_workers=total_children, delay_seconds=delay_seconds)
                    logging.debug('Reaper %s-%s: list_unlocked_replicas %s %s %s' % (worker_number, child_number, rse['rse'], time.time() - s, len(replicas)))

                    if not replicas:
                        logging.info('Reaper %s-%s: nothing to do for %s' % (worker_number, child_number, rse['rse']))
                        continue

                    p = rsemgr.create_protocol(rse_info, 'delete', scheme=None)
                    for files in chunks(replicas, chunk_size):
                        logging.debug('Reaper %s-%s: Running on : %s' % (worker_number, child_number, str(files)))
                        try:
                            s = time.time()
                            update_replicas_states(replicas=[dict(replica.items() + [('state', ReplicaState.BEING_DELETED), ('rse_id', rse['id'])]) for replica in files])

                            for replica in files:
                                try:
                                    replica['pfn'] = str(rsemgr.lfns2pfns(rse_settings=rse_info, lfns=[{'scope': replica['scope'], 'name': replica['name']}, ], operation='delete').values()[0])
                                except ReplicaUnAvailable as e:
                                    err_msg = 'Failed to get pfn UNAVAILABLE replica %s:%s on %s with error %s' % (replica['scope'], replica['name'], rse['rse'], str(e))
                                    logging.warning('Reaper %s-%s: %s' % (worker_number, child_number, err_msg))
                                    replica['pfn'] = None

                                add_message('deletion-planned', {'scope': replica['scope'],
                                                                 'name': replica['name'],
                                                                 'file-size': replica['bytes'],
                                                                 'url': replica['pfn'],
                                                                 'rse': rse_info['rse']})

                            # logging.debug('update_replicas_states %s' % (time.time() - s))
                            monitor.record_counter(counters='reaper.deletion.being_deleted',  delta=len(files))

                            if not scheme:
                                try:
                                    deleted_files = []
                                    p.connect()
                                    for replica in files:
                                        try:
                                            logging.info('Reaper %s-%s: Deletion ATTEMPT of %s:%s as %s on %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse']))
                                            s = time.time()
                                            if rse['staging_area'] or rse['rse'].endswith("STAGING"):
                                                logging.warning('Reaper %s-%s: Deletion STAGING of %s:%s as %s on %s, will only delete the catalog and not do physical deletion' % (worker_number,
                                                                                                                                                                                    child_number,
                                                                                                                                                                                    replica['scope'],
                                                                                                                                                                                    replica['name'],
                                                                                                                                                                                    replica['pfn'],
                                                                                                                                                                                    rse['rse']))
                                            else:
                                                if replica['pfn']:
                                                    p.delete(replica['pfn'])
                                                else:
                                                    logging.warning('Reaper %s-%s: Deletion UNAVAILABLE of %s:%s as %s on %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse']))
                                            monitor.record_timer('daemons.reaper.delete.%s.%s' % (p.attributes['scheme'], rse['rse']), (time.time()-s)*1000)
                                            duration = time.time() - s

                                            deleted_files.append({'scope': replica['scope'], 'name': replica['name']})

                                            add_message('deletion-done', {'scope': replica['scope'],
                                                                          'name': replica['name'],
                                                                          'rse': rse_info['rse'],
                                                                          'file-size': replica['bytes'],
                                                                          'url': replica['pfn'],
                                                                          'duration': duration})
                                            logging.info('Reaper %s-%s: Deletion SUCCESS of %s:%s as %s on %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse']))
                                        except SourceNotFound:
                                            err_msg = 'Reaper %s-%s: Deletion NOTFOUND of %s:%s as %s on %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'])
                                            logging.warning(err_msg)
                                            deleted_files.append({'scope': replica['scope'], 'name': replica['name']})
                                            add_message('deletion-failed', {'scope': replica['scope'],
                                                                            'name': replica['name'],
                                                                            'rse': rse_info['rse'],
                                                                            'file-size': replica['bytes'],
                                                                            'url': replica['pfn'],
                                                                            'reason': err_msg})
                                        except (ServiceUnavailable, RSEAccessDenied) as e:
                                            logging.warning('Reaper %s-%s: Deletion NOACCESS of %s:%s as %s on %s: %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(e)))
                                            add_message('deletion-failed', {'scope': replica['scope'],
                                                                            'name': replica['name'],
                                                                            'rse': rse_info['rse'],
                                                                            'file-size': replica['bytes'],
                                                                            'url': replica['pfn'],
                                                                            'reason': str(e)})
                                        except Exception as e:
                                            logging.critical('Reaper %s-%s: Deletion CRITICAL of %s:%s as %s on %s: %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(traceback.format_exc())))
                                            add_message('deletion-failed', {'scope': replica['scope'],
                                                                            'name': replica['name'],
                                                                            'rse': rse_info['rse'],
                                                                            'file-size': replica['bytes'],
                                                                            'url': replica['pfn'],
                                                                            'reason': str(e)})
                                        except:
                                            logging.critical('Reaper %s-%s: Deletion CRITICAL of %s:%s as %s on %s: %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(traceback.format_exc())))
                                except (ServiceUnavailable, RSEAccessDenied) as e:
                                    for replica in files:
                                        logging.warning('Reaper %s-%s: Deletion NOACCESS of %s:%s as %s on %s: %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(e)))
                                        add_message('deletion-failed', {'scope': replica['scope'],
                                                                        'name': replica['name'],
                                                                        'rse': rse_info['rse'],
                                                                        'file-size': replica['bytes'],
                                                                        'url': replica['pfn'],
                                                                        'reason': str(e)})
                                finally:
                                    p.close()
                            s = time.time()
                            with monitor.record_timer_block('reaper.delete_replicas'):
                                delete_replicas(rse=rse['rse'], files=deleted_files)
                            logging.debug('Reaper %s-%s: delete_replicas successes %s %s %s' % (worker_number, child_number, rse['rse'], len(deleted_files), time.time() - s))
                            monitor.record_counter(counters='reaper.deletion.done',  delta=len(deleted_files))
                            deleting_rate += len(deleted_files)
                        except:
                            logging.critical(traceback.format_exc())
                    deleting_rate = deleting_rate * 1.0 / max_being_deleted_files
                    if deleting_rate > max_deleting_rate:
                        max_deleting_rate = deleting_rate
                except:
                    logging.critical(traceback.format_exc())

            if once:
                break

            logging.info(" Reaper %s-%s: max_deleting_rate: %s " % (worker_number, child_number, max_deleting_rate))
            sleep_time = int((1 - max_deleting_rate) * 60 + 1)
            time.sleep(sleep_time)

        except:
            logging.critical(traceback.format_exc())

    logging.info('Graceful stop requested')
    logging.info('Graceful stop done')
Ejemplo n.º 21
0
def protocols_setup(vo):
    """
    Generator fixture that prepares RSEs, replicas and protocols for the
    protocol-sorting tests and removes them again afterwards.

    A private deep copy of ``base_rse_info`` is taken; every entry gets a
    freshly generated RSE (stored under 'name' and 'id'), a 'site' attribute
    and one replica of a single mock file. The first two RSEs then receive
    three protocols each, with differing LAN/WAN priorities.

    :param vo: The VO handed to add_rse, InternalScope and InternalAccount.

    :yields: A dict with 'files' (the registered file list) and 'rse_info'
             (the per-RSE dicts including the generated 'name' and 'id').
    """
    rse_info = copy.deepcopy(base_rse_info)

    files = [{
        'scope': InternalScope('mock', vo=vo),
        'name': 'element_0',
        'bytes': 1234,
        'adler32': 'deadbeef'
    }]
    root = InternalAccount('root', vo=vo)

    # Create one RSE per template entry and put the mock file on it.
    for idx, info in enumerate(rse_info):
        info['name'] = '%s_%s' % (info['site'], rse_name_generator())
        info['id'] = add_rse(info['name'], vo=vo)
        add_rse_attribute(rse_id=info['id'],
                          key='site',
                          value=base_rse_info[idx]['site'])
        add_replicas(rse_id=info['id'], files=files, account=root)

    # invalidate cache for parse_expression('site=…')
    rse_expression_parser.REGION.invalidate()

    # Every new RSE must be resolvable through its site expression.
    for idx, info in enumerate(rse_info):
        expression = 'site=' + base_rse_info[idx]['site']
        site_rses = rse_expression_parser.parse_expression(expression)
        assert len(site_rses) > 0
        resolved_ids = [rse['id'] for rse in site_rses]
        assert info['id'] in resolved_ids

    xrootd_impl = 'rucio.rse.protocols.xrootd.Default'
    gfal_impl = 'rucio.rse.protocols.gfal.Default'

    def _priorities(value):
        # The same priority applies to read, write and delete in a domain.
        return {'read': value, 'write': value, 'delete': value}

    # One row per protocol, registered in exactly this order:
    # (rse index, index into schemes, hostname template, port, prefix,
    #  implementation, lan priority, wan priority)
    protocol_rows = (
        (0, 0, 'root.%s', 1409, '//test/chamber/', xrootd_impl, 1, 1),
        (0, 2, 'davs.%s', 443, '/test/chamber/', gfal_impl, 2, 2),
        (0, 1, 'gsiftp.%s', 8446, '/test/chamber/', gfal_impl, 0, 3),
        (1, 1, 'gsiftp.%s', 8446, '/lambda/complex/', gfal_impl, 2, 1),
        (1, 2, 'davs.%s', 443, '/lambda/complex/', gfal_impl, 0, 2),
        (1, 0, 'root.%s', 1409, '//lambda/complex/', xrootd_impl, 1, 3),
    )
    for rse_idx, scheme_idx, host_fmt, port, prefix, impl, lan, wan in protocol_rows:
        add_protocol(
            rse_info[rse_idx]['id'], {
                'scheme': schemes[scheme_idx],
                'hostname': host_fmt % base_rse_info[rse_idx]['address'],
                'port': port,
                'prefix': prefix,
                'impl': impl,
                'domains': {
                    'lan': _priorities(lan),
                    'wan': _priorities(wan)
                }
            })

    yield {'files': files, 'rse_info': rse_info}

    # Teardown: drop replicas, the site attribute and finally the RSE itself.
    for info in rse_info:
        current_id = info['id']
        delete_replicas(rse_id=current_id, files=files)
        del_rse_attribute(rse_id=current_id, key='site')
        del_rse(current_id)
Ejemplo n.º 22
0
 def tearDown(self):
     """ Delete the test file replicas on both RSEs, then delete the RSEs themselves. """
     rse_id_attrs = ('rse1_id', 'rse2_id')
     # Attributes are looked up one at a time, right before each call.
     for attr in rse_id_attrs:
         delete_replicas(rse_id=getattr(self, attr), files=self.files)
     for attr in rse_id_attrs:
         del_rse(rse_id=getattr(self, attr))
Ejemplo n.º 23
0
def root_proxy_example_data(vo):
    """
    Generator fixture providing two RSEs that both expose a root protocol,
    where only the site of one of them has an internal root proxy configured.

    Three mock files are registered on both RSEs before the data is yielded;
    afterwards the replicas and both RSEs are removed again.

    :param vo: The VO handed to add_rse, InternalScope and InternalAccount.

    :yields: A dict with 'files', 'rse_without_proxy' and 'rse_with_proxy'
             (the latter two are the generated RSE names).
    """
    def _create_rse(site):
        # Register a fresh RSE and tag it with the given site attribute.
        name = rse_name_generator()
        new_id = add_rse(name, vo=vo)
        add_rse_attribute(rse_id=new_id, key='site', value=site)
        return name, new_id

    def _root_protocol(hostname, prefix):
        # Root protocol definition with priority 1 for every operation.
        return {
            'scheme': 'root',
            'hostname': hostname,
            'port': 1409,
            'prefix': prefix,
            'impl': 'rucio.rse.protocols.xrootd.Default',
            'domains': {
                domain: {'read': 1, 'write': 1, 'delete': 1}
                for domain in ('lan', 'wan')
            }
        }

    rse_without_proxy, rse_without_proxy_id = _create_rse('BLACKMESA1')
    rse_with_proxy, rse_with_proxy_id = _create_rse('APERTURE1')

    # APERTURE1 site has an internal proxy
    config_set('root-proxy-internal', 'APERTURE1', 'proxy.aperture.com:1094')

    files = []
    for i in range(1, 4):
        files.append({
            'scope': InternalScope('mock', vo=vo),
            'name': 'half-life_%s' % i,
            'bytes': 1234,
            'adler32': 'deadbeef',
            'meta': {
                'events': 666
            }
        })

    # Replicas are added (and later removed) proxy RSE first.
    replica_rse_ids = (rse_with_proxy_id, rse_without_proxy_id)
    for rse_id in replica_rse_ids:
        add_replicas(rse_id=rse_id,
                     files=files,
                     account=InternalAccount('root', vo=vo),
                     ignore_availability=True)

    add_protocol(rse_without_proxy_id,
                 _root_protocol('root.blackmesa.com', '//training/facility/'))
    add_protocol(rse_with_proxy_id,
                 _root_protocol('root.aperture.com', '//test/chamber/'))

    yield {
        'files': files,
        'rse_without_proxy': rse_without_proxy,
        'rse_with_proxy': rse_with_proxy
    }

    # Teardown: remove the replicas first, then the RSEs.
    for rse_id in replica_rse_ids:
        delete_replicas(rse_id=rse_id, files=files)
    del_rse(rse_with_proxy_id)
    del_rse(rse_without_proxy_id)
Ejemplo n.º 24
0
    def test_replica_sorting(self):
        """ REPLICA (CORE): Test the correct sorting of the replicas across WAN and LAN

        Two RSEs (sites APERTURE and BLACKMESA) hold the same file and expose
        root, davs and gsiftp protocols with different LAN/WAN priorities.
        The replica listing is then requested from the point of view of each
        site (and of an unknown site) and the domain/priority reported for
        every PFN is checked, both in the plain and in the metalink output.
        """

        self.rc = ReplicaClient()

        # Two RSEs, one per site.
        self.rse1 = 'APERTURE_%s' % rse_name_generator()
        self.rse2 = 'BLACKMESA_%s' % rse_name_generator()
        self.rse1_id = add_rse(self.rse1, **self.vo)
        self.rse2_id = add_rse(self.rse2, **self.vo)
        add_rse_attribute(rse_id=self.rse1_id, key='site', value='APERTURE')
        add_rse_attribute(rse_id=self.rse2_id, key='site', value='BLACKMESA')

        # The same single file is registered on both RSEs.
        self.files = [{
            'scope': InternalScope('mock', **self.vo),
            'name': 'element_0',
            'bytes': 1234,
            'adler32': 'deadbeef'
        }]
        root = InternalAccount('root', **self.vo)
        add_replicas(rse_id=self.rse1_id, files=self.files, account=root)
        add_replicas(rse_id=self.rse2_id, files=self.files, account=root)

        xrootd_impl = 'rucio.rse.protocols.xrootd.Default'
        gfal_impl = 'rucio.rse.protocols.gfal.Default'

        def _register(rse_id, scheme, hostname, port, prefix, impl, lan, wan):
            # Add one protocol; read/write/delete share the domain priority.
            add_protocol(
                rse_id, {
                    'scheme': scheme,
                    'hostname': hostname,
                    'port': port,
                    'prefix': prefix,
                    'impl': impl,
                    'domains': {
                        'lan': {'read': lan, 'write': lan, 'delete': lan},
                        'wan': {'read': wan, 'write': wan, 'delete': wan}
                    }
                })

        def _query(site, **extra):
            # List the replicas of the test file as seen from the given site.
            dids = [{
                'scope': 'mock',
                'name': f['name'],
                'type': 'FILE'
            } for f in self.files]
            return self.rc.list_replicas(dids=dids,
                                         schemes=['root', 'gsiftp', 'davs'],
                                         client_location={'site': site},
                                         **extra)

        def _pfns_seen_from(site):
            # PFN dictionary of the first replica returned for the site.
            replicas = list(_query(site))
            return [r['pfns'] for r in replicas][0]

        def _check_pfns(site, expected):
            # Exact domain and priority for every expected PFN, in order.
            pfns = _pfns_seen_from(site)
            assert_equal(len(pfns.keys()), len(expected))
            for url, domain, priority in expected:
                assert_equal(pfns[url]['domain'], domain)
                assert_equal(pfns[url]['priority'], priority)

        def _check_metalink(site, expected):
            # Each expected PFN must appear with its attributes; no 6th entry.
            ml = _query(site, metalink=True)
            for url, domain, priority in expected:
                fragment = 'domain="%s" priority="%s" client_extract="false">%s' % (domain, priority, url)
                assert_in(fragment, ml)
            assert_not_in('priority="6"', ml)

        # Protocols of the APERTURE RSE ...
        _register(self.rse1_id, 'root', 'root.aperture.com', 1409,
                  '//test/chamber/', xrootd_impl, 1, 1)
        _register(self.rse1_id, 'davs', 'davs.aperture.com', 443,
                  '/test/chamber/', gfal_impl, 2, 2)
        _register(self.rse1_id, 'gsiftp', 'gsiftp.aperture.com', 8446,
                  '/test/chamber/', gfal_impl, 0, 3)

        # ... and of the BLACKMESA RSE.
        _register(self.rse2_id, 'gsiftp', 'gsiftp.blackmesa.com', 8446,
                  '/lambda/complex/', gfal_impl, 2, 1)
        _register(self.rse2_id, 'davs', 'davs.blackmesa.com', 443,
                  '/lambda/complex/', gfal_impl, 0, 2)
        _register(self.rse2_id, 'root', 'root.blackmesa.com', 1409,
                  '//lambda/complex/', xrootd_impl, 1, 3)

        root_aperture = 'root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0'
        davs_aperture = 'davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0'
        gsiftp_aperture = 'gsiftp://gsiftp.aperture.com:8446/test/chamber/mock/58/b5/element_0'
        root_blackmesa = 'root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0'
        davs_blackmesa = 'davs://davs.blackmesa.com:443/lambda/complex/mock/58/b5/element_0'
        gsiftp_blackmesa = 'gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0'

        # Expected (pfn, domain, priority) as seen from each site.
        seen_from_aperture = (
            (root_aperture, 'lan', 1),
            (davs_aperture, 'lan', 2),
            (gsiftp_blackmesa, 'wan', 3),
            (davs_blackmesa, 'wan', 4),
            (root_blackmesa, 'wan', 5),
        )
        seen_from_blackmesa = (
            (root_blackmesa, 'lan', 1),
            (gsiftp_blackmesa, 'lan', 2),
            (root_aperture, 'wan', 3),
            (davs_aperture, 'wan', 4),
            (gsiftp_aperture, 'wan', 5),
        )

        _check_pfns('APERTURE', seen_from_aperture)
        _check_pfns('BLACKMESA', seen_from_blackmesa)

        # A client at an unknown site sees everything as WAN.
        pfns = _pfns_seen_from('XEN')
        assert_equal(len(pfns.keys()), 6)
        # TODO: intractable until RSE sorting is enabled
        # (only a pair of acceptable priorities can be asserted per PFN)
        seen_from_xen = (
            (gsiftp_blackmesa, [1, 2]),
            (root_aperture, [1, 2]),
            (davs_aperture, [3, 4]),
            (davs_blackmesa, [3, 4]),
            (gsiftp_aperture, [5, 6]),
            (root_blackmesa, [5, 6]),
        )
        for url, acceptable in seen_from_xen:
            assert_equal(pfns[url]['domain'], 'wan')
            assert_in(pfns[url]['priority'], acceptable)

        _check_metalink('APERTURE', seen_from_aperture)
        _check_metalink('BLACKMESA', seen_from_blackmesa)

        # TODO: intractable until RSE sorting is enabled
        # The metalink output for client_location={'site': 'XEN'} is not
        # checked yet. It is expected to list all six PFNs in the 'wan'
        # domain with priorities 1 to 6 and no priority="7".

        # ensure correct handling of disabled protocols
        _register(self.rse1_id, 'root', 'root2.aperture.com', 1409,
                  '//test/chamber/', xrootd_impl, 1, 0)
        _check_metalink('BLACKMESA', seen_from_blackmesa)

        # Clean up: replicas first, then the RSEs.
        delete_replicas(rse_id=self.rse1_id, files=self.files)
        delete_replicas(rse_id=self.rse2_id, files=self.files)
        del_rse(self.rse1_id)
        del_rse(self.rse2_id)
Ejemplo n.º 25
0
 def tearDown(self):
     """Clean up after a test: drop the file replicas on both RSEs, then the RSEs themselves."""
     test_rses = (self.rse1, self.rse2)
     # Replicas are removed from every RSE before any RSE is deleted.
     for rse in test_rses:
         delete_replicas(rse=rse, files=self.files)
     for rse in test_rses:
         del_rse(rse)
Ejemplo n.º 26
0
    def test_update_collection_replica(self):
        """ REPLICA (CORE): Update collection replicas from update requests. """
        session = self.db_session
        dataset_type = constants.DIDType.DATASET
        file_size = 2
        files = [{
            'name': 'file_%s' % generate_uuid(),
            'scope': self.scope,
            'bytes': file_size
        } for _ in range(2)]
        dataset_name = 'dataset_test_%s' % generate_uuid()
        nb_files = len(files)
        first_file = files[0]

        # ------------------------------------------------------------------
        # Local helpers: every step below is "change replicas, queue an update
        # request, process it, inspect the dataset replica(s)".
        # ------------------------------------------------------------------
        def queue_update(**fields):
            """Store a new update request for the dataset (extra columns via ``fields``)."""
            models.UpdatedCollectionReplica(scope=self.scope,
                                            name=dataset_name,
                                            **fields).save(session=session)

        def pending_on_first_rse():
            """Update requests of the dataset that carry the first RSE's id."""
            return session.query(models.UpdatedCollectionReplica).filter_by(
                rse_id=self.rse_id, scope=self.scope, name=dataset_name)  # pylint: disable=no-member

        def pending_without_rse():
            """Update requests of the dataset whose rse_id is NULL."""
            request_model = models.UpdatedCollectionReplica
            return session.query(request_model).filter(
                request_model.scope == self.scope,
                request_model.name == dataset_name,  # pylint: disable=no-member
                request_model.rse_id.is_(None))  # pylint: disable=no-member

        def process(pending):
            """Fetch the single matching update request, apply it and return the fetched row."""
            request = pending.one()
            update_collection_replica(update_request=request.to_dict(),
                                      session=session)
            return request

        def dataset_replicas(**rse_filter):
            """Query for the dataset's collection replicas, optionally limited to one RSE."""
            return session.query(models.CollectionReplica).filter_by(
                scope=self.scope, name=dataset_name, **rse_filter)  # pylint: disable=no-member

        def verify(replica, length, available, state):
            """Check the counters and state of one dataset replica (sizes are in files)."""
            assert replica['bytes'] == length * file_size
            assert replica['length'] == length
            assert replica['available_bytes'] == available * file_size
            assert replica['available_replicas_cnt'] == available
            assert replica['state'] == state

        def verify_on(rse_id, length, available, state):
            """Same as ``verify`` for the dataset replica located on ``rse_id``."""
            verify(dataset_replicas(rse_id=rse_id).one(), length, available, state)

        # Initial content: two file replicas on the first RSE, attached to a new dataset
        add_replicas(rse_id=self.rse_id,
                     files=files,
                     account=self.account,
                     session=session)
        add_did(scope=self.scope,
                name=dataset_name,
                type=dataset_type,
                account=self.account,
                session=session)
        attach_dids(scope=self.scope,
                    name=dataset_name,
                    dids=files,
                    account=self.account,
                    session=session)
        models.CollectionReplica(rse_id=self.rse_id,
                                 scope=self.scope,
                                 state=constants.ReplicaState.AVAILABLE,
                                 name=dataset_name,
                                 did_type=dataset_type,
                                 bytes=nb_files * file_size,
                                 length=nb_files,
                                 available_replicas_cnt=0).save(session=session)

        # Update request with rse id
        # First update -> dataset replica should be available
        queue_update(rse_id=self.rse_id, did_type=dataset_type)
        handled = process(pending_on_first_rse())
        # The processed request must be gone afterwards
        leftover = session.query(models.UpdatedCollectionReplica).filter_by(
            id=handled.id).first()  # pylint: disable=no-member
        assert leftover is None
        verify(dataset_replicas().one(), nb_files, nb_files,
               ReplicaState.AVAILABLE)

        # Delete one file replica -> dataset replica should be unavailable
        delete_replicas(rse_id=self.rse_id,
                        files=[first_file],
                        session=session)
        process(pending_on_first_rse())
        verify(dataset_replicas().one(), nb_files, nb_files - 1,
               ReplicaState.UNAVAILABLE)

        # Add one file replica -> dataset replica should be available again
        add_replicas(rse_id=self.rse_id,
                     files=[first_file],
                     account=self.account,
                     session=session)
        attach_dids(scope=self.scope,
                    name=dataset_name,
                    dids=[first_file],
                    account=self.account,
                    session=session)
        queue_update(rse_id=self.rse_id, did_type=dataset_type)
        process(pending_on_first_rse())
        verify(dataset_replicas().one(), nb_files, nb_files,
               ReplicaState.AVAILABLE)

        # Delete all file replicas -> dataset replica should be deleted
        delete_replicas(rse_id=self.rse_id, files=files, session=session)
        process(pending_on_first_rse())
        remaining = dataset_replicas().all()
        assert len(remaining) == 0

        # Update request without rse_id - using two replicas per file -> total 4 replicas
        for target_rse_id in (self.rse_id, self.rse2_id):
            add_replicas(rse_id=target_rse_id,
                         files=files,
                         account=self.account,
                         session=session)
        attach_dids(scope=self.scope,
                    name=dataset_name,
                    dids=files,
                    account=self.account,
                    session=session)
        for target_rse_id in (self.rse_id, self.rse2_id):
            models.CollectionReplica(
                rse_id=target_rse_id,
                scope=self.scope,
                name=dataset_name,
                state=constants.ReplicaState.UNAVAILABLE,
                did_type=dataset_type,
                bytes=nb_files * file_size,
                length=nb_files).save(session=session)

        # First update -> replicas should be available
        # (this request is stored without did_type and looked up without an rse_id filter)
        queue_update()
        process(
            session.query(models.UpdatedCollectionReplica).filter_by(
                scope=self.scope, name=dataset_name))  # pylint: disable=no-member
        for replica in dataset_replicas().all():
            verify(replica, nb_files, nb_files, ReplicaState.AVAILABLE)

        # Delete first replica on first RSE -> replica on first RSE should be unavailable, replica on second RSE should be still available
        delete_replicas(rse_id=self.rse_id,
                        files=[first_file],
                        session=session)
        queue_update(did_type=dataset_type)
        # delete_replica creates also update object but with rse_id -> extra filter for rse_id is NULL
        process(pending_without_rse())
        verify_on(self.rse_id, nb_files, nb_files - 1,
                  ReplicaState.UNAVAILABLE)
        verify_on(self.rse2_id, nb_files, nb_files, ReplicaState.AVAILABLE)

        # Set the state of the first replica on the second RSE to UNAVAILABLE -> both replicas should be unavailable
        second_rse_file = session.query(models.RSEFileAssociation).filter_by(
            rse_id=self.rse2_id, scope=self.scope,
            name=first_file['name']).one()  # pylint: disable=no-member
        second_rse_file.state = constants.ReplicaState.UNAVAILABLE
        queue_update(did_type=dataset_type)
        process(pending_without_rse())
        verify_on(self.rse_id, nb_files, nb_files - 1,
                  ReplicaState.UNAVAILABLE)
        verify_on(self.rse2_id, nb_files, nb_files - 1,
                  ReplicaState.UNAVAILABLE)

        # Delete first replica on second RSE -> file is not longer part of dataset -> both replicas should be available
        delete_replicas(rse_id=self.rse2_id,
                        files=[first_file],
                        session=session)
        queue_update(did_type=dataset_type)
        process(pending_without_rse())
        verify_on(self.rse_id, nb_files - 1, nb_files - 1,
                  ReplicaState.AVAILABLE)
        verify_on(self.rse2_id, nb_files - 1, nb_files - 1,
                  ReplicaState.AVAILABLE)

        # Add first replica to the first RSE -> first replicas should be available
        add_replicas(rse_id=self.rse_id,
                     files=[first_file],
                     account=self.account,
                     session=session)
        attach_dids(scope=self.scope,
                    name=dataset_name,
                    dids=[first_file],
                    account=self.account,
                    session=session)
        queue_update(did_type=dataset_type)
        process(pending_without_rse())
        verify_on(self.rse_id, nb_files, nb_files, ReplicaState.AVAILABLE)
        verify_on(self.rse2_id, nb_files, nb_files - 1,
                  ReplicaState.UNAVAILABLE)

        # Add first replica to the second RSE -> both replicas should be available again
        add_replicas(rse_id=self.rse2_id,
                     files=[first_file],
                     account=self.account,
                     session=session)
        queue_update(did_type=dataset_type)
        process(pending_without_rse())
        verify_on(self.rse_id, nb_files, nb_files, ReplicaState.AVAILABLE)
        verify_on(self.rse2_id, nb_files, nb_files, ReplicaState.AVAILABLE)
Ejemplo n.º 27
0
def reaper(rses,
           chunk_size=100,
           once=False,
           greedy=False,
           scheme=None,
           delay_seconds=0,
           sleep_time=60):
    """
    Main loop to select and delete files.

    Every cycle refreshes the heartbeat, works out for each RSE how much space
    has to be freed, and then, RSE by RSE, lists and marks the replicas to
    delete, removes them from storage and finally deletes them from the
    catalogue. The loop runs until GRACEFUL_STOP is set (or once, see ``once``).

    :param rses:           List of RSE dictionaries the reaper should work against. The keys read here are
                           'rse', 'id', 'staging_area' and 'availability'.
                           NOTE(review): an empty list makes this function process nothing; any
                           "empty means all RSEs" expansion has to happen in the caller - confirm.
    :param chunk_size:     The size of chunk for deletion.
    :param once:           If True, only runs one iteration of the main loop.
    :param greedy:         If True, delete right away replicas with tombstone.
    :param scheme:         Force the reaper to use a particular protocol, e.g., mock.
    :param delay_seconds:  The delay to query replicas in BEING_DELETED state.
    :param sleep_time:     Time between two cycles.
    """

    try:
        # NOTE(review): if get() returns the raw config string, the ">=" comparison
        # and the "%i" log format below expect a number - confirm the returned type.
        max_deletion_thread = get('reaper', 'nb_workers_by_hostname')
    except ConfigNotFound:
        max_deletion_thread = 5
    hostname = socket.getfqdn()
    executable = sys.argv[0]
    pid = os.getpid()
    hb_thread = threading.current_thread()
    sanity_check(executable=executable, hostname=hostname)
    heart_beat = live(executable, hostname, pid, hb_thread)
    prepend_str = 'Thread [%i/%i] : ' % (heart_beat['assign_thread'] + 1,
                                         heart_beat['nr_threads'])
    logging.info('%s Reaper starting', prepend_str)

    # To prevent running on the same partition if all the reapers restart at the same time
    time.sleep(10)
    heart_beat = live(executable, hostname, pid, hb_thread)
    prepend_str = 'Thread [%i/%i] : ' % (heart_beat['assign_thread'] + 1,
                                         heart_beat['nr_threads'])
    logging.info('%s Reaper started', prepend_str)

    while not GRACEFUL_STOP.is_set():

        start_time = time.time()
        try:
            staging_areas = []
            # (rse name, rse id) -> [needed free space, max being-deleted files]
            dict_rses = {}
            heart_beat = live(executable,
                              hostname,
                              pid,
                              hb_thread,
                              older_than=3600)
            prepend_str = 'Thread [%i/%i] : ' % (heart_beat['assign_thread'] +
                                                 1, heart_beat['nr_threads'])
            tot_needed_free_space = 0
            for rse in rses:
                # Check if the RSE is a staging area
                if rse['staging_area']:
                    staging_areas.append(rse['rse'])
                # Check if RSE is blacklisted (an even availability value is treated as "no delete")
                if rse['availability'] % 2 == 0:
                    logging.debug('%s RSE %s is blacklisted for delete',
                                  prepend_str, rse['rse'])
                    continue
                max_being_deleted_files, needed_free_space, used, free = __check_rse_usage(
                    rse['rse'], rse['id'], prepend_str)
                # Check if greedy mode: every RSE gets the same, very large, target
                if greedy:
                    dict_rses[(rse['rse'], rse['id'])] = [
                        1000000000000, max_being_deleted_files
                    ]
                    tot_needed_free_space += 1000000000000
                else:
                    if needed_free_space:
                        dict_rses[(rse['rse'], rse['id'])] = [
                            needed_free_space, max_being_deleted_files
                        ]
                        tot_needed_free_space += needed_free_space
                    else:
                        logging.debug('%s Nothing to delete on %s',
                                      prepend_str, rse['rse'])

            # Ordering the RSEs based on the needed free space
            # (only used for the debug message; the work list below is shuffled anyway)
            sorted_dict_rses = OrderedDict(
                sorted(dict_rses.items(), key=itemgetter(1), reverse=True))
            logging.debug(
                '%s List of RSEs to process ordered by needed space desc : %s',
                prepend_str, str(sorted_dict_rses))

            # Get the mapping between the RSE and the hostname used for deletion.
            # It is indexed by rse_id below and yields (hostname, rse_info).
            rses_hostname_mapping = get_rses_to_hostname_mapping()
            # logging.debug('%s Mapping RSEs to hostnames used for deletion : %s', prepend_str, str(rses_hostname_mapping))

            list_rses_mult = []

            # Loop over the RSEs. rse_key = (rse, rse_id) and fill list_rses_mult that contains all RSEs to process with different multiplicity
            for rse_key in dict_rses:
                rse_name, rse_id = rse_key
                # The length of the deletion queue scales inversely with the number of workers
                # The ceil increases the weight of the RSE with small amount of files to delete
                max_workers = ceil(dict_rses[rse_key][0] /
                                   tot_needed_free_space * 1000 /
                                   heart_beat['nr_threads'])
                list_rses_mult.extend([
                    (rse_name, rse_id, dict_rses[rse_key][0],
                     dict_rses[rse_key][1]) for _ in range(int(max_workers))
                ])
            random.shuffle(list_rses_mult)

            skip_until_next_run = []
            for rse_name, rse_id, needed_free_space, max_being_deleted_files in list_rses_mult:
                if rse_id in skip_until_next_run:
                    continue
                logging.debug(
                    '%s Working on %s. Percentage of the total space needed %.2f',
                    prepend_str, rse_name,
                    needed_free_space / tot_needed_free_space * 100)
                rse_hostname, rse_info = rses_hostname_mapping[rse_id]
                # Heartbeat payload identifying "this worker deletes on this RSE via this host"
                rse_hostname_key = '%s,%s' % (rse_id, rse_hostname)
                payload_cnt = list_payload_counts(executable,
                                                  older_than=600,
                                                  hash_executable=None,
                                                  session=None)
                # logging.debug('%s Payload count : %s', prepend_str, str(payload_cnt))
                tot_threads_for_hostname = 0
                tot_threads_for_rse = 0
                for key in payload_cnt:
                    # Only payloads of the form "<rse_id>,<hostname>" are counted
                    if key and ',' in key:
                        key_rse_id, key_hostname = key.split(',')[:2]
                        if key_hostname == rse_hostname:
                            tot_threads_for_hostname += payload_cnt[key]
                        if key_rse_id == str(rse_id):
                            tot_threads_for_rse += payload_cnt[key]

                if rse_hostname_key in payload_cnt and tot_threads_for_hostname >= max_deletion_thread:
                    logging.debug(
                        '%s Too many deletion threads for %s on RSE %s. Back off',
                        prepend_str, rse_hostname, rse_name)
                    # Might need to reschedule a try on this RSE later in the same cycle
                    continue

                logging.info(
                    '%s Nb workers on %s smaller than the limit (current %i vs max %i). Starting new worker on RSE %s',
                    prepend_str, rse_hostname, tot_threads_for_hostname,
                    max_deletion_thread, rse_name)
                live(executable,
                     hostname,
                     pid,
                     hb_thread,
                     older_than=600,
                     hash_executable=None,
                     payload=rse_hostname_key,
                     session=None)
                logging.debug('%s Total deletion workers for %s : %i',
                              prepend_str, rse_hostname,
                              tot_threads_for_hostname + 1)
                # List and mark BEING_DELETED the files to delete
                del_start_time = time.time()
                try:
                    with monitor.record_timer_block(
                            'reaper.list_unlocked_replicas'):
                        replicas = list_and_mark_unlocked_replicas(
                            limit=chunk_size,
                            bytes=needed_free_space,
                            rse_id=rse_id,
                            delay_seconds=delay_seconds,
                            session=None)
                    logging.debug(
                        '%s list_and_mark_unlocked_replicas  on %s for %s bytes in %s seconds: %s replicas',
                        prepend_str, rse_name, needed_free_space,
                        time.time() - del_start_time, len(replicas))
                    if len(replicas) < chunk_size:
                        logging.info(
                            '%s Not enough replicas to delete on %s (%s requested vs %s returned). Will skip any new attempts on this RSE until next cycle',
                            prepend_str, rse_name, chunk_size, len(replicas))
                        skip_until_next_run.append(rse_id)

                except (DatabaseException, IntegrityError,
                        DatabaseError) as error:
                    logging.error('%s %s', prepend_str, str(error))
                    continue
                except Exception:
                    logging.critical('%s %s', prepend_str,
                                     str(traceback.format_exc()))
                    # Without a fresh listing, `replicas` is either unbound or still
                    # holds the previous RSE's replicas; never carry on to deletion.
                    continue

                # Physical  deletion will take place there
                try:
                    prot = rsemgr.create_protocol(rse_info,
                                                  'delete',
                                                  scheme=scheme)
                    for file_replicas in chunks(replicas, 100):
                        # Refresh heartbeat
                        live(executable,
                             hostname,
                             pid,
                             hb_thread,
                             older_than=600,
                             hash_executable=None,
                             payload=rse_hostname_key,
                             session=None)
                        del_start_time = time.time()
                        for replica in file_replicas:
                            try:
                                pfns = rsemgr.lfns2pfns(
                                    rse_settings=rse_info,
                                    lfns=[{
                                        'scope': replica['scope'],
                                        'name': replica['name'],
                                        'path': replica['path']
                                    }],
                                    operation='delete',
                                    scheme=scheme)
                                # list(...) because dict views cannot be indexed on Python 3
                                replica['pfn'] = str(list(pfns.values())[0])
                                time.sleep(random.uniform(0, 0.01))
                            except (ReplicaUnAvailable,
                                    ReplicaNotFound) as error:
                                logging.warning(
                                    '%s Failed get pfn UNAVAILABLE replica %s:%s on %s with error %s',
                                    prepend_str, replica['scope'],
                                    replica['name'], rse_name, str(error))
                                replica['pfn'] = None

                            except Exception:
                                # NOTE(review): 'pfn' stays unset for this replica here;
                                # verify delete_from_storage copes with a missing key.
                                logging.critical('%s %s', prepend_str,
                                                 str(traceback.format_exc()))

                        deleted_files = delete_from_storage(
                            file_replicas, prot, rse_info, staging_areas,
                            prepend_str)
                        logging.info('%s %i files processed in %s seconds',
                                     prepend_str, len(file_replicas),
                                     time.time() - del_start_time)

                        # Then finally delete the replicas
                        del_start = time.time()
                        with monitor.record_timer_block(
                                'reaper.delete_replicas'):
                            delete_replicas(rse_id=rse_id, files=deleted_files)
                        logging.debug(
                            '%s delete_replicas successed on %s : %s replicas in %s seconds',
                            prepend_str, rse_name, len(deleted_files),
                            time.time() - del_start)
                        monitor.record_counter(counters='reaper.deletion.done',
                                               delta=len(deleted_files))

                except Exception:
                    # A failure on one RSE must not abort the rest of the cycle
                    logging.critical('%s %s', prepend_str,
                                     str(traceback.format_exc()))

            if once:
                break

            # Pad the cycle so that consecutive cycles start at least sleep_time apart
            tottime = time.time() - start_time
            if tottime < sleep_time:
                logging.info('%s Will sleep for %s seconds', prepend_str,
                             sleep_time - tottime)
                time.sleep(sleep_time - tottime)

        except DatabaseException as error:
            logging.warning('%s Reaper:  %s', prepend_str, str(error))
        except Exception:
            logging.critical('%s %s', prepend_str, str(traceback.format_exc()))
        finally:
            if once:
                break

    die(executable=executable, hostname=hostname, pid=pid, thread=hb_thread)
    logging.info('%s Graceful stop requested', prepend_str)
    logging.info('%s Graceful stop done', prepend_str)
    return
Ejemplo n.º 28
0
 def tearDownClass(cls):
     """Delete the test replicas on both class-level RSE ids, then the RSEs."""
     rse_ids = (cls.rse_with_proxy_id, cls.rse_without_proxy_id)
     # First pass clears the replicas on every RSE; only afterwards are the
     # RSEs themselves removed, in the same order.
     for current_id in rse_ids:
         delete_replicas(rse_id=current_id, files=cls.files)
     for current_id in rse_ids:
         del_rse(current_id)
Ejemplo n.º 29
0
    def test_abacus_collection_replica(self):
        """ ABACUS (COLLECTION REPLICA): Test update of collection replica.

        Uploads two files into a dataset, then checks the counters of the
        dataset replica reported by the replica client at each stage: before
        the abacus run, after it, after one file replica is deleted, and
        after the reaper has been run on the RSE.
        """

        def add_dataset_rule():
            # (Re-)create the one-copy rule for the dataset on the test RSE.
            dataset_did = {'scope': self.scope, 'name': self.dataset}
            self.rule_client.add_replication_rule([dataset_did],
                                                  1,
                                                  self.rse,
                                                  lifetime=-1)

        def dataset_replicas():
            # Materialise everything the client lists for the dataset.
            return list(
                self.replica_client.list_dataset_replicas(
                    self.scope, self.dataset))

        counter_keys = ('bytes', 'length', 'available_bytes',
                        'available_length')

        generated = []
        for _ in range(2):
            generated.append({
                'did_scope': self.scope,
                'did_name': 'file_' + generate_uuid(),
                'path': file_generator(size=self.file_sizes),
                'rse': self.rse,
                'lifetime': -1
            })
        self.files = generated

        self.did_client.add_did(self.scope,
                                self.dataset,
                                DIDType.DATASET,
                                lifetime=-1)
        self.upload_client.upload(self.files)
        attached = [{
            'name': entry['did_name'],
            'scope': entry['did_scope']
        } for entry in self.files]
        self.did_client.attach_dids(scope=self.scope,
                                    name=self.dataset,
                                    dids=attached)
        add_dataset_rule()
        # Remove the generated files from disk now that they were uploaded.
        for entry in self.files:
            os.remove(entry['path'])

        # Check dataset replica after rule creation - initial data
        dataset_replica = dataset_replicas()[0]
        for key in counter_keys:
            assert dataset_replica[key] == 0
        assert str(dataset_replica['state']) == 'UNAVAILABLE'

        # Run Abacus
        collection_replica.run(once=True)

        # Check dataset replica after abacus - abacus should update the collection_replica table from updated_col_rep
        dataset_replica = dataset_replicas()[0]
        nfiles = len(self.files)
        total_bytes = nfiles * self.file_sizes
        assert dataset_replica['bytes'] == total_bytes
        assert dataset_replica['length'] == nfiles
        assert dataset_replica['available_bytes'] == total_bytes
        assert dataset_replica['available_length'] == nfiles
        assert str(dataset_replica['state']) == 'AVAILABLE'

        # Delete one file -> collection replica should be unavailable
        cleaner.run(once=True)
        first_file = self.files[0]
        delete_replicas(rse_id=self.rse_id,
                        files=[{
                            'name': first_file['did_name'],
                            'scope': InternalScope(first_file['did_scope'],
                                                   **self.vo)
                        }])
        add_dataset_rule()
        collection_replica.run(once=True)
        dataset_replica = dataset_replicas()[0]
        assert dataset_replica['length'] == nfiles
        assert dataset_replica['bytes'] == total_bytes
        assert dataset_replica['available_length'] == nfiles - 1
        assert dataset_replica['available_bytes'] == (nfiles - 1) * self.file_sizes
        assert str(dataset_replica['state']) == 'UNAVAILABLE'

        # Delete all files -> collection replica should be deleted
        # Old behaviour (doesn't delete the DID)
        cleaner.run(once=True)
        reaper.REGION.invalidate()
        # With a VO configured the RSE expression is prefixed with the VO.
        if self.vo:
            rse_expression = 'vo=%s&(%s)' % (self.vo['vo'], self.rse)
        else:
            rse_expression = self.rse
        reaper.run(once=True, include_rses=rse_expression, greedy=True)
        add_dataset_rule()
        collection_replica.run(once=True)
        remaining = dataset_replicas()
        assert remaining[0]['length'] == 0
        assert remaining[0]['available_length'] == 0
Ejemplo n.º 30
0
def test_abacus_collection_replica_new(vo, rse_factory, rucio_client,
                                       did_factory, core_config_mock,
                                       caches_mock):
    """ ABACUS (COLLECTION REPLICA): Test update of collection replica.

    Uploads a two-file dataset to a freshly made POSIX RSE and checks the
    dataset replica counters before the abacus run, after it, and after one
    file replica has been deleted. Finally deletes the last file replica and
    expects the dataset DID itself to be gone.

    :param vo: VO passed to InternalScope when building internal scopes.
    :param rse_factory: Factory used to create the POSIX RSE under test.
    :param rucio_client: Client used for metadata, rules and replica listing.
    :param did_factory: Factory used to upload the test dataset.
    :param core_config_mock: Not referenced in the body; presumably requested
        for its setup side effects - confirm against the parametrization.
    :param caches_mock: Not referenced in the body; presumably requested for
        its setup side effects - confirm against the parametrization.
    """
    file_sizes = 2
    nfiles = 2
    dataset_scope = 'mock'
    rse, rse_id = rse_factory.make_posix_rse()
    dids = did_factory.upload_test_dataset(rse_name=rse,
                                           scope=dataset_scope,
                                           size=file_sizes,
                                           nb_files=nfiles)
    files = [{
        'scope': did['did_scope'],
        'name': did['did_name']
    } for did in dids]
    dataset = dids[0]['dataset_name']
    rucio_client.set_metadata(dataset_scope, dataset, 'lifetime', -1)
    rucio_client.add_replication_rule([{
        'scope': dataset_scope,
        'name': dataset
    }],
                                      1,
                                      rse,
                                      lifetime=-1)

    # Check dataset replica after rule creation - initial data
    dataset_replica = list(
        rucio_client.list_dataset_replicas(dataset_scope, dataset))[0]
    assert dataset_replica['bytes'] == 0
    assert dataset_replica['length'] == 0
    assert dataset_replica['available_bytes'] == 0
    assert dataset_replica['available_length'] == 0
    assert str(dataset_replica['state']) == 'UNAVAILABLE'

    # Run Abacus
    collection_replica.run(once=True)

    # Check dataset replica after abacus - abacus should update the collection_replica table from updated_col_rep
    dataset_replica = list(
        rucio_client.list_dataset_replicas(dataset_scope, dataset))[0]
    assert dataset_replica['bytes'] == len(files) * file_sizes
    assert dataset_replica['length'] == len(files)
    assert dataset_replica['available_bytes'] == len(files) * file_sizes
    assert dataset_replica['available_length'] == len(files)
    assert str(dataset_replica['state']) == 'AVAILABLE'

    # One VO-aware scope for every core-level call below. The final get_did
    # must use the same scope as the deletions, otherwise the lookup targets
    # a different scope and the "not found" check proves nothing.
    internal_scope = InternalScope(dataset_scope, vo)

    # Delete one file -> collection replica should be unavailable
    cleaner.run(once=True)
    delete_replicas(rse_id=rse_id,
                    files=[{
                        'name': files[0]['name'],
                        'scope': internal_scope
                    }])
    rucio_client.add_replication_rule([{
        'scope': dataset_scope,
        'name': dataset
    }],
                                      1,
                                      rse,
                                      lifetime=-1)
    collection_replica.run(once=True)
    dataset_replica = list(
        rucio_client.list_dataset_replicas(dataset_scope, dataset))[0]
    assert dataset_replica['length'] == len(files)
    assert dataset_replica['bytes'] == len(files) * file_sizes
    assert dataset_replica['available_length'] == len(files) - 1
    assert dataset_replica['available_bytes'] == (len(files) - 1) * file_sizes
    assert str(dataset_replica['state']) == 'UNAVAILABLE'

    # Delete all files -> collection replica should be deleted
    # New behaviour (dataset should be deleted)
    cleaner.run(once=True)
    delete_replicas(rse_id=rse_id,
                    files=[{
                        'name': files[1]['name'],
                        'scope': internal_scope
                    }])
    with pytest.raises(DataIdentifierNotFound):
        get_did(scope=internal_scope, name=dataset)
Ejemplo n.º 31
0
 def tearDown(self):
     """Delete the test replicas on both RSEs, then remove the RSEs."""
     targets = (self.rse_with_proxy, self.rse_without_proxy)
     # All replicas are deleted before the first RSE is removed.
     for target in targets:
         delete_replicas(rse=target, files=self.files)
     for target in targets:
         del_rse(target)
Ejemplo n.º 32
0
 def tearDown(self):
     """Delete the test replicas on both RSE ids, then remove the RSEs."""
     rse_ids = (self.rse_with_proxy_id, self.rse_without_proxy_id)
     # All replicas are deleted before the first RSE is removed.
     for current_id in rse_ids:
         delete_replicas(rse_id=current_id, files=self.files)
     for current_id in rse_ids:
         del_rse(current_id)
Ejemplo n.º 33
0
 def tearDown(self):
     """Delete each RSE's own file list, then remove both RSEs."""
     # Each RSE id is paired with the file list that is deleted from it.
     cleanup_plan = (
         (self.rses3_id, self.files3),
         (self.rsenons3_id, self.filenons3),
     )
     for rse_id, rse_files in cleanup_plan:
         delete_replicas(rse_id=rse_id, files=rse_files)
     for rse_id, _ in cleanup_plan:
         del_rse(rse_id)
Ejemplo n.º 34
0
            replica_cpt += 1

        assert_equal(nbfiles, replica_cpt)

    def test_delete_replicas(self):
        """ REPLICA (CORE): Delete replicas

        Registers one batch of files on 'MOCK' only and a second batch on
        both 'MOCK' and 'MOCK3', deletes every replica on 'MOCK', then checks
        that get_did raises DataIdentifierNotFound for the first batch and
        still succeeds for the second.
        """
        tmp_scope = 'mock'
        nbfiles = 5
        # Plain int and range() instead of the Python-2-only `1L` / xrange:
        # both forms run on Python 2 and Python 3.
        files1 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for _ in range(nbfiles)]
        add_replicas(rse='MOCK', files=files1, account='root', ignore_availability=True)

        files2 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for _ in range(nbfiles)]
        add_replicas(rse='MOCK', files=files2, account='root', ignore_availability=True)
        add_replicas(rse='MOCK3', files=files2, account='root', ignore_availability=True)

        delete_replicas(rse='MOCK', files=files1 + files2)

        # The first batch had its only replicas on 'MOCK'.
        for replica_file in files1:
            with assert_raises(DataIdentifierNotFound):
                get_did(scope=replica_file['scope'], name=replica_file['name'])

        # The second batch was also registered on 'MOCK3'; get_did must not raise.
        for replica_file in files2:
            get_did(scope=replica_file['scope'], name=replica_file['name'])

    def test_delete_replicas_from_datasets(self):
        """ REPLICA (CORE): Delete replicas from dataset """
        tmp_scope = 'mock'
        tmp_dsn1 = 'dsn_%s' % generate_uuid()
        tmp_dsn2 = 'dsn_%s' % generate_uuid()
        nbfiles = 5
        files1 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1L, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in xrange(nbfiles)]