def test_reaper_multi_vo(vo, second_vo, scope_factory, caches_mock, file_config_mock): """ REAPER (DAEMON): Test the reaper daemon with multiple vo.""" [cache_region] = caches_mock new_vo = second_vo _, [scope_tst, scope_new] = scope_factory(vos=[vo, new_vo]) nb_files = 250 file_size = 200 # 2G rse1_name, rse1_id, dids1 = __add_test_rse_and_replicas(vo=vo, scope=scope_tst, rse_name=rse_name_generator(), names=['lfn' + generate_uuid() for _ in range(nb_files)], file_size=file_size) rse2_name, rse2_id, dids2 = __add_test_rse_and_replicas(vo=new_vo, scope=scope_new, rse_name=rse_name_generator(), names=['lfn' + generate_uuid() for _ in range(nb_files)], file_size=file_size) rse_core.set_rse_limits(rse_id=rse1_id, name='MinFreeSpace', value=50 * file_size) rse_core.set_rse_limits(rse_id=rse2_id, name='MinFreeSpace', value=50 * file_size) # Check we reap all VOs by default cache_region.invalidate() rse_core.set_rse_usage(rse_id=rse1_id, source='storage', used=nb_files * file_size, free=1) rse_core.set_rse_usage(rse_id=rse2_id, source='storage', used=nb_files * file_size, free=1) both_rses = '%s|%s' % (rse1_name, rse2_name) reaper(once=True, rses=[], include_rses=both_rses, exclude_rses=None) reaper(once=True, rses=[], include_rses=both_rses, exclude_rses=None) assert len(list(replica_core.list_replicas(dids=dids1, rse_expression=both_rses))) == 200 assert len(list(replica_core.list_replicas(dids=dids2, rse_expression=both_rses))) == 200
def test_reaper(): """ REAPER (DAEMON): Test the reaper daemon.""" nb_files = 30 file_size = 2147483648L # 2G for i in xrange(nb_files): replica_core.add_replica(rse='MOCK', scope='data13_hip', name='lfn' + generate_uuid(), bytes=file_size, account='root', adler32=None, md5=None) rse_core.set_rse_usage(rse='MOCK', source='srm', used=nb_files * file_size, free=800L) rse_core.set_rse_limits(rse='MOCK', name='MinFreeSpace', value=10737418240L) rse_core.set_rse_limits(rse='MOCK', name='MaxBeingDeletedFiles', value=10) rses = [ rse_core.get_rse('MOCK'), ] reaper(once=True, rses=rses) reaper(once=True, rses=rses)
def test_archive_of_deleted_dids(vo, did_factory, root_account, core_config_mock, caches_mock, file_config_mock): """ REAPER (DAEMON): Test that the options to keep the did and content history work.""" [reaper_cache_region, _config_cache_region, _replica_cache_region] = caches_mock scope = InternalScope('data13_hip', vo=vo) account = root_account nb_files = 10 file_size = 200 # 2G rse_name, rse_id, dids = __add_test_rse_and_replicas(vo=vo, scope=scope, rse_name=rse_name_generator(), names=['lfn' + generate_uuid() for _ in range(nb_files)], file_size=file_size, epoch_tombstone=True) dataset = did_factory.make_dataset() did_core.attach_dids(dids=dids, account=account, **dataset) rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=50 * file_size) assert len(list(replica_core.list_replicas(dids=dids, rse_expression=rse_name))) == nb_files # Check first if the reaper does not delete anything if no space is needed reaper_cache_region.invalidate() rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=323000000000) reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None, greedy=True) assert len(list(replica_core.list_replicas(dids=dids, rse_expression=rse_name))) == 0 file_clause = [] for did in dids: file_clause.append(and_(models.DeletedDataIdentifier.scope == did['scope'], models.DeletedDataIdentifier.name == did['name'])) session = get_session() query = session.query(models.DeletedDataIdentifier.scope, models.DeletedDataIdentifier.name, models.DeletedDataIdentifier.did_type).\ filter(or_(*file_clause)) deleted_dids = list() for did in query.all(): print(did) deleted_dids.append(did) assert len(deleted_dids) == len(dids) query = session.query(models.DataIdentifierAssociationHistory.child_scope, models.DataIdentifierAssociationHistory.child_name, models.DataIdentifierAssociationHistory.child_type).\ filter(and_(models.DataIdentifierAssociationHistory.scope == dataset['scope'], models.DataIdentifierAssociationHistory.name == dataset['name'])) deleted_dids = list() for did in query.all(): print(did) deleted_dids.append(did) assert len(deleted_dids) == len(dids)
def test_reaper(): """ REAPER (DAEMON): Test the reaper daemon.""" if config_get_bool('common', 'multi_vo', raise_exception=False, default=False): vo = {'vo': config_get('client', 'vo', raise_exception=False, default='tst')} else: vo = {} rse_name = rse_name_generator() rse_id = rse_core.add_rse(rse_name, **vo) mock_protocol = {'scheme': 'MOCK', 'hostname': 'localhost', 'port': 123, 'prefix': '/test/reaper', 'impl': 'rucio.rse.protocols.mock.Default', 'domains': { 'lan': {'read': 1, 'write': 1, 'delete': 1}, 'wan': {'read': 1, 'write': 1, 'delete': 1}}} rse_core.add_protocol(rse_id=rse_id, parameter=mock_protocol) nb_files = 30 file_size = 2147483648 # 2G file_names = [] for i in range(nb_files): file_name = 'lfn' + generate_uuid() file_names.append(file_name) replica_core.add_replica(rse_id=rse_id, scope=InternalScope('data13_hip', **vo), name=file_name, bytes=file_size, tombstone=datetime.utcnow() - timedelta(days=1), account=InternalAccount('root', **vo), adler32=None, md5=None) rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=800) rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=10737418240) rse_core.set_rse_limits(rse_id=rse_id, name='MaxBeingDeletedFiles', value=10) rses = [rse_core.get_rse(rse_id), ] reaper(once=True, rses=rses) reaper(once=True, rses=rses) assert_equal(len(list(replica_core.list_replicas(dids=[{'scope': InternalScope('data13_hip', **vo), 'name': n} for n in file_names], rse_expression=rse_name))), nb_files - 10)
def test_reaper(vo, caches_mock, file_config_mock): """ REAPER (DAEMON): Test the reaper daemon.""" [cache_region] = caches_mock scope = InternalScope('data13_hip', vo=vo) nb_files = 250 file_size = 200 # 2G rse_name, rse_id, dids = __add_test_rse_and_replicas(vo=vo, scope=scope, rse_name=rse_name_generator(), names=['lfn' + generate_uuid() for _ in range(nb_files)], file_size=file_size) rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=50 * file_size) assert len(list(replica_core.list_replicas(dids=dids, rse_expression=rse_name))) == nb_files # Check first if the reaper does not delete anything if no space is needed cache_region.invalidate() rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=323000000000) reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None) assert len(list(replica_core.list_replicas(dids=dids, rse_expression=rse_name))) == nb_files # Now put it over threshold and delete cache_region.invalidate() rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=1) reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None) reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None) assert len(list(replica_core.list_replicas(dids, rse_expression=rse_name))) == 200
def test_archive_removal_impact_on_constituents(rse_factory, did_factory, mock_scope, root_account, caches_mock, file_config_mock): [cache_region] = caches_mock rse_name, rse_id = rse_factory.make_mock_rse() scope = mock_scope account = root_account # Create 2 archives and 4 files: # - One only exists in the first archive # - One in both, plus another replica, which is not in an archive # - One in both, plus another replica, which is not in an archive; and this replica has expired # - One in both, plus another replica, which is not in an archive; and this replica has expired; but a replication rule exists on this second replica # Also add these files to datasets, one of which will be removed at the end nb_constituents = 4 nb_c_outside_archive = nb_constituents - 1 constituent_size = 2000 archive_size = 1000 uuid = str(generate_uuid()) constituents = [{'scope': scope, 'name': 'lfn.%s.%d' % (uuid, i)} for i in range(nb_constituents)] did_factory.register_dids(constituents) c_first_archive_only, c_with_replica, c_with_expired_replica, c_with_replica_and_rule = constituents replica_core.add_replica(rse_id=rse_id, account=account, bytes_=constituent_size, **c_with_replica) replica_core.add_replica(rse_id=rse_id, account=account, bytes_=constituent_size, tombstone=datetime.utcnow() - timedelta(days=1), **c_with_expired_replica) replica_core.add_replica(rse_id=rse_id, account=account, bytes_=constituent_size, tombstone=datetime.utcnow() - timedelta(days=1), **c_with_replica_and_rule) rule_core.add_rule(dids=[c_with_replica_and_rule], account=account, copies=1, rse_expression=rse_name, grouping='NONE', weight=None, lifetime=None, locked=False, subscription_id=None) archive1, archive2 = [{'scope': scope, 'name': 'archive_%s.%d.zip' % (uuid, i)} for i in range(2)] replica_core.add_replica(rse_id=rse_id, bytes_=archive_size, account=account, **archive1) replica_core.add_replica(rse_id=rse_id, bytes_=archive_size, account=account, **archive2) did_core.attach_dids(dids=[{'scope': c['scope'], 'name': c['name'], 'bytes': constituent_size} for c in constituents], account=account, **archive1) did_core.attach_dids(dids=[{'scope': c['scope'], 'name': c['name'], 'bytes': constituent_size} for c in [c_with_replica, c_with_expired_replica, c_with_replica_and_rule]], account=account, **archive2) dataset1, dataset2 = [{'scope': scope, 'name': 'dataset_%s.%i' % (uuid, i)} for i in range(2)] did_core.add_did(did_type='DATASET', account=account, **dataset1) did_core.attach_dids(dids=constituents, account=account, **dataset1) did_core.add_did(did_type='DATASET', account=account, **dataset2) did_core.attach_dids(dids=[c_first_archive_only, c_with_expired_replica], account=account, **dataset2) @read_session def __get_archive_contents_history_count(archive, session=None): return session.query(ConstituentAssociationHistory).filter_by(**archive).count() # Run reaper the first time. # the expired non-archive replica of c_with_expired_replica must be removed, # but the did must not be removed, and it must still remain in the dataset because # it still has the replica from inside the archive assert replica_core.get_replica(rse_id=rse_id, **c_with_expired_replica) cache_region.invalidate() rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=2 * archive_size + nb_c_outside_archive * constituent_size) rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=2 * archive_size + nb_c_outside_archive * constituent_size, free=1) reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None) for did in constituents + [archive1, archive2]: assert did_core.get_did(**did) for did in [archive1, archive2, c_with_replica, c_with_replica_and_rule]: assert replica_core.get_replica(rse_id=rse_id, **did) with pytest.raises(ReplicaNotFound): # The replica is only on the archive, not on the constituent replica_core.get_replica(rse_id=rse_id, **c_first_archive_only) with pytest.raises(ReplicaNotFound): # The replica outside the archive was removed by reaper nb_c_outside_archive -= 1 replica_core.get_replica(rse_id=rse_id, **c_with_expired_replica) # Compared to get_replica, list_replicas resolves archives, must return replicas for all files assert len(list(replica_core.list_replicas(dids=constituents))) == 4 assert len(list(did_core.list_content(**dataset1))) == 4 assert len(list(did_core.list_archive_content(**archive1))) == 4 assert len(list(did_core.list_archive_content(**archive2))) == 3 assert __get_archive_contents_history_count(archive1) == 0 assert __get_archive_contents_history_count(archive2) == 0 # Expire the first archive and run reaper again # the archive will be removed; and c_first_archive_only must be removed from datasets # and from the did table. replica_core.set_tombstone(rse_id=rse_id, tombstone=datetime.utcnow() - timedelta(days=1), **archive1) cache_region.invalidate() rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=2 * archive_size + nb_c_outside_archive * constituent_size) rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=2 * archive_size + nb_c_outside_archive * constituent_size, free=1) reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None) with pytest.raises(DataIdentifierNotFound): assert did_core.get_did(**archive1) with pytest.raises(DataIdentifierNotFound): assert did_core.get_did(**c_first_archive_only) assert len(list(replica_core.list_replicas(dids=constituents))) == 3 assert len(list(did_core.list_content(**dataset1))) == 3 assert len(list(did_core.list_archive_content(**archive1))) == 0 assert len(list(did_core.list_archive_content(**archive2))) == 3 assert __get_archive_contents_history_count(archive1) == 4 assert __get_archive_contents_history_count(archive2) == 0 # Expire the second archive replica and run reaper another time # c_with_expired_replica is removed because its external replica got removed at previous step # and it exists only inside the archive now. # If not open, Dataset2 will be removed because it will be empty. did_core.set_status(open=False, **dataset2) replica_core.set_tombstone(rse_id=rse_id, tombstone=datetime.utcnow() - timedelta(days=1), **archive2) cache_region.invalidate() rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=archive_size + nb_c_outside_archive * constituent_size) rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=archive_size + nb_c_outside_archive * constituent_size, free=1) reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None) # The archive must be removed with pytest.raises(DataIdentifierNotFound): assert did_core.get_did(**archive2) # The DIDs which only existed in the archive are also removed with pytest.raises(DataIdentifierNotFound): assert did_core.get_did(**c_first_archive_only) with pytest.raises(DataIdentifierNotFound): assert did_core.get_did(**c_with_expired_replica) # If the DID has a non-expired replica outside the archive without rules on it, the DID is not removed assert did_core.get_did(**c_with_replica) # If the DID has an expired replica outside the archive, but has rules on that replica, the DID is not removed assert did_core.get_did(**c_with_replica_and_rule) assert len(list(replica_core.list_replicas(dids=constituents))) == 2 assert len(list(did_core.list_content(**dataset1))) == 2 with pytest.raises(DataIdentifierNotFound): did_core.get_did(**dataset2) assert len(list(did_core.list_content(**dataset2))) == 0 assert len(list(did_core.list_archive_content(**archive2))) == 0 assert __get_archive_contents_history_count(archive1) == 4 assert __get_archive_contents_history_count(archive2) == 3
def test_source_avoid_deletion(vo, caches_mock, core_config_mock, rse_factory, did_factory, root_account, file_factory): """ Test that sources on a file block it from deletion """ _, reaper_region = caches_mock src_rse1, src_rse1_id = rse_factory.make_mock_rse() src_rse2, src_rse2_id = rse_factory.make_mock_rse() dst_rse, dst_rse_id = rse_factory.make_mock_rse() all_rses = [src_rse1_id, src_rse2_id, dst_rse_id] any_source = f'{src_rse1}|{src_rse2}' for rse_id in [src_rse1_id, src_rse2_id]: rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=1) rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=1, free=0) distance_core.add_distance(src_rse1_id, dst_rse_id, ranking=20) distance_core.add_distance(src_rse2_id, dst_rse_id, ranking=10) # Upload a test file to both rses without registering did = did_factory.random_did() # Register replica on one source RSE replica_core.add_replica(rse_id=src_rse1_id, account=root_account, bytes_=1, tombstone=datetime(year=1970, month=1, day=1), **did) rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=dst_rse, grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None) # Reaper will not delete a file which only has one replica if there is any pending transfer for it reaper_region.invalidate() reaper(once=True, rses=[], include_rses=any_source, exclude_rses=None) replica = next( iter(replica_core.list_replicas(dids=[did], rse_expression=any_source))) assert len(replica['pfns']) == 1 # Register replica on second source rse replica_core.add_replica(rse_id=src_rse2_id, account=root_account, bytes_=1, tombstone=datetime(year=1970, month=1, day=1), **did) replica = next( iter(replica_core.list_replicas(dids=[did], rse_expression=any_source))) assert len(replica['pfns']) == 2 # Submit the transfer. This will create the sources. submitter(once=True, rses=[{ 'id': rse_id } for rse_id in all_rses], partition_wait_time=None, transfertool='mock', transfertype='single', filter_transfertool=None) # None of the replicas will be removed. They are protected by an entry in the sources table reaper_region.invalidate() reaper(once=True, rses=[], include_rses=any_source, exclude_rses=None) replica = next( iter(replica_core.list_replicas(dids=[did], rse_expression=any_source))) assert len(replica['pfns']) == 2 @transactional_session def __delete_sources(rse_id, scope, name, session=None): session.execute( delete(Source).where(Source.rse_id == rse_id, Source.scope == scope, Source.name == name)) # Deletion succeeds for one replica (second still protected by existing request) __delete_sources(src_rse1_id, **did) __delete_sources(src_rse2_id, **did) reaper_region.invalidate() reaper(once=True, rses=[], include_rses=any_source, exclude_rses=None) replica = next( iter(replica_core.list_replicas(dids=[did], rse_expression=any_source))) assert len(replica['pfns']) == 1