def test_reaper(): """ REAPER (DAEMON): Test the reaper daemon.""" nb_files = 30 file_size = 2147483648L # 2G for i in xrange(nb_files): replica_core.add_replica(rse='MOCK', scope='data13_hip', name='lfn' + generate_uuid(), bytes=file_size, account='root', adler32=None, md5=None) rse_core.set_rse_usage(rse='MOCK', source='srm', used=nb_files * file_size, free=800L) rse_core.set_rse_limits(rse='MOCK', name='MinFreeSpace', value=10737418240L) rse_core.set_rse_limits(rse='MOCK', name='MaxBeingDeletedFiles', value=10) rses = [ rse_core.get_rse('MOCK'), ] reaper(once=True, rses=rses) reaper(once=True, rses=rses)
def test_reaper(vo, caches_mock, file_config_mock):
    """ REAPER (DAEMON): Test the reaper daemon."""
    [cache_region] = caches_mock
    scope = InternalScope('data13_hip', vo=vo)

    nb_files = 250
    file_size = 200
    rse_name, rse_id, dids = __add_test_rse_and_replicas(vo=vo, scope=scope, rse_name=rse_name_generator(),
                                                         names=['lfn' + generate_uuid() for _ in range(nb_files)],
                                                         file_size=file_size)

    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=50 * file_size)
    assert len(list(replica_core.list_replicas(dids=dids, rse_expression=rse_name))) == nb_files

    # Check first if the reaper does not delete anything if no space is needed
    cache_region.invalidate()
    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=323000000000)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)
    assert len(list(replica_core.list_replicas(dids=dids, rse_expression=rse_name))) == nb_files

    # Now put it over threshold and delete
    cache_region.invalidate()
    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=1)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)
    assert len(list(replica_core.list_replicas(dids, rse_expression=rse_name))) == 200
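# Several snippets in this section call an __add_test_rse_and_replicas helper that is not shown
# here. The sketch below is an assumption reconstructed from the inline setup in later snippets
# (RSE creation, a MOCK protocol, replicas with a past tombstone); the exact signature and the
# epoch_tombstone flag are guesses, not the authoritative Rucio implementation.
def __add_test_rse_and_replicas(vo, scope, rse_name, names, file_size, epoch_tombstone=False):
    rse_id = rse_core.add_rse(rse_name, vo=vo)
    rse_core.add_protocol(rse_id=rse_id,
                          parameter={'scheme': 'MOCK', 'hostname': 'localhost', 'port': 123,
                                     'prefix': '/test/reaper', 'impl': 'rucio.rse.protocols.mock.Default',
                                     'domains': {'lan': {'read': 1, 'write': 1, 'delete': 1},
                                                 'wan': {'read': 1, 'write': 1, 'delete': 1}}})

    # A tombstone in the past makes the replicas eligible for deletion by the reaper.
    tombstone = datetime(year=1970, month=1, day=1) if epoch_tombstone else datetime.utcnow() - timedelta(days=1)
    dids = []
    for file_name in names:
        replica_core.add_replica(rse_id=rse_id, scope=scope, name=file_name, bytes_=file_size,
                                 tombstone=tombstone, account=InternalAccount('root', vo=vo),
                                 adler32=None, md5=None)
        dids.append({'scope': scope, 'name': file_name})
    return rse_name, rse_id, dids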
def test_reaper(): """ REAPER (DAEMON): Test the reaper daemon.""" nb_files = 30 file_size = 2147483648 # 2G rse_id = rse_core.get_rse_id(rse='MOCK') for i in range(nb_files): replica_core.add_replica(rse_id=rse_id, scope='data13_hip', name='lfn' + generate_uuid(), bytes=file_size, account='root', adler32=None, md5=None) rse_core.set_rse_usage(rse_id=rse_id, source='srm', used=nb_files * file_size, free=800) rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=10737418240) rse_core.set_rse_limits(rse_id=rse_id, name='MaxBeingDeletedFiles', value=10) argv = ['--run-once', '--rses', 'MOCK'] main(argv) # Test the rucio-reaper console script cmd = 'rucio-reaper ' + ' '.join(argv) exitcode, out, err = execute(cmd) print(cmd, out, err) nose.tools.assert_equal(exitcode, 0)
def test_reaper(): """ REAPER2 (DAEMON): Test the reaper2 daemon.""" nb_files = 30 file_size = 2147483648 # 2G rse_id = rse_core.get_rse_id(rse='MOCK') for i in range(nb_files): replica_core.add_replica(rse_id=rse_id, scope=InternalScope('data13_hip'), name='lfn' + generate_uuid(), bytes=file_size, account=InternalAccount('root'), adler32=None, md5=None) rse_core.set_rse_usage(rse_id=rse_id, source='srm', used=nb_files * file_size, free=800) rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=10737418240) rse_core.set_rse_limits(rse_id=rse_id, name='MaxBeingDeletedFiles', value=10) reaper(once=True, rses=[], include_rses='MOCK', exclude_rses=[]) reaper(once=True, rses=[], include_rses='MOCK', exclude_rses=[])
def test_reaper_multi_vo(vo, second_vo, scope_factory, caches_mock, file_config_mock):
    """ REAPER (DAEMON): Test the reaper daemon with multiple VOs."""
    [cache_region] = caches_mock
    new_vo = second_vo
    _, [scope_tst, scope_new] = scope_factory(vos=[vo, new_vo])

    nb_files = 250
    file_size = 200
    rse1_name, rse1_id, dids1 = __add_test_rse_and_replicas(vo=vo, scope=scope_tst, rse_name=rse_name_generator(),
                                                            names=['lfn' + generate_uuid() for _ in range(nb_files)],
                                                            file_size=file_size)
    rse2_name, rse2_id, dids2 = __add_test_rse_and_replicas(vo=new_vo, scope=scope_new, rse_name=rse_name_generator(),
                                                            names=['lfn' + generate_uuid() for _ in range(nb_files)],
                                                            file_size=file_size)

    rse_core.set_rse_limits(rse_id=rse1_id, name='MinFreeSpace', value=50 * file_size)
    rse_core.set_rse_limits(rse_id=rse2_id, name='MinFreeSpace', value=50 * file_size)

    # Check we reap all VOs by default
    cache_region.invalidate()
    rse_core.set_rse_usage(rse_id=rse1_id, source='storage', used=nb_files * file_size, free=1)
    rse_core.set_rse_usage(rse_id=rse2_id, source='storage', used=nb_files * file_size, free=1)
    both_rses = '%s|%s' % (rse1_name, rse2_name)
    reaper(once=True, rses=[], include_rses=both_rses, exclude_rses=None)
    reaper(once=True, rses=[], include_rses=both_rses, exclude_rses=None)
    assert len(list(replica_core.list_replicas(dids=dids1, rse_expression=both_rses))) == 200
    assert len(list(replica_core.list_replicas(dids=dids2, rse_expression=both_rses))) == 200
def test_set_rse_limits(self):
    """ RSE (CLIENTS): Test the update of RSE limits."""
    nb_files = 30
    file_size = 2147483648  # 2G
    for _ in range(nb_files):
        replica_core.add_replica(rse='MOCK', scope='data13_hip', name='lfn' + generate_uuid(),
                                 bytes=file_size, account='root', adler32=None, md5=None)

    rse_core.set_rse_usage(rse='MOCK', source='srm', used=nb_files * file_size, free=800)
    rse_core.set_rse_limits(rse='MOCK', name='MinFreeSpace', value=10737418240)
    rse_core.set_rse_limits(rse='MOCK', name='MaxBeingDeletedFiles', value=10)

    rses = [rse_core.get_rse('MOCK'), ]
    reaper(once=True, rses=rses)
    reaper(once=True, rses=rses)
def test_archive_of_deleted_dids(vo, did_factory, root_account, core_config_mock, caches_mock, file_config_mock):
    """ REAPER (DAEMON): Test that the options to keep the did and content history work."""
    [reaper_cache_region, _config_cache_region, _replica_cache_region] = caches_mock
    scope = InternalScope('data13_hip', vo=vo)
    account = root_account

    nb_files = 10
    file_size = 200
    rse_name, rse_id, dids = __add_test_rse_and_replicas(vo=vo, scope=scope, rse_name=rse_name_generator(),
                                                         names=['lfn' + generate_uuid() for _ in range(nb_files)],
                                                         file_size=file_size, epoch_tombstone=True)
    dataset = did_factory.make_dataset()
    did_core.attach_dids(dids=dids, account=account, **dataset)

    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=50 * file_size)
    assert len(list(replica_core.list_replicas(dids=dids, rse_expression=rse_name))) == nb_files

    # Run the reaper in greedy mode: it deletes everything, even though plenty of free space is reported
    reaper_cache_region.invalidate()
    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=323000000000)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None, greedy=True)
    assert len(list(replica_core.list_replicas(dids=dids, rse_expression=rse_name))) == 0

    file_clause = []
    for did in dids:
        file_clause.append(and_(models.DeletedDataIdentifier.scope == did['scope'],
                                models.DeletedDataIdentifier.name == did['name']))

    session = get_session()
    query = session.query(models.DeletedDataIdentifier.scope,
                          models.DeletedDataIdentifier.name,
                          models.DeletedDataIdentifier.did_type).\
        filter(or_(*file_clause))

    deleted_dids = list()
    for did in query.all():
        print(did)
        deleted_dids.append(did)
    assert len(deleted_dids) == len(dids)

    query = session.query(models.DataIdentifierAssociationHistory.child_scope,
                          models.DataIdentifierAssociationHistory.child_name,
                          models.DataIdentifierAssociationHistory.child_type).\
        filter(and_(models.DataIdentifierAssociationHistory.scope == dataset['scope'],
                    models.DataIdentifierAssociationHistory.name == dataset['name']))

    deleted_dids = list()
    for did in query.all():
        print(did)
        deleted_dids.append(did)
    assert len(deleted_dids) == len(dids)
def test_reaper(): """ REAPER2 (DAEMON): Test the reaper2 daemon.""" if config_get_bool('common', 'multi_vo', raise_exception=False, default=False): vo = {'vo': config_get('client', 'vo', raise_exception=False, default='tst')} else: vo = {} rse_name = rse_name_generator() rse_id = rse_core.add_rse(rse_name, **vo) mock_protocol = {'scheme': 'MOCK', 'hostname': 'localhost', 'port': 123, 'prefix': '/test/reaper', 'impl': 'rucio.rse.protocols.mock.Default', 'domains': { 'lan': {'read': 1, 'write': 1, 'delete': 1}, 'wan': {'read': 1, 'write': 1, 'delete': 1}}} rse_core.add_protocol(rse_id=rse_id, parameter=mock_protocol) nb_files = 30 file_size = 2147483648 # 2G file_names = [] for i in range(nb_files): file_name = 'lfn' + generate_uuid() file_names.append(file_name) replica_core.add_replica(rse_id=rse_id, scope=InternalScope('data13_hip', **vo), name=file_name, bytes=file_size, tombstone=datetime.utcnow() - timedelta(days=1), account=InternalAccount('root', **vo), adler32=None, md5=None) rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=800) rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=10737418240) rse_core.set_rse_limits(rse_id=rse_id, name='MaxBeingDeletedFiles', value=10) if vo: reaper(once=True, rses=[], include_rses='vo=%s&(%s)' % (vo['vo'], rse_name), exclude_rses=[]) reaper(once=True, rses=[], include_rses='vo=%s&(%s)' % (vo['vo'], rse_name), exclude_rses=[]) else: reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=[]) reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=[]) assert len(list(replica_core.list_replicas(dids=[{'scope': InternalScope('data13_hip', **vo), 'name': n} for n in file_names], rse_expression=rse_name))) == nb_files - 5
def set_rse_limits(rse, name, value, issuer, vo='def', session=None):
    """
    Set RSE limits.

    :param rse: The RSE name.
    :param name: The name of the limit.
    :param value: The feature value.
    :param issuer: The issuer account.
    :param vo: The VO to act on.
    :param session: The database session in use.

    :returns: True if successful, otherwise false.
    """
    rse_id = rse_module.get_rse_id(rse=rse, vo=vo, session=session)
    kwargs = {'rse': rse, 'rse_id': rse_id}
    if not permission.has_permission(issuer=issuer, vo=vo, action='set_rse_limits', kwargs=kwargs, session=session):
        raise exception.AccessDenied('Account %s can not update RSE limits for RSE %s' % (issuer, rse))

    return rse_module.set_rse_limits(rse_id=rse_id, name=name, value=value, session=session)
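# A minimal usage sketch for the API-layer helper above; the issuer and values are illustrative,
# and the call assumes a configured database (the session is created by the underlying decorator).
# AccessDenied is raised if the issuer lacks the 'set_rse_limits' permission.
set_rse_limits(rse='MOCK', name='MinFreeSpace', value=10737418240, issuer='root', vo='def')
set_rse_limits(rse='MOCK', name='MaxBeingDeletedFiles', value=10, issuer='root', vo='def')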
def test_reaper(): """ REAPER (DAEMON): Test the reaper daemon.""" if config_get_bool('common', 'multi_vo', raise_exception=False, default=False): vo = {'vo': 'tst'} else: vo = {} nb_files = 30 file_size = 2147483648 # 2G rse_id = rse_core.get_rse_id(rse='MOCK', **vo) for i in range(nb_files): replica_core.add_replica(rse_id=rse_id, scope=InternalScope('data13_hip', **vo), name='lfn' + generate_uuid(), bytes=file_size, account=InternalAccount('root', **vo), adler32=None, md5=None) rse_core.set_rse_usage(rse_id=rse_id, source='srm', used=nb_files * file_size, free=800) rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=10737418240) rse_core.set_rse_limits(rse_id=rse_id, name='MaxBeingDeletedFiles', value=10) argv = ['--run-once', '--rses', 'MOCK'] main(argv) # Test the rucio-reaper console script cmd = 'rucio-reaper ' + ' '.join(argv) exitcode, out, err = execute(cmd) print(cmd, out, err) nose.tools.assert_equal(exitcode, 0)
def set_rse_limits(rse, name, value, issuer):
    """
    Set RSE limits.

    :param rse: The RSE name.
    :param name: The name of the limit.
    :param value: The feature value. Set to -1 to remove the limit.
    :param issuer: The issuer account.

    :returns: True if successful, otherwise false.
    """
    kwargs = {'rse': rse}
    if not permission.has_permission(issuer=issuer, action='set_rse_limits', kwargs=kwargs):
        raise exception.AccessDenied('Account %s can not update RSE limits for RSE %s' % (issuer, rse))

    return rse_module.set_rse_limits(rse=rse, name=name, value=value)
def test_archive_removal_impact_on_constituents(rse_factory, did_factory, mock_scope, root_account, caches_mock, file_config_mock):
    [cache_region] = caches_mock
    rse_name, rse_id = rse_factory.make_mock_rse()
    scope = mock_scope
    account = root_account

    # Create 2 archives and 4 files:
    # - One only exists in the first archive
    # - One in both, plus another replica, which is not in an archive
    # - One in both, plus another replica, which is not in an archive; and this replica has expired
    # - One in both, plus another replica, which is not in an archive; and this replica has expired;
    #   but a replication rule exists on this second replica
    # Also add these files to datasets, one of which will be removed at the end
    nb_constituents = 4
    nb_c_outside_archive = nb_constituents - 1
    constituent_size = 2000
    archive_size = 1000
    uuid = str(generate_uuid())
    constituents = [{'scope': scope, 'name': 'lfn.%s.%d' % (uuid, i)} for i in range(nb_constituents)]
    did_factory.register_dids(constituents)
    c_first_archive_only, c_with_replica, c_with_expired_replica, c_with_replica_and_rule = constituents

    replica_core.add_replica(rse_id=rse_id, account=account, bytes_=constituent_size, **c_with_replica)
    replica_core.add_replica(rse_id=rse_id, account=account, bytes_=constituent_size,
                             tombstone=datetime.utcnow() - timedelta(days=1), **c_with_expired_replica)
    replica_core.add_replica(rse_id=rse_id, account=account, bytes_=constituent_size,
                             tombstone=datetime.utcnow() - timedelta(days=1), **c_with_replica_and_rule)
    rule_core.add_rule(dids=[c_with_replica_and_rule], account=account, copies=1, rse_expression=rse_name,
                       grouping='NONE', weight=None, lifetime=None, locked=False, subscription_id=None)

    archive1, archive2 = [{'scope': scope, 'name': 'archive_%s.%d.zip' % (uuid, i)} for i in range(2)]
    replica_core.add_replica(rse_id=rse_id, bytes_=archive_size, account=account, **archive1)
    replica_core.add_replica(rse_id=rse_id, bytes_=archive_size, account=account, **archive2)
    did_core.attach_dids(dids=[{'scope': c['scope'], 'name': c['name'], 'bytes': constituent_size} for c in constituents],
                         account=account, **archive1)
    did_core.attach_dids(dids=[{'scope': c['scope'], 'name': c['name'], 'bytes': constituent_size}
                               for c in [c_with_replica, c_with_expired_replica, c_with_replica_and_rule]],
                         account=account, **archive2)

    dataset1, dataset2 = [{'scope': scope, 'name': 'dataset_%s.%i' % (uuid, i)} for i in range(2)]
    did_core.add_did(did_type='DATASET', account=account, **dataset1)
    did_core.attach_dids(dids=constituents, account=account, **dataset1)
    did_core.add_did(did_type='DATASET', account=account, **dataset2)
    did_core.attach_dids(dids=[c_first_archive_only, c_with_expired_replica], account=account, **dataset2)

    @read_session
    def __get_archive_contents_history_count(archive, session=None):
        return session.query(ConstituentAssociationHistory).filter_by(**archive).count()

    # Run reaper the first time.
    # the expired non-archive replica of c_with_expired_replica must be removed,
    # but the did must not be removed, and it must still remain in the dataset because
    # it still has the replica from inside the archive
    assert replica_core.get_replica(rse_id=rse_id, **c_with_expired_replica)
    cache_region.invalidate()
    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace',
                            value=2 * archive_size + nb_c_outside_archive * constituent_size)
    rse_core.set_rse_usage(rse_id=rse_id, source='storage',
                           used=2 * archive_size + nb_c_outside_archive * constituent_size, free=1)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)

    for did in constituents + [archive1, archive2]:
        assert did_core.get_did(**did)
    for did in [archive1, archive2, c_with_replica, c_with_replica_and_rule]:
        assert replica_core.get_replica(rse_id=rse_id, **did)
    with pytest.raises(ReplicaNotFound):
        # The replica is only on the archive, not on the constituent
        replica_core.get_replica(rse_id=rse_id, **c_first_archive_only)
    with pytest.raises(ReplicaNotFound):
        # The replica outside the archive was removed by reaper
        nb_c_outside_archive -= 1
        replica_core.get_replica(rse_id=rse_id, **c_with_expired_replica)
    # Compared to get_replica, list_replicas resolves archives, so it must return replicas for all files
    assert len(list(replica_core.list_replicas(dids=constituents))) == 4
    assert len(list(did_core.list_content(**dataset1))) == 4
    assert len(list(did_core.list_archive_content(**archive1))) == 4
    assert len(list(did_core.list_archive_content(**archive2))) == 3
    assert __get_archive_contents_history_count(archive1) == 0
    assert __get_archive_contents_history_count(archive2) == 0

    # Expire the first archive and run reaper again
    # the archive will be removed; and c_first_archive_only must be removed from datasets
    # and from the did table.
    replica_core.set_tombstone(rse_id=rse_id, tombstone=datetime.utcnow() - timedelta(days=1), **archive1)
    cache_region.invalidate()
    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace',
                            value=2 * archive_size + nb_c_outside_archive * constituent_size)
    rse_core.set_rse_usage(rse_id=rse_id, source='storage',
                           used=2 * archive_size + nb_c_outside_archive * constituent_size, free=1)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)

    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**archive1)
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**c_first_archive_only)
    assert len(list(replica_core.list_replicas(dids=constituents))) == 3
    assert len(list(did_core.list_content(**dataset1))) == 3
    assert len(list(did_core.list_archive_content(**archive1))) == 0
    assert len(list(did_core.list_archive_content(**archive2))) == 3
    assert __get_archive_contents_history_count(archive1) == 4
    assert __get_archive_contents_history_count(archive2) == 0

    # Expire the second archive replica and run reaper another time
    # c_with_expired_replica is removed because its external replica got removed at the previous step
    # and it now exists only inside the archive.
    # If not open, dataset2 will be removed because it will be empty.
    did_core.set_status(open=False, **dataset2)
    replica_core.set_tombstone(rse_id=rse_id, tombstone=datetime.utcnow() - timedelta(days=1), **archive2)
    cache_region.invalidate()
    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace',
                            value=archive_size + nb_c_outside_archive * constituent_size)
    rse_core.set_rse_usage(rse_id=rse_id, source='storage',
                           used=archive_size + nb_c_outside_archive * constituent_size, free=1)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)

    # The archive must be removed
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**archive2)
    # The DIDs which only existed in the archive are also removed
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**c_first_archive_only)
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**c_with_expired_replica)
    # If the DID has a non-expired replica outside the archive without rules on it, the DID is not removed
    assert did_core.get_did(**c_with_replica)
    # If the DID has an expired replica outside the archive, but has rules on that replica, the DID is not removed
    assert did_core.get_did(**c_with_replica_and_rule)
    assert len(list(replica_core.list_replicas(dids=constituents))) == 2
    assert len(list(did_core.list_content(**dataset1))) == 2
    with pytest.raises(DataIdentifierNotFound):
        did_core.get_did(**dataset2)
    assert len(list(did_core.list_content(**dataset2))) == 0
    assert len(list(did_core.list_archive_content(**archive2))) == 0
    assert __get_archive_contents_history_count(archive1) == 4
    assert __get_archive_contents_history_count(archive2) == 3
def import_rses(rses, vo='def', session=None):
    new_rses = []
    for rse_name in rses:
        rse = rses[rse_name]
        if isinstance(rse.get('rse_type'), string_types):
            rse['rse_type'] = RSEType.from_string(str(rse['rse_type']))

        try:
            rse_id = rse_module.get_rse_id(rse=rse_name, vo=vo, session=session)
        except RSENotFound:
            rse_id = rse_module.add_rse(rse=rse_name, vo=vo, deterministic=rse.get('deterministic'),
                                        volatile=rse.get('volatile'), city=rse.get('city'),
                                        region_code=rse.get('region_code'), country_name=rse.get('country_name'),
                                        staging_area=rse.get('staging_area'), continent=rse.get('continent'),
                                        time_zone=rse.get('time_zone'), ISP=rse.get('ISP'),
                                        rse_type=rse.get('rse_type'), latitude=rse.get('latitude'),
                                        longitude=rse.get('longitude'), ASN=rse.get('ASN'),
                                        availability=rse.get('availability'), session=session)
        else:
            rse_module.update_rse(rse_id=rse_id, parameters=rse, session=session)

        new_rses.append(rse_id)

        # Protocols
        new_protocols = rse.get('protocols')
        if new_protocols:
            # update existing, add missing and remove left-over protocols
            old_protocols = [{'scheme': protocol['scheme'], 'hostname': protocol['hostname'], 'port': protocol['port']}
                             for protocol in rse_module.get_rse_protocols(rse_id=rse_id, session=session)['protocols']]
            missing_protocols = [new_protocol for new_protocol in new_protocols
                                 if {'scheme': new_protocol['scheme'], 'hostname': new_protocol['hostname'], 'port': new_protocol['port']} not in old_protocols]
            outdated_protocols = [new_protocol for new_protocol in new_protocols
                                  if {'scheme': new_protocol['scheme'], 'hostname': new_protocol['hostname'], 'port': new_protocol['port']} in old_protocols]
            new_protocols = [{'scheme': protocol['scheme'], 'hostname': protocol['hostname'], 'port': protocol['port']}
                             for protocol in new_protocols]
            to_be_removed_protocols = [old_protocol for old_protocol in old_protocols if old_protocol not in new_protocols]

            for protocol in outdated_protocols:
                scheme = protocol['scheme']
                port = protocol['port']
                hostname = protocol['hostname']
                del protocol['scheme']
                del protocol['hostname']
                del protocol['port']
                rse_module.update_protocols(rse_id=rse_id, scheme=scheme, data=protocol,
                                            hostname=hostname, port=port, session=session)

            for protocol in missing_protocols:
                rse_module.add_protocol(rse_id=rse_id, parameter=protocol, session=session)

            for protocol in to_be_removed_protocols:
                scheme = protocol['scheme']
                port = protocol['port']
                hostname = protocol['hostname']
                rse_module.del_protocols(rse_id=rse_id, scheme=scheme, port=port, hostname=hostname, session=session)

        # Limits
        old_limits = rse_module.get_rse_limits(rse_id=rse_id, session=session)
        for limit_name in ['MaxBeingDeletedFiles', 'MinFreeSpace']:
            limit = rse.get(limit_name)
            if limit:
                if limit_name in old_limits:
                    rse_module.delete_rse_limit(rse_id=rse_id, name=limit_name, session=session)
                rse_module.set_rse_limits(rse_id=rse_id, name=limit_name, value=limit, session=session)

        # Attributes
        attributes = rse.get('attributes', {})
        attributes['lfn2pfn_algorithm'] = rse.get('lfn2pfn_algorithm')
        attributes['verify_checksum'] = rse.get('verify_checksum')

        old_attributes = rse_module.list_rse_attributes(rse_id=rse_id, session=session)
        for attr in attributes:
            value = attributes[attr]
            if value is not None:
                if attr in old_attributes:
                    rse_module.del_rse_attribute(rse_id=rse_id, key=attr, session=session)
                rse_module.add_rse_attribute(rse_id=rse_id, key=attr, value=value, session=session)

    # set deleted flag to RSEs that are missing in the import data
    old_rses = [old_rse['id'] for old_rse in rse_module.list_rses(session=session)]
    for old_rse in old_rses:
        if old_rse not in new_rses:
            try:
                rse_module.del_rse(rse_id=old_rse, session=session)
            except RSEOperationNotSupported:
                pass
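# A short usage sketch for the importer above; the RSE name and values are illustrative. The
# payload is a mapping keyed by RSE name, mirroring self.data1['rses'] in the setup snippets below.
# Note that this variant flags every RSE missing from the payload as deleted.
rses = {
    'MY_NEW_RSE': {
        'rse_type': 'TAPE',
        'availability': 3,
        'deterministic': True,
        'volatile': False,
        'protocols': [{'scheme': 'scheme', 'hostname': 'hostname', 'port': 1000, 'impl': 'impl'}],
        'attributes': {'attr1': 'test'},
        'MinFreeSpace': 20000,
        'lfn2pfn_algorithm': 'hash2',
        'verify_checksum': False,
    },
}
import_rses(rses, vo='def')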
def setup(self):
    if config_get_bool('common', 'multi_vo', raise_exception=False, default=False):
        self.vo_header = {'X-Rucio-VO': 'tst'}
        self.vo = {'vo': 'tst'}
    else:
        self.vo_header = {}
        self.vo = {}

    # New RSE
    self.new_rse = rse_name_generator()

    # RSE 1 that already exists
    self.old_rse_1 = rse_name_generator()
    self.old_rse_id_1 = add_rse(self.old_rse_1, availability=1, region_code='DE', country_name='DE',
                                deterministic=True, volatile=True, staging_area=True, time_zone='Europe',
                                latitude='1', longitude='2', **self.vo)
    add_protocol(self.old_rse_id_1, {'scheme': 'scheme1', 'hostname': 'hostname1', 'port': 1000, 'impl': 'TODO'})
    add_protocol(self.old_rse_id_1, {'scheme': 'scheme3', 'hostname': 'hostname3', 'port': 1000, 'impl': 'TODO'})

    set_rse_limits(rse_id=self.old_rse_id_1, name='MaxBeingDeletedFiles', value='10')
    set_rse_limits(rse_id=self.old_rse_id_1, name='MinFreeSpace', value='10')
    add_rse_attribute(rse_id=self.old_rse_id_1, key='attr1', value='test10')
    add_rse_attribute(rse_id=self.old_rse_id_1, key='lfn2pfn_algorithm', value='test10')
    add_rse_attribute(rse_id=self.old_rse_id_1, key='verify_checksum', value=True)

    # RSE 2 that already exists
    self.old_rse_2 = rse_name_generator()
    self.old_rse_id_2 = add_rse(self.old_rse_2, **self.vo)

    # RSE 3 that already exists
    self.old_rse_3 = rse_name_generator()
    self.old_rse_id_3 = add_rse(self.old_rse_3, **self.vo)

    # RSE 4 that already exists
    self.old_rse_4 = rse_name_generator()
    self.old_rse_id_4 = add_rse(self.old_rse_4, **self.vo)

    # Distance that already exists
    add_distance(self.old_rse_id_1, self.old_rse_id_2)

    self.data1 = {
        'rses': {
            self.new_rse: {
                'rse_type': RSEType.TAPE,
                'availability': 3,
                'city': 'NewCity',
                'region_code': 'CH',
                'country_name': 'switzerland',
                'staging_area': False,
                'time_zone': 'Europe',
                'latitude': 1,
                'longitude': 2,
                'deterministic': True,
                'volatile': False,
                'protocols': [{'scheme': 'scheme', 'hostname': 'hostname', 'port': 1000, 'impl': 'impl'}],
                'attributes': {'attr1': 'test'},
                'MinFreeSpace': 20000,
                'lfn2pfn_algorithm': 'hash2',
                'verify_checksum': False,
                'availability_delete': True,
                'availability_read': False,
                'availability_write': True
            },
            self.old_rse_1: {
                'rse_type': RSEType.TAPE,
                'deterministic': False,
                'volatile': False,
                'region_code': 'US',
                'country_name': 'US',
                'staging_area': False,
                'time_zone': 'Asia',
                'longitude': 5,
                'city': 'City',
                'availability': 2,
                'latitude': 10,
                'protocols': [{'scheme': 'scheme1', 'hostname': 'hostname1', 'port': 1000, 'prefix': 'prefix', 'impl': 'impl1'},
                              {'scheme': 'scheme2', 'hostname': 'hostname2', 'port': 1001, 'impl': 'impl'}],
                'attributes': {'attr1': 'test1', 'attr2': 'test2'},
                'MinFreeSpace': 10000,
                'MaxBeingDeletedFiles': 1000,
                'verify_checksum': False,
                'lfn2pfn_algorithm': 'hash3',
                'availability_delete': False,
                'availability_read': False,
                'availability_write': True
            },
            self.old_rse_2: {},
            self.old_rse_3: {}
        },
        'distances': {
            self.old_rse_1: {
                self.old_rse_2: {'src_rse': self.old_rse_1, 'dest_rse': self.old_rse_2, 'ranking': 10},
                self.old_rse_3: {'src_rse': self.old_rse_1, 'dest_rse': self.old_rse_3, 'ranking': 4}
            }
        }
    }
    self.data2 = {'rses': {self.new_rse: {'rse': self.new_rse}}}
    self.data3 = {'distances': {}}
def import_data(data, session=None):
    """
    Import data to add and update records in Rucio.

    :param data: data to be imported as dictionary.
    :param session: database session in use.
    """
    # RSEs
    rses = data.get('rses')
    if rses:
        for rse in rses:
            protocols = rse.get('protocols')
            if protocols:
                protocols = protocols.get('protocols')
                del rse['protocols']
            rse_name = rse['rse']
            del rse['rse']
            if not rse_module.rse_exists(rse_name, session=session):
                rse_module.add_rse(rse_name, deterministic=rse.get('deterministic'), volatile=rse.get('volatile'),
                                   city=rse.get('city'), region_code=rse.get('region_code'),
                                   country_name=rse.get('country_name'), staging_area=rse.get('staging_area'),
                                   continent=rse.get('continent'), time_zone=rse.get('time_zone'),
                                   ISP=rse.get('ISP'), rse_type=rse.get('rse_type'), latitude=rse.get('latitude'),
                                   longitude=rse.get('longitude'), ASN=rse.get('ASN'),
                                   availability=rse.get('availability'), session=session)
            else:
                rse_module.update_rse(rse_name, rse, session=session)

            # Protocols
            if protocols:
                old_protocols = rse_module.get_rse_protocols(rse=rse_name, session=session)
                for protocol in protocols:
                    scheme = protocol.get('scheme')
                    hostname = protocol.get('hostname')
                    port = protocol.get('port')
                    intersection = [old_protocol for old_protocol in old_protocols['protocols']
                                    if old_protocol['scheme'] == scheme and old_protocol['hostname'] == hostname and old_protocol['port'] == port]
                    if intersection:
                        del protocol['scheme']
                        del protocol['hostname']
                        del protocol['port']
                        rse_module.update_protocols(rse=rse_name, scheme=scheme, data=protocol,
                                                    hostname=hostname, port=port, session=session)
                    else:
                        rse_module.add_protocol(rse=rse_name, parameter=protocol, session=session)

            # Limits
            limits = rse.get('limits')
            if limits:
                old_limits = rse_module.get_rse_limits(rse=rse_name, session=session)
                for limit in limits:
                    if limit in old_limits:
                        rse_module.delete_rse_limit(rse=rse_name, name=limit, session=session)
                    rse_module.set_rse_limits(rse=rse_name, name=limit, value=limits[limit], session=session)

            # Transfer limits
            transfer_limits = rse.get('transfer_limits')
            if transfer_limits:
                for limit in transfer_limits:
                    old_transfer_limits = rse_module.get_rse_transfer_limits(rse=rse_name, activity=limit, session=session)
                    if limit in old_transfer_limits:
                        rse_module.delete_rse_transfer_limits(rse=rse_name, activity=limit, session=session)
                    # dict_items is not indexable in Python 3, so materialise it before taking the first entry
                    max_transfers = list(transfer_limits[limit].items())[0][1]['max_transfers']
                    rse_module.set_rse_transfer_limits(rse=rse_name, activity=limit,
                                                       max_transfers=max_transfers, session=session)

            # Attributes
            attributes = rse.get('attributes')
            if attributes:
                old_attributes = rse_module.list_rse_attributes(rse=rse_name, session=session)
                for attr in attributes:
                    if attr in old_attributes:
                        rse_module.del_rse_attribute(rse=rse_name, key=attr, session=session)
                    rse_module.add_rse_attribute(rse=rse_name, key=attr, value=attributes[attr], session=session)

    # Distances
    distances = data.get('distances')
    if distances:
        for src_rse_name in distances:
            src = rse_module.get_rse_id(src_rse_name, session=session)
            for dest_rse_name in distances[src_rse_name]:
                dest = rse_module.get_rse_id(dest_rse_name, session=session)
                distance = distances[src_rse_name][dest_rse_name]
                del distance['src_rse_id']
                del distance['dest_rse_id']

                old_distance = distance_module.get_distances(src_rse_id=src, dest_rse_id=dest, session=session)
                if old_distance:
                    distance_module.update_distances(src_rse_id=src, dest_rse_id=dest, parameters=distance, session=session)
                else:
                    distance_module.add_distance(src_rse_id=src, dest_rse_id=dest,
                                                 ranking=distance.get('ranking'),
                                                 agis_distance=distance.get('agis_distance'),
                                                 geoip_distance=distance.get('geoip_distance'),
                                                 active=distance.get('active'),
                                                 submitted=distance.get('submitted'),
                                                 transfer_speed=distance.get('transfer_speed'),
                                                 finished=distance.get('finished'),
                                                 failed=distance.get('failed'),
                                                 session=session)
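# A compact usage sketch for import_data; the payload shape follows the data1 fixture in the older
# setup snippet below (note that here 'rses' is a list of dicts carrying an 'rse' name key, unlike
# the mapping consumed by import_rses). All values are illustrative only.
data = {
    'rses': [{
        'rse': 'MY_NEW_RSE',
        'rse_type': 'TAPE',
        'availability': 5,
        'protocols': {'protocols': [{'scheme': 'scheme', 'hostname': 'hostname', 'port': 1000, 'impl': 'impl'}]},
        'limits': {'MinFreeSpace': 20000},
        'attributes': {'attr1': 'test'},
    }],
    'distances': {},
}
import_data(data)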
def test_reaper(): """ REAPER2 (DAEMON): Test the reaper2 daemon.""" if config_get_bool('common', 'multi_vo', raise_exception=False, default=False): vo = { 'vo': config_get('client', 'vo', raise_exception=False, default='tst') } new_vo = {'vo': 'new'} if not vo_core.vo_exists(**new_vo): vo_core.add_vo(description='Test', email='*****@*****.**', **new_vo) if not scope_core.check_scope(InternalScope('data13_hip', **new_vo)): scope_core.add_scope(InternalScope('data13_hip', **new_vo), InternalAccount('root', **new_vo)) nb_rses = 2 else: vo = {} new_vo = {} nb_rses = 1 mock_protocol = { 'scheme': 'MOCK', 'hostname': 'localhost', 'port': 123, 'prefix': '/test/reaper', 'impl': 'rucio.rse.protocols.mock.Default', 'domains': { 'lan': { 'read': 1, 'write': 1, 'delete': 1 }, 'wan': { 'read': 1, 'write': 1, 'delete': 1 } } } nb_files = 30 file_size = 2147483648 # 2G rse_names = [] all_file_names = [] for j in range(nb_rses): rse_name = rse_name_generator() rse_names.append(rse_name) rse_id = rse_core.add_rse(rse_name, **vo) rse_core.add_protocol(rse_id=rse_id, parameter=mock_protocol) if new_vo: rse_id_new = rse_core.add_rse(rse_name, **new_vo) rse_core.add_protocol(rse_id=rse_id_new, parameter=mock_protocol) file_names = [] for i in range(nb_files): file_name = 'lfn' + generate_uuid() file_names.append(file_name) replica_core.add_replica(rse_id=rse_id, scope=InternalScope('data13_hip', **vo), name=file_name, bytes=file_size, tombstone=datetime.utcnow() - timedelta(days=1), account=InternalAccount('root', **vo), adler32=None, md5=None) if new_vo: replica_core.add_replica( rse_id=rse_id_new, scope=InternalScope('data13_hip', **new_vo), name=file_name, bytes=file_size, tombstone=datetime.utcnow() - timedelta(days=1), account=InternalAccount('root', **new_vo), adler32=None, md5=None) all_file_names.append(file_names) rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=800) rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=10737418240) rse_core.set_rse_limits(rse_id=rse_id, name='MaxBeingDeletedFiles', value=10) if new_vo: rse_core.set_rse_usage(rse_id=rse_id_new, source='storage', used=nb_files * file_size, free=800) rse_core.set_rse_limits(rse_id=rse_id_new, name='MinFreeSpace', value=10737418240) rse_core.set_rse_limits(rse_id=rse_id_new, name='MaxBeingDeletedFiles', value=10) if not vo: reaper(once=True, rses=[], include_rses=rse_names[0], exclude_rses=[]) reaper(once=True, rses=[], include_rses=rse_names[0], exclude_rses=[]) assert len( list( replica_core.list_replicas( dids=[{ 'scope': InternalScope('data13_hip', **vo), 'name': n } for n in all_file_names[0]], rse_expression=rse_name))) == nb_files - 5 else: # Check we reap all VOs by default reaper(once=True, rses=[], include_rses=rse_names[0], exclude_rses=[]) reaper(once=True, rses=[], include_rses=rse_names[0], exclude_rses=[]) assert len( list( replica_core.list_replicas( dids=[{ 'scope': InternalScope('data13_hip', **vo), 'name': n } for n in all_file_names[0]], rse_expression=rse_names[0]))) == nb_files - 5 assert len( list( replica_core.list_replicas( dids=[{ 'scope': InternalScope('data13_hip', **new_vo), 'name': n } for n in all_file_names[0]], rse_expression=rse_names[0]))) == nb_files - 5 # Check we don't affect a second VO that isn't specified reaper(once=True, rses=[], include_rses=rse_names[1], exclude_rses=[], vos=['new']) reaper(once=True, rses=[], include_rses=rse_names[1], exclude_rses=[], vos=['new']) assert len( list( replica_core.list_replicas( dids=[{ 'scope': 
InternalScope('data13_hip', **vo), 'name': n } for n in all_file_names[1]], rse_expression=rse_names[1]))), nb_files assert len( list( replica_core.list_replicas( dids=[{ 'scope': InternalScope('data13_hip', **new_vo), 'name': n } for n in all_file_names[1]], rse_expression=rse_names[1]))), nb_files - 5
def setup(self):
    # New RSE
    self.new_rse = rse_name_generator()

    # RSE 1 that already exists
    self.old_rse_1 = rse_name_generator()
    self.old_rse_id_1 = add_rse(self.old_rse_1, availability=1, region_code='DE', country_name='DE',
                                deterministic=True, volatile=True, staging_area=True, time_zone='Europe',
                                latitude='1', longitude='2')
    add_protocol(self.old_rse_id_1, {'scheme': 'scheme1', 'hostname': 'hostname1', 'port': 1000, 'impl': 'TODO'})
    add_protocol(self.old_rse_id_1, {'scheme': 'scheme3', 'hostname': 'hostname3', 'port': 1000, 'impl': 'TODO'})

    set_rse_limits(rse_id=self.old_rse_id_1, name='MaxBeingDeletedFiles', value='10')
    set_rse_limits(rse_id=self.old_rse_id_1, name='MinFreeSpace', value='10')
    add_rse_attribute(rse_id=self.old_rse_id_1, key='attr1', value='test10')
    add_rse_attribute(rse_id=self.old_rse_id_1, key='lfn2pfn_algorithm', value='test10')
    add_rse_attribute(rse_id=self.old_rse_id_1, key='verify_checksum', value=True)

    # RSE 2 that already exists
    self.old_rse_2 = rse_name_generator()
    self.old_rse_id_2 = add_rse(self.old_rse_2)

    # RSE 3 that already exists
    self.old_rse_3 = rse_name_generator()
    self.old_rse_id_3 = add_rse(self.old_rse_3)

    # RSE 4 that already exists
    self.old_rse_4 = rse_name_generator()
    self.old_rse_id_4 = add_rse(self.old_rse_4)

    # Distance that already exists
    add_distance(self.old_rse_id_1, self.old_rse_id_2)

    # Account 1 that already exists
    self.old_account_1 = InternalAccount(rse_name_generator())
    add_account(self.old_account_1, AccountType.USER, email='test')

    # Account 2 that already exists
    self.old_account_2 = InternalAccount(rse_name_generator())
    add_account(self.old_account_2, AccountType.USER, email='test')

    # Identity that should be removed
    self.identity_to_be_removed = rse_name_generator()
    add_identity(self.identity_to_be_removed, IdentityType.X509, email='email')
    add_account_identity(self.identity_to_be_removed, IdentityType.X509, self.old_account_2, 'email')

    # Identity that already exists but should be added to the account
    self.identity_to_be_added_to_account = rse_name_generator()
    add_identity(self.identity_to_be_added_to_account, IdentityType.X509, email='email')

    self.data1 = {
        'rses': {
            self.new_rse: {
                'rse_type': RSEType.TAPE,
                'availability': 3,
                'city': 'NewCity',
                'region_code': 'CH',
                'country_name': 'switzerland',
                'staging_area': False,
                'time_zone': 'Europe',
                'latitude': 1,
                'longitude': 2,
                'deterministic': True,
                'volatile': False,
                'protocols': [{'scheme': 'scheme', 'hostname': 'hostname', 'port': 1000, 'impl': 'impl'}],
                'attributes': {'attr1': 'test'},
                'MinFreeSpace': 20000,
                'lfn2pfn_algorithm': 'hash2',
                'verify_checksum': False,
                'availability_delete': True,
                'availability_read': False,
                'availability_write': True
            },
            self.old_rse_1: {
                'rse_type': RSEType.TAPE,
                'deterministic': False,
                'volatile': False,
                'region_code': 'US',
                'country_name': 'US',
                'staging_area': False,
                'time_zone': 'Asia',
                'longitude': 5,
                'city': 'City',
                'availability': 2,
                'latitude': 10,
                'protocols': [{'scheme': 'scheme1', 'hostname': 'hostname1', 'port': 1000, 'prefix': 'prefix', 'impl': 'impl1'},
                              {'scheme': 'scheme2', 'hostname': 'hostname2', 'port': 1001, 'impl': 'impl'}],
                'attributes': {'attr1': 'test1', 'attr2': 'test2'},
                'MinFreeSpace': 10000,
                'MaxBeingDeletedFiles': 1000,
                'verify_checksum': False,
                'lfn2pfn_algorithm': 'hash3',
                'availability_delete': False,
                'availability_read': False,
                'availability_write': True
            },
            self.old_rse_2: {},
            self.old_rse_3: {}
        },
        'distances': {
            self.old_rse_1: {
                self.old_rse_2: {'src_rse': self.old_rse_1, 'dest_rse': self.old_rse_2, 'ranking': 10},
                self.old_rse_3: {'src_rse': self.old_rse_1, 'dest_rse': self.old_rse_3, 'ranking': 4}
            }
        },
        'accounts': [{
            'account': InternalAccount('new_account'),
            'email': 'email',
            'identities': [{'type': 'userpass', 'identity': 'username', 'password': '******'}]
        }, {
            'account': InternalAccount('new_account2'),
            'email': 'email'
        }, {
            'account': self.old_account_2,
            'email': 'new_email',
            'identities': [{'identity': self.identity_to_be_added_to_account, 'type': 'x509'},
                           {'type': 'userpass', 'identity': 'username2', 'password': '******'}]
        }, {
            'account': InternalAccount('jdoe'),
            'email': 'email'
        }]
    }
    self.data2 = {'rses': {self.new_rse: {'rse': self.new_rse}}}
    self.data3 = {'distances': {}}
def setup(self):
    # New RSE
    self.new_rse = rse_name_generator()

    # RSE 1 that already exists
    self.old_rse_1 = rse_name_generator()
    add_rse(self.old_rse_1, availability=1)
    add_protocol(self.old_rse_1, {'scheme': 'scheme1', 'hostname': 'hostname1', 'port': 1000, 'impl': 'impl'})
    self.old_rse_id_1 = get_rse_id(self.old_rse_1)
    set_rse_limits(rse=self.old_rse_1, name='limit1', value='10')
    set_rse_transfer_limits(rse=self.old_rse_1, activity='activity1', max_transfers=10)
    add_rse_attribute(rse=self.old_rse_1, key='attr1', value='test10')

    # RSE 2 that already exists
    self.old_rse_2 = rse_name_generator()
    add_rse(self.old_rse_2)
    self.old_rse_id_2 = get_rse_id(self.old_rse_2)

    # RSE 3 that already exists
    self.old_rse_3 = rse_name_generator()
    add_rse(self.old_rse_3)
    self.old_rse_id_3 = get_rse_id(self.old_rse_3)

    # Distance that already exists
    add_distance(self.old_rse_id_1, self.old_rse_id_2)

    self.data1 = {
        'rses': [{
            'rse': self.new_rse,
            'rse_type': 'TAPE',
            'availability': 5,
            'city': 'NewCity',
            'protocols': {
                'protocols': [{'scheme': 'scheme', 'hostname': 'hostname', 'port': 1000, 'impl': 'impl'}]
            },
            'limits': {'limit1': 0},
            'transfer_limits': {'activity1': {'unknown_rse_id': {'max_transfers': 1}}},
            'attributes': {'attr1': 'test'}
        }, {
            'rse': self.old_rse_1,
            'protocols': {
                'protocols': [{'scheme': 'scheme1', 'hostname': 'hostname1', 'port': 1000, 'prefix': 'prefix', 'impl': 'impl1'},
                              {'scheme': 'scheme2', 'hostname': 'hostname2', 'port': 1001, 'impl': 'impl'}]
            },
            'limits': {'limit1': 0, 'limit2': 2},
            'transfer_limits': {
                'activity1': {self.old_rse_id_1: {'max_transfers': 1}},
                'activity2': {self.old_rse_id_1: {'max_transfers': 2}}
            },
            'attributes': {'attr1': 'test1', 'attr2': 'test2'}
        }],
        'distances': {
            self.old_rse_1: {
                self.old_rse_2: {'src_rse_id': self.old_rse_id_1, 'dest_rse_id': self.old_rse_id_2, 'ranking': 10},
                self.old_rse_3: {'src_rse_id': self.old_rse_id_1, 'dest_rse_id': self.old_rse_id_3, 'ranking': 4}
            }
        }
    }
    self.data2 = {'rses': [{'rse': self.new_rse}]}
    self.data3 = {'distances': {}}
def import_rses(rses, rse_sync_method='edit', attr_sync_method='edit', protocol_sync_method='edit', vo='def', session=None):
    new_rses = []
    for rse_name in rses:
        rse = rses[rse_name]
        if isinstance(rse.get('rse_type'), string_types):
            rse['rse_type'] = RSEType(rse['rse_type'])

        if rse_module.rse_exists(rse_name, vo=vo, include_deleted=False, session=session):
            # RSE exists and is active
            rse_id = rse_module.get_rse_id(rse=rse_name, vo=vo, session=session)
            selected_rse_properties = {key: rse[key] for key in rse if key in rse_module.MUTABLE_RSE_PROPERTIES}
            rse_module.update_rse(rse_id=rse_id, parameters=selected_rse_properties, session=session)
        elif rse_module.rse_exists(rse_name, vo=vo, include_deleted=True, session=session):
            # RSE exists but in deleted state
            # Should only modify the RSE if the importer is configured for edit or hard sync
            if rse_sync_method in ['edit', 'hard']:
                rse_id = rse_module.get_rse_id(rse=rse_name, vo=vo, include_deleted=True, session=session)
                rse_module.restore_rse(rse_id, session=session)
                selected_rse_properties = {key: rse[key] for key in rse if key in rse_module.MUTABLE_RSE_PROPERTIES}
                rse_module.update_rse(rse_id=rse_id, parameters=selected_rse_properties, session=session)
            else:
                # Config is in RSE append-only mode, should not modify the disabled RSE
                continue
        else:
            rse_id = rse_module.add_rse(rse=rse_name, vo=vo, deterministic=rse.get('deterministic'),
                                        volatile=rse.get('volatile'), city=rse.get('city'),
                                        region_code=rse.get('region_code'), country_name=rse.get('country_name'),
                                        staging_area=rse.get('staging_area'), continent=rse.get('continent'),
                                        time_zone=rse.get('time_zone'), ISP=rse.get('ISP'),
                                        rse_type=rse.get('rse_type'), latitude=rse.get('latitude'),
                                        longitude=rse.get('longitude'), ASN=rse.get('ASN'),
                                        availability=rse.get('availability'), session=session)

        new_rses.append(rse_id)

        # Protocols
        new_protocols = rse.get('protocols')
        if new_protocols:
            # update existing, add missing and remove left-over protocols
            old_protocols = [{'scheme': protocol['scheme'], 'hostname': protocol['hostname'], 'port': protocol['port']}
                             for protocol in rse_module.get_rse_protocols(rse_id=rse_id, session=session)['protocols']]
            missing_protocols = [new_protocol for new_protocol in new_protocols
                                 if {'scheme': new_protocol['scheme'], 'hostname': new_protocol['hostname'], 'port': new_protocol['port']} not in old_protocols]
            outdated_protocols = [new_protocol for new_protocol in new_protocols
                                  if {'scheme': new_protocol['scheme'], 'hostname': new_protocol['hostname'], 'port': new_protocol['port']} in old_protocols]
            new_protocols = [{'scheme': protocol['scheme'], 'hostname': protocol['hostname'], 'port': protocol['port']}
                             for protocol in new_protocols]
            to_be_removed_protocols = [old_protocol for old_protocol in old_protocols if old_protocol not in new_protocols]

            if protocol_sync_method == 'append':
                outdated_protocols = []

            for protocol in outdated_protocols:
                scheme = protocol['scheme']
                port = protocol['port']
                hostname = protocol['hostname']
                del protocol['scheme']
                del protocol['hostname']
                del protocol['port']
                rse_module.update_protocols(rse_id=rse_id, scheme=scheme, data=protocol,
                                            hostname=hostname, port=port, session=session)

            for protocol in missing_protocols:
                rse_module.add_protocol(rse_id=rse_id, parameter=protocol, session=session)

            if protocol_sync_method == 'hard':
                for protocol in to_be_removed_protocols:
                    scheme = protocol['scheme']
                    port = protocol['port']
                    hostname = protocol['hostname']
                    rse_module.del_protocols(rse_id=rse_id, scheme=scheme, port=port, hostname=hostname, session=session)

        # Limits
        old_limits = rse_module.get_rse_limits(rse_id=rse_id, session=session)
        for limit_name in ['MaxBeingDeletedFiles', 'MinFreeSpace']:
            limit = rse.get(limit_name)
            if limit:
                if limit_name in old_limits:
                    rse_module.delete_rse_limits(rse_id=rse_id, name=limit_name, session=session)
                rse_module.set_rse_limits(rse_id=rse_id, name=limit_name, value=limit, session=session)

        # Attributes
        attributes = rse.get('attributes', {})
        attributes['lfn2pfn_algorithm'] = rse.get('lfn2pfn_algorithm')
        attributes['verify_checksum'] = rse.get('verify_checksum')

        old_attributes = rse_module.list_rse_attributes(rse_id=rse_id, session=session)
        missing_attributes = [attribute for attribute in old_attributes if attribute not in attributes]

        for attr in attributes:
            value = attributes[attr]
            if value is not None:
                if attr in old_attributes:
                    if attr_sync_method not in ['append']:
                        rse_module.del_rse_attribute(rse_id=rse_id, key=attr, session=session)
                        rse_module.add_rse_attribute(rse_id=rse_id, key=attr, value=value, session=session)
                else:
                    rse_module.add_rse_attribute(rse_id=rse_id, key=attr, value=value, session=session)

        if attr_sync_method == 'hard':
            for attr in missing_attributes:
                if attr != rse_name:
                    rse_module.del_rse_attribute(rse_id=rse_id, key=attr, session=session)

    # set deleted flag to RSEs that are missing in the import data
    old_rses = [old_rse['id'] for old_rse in rse_module.list_rses(session=session)]
    if rse_sync_method == 'hard':
        for old_rse in old_rses:
            if old_rse not in new_rses:
                try:
                    rse_module.del_rse(rse_id=old_rse, session=session)
                except RSEOperationNotSupported:
                    pass
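# A hedged invocation of the sync-aware importer above; the RSE name and payload are made up.
# With 'hard' sync methods, protocols and attributes absent from the payload are removed, and
# RSEs absent from the payload are flagged as deleted.
rses = {
    'MY_IMPORTED_RSE': {
        'rse_type': 'DISK',
        'deterministic': True,
        'volatile': False,
        'protocols': [{'scheme': 'scheme', 'hostname': 'hostname', 'port': 1000, 'impl': 'impl'}],
        'attributes': {'attr1': 'test'},
        'MinFreeSpace': 20000,
    },
}
import_rses(rses, rse_sync_method='hard', attr_sync_method='hard', protocol_sync_method='hard', vo='def')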
def test_multihop_intermediate_replica_lifecycle(vo, did_factory, root_account, core_config_mock, caches_mock):
    """
    Ensure that intermediate replicas created by the submitter are protected from deletion
    even if their tombstone is set to epoch.
    After successful transfers, intermediate replicas with default (epoch) tombstone must be
    removed. The others must be left intact.
    """
    src_rse1_name = 'XRD1'
    src_rse1_id = rse_core.get_rse_id(rse=src_rse1_name, vo=vo)
    src_rse2_name = 'XRD2'
    src_rse2_id = rse_core.get_rse_id(rse=src_rse2_name, vo=vo)
    jump_rse_name = 'XRD3'
    jump_rse_id = rse_core.get_rse_id(rse=jump_rse_name, vo=vo)
    dst_rse_name = 'XRD4'
    dst_rse_id = rse_core.get_rse_id(rse=dst_rse_name, vo=vo)

    all_rses = [src_rse1_id, src_rse2_id, jump_rse_id, dst_rse_id]
    did = did_factory.upload_test_file(src_rse1_name)

    # Copy the replica to a second source. This avoids the special case of a unique last replica,
    # which could be handled in a special (more careful) way.
    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=src_rse2_name,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)
    submitter(once=True, rses=[{'id': rse_id} for rse_id in all_rses], partition_wait_time=None,
              transfertype='single', filter_transfertool=None)
    replica = __wait_for_replica_transfer(dst_rse_id=src_rse2_id, **did)
    assert replica['state'] == ReplicaState.AVAILABLE

    rse_core.set_rse_limits(rse_id=jump_rse_id, name='MinFreeSpace', value=1)
    rse_core.set_rse_usage(rse_id=jump_rse_id, source='storage', used=1, free=0)
    try:
        rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=dst_rse_name,
                           grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)

        # Submit transfers to FTS
        # Ensure a replica was created on the intermediary host with epoch tombstone
        submitter(once=True, rses=[{'id': rse_id} for rse_id in all_rses], partition_wait_time=None,
                  transfertype='single', filter_transfertool=None)
        request = request_core.get_request_by_did(rse_id=jump_rse_id, **did)
        assert request['state'] == RequestState.SUBMITTED
        replica = replica_core.get_replica(rse_id=jump_rse_id, **did)
        assert replica['tombstone'] == datetime(year=1970, month=1, day=1)
        assert replica['state'] == ReplicaState.COPYING

        # The intermediate replica is protected by its state (Copying)
        rucio.daemons.reaper.reaper.REGION.invalidate()
        reaper(once=True, rses=[], include_rses=jump_rse_name, exclude_rses=None)
        replica = replica_core.get_replica(rse_id=jump_rse_id, **did)
        assert replica['state'] == ReplicaState.COPYING

        # Wait for the intermediate replica to become ready
        replica = __wait_for_replica_transfer(dst_rse_id=jump_rse_id, **did)
        assert replica['state'] == ReplicaState.AVAILABLE

        # The intermediate replica is protected by an entry in the sources table
        # Reaper must not remove this replica, even if it has an obsolete tombstone
        rucio.daemons.reaper.reaper.REGION.invalidate()
        reaper(once=True, rses=[], include_rses=jump_rse_name, exclude_rses=None)
        replica = replica_core.get_replica(rse_id=jump_rse_id, **did)
        assert replica

        # FTS fails the second transfer, so run the submitter again to copy from the jump RSE to the destination RSE
        submitter(once=True, rses=[{'id': rse_id} for rse_id in all_rses], partition_wait_time=None,
                  transfertype='single', filter_transfertool=None)

        # Wait for the destination replica to become ready
        replica = __wait_for_replica_transfer(dst_rse_id=dst_rse_id, **did)
        assert replica['state'] == ReplicaState.AVAILABLE

        rucio.daemons.reaper.reaper.REGION.invalidate()
        reaper(once=True, rses=[], include_rses='test_container_xrd=True', exclude_rses=None)

        with pytest.raises(ReplicaNotFound):
            replica_core.get_replica(rse_id=jump_rse_id, **did)
    finally:
        @transactional_session
        def _cleanup_all_usage_and_limits(rse_id, session=None):
            session.query(models.RSELimit).filter_by(rse_id=rse_id).delete()
            session.query(models.RSEUsage).filter_by(rse_id=rse_id, source='storage').delete()

        _cleanup_all_usage_and_limits(rse_id=jump_rse_id)
def test_reaper(): """ REAPER (DAEMON): Test the reaper daemon.""" rse_name = rse_name_generator() rse_id = rse_core.add_rse(rse_name) mock_protocol = { 'scheme': 'MOCK', 'hostname': 'localhost', 'port': 123, 'prefix': '/test/reaper', 'impl': 'rucio.rse.protocols.mock.Default', 'domains': { 'lan': { 'read': 1, 'write': 1, 'delete': 1 }, 'wan': { 'read': 1, 'write': 1, 'delete': 1 } } } rse_core.add_protocol(rse_id=rse_id, parameter=mock_protocol) nb_files = 30 file_size = 2147483648 # 2G file_names = [] for i in range(nb_files): file_name = 'lfn' + generate_uuid() file_names.append(file_name) replica_core.add_replica(rse_id=rse_id, scope=InternalScope('data13_hip'), name=file_name, bytes=file_size, tombstone=datetime.utcnow() - timedelta(days=1), account=InternalAccount('root'), adler32=None, md5=None) rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=800) rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=10737418240) rse_core.set_rse_limits(rse_id=rse_id, name='MaxBeingDeletedFiles', value=10) rses = [ rse_core.get_rse(rse_id), ] reaper(once=True, rses=rses) reaper(once=True, rses=rses) assert_equal( len( list( replica_core.list_replicas(dids=[{ 'scope': InternalScope('data13_hip'), 'name': n } for n in file_names], rse_expression=rse_name))), nb_files - 10)
def test_source_avoid_deletion(vo, caches_mock, core_config_mock, rse_factory, did_factory, root_account, file_factory):
    """ Test that sources on a file block it from deletion """
    _, reaper_region = caches_mock
    src_rse1, src_rse1_id = rse_factory.make_mock_rse()
    src_rse2, src_rse2_id = rse_factory.make_mock_rse()
    dst_rse, dst_rse_id = rse_factory.make_mock_rse()
    all_rses = [src_rse1_id, src_rse2_id, dst_rse_id]
    any_source = f'{src_rse1}|{src_rse2}'

    for rse_id in [src_rse1_id, src_rse2_id]:
        rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=1)
        rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=1, free=0)
    distance_core.add_distance(src_rse1_id, dst_rse_id, ranking=20)
    distance_core.add_distance(src_rse2_id, dst_rse_id, ranking=10)

    # Upload a test file to both rses without registering
    did = did_factory.random_did()

    # Register replica on one source RSE
    replica_core.add_replica(rse_id=src_rse1_id, account=root_account, bytes_=1,
                             tombstone=datetime(year=1970, month=1, day=1), **did)
    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=dst_rse,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)

    # Reaper will not delete a file which only has one replica if there is any pending transfer for it
    reaper_region.invalidate()
    reaper(once=True, rses=[], include_rses=any_source, exclude_rses=None)
    replica = next(iter(replica_core.list_replicas(dids=[did], rse_expression=any_source)))
    assert len(replica['pfns']) == 1

    # Register replica on second source rse
    replica_core.add_replica(rse_id=src_rse2_id, account=root_account, bytes_=1,
                             tombstone=datetime(year=1970, month=1, day=1), **did)
    replica = next(iter(replica_core.list_replicas(dids=[did], rse_expression=any_source)))
    assert len(replica['pfns']) == 2

    # Submit the transfer. This will create the sources.
    submitter(once=True, rses=[{'id': rse_id} for rse_id in all_rses], partition_wait_time=None,
              transfertool='mock', transfertype='single', filter_transfertool=None)

    # None of the replicas will be removed. They are protected by an entry in the sources table
    reaper_region.invalidate()
    reaper(once=True, rses=[], include_rses=any_source, exclude_rses=None)
    replica = next(iter(replica_core.list_replicas(dids=[did], rse_expression=any_source)))
    assert len(replica['pfns']) == 2

    @transactional_session
    def __delete_sources(rse_id, scope, name, session=None):
        session.execute(delete(Source).where(Source.rse_id == rse_id,
                                             Source.scope == scope,
                                             Source.name == name))

    # Deletion succeeds for one replica (the second is still protected by the existing request)
    __delete_sources(src_rse1_id, **did)
    __delete_sources(src_rse2_id, **did)
    reaper_region.invalidate()
    reaper(once=True, rses=[], include_rses=any_source, exclude_rses=None)
    replica = next(iter(replica_core.list_replicas(dids=[did], rse_expression=any_source)))
    assert len(replica['pfns']) == 1
def test_reaper(): """ REAPER2 (DAEMON): Test the reaper2 daemon.""" if config_get_bool('common', 'multi_vo', raise_exception=False, default=False): vo = {'vo': config_get('client', 'vo', raise_exception=False, default='tst')} new_vo = {'vo': 'new'} if not vo_core.vo_exists(**new_vo): vo_core.add_vo(description='Test', email='*****@*****.**', **new_vo) if not scope_core.check_scope(InternalScope('data13_hip', **new_vo)): scope_core.add_scope(InternalScope('data13_hip', **new_vo), InternalAccount('root', **new_vo)) nb_rses = 2 else: vo = {} new_vo = {} nb_rses = 1 mock_protocol = {'scheme': 'MOCK', 'hostname': 'localhost', 'port': 123, 'prefix': '/test/reaper', 'impl': 'rucio.rse.protocols.mock.Default', 'domains': { 'lan': {'read': 1, 'write': 1, 'delete': 1}, 'wan': {'read': 1, 'write': 1, 'delete': 1}}} nb_files = 250 file_size = 200 # 2G rse_names = [] all_file_names = [] for j in range(nb_rses): rse_name = rse_name_generator() rse_names.append(rse_name) rse_id = rse_core.add_rse(rse_name, **vo) rse_core.add_protocol(rse_id=rse_id, parameter=mock_protocol) if new_vo: rse_id_new = rse_core.add_rse(rse_name, **new_vo) rse_core.add_protocol(rse_id=rse_id_new, parameter=mock_protocol) file_names = [] for i in range(nb_files): file_name = 'lfn' + generate_uuid() file_names.append(file_name) replica_core.add_replica(rse_id=rse_id, scope=InternalScope('data13_hip', **vo), name=file_name, bytes=file_size, tombstone=datetime.utcnow() - timedelta(days=1), account=InternalAccount('root', **vo), adler32=None, md5=None) if new_vo: replica_core.add_replica(rse_id=rse_id_new, scope=InternalScope('data13_hip', **new_vo), name=file_name, bytes=file_size, tombstone=datetime.utcnow() - timedelta(days=1), account=InternalAccount('root', **new_vo), adler32=None, md5=None) all_file_names.append(file_names) rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=1) rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=50 * file_size) # rse_core.set_rse_limits(rse_id=rse_id, name='MaxBeingDeletedFiles', value=10) if new_vo: rse_core.set_rse_usage(rse_id=rse_id_new, source='storage', used=nb_files * file_size, free=1) rse_core.set_rse_limits(rse_id=rse_id_new, name='MinFreeSpace', value=50 * file_size) # rse_core.set_rse_limits(rse_id=rse_id_new, name='MaxBeingDeletedFiles', value=10) from rucio.daemons.reaper.reaper2 import REGION REGION.invalidate() if not vo: assert len(list(replica_core.list_replicas(dids=[{'scope': InternalScope('data13_hip', **vo), 'name': n} for n in all_file_names[0]], rse_expression=rse_name))) == nb_files # Check first if the reaper does not delete anything if no space is needed rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=323000000000) reaper(once=True, rses=[], include_rses=rse_names[0], exclude_rses=[]) assert len(list(replica_core.list_replicas(dids=[{'scope': InternalScope('data13_hip', **vo), 'name': n} for n in all_file_names[0]], rse_expression=rse_name))) == nb_files # Now put it over threshold and delete rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=1) from rucio.daemons.reaper.reaper2 import REGION REGION.invalidate() reaper(once=True, rses=[], include_rses=rse_names[0], exclude_rses=[]) reaper(once=True, rses=[], include_rses=rse_names[0], exclude_rses=[]) assert len(list(replica_core.list_replicas(dids=[{'scope': InternalScope('data13_hip', **vo), 'name': n} for n in all_file_names[0]], rse_expression=rse_name))) == 200 else: # Check we reap all 
VOs by default reaper(once=True, rses=[], include_rses=rse_names[0], exclude_rses=[]) reaper(once=True, rses=[], include_rses=rse_names[0], exclude_rses=[]) assert len(list(replica_core.list_replicas(dids=[{'scope': InternalScope('data13_hip', **vo), 'name': n} for n in all_file_names[0]], rse_expression=rse_names[0]))) == 200 assert len(list(replica_core.list_replicas(dids=[{'scope': InternalScope('data13_hip', **new_vo), 'name': n} for n in all_file_names[0]], rse_expression=rse_names[0]))) == 200