def __add_test_rse_and_replicas(vo, scope, rse_name, names, file_size, epoch_tombstone=False):
    """
    Create an RSE with the mock protocol and register one replica per file name.

    :param vo: VO used for the new RSE and for the owning 'root' account.
    :param scope: Scope shared by all registered replicas.
    :param rse_name: Name of the RSE to create.
    :param names: File names to register on the new RSE.
    :param file_size: Size in bytes recorded for every replica.
    :param epoch_tombstone: If true, replicas get a 1970-01-01 tombstone
                            instead of one dated one day before now.
    :returns: Tuple (rse_name, rse_id, dids); dids is a list of
              {'scope': ..., 'name': ...} dictionaries in registration order.
    """
    rse_id = rse_core.add_rse(rse_name, vo=vo)
    rse_core.add_protocol(rse_id=rse_id, parameter=__mock_protocol)

    if epoch_tombstone:
        tombstone = datetime(year=1970, month=1, day=1)
    else:
        tombstone = datetime.utcnow() - timedelta(days=1)

    registered = []
    for lfn in names:
        registered.append({'scope': scope, 'name': lfn})
        replica_core.add_replica(
            rse_id=rse_id,
            scope=scope,
            name=lfn,
            bytes_=file_size,
            tombstone=tombstone,
            account=InternalAccount('root', vo=vo),
            adler32=None,
            md5=None,
        )

    return rse_name, rse_id, registered
def test_reaper():
    """ REAPER2 (DAEMON): Test the reaper2 daemon."""
    replica_count = 30
    replica_bytes = 2 * 1024 ** 3  # 2G
    mock_rse_id = rse_core.get_rse_id(rse='MOCK')

    # Register the replicas on the MOCK RSE.
    for _ in range(replica_count):
        replica_core.add_replica(
            rse_id=mock_rse_id,
            scope=InternalScope('data13_hip'),
            name='lfn' + generate_uuid(),
            bytes=replica_bytes,
            account=InternalAccount('root'),
            adler32=None,
            md5=None,
        )

    # Report the RSE as nearly full and set the limits.
    rse_core.set_rse_usage(rse_id=mock_rse_id, source='srm',
                           used=replica_count * replica_bytes, free=800)
    limits = (
        ('MinFreeSpace', 10 * 1024 ** 3),
        ('MaxBeingDeletedFiles', 10),
    )
    for limit_name, limit_value in limits:
        rse_core.set_rse_limits(rse_id=mock_rse_id, name=limit_name, value=limit_value)

    # Two consecutive single-shot reaper passes.
    for _ in range(2):
        reaper(once=True, rses=[], include_rses='MOCK', exclude_rses=[])
def test_update_lock_counter(self):
    """ RSE (CORE): Test the update of a replica lock counter """
    rse, scope = 'MOCK', 'mock'
    lfn = 'file_%s' % generate_uuid()
    add_replica(rse=rse, scope=scope, name=lfn, bytes=1, adler32='0cc737eb', account='jdoe')

    # Each step: (counter delta, "tombstone is None" afterwards, expected lock_cnt).
    steps = [
        (1, True, 1),
        (1, True, 2),
        (1, True, 3),
        (-1, True, 2),
        (-1, True, 1),
        (-1, False, 0),
        (1, True, 1),
        (1, True, 2),
        (-1, True, 1),
    ]
    for delta, tombstone_unset, expected_locks in steps:
        status = update_replica_lock_counter(rse=rse, scope=scope, name=lfn, value=delta)
        assert_equal(status, True)

        current = get_replica(rse=rse, scope=scope, name=lfn)
        assert_equal(current['tombstone'] is None, tombstone_unset)
        assert_equal(expected_locks, current['lock_cnt'])
def __update_replica(replica, session=None):
    """
    Used by finisher to update a replica to a finished state.

    The replica state is updated and, unless already archived, the request is
    archived. When the update fails with UnsupportedOperation or
    ReplicaNotFound, a fallback runs: an AVAILABLE replica that does not come
    from a STAGEIN request is registered with a tombstone of "now", and the
    request is archived unless it already was.

    :param replica: Replica as a dictionary. Keys read here: request_id, scope,
                    name, rse_id, state, archived and, on the fallback path,
                    request_type, bytes, adler32 and (optionally) pfn.
    :param session: The database session to use.
    :returns: True.
    :raises Exception: Any error raised on the fallback path is logged and re-raised.
    """
    try:
        replica_core.update_replicas_states([replica], nowait=True, add_tombstone=True, session=session)
        if not replica['archived']:
            request_core.archive_request(replica['request_id'], session=session)
        logging.info("HANDLED REQUEST %s DID %s:%s AT RSE %s STATE %s",
                     replica['request_id'], replica['scope'], replica['name'],
                     replica['rse_id'], str(replica['state']))
    except (UnsupportedOperation, ReplicaNotFound) as error:
        # logging.warn is a deprecated alias; logging.warning emits the same record.
        logging.warning("ERROR WHEN HANDLING REQUEST %s DID %s:%s AT RSE %s STATE %s: %s",
                        replica['request_id'], replica['scope'], replica['name'],
                        replica['rse_id'], str(replica['state']), str(error))
        # replica cannot be found. register it and schedule it for deletion
        try:
            if replica['state'] == ReplicaState.AVAILABLE and replica['request_type'] != RequestType.STAGEIN:
                logging.info("Replica cannot be found. Adding a replica %s:%s AT RSE %s with tombstone=utcnow",
                             replica['scope'], replica['name'], replica['rse_id'])
                replica_core.add_replica(
                    replica['rse_id'],
                    replica['scope'],
                    replica['name'],
                    replica['bytes'],
                    pfn=replica['pfn'] if 'pfn' in replica else None,
                    # it will deleted immediately, do we need to get the accurate account from rule?
                    account=InternalAccount('root', vo=replica['scope'].vo),
                    adler32=replica['adler32'],
                    tombstone=datetime.datetime.utcnow(),
                    session=session)
            if not replica['archived']:
                request_core.archive_request(replica['request_id'], session=session)
            logging.info("HANDLED REQUEST %s DID %s:%s AT RSE %s STATE %s",
                         replica['request_id'], replica['scope'], replica['name'],
                         replica['rse_id'], str(replica['state']))
        except Exception as error:
            logging.error('Cannot register replica for DID %s:%s at RSE %s - potential dark data - %s',
                          replica['scope'], replica['name'], replica['rse_id'], str(error))
            raise

    return True
def test_reaper():
    """ REAPER (DAEMON): Test the reaper daemon."""
    replica_count = 30
    replica_bytes = 2 * 1024 ** 3  # 2G
    mock_rse_id = rse_core.get_rse_id(rse='MOCK')

    for _ in range(replica_count):
        replica_core.add_replica(
            rse_id=mock_rse_id,
            scope='data13_hip',
            name='lfn' + generate_uuid(),
            bytes=replica_bytes,
            account='root',
            adler32=None,
            md5=None,
        )

    rse_core.set_rse_usage(rse_id=mock_rse_id, source='srm',
                           used=replica_count * replica_bytes, free=800)
    rse_core.set_rse_limits(rse_id=mock_rse_id, name='MinFreeSpace', value=10 * 1024 ** 3)
    rse_core.set_rse_limits(rse_id=mock_rse_id, name='MaxBeingDeletedFiles', value=10)

    # Run the daemon entry point in-process first.
    argv = ['--run-once', '--rses', 'MOCK']
    main(argv)

    # Test the rucio-reaper console script
    cmd = ' '.join(['rucio-reaper'] + argv)
    exitcode, out, err = execute(cmd)
    print(cmd, out, err)
    nose.tools.assert_equal(exitcode, 0)
def test_set_tombstone(self):
    """ REPLICA (CORE): set tombstone on replica """
    rse, scope, user = 'MOCK4', 'mock', '******'

    # Set tombstone on one replica
    plain_name = generate_uuid()
    add_replica(rse, scope, plain_name, 4, user)
    before = get_replica(rse, scope, plain_name)
    assert_equal(before['tombstone'], None)
    set_tombstone(rse, scope, plain_name)
    after = get_replica(rse, scope, plain_name)
    assert_equal(after['tombstone'], OBSOLETE)

    # Set tombstone on locked replica
    locked_name = generate_uuid()
    add_replica(rse, scope, locked_name, 4, user)
    locked_dids = [{'name': locked_name, 'scope': scope}]
    RuleClient().add_replication_rule(locked_dids, 1, rse, locked=True)
    with assert_raises(ReplicaIsLocked):
        set_tombstone(rse, scope, locked_name)

    # Set tombstone on not found replica
    missing_name = generate_uuid()
    with assert_raises(ReplicaNotFound):
        set_tombstone(rse, scope, missing_name)
def test_reaper():
    """ REAPER (DAEMON): Test the reaper daemon."""
    # Plain integer literals and range() replace the Python 2-only ``L``
    # suffix and xrange(); the values are unchanged and the code now parses
    # on Python 3 as well.
    nb_files = 30
    file_size = 2147483648  # 2G

    for _ in range(nb_files):
        replica_core.add_replica(rse='MOCK', scope='data13_hip',
                                 name='lfn' + generate_uuid(), bytes=file_size,
                                 account='root', adler32=None, md5=None)

    rse_core.set_rse_usage(rse='MOCK', source='srm', used=nb_files * file_size, free=800)
    rse_core.set_rse_limits(rse='MOCK', name='MinFreeSpace', value=10737418240)
    rse_core.set_rse_limits(rse='MOCK', name='MaxBeingDeletedFiles', value=10)

    rses = [rse_core.get_rse('MOCK'), ]
    # Two consecutive single-shot reaper passes over the same RSE list.
    reaper(once=True, rses=rses)
    reaper(once=True, rses=rses)
def test_throttler_fifo_release_nothing(self):
    """ THROTTLER (CLIENTS): throttler release nothing (fifo). """

    def requested_in(year):
        """Return the current timestamp moved to *year*."""
        now = datetime.now()
        try:
            return now.replace(year=year)
        except ValueError:
            # Today is Feb 29 and *year* is not a leap year; a bare
            # replace(year=...) would abort the test on that day.
            return now.replace(year=year, day=28)

    # two waiting requests and one active requests but threshold is 1
    # more than 80% of the transfer limit are already used -> release nothing
    set('throttler', '%s,%s' % (self.user_activity, self.dest_rse), 1, session=self.db_session)
    request = models.Request(dest_rse_id=self.dest_rse_id, bytes=2,
                             activity=self.user_activity,
                             state=constants.RequestState.SUBMITTED)
    request.save(session=self.db_session)

    name1 = generate_uuid()
    name2 = generate_uuid()
    add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session)

    requests = [{
        'dest_rse_id': self.dest_rse_id,
        'source_rse_id': self.source_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name1,
        'account': self.account,
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': requested_in(2018),
        'attributes': {
            'activity': self.user_activity,
            'bytes': 1,
            'md5': '',
            'adler32': ''
        }
    }, {
        'dest_rse_id': self.dest_rse_id,
        'source_rse_id': self.source_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'requested_at': requested_in(2020),
        'name': name2,
        'account': self.account,
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'attributes': {
            'activity': self.user_activity,
            'bytes': 1,
            'md5': '',
            'adler32': ''
        }
    }]
    queue_requests(requests, session=self.db_session)
    self.db_session.commit()

    throttler.run(once=True, sleep_time=1)

    # Both requests are expected to stay WAITING.
    request = get_request_by_did(self.scope, name1, self.dest_rse_id)
    assert_equal(request['state'], constants.RequestState.WAITING)
    request2 = get_request_by_did(self.scope, name2, self.dest_rse_id)
    assert_equal(request2['state'], constants.RequestState.WAITING)
def test_throttler_fifo_release_subset(self):
    """ THROTTLER (CLIENTS): throttler release subset of waiting requests (fifo). """

    def requested_in(year):
        """Return the current timestamp moved to *year*."""
        now = datetime.now()
        try:
            return now.replace(year=year)
        except ValueError:
            # Today is Feb 29 and *year* is not a leap year; a bare
            # replace(year=...) would abort the test on that day.
            return now.replace(year=year, day=28)

    # two waiting requests and no active requests but threshold is 1 -> release only 1 request
    set('throttler', '%s,%s' % (self.user_activity, self.dest_rse), 1, session=self.db_session)

    name1 = generate_uuid()
    name2 = generate_uuid()
    add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session)

    requests = [{
        'dest_rse_id': self.dest_rse_id,
        'source_rse_id': self.source_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name1,
        'account': self.account,
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': requested_in(2018),
        'attributes': {
            'activity': self.user_activity,
            'bytes': 1,
            'md5': '',
            'adler32': ''
        }
    }, {
        'dest_rse_id': self.dest_rse_id,
        'source_rse_id': self.source_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'requested_at': requested_in(2020),
        'name': name2,
        'account': self.account,
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'attributes': {
            'activity': self.user_activity,
            'bytes': 1,
            'md5': '',
            'adler32': ''
        }
    }]
    queue_requests(requests, session=self.db_session)
    self.db_session.commit()

    throttler.run(once=True, sleep_time=1)

    # The request with the older requested_at is expected to be released.
    request = get_request_by_did(self.scope, name1, self.dest_rse_id)
    assert_equal(request['state'], constants.RequestState.QUEUED)
    request2 = get_request_by_did(self.scope, name2, self.dest_rse_id)
    assert_equal(request2['state'], constants.RequestState.WAITING)
def test_get_did_with_dynamic(self):
    """ DATA IDENTIFIERS (CORE): Get did with dynamic resolve of size"""
    scope = 'mock'
    dataset_name = 'dsn_%s' % generate_uuid()
    file_name_a = 'dsn_%s' % generate_uuid()
    file_name_b = 'dsn_%s' % generate_uuid()
    container_name = 'dsn_%s' % generate_uuid()

    # Dataset holding two 10-byte files.
    add_did(scope=scope, name=dataset_name, type=DIDType.DATASET, account='root')
    add_replica(rse='MOCK', scope=scope, name=file_name_a, bytes=10, account='root')
    add_replica(rse='MOCK', scope=scope, name=file_name_b, bytes=10, account='root')
    dataset_content = [
        {'scope': scope, 'name': file_name_a},
        {'scope': scope, 'name': file_name_b},
    ]
    attach_dids(scope=scope, name=dataset_name, dids=dataset_content, account='root')

    # Container holding that dataset.
    add_did(scope=scope, name=container_name, type=DIDType.CONTAINER, account='root')
    container_content = [{'scope': scope, 'name': dataset_name}]
    attach_dids(scope=scope, name=container_name, dids=container_content, account='root')

    # Both levels are expected to report the summed file sizes.
    dataset_did = get_did(scope=scope, name=dataset_name, dynamic=True)
    assert_equal(dataset_did['bytes'], 20)
    container_did = get_did(scope=scope, name=container_name, dynamic=True)
    assert_equal(container_did['bytes'], 20)
def test_set_rse_limits(self):
    """ RSE (CLIENTS): Test the update of RSE limits."""
    # Plain integer literals and range() replace the Python 2-only ``L``
    # suffix and xrange(); the unused loop variable no longer shadows the
    # ``file`` builtin.
    nb_files = 30
    file_size = 2147483648  # 2G

    for _ in range(nb_files):
        replica_core.add_replica(rse='MOCK', scope='data13_hip',
                                 name='lfn' + generate_uuid(), bytes=file_size,
                                 account='root', adler32=None, md5=None)

    rse_core.set_rse_usage(rse='MOCK', source='srm', used=nb_files * file_size, free=800)
    rse_core.set_rse_limits(rse='MOCK', name='MinFreeSpace', value=10737418240)
    rse_core.set_rse_limits(rse='MOCK', name='MaxBeingDeletedFiles', value=10)

    rses = [rse_core.get_rse('MOCK'), ]
    # Two consecutive single-shot reaper passes over the same RSE list.
    reaper(once=True, rses=rses)
    reaper(once=True, rses=rses)
def test_reaper():
    """ REAPER2 (DAEMON): Test the reaper2 daemon."""
    multi_vo = config_get_bool('common', 'multi_vo', raise_exception=False, default=False)
    vo = {'vo': config_get('client', 'vo', raise_exception=False, default='tst')} if multi_vo else {}

    # Fresh RSE served by the mock protocol.
    rse_name = rse_name_generator()
    rse_id = rse_core.add_rse(rse_name, **vo)
    all_operations = {'read': 1, 'write': 1, 'delete': 1}
    mock_protocol = {
        'scheme': 'MOCK',
        'hostname': 'localhost',
        'port': 123,
        'prefix': '/test/reaper',
        'impl': 'rucio.rse.protocols.mock.Default',
        'domains': {
            'lan': dict(all_operations),
            'wan': dict(all_operations),
        },
    }
    rse_core.add_protocol(rse_id=rse_id, parameter=mock_protocol)

    # Register replicas whose tombstone is one day in the past.
    nb_files = 30
    file_size = 2 * 1024 ** 3  # 2G
    file_names = []
    for _ in range(nb_files):
        lfn = 'lfn' + generate_uuid()
        file_names.append(lfn)
        replica_core.add_replica(
            rse_id=rse_id,
            scope=InternalScope('data13_hip', **vo),
            name=lfn,
            bytes=file_size,
            tombstone=datetime.utcnow() - timedelta(days=1),
            account=InternalAccount('root', **vo),
            adler32=None,
            md5=None,
        )

    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=800)
    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=10 * 1024 ** 3)
    rse_core.set_rse_limits(rse_id=rse_id, name='MaxBeingDeletedFiles', value=10)

    # In multi-VO mode the RSE expression is qualified with the VO.
    if vo:
        include_expression = 'vo=%s&(%s)' % (vo['vo'], rse_name)
    else:
        include_expression = rse_name
    for _ in range(2):
        reaper(once=True, rses=[], include_rses=include_expression, exclude_rses=[])

    # After the two passes, nb_files - 5 replicas are expected to remain.
    dids = [{'scope': InternalScope('data13_hip', **vo), 'name': n} for n in file_names]
    remaining = list(replica_core.list_replicas(dids=dids, rse_expression=rse_name))
    assert len(remaining) == nb_files - 5
def test_update_lock_counter(self):
    """ RSE (CORE): Test the update of a replica lock counter """
    rse = 'MOCK'
    tmp_scope = 'mock'
    tmp_file = 'file_%s' % generate_uuid()
    # ``bytes=1`` replaces the Python 2-only long literal ``1L``, which is a
    # syntax error on Python 3; the value is unchanged.
    add_replica(rse=rse, scope=tmp_scope, name=tmp_file, bytes=1, adler32='0cc737eb', account='jdoe')

    # Per step: counter delta, whether the tombstone is None afterwards, and
    # the expected lock counter.
    values = (1, 1, 1, -1, -1, -1, 1, 1, -1)
    tombstones = (True, True, True, True, True, False, True, True, True)
    lock_counters = (1, 2, 3, 2, 1, 0, 1, 2, 1)
    for value, tombstone, lock_counter in zip(values, tombstones, lock_counters):
        status = update_replica_lock_counter(rse=rse, scope=tmp_scope, name=tmp_file, value=value)
        assert_equal(status, True)
        replica = get_replica(rse=rse, scope=tmp_scope, name=tmp_file)
        assert_equal(replica['tombstone'] is None, tombstone)
        assert_equal(lock_counter, replica['lock_cnt'])
def create_files(nrfiles, scope, rse, bytes=1):
    """
    Creates a number of test files and add replicas to rse

    :param nrfiles: Number of files to create
    :param scope: Scope to create the files in
    :param rse: RSE to add the replica to
    :param bytes: Bytes of each file
    :returns: List of dict, one {'scope': ..., 'name': ..., 'bytes': ...}
              entry per created file, in creation order
    """
    files = []
    # range() works on Python 2 and 3 (xrange is Python 2-only); ``lfn``
    # avoids shadowing the ``file`` builtin.
    for _ in range(nrfiles):
        lfn = 'file_%s' % uuid()
        add_replica(rse=rse, scope=scope, name=lfn, bytes=bytes, account='jdoe')
        files.append({'scope': scope, 'name': lfn, 'bytes': bytes})
    return files
def test_release_waiting_requests_all(self):
    """ REQUEST (CORE): release all waiting requests. """

    def requested_in(year):
        """Return the current timestamp moved to *year*."""
        now = datetime.now()
        try:
            return now.replace(year=year)
        except ValueError:
            # Today is Feb 29 and *year* is not a leap year; a bare
            # replace(year=...) would abort the test on that day.
            return now.replace(year=year, day=28)

    name1 = generate_uuid()
    name2 = generate_uuid()
    add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session)

    requests = [{
        'dest_rse_id': self.dest_rse_id,
        'source_rse_id': self.source_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name1,
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': requested_in(2018),
        'attributes': {
            'activity': 'User Subscription',
            'bytes': 1,
            'md5': '',
            'adler32': ''
        }
    }, {
        'dest_rse_id': self.dest_rse_id,
        'source_rse_id': self.source_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'requested_at': requested_in(2020),
        'name': name2,
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'attributes': {
            'activity': 'User Subscription',
            'bytes': 1,
            'md5': '',
            'adler32': ''
        }
    }]
    queue_requests(requests, session=self.db_session)

    release_all_waiting_requests(self.dest_rse_id, session=self.db_session)

    # Both requests are expected to be QUEUED after the release.
    request = get_request_by_did(self.scope, name1, self.dest_rse_id, session=self.db_session)
    assert_equal(request['state'], constants.RequestState.QUEUED)
    request = get_request_by_did(self.scope, name2, self.dest_rse_id, session=self.db_session)
    assert_equal(request['state'], constants.RequestState.QUEUED)
def test_reaper():
    """ REAPER (DAEMON): Test the reaper daemon."""
    multi_vo = config_get_bool('common', 'multi_vo', raise_exception=False, default=False)
    vo = {'vo': 'tst'} if multi_vo else {}

    replica_count = 30
    replica_bytes = 2 * 1024 ** 3  # 2G
    mock_rse_id = rse_core.get_rse_id(rse='MOCK', **vo)

    for _ in range(replica_count):
        replica_core.add_replica(
            rse_id=mock_rse_id,
            scope=InternalScope('data13_hip', **vo),
            name='lfn' + generate_uuid(),
            bytes=replica_bytes,
            account=InternalAccount('root', **vo),
            adler32=None,
            md5=None,
        )

    rse_core.set_rse_usage(rse_id=mock_rse_id, source='srm',
                           used=replica_count * replica_bytes, free=800)
    rse_core.set_rse_limits(rse_id=mock_rse_id, name='MinFreeSpace', value=10 * 1024 ** 3)
    rse_core.set_rse_limits(rse_id=mock_rse_id, name='MaxBeingDeletedFiles', value=10)

    # Run the daemon entry point in-process first.
    argv = ['--run-once', '--rses', 'MOCK']
    main(argv)

    # Test the rucio-reaper console script
    cmd = ' '.join(['rucio-reaper'] + argv)
    exitcode, out, err = execute(cmd)
    print(cmd, out, err)
    nose.tools.assert_equal(exitcode, 0)
def test_get_did_with_dynamic(self):
    """ DATA IDENTIFIERS (CORE): Get did with dynamic resolve of size"""
    scope = InternalScope('mock', **self.vo)
    owner = InternalAccount('root', **self.vo)
    dataset_name = 'dsn_%s' % generate_uuid()
    file_name_a = 'dsn_%s' % generate_uuid()
    file_name_b = 'dsn_%s' % generate_uuid()
    container_name = 'dsn_%s' % generate_uuid()
    mock_rse_id = get_rse_id(rse='MOCK', **self.vo)

    # Dataset holding two 10-byte files.
    add_did(scope=scope, name=dataset_name, type=DIDType.DATASET, account=owner)
    add_replica(rse_id=mock_rse_id, scope=scope, name=file_name_a, bytes=10, account=owner)
    add_replica(rse_id=mock_rse_id, scope=scope, name=file_name_b, bytes=10, account=owner)
    dataset_content = [
        {'scope': scope, 'name': file_name_a},
        {'scope': scope, 'name': file_name_b},
    ]
    attach_dids(scope=scope, name=dataset_name, dids=dataset_content, account=owner)

    # Container holding that dataset.
    add_did(scope=scope, name=container_name, type=DIDType.CONTAINER, account=owner)
    container_content = [{'scope': scope, 'name': dataset_name}]
    attach_dids(scope=scope, name=container_name, dids=container_content, account=owner)

    # Both levels are expected to report the summed file sizes.
    dataset_did = get_did(scope=scope, name=dataset_name, dynamic=True)
    assert dataset_did['bytes'] == 20
    container_did = get_did(scope=scope, name=container_name, dynamic=True)
    assert container_did['bytes'] == 20
def compose(scope, name, rse, bytes, sources, account, md5=None, adler32=None, pfn=None, meta=None, rules=None, parent_scope=None, parent_name=None, session=None):
    """
    Concatenates a list of existing dids into a new file replica

    :param scope: the scope name.
    :param name: The data identifier name.
    :param rse: the rse name.
    :param bytes: the size of the file.
    :param sources: The list of temporary DIDs; each entry is read for its
                    'scope' and 'name' keys.
    :param account: The account owner.
    :param md5: The md5 checksum.
    :param adler32: The adler32 checksum.
    :param pfn: Physical file name (for nondeterministic rse).
    :param meta: Meta-data associated with the file. Represented as key/value pairs in a dictionary.
                 Defaults to an empty dictionary.
    :param rules: Replication rules associated with the file. A list of dictionaries,
                  e.g., [{'copies': 2, 'rse_expression': 'TIERS1'}, ]. Defaults to an empty list.
    :param parent_scope: Possible dataset scope.
    :param parent_name: Possible dataset name.
    :param session: The database session in use.
    """
    # Build fresh containers per call: a literal {} / [] default is created
    # once and shared by every invocation, so a mutation downstream would leak
    # into later calls.
    meta = {} if meta is None else meta
    rules = [] if rules is None else rules

    # Create the new file did and replica
    add_replica(rse=rse, scope=scope, name=name, bytes=bytes, account=account,
                adler32=adler32, md5=md5, pfn=pfn, meta=meta, rules=rules,
                session=session)

    # Attach the file to a dataset
    if parent_scope and parent_name:
        attach_dids(scope=parent_scope, name=parent_name,
                    dids=[{'scope': scope, 'name': name}],
                    account=account, rse=None, session=session)

    # Mark the merged dids as obsolete; all of them share one expiry timestamp.
    now = datetime.utcnow()
    expired_dids = [
        {'scope': source['scope'], 'name': source['name'], 'expired_at': now}
        for source in sources
    ]
    session.bulk_update_mappings(models.TemporaryDataIdentifier, expired_dids)
def test_access_rule_vo(self):
    """ MULTI VO (CORE): Test accessing rules from a different VO """
    scope = InternalScope('mock', **self.vo)
    dataset = 'dataset_' + str(generate_uuid())
    account = InternalAccount('root', **self.vo)

    # RSE with a random ten-letter suffix, holding one replica.
    random_letters = [choice(ascii_uppercase) for _ in range(10)]
    rse_name = 'MOCK_%s' % ''.join(random_letters)
    rse_id = add_rse(rse_name, 'root', **self.vo)
    add_replica(rse_id=rse_id, scope=scope, name=dataset, bytes=10, account=account)

    created_rules = add_rule(dids=[{'scope': scope, 'name': dataset}], account=account,
                             copies=1, rse_expression='MOCK', grouping='NONE',
                             weight='fakeweight', lifetime=None, locked=False,
                             subscription_id=None)
    rule_id = created_rules[0]

    # Deleting the rule through the other VO must be refused.
    with assert_raises(AccessDenied):
        delete_replication_rule(rule_id=rule_id, purge_replicas=False, issuer='root', **self.new_vo)

    # check locks are not accessible from other VO
    own_locks = list(get_replica_locks_for_rule_id(rule_id, **self.vo))
    assert_equal(len(own_locks), 1)
    foreign_locks = list(get_replica_locks_for_rule_id(rule_id, **self.new_vo))
    assert_equal(len(foreign_locks), 0)

    # Deleting through the owning VO works and leaves an expiry date on the rule.
    delete_replication_rule(rule_id=rule_id, purge_replicas=False, issuer='root', **self.vo)
    rule_dict = get_replication_rule(rule_id=rule_id, issuer='root', **self.vo)
    assert_is_not_none(rule_dict['expires_at'])
def test_queue_requests_state(vo, use_preparer):
    """ REQUEST (CORE): test queuing requests """

    def requested_in(year):
        """Return the current timestamp moved to *year*."""
        now = datetime.now()
        try:
            return now.replace(year=year)
        except ValueError:
            # Today is Feb 29 and *year* is not a leap year; a bare
            # replace(year=...) would abort the test on that day.
            return now.replace(year=year, day=28)

    # Translate the textual test parameter into a boolean.
    if use_preparer == 'preparer enabled':
        use_preparer = True
    elif use_preparer == 'preparer disabled':
        use_preparer = False
    else:
        return pytest.xfail(reason=f'unknown test parameter use_preparer={use_preparer}')

    db_session = session.get_session()
    dest_rse = 'MOCK'
    dest_rse2 = 'MOCK2'
    source_rse = 'MOCK4'
    source_rse2 = 'MOCK5'
    dest_rse_id = get_rse_id(dest_rse, vo=vo)
    dest_rse_id2 = get_rse_id(dest_rse2, vo=vo)
    source_rse_id = get_rse_id(source_rse, vo=vo)
    source_rse_id2 = get_rse_id(source_rse2, vo=vo)
    scope = InternalScope('mock', vo=vo)
    account = InternalAccount('root', vo=vo)
    user_activity = 'User Subscription'
    config_set('conveyor', 'use_preparer', str(use_preparer))
    # With the preparer enabled, requests are expected in PREPARING, otherwise in QUEUED.
    target_state = RequestState.PREPARING if use_preparer else RequestState.QUEUED

    name = generate_uuid()
    name2 = generate_uuid()
    name3 = generate_uuid()
    add_replica(source_rse_id, scope, name, 1, account, session=db_session)
    add_replica(source_rse_id2, scope, name2, 1, account, session=db_session)
    add_replica(source_rse_id, scope, name3, 1, account, session=db_session)

    set_rse_transfer_limits(dest_rse_id, user_activity, max_transfers=1, session=db_session)
    set_rse_transfer_limits(dest_rse_id2, user_activity, max_transfers=1, session=db_session)
    set_rse_transfer_limits(source_rse_id, user_activity, max_transfers=1, session=db_session)
    set_rse_transfer_limits(source_rse_id2, user_activity, max_transfers=1, session=db_session)

    requests = [{
        'dest_rse_id': dest_rse_id,
        'src_rse_id': source_rse_id,
        'request_type': RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name,
        'scope': scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': requested_in(2015),
        'attributes': {
            'activity': user_activity,
            'bytes': 10,
            'md5': '',
            'adler32': ''
        }
    }, {
        'dest_rse_id': dest_rse_id,
        'src_rse_id': source_rse_id2,
        'request_type': RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name2,
        'scope': scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': requested_in(2015),
        'attributes': {
            'activity': 'unknown',
            'bytes': 10,
            'md5': '',
            'adler32': ''
        }
    }, {
        'dest_rse_id': dest_rse_id2,
        'src_rse_id': source_rse_id,
        'request_type': RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name3,
        'scope': scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': requested_in(2015),
        'attributes': {
            'activity': user_activity,
            'bytes': 10,
            'md5': '',
            'adler32': ''
        }
    }]

    try:
        queue_requests(requests, session=db_session)
        request = get_request_by_did(scope, name, dest_rse_id, session=db_session)
        assert request['state'] == target_state
        request = get_request_by_did(scope, name2, dest_rse_id, session=db_session)
        assert request['state'] == target_state
        request = get_request_by_did(scope, name3, dest_rse_id2, session=db_session)
        assert request['state'] == target_state
    finally:
        # Undo the configuration change and clear the tables listed below.
        config_remove_option('conveyor', 'use_preparer')
        db_session.query(models.Source).delete()
        db_session.query(models.Request).delete()
        db_session.query(models.RSETransferLimit).delete()
        db_session.query(models.Distance).delete()
        db_session.commit()
        reset_config_table()
def request_transfer(loop=1, src=None, dst=None, upload=False, same_src=False, same_dst=False):
    """
    Main loop to request a new transfer.

    Every iteration registers a dataset containing one file replica on the
    source RSE and adds a rule requesting one copy on the destination RSE.
    The loop ends when ``loop`` iterations were run, when the graceful-stop
    event is set, or when an upload fails.

    :param loop: Number of iterations to run before returning.
    :param src: Source specification handed to generate_rse().
    :param dst: Destination specification handed to generate_rse().
    :param upload: If true, upload the file configured under injector/file
                   to the source RSE before registering the replica.
    :param same_src: If true, keep the initially generated source RSE for all iterations.
    :param same_dst: If true, keep the initially generated destination RSE for all iterations.
    """

    def new_rse(endpoint):
        """Generate an RSE for *endpoint* with a random eight-letter token."""
        token = ''.join(random.sample(string.ascii_letters.upper(), 8))
        return generate_rse(endpoint, token)

    logging.info('request: starting')

    session = get_session()
    src_rse = new_rse(src)
    dst_rse = new_rse(dst)

    logging.info('request: started')

    i = 0
    while not graceful_stop.is_set():

        if i >= loop:
            return

        # Only Exception is caught below: a bare ``except:`` would also
        # swallow KeyboardInterrupt and SystemExit.
        try:
            if not same_src:
                src_rse = new_rse(src)
            if not same_dst:
                dst_rse = new_rse(dst)

            tmp_name = generate_uuid()
            dataset_name = 'dataset-%s' % tmp_name
            file_name = 'file-%s' % tmp_name

            # add a new dataset
            scope = InternalScope('mock')
            account = InternalAccount('root')
            did.add_did(scope=scope, name=dataset_name, type=DIDType.DATASET,
                        account=account, session=session)

            # construct PFN
            pfn = rsemanager.lfns2pfns(
                src_rse,
                lfns=[{'scope': scope.external, 'name': file_name}]
            )['%s:%s' % (scope.external, file_name)]

            if upload:
                # create the directories if needed
                p = rsemanager.create_protocol(src_rse, operation='write', scheme='srm')
                p.connect()
                try:
                    p.mkdir(pfn)
                except Exception as error:
                    # Best effort: keep going, but leave a trace of the failure.
                    logging.debug('mkdir failed for %s: %s', pfn, error)

                # upload the test file
                try:
                    fp = os.path.dirname(config_get('injector', 'file'))
                    fn = os.path.basename(config_get('injector', 'file'))
                    p.put(fn, pfn, source_dir=fp)
                except Exception:
                    logging.critical('Could not upload, removing temporary DID: %s', str(sys.exc_info()))
                    did.delete_dids([{'scope': scope, 'name': dataset_name}],
                                    account=account, session=session)
                    break

            # Read the size once as an integer so that the replica and the
            # dataset attachment carry the same value and type.
            file_bytes = config_get_int('injector', 'bytes')

            # add the replica
            replica.add_replica(rse_id=src_rse['id'], scope=scope, name=file_name,
                                bytes=file_bytes,
                                adler32=config_get('injector', 'adler32'),
                                md5=config_get('injector', 'md5'),
                                account=account, session=session)
            logging.info('added replica on %s for DID mock:%s', src_rse['rse'], tmp_name)

            # to the dataset
            did.attach_dids(scope=scope, name=dataset_name,
                            dids=[{'scope': scope, 'name': file_name, 'bytes': file_bytes}],
                            account=account, session=session)

            # add rule for the dataset
            rule.add_rule(dids=[{'scope': scope, 'name': dataset_name}],
                          account=account, copies=1, rse_expression=dst_rse['rse'],
                          grouping='ALL', weight=None, lifetime=None, locked=False,
                          subscription_id=None, activity='mock-injector',
                          session=session)
            logging.info('added rule for %s for DID %s:%s', dst_rse['rse'], scope, tmp_name)

            session.commit()
        except Exception:
            session.rollback()
            logging.critical(traceback.format_exc())

        i += 1

    logging.info('request: graceful stop requested')

    logging.info('request: graceful stop done')
def test_throttler_grouped_fifo_all(self):
    """ THROTTLER (CLIENTS): throttler release all waiting requests (grouped fifo). """

    def requested_in(year):
        """Return the current timestamp moved to *year*."""
        now = datetime.now()
        try:
            return now.replace(year=year)
        except ValueError:
            # Today is Feb 29 and *year* is not a leap year; a bare
            # replace(year=...) would abort the test on that day.
            return now.replace(year=year, day=28)

    # no threshold -> release all waiting requests
    name1 = generate_uuid()
    name2 = generate_uuid()
    name3 = generate_uuid()
    name4 = generate_uuid()
    dataset_1_name = generate_uuid()
    add_did(self.scope, dataset_1_name, constants.DIDType.DATASET, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name3, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name4, 1, self.account, session=self.db_session)
    # Only name1 and name2 are attached to the dataset.
    attach_dids(self.scope, dataset_1_name, [{'name': name1, 'scope': self.scope}], self.account, session=self.db_session)
    attach_dids(self.scope, dataset_1_name, [{'name': name2, 'scope': self.scope}], self.account, session=self.db_session)

    requests = [{
        'source_rse_id': self.source_rse_id,
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name1,
        'bytes': 1,
        'scope': self.scope,
        'retry_count': 1,
        'rule_id': generate_uuid(),
        'requested_at': requested_in(2000),
        'attributes': {
            'activity': self.user_activity,
            'bytes': 1,
            'md5': '',
            'adler32': ''
        }
    }, {
        'source_rse_id': self.source_rse_id,
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name2,
        'bytes': 2,
        'requested_at': requested_in(2020),
        'rule_id': generate_uuid(),
        'scope': self.scope,
        'retry_count': 1,
        'attributes': {
            'activity': self.user_activity,
            'bytes': 2,
            'md5': '',
            'adler32': ''
        }
    }, {
        'source_rse_id': self.source_rse_id,
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name3,
        'bytes': 3,
        # requested after the request below but small enough for max_volume check
        'requested_at': requested_in(2021),
        'rule_id': generate_uuid(),
        'scope': self.scope,
        'retry_count': 1,
        'attributes': {
            'activity': self.user_activity,
            'bytes': 3,
            'md5': '',
            'adler32': ''
        }
    }, {
        'source_rse_id': self.source_rse_id,
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name4,
        'bytes': 3000,
        'requested_at': requested_in(2020),
        'rule_id': generate_uuid(),
        'scope': self.scope,
        'retry_count': 1,
        'attributes': {
            'activity': self.user_activity,
            'bytes': 3000,
            'md5': '',
            'adler32': ''
        }
    }]

    queue_requests(requests, session=self.db_session)
    self.db_session.commit()

    throttler.run(once=True, sleep_time=1)

    # All four requests are expected to be QUEUED.
    request_1 = get_request_by_did(self.scope, name1, self.dest_rse_id)
    assert_equal(request_1['state'], constants.RequestState.QUEUED)
    request_2 = get_request_by_did(self.scope, name2, self.dest_rse_id)
    assert_equal(request_2['state'], constants.RequestState.QUEUED)
    request_3 = get_request_by_did(self.scope, name3, self.dest_rse_id)
    assert_equal(request_3['state'], constants.RequestState.QUEUED)
    request_4 = get_request_by_did(self.scope, name4, self.dest_rse_id)
    assert_equal(request_4['state'], constants.RequestState.QUEUED)
def test_release_waiting_requests_fifo(self):
    """ REQUEST (CORE): release waiting requests based on FIFO.

    Scenario 1 queues two requests and releases one without any filter.
    Scenario 2 queues four requests and releases two while filtering on
    account and activity.
    """
    def _requested_at(year):
        # Current wall-clock time moved into ``year``. A bare
        # ``replace(year=...)`` raises ValueError when the test runs on
        # 29 February and ``year`` is not a leap year, so fall back to the 28th.
        now = datetime.now()
        try:
            return now.replace(year=year)
        except ValueError:
            return now.replace(year=year, day=28)

    def _transfer_request(name, year, activity, **extra):
        # Build one 1-byte transfer request; ``extra`` adds optional
        # top-level keys such as 'account'.
        request = {
            'dest_rse_id': self.dest_rse_id,
            'source_rse_id': self.source_rse_id,
            'request_type': constants.RequestType.TRANSFER,
            'request_id': generate_uuid(),
            'name': name,
            'scope': self.scope,
            'rule_id': generate_uuid(),
            'retry_count': 1,
            'requested_at': _requested_at(year),
            'attributes': {
                'activity': activity,
                'bytes': 1,
                'md5': '',
                'adler32': ''
            }
        }
        request.update(extra)
        return request

    def _assert_state(name, expected_state):
        # Look the request up by DID and compare its state.
        request = get_request_by_did(self.scope, name, self.dest_rse_id, session=self.db_session)
        assert_equal(request['state'], expected_state)

    # without account and activity check
    # two requests -> release one request -> request with oldest requested_at date should be released
    name1 = generate_uuid()
    name2 = generate_uuid()
    for name in (name1, name2):
        add_replica(self.source_rse_id, self.scope, name, 1, self.account, session=self.db_session)
    requests = [
        _transfer_request(name1, 2018, self.user_activity),
        _transfer_request(name2, 2020, self.user_activity),
    ]
    queue_requests(requests, session=self.db_session)
    release_waiting_requests_fifo(self.dest_rse_id, count=1, session=self.db_session)
    _assert_state(name1, constants.RequestState.QUEUED)
    _assert_state(name2, constants.RequestState.WAITING)

    # with activity and account check
    # four requests -> release two requests -> requests with correct account and activity should be released
    self.db_session.query(models.Request).delete()
    self.db_session.commit()
    name1 = generate_uuid()
    name2 = generate_uuid()
    name3 = generate_uuid()
    name4 = generate_uuid()
    for name in (name1, name2, name3, name4):
        add_replica(self.source_rse_id, self.scope, name, 1, self.account, session=self.db_session)
    requests = [
        _transfer_request(name1, 2018, self.user_activity, account=self.account),
        # wrong activity
        _transfer_request(name2, 2020, 'ignore', account=self.account),
        # wrong account
        _transfer_request(name3, 2020, self.user_activity, account=InternalAccount('jdoe')),
        # requested latest but account and activity are correct
        _transfer_request(name4, 2020, self.user_activity, account=self.account),
    ]
    queue_requests(requests, session=self.db_session)
    release_waiting_requests_fifo(self.dest_rse_id, count=2, account=self.account, activity=self.user_activity, session=self.db_session)
    _assert_state(name1, constants.RequestState.QUEUED)
    _assert_state(name2, constants.RequestState.WAITING)
    _assert_state(name3, constants.RequestState.WAITING)
    _assert_state(name4, constants.RequestState.QUEUED)
def test_release_waiting_requests_per_free_volume(self):
    """ REQUEST (CORE): release waiting requests that fit grouped in available volume.

    Runs three scenarios against ``self.dest_rse_id``: unattached files,
    files grouped in datasets, and a volume limit of zero. The test is
    skipped (returns True) on the mysql dialect.
    """
    if self.dialect == 'mysql':
        return True

    def _requested_at(year):
        # Current wall-clock time moved into ``year``. A bare
        # ``replace(year=...)`` raises ValueError when the test runs on
        # 29 February and ``year`` is not a leap year, so fall back to the 28th.
        now = datetime.now()
        try:
            return now.replace(year=year)
        except ValueError:
            return now.replace(year=year, day=28)

    def _new_file(rse_id):
        # Register a 1-byte replica under a fresh name and return that name.
        name = generate_uuid()
        add_replica(rse_id, self.scope, name, 1, self.account, session=self.db_session)
        return name

    def _transfer_request(name, size, year):
        # Build one transfer request of ``size`` bytes for the file ``name``.
        return {
            'dest_rse_id': self.dest_rse_id,
            'request_type': constants.RequestType.TRANSFER,
            'request_id': generate_uuid(),
            'name': name,
            'scope': self.scope,
            'rule_id': generate_uuid(),
            'retry_count': 1,
            'requested_at': _requested_at(year),
            'attributes': {
                'activity': 'User Subscription',
                'bytes': size,
                'md5': '',
                'adler32': ''
            }
        }

    def _add_submitted_request():
        # A 2-byte transfer that is already SUBMITTED towards the destination.
        submitted = models.Request(dest_rse_id=self.dest_rse_id, bytes=2, activity=self.all_activities, state=constants.RequestState.SUBMITTED)
        submitted.save(session=self.db_session)

    def _purge_requests():
        # Start the next scenario from an empty request table.
        self.db_session.query(models.Request).delete()
        self.db_session.commit()

    def _assert_state(name, expected_state):
        # Look the request up by DID and compare its state.
        request = get_request_by_did(self.scope, name, self.dest_rse_id, session=self.db_session)
        assert_equal(request['state'], expected_state)

    # release unattached requests that fit in available volume with respect to already submitted transfers
    name1 = _new_file(self.source_rse_id)
    name2 = _new_file(self.source_rse_id)
    name3 = _new_file(self.source_rse_id)
    _add_submitted_request()
    volume = 10
    set_rse_transfer_limits(self.dest_rse_id, 'all_activities', volume=volume, max_transfers=1, session=self.db_session)
    requests = [
        _transfer_request(name1, 8, 2015),
        _transfer_request(name2, 2, 2020),
        _transfer_request(name3, 10, 2000),
    ]
    queue_requests(requests, session=self.db_session)
    release_waiting_requests_per_free_volume(self.dest_rse_id, volume=volume, session=self.db_session)
    # released because small enough
    _assert_state(name1, constants.RequestState.QUEUED)
    # still waiting because requested later and too big
    _assert_state(name2, constants.RequestState.WAITING)
    # still waiting because too big
    _assert_state(name3, constants.RequestState.WAITING)

    # release attached requests that fit together with the dataset in available volume with respect to already submitted transfers
    _purge_requests()
    name1 = _new_file(self.source_rse_id)
    name2 = _new_file(self.source_rse_id)
    name3 = _new_file(self.source_rse_id)
    name4 = _new_file(self.source_rse_id)
    dataset1_name = generate_uuid()
    add_did(self.scope, dataset1_name, constants.DIDType.DATASET, self.account, session=self.db_session)
    attach_dids(self.scope, dataset1_name,
                [{'name': name1, 'scope': self.scope}, {'name': name4, 'scope': self.scope}],
                self.account, session=self.db_session)
    dataset2_name = generate_uuid()
    add_did(self.scope, dataset2_name, constants.DIDType.DATASET, self.account, session=self.db_session)
    attach_dids(self.scope, dataset2_name,
                [{'name': name2, 'scope': self.scope}, {'name': name3, 'scope': self.scope}],
                self.account, session=self.db_session)
    _add_submitted_request()
    volume = 10
    set_rse_transfer_limits(self.dest_rse_id, 'all_activities', volume=volume, max_transfers=1, session=self.db_session)
    requests = [
        _transfer_request(name1, 6, 2015),
        _transfer_request(name2, 2, 2020),
        _transfer_request(name3, 10, 2000),
        _transfer_request(name4, 2, 2030),
    ]
    queue_requests(requests, session=self.db_session)
    release_waiting_requests_per_free_volume(self.dest_rse_id, volume=volume, session=self.db_session)
    # released because dataset fits in volume
    _assert_state(name1, constants.RequestState.QUEUED)
    _assert_state(name4, constants.RequestState.QUEUED)
    # waiting because dataset is too big
    _assert_state(name2, constants.RequestState.WAITING)
    _assert_state(name3, constants.RequestState.WAITING)

    # release requests with no available volume -> release nothing
    _purge_requests()
    # NOTE(review): this replica is registered on the destination RSE, unlike the
    # scenarios above which use the source RSE - confirm this is intended.
    name1 = _new_file(self.dest_rse_id)
    volume = 0
    set_rse_transfer_limits(self.dest_rse_id, 'all_activities', volume=volume, max_transfers=1, session=self.db_session)
    requests = [_transfer_request(name1, 8, 2015)]
    queue_requests(requests, session=self.db_session)
    release_waiting_requests_per_free_volume(self.dest_rse_id, volume=volume, session=self.db_session)
    # waiting because no available volume
    _assert_state(name1, constants.RequestState.WAITING)
def test_throttler_grouped_fifo_subset(self):
    """ THROTTLER (CLIENTS): throttler release subset of waiting requests (grouped fifo).

    Sets a transfer limit (volume=10, max_transfers=1) and a throttler
    threshold of 1 for the destination RSE, queues four transfer requests
    (the first two files share a dataset), runs the throttler once and
    expects the first three to be QUEUED while the 3000-byte one stays WAITING.
    """
    def _requested_at(year):
        # Current wall-clock time moved into ``year``. A bare
        # ``replace(year=...)`` raises ValueError when the test runs on
        # 29 February and ``year`` is not a leap year, so fall back to the 28th.
        now = datetime.now()
        try:
            return now.replace(year=year)
        except ValueError:
            return now.replace(year=year, day=28)

    def _transfer_request(name, size, year):
        # Build one transfer request of ``size`` bytes for the file ``name``.
        return {
            'source_rse_id': self.source_rse_id,
            'dest_rse_id': self.dest_rse_id,
            'request_type': constants.RequestType.TRANSFER,
            'request_id': generate_uuid(),
            'name': name,
            'bytes': size,
            'scope': self.scope,
            'retry_count': 1,
            'rule_id': generate_uuid(),
            'requested_at': _requested_at(year),
            'attributes': {
                'activity': self.user_activity,
                'bytes': size,
                'md5': '',
                'adler32': ''
            }
        }

    def _assert_state(name, expected_state):
        # Look the request up by DID and compare its state.
        request = get_request_by_did(self.scope, name, self.dest_rse_id)
        assert_equal(request['state'], expected_state)

    set_rse_transfer_limits(self.dest_rse_id, self.all_activities, volume=10, max_transfers=1, session=self.db_session)
    set('throttler', '%s,%s' % (self.all_activities, self.dest_rse), 1, session=self.db_session)  # threshold used by throttler

    name1 = generate_uuid()
    name2 = generate_uuid()
    name3 = generate_uuid()
    name4 = generate_uuid()
    dataset_1_name = generate_uuid()
    add_did(self.scope, dataset_1_name, constants.DIDType.DATASET, self.account, session=self.db_session)
    for name in (name1, name2, name3, name4):
        add_replica(self.source_rse_id, self.scope, name, 1, self.account, session=self.db_session)
    # only the first two files belong to the dataset
    for name in (name1, name2):
        attach_dids(self.scope, dataset_1_name, [{'name': name, 'scope': self.scope}], self.account, session=self.db_session)

    requests = [
        _transfer_request(name1, 1, 2000),
        _transfer_request(name2, 2, 2020),
        # requested after the request below but small enough for max_volume check
        _transfer_request(name3, 3, 2021),
        _transfer_request(name4, 3000, 2020),
    ]
    queue_requests(requests, session=self.db_session)
    self.db_session.commit()
    throttler.run(once=True, sleep_time=1)

    # released because it got requested first
    _assert_state(name1, constants.RequestState.QUEUED)
    # released because the DID is attached to the same dataset
    _assert_state(name2, constants.RequestState.QUEUED)
    # released because of available volume
    _assert_state(name3, constants.RequestState.QUEUED)
    # still waiting because there is no free volume
    _assert_state(name4, constants.RequestState.WAITING)
def test_throttler_grouped_fifo_nothing(self):
    """ THROTTLER (CLIENTS): throttler release nothing (grouped fifo).

    Sets a throttler threshold of 1 for the destination RSE, stores one
    already SUBMITTED request, queues four more transfer requests, runs the
    throttler once and expects all four to remain WAITING.
    """
    def _requested_at(year):
        # Current wall-clock time moved into ``year``. A bare
        # ``replace(year=...)`` raises ValueError when the test runs on
        # 29 February and ``year`` is not a leap year, so fall back to the 28th.
        now = datetime.now()
        try:
            return now.replace(year=year)
        except ValueError:
            return now.replace(year=year, day=28)

    def _transfer_request(name, size, year):
        # Build one transfer request of ``size`` bytes for the file ``name``.
        return {
            'source_rse_id': self.source_rse_id,
            'dest_rse_id': self.dest_rse_id,
            'request_type': constants.RequestType.TRANSFER,
            'request_id': generate_uuid(),
            'name': name,
            'bytes': size,
            'scope': self.scope,
            'retry_count': 1,
            'rule_id': generate_uuid(),
            'requested_at': _requested_at(year),
            'attributes': {
                'activity': self.user_activity,
                'bytes': size,
                'md5': '',
                'adler32': ''
            }
        }

    # four waiting requests and one active requests but threshold is 1
    # more than 80% of the transfer limit are already used -> release nothing
    set('throttler', '%s,%s' % (self.all_activities, self.dest_rse), 1, session=self.db_session)
    submitted = models.Request(dest_rse_id=self.dest_rse_id, bytes=2, activity=self.user_activity, state=constants.RequestState.SUBMITTED)
    submitted.save(session=self.db_session)

    name1 = generate_uuid()
    name2 = generate_uuid()
    name3 = generate_uuid()
    name4 = generate_uuid()
    dataset_1_name = generate_uuid()
    add_did(self.scope, dataset_1_name, constants.DIDType.DATASET, self.account, session=self.db_session)
    for name in (name1, name2, name3, name4):
        add_replica(self.source_rse_id, self.scope, name, 1, self.account, session=self.db_session)
    # only the first two files belong to the dataset
    for name in (name1, name2):
        attach_dids(self.scope, dataset_1_name, [{'name': name, 'scope': self.scope}], self.account, session=self.db_session)

    requests = [
        _transfer_request(name1, 1, 2000),
        _transfer_request(name2, 2, 2020),
        # requested after the request below but small enough for max_volume check
        _transfer_request(name3, 3, 2021),
        _transfer_request(name4, 3000, 2020),
    ]
    queue_requests(requests, session=self.db_session)
    self.db_session.commit()
    throttler.run(once=True, sleep_time=1)

    for name in (name1, name2, name3, name4):
        request = get_request_by_did(self.scope, name, self.dest_rse_id)
        assert_equal(request['state'], constants.RequestState.WAITING)
def test_archive_removal_impact_on_constituents(rse_factory, did_factory, mock_scope, root_account, caches_mock, file_config_mock):
    """Check what happens to archive constituents as the reaper removes their archives.

    Two archives and four constituent files are registered on one mock RSE.
    The reaper is run three times: once as-is, once after expiring the first
    archive, and once after expiring the second. After each run the test
    checks which DIDs, replicas, dataset contents and archive contents remain.
    """
    [cache_region] = caches_mock
    rse_name, rse_id = rse_factory.make_mock_rse()

    # Create 2 archives and 4 files:
    # - One only exists in the first archive
    # - One in both, plus another replica, which is not in an archive
    # - One in both, plus another replica, which is not in an archive; and this replica has expired
    # - One in both, plus another replica, which is not in an archive; and this replica has expired; but a replication rule exists on this second replica
    # Also add these files to datasets, one of which will be removed at the end
    nb_constituents = 4
    nb_c_outside_archive = nb_constituents - 1
    constituent_size = 2000
    archive_size = 1000

    def _yesterday():
        # Tombstone value that is already in the past.
        return datetime.utcnow() - timedelta(days=1)

    def _sized(dids):
        # Copy scope/name of every DID and add the constituent size.
        return [{'scope': did['scope'], 'name': did['name'], 'bytes': constituent_size} for did in dids]

    def _count(iterable):
        # Number of items produced by a (possibly lazy) listing.
        return len(list(iterable))

    def _fill_rse_and_reap(nb_archives, nb_outside):
        # Declare the RSE as full with the given content and run the reaper once.
        cache_region.invalidate()
        occupied = nb_archives * archive_size + nb_outside * constituent_size
        rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=occupied)
        rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=occupied, free=1)
        reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)

    @read_session
    def _history_count(archive, session=None):
        # Rows recorded in the constituent association history for this archive.
        return session.query(ConstituentAssociationHistory).filter_by(**archive).count()

    uuid = str(generate_uuid())
    constituents = [{'scope': mock_scope, 'name': 'lfn.%s.%d' % (uuid, i)} for i in range(nb_constituents)]
    did_factory.register_dids(constituents)
    c_first_archive_only, c_with_replica, c_with_expired_replica, c_with_replica_and_rule = constituents

    # Replicas living outside of any archive
    replica_core.add_replica(rse_id=rse_id, account=root_account, bytes_=constituent_size, **c_with_replica)
    replica_core.add_replica(rse_id=rse_id, account=root_account, bytes_=constituent_size,
                             tombstone=_yesterday(), **c_with_expired_replica)
    replica_core.add_replica(rse_id=rse_id, account=root_account, bytes_=constituent_size,
                             tombstone=_yesterday(), **c_with_replica_and_rule)
    rule_core.add_rule(dids=[c_with_replica_and_rule], account=root_account, copies=1, rse_expression=rse_name,
                       grouping='NONE', weight=None, lifetime=None, locked=False, subscription_id=None)

    # The two archives and their contents
    archive1, archive2 = [{'scope': mock_scope, 'name': 'archive_%s.%d.zip' % (uuid, i)} for i in range(2)]
    replica_core.add_replica(rse_id=rse_id, bytes_=archive_size, account=root_account, **archive1)
    replica_core.add_replica(rse_id=rse_id, bytes_=archive_size, account=root_account, **archive2)
    did_core.attach_dids(dids=_sized(constituents), account=root_account, **archive1)
    did_core.attach_dids(dids=_sized([c_with_replica, c_with_expired_replica, c_with_replica_and_rule]),
                         account=root_account, **archive2)

    # The two datasets
    dataset1, dataset2 = [{'scope': mock_scope, 'name': 'dataset_%s.%i' % (uuid, i)} for i in range(2)]
    did_core.add_did(did_type='DATASET', account=root_account, **dataset1)
    did_core.attach_dids(dids=constituents, account=root_account, **dataset1)
    did_core.add_did(did_type='DATASET', account=root_account, **dataset2)
    did_core.attach_dids(dids=[c_first_archive_only, c_with_expired_replica], account=root_account, **dataset2)

    # Run reaper the first time.
    # the expired non-archive replica of c_with_expired_replica must be removed,
    # but the did must not be removed, and it must still remain in the dataset because
    # it still has the replica from inside the archive
    assert replica_core.get_replica(rse_id=rse_id, **c_with_expired_replica)
    _fill_rse_and_reap(2, nb_c_outside_archive)
    for did in constituents + [archive1, archive2]:
        assert did_core.get_did(**did)
    for did in [archive1, archive2, c_with_replica, c_with_replica_and_rule]:
        assert replica_core.get_replica(rse_id=rse_id, **did)
    with pytest.raises(ReplicaNotFound):
        # The replica is only on the archive, not on the constituent
        replica_core.get_replica(rse_id=rse_id, **c_first_archive_only)
    # The replica outside the archive was removed by reaper
    nb_c_outside_archive -= 1
    with pytest.raises(ReplicaNotFound):
        replica_core.get_replica(rse_id=rse_id, **c_with_expired_replica)
    # Compared to get_replica, list_replicas resolves archives, must return replicas for all files
    assert _count(replica_core.list_replicas(dids=constituents)) == 4
    assert _count(did_core.list_content(**dataset1)) == 4
    assert _count(did_core.list_archive_content(**archive1)) == 4
    assert _count(did_core.list_archive_content(**archive2)) == 3
    assert _history_count(archive1) == 0
    assert _history_count(archive2) == 0

    # Expire the first archive and run reaper again
    # the archive will be removed; and c_first_archive_only must be removed from datasets
    # and from the did table.
    replica_core.set_tombstone(rse_id=rse_id, tombstone=_yesterday(), **archive1)
    _fill_rse_and_reap(2, nb_c_outside_archive)
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**archive1)
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**c_first_archive_only)
    assert _count(replica_core.list_replicas(dids=constituents)) == 3
    assert _count(did_core.list_content(**dataset1)) == 3
    assert _count(did_core.list_archive_content(**archive1)) == 0
    assert _count(did_core.list_archive_content(**archive2)) == 3
    assert _history_count(archive1) == 4
    assert _history_count(archive2) == 0

    # Expire the second archive replica and run reaper another time
    # c_with_expired_replica is removed because its external replica got removed at previous step
    # and it exists only inside the archive now.
    # If not open, Dataset2 will be removed because it will be empty.
    did_core.set_status(open=False, **dataset2)
    replica_core.set_tombstone(rse_id=rse_id, tombstone=_yesterday(), **archive2)
    _fill_rse_and_reap(1, nb_c_outside_archive)
    # The archive must be removed
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**archive2)
    # The DIDs which only existed in the archive are also removed
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**c_first_archive_only)
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**c_with_expired_replica)
    # If the DID has a non-expired replica outside the archive without rules on it, the DID is not removed
    assert did_core.get_did(**c_with_replica)
    # If the DID has an expired replica outside the archive, but has rules on that replica, the DID is not removed
    assert did_core.get_did(**c_with_replica_and_rule)
    assert _count(replica_core.list_replicas(dids=constituents)) == 2
    assert _count(did_core.list_content(**dataset1)) == 2
    with pytest.raises(DataIdentifierNotFound):
        did_core.get_did(**dataset2)
    assert _count(did_core.list_content(**dataset2)) == 0
    assert _count(did_core.list_archive_content(**archive2)) == 0
    assert _history_count(archive1) == 4
    assert _history_count(archive2) == 3
def test_hop_penalty(rse_factory, did_factory, root_account, file_config_mock, core_config_mock, caches_mock):
    """
    Test that both global hop_penalty and the per-rse one are correctly taken into consideration

    A file has replicas on RSE1 and RSE4 and a rule requests it on RSE3.
    After one submitter run, a request must exist towards RSE2 and none
    towards RSE5.
    """
    # +------+    +------+    +------+
    # |      |    |  5   |    |      |
    # | RSE1 +--->| RSE2 +--->| RSE3 |
    # |      |    |      |    |      |
    # +------+    +------+    +--^---+
    #                            |
    # +------+    +------+       |
    # |      |    |  20  |       |
    # | RSE4 +--->| RSE5 +-------+
    # |      |    |      |
    # +------+    +------+
    # NOTE(review): the 5 on RSE2 is presumably the global hop_penalty provided
    # through core_config_mock; only the 20 on RSE5 is set in this function.
    created = [rse_factory.make_posix_rse() for _ in range(5)]
    all_rses = [rse_id for _, rse_id in created]
    rse1_id, rse2_id, rse3_id, rse4_id, rse5_id = all_rses
    rse3 = created[2][0]

    # Links drawn in the diagram above, all with the same ranking
    for src_id, dst_id in ((rse1_id, rse2_id), (rse2_id, rse3_id), (rse4_id, rse5_id), (rse5_id, rse3_id)):
        distance_core.add_distance(src_id, dst_id, ranking=10)

    # Both middle RSEs may serve as intermediate hop; only RSE5 gets its own penalty
    for middle_id in (rse2_id, rse5_id):
        rse_core.add_rse_attribute(middle_id, 'available_for_multihop', True)
    rse_core.add_rse_attribute(rse5_id, 'hop_penalty', 20)

    did = did_factory.random_did()
    for source_id in (rse1_id, rse4_id):
        replica_core.add_replica(rse_id=source_id, account=root_account, bytes_=1, **did)

    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=rse3, grouping='ALL',
                       weight=None, lifetime=None, locked=False, subscription_id=None)
    submitter(once=True, rses=[{'id': rse_id} for rse_id in all_rses], partition_wait_time=None,
              transfertool='mock', transfertype='single', ignore_availability=True)

    # Ensure the path was created through the correct middle hop
    request_core.get_request_by_did(rse_id=rse2_id, **did)
    with pytest.raises(RequestNotFound):
        request_core.get_request_by_did(rse_id=rse5_id, **did)
def test_release_waiting_requests_grouped_fifo(self):
    """ REQUEST (CORE): release waiting requests based on grouped FIFO.

    Five scenarios are run one after the other against the same destination
    RSE; the request table is emptied before each of them:

    1. a single request for a file that is not attached to any dataset;
    2. a single request for a file attached to a dataset;
    3. five requests with different ``requested_at`` values spread over two
       datasets (plus one unattached file);
    4. four requests with a transfer-limit volume of 10;
    5. four requests with a transfer-limit volume of 5 and one pre-existing
       SUBMITTED request of 2 bytes.

    The test is skipped (returns ``True``) when the dialect is MySQL.
    """
    if self.dialect == 'mysql':
        return True

    db = self.db_session
    queued = constants.RequestState.QUEUED
    waiting = constants.RequestState.WAITING

    def wipe_requests():
        # Start every scenario from an empty request table.
        db.query(models.Request).delete()
        db.commit()

    def add_files(*names):
        # Register a 1-byte replica on the source RSE for each name.
        for did_name in names:
            add_replica(self.source_rse_id, self.scope, did_name, 1, self.account, session=db)

    def add_dataset():
        # Create a dataset with a random name and return that name.
        dataset = generate_uuid()
        add_did(self.scope, dataset, constants.DIDType.DATASET, self.account, session=db)
        return dataset

    def attach(dataset, *names):
        # Attach the given files to the dataset in a single call.
        children = [{'name': did_name, 'scope': self.scope} for did_name in names]
        attach_dids(self.scope, dataset, children, self.account, session=db)

    def transfer(name, year=None, volume=None):
        # Build one transfer-request dictionary.
        #   year   -> adds 'requested_at' (current time with the year replaced)
        #   volume -> adds a top-level 'bytes' and uses it for attributes['bytes'];
        #             when omitted, attributes['bytes'] is 1 and there is no top-level 'bytes'
        entry = {
            'source_rse_id': self.source_rse_id,
            'dest_rse_id': self.dest_rse_id,
            'request_type': constants.RequestType.TRANSFER,
            'request_id': generate_uuid(),
            'name': name,
            'scope': self.scope,
            'rule_id': generate_uuid(),
            'retry_count': 1,
            'attributes': {
                'activity': self.user_activity,
                'bytes': 1 if volume is None else volume,
                'md5': '',
                'adler32': '',
            },
        }
        if volume is not None:
            entry['bytes'] = volume
        if year is not None:
            entry['requested_at'] = datetime.now().replace(year=year)
        return entry

    def queue_and_release(requests):
        # Queue the requests, then ask for exactly one to be released.
        queue_requests(requests, session=db)
        return release_waiting_requests_grouped_fifo(self.dest_rse_id, count=1, session=db)

    def check_states(*expected):
        # expected: (file name, request state) pairs, verified in order.
        for did_name, wanted in expected:
            found = get_request_by_did(self.scope, did_name, self.dest_rse_id, session=db)
            assert_equal(found['state'], wanted)

    # set max_volume to 0 to check first without releasing extra requests
    set_rse_transfer_limits(self.dest_rse_id, self.all_activities, volume=0, max_transfers=1, session=db)

    # one request with an unattached DID -> one request should be released
    wipe_requests()
    name = generate_uuid()
    add_files(name)
    queue_and_release([transfer(name)])
    check_states((name, queued))

    # one request with an attached DID -> one request should be released
    wipe_requests()
    name = generate_uuid()
    add_files(name)
    dataset_name = add_dataset()
    attach(dataset_name, name)
    queue_and_release([transfer(name)])
    check_states((name, queued))

    # five requests with different requested_at and multiple attachments per collection
    # -> release only one request -> two requests of one collection should be released
    wipe_requests()
    name1, name2, name3, name4, name5 = (generate_uuid() for _ in range(5))
    dataset_1_name = add_dataset()
    dataset_2_name = add_dataset()
    add_files(name1, name2, name3, name4, name5)
    attach(dataset_1_name, name1, name2)
    attach(dataset_2_name, name3, name4)
    queue_and_release([
        transfer(name1, year=2000),
        transfer(name2, year=2020),
        transfer(name3, year=2015),
        transfer(name4, year=2010),
        transfer(name5, year=2018),
    ])
    check_states(
        (name1, queued),
        (name2, queued),
        (name3, waiting),
        (name4, waiting),
        (name5, waiting),
    )

    # with maximal volume check -> release one request -> three requests should be
    # released because of attachments and free volume space
    wipe_requests()
    name1, name2, name3 = (generate_uuid() for _ in range(3))
    # NOTE(review): name4 is not regenerated in this scenario; it still holds the
    # value from the previous one, exactly as in the original test.
    dataset_1_name = add_dataset()
    add_files(name1, name2, name3, name4)
    attach(dataset_1_name, name1)
    attach(dataset_1_name, name2)
    set_rse_transfer_limits(self.dest_rse_id, self.all_activities, volume=10, max_transfers=1, session=db)
    amount_updated_requests = queue_and_release([
        transfer(name1, year=2000, volume=1),
        transfer(name2, year=2020, volume=2),
        # requested after the request below but small enough for max_volume check
        transfer(name3, year=2021, volume=3),
        transfer(name4, year=2020, volume=3000),
    ])
    assert_equal(amount_updated_requests, 3)
    check_states(
        (name1, queued),    # released because it got requested first
        (name2, queued),    # released because the DID is attached to the same dataset
        (name3, queued),    # released because of available volume
        (name4, waiting),   # still waiting because there is no free volume
    )

    # with maximal volume check -> release one request -> two requests should be
    # released because of attachments
    wipe_requests()
    name1, name2, name3, name4 = (generate_uuid() for _ in range(4))
    dataset_1_name = add_dataset()
    add_files(name1, name2, name3, name4)
    attach(dataset_1_name, name1)
    attach(dataset_1_name, name2)
    set_rse_transfer_limits(self.dest_rse_id, self.all_activities, volume=5, max_transfers=1, session=db)
    # A request that is already SUBMITTED and accounts for 2 bytes on the destination.
    submitted = models.Request(dest_rse_id=self.dest_rse_id,
                               bytes=2,
                               activity=self.all_activities,
                               state=constants.RequestState.SUBMITTED)
    submitted.save(session=db)
    queue_and_release([
        transfer(name1, year=2000, volume=1),
        transfer(name2, year=2020, volume=2),
        transfer(name3, year=2020, volume=1),
        transfer(name4, year=2020, volume=1),
    ])
    check_states(
        (name1, queued),    # released because it got requested first
        (name2, queued),    # released because the DID is attached to the same dataset
        (name3, waiting),   # no free volume left after releasing the two requests above
        (name4, waiting),
    )
did_meta = None try: did_meta = did.get_metadata(response['scope'], response['name'], session=session) except: logging.critical('DID %s:%s NOT FOUND - Cannot re-register replica - potential dark data' % (response['scope'], response['name'])) raise if did_meta: try: replica.add_replica(rse_name, response['scope'], response['name'], did_meta['bytes'], did_meta['account'], adler32=did_meta['adler32'], tombstone=datetime.datetime.utcnow(), session=session) except: logging.critical('Cannot register replica for DID %s:%s at RSE %s - potential dark data' % (response['scope'], response['name'], rse_name)) raise except: # could not update successful lock record_timer('daemons.conveyor.common.update_request_state.replica-update_replicas_states', (time.time()-tss)*1000) logging.warn("Could not update replica state for successful transfer %s:%s at %s: %s" % (response['scope'], response['name'], rse_name,
def _add_endpoint_rse(rse_name, url, scheme, impl):
    """Create the RSE ``rse_name`` and attach one protocol derived from ``url``.

    ``url`` is split on ``:`` and read as ``<token>://<hostname>:<port>/<prefix>``:
    the first field is the space token (only used for ``srm``), the second
    field minus its leading ``//`` is the hostname, and the third field holds
    the port followed by the path prefix.
    """
    fields = url.split(':')
    rse.add_rse(rse_name)

    location = fields[2].split('/')
    protocol = {
        'impl': impl,
        'scheme': scheme,
        'domains': {
            'lan': {'read': 1, 'write': 1, 'delete': 1},
            'wan': {'read': 1, 'write': 1, 'delete': 1}},
        'hostname': fields[1][2:],
        'port': location[0],
        'prefix': '/'.join([''] + location[1:]),
    }
    if scheme == 'srm':
        protocol['extended_attributes'] = {'space_token': fields[0], 'web_service_path': ''}
    rse.add_protocol(rse_name, protocol)


def request_transfer(once=False, src=None, dst=None):
    """
    Main loop to request a new transfer.

    Registers two throw-away RSEs (one for ``src``, one for ``dst``) and then,
    until ``graceful_stop`` is set, repeatedly: creates a dataset, uploads the
    configured injector file to the source RSE, registers the replica, attaches
    it to the dataset and adds a rule requesting one copy on the destination.

    :param once: If True, return after the first iteration of the loop.
    :param src: Source endpoint, read as ``<token>://<hostname>:<port>/<prefix>``.
                Anything not starting with ``https://`` is treated as SRM.
    :param dst: Destination endpoint, same layout as ``src``. The scheme and
                implementation are chosen from ``src`` only.
    :raises ValueError: If ``src`` or ``dst`` is not given.
    """

    logging.info('request: starting')

    if src is None or dst is None:
        # Previously this surfaced as an AttributeError on None.startswith().
        raise ValueError('request_transfer requires both src and dst endpoints')

    site_a = 'RSE%s' % generate_uuid().upper()
    site_b = 'RSE%s' % generate_uuid().upper()

    scheme = 'https'
    impl = 'rucio.rse.protocols.webdav.Default'
    if not src.startswith('https://'):
        scheme = 'srm'
        impl = 'rucio.rse.protocols.srm.Default'

    _add_endpoint_rse(site_a, src, scheme, impl)
    _add_endpoint_rse(site_b, dst, scheme, impl)

    si = rsemanager.get_rse_info(site_a)

    session = get_session()

    logging.info('request: started')

    while not graceful_stop.is_set():

        try:
            tmp_name = generate_uuid()

            # add a new dataset
            did.add_did(scope='mock', name='dataset-%s' % tmp_name,
                        type=DIDType.DATASET, account='root', session=session)

            # construct PFN
            pfn = rsemanager.lfns2pfns(si, lfns=[{'scope': 'mock', 'name': 'file-%s' % tmp_name}])['mock:file-%s' % tmp_name]

            # create the directories if needed
            # NOTE(review): the protocol connection is never closed in this loop;
            # confirm whether the protocol object needs an explicit close.
            p = rsemanager.create_protocol(si, operation='write', scheme=scheme)
            p.connect()
            try:
                p.mkdir(pfn)
            except Exception:
                # Best effort (presumably the directory may already exist), but
                # keep a trace instead of discarding the error silently.
                logging.debug('request: mkdir failed for %s, continuing: %s', pfn, sys.exc_info()[1])

            # upload the test file
            try:
                injector_file = config_get('injector', 'file')
                fp = os.path.dirname(injector_file)
                fn = os.path.basename(injector_file)
                p.put(fn, pfn, source_dir=fp)
            except Exception:
                logging.critical('Could not upload, removing temporary DID: %s' % str(sys.exc_info()))
                did.delete_dids([{'scope': 'mock', 'name': 'dataset-%s' % tmp_name}], account='root', session=session)
                # Leaves the main loop entirely; the stop messages below are still logged.
                break

            # add the replica
            replica.add_replica(rse=site_a, scope='mock', name='file-%s' % tmp_name,
                                bytes=config_get_int('injector', 'bytes'),
                                adler32=config_get('injector', 'adler32'),
                                md5=config_get('injector', 'md5'),
                                account='root', session=session)

            # to the dataset
            # NOTE(review): 'bytes' is read with config_get here but with
            # config_get_int above -- confirm which type attach_dids expects.
            did.attach_dids(scope='mock', name='dataset-%s' % tmp_name, dids=[{'scope': 'mock',
                                                                               'name': 'file-%s' % tmp_name,
                                                                               'bytes': config_get('injector', 'bytes')}],
                            account='root', session=session)

            # add rule for the dataset; only this step is timed
            ts = time.time()

            rule.add_rule(dids=[{'scope': 'mock', 'name': 'dataset-%s' % tmp_name}],
                          account='root',
                          copies=1,
                          rse_expression=site_b,
                          grouping='ALL',
                          weight=None,
                          lifetime=None,
                          locked=False,
                          subscription_id=None,
                          activity='mock-injector',
                          session=session)

            logging.info('added rule for %s for DID mock:%s' % (site_b, tmp_name))
            record_timer('daemons.mock.conveyorinjector.add_rule', (time.time()-ts)*1000)

            record_counter('daemons.mock.conveyorinjector.request_transfer')

            session.commit()
        except Exception:
            # Do not trap KeyboardInterrupt/SystemExit: only real errors are
            # rolled back and logged, after which the loop continues.
            session.rollback()
            logging.critical(traceback.format_exc())

        if once:
            return

    logging.info('request: graceful stop requested')

    logging.info('request: graceful stop done')
def test_reaper():
    """ REAPER2 (DAEMON): Test the reaper2 daemon.

    Creates one RSE (single-VO) or two RSEs (multi-VO) filled with 30 expired
    replicas of 2 GiB each, reports almost no free space against a
    ``MinFreeSpace`` limit of 10 GiB (the size of 5 such files), runs the
    reaper twice and checks how many replicas are left.

    In multi-VO mode every RSE and replica is duplicated for a second VO
    (``new``); the second RSE is reaped with ``vos=['new']`` and the test
    checks that only that VO lost replicas.
    """
    if config_get_bool('common', 'multi_vo', raise_exception=False, default=False):
        vo = {'vo': config_get('client', 'vo', raise_exception=False, default='tst')}
        new_vo = {'vo': 'new'}
        if not vo_core.vo_exists(**new_vo):
            vo_core.add_vo(description='Test', email='*****@*****.**', **new_vo)
        if not scope_core.check_scope(InternalScope('data13_hip', **new_vo)):
            scope_core.add_scope(InternalScope('data13_hip', **new_vo), InternalAccount('root', **new_vo))
        nb_rses = 2
    else:
        vo = {}
        new_vo = {}
        nb_rses = 1

    mock_protocol = {'scheme': 'MOCK',
                     'hostname': 'localhost',
                     'port': 123,
                     'prefix': '/test/reaper',
                     'impl': 'rucio.rse.protocols.mock.Default',
                     'domains': {
                         'lan': {'read': 1, 'write': 1, 'delete': 1},
                         'wan': {'read': 1, 'write': 1, 'delete': 1}}}

    nb_files = 30
    file_size = 2147483648  # 2G

    def count_replicas(vo_kwargs, names, rse_expression):
        # Number of entries list_replicas yields for `names` in the given VO.
        dids = [{'scope': InternalScope('data13_hip', **vo_kwargs), 'name': n} for n in names]
        return len(list(replica_core.list_replicas(dids=dids, rse_expression=rse_expression)))

    rse_names = []
    all_file_names = []
    for _ in range(nb_rses):
        rse_name = rse_name_generator()
        rse_names.append(rse_name)
        rse_id = rse_core.add_rse(rse_name, **vo)
        rse_core.add_protocol(rse_id=rse_id, parameter=mock_protocol)
        if new_vo:
            # Same RSE name, registered a second time under the other VO.
            rse_id_new = rse_core.add_rse(rse_name, **new_vo)
            rse_core.add_protocol(rse_id=rse_id_new, parameter=mock_protocol)

        file_names = []
        for _ in range(nb_files):
            file_name = 'lfn' + generate_uuid()
            file_names.append(file_name)
            # Tombstone one day in the past.
            replica_core.add_replica(rse_id=rse_id, scope=InternalScope('data13_hip', **vo),
                                     name=file_name, bytes=file_size,
                                     tombstone=datetime.utcnow() - timedelta(days=1),
                                     account=InternalAccount('root', **vo), adler32=None, md5=None)
            if new_vo:
                replica_core.add_replica(rse_id=rse_id_new, scope=InternalScope('data13_hip', **new_vo),
                                         name=file_name, bytes=file_size,
                                         tombstone=datetime.utcnow() - timedelta(days=1),
                                         account=InternalAccount('root', **new_vo), adler32=None, md5=None)

        all_file_names.append(file_names)
        rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=800)
        rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=10737418240)
        rse_core.set_rse_limits(rse_id=rse_id, name='MaxBeingDeletedFiles', value=10)
        if new_vo:
            rse_core.set_rse_usage(rse_id=rse_id_new, source='storage', used=nb_files * file_size, free=800)
            rse_core.set_rse_limits(rse_id=rse_id_new, name='MinFreeSpace', value=10737418240)
            rse_core.set_rse_limits(rse_id=rse_id_new, name='MaxBeingDeletedFiles', value=10)

    if not vo:
        reaper(once=True, rses=[], include_rses=rse_names[0], exclude_rses=[])
        reaper(once=True, rses=[], include_rses=rse_names[0], exclude_rses=[])
        assert count_replicas(vo, all_file_names[0], rse_names[0]) == nb_files - 5
    else:
        # Check we reap all VOs by default
        reaper(once=True, rses=[], include_rses=rse_names[0], exclude_rses=[])
        reaper(once=True, rses=[], include_rses=rse_names[0], exclude_rses=[])
        assert count_replicas(vo, all_file_names[0], rse_names[0]) == nb_files - 5
        assert count_replicas(new_vo, all_file_names[0], rse_names[0]) == nb_files - 5

        # Check we don't affect a second VO that isn't specified
        reaper(once=True, rses=[], include_rses=rse_names[1], exclude_rses=[], vos=['new'])
        reaper(once=True, rses=[], include_rses=rse_names[1], exclude_rses=[], vos=['new'])
        # These two checks used to be written `assert len(...), expected`, which
        # only tested that the list was non-empty; they now compare the counts.
        assert count_replicas(vo, all_file_names[1], rse_names[1]) == nb_files
        assert count_replicas(new_vo, all_file_names[1], rse_names[1]) == nb_files - 5
def test_reaper():
    """ REAPER (DAEMON): Test the reaper daemon."""
    gib = 1024 ** 3
    nb_files = 30
    file_size = 2 * gib  # 2G per replica

    # A fresh RSE served by the mock protocol implementation.
    rse_name = rse_name_generator()
    rse_id = rse_core.add_rse(rse_name)
    access = {'read': 1, 'write': 1, 'delete': 1}
    mock_protocol = {
        'scheme': 'MOCK',
        'hostname': 'localhost',
        'port': 123,
        'prefix': '/test/reaper',
        'impl': 'rucio.rse.protocols.mock.Default',
        'domains': {'lan': dict(access), 'wan': dict(access)},
    }
    rse_core.add_protocol(rse_id=rse_id, parameter=mock_protocol)

    # Fill the RSE with replicas whose tombstone is one day in the past.
    file_names = []
    while len(file_names) < nb_files:
        file_name = 'lfn' + generate_uuid()
        file_names.append(file_name)
        replica_core.add_replica(
            rse_id=rse_id,
            scope=InternalScope('data13_hip'),
            name=file_name,
            bytes=file_size,
            tombstone=datetime.utcnow() - timedelta(days=1),
            account=InternalAccount('root'),
            adler32=None,
            md5=None,
        )

    # Report the storage as nearly full and configure the limits.
    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=800)
    for limit_name, limit_value in (('MinFreeSpace', 10 * gib), ('MaxBeingDeletedFiles', 10)):
        rse_core.set_rse_limits(rse_id=rse_id, name=limit_name, value=limit_value)

    # Two passes of the daemon over this single RSE.
    rses = [rse_core.get_rse(rse_id)]
    for _ in range(2):
        reaper(once=True, rses=rses)

    # Ten replicas are expected to be gone afterwards.
    dids = [{'scope': InternalScope('data13_hip'), 'name': n} for n in file_names]
    remaining = list(replica_core.list_replicas(dids=dids, rse_expression=rse_name))
    assert_equal(len(remaining), nb_files - 10)
def test_source_avoid_deletion(vo, caches_mock, core_config_mock, rse_factory, did_factory, root_account, file_factory):
    """ Test that sources on a file block it from deletion """

    _, reaper_region = caches_mock
    (src_rse1, src_rse1_id), (src_rse2, src_rse2_id), (dst_rse, dst_rse_id) = (
        rse_factory.make_mock_rse() for _ in range(3)
    )
    source_ids = [src_rse1_id, src_rse2_id]
    all_rses = source_ids + [dst_rse_id]
    any_source = f'{src_rse1}|{src_rse2}'
    epoch = datetime(year=1970, month=1, day=1)

    def run_reaper():
        # Invalidate the reaper cache region, then do a single pass over both sources.
        reaper_region.invalidate()
        reaper(once=True, rses=[], include_rses=any_source, exclude_rses=None)

    def pfn_count():
        # Number of pfns reported for the DID on the two source RSEs.
        first = next(iter(replica_core.list_replicas(dids=[did], rse_expression=any_source)))
        return len(first['pfns'])

    # Both sources report no free space against a MinFreeSpace limit of 1.
    for rse_id in source_ids:
        rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=1)
        rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=1, free=0)
    for source_id, ranking in ((src_rse1_id, 20), (src_rse2_id, 10)):
        distance_core.add_distance(source_id, dst_rse_id, ranking=ranking)

    # Upload a test file to both rses without registering
    did = did_factory.random_did()

    # Register replica on one source RSE
    replica_core.add_replica(rse_id=src_rse1_id, account=root_account, bytes_=1, tombstone=epoch, **did)
    rule_core.add_rule(
        dids=[did],
        account=root_account,
        copies=1,
        rse_expression=dst_rse,
        grouping='ALL',
        weight=None,
        lifetime=None,
        locked=False,
        subscription_id=None,
    )

    # Reaper will not delete a file which only has one replica if there is any pending transfer for it
    run_reaper()
    assert pfn_count() == 1

    # Register replica on second source rse
    replica_core.add_replica(rse_id=src_rse2_id, account=root_account, bytes_=1, tombstone=epoch, **did)
    assert pfn_count() == 2

    # Submit the transfer. This will create the sources.
    submitter(
        once=True,
        rses=[{'id': rse_id} for rse_id in all_rses],
        partition_wait_time=None,
        transfertool='mock',
        transfertype='single',
        filter_transfertool=None,
    )

    # None of the replicas will be removed. They are protected by an entry in the sources table
    run_reaper()
    assert pfn_count() == 2

    @transactional_session
    def _drop_sources(rse_id, scope, name, session=None):
        # Remove the Source rows of this DID on the given RSE.
        stmt = delete(Source).where(Source.rse_id == rse_id, Source.scope == scope, Source.name == name)
        session.execute(stmt)

    # Deletion succeeds for one replica (second still protected by existing request)
    for source_id in source_ids:
        _drop_sources(source_id, **did)
    run_reaper()
    assert pfn_count() == 1
def test_reaper():
    """ REAPER2 (DAEMON): Test the reaper2 daemon."""
    multi_vo = config_get_bool('common', 'multi_vo', raise_exception=False, default=False)
    if multi_vo:
        vo = {'vo': config_get('client', 'vo', raise_exception=False, default='tst')}
        new_vo = {'vo': 'new'}
        if not vo_core.vo_exists(**new_vo):
            vo_core.add_vo(description='Test', email='*****@*****.**', **new_vo)
        if not scope_core.check_scope(InternalScope('data13_hip', **new_vo)):
            scope_core.add_scope(InternalScope('data13_hip', **new_vo), InternalAccount('root', **new_vo))
        nb_rses = 2
    else:
        vo, new_vo, nb_rses = {}, {}, 1

    access = {'read': 1, 'write': 1, 'delete': 1}
    mock_protocol = {
        'scheme': 'MOCK',
        'hostname': 'localhost',
        'port': 123,
        'prefix': '/test/reaper',
        'impl': 'rucio.rse.protocols.mock.Default',
        'domains': {'lan': dict(access), 'wan': dict(access)},
    }

    nb_files = 250
    file_size = 200  # bytes per replica
    # VOs that get their own copy of every RSE and replica.
    vo_variants = [vo, new_vo] if new_vo else [vo]

    def remaining(vo_kwargs, names, rse_expression):
        # Number of entries list_replicas yields for `names` in the given VO.
        dids = [{'scope': InternalScope('data13_hip', **vo_kwargs), 'name': n} for n in names]
        return len(list(replica_core.list_replicas(dids=dids, rse_expression=rse_expression)))

    rse_names = []
    all_file_names = []
    for _ in range(nb_rses):
        rse_name = rse_name_generator()
        rse_names.append(rse_name)

        # Register the RSE once per VO, primary VO first.
        targets = []
        for vo_kwargs in vo_variants:
            target_id = rse_core.add_rse(rse_name, **vo_kwargs)
            rse_core.add_protocol(rse_id=target_id, parameter=mock_protocol)
            targets.append((target_id, vo_kwargs))
        rse_id = targets[0][0]

        # Each file gets a replica with a tombstone one day in the past in every VO.
        file_names = []
        for _ in range(nb_files):
            file_name = 'lfn' + generate_uuid()
            file_names.append(file_name)
            for target_id, vo_kwargs in targets:
                replica_core.add_replica(
                    rse_id=target_id,
                    scope=InternalScope('data13_hip', **vo_kwargs),
                    name=file_name,
                    bytes=file_size,
                    tombstone=datetime.utcnow() - timedelta(days=1),
                    account=InternalAccount('root', **vo_kwargs),
                    adler32=None,
                    md5=None,
                )
        all_file_names.append(file_names)

        # Report the storage as full; MinFreeSpace equals the size of 50 files.
        # No MaxBeingDeletedFiles limit is configured in this test.
        for target_id, _ in targets:
            rse_core.set_rse_usage(rse_id=target_id, source='storage', used=nb_files * file_size, free=1)
            rse_core.set_rse_limits(rse_id=target_id, name='MinFreeSpace', value=50 * file_size)

    from rucio.daemons.reaper.reaper2 import REGION
    REGION.invalidate()

    def reap(rse_expression):
        reaper(once=True, rses=[], include_rses=rse_expression, exclude_rses=[])

    if not vo:
        assert remaining(vo, all_file_names[0], rse_name) == nb_files

        # Check first if the reaper does not delete anything if no space is needed
        rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=323000000000)
        reap(rse_names[0])
        assert remaining(vo, all_file_names[0], rse_name) == nb_files

        # Now put it over threshold and delete
        rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=1)
        REGION.invalidate()
        reap(rse_names[0])
        reap(rse_names[0])
        assert remaining(vo, all_file_names[0], rse_name) == 200
    else:
        # Check we reap all VOs by default
        reap(rse_names[0])
        reap(rse_names[0])
        assert remaining(vo, all_file_names[0], rse_names[0]) == 200
        assert remaining(new_vo, all_file_names[0], rse_names[0]) == 200