def test_fts_recoverable_failures_handled_on_multihop(
        vo, did_factory, root_account, replica_client, file_factory,
        core_config_mock, caches_mock):
    """
    Verify that the poller correctly handles recoverable FTS job failures
    """
    # Fixed multihop topology from the test setup: XRD1 (source) -> XRD3 (jump) -> XRD4 (destination)
    src_rse = 'XRD1'
    src_rse_id = rse_core.get_rse_id(rse=src_rse, vo=vo)
    jump_rse = 'XRD3'
    jump_rse_id = rse_core.get_rse_id(rse=jump_rse, vo=vo)
    dst_rse = 'XRD4'
    dst_rse_id = rse_core.get_rse_id(rse=dst_rse, vo=vo)
    all_rses = [src_rse_id, jump_rse_id, dst_rse_id]

    # Create and upload a real file, but register it with wrong checksum. This will trigger
    # a FTS "Recoverable" failure on checksum validation
    local_file = file_factory.file_generator()
    did = did_factory.random_did()
    # no_register: the file only exists on storage; registration happens below with a bad adler32
    did_factory.upload_client.upload([{
        'path': local_file,
        'rse': src_rse,
        'did_scope': did['scope'].external,
        'did_name': did['name'],
        'no_register': True,
    }])
    replica_client.add_replicas(rse=src_rse, files=[{
        'scope': did['scope'].external,
        'name': did['name'],
        'bytes': 1,
        'adler32': 'aaaaaaaa'
    }])

    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=dst_rse,
                       grouping='ALL', weight=None, lifetime=None, locked=False,
                       subscription_id=None)
    submitter(once=True, rses=[{'id': rse_id} for rse_id in all_rses], group_bulk=2,
              partition_wait_time=None, transfertype='single', filter_transfertool=None)

    # The destination request must eventually be failed by the poller ...
    request = __wait_for_request_state(dst_rse_id=dst_rse_id, state=RequestState.FAILED, **did)
    assert request['state'] == RequestState.FAILED
    # ... and the intermediate-hop request must be failed as well
    request = request_core.get_request_by_did(rse_id=jump_rse_id, **did)
    assert request['state'] == RequestState.FAILED
def test_release_waiting_requests_all(self):
    """ REQUEST (CORE): release all waiting requests. """
    name1 = generate_uuid()
    name2 = generate_uuid()
    add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session)
    # Two transfer requests with distinct requested_at timestamps (2018 and 2020):
    # release_all_waiting_requests must queue both regardless of their age.
    requests = [{
        'dest_rse_id': self.dest_rse_id,
        'source_rse_id': self.source_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name1,
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': datetime.now().replace(year=2018),
        'attributes': {
            'activity': 'User Subscription',
            'bytes': 1,
            'md5': '',
            'adler32': ''
        }
    }, {
        'dest_rse_id': self.dest_rse_id,
        'source_rse_id': self.source_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'requested_at': datetime.now().replace(year=2020),
        'name': name2,
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'attributes': {
            'activity': 'User Subscription',
            'bytes': 1,
            'md5': '',
            'adler32': ''
        }
    }]
    queue_requests(requests, session=self.db_session)
    release_all_waiting_requests(self.dest_rse_id, session=self.db_session)
    # Both requests must now be in QUEUED state.
    request = get_request_by_did(self.scope, name1, self.dest_rse_id, session=self.db_session)
    assert_equal(request['state'], constants.RequestState.QUEUED)
    request = get_request_by_did(self.scope, name2, self.dest_rse_id, session=self.db_session)
    assert_equal(request['state'], constants.RequestState.QUEUED)
def test_ignore_availability(rse_factory, did_factory, root_account, core_config_mock, caches_mock):
    """A transfer from a non-readable source is only submitted when availability is ignored."""

    def _prepare_unavailable_source():
        # Build a fresh source/destination pair, upload a file with a rule towards
        # the destination, then mark the source as not readable.
        source_name, source_id = rse_factory.make_posix_rse()
        dest_name, dest_id = rse_factory.make_posix_rse()
        distance_core.add_distance(source_id, dest_id, ranking=10)
        test_did = did_factory.upload_test_file(source_name)
        rule_core.add_rule(dids=[test_did], account=root_account, copies=1,
                           rse_expression=dest_name, grouping='ALL', weight=None,
                           lifetime=None, locked=False, subscription_id=None)
        rse_core.update_rse(source_id, {'availability_read': False})
        return source_id, dest_id, test_did

    # Default behaviour: the unavailable source is skipped, so no source is found.
    source_id, dest_id, test_did = _prepare_unavailable_source()
    submitter(once=True, rses=[{'id': rse_id} for rse_id in (source_id, dest_id)],
              partition_wait_time=None, transfertool='mock', transfertype='single')
    request = request_core.get_request_by_did(rse_id=dest_id, **test_did)
    assert request['state'] == RequestState.NO_SOURCES

    # With ignore_availability the identical setup results in a submitted transfer.
    source_id, dest_id, test_did = _prepare_unavailable_source()
    submitter(once=True, rses=[{'id': rse_id} for rse_id in (source_id, dest_id)],
              partition_wait_time=None, transfertool='mock', transfertype='single',
              ignore_availability=True)
    request = request_core.get_request_by_did(rse_id=dest_id, **test_did)
    assert request['state'] == RequestState.SUBMITTED
def test_multihop_concurrent_submitters(rse_factory, did_factory, root_account, core_config_mock, caches_mock):
    """
    Ensure that multiple concurrent submitters on the same multi-hop don't result in an undesired database state
    """
    source_name, source_id = rse_factory.make_posix_rse()
    hop_name, hop_id = rse_factory.make_posix_rse()
    dest_name, dest_id = rse_factory.make_posix_rse()
    rse_core.add_rse_attribute(hop_id, 'available_for_multihop', True)
    add_distance(source_id, hop_id, ranking=10)
    add_distance(hop_id, dest_id, ranking=10)

    uploaded_did = did_factory.upload_test_file(source_name)
    rule_core.add_rule(dids=[uploaded_did], account=root_account, copies=1,
                       rse_expression=dest_name, grouping='ALL', weight=None,
                       lifetime=None, locked=False, subscription_id=None)

    worker_count = 9
    attempt_count = 18
    # Hammer next_transfers_to_submit concurrently. Individual calls are allowed to
    # fail; only the resulting database state is asserted below.
    with ThreadPoolExecutor(max_workers=worker_count) as pool:
        pending = [
            pool.submit(next_transfers_to_submit, rses=rse_factory.created_rses)
            for _ in range(attempt_count)
        ]
        for task in pending:
            try:
                task.result()
            except Exception:
                pass

    hop_request = request_core.get_request_by_did(rse_id=hop_id, **uploaded_did)
    final_request = request_core.get_request_by_did(rse_id=dest_id, **uploaded_did)
    # Both hops must be queued exactly once, with correct multihop attributes.
    assert hop_request['state'] == final_request['state'] == RequestState.QUEUED
    assert hop_request['attributes']['source_replica_expression'] == source_name
    assert hop_request['attributes']['is_intermediate_hop']
def test_request_submitted_in_order(rse_factory, did_factory, root_account):
    """Verify that the submitter submits transfer requests in the order of their creation."""
    src_rses = [rse_factory.make_posix_rse() for _ in range(2)]
    dst_rses = [rse_factory.make_posix_rse() for _ in range(3)]
    for _, src_rse_id in src_rses:
        for _, dst_rse_id in dst_rses:
            distance_core.add_distance(src_rse_id=src_rse_id, dest_rse_id=dst_rse_id, ranking=10)
            distance_core.add_distance(src_rse_id=dst_rse_id, dest_rse_id=src_rse_id, ranking=10)

    # Create a certain number of files on source RSEs with replication rules towards random destination RSEs
    nb_files = 15
    dids = []
    requests = []
    src_rses_iterator = itertools.cycle(src_rses)
    dst_rses_iterator = itertools.cycle(dst_rses)
    for _ in range(nb_files):
        src_rse_name, src_rse_id = next(src_rses_iterator)
        dst_rse_name, dst_rse_id = next(dst_rses_iterator)
        did = did_factory.upload_test_file(rse_name=src_rse_name)
        rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=dst_rse_name,
                           grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)
        requests.append(request_core.get_request_by_did(rse_id=dst_rse_id, **did))
        dids.append(did)

    # Forge request creation time to a random moment in the past hour
    @transactional_session
    def _forge_requests_creation_time(session=None):
        base_time = datetime.utcnow().replace(microsecond=0, minute=0) - timedelta(hours=1)
        assigned_times = set()
        for request in requests:
            request_creation_time = None
            while not request_creation_time or request_creation_time in assigned_times:
                # Ensure uniqueness to avoid multiple valid submission orders and make tests deterministic with simple sorting techniques
                # BUGFIX: use seconds, not minutes. With minutes=randint(0, 3600) the forged
                # timestamp could land up to 60 hours in the FUTURE, contradicting the
                # "past hour" intent stated above.
                request_creation_time = base_time + timedelta(seconds=randint(0, 3600))
            assigned_times.add(request_creation_time)
            session.query(Request).filter(Request.id == request['id']).update({'created_at': request_creation_time})
            request['created_at'] = request_creation_time

    _forge_requests_creation_time()
    requests = sorted(requests, key=lambda r: r['created_at'])
    for request in requests:
        assert request_core.get_request(request_id=request['id'])['state'] == RequestState.QUEUED

    requests_id_in_submission_order = []
    with patch('rucio.transfertool.mock.MockTransfertool.submit') as mock_transfertool_submit:
        # Record the order of requests passed to MockTranfertool.submit()
        mock_transfertool_submit.side_effect = lambda jobs, _: requests_id_in_submission_order.extend(
            [j['metadata']['request_id'] for j in jobs])

        submitter(once=True, rses=[{'id': rse_id} for _, rse_id in dst_rses],
                  partition_wait_time=None, transfertool='mock', transfertype='single',
                  filter_transfertool=None)

    for request in requests:
        assert request_core.get_request(request_id=request['id'])['state'] == RequestState.SUBMITTED

    # Requests must be submitted in the order of their creation
    assert requests_id_in_submission_order == [r['id'] for r in requests]
def test_request_submitted(rse_factory, file_factory, root_account):
    """ Conveyor (DAEMON): Test the submitter"""
    source_name, source_id = rse_factory.make_posix_rse()
    dest_name, dest_id = rse_factory.make_posix_rse()
    distance_core.add_distance(src_rse_id=source_id, dest_rse_id=dest_id, ranking=10)
    distance_core.add_distance(src_rse_id=dest_id, dest_rse_id=source_id, ranking=10)

    uploaded = file_factory.upload_test_file(rse_name=source_name)
    rule_core.add_rule(dids=[uploaded], account=root_account, copies=1, rse_expression=dest_name,
                       grouping='ALL', weight=None, lifetime=None, locked=False,
                       subscription_id=None)
    # The rule creation queues a transfer request towards the destination.
    assert request_core.get_request_by_did(rse_id=dest_id, **uploaded)['state'] == RequestState.QUEUED

    # run submitter with a RSE filter which doesn't contain the needed one
    submitter(once=True, rses=[{'id': source_id}], mock=True, transfertool='mock',
              transfertype='bulk', filter_transfertool=None, bulk=None)
    # Nothing happened: the request is still queued.
    assert request_core.get_request_by_did(rse_id=dest_id, **uploaded)['state'] == RequestState.QUEUED

    # Run it again with the destination RSE included: the request must be submitted.
    submitter(once=True, rses=[{'id': dest_id}], mock=True, transfertool='mock',
              transfertype='bulk', filter_transfertool=None, bulk=None)
    assert request_core.get_request_by_did(rse_id=dest_id, **uploaded)['state'] == RequestState.SUBMITTED
def test_fts_non_recoverable_failures_handled_on_multihop(
        vo, did_factory, root_account, replica_client,
        core_config_mock, caches_mock):
    """
    Verify that the poller correctly handles non-recoverable FTS job failures
    """
    # Fixed multihop topology from the test setup: XRD1 (source) -> XRD3 (jump) -> XRD4 (destination)
    src_rse = 'XRD1'
    src_rse_id = rse_core.get_rse_id(rse=src_rse, vo=vo)
    jump_rse = 'XRD3'
    jump_rse_id = rse_core.get_rse_id(rse=jump_rse, vo=vo)
    dst_rse = 'XRD4'
    dst_rse_id = rse_core.get_rse_id(rse=dst_rse, vo=vo)
    all_rses = [src_rse_id, jump_rse_id, dst_rse_id]

    # Register a did which doesn't exist. It will trigger an non-recoverable error during the FTS transfer.
    did = did_factory.random_did()
    replica_client.add_replicas(rse=src_rse, files=[{
        'scope': did['scope'].external,
        'name': did['name'],
        'bytes': 1,
        'adler32': 'aaaaaaaa'
    }])

    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=dst_rse,
                       grouping='ALL', weight=None, lifetime=None, locked=False,
                       subscription_id=None)
    submitter(once=True, rses=[{'id': rse_id} for rse_id in all_rses], group_bulk=2,
              partition_wait_time=None, transfertype='single', filter_transfertool=None)

    # The destination request must eventually be failed by the poller ...
    request = __wait_for_request_state(dst_rse_id=dst_rse_id, state=RequestState.FAILED, **did)
    assert request['state'] == RequestState.FAILED
    # ... and the intermediate-hop request must be failed as well
    request = request_core.get_request_by_did(rse_id=jump_rse_id, **did)
    assert request['state'] == RequestState.FAILED
def get_request_by_did(scope, name, rse, issuer):
    """
    Retrieve a request by its DID for a destination RSE.

    :param scope: The scope of the data identifier as a string.
    :param name: The name of the data identifier as a string.
    :param rse: The destination RSE of the request as a string.
    :param issuer: Issuing account as a string.

    :returns: Request as a dictionary.
    """
    permission_kwargs = {'scope': scope, 'name': name, 'rse': rse, 'issuer': issuer}
    allowed = permission.has_permission(issuer=issuer, action='get_request_by_did',
                                        kwargs=permission_kwargs)
    if not allowed:
        # % locals() interpolates issuer/scope/name/rse from this frame.
        message = '%(issuer)s cannot retrieve the request DID %(scope)s:%(name)s to RSE %(rse)s' % locals()
        raise exception.AccessDenied(message)
    return request.get_request_by_did(scope, name, rse)
def get_request_by_did(scope, name, rse, issuer):
    """
    Retrieve a request by its DID for a destination RSE.

    :param scope: The scope of the data identifier as a string.
    :param name: The name of the data identifier as a string.
    :param rse: The destination RSE of the request as a string.
    :param issuer: Issuing account as a string.

    :returns: Request as a dictionary.
    """
    # Resolve the RSE name to its internal id before permission check and lookup.
    rse_id = get_rse_id(rse=rse)
    permission_kwargs = {'scope': scope, 'name': name, 'rse': rse, 'rse_id': rse_id, 'issuer': issuer}
    allowed = permission.has_permission(issuer=issuer, action='get_request_by_did',
                                        kwargs=permission_kwargs)
    if not allowed:
        # % locals() interpolates issuer/scope/name/rse from this frame.
        message = '%(issuer)s cannot retrieve the request DID %(scope)s:%(name)s to RSE %(rse)s' % locals()
        raise exception.AccessDenied(message)
    return request.get_request_by_did(scope, name, rse_id)
def test_multihop_requests_created(rse_factory, did_factory, root_account, core_config_mock, caches_mock):
    """
    Ensure that multihop transfers are handled and intermediate request correctly created
    """
    source_name, source_id = rse_factory.make_posix_rse()
    _, hop_id = rse_factory.make_posix_rse()
    dest_name, dest_id = rse_factory.make_posix_rse()
    rse_core.add_rse_attribute(hop_id, 'available_for_multihop', True)
    # Only path from source to destination goes through the intermediate hop.
    add_distance(source_id, hop_id, ranking=10)
    add_distance(hop_id, dest_id, ranking=10)

    uploaded = did_factory.upload_test_file(source_name)
    rule_core.add_rule(dids=[uploaded], account=root_account, copies=1,
                       rse_expression=dest_name, grouping='ALL', weight=None,
                       lifetime=None, locked=False, subscription_id=None)

    [[_, [transfer]]] = next_transfers_to_submit(rses=rse_factory.created_rses).items()

    # the intermediate request was correctly created
    assert request_core.get_request_by_did(rse_id=hop_id, **uploaded)
def __wait_for_request_state(dst_rse_id, scope, name, state, max_wait_seconds=MAX_POLL_WAIT_SECONDS, run_poller=True):
    """
    Wait for the request state to be updated to the given expected state
    as a result of a pending transfer.

    Returns the last observed request (or None when max_wait_seconds is 0).
    """
    latest = None
    remaining = max_wait_seconds
    while remaining > 0:
        remaining -= 1
        if run_poller:
            # Drive the poller one iteration so pending transfer results are picked up.
            poller(once=True, older_than=0, partition_wait_time=None)
        latest = request_core.get_request_by_did(rse_id=dst_rse_id, scope=scope, name=name)
        if latest['state'] == state:
            break
        time.sleep(1)
    return latest
def test_multihop_sources_created(rse_factory, did_factory, root_account, core_config_mock, caches_mock):
    """
    Ensure that multihop transfers are handled and intermediate request correctly created
    """
    src_rse_name, src_rse_id = rse_factory.make_posix_rse()
    _, jump_rse1_id = rse_factory.make_posix_rse()
    _, jump_rse2_id = rse_factory.make_posix_rse()
    _, jump_rse3_id = rse_factory.make_posix_rse()
    dst_rse_name, dst_rse_id = rse_factory.make_posix_rse()
    jump_rses = [jump_rse1_id, jump_rse2_id, jump_rse3_id]
    all_rses = jump_rses + [src_rse_id, dst_rse_id]

    # Mark every intermediate RSE as usable for multihop and chain the distances:
    # src -> jump1 -> jump2 -> jump3 -> dst
    for rse_id in jump_rses:
        rse_core.add_rse_attribute(rse_id, 'available_for_multihop', True)
    distance_core.add_distance(src_rse_id, jump_rse1_id, ranking=10)
    distance_core.add_distance(jump_rse1_id, jump_rse2_id, ranking=10)
    distance_core.add_distance(jump_rse2_id, jump_rse3_id, ranking=10)
    distance_core.add_distance(jump_rse3_id, dst_rse_id, ranking=10)

    did = did_factory.upload_test_file(src_rse_name)
    rule_core.add_rule(dids=[did], account=root_account, copies=1,
                       rse_expression=dst_rse_name, grouping='ALL', weight=None,
                       lifetime=None, locked=False, subscription_id=None)

    submitter(once=True, rses=[{'id': rse_id} for rse_id in all_rses],
              partition_wait_time=None, transfertool='mock', transfertype='single',
              filter_transfertool=None)

    # Ensure that each intermediate request was correctly created
    for rse_id in jump_rses:
        assert request_core.get_request_by_did(rse_id=rse_id, **did)

    @read_session
    def __ensure_source_exists(rse_id, scope, name, session=None):
        # .one() raises if no matching Source row exists
        return session.query(Source). \
            filter(Source.rse_id == rse_id). \
            filter(Source.scope == scope). \
            filter(Source.name == name). \
            one()

    # Ensure that sources where created for transfers
    for rse_id in jump_rses + [src_rse_id]:
        __ensure_source_exists(rse_id, **did)
def test_disk_vs_tape_priority(rse_factory, root_account, mock_scope):
    """Check source selection ordering between disk and tape RSEs (ranking and distance)."""
    tape1_rse_name, tape1_rse_id = rse_factory.make_posix_rse(rse_type=RSEType.TAPE)
    tape2_rse_name, tape2_rse_id = rse_factory.make_posix_rse(rse_type=RSEType.TAPE)
    disk1_rse_name, disk1_rse_id = rse_factory.make_posix_rse(rse_type=RSEType.DISK)
    disk2_rse_name, disk2_rse_id = rse_factory.make_posix_rse(rse_type=RSEType.DISK)
    dst_rse_name, dst_rse_id = rse_factory.make_posix_rse()
    source_rses = [tape1_rse_id, tape2_rse_id, disk1_rse_id, disk2_rse_id]
    all_rses = source_rses + [dst_rse_id]
    # One disk and one tape at distance 15; one of each at distance 10.
    add_distance(disk1_rse_id, dst_rse_id, ranking=15)
    add_distance(disk2_rse_id, dst_rse_id, ranking=10)
    add_distance(tape1_rse_id, dst_rse_id, ranking=15)
    add_distance(tape2_rse_id, dst_rse_id, ranking=10)

    # add same file to all source RSEs
    file = {
        'scope': mock_scope,
        'name': 'lfn.' + generate_uuid(),
        'type': 'FILE',
        'bytes': 1,
        'adler32': 'beefdead'
    }
    did = {'scope': file['scope'], 'name': file['name']}
    for rse_id in source_rses:
        add_replicas(rse_id=rse_id, files=[file], account=root_account)

    rule_core.add_rule(dids=[did], account=root_account, copies=1,
                       rse_expression=dst_rse_name, grouping='ALL', weight=None,
                       lifetime=None, locked=False, subscription_id=None)
    request = request_core.get_request_by_did(rse_id=dst_rse_id, **did)

    # On equal priority and distance, disk should be preferred over tape. Both disk sources will be returned
    [[_, [transfer]]] = next_transfers_to_submit(rses=all_rses).items()
    assert len(transfer[0].legacy_sources) == 2
    assert transfer[0].legacy_sources[0][0] in (disk1_rse_name, disk2_rse_name)

    # Change the rating of the disk RSEs. Disk still preferred, because it must fail twice before tape is tried
    __fake_source_ranking(request, disk1_rse_id, -1)
    __fake_source_ranking(request, disk2_rse_id, -1)
    [[_, [transfer]]] = next_transfers_to_submit(rses=all_rses).items()
    assert len(transfer[0].legacy_sources) == 2
    assert transfer[0].legacy_sources[0][0] in (disk1_rse_name, disk2_rse_name)

    # Change the rating of the disk RSEs again. Tape RSEs must now be preferred.
    # Multiple tape sources are not allowed. Only one tape RSE source must be returned.
    __fake_source_ranking(request, disk1_rse_id, -2)
    __fake_source_ranking(request, disk2_rse_id, -2)
    [[_, transfers]] = next_transfers_to_submit(rses=all_rses).items()
    assert len(transfers) == 1
    transfer = transfers[0]
    assert len(transfer[0].legacy_sources) == 1
    assert transfer[0].legacy_sources[0][0] in (tape1_rse_name, tape2_rse_name)

    # On equal source ranking, but different distance; the smaller distance is preferred
    [[_, [transfer]]] = next_transfers_to_submit(rses=all_rses).items()
    assert len(transfer[0].legacy_sources) == 1
    assert transfer[0].legacy_sources[0][0] == tape2_rse_name

    # On different source ranking, the bigger ranking is preferred
    __fake_source_ranking(request, tape2_rse_id, -1)
    [[_, [transfer]]] = next_transfers_to_submit(rses=all_rses).items()
    assert len(transfer[0].legacy_sources) == 1
    assert transfer[0].legacy_sources[0][0] == tape1_rse_name
def test_multihop_intermediate_replica_lifecycle(vo, did_factory, root_account, core_config_mock, caches_mock):
    """
    Ensure that intermediate replicas created by the submitter are protected from deletion even
    if their tombstone is set to epoch.
    After successful transfers, intermediate replicas with default (epoch) tombstone must be
    removed. The others must be left intact.
    """
    # Fixed topology from the test containers: two sources, one multihop jump, one destination.
    src_rse1_name = 'XRD1'
    src_rse1_id = rse_core.get_rse_id(rse=src_rse1_name, vo=vo)
    src_rse2_name = 'XRD2'
    src_rse2_id = rse_core.get_rse_id(rse=src_rse2_name, vo=vo)
    jump_rse_name = 'XRD3'
    jump_rse_id = rse_core.get_rse_id(rse=jump_rse_name, vo=vo)
    dst_rse_name = 'XRD4'
    dst_rse_id = rse_core.get_rse_id(rse=dst_rse_name, vo=vo)
    all_rses = [src_rse1_id, src_rse2_id, jump_rse_id, dst_rse_id]

    did = did_factory.upload_test_file(src_rse1_name)
    # Copy replica to a second source. To avoid the special case of having a unique last replica, which could be handled in a special (more careful) way
    rule_core.add_rule(dids=[did], account=root_account, copies=1,
                       rse_expression=src_rse2_name, grouping='ALL', weight=None,
                       lifetime=None, locked=False, subscription_id=None)
    submitter(once=True, rses=[{'id': rse_id} for rse_id in all_rses],
              partition_wait_time=None, transfertype='single', filter_transfertool=None)
    replica = __wait_for_replica_transfer(dst_rse_id=src_rse2_id, **did)
    assert replica['state'] == ReplicaState.AVAILABLE

    # Pretend the jump RSE is out of space so the reaper considers it for cleanup.
    rse_core.set_rse_limits(rse_id=jump_rse_id, name='MinFreeSpace', value=1)
    rse_core.set_rse_usage(rse_id=jump_rse_id, source='storage', used=1, free=0)
    try:
        rule_core.add_rule(dids=[did], account=root_account, copies=1,
                           rse_expression=dst_rse_name, grouping='ALL', weight=None,
                           lifetime=None, locked=False, subscription_id=None)

        # Submit transfers to FTS
        # Ensure a replica was created on the intermediary host with epoch tombstone
        submitter(once=True, rses=[{'id': rse_id} for rse_id in all_rses],
                  partition_wait_time=None, transfertype='single', filter_transfertool=None)
        request = request_core.get_request_by_did(rse_id=jump_rse_id, **did)
        assert request['state'] == RequestState.SUBMITTED
        replica = replica_core.get_replica(rse_id=jump_rse_id, **did)
        # Epoch tombstone marks the intermediate replica as immediately deletable once unprotected.
        assert replica['tombstone'] == datetime(year=1970, month=1, day=1)
        assert replica['state'] == ReplicaState.COPYING

        # The intermediate replica is protected by its state (Copying)
        rucio.daemons.reaper.reaper.REGION.invalidate()
        reaper(once=True, rses=[], include_rses=jump_rse_name, exclude_rses=None)
        replica = replica_core.get_replica(rse_id=jump_rse_id, **did)
        assert replica['state'] == ReplicaState.COPYING

        # Wait for the intermediate replica to become ready
        replica = __wait_for_replica_transfer(dst_rse_id=jump_rse_id, **did)
        assert replica['state'] == ReplicaState.AVAILABLE

        # The intermediate replica is protected by an entry in the sources table
        # Reaper must not remove this replica, even if it has an obsolete tombstone
        rucio.daemons.reaper.reaper.REGION.invalidate()
        reaper(once=True, rses=[], include_rses=jump_rse_name, exclude_rses=None)
        replica = replica_core.get_replica(rse_id=jump_rse_id, **did)
        assert replica

        # FTS fails the second transfer, so run submitter again to copy from jump rse to destination rse
        submitter(once=True, rses=[{'id': rse_id} for rse_id in all_rses],
                  partition_wait_time=None, transfertype='single', filter_transfertool=None)

        # Wait for the destination replica to become ready
        replica = __wait_for_replica_transfer(dst_rse_id=dst_rse_id, **did)
        assert replica['state'] == ReplicaState.AVAILABLE

        # NOTE(review): the include_rses expression presumably selects the XRD test RSEs
        # via the test_container_xrd attribute — confirm against the test environment setup.
        rucio.daemons.reaper.reaper.REGION.invalidate()
        reaper(once=True, rses=[], include_rses='test_container_xrd=True', exclude_rses=None)

        # With the transfer finished and no protection left, the epoch-tombstoned
        # intermediate replica must now be gone.
        with pytest.raises(ReplicaNotFound):
            replica_core.get_replica(rse_id=jump_rse_id, **did)
    finally:

        @transactional_session
        def _cleanup_all_usage_and_limits(rse_id, session=None):
            # Remove the forged limit/usage rows so other tests see a clean RSE.
            session.query(models.RSELimit).filter_by(rse_id=rse_id).delete()
            session.query(models.RSEUsage).filter_by(
                rse_id=rse_id, source='storage').delete()

        _cleanup_all_usage_and_limits(rse_id=jump_rse_id)
def test_release_waiting_requests_grouped_fifo(self): """ REQUEST (CORE): release waiting requests based on grouped FIFO. """ if self.dialect == 'mysql': return True # set max_volume to 0 to check first without releasing extra requests set_rse_transfer_limits(self.dest_rse_id, self.all_activities, volume=0, max_transfers=1, session=self.db_session) # one request with an unattached DID -> one request should be released self.db_session.query(models.Request).delete() self.db_session.commit() name = generate_uuid() add_replica(self.source_rse_id, self.scope, name, 1, self.account, session=self.db_session) requests = [{ 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name, 'scope': self.scope, 'rule_id': generate_uuid(), 'retry_count': 1, 'attributes': { 'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': '' } }] queue_requests(requests, session=self.db_session) release_waiting_requests_grouped_fifo(self.dest_rse_id, count=1, session=self.db_session) request = get_request_by_did(self.scope, name, self.dest_rse_id, session=self.db_session) assert_equal(request['state'], constants.RequestState.QUEUED) # one request with an attached DID -> one request should be released self.db_session.query(models.Request).delete() self.db_session.commit() name = generate_uuid() dataset_name = generate_uuid() add_replica(self.source_rse_id, self.scope, name, 1, self.account, session=self.db_session) add_did(self.scope, dataset_name, constants.DIDType.DATASET, self.account, session=self.db_session) attach_dids(self.scope, dataset_name, [{ 'name': name, 'scope': self.scope }], self.account, session=self.db_session) requests = [{ 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name, 'rule_id': generate_uuid(), 'retry_count': 1, 'scope': self.scope, 'attributes': { 
'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': '' } }] queue_requests(requests, session=self.db_session) release_waiting_requests_grouped_fifo(self.dest_rse_id, count=1, session=self.db_session) request = get_request_by_did(self.scope, name, self.dest_rse_id, session=self.db_session) assert_equal(request['state'], constants.RequestState.QUEUED) # five requests with different requested_at and multiple attachments per collection -> release only one request -> two requests of one collection should be released self.db_session.query(models.Request).delete() self.db_session.commit() name1 = generate_uuid() name2 = generate_uuid() name3 = generate_uuid() name4 = generate_uuid() name5 = generate_uuid() dataset_1_name = generate_uuid() add_did(self.scope, dataset_1_name, constants.DIDType.DATASET, self.account, session=self.db_session) dataset_2_name = generate_uuid() add_did(self.scope, dataset_2_name, constants.DIDType.DATASET, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name3, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name4, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name5, 1, self.account, session=self.db_session) attach_dids(self.scope, dataset_1_name, [{ 'name': name1, 'scope': self.scope }, { 'name': name2, 'scope': self.scope }], self.account, session=self.db_session) attach_dids(self.scope, dataset_2_name, [{ 'name': name3, 'scope': self.scope }, { 'name': name4, 'scope': self.scope }], self.account, session=self.db_session) requests = [{ 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name1, 'scope': self.scope, 'retry_count': 1, 
'rule_id': generate_uuid(), 'requested_at': datetime.now().replace(year=2000), 'attributes': { 'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': '' } }, { 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name2, 'requested_at': datetime.now().replace(year=2020), 'rule_id': generate_uuid(), 'scope': self.scope, 'retry_count': 1, 'attributes': { 'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': '' } }, { 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name3, 'requested_at': datetime.now().replace(year=2015), 'retry_count': 1, 'scope': self.scope, 'rule_id': generate_uuid(), 'attributes': { 'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': '' } }, { 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name4, 'requested_at': datetime.now().replace(year=2010), 'retry_count': 1, 'scope': self.scope, 'rule_id': generate_uuid(), 'attributes': { 'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': '' } }, { 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name5, 'retry_count': 1, 'requested_at': datetime.now().replace(year=2018), 'scope': self.scope, 'rule_id': generate_uuid(), 'attributes': { 'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': '' } }] queue_requests(requests, session=self.db_session) release_waiting_requests_grouped_fifo(self.dest_rse_id, count=1, session=self.db_session) request_1 = get_request_by_did(self.scope, name1, self.dest_rse_id, session=self.db_session) assert_equal(request_1['state'], constants.RequestState.QUEUED) request_2 = 
get_request_by_did(self.scope, name2, self.dest_rse_id, session=self.db_session) assert_equal(request_2['state'], constants.RequestState.QUEUED) request_3 = get_request_by_did(self.scope, name3, self.dest_rse_id, session=self.db_session) assert_equal(request_3['state'], constants.RequestState.WAITING) request_4 = get_request_by_did(self.scope, name4, self.dest_rse_id, session=self.db_session) assert_equal(request_4['state'], constants.RequestState.WAITING) request_5 = get_request_by_did(self.scope, name5, self.dest_rse_id, session=self.db_session) assert_equal(request_5['state'], constants.RequestState.WAITING) # with maximal volume check -> release one request -> three requests should be released because of attachments and free volume space self.db_session.query(models.Request).delete() self.db_session.commit() name1 = generate_uuid() name2 = generate_uuid() name3 = generate_uuid() dataset_1_name = generate_uuid() add_did(self.scope, dataset_1_name, constants.DIDType.DATASET, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name3, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name4, 1, self.account, session=self.db_session) attach_dids(self.scope, dataset_1_name, [{ 'name': name1, 'scope': self.scope }], self.account, session=self.db_session) attach_dids(self.scope, dataset_1_name, [{ 'name': name2, 'scope': self.scope }], self.account, session=self.db_session) set_rse_transfer_limits(self.dest_rse_id, self.all_activities, volume=10, max_transfers=1, session=self.db_session) requests = [ { 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name1, 'bytes': 1, 'scope': self.scope, 'retry_count': 1, 
'rule_id': generate_uuid(), 'requested_at': datetime.now().replace(year=2000), 'attributes': { 'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': '' } }, { 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name2, 'bytes': 2, 'requested_at': datetime.now().replace(year=2020), 'rule_id': generate_uuid(), 'scope': self.scope, 'retry_count': 1, 'attributes': { 'activity': self.user_activity, 'bytes': 2, 'md5': '', 'adler32': '' } }, { 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name3, 'bytes': 3, 'requested_at': datetime.now().replace( year=2021 ), # requested after the request below but small enough for max_volume check 'rule_id': generate_uuid(), 'scope': self.scope, 'retry_count': 1, 'attributes': { 'activity': self.user_activity, 'bytes': 3, 'md5': '', 'adler32': '' } }, { 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name4, 'bytes': 3000, 'requested_at': datetime.now().replace(year=2020), 'rule_id': generate_uuid(), 'scope': self.scope, 'retry_count': 1, 'attributes': { 'activity': self.user_activity, 'bytes': 3000, 'md5': '', 'adler32': '' } } ] queue_requests(requests, session=self.db_session) amount_updated_requests = release_waiting_requests_grouped_fifo( self.dest_rse_id, count=1, session=self.db_session) assert_equal(amount_updated_requests, 3) # released because it got requested first request_1 = get_request_by_did(self.scope, name1, self.dest_rse_id, session=self.db_session) assert_equal(request_1['state'], constants.RequestState.QUEUED) # released because the DID is attached to the same dataset request_2 = get_request_by_did(self.scope, name2, self.dest_rse_id, session=self.db_session) 
assert_equal(request_2['state'], constants.RequestState.QUEUED) # released because of available volume request_3 = get_request_by_did(self.scope, name3, self.dest_rse_id, session=self.db_session) assert_equal(request_3['state'], constants.RequestState.QUEUED) # still waiting because there is no free volume request_4 = get_request_by_did(self.scope, name4, self.dest_rse_id, session=self.db_session) assert_equal(request_4['state'], constants.RequestState.WAITING) # with maximal volume check -> release one request -> two requests should be released because of attachments self.db_session.query(models.Request).delete() self.db_session.commit() name1 = generate_uuid() name2 = generate_uuid() name3 = generate_uuid() name4 = generate_uuid() dataset_1_name = generate_uuid() add_did(self.scope, dataset_1_name, constants.DIDType.DATASET, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name3, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name4, 1, self.account, session=self.db_session) attach_dids(self.scope, dataset_1_name, [{ 'name': name1, 'scope': self.scope }], self.account, session=self.db_session) attach_dids(self.scope, dataset_1_name, [{ 'name': name2, 'scope': self.scope }], self.account, session=self.db_session) set_rse_transfer_limits(self.dest_rse_id, self.all_activities, volume=5, max_transfers=1, session=self.db_session) request = models.Request(dest_rse_id=self.dest_rse_id, bytes=2, activity=self.all_activities, state=constants.RequestState.SUBMITTED) request.save(session=self.db_session) requests = [{ 'dest_rse_id': self.dest_rse_id, 'source_rse_id': self.source_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name1, 'bytes': 1, 'scope': self.scope, 
'retry_count': 1, 'rule_id': generate_uuid(), 'requested_at': datetime.now().replace(year=2000), 'attributes': { 'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': '' } }, { 'dest_rse_id': self.dest_rse_id, 'source_rse_id': self.source_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name2, 'bytes': 2, 'requested_at': datetime.now().replace(year=2020), 'rule_id': generate_uuid(), 'scope': self.scope, 'retry_count': 1, 'attributes': { 'activity': self.user_activity, 'bytes': 2, 'md5': '', 'adler32': '' } }, { 'dest_rse_id': self.dest_rse_id, 'source_rse_id': self.source_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name3, 'bytes': 1, 'requested_at': datetime.now().replace(year=2020), 'rule_id': generate_uuid(), 'scope': self.scope, 'retry_count': 1, 'attributes': { 'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': '' } }, { 'dest_rse_id': self.dest_rse_id, 'source_rse_id': self.source_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name4, 'bytes': 1, 'requested_at': datetime.now().replace(year=2020), 'rule_id': generate_uuid(), 'scope': self.scope, 'retry_count': 1, 'attributes': { 'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': '' } }] queue_requests(requests, session=self.db_session) release_waiting_requests_grouped_fifo(self.dest_rse_id, count=1, session=self.db_session) # released because it got requested first request_1 = get_request_by_did(self.scope, name1, self.dest_rse_id, session=self.db_session) assert_equal(request_1['state'], constants.RequestState.QUEUED) # released because the DID is attached to the same dataset request_2 = get_request_by_did(self.scope, name2, self.dest_rse_id, session=self.db_session) assert_equal(request_2['state'], constants.RequestState.QUEUED) # still waiting because there is no free volume after releasing the two requests above request_3 = 
get_request_by_did(self.scope, name3, self.dest_rse_id, session=self.db_session) assert_equal(request_3['state'], constants.RequestState.WAITING) request_4 = get_request_by_did(self.scope, name4, self.dest_rse_id, session=self.db_session) assert_equal(request_4['state'], constants.RequestState.WAITING)
def test_multihop_sources_created(rse_factory, did_factory, root_account, core_config_mock, caches_mock):
    """
    Ensure that multihop transfers are handled and intermediate requests correctly created.

    Builds a linear chain SRC -> JUMP1 -> JUMP2 -> JUMP3 -> DST, submits one
    rule-driven transfer, then verifies that:
      * an intermediate request exists on every jump RSE;
      * a ``Source`` row was created for the source of every hop;
      * the tombstone on each intermediate replica honours the
        ``tombstone_delay`` / ``multihop_tombstone_delay`` RSE attributes and
        the global ``transfers.multihop_tombstone_delay`` config option.
    """
    src_rse_name, src_rse_id = rse_factory.make_posix_rse()
    _, jump_rse1_id = rse_factory.make_posix_rse()
    _, jump_rse2_id = rse_factory.make_posix_rse()
    _, jump_rse3_id = rse_factory.make_posix_rse()
    dst_rse_name, dst_rse_id = rse_factory.make_posix_rse()

    jump_rses = [jump_rse1_id, jump_rse2_id, jump_rse3_id]
    all_rses = jump_rses + [src_rse_id, dst_rse_id]

    # Jump RSEs must be explicitly opted in before the submitter may route through them.
    for rse_id in jump_rses:
        rse_core.add_rse_attribute(rse_id, 'available_for_multihop', True)

    # Delays (in seconds) exercised below; each jump RSE tests a different precedence rule.
    rse_tombstone_delay = 3600
    rse_multihop_tombstone_delay = 12 * 3600
    default_multihop_tombstone_delay = 24 * 3600

    # JUMP1: both attributes set -> the multihop-specific one takes precedence
    rse_core.add_rse_attribute(jump_rse1_id, 'tombstone_delay', rse_tombstone_delay)
    rse_core.add_rse_attribute(jump_rse1_id, 'multihop_tombstone_delay', rse_multihop_tombstone_delay)
    # JUMP2: multihop delay not set on the RSE -> the global default multihop delay
    # takes precedence (not the plain tombstone_delay attribute)
    rse_core.add_rse_attribute(jump_rse2_id, 'tombstone_delay', rse_tombstone_delay)
    core_config.set(section='transfers', option='multihop_tombstone_delay', value=default_multihop_tombstone_delay)
    # JUMP3: multihop delay explicitly 0 -> the replica gets no tombstone at all
    rse_core.add_rse_attribute(jump_rse3_id, 'multihop_tombstone_delay', 0)

    # Wire up the only possible path: SRC -> JUMP1 -> JUMP2 -> JUMP3 -> DST
    distance_core.add_distance(src_rse_id, jump_rse1_id, ranking=10)
    distance_core.add_distance(jump_rse1_id, jump_rse2_id, ranking=10)
    distance_core.add_distance(jump_rse2_id, jump_rse3_id, ranking=10)
    distance_core.add_distance(jump_rse3_id, dst_rse_id, ranking=10)

    did = did_factory.upload_test_file(src_rse_name)
    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=dst_rse_name, grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)

    submitter(once=True, rses=[{'id': rse_id} for rse_id in all_rses], partition_wait_time=None, transfertool='mock', transfertype='single', filter_transfertool=None)

    # Ensure that each intermediate request was correctly created
    for rse_id in jump_rses:
        assert request_core.get_request_by_did(rse_id=rse_id, **did)

    @read_session
    def __ensure_source_exists(rse_id, scope, name, session=None):
        # Return the single matching Source row; SQLAlchemy's .one() raises
        # if the row is missing, which fails the test as intended.
        return session.query(Source). \
            filter(Source.rse_id == rse_id). \
            filter(Source.scope == scope). \
            filter(Source.name == name). \
            one()

    # Ensure that sources were created for the transfers (source of every hop)
    for rse_id in jump_rses + [src_rse_id]:
        __ensure_source_exists(rse_id, **did)

    # Ensure the tombstone is correctly set on intermediate replicas.
    # A +/- 5 minute window absorbs the wall-clock drift between submission and this check.
    expected_tombstone = datetime.utcnow() + timedelta(seconds=rse_multihop_tombstone_delay)
    replica = replica_core.get_replica(jump_rse1_id, **did)
    assert expected_tombstone - timedelta(minutes=5) < replica['tombstone'] < expected_tombstone + timedelta(minutes=5)
    expected_tombstone = datetime.utcnow() + timedelta(seconds=default_multihop_tombstone_delay)
    replica = replica_core.get_replica(jump_rse2_id, **did)
    assert expected_tombstone - timedelta(minutes=5) < replica['tombstone'] < expected_tombstone + timedelta(minutes=5)
    replica = replica_core.get_replica(jump_rse3_id, **did)
    assert replica['tombstone'] is None
def test_queue_requests_state(vo, use_preparer):
    """
    REQUEST (CORE): test queuing requests.

    Queues three transfer requests and checks that each ends up in the state
    implied by the ``conveyor.use_preparer`` config flag: PREPARING when the
    preparer is enabled, QUEUED otherwise. Cleans up all rows it created in
    the ``finally`` block so the shared DB session stays pristine.
    """
    # The parametrized fixture passes human-readable strings; translate to bool.
    if use_preparer == 'preparer enabled':
        use_preparer = True
    elif use_preparer == 'preparer disabled':
        use_preparer = False
    else:
        return pytest.xfail(reason=f'unknown test parameter use_preparer={use_preparer}')

    db_session = session.get_session()
    dest_rse = 'MOCK'
    dest_rse2 = 'MOCK2'
    source_rse = 'MOCK4'
    source_rse2 = 'MOCK5'
    dest_rse_id = get_rse_id(dest_rse, vo=vo)
    dest_rse_id2 = get_rse_id(dest_rse2, vo=vo)
    source_rse_id = get_rse_id(source_rse, vo=vo)
    source_rse_id2 = get_rse_id(source_rse2, vo=vo)
    scope = InternalScope('mock', vo=vo)
    account = InternalAccount('root', vo=vo)
    user_activity = 'User Subscription'
    config_set('conveyor', 'use_preparer', str(use_preparer))
    # Expected terminal state of every queued request under this config.
    target_state = RequestState.PREPARING if use_preparer else RequestState.QUEUED

    name = generate_uuid()
    name2 = generate_uuid()
    name3 = generate_uuid()
    add_replica(source_rse_id, scope, name, 1, account, session=db_session)
    add_replica(source_rse_id2, scope, name2, 1, account, session=db_session)
    add_replica(source_rse_id, scope, name3, 1, account, session=db_session)

    set_rse_transfer_limits(dest_rse_id, user_activity, max_transfers=1, session=db_session)
    set_rse_transfer_limits(dest_rse_id2, user_activity, max_transfers=1, session=db_session)
    set_rse_transfer_limits(source_rse_id, user_activity, max_transfers=1, session=db_session)
    set_rse_transfer_limits(source_rse_id2, user_activity, max_transfers=1, session=db_session)

    # NOTE(review): these dicts use the key 'src_rse_id' whereas the other tests in
    # this file use 'source_rse_id' — confirm which key queue_requests() actually reads.
    requests = [{
        'dest_rse_id': dest_rse_id,
        'src_rse_id': source_rse_id,
        'request_type': RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name,
        'scope': scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': datetime.now().replace(year=2015),
        'attributes': {
            'activity': user_activity,
            'bytes': 10,
            'md5': '',
            'adler32': ''
        }
    }, {
        'dest_rse_id': dest_rse_id,
        'src_rse_id': source_rse_id2,
        'request_type': RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name2,
        'scope': scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': datetime.now().replace(year=2015),
        'attributes': {
            'activity': 'unknown',
            'bytes': 10,
            'md5': '',
            'adler32': ''
        }
    }, {
        'dest_rse_id': dest_rse_id2,
        'src_rse_id': source_rse_id,
        'request_type': RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name3,
        'scope': scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': datetime.now().replace(year=2015),
        'attributes': {
            'activity': user_activity,
            'bytes': 10,
            'md5': '',
            'adler32': ''
        }
    }]

    try:
        queue_requests(requests, session=db_session)
        request = get_request_by_did(scope, name, dest_rse_id, session=db_session)
        assert request['state'] == target_state
        request = get_request_by_did(scope, name2, dest_rse_id, session=db_session)
        assert request['state'] == target_state
        request = get_request_by_did(scope, name3, dest_rse_id2, session=db_session)
        assert request['state'] == target_state
    finally:
        # Undo config + wipe every table this test touched, then restore config cache.
        config_remove_option('conveyor', 'use_preparer')
        db_session.query(models.Source).delete()
        db_session.query(models.Request).delete()
        db_session.query(models.RSETransferLimit).delete()
        db_session.query(models.Distance).delete()
        db_session.commit()
        reset_config_table()
def test_release_waiting_requests_per_free_volume(self):
    """
    REQUEST (CORE): release waiting requests that fit grouped in available volume.

    Three scenarios, each resetting the requests table in between:
      1. unattached requests released oldest-first while they fit in the free volume;
      2. dataset-attached requests released only if the whole dataset fits;
      3. zero free volume -> nothing is released.
    """
    if self.dialect == 'mysql':
        # The volume-based release query is not supported on MySQL; skip silently.
        return True

    # release unattached requests that fit in available volume with respect to already submitted transfers
    name1 = generate_uuid()
    add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session)
    name2 = generate_uuid()
    add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session)
    name3 = generate_uuid()
    add_replica(self.source_rse_id, self.scope, name3, 1, self.account, session=self.db_session)
    # An already-SUBMITTED transfer of 2 bytes eats into the available volume below.
    request = models.Request(dest_rse_id=self.dest_rse_id, bytes=2, activity=self.all_activities, state=constants.RequestState.SUBMITTED)
    request.save(session=self.db_session)
    volume = 10
    set_rse_transfer_limits(self.dest_rse_id, 'all_activities', volume=volume, max_transfers=1, session=self.db_session)
    requests = [{
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name1,
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': datetime.now().replace(year=2015),
        'attributes': {
            'activity': 'User Subscription',
            'bytes': 8,
            'md5': '',
            'adler32': ''
        }
    }, {
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name2,
        'requested_at': datetime.now().replace(year=2020),
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'attributes': {
            'activity': 'User Subscription',
            'bytes': 2,
            'md5': '',
            'adler32': ''
        }
    }, {
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name3,
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': datetime.now().replace(year=2000),
        'attributes': {
            'activity': 'User Subscription',
            'bytes': 10,
            'md5': '',
            'adler32': ''
        }
    }]
    queue_requests(requests, session=self.db_session)
    release_waiting_requests_per_free_volume(self.dest_rse_id, volume=volume, session=self.db_session)
    # released because small enough
    request = get_request_by_did(self.scope, name1, self.dest_rse_id, session=self.db_session)
    assert_equal(request['state'], constants.RequestState.QUEUED)
    # still waiting because requested later and too big
    request = get_request_by_did(self.scope, name2, self.dest_rse_id, session=self.db_session)
    assert_equal(request['state'], constants.RequestState.WAITING)
    # still waiting because too big
    request = get_request_by_did(self.scope, name3, self.dest_rse_id, session=self.db_session)
    assert_equal(request['state'], constants.RequestState.WAITING)

    # release attached requests that fit together with the dataset in available volume with respect to already submitted transfers
    self.db_session.query(models.Request).delete()
    self.db_session.commit()
    name1 = generate_uuid()
    add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session)
    name2 = generate_uuid()
    add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session)
    name3 = generate_uuid()
    add_replica(self.source_rse_id, self.scope, name3, 1, self.account, session=self.db_session)
    name4 = generate_uuid()
    add_replica(self.source_rse_id, self.scope, name4, 1, self.account, session=self.db_session)
    # dataset1 = {name1, name4}; dataset2 = {name2, name3} — release is evaluated per dataset.
    dataset1_name = generate_uuid()
    add_did(self.scope, dataset1_name, constants.DIDType.DATASET, self.account, session=self.db_session)
    attach_dids(self.scope, dataset1_name, [{
        'name': name1,
        'scope': self.scope
    }, {
        'name': name4,
        'scope': self.scope
    }], self.account, session=self.db_session)
    dataset2_name = generate_uuid()
    add_did(self.scope, dataset2_name, constants.DIDType.DATASET, self.account, session=self.db_session)
    attach_dids(self.scope, dataset2_name, [{
        'name': name2,
        'scope': self.scope
    }, {
        'name': name3,
        'scope': self.scope
    }], self.account, session=self.db_session)
    request = models.Request(dest_rse_id=self.dest_rse_id, bytes=2, activity=self.all_activities, state=constants.RequestState.SUBMITTED)
    request.save(session=self.db_session)
    volume = 10
    set_rse_transfer_limits(self.dest_rse_id, 'all_activities', volume=volume, max_transfers=1, session=self.db_session)
    requests = [{
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name1,
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': datetime.now().replace(year=2015),
        'attributes': {
            'activity': 'User Subscription',
            'bytes': 6,
            'md5': '',
            'adler32': ''
        }
    }, {
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name2,
        'requested_at': datetime.now().replace(year=2020),
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'attributes': {
            'activity': 'User Subscription',
            'bytes': 2,
            'md5': '',
            'adler32': ''
        }
    }, {
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name3,
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': datetime.now().replace(year=2000),
        'attributes': {
            'activity': 'User Subscription',
            'bytes': 10,
            'md5': '',
            'adler32': ''
        }
    }, {
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name4,
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': datetime.now().replace(year=2030),
        'attributes': {
            'activity': 'User Subscription',
            'bytes': 2,
            'md5': '',
            'adler32': ''
        }
    }]
    queue_requests(requests, session=self.db_session)
    release_waiting_requests_per_free_volume(self.dest_rse_id, volume=volume, session=self.db_session)
    # released because dataset fits in volume
    request = get_request_by_did(self.scope, name1, self.dest_rse_id, session=self.db_session)
    assert_equal(request['state'], constants.RequestState.QUEUED)
    request = get_request_by_did(self.scope, name4, self.dest_rse_id, session=self.db_session)
    assert_equal(request['state'], constants.RequestState.QUEUED)
    # waiting because dataset is too big
    request = get_request_by_did(self.scope, name2, self.dest_rse_id, session=self.db_session)
    assert_equal(request['state'], constants.RequestState.WAITING)
    request = get_request_by_did(self.scope, name3, self.dest_rse_id, session=self.db_session)
    assert_equal(request['state'], constants.RequestState.WAITING)

    # release requests with no available volume -> release nothing
    self.db_session.query(models.Request).delete()
    self.db_session.commit()
    name1 = generate_uuid()
    add_replica(self.dest_rse_id, self.scope, name1, 1, self.account, session=self.db_session)
    volume = 0
    set_rse_transfer_limits(self.dest_rse_id, 'all_activities', volume=volume, max_transfers=1, session=self.db_session)
    requests = [{
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name1,
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': datetime.now().replace(year=2015),
        'attributes': {
            'activity': 'User Subscription',
            'bytes': 8,
            'md5': '',
            'adler32': ''
        }
    }]
    queue_requests(requests, session=self.db_session)
    release_waiting_requests_per_free_volume(self.dest_rse_id, volume=volume, session=self.db_session)
    # waiting because no available volume
    request = get_request_by_did(self.scope, name1, self.dest_rse_id, session=self.db_session)
    assert_equal(request['state'], constants.RequestState.WAITING)
def test_throttler_grouped_fifo_nothing(self):
    """
    THROTTLER (CLIENTS): throttler release nothing (grouped fifo).

    Sets a transfer threshold of 1 while one SUBMITTED transfer is already
    active, queues four WAITING requests (two attached to one dataset), runs
    the throttler daemon once, and checks that none of them is released.
    """
    # four waiting requests and one active requests but threshold is 1
    # more than 80% of the transfer limit are already used -> release nothing
    set('throttler', '%s,%s' % (self.all_activities, self.dest_rse), 1, session=self.db_session)
    # Pre-existing active transfer that consumes the whole threshold.
    request = models.Request(dest_rse_id=self.dest_rse_id, bytes=2, activity=self.user_activity, state=constants.RequestState.SUBMITTED)
    request.save(session=self.db_session)
    name1 = generate_uuid()
    name2 = generate_uuid()
    name3 = generate_uuid()
    name4 = generate_uuid()
    dataset_1_name = generate_uuid()
    add_did(self.scope, dataset_1_name, constants.DIDType.DATASET, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name3, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name4, 1, self.account, session=self.db_session)
    # name1 and name2 belong to the same dataset; name3/name4 stay unattached.
    attach_dids(self.scope, dataset_1_name, [{'name': name1, 'scope': self.scope}], self.account, session=self.db_session)
    attach_dids(self.scope, dataset_1_name, [{'name': name2, 'scope': self.scope}], self.account, session=self.db_session)
    requests = [{
        'source_rse_id': self.source_rse_id,
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name1,
        'bytes': 1,
        'scope': self.scope,
        'retry_count': 1,
        'rule_id': generate_uuid(),
        'requested_at': datetime.now().replace(year=2000),
        'attributes': {
            'activity': self.user_activity,
            'bytes': 1,
            'md5': '',
            'adler32': ''
        }
    }, {
        'source_rse_id': self.source_rse_id,
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name2,
        'bytes': 2,
        'requested_at': datetime.now().replace(year=2020),
        'rule_id': generate_uuid(),
        'scope': self.scope,
        'retry_count': 1,
        'attributes': {
            'activity': self.user_activity,
            'bytes': 2,
            'md5': '',
            'adler32': ''
        }
    }, {
        'source_rse_id': self.source_rse_id,
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name3,
        'bytes': 3,
        'requested_at': datetime.now().replace(year=2021),  # requested after the request below but small enough for max_volume check
        'rule_id': generate_uuid(),
        'scope': self.scope,
        'retry_count': 1,
        'attributes': {
            'activity': self.user_activity,
            'bytes': 3,
            'md5': '',
            'adler32': ''
        }
    }, {
        'source_rse_id': self.source_rse_id,
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name4,
        'bytes': 3000,
        'requested_at': datetime.now().replace(year=2020),
        'rule_id': generate_uuid(),
        'scope': self.scope,
        'retry_count': 1,
        'attributes': {
            'activity': self.user_activity,
            'bytes': 3000,
            'md5': '',
            'adler32': ''
        }
    }]
    queue_requests(requests, session=self.db_session)
    self.db_session.commit()
    throttler.run(once=True, sleep_time=1)
    # Every request must still be WAITING after the throttler run.
    request_1 = get_request_by_did(self.scope, name1, self.dest_rse_id)
    assert_equal(request_1['state'], constants.RequestState.WAITING)
    request_2 = get_request_by_did(self.scope, name2, self.dest_rse_id)
    assert_equal(request_2['state'], constants.RequestState.WAITING)
    request_3 = get_request_by_did(self.scope, name3, self.dest_rse_id)
    assert_equal(request_3['state'], constants.RequestState.WAITING)
    request_4 = get_request_by_did(self.scope, name4, self.dest_rse_id)
    assert_equal(request_4['state'], constants.RequestState.WAITING)
def test_release_waiting_requests_fifo(self):
    """
    REQUEST (CORE): release waiting requests based on FIFO.

    Scenario 1: no account/activity filter — the request with the oldest
    ``requested_at`` is released first.
    Scenario 2: with account and activity filter — only requests matching
    both are eligible, still released oldest-first.
    """
    # without account and activity check
    # two requests -> release one request -> request with oldest requested_at date should be released
    name1 = generate_uuid()
    name2 = generate_uuid()
    add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session)
    requests = [{
        'dest_rse_id': self.dest_rse_id,
        'source_rse_id': self.source_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name1,
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': datetime.now().replace(year=2018),
        'attributes': {
            'activity': self.user_activity,
            'bytes': 1,
            'md5': '',
            'adler32': ''
        }
    }, {
        'dest_rse_id': self.dest_rse_id,
        'source_rse_id': self.source_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'requested_at': datetime.now().replace(year=2020),
        'name': name2,
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'attributes': {
            'activity': self.user_activity,
            'bytes': 1,
            'md5': '',
            'adler32': ''
        }
    }]
    queue_requests(requests, session=self.db_session)
    release_waiting_requests_fifo(self.dest_rse_id, count=1, session=self.db_session)
    # name1 was requested in 2018 (earlier) -> released; name2 stays waiting.
    request = get_request_by_did(self.scope, name1, self.dest_rse_id, session=self.db_session)
    assert_equal(request['state'], constants.RequestState.QUEUED)
    request2 = get_request_by_did(self.scope, name2, self.dest_rse_id, session=self.db_session)
    assert_equal(request2['state'], constants.RequestState.WAITING)

    # with activity and account check
    # two requests -> release two request -> requests with correct account and activity should be released
    self.db_session.query(models.Request).delete()
    self.db_session.commit()
    name1 = generate_uuid()
    name2 = generate_uuid()
    name3 = generate_uuid()
    name4 = generate_uuid()
    add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name3, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name4, 1, self.account, session=self.db_session)
    requests = [
        {
            'dest_rse_id': self.dest_rse_id,
            'source_rse_id': self.source_rse_id,
            'request_type': constants.RequestType.TRANSFER,
            'request_id': generate_uuid(),
            'name': name1,
            'scope': self.scope,
            'rule_id': generate_uuid(),
            'retry_count': 1,
            'account': self.account,
            'requested_at': datetime.now().replace(year=2018),
            'attributes': {
                'activity': self.user_activity,
                'bytes': 1,
                'md5': '',
                'adler32': ''
            }
        },
        {
            # wrong activity -> must stay waiting even though the account matches
            'dest_rse_id': self.dest_rse_id,
            'source_rse_id': self.source_rse_id,
            'request_type': constants.RequestType.TRANSFER,
            'request_id': generate_uuid(),
            'requested_at': datetime.now().replace(year=2020),
            'name': name2,
            'scope': self.scope,
            'rule_id': generate_uuid(),
            'retry_count': 1,
            'account': self.account,
            'attributes': {
                'activity': 'ignore',
                'bytes': 1,
                'md5': '',
                'adler32': ''
            }
        },
        {
            # wrong account -> must stay waiting even though the activity matches
            'dest_rse_id': self.dest_rse_id,
            'source_rse_id': self.source_rse_id,
            'request_type': constants.RequestType.TRANSFER,
            'request_id': generate_uuid(),
            'requested_at': datetime.now().replace(year=2020),
            'name': name3,
            'scope': self.scope,
            'rule_id': generate_uuid(),
            'retry_count': 1,
            'account': InternalAccount('jdoe'),
            'attributes': {
                'activity': self.user_activity,
                'bytes': 1,
                'md5': '',
                'adler32': ''
            }
        },
        {
            'dest_rse_id': self.dest_rse_id,
            'source_rse_id': self.source_rse_id,
            'request_type': constants.RequestType.TRANSFER,
            'request_id': generate_uuid(),
            'requested_at': datetime.now().replace(year=2020),  # requested latest but account and activity are correct
            'name': name4,
            'scope': self.scope,
            'rule_id': generate_uuid(),
            'retry_count': 1,
            'account': self.account,
            'attributes': {
                'activity': self.user_activity,
                'bytes': 1,
                'md5': '',
                'adler32': ''
            }
        }
    ]
    queue_requests(requests, session=self.db_session)
    release_waiting_requests_fifo(self.dest_rse_id, count=2, account=self.account, activity=self.user_activity, session=self.db_session)
    request = get_request_by_did(self.scope, name1, self.dest_rse_id, session=self.db_session)
    assert_equal(request['state'], constants.RequestState.QUEUED)
    request = get_request_by_did(self.scope, name2, self.dest_rse_id, session=self.db_session)
    assert_equal(request['state'], constants.RequestState.WAITING)
    request = get_request_by_did(self.scope, name3, self.dest_rse_id, session=self.db_session)
    assert_equal(request['state'], constants.RequestState.WAITING)
    request = get_request_by_did(self.scope, name4, self.dest_rse_id, session=self.db_session)
    assert_equal(request['state'], constants.RequestState.QUEUED)
def test_hop_penalty(rse_factory, did_factory, root_account, file_config_mock, core_config_mock, caches_mock):
    """ Test that both global hop_penalty and the per-rse one are correctly taken into consideration """
    # Two candidate multihop paths to RSE3, both with total distance 20:
    #
    # +------+    +------+    +------+
    # |      |    |  5   |    |      |
    # | RSE1 +--->| RSE2 +--->| RSE3 |
    # |      |    |      |    |      |
    # +------+    +------+    +--^---+
    #                            |
    # +------+    +------+       |
    # |      |    |  20  |       |
    # | RSE4 +--->| RSE5 +-------+
    # |      |    |      |
    # +------+    +------+
    #
    # RSE5 carries an explicit hop_penalty of 20, so the RSE2 route must win.
    (rse1, rse1_id), (rse2, rse2_id), (rse3, rse3_id), (rse4, rse4_id), (rse5, rse5_id) = [
        rse_factory.make_posix_rse() for _ in range(5)
    ]
    all_rses = [rse1_id, rse2_id, rse3_id, rse4_id, rse5_id]

    # Build both legs of each candidate path with identical distances.
    for hop_src, hop_dst in [(rse1_id, rse2_id), (rse2_id, rse3_id), (rse4_id, rse5_id), (rse5_id, rse3_id)]:
        distance_core.add_distance(hop_src, hop_dst, ranking=10)

    # Only RSE2 and RSE5 may serve as intermediate hops; RSE5 is penalized.
    rse_core.add_rse_attribute(rse2_id, 'available_for_multihop', True)
    rse_core.add_rse_attribute(rse5_id, 'available_for_multihop', True)
    rse_core.add_rse_attribute(rse5_id, 'hop_penalty', 20)

    # One file, replicated at the head of both paths.
    did = did_factory.random_did()
    replica_core.add_replica(rse_id=rse1_id, account=root_account, bytes_=1, **did)
    replica_core.add_replica(rse_id=rse4_id, account=root_account, bytes_=1, **did)

    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=rse3, grouping='ALL',
                       weight=None, lifetime=None, locked=False, subscription_id=None)
    submitter(once=True, rses=[{'id': rse_id} for rse_id in all_rses], partition_wait_time=None,
              transfertool='mock', transfertype='single', ignore_availability=True)

    # Ensure the path was created through the correct middle hop: a request must
    # exist on RSE2 and must not exist on the penalized RSE5.
    request_core.get_request_by_did(rse_id=rse2_id, **did)
    with pytest.raises(RequestNotFound):
        request_core.get_request_by_did(rse_id=rse5_id, **did)
def test_multisource_receiver(vo, did_factory, replica_client, root_account):
    """
    Run receiver as a background thread to automatically handle fts notifications.
    Ensure that a multi-source job in which the first source fails is correctly handled by receiver.
    """
    # Start the receiver daemon in the background; it is stopped via the
    # module-level receiver_graceful_stop event in the finally block.
    receiver_thread = threading.Thread(target=receiver, kwargs={
        'id': 0,
        'full_mode': True,
        'all_vos': True,
        'total_threads': 1
    })
    receiver_thread.start()
    try:
        src_rse1 = 'XRD4'
        src_rse1_id = rse_core.get_rse_id(rse=src_rse1, vo=vo)
        src_rse2 = 'XRD1'
        src_rse2_id = rse_core.get_rse_id(rse=src_rse2, vo=vo)
        dst_rse = 'XRD3'
        dst_rse_id = rse_core.get_rse_id(rse=dst_rse, vo=vo)
        all_rses = [src_rse1_id, src_rse2_id, dst_rse_id]

        # Add a good replica on the RSE which has a higher distance ranking
        did = did_factory.upload_test_file(src_rse1)
        # Add non-existing replica which will fail during multisource transfers on the RSE with lower cost (will be the preferred source)
        replica_client.add_replicas(rse=src_rse2, files=[{
            'scope': did['scope'].external,
            'name': did['name'],
            'bytes': 1,
            'adler32': 'aaaaaaaa'
        }])
        rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=dst_rse, grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)
        submitter(once=True, rses=[{
            'id': rse_id
        } for rse_id in all_rses], group_bulk=2, partition_wait_time=None, transfertype='single', filter_transfertool=None)

        # Poll until the receiver marks the request DONE (or we time out).
        request = None
        for _ in range(MAX_POLL_WAIT_SECONDS):
            request = request_core.get_request_by_did(rse_id=dst_rse_id, **did)
            # The request must not be marked as failed. Not even temporarily. It is a multi-source transfer and
            # the first, failed, source must not change the replica state. We must wait for all sources to be tried.
            assert request['state'] != RequestState.FAILED
            if request['state'] == RequestState.DONE:
                break
            time.sleep(1)
        assert request['state'] == RequestState.DONE
    finally:
        # Always shut the receiver down and reset the stop event for other tests.
        receiver_graceful_stop.set()
        receiver_thread.join(timeout=5)
        receiver_graceful_stop.clear()
def test_throttler_fifo_release_all(self):
    """ THROTTLER (CLIENTS): throttler release all waiting requests (fifo). """
    # no threshold -> release all waiting requests
    name1 = generate_uuid()
    name2 = generate_uuid()
    add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session)
    # The requested_at years (2018 vs 2020) establish a deterministic FIFO order.
    requests = [{
        'dest_rse_id': self.dest_rse_id,
        'source_rse_id': self.source_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name1,
        'account': self.account,
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': datetime.now().replace(year=2018),
        'attributes': {
            'activity': self.user_activity,
            'bytes': 1,
            'md5': '',
            'adler32': ''
        }
    }, {
        'dest_rse_id': self.dest_rse_id,
        'source_rse_id': self.source_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'requested_at': datetime.now().replace(year=2020),
        'name': name2,
        'account': self.account,
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'attributes': {
            'activity': self.user_activity,
            'bytes': 1,
            'md5': '',
            'adler32': ''
        }
    }]
    queue_requests(requests, session=self.db_session)
    self.db_session.commit()
    throttler.run(once=True, sleep_time=1)
    # Without a configured threshold, both waiting requests must be released.
    request = get_request_by_did(self.scope, name1, self.dest_rse_id)
    assert_equal(request['state'], constants.RequestState.QUEUED)
    request2 = get_request_by_did(self.scope, name2, self.dest_rse_id)
    assert_equal(request2['state'], constants.RequestState.QUEUED)

    # active transfers + waiting requests are less than the threshold -> release all waiting requests
    self.db_session.query(models.Request).delete()
    self.db_session.commit()
    name1 = generate_uuid()
    add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session)
    set('throttler', '%s,%s' % (self.user_activity, self.dest_rse), 3, session=self.db_session)
    # One already-submitted request counts against the threshold of 3.
    request = models.Request(dest_rse_id=self.dest_rse_id, activity=self.user_activity, state=constants.RequestState.SUBMITTED)
    request.save(session=self.db_session)
    requests = [{
        'dest_rse_id': self.dest_rse_id,
        'source_rse_id': self.source_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name1,
        'account': self.account,
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': datetime.now().replace(year=2018),
        'attributes': {
            'activity': self.user_activity,
            'bytes': 1,
            'md5': '',
            'adler32': ''
        }
    }]
    queue_requests(requests, session=self.db_session)
    self.db_session.commit()
    throttler.run(once=True, sleep_time=1)
    request = get_request_by_did(self.scope, name1, self.dest_rse_id)
    assert_equal(request['state'], constants.RequestState.QUEUED)
def test_disk_vs_tape_priority(rse_factory, root_account, mock_scope):
    """Verify source selection: disk is preferred over tape on equal ranking/distance,
    only one tape source is ever returned, and distance/ranking break ties."""
    tape1_rse_name, tape1_rse_id = rse_factory.make_posix_rse(rse_type=RSEType.TAPE)
    tape2_rse_name, tape2_rse_id = rse_factory.make_posix_rse(rse_type=RSEType.TAPE)
    disk1_rse_name, disk1_rse_id = rse_factory.make_posix_rse(rse_type=RSEType.DISK)
    disk2_rse_name, disk2_rse_id = rse_factory.make_posix_rse(rse_type=RSEType.DISK)
    dst_rse_name, dst_rse_id = rse_factory.make_posix_rse()
    source_rses = [tape1_rse_id, tape2_rse_id, disk1_rse_id, disk2_rse_id]
    all_rses = source_rses + [dst_rse_id]
    # One disk and one tape RSE at each distance, so disk-vs-tape and
    # distance effects can be observed independently.
    add_distance(disk1_rse_id, dst_rse_id, ranking=15)
    add_distance(disk2_rse_id, dst_rse_id, ranking=10)
    add_distance(tape1_rse_id, dst_rse_id, ranking=15)
    add_distance(tape2_rse_id, dst_rse_id, ranking=10)

    # add same file to all source RSEs
    file = {'scope': mock_scope, 'name': 'lfn.' + generate_uuid(), 'type': 'FILE', 'bytes': 1, 'adler32': 'beefdead'}
    did = {'scope': file['scope'], 'name': file['name']}
    for rse_id in source_rses:
        add_replicas(rse_id=rse_id, files=[file], account=root_account)

    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=dst_rse_name,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)
    request = request_core.get_request_by_did(rse_id=dst_rse_id, **did)

    @transactional_session
    def __fake_source_ranking(source_rse_id, new_ranking, session=None):
        # Overwrite (or create) the Source row so a specific source RSE gets
        # the desired ranking for this request.
        rowcount = session.query(models.Source).filter(models.Source.rse_id == source_rse_id).update({'ranking': new_ranking})
        if not rowcount:
            models.Source(request_id=request['id'],
                          scope=request['scope'],
                          name=request['name'],
                          rse_id=source_rse_id,
                          dest_rse_id=request['dest_rse_id'],
                          ranking=new_ranking,
                          bytes=request['bytes'],
                          url=None,
                          is_using=False).save(session=session, flush=False)

    # On equal priority and distance, disk should be preferred over tape. Both disk sources will be returned
    transfers, _reqs_no_source, _reqs_scheme_mismatch, _reqs_only_tape_source = get_transfer_requests_and_source_replicas(rses=all_rses)
    assert len(transfers) == 1
    transfer = next(iter(transfers.values()))
    assert len(transfer[0]['sources']) == 2
    assert transfer[0]['sources'][0][0] in (disk1_rse_name, disk2_rse_name)

    # Change the rating of the disk RSEs. Tape RSEs must now be preferred.
    # Multiple tape sources are not allowed. Only one tape RSE source must be returned.
    __fake_source_ranking(disk1_rse_id, -1)
    __fake_source_ranking(disk2_rse_id, -1)
    transfers, _reqs_no_source, _reqs_scheme_mismatch, _reqs_only_tape_source = get_transfer_requests_and_source_replicas(rses=all_rses)
    assert len(transfers) == 1
    transfer = next(iter(transfers.values()))
    assert len(transfer[0]['sources']) == 1
    assert transfer[0]['sources'][0][0] in (tape1_rse_name, tape2_rse_name)

    # On equal source ranking, but different distance; the smaller distance is preferred
    transfers, _reqs_no_source, _reqs_scheme_mismatch, _reqs_only_tape_source = get_transfer_requests_and_source_replicas(rses=all_rses)
    assert len(transfers) == 1
    transfer = next(iter(transfers.values()))
    assert len(transfer[0]['sources']) == 1
    assert transfer[0]['sources'][0][0] == tape2_rse_name

    # On different source ranking, the bigger ranking is preferred
    __fake_source_ranking(tape2_rse_id, -1)
    transfers, _reqs_no_source, _reqs_scheme_mismatch, _reqs_only_tape_source = get_transfer_requests_and_source_replicas(rses=all_rses)
    assert len(transfers) == 1
    transfer = next(iter(transfers.values()))
    assert len(transfer[0]['sources']) == 1
    assert transfer[0]['sources'][0][0] == tape1_rse_name
def __create_missing_replicas_and_requests(
        transfer_path: "List[DirectTransferDefinition]",
        default_tombstone_delay: int,
        logger: "Callable",
        session: "Optional[Session]" = None
) -> "Tuple[bool, bool]":
    """
    Create replicas and requests in the database for the intermediate hops.

    :param transfer_path: the multihop path; the last hop corresponds to the
                          initial (user-created) request.
    :param default_tombstone_delay: tombstone delay applied to intermediate
                                    replicas when the destination RSE does not
                                    define 'multihop_tombstone_delay'.
    :param logger: logging callable taking (level, message, *args).
    :param session: database session to use.
    :returns: tuple (creation_successful, must_skip_submission).
              must_skip_submission is True when an intermediate request already
              existed, in which case the caller must wait for that transfer
              instead of submitting a new one.
    """
    initial_request_id = transfer_path[-1].rws.request_id
    creation_successful = True
    must_skip_submission = False
    # Iterate the path in reverse order. The last hop is the initial request, so
    # next_hop.rws.request_id will always be initialized when handling the current hop.
    for i in reversed(range(len(transfer_path))):
        hop = transfer_path[i]
        rws = hop.rws
        if rws.request_id:
            # Request already exists for this hop; nothing to create.
            continue

        tombstone_delay = rws.dest_rse.attributes.get('multihop_tombstone_delay', default_tombstone_delay)
        try:
            tombstone = tombstone_from_delay(tombstone_delay)
        except ValueError:
            logger(logging.ERROR, "%s: Cannot parse multihop tombstone delay %s", initial_request_id, tombstone_delay)
            creation_successful = False
            break

        files = [{'scope': rws.scope, 'name': rws.name, 'bytes': rws.byte_count, 'adler32': rws.adler32, 'md5': rws.md5, 'tombstone': tombstone, 'state': 'C'}]
        try:
            add_replicas(rse_id=rws.dest_rse.id, files=files, account=rws.account,
                         ignore_availability=False, dataset_meta=None, session=session)
            # Set replica state to Copying in case replica already existed in another state.
            # Can happen when a multihop transfer failed previously, and we are re-scheduling it now.
            update_replica_state(rse_id=rws.dest_rse.id, scope=rws.scope, name=rws.name,
                                 state=ReplicaState.COPYING, session=session)
        except Exception as error:
            # Best-effort: log and continue; the replica may already exist.
            logger(logging.ERROR, '%s: Problem adding replicas on %s : %s', initial_request_id, rws.dest_rse, str(error))

        rws.attributes['is_intermediate_hop'] = True
        # next_hop_request_id and initial_request_id are not used anymore in rucio >=1.28, but are needed
        # for running at the same time 1.27 and 1.28 on the same database.
        # TODO: remove following two rows
        rws.attributes['next_hop_request_id'] = transfer_path[i + 1].rws.request_id
        rws.attributes['initial_request_id'] = initial_request_id
        rws.attributes['source_replica_expression'] = hop.src.rse.name
        req_to_queue = {
            'dest_rse_id': rws.dest_rse.id,
            'state': RequestState.QUEUED,
            'scope': rws.scope,
            'name': rws.name,
            'rule_id': '00000000000000000000000000000000',  # Dummy Rule ID used for multihop. TODO: Replace with actual rule_id once we can flag intermediate requests
            'attributes': rws.attributes,
            'request_type': rws.request_type,
            'retry_count': rws.retry_count,
            'account': rws.account,
            'requested_at': datetime.datetime.now()
        }
        if rws.transfertool:
            req_to_queue['transfertool'] = rws.transfertool
        new_req = queue_requests(requests=[req_to_queue], session=session)
        # If a request already exists, new_req will be an empty list.
        if new_req:
            db_req = new_req[0]
            logger(logging.DEBUG, '%s: New request created for the transfer between %s and %s : %s',
                   initial_request_id, transfer_path[0].src, transfer_path[-1].dst, db_req['id'])
        else:
            db_req = request_core.get_request_by_did(rws.scope, rws.name, rws.dest_rse.id, session=session)
            # A transfer already exists for part of the path. Just construct the remaining
            # path, but don't submit the transfer. We must wait for the existing transfer to be
            # completed before continuing.
            must_skip_submission = True
            logger(logging.DEBUG, '%s: Reusing intermediate hop between %s and %s : %s',
                   initial_request_id, transfer_path[0].src, transfer_path[-1].dst, db_req['id'])

        # Record the hop linkage so the chain can be reconstructed later.
        models.TransferHop(
            request_id=db_req['id'],
            next_hop_request_id=transfer_path[i + 1].rws.request_id,
            initial_request_id=initial_request_id,
        ).save(session=session, flush=False)
        rws.request_id = db_req['id']
        rws.requested_at = db_req['requested_at']

    return creation_successful, must_skip_submission
def test_throttler_grouped_fifo_subset(self):
    """ THROTTLER (CLIENTS): throttler release subset of waiting requests (grouped fifo). """
    # Volume limit of 10 and max_transfers=1; throttler threshold of 1.
    set_rse_transfer_limits(self.dest_rse_id, self.all_activities, volume=10, max_transfers=1, session=self.db_session)
    set('throttler', '%s,%s' % (self.all_activities, self.dest_rse), 1, session=self.db_session)  # threshold used by throttler
    name1 = generate_uuid()
    name2 = generate_uuid()
    name3 = generate_uuid()
    name4 = generate_uuid()
    dataset_1_name = generate_uuid()
    add_did(self.scope, dataset_1_name, constants.DIDType.DATASET, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name3, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name4, 1, self.account, session=self.db_session)
    # name1 and name2 belong to the same dataset, so they are released as a group.
    attach_dids(self.scope, dataset_1_name, [{'name': name1, 'scope': self.scope}], self.account, session=self.db_session)
    attach_dids(self.scope, dataset_1_name, [{'name': name2, 'scope': self.scope}], self.account, session=self.db_session)
    requests = [{
        'source_rse_id': self.source_rse_id,
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name1,
        'bytes': 1,
        'scope': self.scope,
        'retry_count': 1,
        'rule_id': generate_uuid(),
        'requested_at': datetime.now().replace(year=2000),
        'attributes': {
            'activity': self.user_activity,
            'bytes': 1,
            'md5': '',
            'adler32': ''
        }
    }, {
        'source_rse_id': self.source_rse_id,
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name2,
        'bytes': 2,
        'requested_at': datetime.now().replace(year=2020),
        'rule_id': generate_uuid(),
        'scope': self.scope,
        'retry_count': 1,
        'attributes': {
            'activity': self.user_activity,
            'bytes': 2,
            'md5': '',
            'adler32': ''
        }
    }, {
        'source_rse_id': self.source_rse_id,
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name3,
        'bytes': 3,
        'requested_at': datetime.now().replace(year=2021),  # requested after the request below but small enough for max_volume check
        'rule_id': generate_uuid(),
        'scope': self.scope,
        'retry_count': 1,
        'attributes': {
            'activity': self.user_activity,
            'bytes': 3,
            'md5': '',
            'adler32': ''
        }
    }, {
        'source_rse_id': self.source_rse_id,
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name4,
        'bytes': 3000,
        'requested_at': datetime.now().replace(year=2020),
        'rule_id': generate_uuid(),
        'scope': self.scope,
        'retry_count': 1,
        'attributes': {
            'activity': self.user_activity,
            'bytes': 3000,
            'md5': '',
            'adler32': ''
        }
    }]
    queue_requests(requests, session=self.db_session)
    self.db_session.commit()
    throttler.run(once=True, sleep_time=1)
    # released because it got requested first
    request_1 = get_request_by_did(self.scope, name1, self.dest_rse_id)
    assert_equal(request_1['state'], constants.RequestState.QUEUED)
    # released because the DID is attached to the same dataset
    request_2 = get_request_by_did(self.scope, name2, self.dest_rse_id)
    assert_equal(request_2['state'], constants.RequestState.QUEUED)
    # released because of available volume
    request_3 = get_request_by_did(self.scope, name3, self.dest_rse_id)
    assert_equal(request_3['state'], constants.RequestState.QUEUED)
    # still waiting because there is no free volume
    request_4 = get_request_by_did(self.scope, name4, self.dest_rse_id)
    assert_equal(request_4['state'], constants.RequestState.WAITING)
def test_throttler_fifo_release_subset(self):
    """ THROTTLER (CLIENTS): throttler release subset of waiting requests (fifo). """
    # two waiting requests and no active requests but threshold is 1 -> release only 1 request
    set('throttler', '%s,%s' % (self.user_activity, self.dest_rse), 1, session=self.db_session)
    name1 = generate_uuid()
    name2 = generate_uuid()
    # Fix: add_replica() takes the RSE id, not its name. The sibling throttler
    # tests in this file consistently pass self.source_rse_id here.
    add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session)
    # The requested_at years (2018 vs 2020) establish a deterministic FIFO order.
    requests = [{
        'dest_rse_id': self.dest_rse_id,
        'source_rse_id': self.source_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name1,
        'account': self.account,
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': datetime.now().replace(year=2018),
        'attributes': {
            'activity': self.user_activity,
            'bytes': 1,
            'md5': '',
            'adler32': ''
        }
    }, {
        'dest_rse_id': self.dest_rse_id,
        'source_rse_id': self.source_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'requested_at': datetime.now().replace(year=2020),
        'name': name2,
        'account': self.account,
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'attributes': {
            'activity': self.user_activity,
            'bytes': 1,
            'md5': '',
            'adler32': ''
        }
    }]
    queue_requests(requests, session=self.db_session)
    self.db_session.commit()
    throttler.run(once=True, sleep_time=1)
    # Fix: get_request_by_did() likewise takes the destination RSE id
    # (self.dest_rse_id), matching the other tests in this class.
    # Oldest request is released; the newer one stays waiting (threshold 1).
    request = get_request_by_did(self.scope, name1, self.dest_rse_id)
    assert_equal(request['state'], constants.RequestState.QUEUED)
    request2 = get_request_by_did(self.scope, name2, self.dest_rse_id)
    assert_equal(request2['state'], constants.RequestState.WAITING)
def test_throttler_grouped_fifo_all(self): """ THROTTLER (CLIENTS): throttler release all waiting requests (grouped fifo). """ # no threshold -> release all waiting requests name1 = generate_uuid() name2 = generate_uuid() name3 = generate_uuid() name4 = generate_uuid() dataset_1_name = generate_uuid() add_did(self.scope, dataset_1_name, constants.DIDType.DATASET, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name3, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name4, 1, self.account, session=self.db_session) attach_dids(self.scope, dataset_1_name, [{'name': name1, 'scope': self.scope}], self.account, session=self.db_session) attach_dids(self.scope, dataset_1_name, [{'name': name2, 'scope': self.scope}], self.account, session=self.db_session) requests = [{ 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name1, 'bytes': 1, 'scope': self.scope, 'retry_count': 1, 'rule_id': generate_uuid(), 'requested_at': datetime.now().replace(year=2000), 'attributes': { 'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': '' } }, { 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name2, 'bytes': 2, 'requested_at': datetime.now().replace(year=2020), 'rule_id': generate_uuid(), 'scope': self.scope, 'retry_count': 1, 'attributes': { 'activity': self.user_activity, 'bytes': 2, 'md5': '', 'adler32': '' } }, { 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name3, 'bytes': 3, 'requested_at': 
datetime.now().replace(year=2021), # requested after the request below but small enough for max_volume check 'rule_id': generate_uuid(), 'scope': self.scope, 'retry_count': 1, 'attributes': { 'activity': self.user_activity, 'bytes': 3, 'md5': '', 'adler32': '' } }, { 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name4, 'bytes': 3000, 'requested_at': datetime.now().replace(year=2020), 'rule_id': generate_uuid(), 'scope': self.scope, 'retry_count': 1, 'attributes': { 'activity': self.user_activity, 'bytes': 3000, 'md5': '', 'adler32': '' } }] queue_requests(requests, session=self.db_session) self.db_session.commit() throttler.run(once=True, sleep_time=1) request_1 = get_request_by_did(self.scope, name1, self.dest_rse_id) assert_equal(request_1['state'], constants.RequestState.QUEUED) request_2 = get_request_by_did(self.scope, name2, self.dest_rse_id) assert_equal(request_2['state'], constants.RequestState.QUEUED) request_3 = get_request_by_did(self.scope, name3, self.dest_rse_id) assert_equal(request_3['state'], constants.RequestState.QUEUED) request_4 = get_request_by_did(self.scope, name4, self.dest_rse_id) assert_equal(request_4['state'], constants.RequestState.QUEUED)
def test_globus(rse_factory, did_factory, root_account):
    """
    Test bulk submissions with globus transfertool.
    Rely on mocks, because we don't contact a real globus server in tests
    """
    # +------+    +------+
    # |      |    |      |
    # | RSE1 +--->| RSE2 |
    # |      |    |      |
    # +------+    +------+
    #
    # +------+    +------+
    # |      |    |      |
    # | RSE3 +--->| RSE4 |
    # |      |    |      |
    # +------+    +------+
    rse1, rse1_id = rse_factory.make_posix_rse()
    rse2, rse2_id = rse_factory.make_posix_rse()
    rse3, rse3_id = rse_factory.make_posix_rse()
    rse4, rse4_id = rse_factory.make_posix_rse()
    all_rses = [rse1_id, rse2_id, rse3_id, rse4_id]

    distance_core.add_distance(rse1_id, rse2_id, ranking=10)
    distance_core.add_distance(rse3_id, rse4_id, ranking=10)
    # The globus transfertool requires an endpoint id attribute on each RSE;
    # reusing the rse_id as endpoint id is sufficient with the mocked backend.
    for rse_id in all_rses:
        rse_core.add_rse_attribute(rse_id, 'globus_endpoint_id', rse_id)

    # Single submission
    did1 = did_factory.upload_test_file(rse1)
    rule_core.add_rule(dids=[did1], account=root_account, copies=1, rse_expression=rse2,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)
    did2 = did_factory.upload_test_file(rse3)
    rule_core.add_rule(dids=[did2], account=root_account, copies=1, rse_expression=rse4,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)
    with patch('rucio.transfertool.globus.bulk_submit_xfer') as mock_bulk_submit:
        mock_bulk_submit.return_value = 0
        submitter(once=True, rses=[{'id': rse_id} for rse_id in all_rses],
                  group_bulk=10, partition_wait_time=None, transfertool='globus',
                  transfertype='single', filter_transfertool=None)
        # Called separately for each job
        assert len(mock_bulk_submit.call_args_list) == 2
        (submitjob, ), _kwargs = mock_bulk_submit.call_args_list[0]
        assert len(submitjob) == 1

    # Bulk submission
    did1 = did_factory.upload_test_file(rse1)
    rule_core.add_rule(dids=[did1], account=root_account, copies=1, rse_expression=rse2,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)
    did2 = did_factory.upload_test_file(rse3)
    rule_core.add_rule(dids=[did2], account=root_account, copies=1, rse_expression=rse4,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)
    with patch('rucio.transfertool.globus.bulk_submit_xfer') as mock_bulk_submit:
        mock_bulk_submit.return_value = 0
        submitter(once=True, rses=[{'id': rse_id} for rse_id in all_rses],
                  group_bulk=10, partition_wait_time=None, transfertool='globus',
                  transfertype='bulk', filter_transfertool=None)
        mock_bulk_submit.assert_called_once()
        (submitjob, ), _kwargs = mock_bulk_submit.call_args_list[0]
        # both jobs were grouped together and submitted in one call
        assert len(submitjob) == 2

        # Verify per-job metadata for each of the two grouped transfers.
        job_did1 = next(iter(filter(lambda job: did1['name'] in job['sources'][0], submitjob)))
        assert len(job_did1['sources']) == 1
        assert len(job_did1['destinations']) == 1
        assert job_did1['metadata']['src_rse'] == rse1
        assert job_did1['metadata']['dst_rse'] == rse2
        assert job_did1['metadata']['name'] == did1['name']
        assert job_did1['metadata']['source_globus_endpoint_id'] == rse1_id
        assert job_did1['metadata']['dest_globus_endpoint_id'] == rse2_id

        job_did2 = next(iter(filter(lambda job: did2['name'] in job['sources'][0], submitjob)))
        assert len(job_did2['sources']) == 1
        assert len(job_did2['destinations']) == 1
        assert job_did2['metadata']['src_rse'] == rse3
        assert job_did2['metadata']['dst_rse'] == rse4
        assert job_did2['metadata']['name'] == did2['name']
    request = request_core.get_request_by_did(rse_id=rse2_id, **did1)
    assert request['state'] == RequestState.SUBMITTED
    request = request_core.get_request_by_did(rse_id=rse4_id, **did2)
    assert request['state'] == RequestState.SUBMITTED
def test_tpc(containerized_rses, root_account, test_scope, did_factory, rse_client, rule_client, artifact):
    """End-to-end third-party-copy test against two containerized RSEs, driving
    the full submitter -> FTS -> poller -> finisher chain."""
    if len(containerized_rses) < 2:
        pytest.skip("TPC tests need at least 2 containerized rse's for execution}")

    rse1_name, rse1_id = containerized_rses[0]
    rse2_name, rse2_id = containerized_rses[1]

    base_file_name = generate_uuid()
    test_file = did_factory.upload_test_file(rse1_name, name=base_file_name + '.000', return_full_item=True)
    test_file_did_str = '%s:%s' % (test_file['did_scope'], test_file['did_name'])
    test_file_did = {'scope': test_scope, 'name': test_file['did_name']}
    # Expected PFN follows the deterministic md5-hash directory layout.
    test_file_name_hash = hashlib.md5(test_file_did_str.encode('utf-8')).hexdigest()
    test_file_expected_pfn = '%s/%s/%s/%s' % (test_file_did['scope'], test_file_name_hash[0:2], test_file_name_hash[2:4], test_file_did['name'])

    rse1_hostname = rse_client.get_protocols(rse1_name)[0]['hostname']
    rse2_hostname = rse_client.get_protocols(rse2_name)[0]['hostname']

    rule_id = add_rule(dids=[test_file_did], account=root_account, copies=1, rse_expression=rse2_name,
                       grouping='NONE', weight=None, lifetime=None, locked=False, subscription_id=None)
    rule = rule_client.get_replication_rule(rule_id[0])

    re_evaluator(once=True)

    assert rule['locks_ok_cnt'] == 0
    assert rule['locks_replicating_cnt'] == 1

    # Verify the computed transfer path and its source/destination URLs.
    [[_, [transfer_path]]] = next_transfers_to_submit(rses=[rse1_id, rse2_id]).items()
    assert transfer_path[0].rws.rule_id == rule_id[0]
    src_url = transfer_path[0].legacy_sources[0][1]
    dest_url = transfer_path[0].dest_url
    check_url(src_url, rse1_hostname, test_file_expected_pfn)
    check_url(dest_url, rse2_hostname, test_file_expected_pfn)

    # Run Submitter
    submitter.submitter(once=True)

    # Get FTS transfer job id
    request = get_request_by_did(rse_id=rse2_id, **test_file_did)
    fts_transfer_id = request['external_id']

    # Check FTS transfer job
    assert fts_transfer_id is not None

    # Wait for the FTS transfer to finish
    fts_transfer_status = None
    for _ in range(MAX_POLL_WAIT_SECONDS):
        fts_transfer_status = poll_fts_transfer_status(fts_transfer_id)
        if fts_transfer_status not in ['SUBMITTED', 'ACTIVE']:
            break
        time.sleep(1)
    assert fts_transfer_status == 'FINISHED'

    poller.run(once=True, older_than=0)
    finisher.run(once=True)
    rule = rule_client.get_replication_rule(rule_id[0])
    assert rule['locks_ok_cnt'] == 1
    assert rule['locks_replicating_cnt'] == 0

    if artifact is not None:
        # Point the CI artifact at the FTS log directory for this transfer.
        date = datetime.date.today().strftime("%Y-%m-%d")
        with open(artifact, 'w') as artifact_file:
            artifact_file.write(
                f"/var/log/fts3/{date}/{rse1_name.lower()}__{rse2_name.lower()}/*__{fts_transfer_id}"
            )
def test_throttler_fifo_release_nothing(self):
    """ THROTTLER (CLIENTS): throttler release nothing (fifo). """
    # two waiting requests and one active requests but threshold is 1
    # more than 80% of the transfer limit are already used -> release nothing
    set('throttler', '%s,%s' % (self.user_activity, self.dest_rse), 1, session=self.db_session)
    request = models.Request(dest_rse_id=self.dest_rse_id, bytes=2, activity=self.user_activity, state=constants.RequestState.SUBMITTED)
    request.save(session=self.db_session)
    name1 = generate_uuid()
    name2 = generate_uuid()
    # Fix: add_replica() takes the RSE id, not its name. The sibling throttler
    # tests in this file consistently pass self.source_rse_id here.
    add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session)
    # The requested_at years (2018 vs 2020) establish a deterministic FIFO order.
    requests = [{
        'dest_rse_id': self.dest_rse_id,
        'source_rse_id': self.source_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name1,
        'account': self.account,
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': datetime.now().replace(year=2018),
        'attributes': {
            'activity': self.user_activity,
            'bytes': 1,
            'md5': '',
            'adler32': ''
        }
    }, {
        'dest_rse_id': self.dest_rse_id,
        'source_rse_id': self.source_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'requested_at': datetime.now().replace(year=2020),
        'name': name2,
        'account': self.account,
        'scope': self.scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'attributes': {
            'activity': self.user_activity,
            'bytes': 1,
            'md5': '',
            'adler32': ''
        }
    }]
    queue_requests(requests, session=self.db_session)
    self.db_session.commit()
    throttler.run(once=True, sleep_time=1)
    # Fix: get_request_by_did() likewise takes the destination RSE id
    # (self.dest_rse_id), matching the other tests in this class.
    # Threshold already consumed by the active request -> nothing is released.
    request = get_request_by_did(self.scope, name1, self.dest_rse_id)
    assert_equal(request['state'], constants.RequestState.WAITING)
    request2 = get_request_by_did(self.scope, name2, self.dest_rse_id)
    assert_equal(request2['state'], constants.RequestState.WAITING)