def test_multihop_requests_created(rse_factory, did_factory, root_account, core_config_mock, caches_mock):
    """ Ensure that multihop transfers are handled and intermediate request correctly created """
    # Topology: src -> intermediate -> dst; only the intermediate RSE is
    # flagged as usable for multihop.
    src_rse_name, src_rse_id = rse_factory.make_posix_rse()
    _, intermediate_rse_id = rse_factory.make_posix_rse()
    dst_rse_name, dst_rse_id = rse_factory.make_posix_rse()
    rse_core.add_rse_attribute(intermediate_rse_id, 'available_for_multihop', True)

    add_distance(src_rse_id, intermediate_rse_id, ranking=10)
    add_distance(intermediate_rse_id, dst_rse_id, ranking=10)

    # Upload one file on the source and request a single copy at the destination.
    did = did_factory.upload_test_file(src_rse_name)
    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=dst_rse_name,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)

    transfers, _reqs_no_source, _reqs_scheme_mismatch, _reqs_only_tape_source = \
        get_transfer_requests_and_source_replicas(rses=rse_factory.created_rses)
    assert len(transfers) == 1

    # The multihop is returned as one transfer whose second hop points back
    # at the first hop via 'parent_request'.
    multihop = next(iter(transfers.values()))
    assert multihop[1].get('parent_request') == multihop[0].rws.request_id

    # the intermediate request was correctly created
    assert request_core.get_request_by_did(rse_id=intermediate_rse_id, **did)
def test_s3s_fts_dst(self):
    """ S3: TPC a file from storage to S3 """
    expected_src_url = 'https://somestorage.ch:1094/my/prefix/mock/ab/01/file-on-storage?copy_mode=push'
    expected_dst_url = 's3s://fake-rucio.s3-eu-south-8.amazonaws.com:443/mock/ab/01/file-on-storage'

    # Single copy of the non-S3 test file, destined for the S3 RSE.
    rule_id = add_rule(dids=self.filenons3, account=self.root, copies=1,
                       rse_expression=self.rses3, grouping='NONE', weight=None,
                       lifetime=None, locked=False, subscription_id=None)

    result_groups = get_transfer_requests_and_source_replicas(rses=[self.rses3])
    for group in result_groups:
        for req_key in group:
            # Only check the request generated by our rule.
            if group[req_key]['rule_id'] != rule_id[0]:
                continue
            assert group[req_key]['sources'][0][1] == expected_src_url
            assert group[req_key]['dest_urls'][0] == expected_dst_url
def __get_transfers(total_workers=0, worker_number=0, failover_schemes=None, limit=None, activity=None,
                    older_than=None, rses=None, schemes=None, mock=False, max_sources=4, bring_online=43200,
                    retry_other_fts=False, transfertool=None, logger=logging.log):
    """
    Get transfers to process

    :param total_workers: Number of total workers.
    :param worker_number: Id of the executing worker.
    :param failover_schemes: Failover schemes.
    :param limit: Integer of requests to retrieve.
    :param activity: Activity to be selected.
    :param older_than: Only select requests older than this DateTime.
    :param rses: List of rse_id to select requests.
    :param schemes: Schemes to process.
    :param mock: Mock testing.
    :param max_sources: Max sources.
    :param bring_online: Bring online timeout.
    :param logger: Optional decorated logger that can be passed from the calling daemons or servers.
    :param retry_other_fts: Retry other fts servers if needed
    :param transfertool: The transfer tool as specified in rucio.cfg
    :returns: List of transfers
    """
    transfers, reqs_no_source, reqs_scheme_mismatch, reqs_only_tape_source = \
        transfer_core.get_transfer_requests_and_source_replicas(
            total_workers=total_workers, worker_number=worker_number, limit=limit, activity=activity,
            older_than=older_than, rses=rses, schemes=schemes, bring_online=bring_online,
            retry_other_fts=retry_other_fts, failover_schemes=failover_schemes, transfertool=transfertool)

    # Requests that cannot be submitted are moved to their terminal/waiting states.
    request_core.set_requests_state(reqs_no_source, RequestState.NO_SOURCES, logger=logger)
    request_core.set_requests_state(reqs_only_tape_source, RequestState.ONLY_TAPE_SOURCES, logger=logger)
    request_core.set_requests_state(reqs_scheme_mismatch, RequestState.MISMATCH_SCHEME, logger=logger)

    for request_id, transfer in transfers.items():
        # Rank the candidate sources and keep at most max_sources of them.
        ranked = __sort_ranking(transfer['sources'], logger=logger)[:max_sources]
        if mock:
            ranked = __mock_sources(ranked)

        # remove link_ranking in the final sources
        transfer['sources'] = [(rse, source_url, source_rse_id, ranking)
                               for rse, source_url, source_rse_id, ranking, _link_ranking in ranked]

        # The best-ranked source defines the transfer's source RSE.
        transfer['file_metadata']['src_rse'] = ranked[0][0]
        transfer['file_metadata']['src_rse_id'] = ranked[0][2]
        logger(logging.DEBUG, "Transfer for request(%s): %s", request_id, transfer)
    return transfers
def __get_transfers(total_workers=0, worker_number=0, failover_schemes=None, limit=None, activity=None,
                    older_than=None, rses=None, schemes=None, max_sources=4, bring_online=43200,
                    retry_other_fts=False, transfertool=None, logger=logging.log):
    """
    Get transfers to process

    :param total_workers: Number of total workers.
    :param worker_number: Id of the executing worker.
    :param failover_schemes: Failover schemes.
    :param limit: Integer of requests to retrieve.
    :param activity: Activity to be selected.
    :param older_than: Only select requests older than this DateTime.
    :param rses: List of rse_id to select requests.
    :param schemes: Schemes to process.
    :param max_sources: Max sources.
    :param bring_online: Bring online timeout.
    :param logger: Optional decorated logger that can be passed from the calling daemons or servers.
    :param retry_other_fts: Retry other fts servers if needed
    :param transfertool: The transfer tool as specified in rucio.cfg
    :returns: List of transfers
    """
    transfers, reqs_no_source, reqs_scheme_mismatch, reqs_only_tape_source = \
        transfer_core.get_transfer_requests_and_source_replicas(
            total_workers=total_workers, worker_number=worker_number, limit=limit, activity=activity,
            older_than=older_than, rses=rses, schemes=schemes, bring_online=bring_online,
            retry_other_fts=retry_other_fts, failover_schemes=failover_schemes, transfertool=transfertool)

    # Flag unsubmittable requests; each bucket gets its own state and log line.
    for reqs, state, message in (
            (reqs_no_source, RequestState.NO_SOURCES, "Marking requests as no-sources: %s"),
            (reqs_only_tape_source, RequestState.ONLY_TAPE_SOURCES, "Marking requests as only-tape-sources: %s"),
            (reqs_scheme_mismatch, RequestState.MISMATCH_SCHEME, "Marking requests as scheme-mismatch: %s")):
        if reqs:
            logger(logging.INFO, message, reqs)
            request_core.set_requests_state_if_possible(reqs, state, logger=logger)

    for request_id, transfer in transfers.items():
        logger(logging.DEBUG, "Transfer for request(%s): %s", request_id, transfer)
    return transfers
def __get_transfers(process=None, total_processes=None, thread=None, total_threads=None, failover_schemes=None,
                    limit=None, activity=None, older_than=None, rses=None, schemes=None, mock=False,
                    max_sources=4, bring_online=43200, retry_other_fts=False):
    """
    Get transfers to process

    :param process: Identifier of the caller process as an integer.
    :param total_processes: Maximum number of processes as an integer.
    :param thread: Identifier of the caller thread as an integer.
    :param total_threads: Maximum number of threads as an integer.
    :param failover_schemes: Failover schemes.
    :param limit: Integer of requests to retrieve.
    :param activity: Activity to be selected.
    :param older_than: Only select requests older than this DateTime.
    :param rses: List of rse_id to select requests.
    :param schemes: Schemes to process.
    :param mock: Mock testing.
    :param max_sources: Max sources.
    :param bring_online: Bring online timeout.
    :param retry_other_fts: Retry other fts servers if needed.
    :returns: List of transfers
    """
    transfers, reqs_no_source, reqs_scheme_mismatch, reqs_only_tape_source = \
        transfer_core.get_transfer_requests_and_source_replicas(
            process=process, total_processes=total_processes, thread=thread, total_threads=total_threads,
            limit=limit, activity=activity, older_than=older_than, rses=rses, schemes=schemes,
            bring_online=bring_online, retry_other_fts=retry_other_fts, failover_schemes=failover_schemes)

    # Requests that cannot be submitted are moved to their corresponding states.
    request_core.set_requests_state(reqs_no_source, RequestState.NO_SOURCES)
    request_core.set_requests_state(reqs_only_tape_source, RequestState.ONLY_TAPE_SOURCES)
    request_core.set_requests_state(reqs_scheme_mismatch, RequestState.MISMATCH_SCHEME)

    for request_id in transfers:
        # Rank the candidate sources and keep at most max_sources of them.
        sources = __sort_ranking(transfers[request_id]['sources'])
        if len(sources) > max_sources:
            sources = sources[:max_sources]
        if mock:
            sources = __mock_sources(sources)

        # remove link_ranking in the final sources
        transfers[request_id]['sources'] = [(rse, source_url, source_rse_id, ranking)
                                            for rse, source_url, source_rse_id, ranking, _link_ranking in sources]

        # The best-ranked source defines the transfer's source RSE.
        transfers[request_id]['file_metadata']['src_rse'] = sources[0][0]
        transfers[request_id]['file_metadata']['src_rse_id'] = sources[0][2]
        # Lazy %-args: the message is only rendered when DEBUG logging is enabled.
        logging.debug("Transfer for request(%s): %s", request_id, transfers[request_id])
    return transfers
def test_singlehop_vs_multihop_priority(rse_factory, root_account, mock_scope, core_config_mock, caches_mock):
    """ On small distance difference, singlehop is prioritized over multihop due to HOP_PENALTY. On big difference, multihop is prioritized """
    # +------+    +------+
    # |      | 10 |      |
    # | RSE0 +--->| RSE1 |
    # |      |    |      +-+ 10
    # +------+    +------+ |  +------+       +------+
    #                      +->|      |  200  |      |
    # +------+                | RSE3 |<------| RSE4 |
    # |      |      30   +--->|      |       |      |
    # | RSE2 +-----------+    +------+       +------+
    # |      |
    # +------+
    _, rse0_id = rse_factory.make_posix_rse()
    _, rse1_id = rse_factory.make_posix_rse()
    _, rse2_id = rse_factory.make_posix_rse()
    rse3_name, rse3_id = rse_factory.make_posix_rse()
    _, rse4_id = rse_factory.make_posix_rse()
    add_distance(rse0_id, rse1_id, ranking=10)
    add_distance(rse1_id, rse3_id, ranking=10)
    add_distance(rse2_id, rse3_id, ranking=30)
    add_distance(rse4_id, rse3_id, ranking=200)
    rse_core.add_rse_attribute(rse1_id, 'available_for_multihop', True)

    # add same file to two source RSEs
    file = {'scope': mock_scope,
            'name': 'lfn.' + generate_uuid(),
            'type': 'FILE',
            'bytes': 1,
            'adler32': 'beefdead'}
    did = {'scope': file['scope'], 'name': file['name']}
    for rse_id in [rse0_id, rse2_id]:
        add_replicas(rse_id=rse_id, files=[file], account=root_account)

    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=rse3_name,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)

    # The singlehop must be prioritized
    transfers, _reqs_no_source, _reqs_scheme_mismatch, _reqs_only_tape_source = \
        get_transfer_requests_and_source_replicas(rses=rse_factory.created_rses)
    assert len(transfers) == 1
    candidate = next(iter(transfers.values()))
    assert len(candidate) == 1
    assert candidate[0]['file_metadata']['src_rse_id'] == rse2_id
    assert candidate[0]['file_metadata']['dest_rse_id'] == rse3_id

    # add same file to two source RSEs
    file = {'scope': mock_scope,
            'name': 'lfn.' + generate_uuid(),
            'type': 'FILE',
            'bytes': 1,
            'adler32': 'beefdead'}
    did = {'scope': file['scope'], 'name': file['name']}
    for rse_id in [rse0_id, rse4_id]:
        add_replicas(rse_id=rse_id, files=[file], account=root_account)

    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=rse3_name,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)

    # The multihop must be prioritized
    transfers, _reqs_no_source, _reqs_scheme_mismatch, _reqs_only_tape_source = \
        get_transfer_requests_and_source_replicas(rses=rse_factory.created_rses)
    candidate = next(iter(t for t in transfers.values() if t[0].rws.name == file['name']))
    assert len(candidate) == 2
def test_disk_vs_tape_priority(rse_factory, root_account, mock_scope):
    """Source selection: disk is preferred over tape, then ranking, then distance."""
    tape1_rse_name, tape1_rse_id = rse_factory.make_posix_rse(rse_type=RSEType.TAPE)
    tape2_rse_name, tape2_rse_id = rse_factory.make_posix_rse(rse_type=RSEType.TAPE)
    disk1_rse_name, disk1_rse_id = rse_factory.make_posix_rse(rse_type=RSEType.DISK)
    disk2_rse_name, disk2_rse_id = rse_factory.make_posix_rse(rse_type=RSEType.DISK)
    dst_rse_name, dst_rse_id = rse_factory.make_posix_rse()
    source_rses = [tape1_rse_id, tape2_rse_id, disk1_rse_id, disk2_rse_id]
    all_rses = source_rses + [dst_rse_id]
    # One disk and one tape RSE at each of two distances.
    add_distance(disk1_rse_id, dst_rse_id, ranking=15)
    add_distance(disk2_rse_id, dst_rse_id, ranking=10)
    add_distance(tape1_rse_id, dst_rse_id, ranking=15)
    add_distance(tape2_rse_id, dst_rse_id, ranking=10)

    # add same file to all source RSEs
    file = {'scope': mock_scope,
            'name': 'lfn.' + generate_uuid(),
            'type': 'FILE',
            'bytes': 1,
            'adler32': 'beefdead'}
    did = {'scope': file['scope'], 'name': file['name']}
    for rse_id in source_rses:
        add_replicas(rse_id=rse_id, files=[file], account=root_account)

    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=dst_rse_name,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)
    request = request_core.get_request_by_did(rse_id=dst_rse_id, **did)

    @transactional_session
    def __fake_source_ranking(source_rse_id, new_ranking, session=None):
        # Update the existing Source row; create it if it doesn't exist yet.
        rowcount = session.query(models.Source).filter(models.Source.rse_id == source_rse_id).update({'ranking': new_ranking})
        if not rowcount:
            models.Source(request_id=request['id'],
                          scope=request['scope'],
                          name=request['name'],
                          rse_id=source_rse_id,
                          dest_rse_id=request['dest_rse_id'],
                          ranking=new_ranking,
                          bytes=request['bytes'],
                          url=None,
                          is_using=False).save(session=session, flush=False)

    # On equal priority and distance, disk should be preferred over tape.
    # Both disk sources will be returned
    transfers, _reqs_no_source, _reqs_scheme_mismatch, _reqs_only_tape_source = \
        get_transfer_requests_and_source_replicas(rses=all_rses)
    assert len(transfers) == 1
    candidate = next(iter(transfers.values()))
    assert len(candidate[0]['sources']) == 2
    assert candidate[0]['sources'][0][0] in (disk1_rse_name, disk2_rse_name)

    # Change the rating of the disk RSEs. Tape RSEs must now be preferred.
    # Multiple tape sources are not allowed. Only one tape RSE source must be returned.
    __fake_source_ranking(disk1_rse_id, -1)
    __fake_source_ranking(disk2_rse_id, -1)
    transfers, _reqs_no_source, _reqs_scheme_mismatch, _reqs_only_tape_source = \
        get_transfer_requests_and_source_replicas(rses=all_rses)
    assert len(transfers) == 1
    candidate = next(iter(transfers.values()))
    assert len(candidate[0]['sources']) == 1
    assert candidate[0]['sources'][0][0] in (tape1_rse_name, tape2_rse_name)

    # On equal source ranking, but different distance; the smaller distance is preferred
    transfers, _reqs_no_source, _reqs_scheme_mismatch, _reqs_only_tape_source = \
        get_transfer_requests_and_source_replicas(rses=all_rses)
    assert len(transfers) == 1
    candidate = next(iter(transfers.values()))
    assert len(candidate[0]['sources']) == 1
    assert candidate[0]['sources'][0][0] == tape2_rse_name

    # On different source ranking, the bigger ranking is preferred
    __fake_source_ranking(tape2_rse_id, -1)
    transfers, _reqs_no_source, _reqs_scheme_mismatch, _reqs_only_tape_source = \
        get_transfer_requests_and_source_replicas(rses=all_rses)
    assert len(transfers) == 1
    candidate = next(iter(transfers.values()))
    assert len(candidate[0]['sources']) == 1
    assert candidate[0]['sources'][0][0] == tape1_rse_name
def test_tpc(containerized_rses, root_account, test_scope, did_factory, rse_client, rule_client, artifact):
    """End-to-end third-party-copy test between two containerized RSEs via FTS."""
    if len(containerized_rses) < 2:
        # Fixed: stray '}' at the end of the skip message.
        pytest.skip("TPC tests need at least 2 containerized rse's for execution")

    rse1_name, rse1_id = containerized_rses[0]
    rse2_name, rse2_id = containerized_rses[1]

    base_file_name = generate_uuid()
    test_file = did_factory.upload_test_file(rse1_name, name=base_file_name + '.000', return_full_item=True)
    test_file_did_str = '%s:%s' % (test_file['did_scope'], test_file['did_name'])
    test_file_did = {'scope': test_scope, 'name': test_file['did_name']}
    # Deterministic hash-based path layout used by the RSE protocol.
    test_file_name_hash = hashlib.md5(test_file_did_str.encode('utf-8')).hexdigest()
    test_file_expected_pfn = '%s/%s/%s/%s' % (test_file_did['scope'], test_file_name_hash[0:2],
                                              test_file_name_hash[2:4], test_file_did['name'])

    rse1_hostname = rse_client.get_protocols(rse1_name)[0]['hostname']
    rse2_hostname = rse_client.get_protocols(rse2_name)[0]['hostname']

    rule_id = add_rule(dids=[test_file_did], account=root_account, copies=1, rse_expression=rse2_name,
                       grouping='NONE', weight=None, lifetime=None, locked=False, subscription_id=None)
    rule = rule_client.get_replication_rule(rule_id[0])

    re_evaluator(once=True)

    assert rule['locks_ok_cnt'] == 0
    assert rule['locks_replicating_cnt'] == 1

    # Verify the source/destination URLs generated for our rule's request.
    transfer_requestss = get_transfer_requests_and_source_replicas(rses=[rse1_id, rse2_id])
    for transfer_requests in transfer_requestss:
        for transfer_request in transfer_requests:
            if transfer_requests[transfer_request][0]['rule_id'] == rule_id[0]:
                src_url = transfer_requests[transfer_request][0]['sources'][0][1]
                dest_url = transfer_requests[transfer_request][0]['dest_urls'][0]
                check_url(src_url, rse1_hostname, test_file_expected_pfn)
                check_url(dest_url, rse2_hostname, test_file_expected_pfn)

    # Run Submitter
    submitter.run(once=True)

    # Get FTS transfer job info
    fts_transfer_id, fts_transfer_status = list_fts_transfer()

    # Check FTS transfer job
    assert fts_transfer_id is not None

    # Wait for the FTS transfer to finish
    fts_transfer_status = None
    for _ in range(MAX_POLL_WAIT_SECONDS):
        fts_transfer_status = poll_fts_transfer_status(fts_transfer_id)
        if fts_transfer_status not in ['SUBMITTED', 'ACTIVE']:
            break
        time.sleep(1)
    assert fts_transfer_status == 'FINISHED'

    poller.run(once=True, older_than=0)
    finisher.run(once=True)
    rule = rule_client.get_replication_rule(rule_id[0])
    assert rule['locks_ok_cnt'] == 1
    assert rule['locks_replicating_cnt'] == 0

    if artifact is not None:
        # Record the FTS log location so CI can collect it.
        date = datetime.date.today().strftime("%Y-%m-%d")
        with open(artifact, 'w') as artifact_file:
            artifact_file.write(
                f"/var/log/fts3/{date}/{rse1_name.lower()}__{rse2_name.lower()}/*__{fts_transfer_id}"
            )