def test_multihop_concurrent_submitters(rse_factory, did_factory, root_account, core_config_mock, caches_mock):
    """ Ensure that multiple concurrent submitters on the same multi-hop don't result in an undesired database state """
    # Topology: src --10--> jump --10--> dst; only the jump RSE allows multihop.
    src_rse, src_rse_id = rse_factory.make_posix_rse()
    jump_rse, jump_rse_id = rse_factory.make_posix_rse()
    dst_rse, dst_rse_id = rse_factory.make_posix_rse()
    rse_core.add_rse_attribute(jump_rse_id, 'available_for_multihop', True)
    add_distance(src_rse_id, jump_rse_id, ranking=10)
    add_distance(jump_rse_id, dst_rse_id, ranking=10)

    did = did_factory.upload_test_file(src_rse)
    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=dst_rse, grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)

    # Hammer the same multihop with many concurrent submitter calls; races between
    # them are expected, so individual failures are tolerated below.
    nb_threads = 9
    nb_executions = 18
    with ThreadPoolExecutor(max_workers=nb_threads) as executor:
        futures = [executor.submit(next_transfers_to_submit, rses=rse_factory.created_rses) for _ in range(nb_executions)]
        for f in futures:
            try:
                f.result()
            except Exception:
                # deliberate best-effort: only the final database state matters
                pass

    # Regardless of the concurrency, exactly one consistent pair of requests must exist:
    # the intermediate (jump) request and the final destination request, both QUEUED,
    # with the jump request pointing back at the destination request.
    jmp_request = request_core.get_request_by_did(rse_id=jump_rse_id, **did)
    dst_request = request_core.get_request_by_did(rse_id=dst_rse_id, **did)
    assert jmp_request['state'] == dst_request['state'] == RequestState.QUEUED
    assert jmp_request['attributes']['source_replica_expression'] == src_rse
    assert jmp_request['attributes']['initial_request_id'] == dst_request['id']
    assert jmp_request['attributes']['next_hop_request_id'] == dst_request['id']
def import_distances(distances, vo='def', session=None):
    """
    Create or update RSE distances from a nested mapping.

    :param distances: {src_rse_name: {dest_rse_name: distance_parameters}} mapping.
    :param vo: The VO whose RSE names are being resolved.
    :param session: database session in use.
    """
    for src_name, destinations in distances.items():
        src_id = rse_module.get_rse_id(rse=src_name, vo=vo, session=session)
        for dest_name, params in destinations.items():
            dest_id = rse_module.get_rse_id(rse=dest_name, vo=vo, session=session)
            # ids are resolved locally above; drop any ids carried in the input
            params.pop('src_rse_id', None)
            params.pop('dest_rse_id', None)
            existing = distance_module.get_distances(src_rse_id=src_id, dest_rse_id=dest_id, session=session)
            if existing:
                distance_module.update_distances(src_rse_id=src_id, dest_rse_id=dest_id, parameters=params, session=session)
            else:
                distance_module.add_distance(
                    src_rse_id=src_id,
                    dest_rse_id=dest_id,
                    ranking=params.get('ranking'),
                    agis_distance=params.get('agis_distance'),
                    geoip_distance=params.get('geoip_distance'),
                    active=params.get('active'),
                    submitted=params.get('submitted'),
                    transfer_speed=params.get('transfer_speed'),
                    finished=params.get('finished'),
                    failed=params.get('failed'),
                    session=session)
def test_multihop_requests_created(rse_factory, did_factory, root_account, core_config_mock, caches_mock):
    """ Ensure that multihop transfers are handled and intermediate request correctly created """
    src_rse_name, src_rse_id = rse_factory.make_posix_rse()
    _, hop_rse_id = rse_factory.make_posix_rse()
    dst_rse_name, dst_rse_id = rse_factory.make_posix_rse()

    # the middle RSE must opt in to being used as a multihop stop-over
    rse_core.add_rse_attribute(hop_rse_id, 'available_for_multihop', True)
    add_distance(src_rse_id, hop_rse_id, ranking=10)
    add_distance(hop_rse_id, dst_rse_id, ranking=10)

    did = did_factory.upload_test_file(src_rse_name)
    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=dst_rse_name,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)

    # exactly one bucket holding exactly one transfer path must come back
    [[_, [transfer]]] = next_transfers_to_submit(rses=rse_factory.created_rses).items()

    # the intermediate request was correctly created
    assert request_core.get_request_by_did(rse_id=hop_rse_id, **did)
def test_multihop_requests_created(rse_factory, did_factory, root_account, core_config_mock, caches_mock):
    """ Ensure that multihop transfers are handled and intermediate request correctly created """
    # Topology: src --10--> intermediate --10--> dst; only the middle RSE allows multihop.
    rs0_name, src_rse_id = rse_factory.make_posix_rse()
    _, intermediate_rse_id = rse_factory.make_posix_rse()
    dst_rse_name, dst_rse_id = rse_factory.make_posix_rse()
    rse_core.add_rse_attribute(intermediate_rse_id, 'available_for_multihop', True)
    add_distance(src_rse_id, intermediate_rse_id, ranking=10)
    add_distance(intermediate_rse_id, dst_rse_id, ranking=10)

    did = did_factory.upload_test_file(rs0_name)
    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=dst_rse_name, grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)

    # Only one multihop transfer (a list of hops) must be generated for the rule.
    transfers, _reqs_no_source, _reqs_scheme_mismatch, _reqs_only_tape_source = get_transfer_requests_and_source_replicas(
        rses=rse_factory.created_rses)
    assert len(transfers) == 1
    transfer = next(iter(transfers.values()))
    # the second hop must reference the first hop's request as its parent
    assert transfer[1].get('parent_request') == transfer[0].rws.request_id
    # the intermediate request was correctly created
    assert request_core.get_request_by_did(rse_id=intermediate_rse_id, **did)
def source_rse(db_session, vo, dest_rse):
    """Fixture: yield a freshly generated RSE usable as a source towards *dest_rse*, then remove it."""
    src = generate_rse(vo=vo, session=db_session)
    # a finite distance makes this RSE a valid transfer source for dest_rse
    add_distance(src['id'], dest_rse['id'], ranking=5, session=db_session)
    db_session.commit()
    yield src
    # teardown: drop the RSE created above
    del_rse(src['id'], session=db_session)
    db_session.commit()
def setup(rse):
    """Link *rse* to the destination RSE and place the test file replica on it."""
    add_distance(src_rse_id=rse.rse_id, dest_rse_id=dest_rse['id'], ranking=2, session=rse.db_session)
    add_replicas(rse_id=rse.rse_id, files=[file], account=mock_request.account, session=rse.db_session)
def test_request_submitted_in_order(rse_factory, did_factory, root_account):
    """Verify that the submitter picks up queued requests oldest-first."""
    # Fully mesh 2 source RSEs with 3 destination RSEs (both directions).
    src_rses = [rse_factory.make_posix_rse() for _ in range(2)]
    dst_rses = [rse_factory.make_posix_rse() for _ in range(3)]
    for _, src_rse_id in src_rses:
        for _, dst_rse_id in dst_rses:
            distance_core.add_distance(src_rse_id=src_rse_id, dest_rse_id=dst_rse_id, ranking=10)
            distance_core.add_distance(src_rse_id=dst_rse_id, dest_rse_id=src_rse_id, ranking=10)

    # Create a certain number of files on source RSEs with replication rules towards random destination RSEs
    nb_files = 15
    dids = []
    requests = []
    src_rses_iterator = itertools.cycle(src_rses)
    dst_rses_iterator = itertools.cycle(dst_rses)
    for _ in range(nb_files):
        src_rse_name, src_rse_id = next(src_rses_iterator)
        dst_rse_name, dst_rse_id = next(dst_rses_iterator)
        did = did_factory.upload_test_file(rse_name=src_rse_name)
        rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=dst_rse_name, grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)
        requests.append(request_core.get_request_by_did(rse_id=dst_rse_id, **did))
        dids.append(did)

    # Forge request creation time to a random moment in the past hour
    # NOTE(review): timedelta(minutes=randint(0, 3600)) spans up to 60 hours (and goes
    # past "now", since base_time is one hour ago); the comment above suggests
    # seconds= was intended — confirm before changing, ordering still holds either way.
    @transactional_session
    def _forge_requests_creation_time(session=None):
        base_time = datetime.utcnow().replace(microsecond=0, minute=0) - timedelta(hours=1)
        assigned_times = set()
        for request in requests:
            request_creation_time = None
            while not request_creation_time or request_creation_time in assigned_times:
                # Ensure uniqueness to avoid multiple valid submission orders and make tests deterministic with simple sorting techniques
                request_creation_time = base_time + timedelta(minutes=randint(0, 3600))
            assigned_times.add(request_creation_time)
            session.query(Request).filter(Request.id == request['id']).update({'created_at': request_creation_time})
            request['created_at'] = request_creation_time

    _forge_requests_creation_time()
    requests = sorted(requests, key=lambda r: r['created_at'])
    for request in requests:
        assert request_core.get_request(request_id=request['id'])['state'] == RequestState.QUEUED

    requests_id_in_submission_order = []
    with patch('rucio.transfertool.mock.MockTransfertool.submit') as mock_transfertool_submit:
        # Record the order of requests passed to MockTranfertool.submit()
        mock_transfertool_submit.side_effect = lambda jobs, _: requests_id_in_submission_order.extend([j['metadata']['request_id'] for j in jobs])
        submitter(once=True, rses=[{'id': rse_id} for _, rse_id in dst_rses], partition_wait_time=None, transfertool='mock', transfertype='single', filter_transfertool=None)
    for request in requests:
        assert request_core.get_request(request_id=request['id'])['state'] == RequestState.SUBMITTED
    # Requests must be submitted in the order of their creation
    assert requests_id_in_submission_order == [r['id'] for r in requests]
def add_distance(source, destination, issuer, vo='def', ranking=None, distance=None, geoip_distance=None, active=None, submitted=None, finished=None, failed=None, transfer_speed=None):
    """
    Add a src-dest distance.

    :param source: The source.
    :param destination: The destination.
    :param issuer: The issuer account.
    :param vo: The VO to act on.
    :param ranking: Ranking as an integer.
    :param distance: Distance as an integer.
    :param geoip_distance: GEOIP Distance as an integer.
    :param active: Active FTS transfers as an integer.
    :param submitted: Submitted FTS transfers as an integer.
    :param finished: Finished FTS transfers as an integer.
    :param failed: Failed FTS transfers as an integer.
    :param transfer_speed: FTS transfer speed as an integer.
    """
    permission_kwargs = {'source': source, 'destination': destination}
    if not permission.has_permission(issuer=issuer, vo=vo, action='add_distance', kwargs=permission_kwargs):
        raise exception.AccessDenied('Account %s can not add RSE distances' % (issuer))
    try:
        # resolve names to ids inside the try: get_rse_id itself cannot raise Duplicate,
        # only the insert below can
        src_rse_id = rse_module.get_rse_id(source, vo=vo)
        dest_rse_id = rse_module.get_rse_id(destination, vo=vo)
        return distance_module.add_distance(src_rse_id=src_rse_id,
                                            dest_rse_id=dest_rse_id,
                                            ranking=ranking, agis_distance=distance,
                                            geoip_distance=geoip_distance, active=active,
                                            submitted=submitted, finished=finished,
                                            failed=failed, transfer_speed=transfer_speed)
    except exception.Duplicate:
        # use source and destination RSE names
        raise exception.Duplicate('Distance from %s to %s already exists!' % (source, destination))
def add_distance(source, destination, issuer, ranking=None, distance=None, geoip_distance=None, active=None, submitted=None, finished=None, failed=None, transfer_speed=None):
    """
    Add a src-dest distance.

    :param source: The source.
    :param destination: The destination.
    :param issuer: The issuer account.
    :param ranking: Ranking as an integer.
    :param distance: Distance as an integer.
    :param geoip_distance: GEOIP Distance as an integer.
    :param active: Active FTS transfers as an integer.
    :param submitted: Submitted FTS transfers as an integer.
    :param finished: Finished FTS transfers as an integer.
    :param failed: Failed FTS transfers as an integer.
    :param transfer_speed: FTS transfer speed as an integer.
    """
    permission_kwargs = {'source': source, 'destination': destination}
    if not permission.has_permission(issuer=issuer, action='add_distance', kwargs=permission_kwargs):
        raise exception.AccessDenied('Account %s can not add RSE distances' % (issuer))
    src_rse_id = rse_module.get_rse_id(source)
    dest_rse_id = rse_module.get_rse_id(destination)
    return distance_module.add_distance(src_rse_id=src_rse_id,
                                        dest_rse_id=dest_rse_id,
                                        ranking=ranking, agis_distance=distance,
                                        geoip_distance=geoip_distance, active=active,
                                        submitted=submitted, finished=finished,
                                        failed=failed, transfer_speed=transfer_speed)
def setUp(self):
    """Create one S3 RSE and one non-S3 RSE, each holding a replica, linked both ways."""
    # Honour multi-VO deployments: pass the VO through to every creation call below.
    if config_get_bool('common', 'multi_vo', raise_exception=False, default=False):
        self.vo = {'vo': get_vo()}
    else:
        self.vo = {}
    self.root = InternalAccount('root', **self.vo)

    # add an S3 storage with a replica
    self.rc = client.ReplicaClient()
    self.rses3 = rse_name_generator()
    self.rses3_id = add_rse(self.rses3, **self.vo)
    add_protocol(self.rses3_id, {'scheme': 'https',
                                 'hostname': 'fake-rucio.s3-eu-south-8.amazonaws.com',
                                 'port': 443,
                                 'prefix': '/',
                                 'impl': 'rucio.rse.protocols.gfal.NoRename',
                                 'domains': {
                                     'lan': {'read': 1, 'write': 1, 'delete': 1},
                                     'wan': {'read': 1, 'write': 1, 'delete': 1, 'third_party_copy': 1}}})
    # 'sign_url' marks the endpoint as requiring signed S3 URLs
    add_rse_attribute(rse_id=self.rses3_id, key='sign_url', value='s3')
    add_rse_attribute(rse_id=self.rses3_id, key='fts', value='localhost')
    self.files3 = [{'scope': InternalScope('mock', **self.vo), 'name': 'file-on-aws',
                    'bytes': 1234, 'adler32': 'deadbeef', 'meta': {'events': 123}}]
    add_replicas(rse_id=self.rses3_id, files=self.files3, account=self.root)

    # add a non-S3 storage with a replica
    self.rsenons3 = rse_name_generator()
    self.rsenons3_id = add_rse(self.rsenons3, **self.vo)
    add_protocol(self.rsenons3_id, {'scheme': 'https',
                                    'hostname': 'somestorage.ch',
                                    'port': 1094,
                                    'prefix': '/my/prefix',
                                    'impl': 'rucio.rse.protocols.gfal.Default',
                                    'domains': {
                                        'lan': {'read': 1, 'write': 1, 'delete': 1},
                                        'wan': {'read': 1, 'write': 1, 'delete': 1, 'third_party_copy': 1}}})
    add_rse_attribute(rse_id=self.rsenons3_id, key='fts', value='localhost')
    self.filenons3 = [{'scope': InternalScope('mock', **self.vo), 'name': 'file-on-storage',
                       'bytes': 1234, 'adler32': 'deadbeef', 'meta': {'events': 321}}]
    add_replicas(rse_id=self.rsenons3_id, files=self.filenons3, account=self.root)

    # set the distance both ways
    add_distance(self.rses3_id, self.rsenons3_id, ranking=1, agis_distance=1, geoip_distance=1)
    add_distance(self.rsenons3_id, self.rses3_id, ranking=1, agis_distance=1, geoip_distance=1)
def test_singlehop_vs_multihop_priority(rse_factory, root_account, mock_scope, core_config_mock, caches_mock):
    """ On small distance difference, singlehop is prioritized over multihop due to HOP_PENALTY. On big difference, multihop is prioritized """
    # +------+    +------+
    # |      | 10 |      |
    # | RSE0 +--->| RSE1 |
    # |      |    |      +-+ 10
    # +------+    +------+ |  +------+       +------+
    #                      +->|      |  200  |      |
    # +------+                | RSE3 |<------| RSE4 |
    # |      |   30      +--->|      |       |      |
    # | RSE2 +-----------+    +------+       +------+
    # |      |
    # +------+
    _, rse0_id = rse_factory.make_posix_rse()
    _, rse1_id = rse_factory.make_posix_rse()
    _, rse2_id = rse_factory.make_posix_rse()
    rse3_name, rse3_id = rse_factory.make_posix_rse()
    _, rse4_id = rse_factory.make_posix_rse()
    add_distance(rse0_id, rse1_id, ranking=10)
    add_distance(rse1_id, rse3_id, ranking=10)
    add_distance(rse2_id, rse3_id, ranking=30)
    add_distance(rse4_id, rse3_id, ranking=200)
    # RSE1 is the only RSE allowed to serve as an intermediate hop
    rse_core.add_rse_attribute(rse1_id, 'available_for_multihop', True)

    # add same file to two source RSEs
    file = {'scope': mock_scope, 'name': 'lfn.' + generate_uuid(), 'type': 'FILE', 'bytes': 1, 'adler32': 'beefdead'}
    did = {'scope': file['scope'], 'name': file['name']}
    for rse_id in [rse0_id, rse2_id]:
        add_replicas(rse_id=rse_id, files=[file], account=root_account)

    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=rse3_name, grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)

    # The singlehop must be prioritized
    # direct 30 from RSE2 beats 10+10 via RSE1 once the hop penalty is added
    [[_, [transfer]]] = next_transfers_to_submit(rses=rse_factory.created_rses).items()
    assert len(transfer) == 1
    assert transfer[0].src.rse.id == rse2_id
    assert transfer[0].dst.rse.id == rse3_id

    # add same file to two source RSEs
    file = {'scope': mock_scope, 'name': 'lfn.' + generate_uuid(), 'type': 'FILE', 'bytes': 1, 'adler32': 'beefdead'}
    did = {'scope': file['scope'], 'name': file['name']}
    for rse_id in [rse0_id, rse4_id]:
        add_replicas(rse_id=rse_id, files=[file], account=root_account)

    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=rse3_name, grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)

    # The multihop must be prioritized
    # direct 200 from RSE4 loses to the 10+10 path via RSE1 even with the hop penalty
    [[_, transfers]] = next_transfers_to_submit(rses=rse_factory.created_rses).items()
    transfer = next(iter(t for t in transfers if t[0].rws.name == file['name']))
    assert len(transfer) == 2
def __setup_test():
    """Create a src→dst RSE pair with a queued rule, then mark the source unreadable."""
    source_name, source_id = rse_factory.make_posix_rse()
    dest_name, dest_id = rse_factory.make_posix_rse()
    distance_core.add_distance(source_id, dest_id, ranking=10)

    did = did_factory.upload_test_file(source_name)
    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=dest_name,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)

    # disable reads on the source so the transfer cannot find a usable replica
    rse_core.update_rse(source_id, {'availability_read': False})
    return source_id, dest_id, did
def setup(self):
    """Reset the distances table and register a single MOCK→MOCK2 distance for export tests."""
    self.db_session = session.get_session()
    # start from a clean slate: remove every pre-existing distance row
    self.db_session.query(models.Distance).delete()
    self.db_session.commit()

    self.rse_1 = 'MOCK'
    self.rse_1_id = get_rse_id(self.rse_1)
    self.rse_2 = 'MOCK2'
    self.rse_2_id = get_rse_id(self.rse_2)
    add_distance(self.rse_1_id, self.rse_2_id, 10)

    # expected export keyed by RSE name
    self.distances = {
        self.rse_1: {self.rse_2: get_distances(self.rse_1_id, self.rse_2_id)[0]},
    }
    # expected export keyed by RSE id
    self.distances_core = {
        self.rse_1_id: {self.rse_2_id: get_distances(self.rse_1_id, self.rse_2_id)[0]},
    }
def test_request_submitted(rse_factory, file_factory, root_account):
    """ Conveyor (DAEMON): Test the submitter"""
    src_rse_name, src_rse_id = rse_factory.make_posix_rse()
    dst_rse_name, dst_rse_id = rse_factory.make_posix_rse()
    # connect the two RSEs in both directions
    distance_core.add_distance(src_rse_id=src_rse_id, dest_rse_id=dst_rse_id, ranking=10)
    distance_core.add_distance(src_rse_id=dst_rse_id, dest_rse_id=src_rse_id, ranking=10)

    did = file_factory.upload_test_file(rse_name=src_rse_name)
    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=dst_rse_name,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)

    def _request_state():
        # current state of the transfer request towards the destination RSE
        return request_core.get_request_by_did(rse_id=dst_rse_id, **did)['state']

    assert _request_state() == RequestState.QUEUED

    # run submitter with a RSE filter which doesn't contain the needed one
    submitter(once=True, rses=[{'id': src_rse_id}], mock=True, transfertool='mock', transfertype='bulk', filter_transfertool=None, bulk=None)
    assert _request_state() == RequestState.QUEUED

    # now run it against the destination RSE: the request must get submitted
    submitter(once=True, rses=[{'id': dst_rse_id}], mock=True, transfertool='mock', transfertype='bulk', filter_transfertool=None, bulk=None)
    assert _request_state() == RequestState.SUBMITTED
def test_fk_error_on_source_creation(rse_factory, did_factory, root_account):
    """ verify that ensure_db_sources correctly handles foreign key errors while creating sources """
    # sqlite doesn't enforce the foreign key constraint this test relies on
    if get_session().bind.dialect.name == 'sqlite':
        pytest.skip('Will not run on sqlite')

    src_rse, src_rse_id = rse_factory.make_mock_rse()
    dst_rse, dst_rse_id = rse_factory.make_mock_rse()
    add_distance(src_rse_id, dst_rse_id, ranking=10)

    did = did_factory.random_did()
    file = {'scope': did['scope'], 'name': did['name'], 'type': 'FILE', 'bytes': 1, 'adler32': 'beefdead'}
    add_replicas(rse_id=src_rse_id, files=[file], account=root_account)
    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=dst_rse, grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)

    requests, *_ = get_transfer_paths(rses=[src_rse_id, dst_rse_id])
    request_id, [transfer_path] = next(iter(requests.items()))
    # Point the path at a request id which doesn't exist in the database, so that
    # inserting sources for it violates the foreign key constraint.
    transfer_path[0].rws.request_id = generate_uuid()
    to_submit, *_ = assign_paths_to_transfertool_and_create_hops(requests)
    # The path must be dropped rather than submitted when source creation fails.
    assert not to_submit
def test_multihop_sources_created(rse_factory, did_factory, root_account, core_config_mock, caches_mock):
    """ Ensure that multihop transfers are handled and intermediate request correctly created """
    # Chain: src -> jump1 -> jump2 -> jump3 -> dst, each link with the same ranking.
    src_rse_name, src_rse_id = rse_factory.make_posix_rse()
    _, jump_rse1_id = rse_factory.make_posix_rse()
    _, jump_rse2_id = rse_factory.make_posix_rse()
    _, jump_rse3_id = rse_factory.make_posix_rse()
    dst_rse_name, dst_rse_id = rse_factory.make_posix_rse()

    jump_rses = [jump_rse1_id, jump_rse2_id, jump_rse3_id]
    all_rses = jump_rses + [src_rse_id, dst_rse_id]
    for rse_id in jump_rses:
        rse_core.add_rse_attribute(rse_id, 'available_for_multihop', True)

    distance_core.add_distance(src_rse_id, jump_rse1_id, ranking=10)
    distance_core.add_distance(jump_rse1_id, jump_rse2_id, ranking=10)
    distance_core.add_distance(jump_rse2_id, jump_rse3_id, ranking=10)
    distance_core.add_distance(jump_rse3_id, dst_rse_id, ranking=10)

    did = did_factory.upload_test_file(src_rse_name)
    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=dst_rse_name, grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)

    submitter(once=True, rses=[{'id': rse_id} for rse_id in all_rses], partition_wait_time=None, transfertool='mock', transfertype='single', filter_transfertool=None)

    # Ensure that each intermediate request was correctly created
    for rse_id in jump_rses:
        assert request_core.get_request_by_did(rse_id=rse_id, **did)

    @read_session
    def __ensure_source_exists(rse_id, scope, name, session=None):
        """Return the (unique) sources row for the given replica; raises if absent."""
        return session.query(Source). \
            filter(Source.rse_id == rse_id). \
            filter(Source.scope == scope). \
            filter(Source.name == name). \
            one()

    # Ensure that sources where created for transfers
    for rse_id in jump_rses + [src_rse_id]:
        __ensure_source_exists(rse_id, **did)
def setup(rse):
    """Make *rse* a valid transfer source by declaring a distance towards the destination RSE."""
    add_distance(src_rse_id=rse.rse_id, dest_rse_id=dest_rse['id'], ranking=5, session=rse.db_session)
def import_data(data, session=None):
    """
    Import data to add and update records in Rucio.

    Processes, in order: RSEs (creation or update, then protocols, limits,
    transfer limits and attributes per RSE) and finally RSE distances.

    :param data: data to be imported as dictionary.
    :param session: database session in use.
    """
    # RSEs
    rses = data.get('rses')
    if rses:
        for rse in rses:
            # pull the nested protocol list out before the RSE dict is passed around
            protocols = rse.get('protocols')
            if protocols:
                protocols = protocols.get('protocols')
                del rse['protocols']
            rse_name = rse['rse']
            del rse['rse']
            if not rse_module.rse_exists(rse_name, session=session):
                rse_module.add_rse(rse_name, deterministic=rse.get('deterministic'), volatile=rse.get('volatile'),
                                   city=rse.get('city'), region_code=rse.get('region_code'),
                                   country_name=rse.get('country_name'), staging_area=rse.get('staging_area'),
                                   continent=rse.get('continent'), time_zone=rse.get('time_zone'),
                                   ISP=rse.get('ISP'), rse_type=rse.get('rse_type'),
                                   latitude=rse.get('latitude'), longitude=rse.get('longitude'),
                                   ASN=rse.get('ASN'), availability=rse.get('availability'),
                                   session=session)
            else:
                rse_module.update_rse(rse_name, rse, session=session)

            # Protocols
            if protocols:
                old_protocols = rse_module.get_rse_protocols(rse=rse_name, session=session)
                for protocol in protocols:
                    scheme = protocol.get('scheme')
                    hostname = protocol.get('hostname')
                    port = protocol.get('port')
                    # a protocol is "the same" if scheme+hostname+port match an existing one
                    intersection = [
                        old_protocol
                        for old_protocol in old_protocols['protocols']
                        if old_protocol['scheme'] == scheme
                        and old_protocol['hostname'] == hostname
                        and old_protocol['port'] == port
                    ]
                    if intersection:
                        # the identifying keys must not appear in the update payload
                        del protocol['scheme']
                        del protocol['hostname']
                        del protocol['port']
                        rse_module.update_protocols(rse=rse_name, scheme=scheme, data=protocol, hostname=hostname, port=port, session=session)
                    else:
                        rse_module.add_protocol(rse=rse_name, parameter=protocol, session=session)

            # Limits: replace any existing limit of the same name
            limits = rse.get('limits')
            if limits:
                old_limits = rse_module.get_rse_limits(rse=rse_name, session=session)
                for limit in limits:
                    if limit in old_limits:
                        rse_module.delete_rse_limit(rse=rse_name, name=limit, session=session)
                    rse_module.set_rse_limits(rse=rse_name, name=limit, value=limits[limit], session=session)

            # Transfer limits: replace any existing limit for the same activity
            transfer_limits = rse.get('transfer_limits')
            if transfer_limits:
                for limit in transfer_limits:
                    old_transfer_limits = rse_module.get_rse_transfer_limits(rse=rse_name, activity=limit, session=session)
                    if limit in old_transfer_limits:
                        rse_module.delete_rse_transfer_limits(rse=rse_name, activity=limit, session=session)
                    # FIX: dict.items() is a view and not subscriptable on Python 3
                    # (the old ``.items()[0][1]`` raised TypeError); take the first
                    # per-RSE entry's max_transfers value instead.
                    max_transfers = next(iter(transfer_limits[limit].values()))['max_transfers']
                    rse_module.set_rse_transfer_limits(rse=rse_name, activity=limit, max_transfers=max_transfers, session=session)

            # Attributes: replace any existing attribute of the same key
            attributes = rse.get('attributes')
            if attributes:
                old_attributes = rse_module.list_rse_attributes(rse=rse_name, session=session)
                for attr in attributes:
                    if attr in old_attributes:
                        rse_module.del_rse_attribute(rse=rse_name, key=attr, session=session)
                    rse_module.add_rse_attribute(rse=rse_name, key=attr, value=attributes[attr], session=session)

    # Distances
    distances = data.get('distances')
    if distances:
        for src_rse_name in distances:
            src = rse_module.get_rse_id(src_rse_name, session=session)
            for dest_rse_name in distances[src_rse_name]:
                dest = rse_module.get_rse_id(dest_rse_name, session=session)
                distance = distances[src_rse_name][dest_rse_name]
                # FIX: the ids may be absent from imported data (exports keyed by
                # name carry no ids); drop them only when present instead of an
                # unconditional ``del`` which raised KeyError.
                distance.pop('src_rse_id', None)
                distance.pop('dest_rse_id', None)
                old_distance = distance_module.get_distances(src_rse_id=src, dest_rse_id=dest, session=session)
                if old_distance:
                    distance_module.update_distances(src_rse_id=src, dest_rse_id=dest, parameters=distance, session=session)
                else:
                    distance_module.add_distance(
                        src_rse_id=src,
                        dest_rse_id=dest,
                        ranking=distance.get('ranking'),
                        agis_distance=distance.get('agis_distance'),
                        geoip_distance=distance.get('geoip_distance'),
                        active=distance.get('active'),
                        submitted=distance.get('submitted'),
                        transfer_speed=distance.get('transfer_speed'),
                        finished=distance.get('finished'),
                        failed=distance.get('failed'),
                        session=session)
def setup(self):
    """Build the pre-existing DB state and the import payloads used by the importer tests."""
    # New RSE
    self.new_rse = rse_name_generator()
    # RSE 1 that already exists
    self.old_rse_1 = rse_name_generator()
    self.old_rse_id_1 = add_rse(self.old_rse_1, availability=1, region_code='DE', country_name='DE',
                                deterministic=True, volatile=True, staging_area=True, time_zone='Europe',
                                latitude='1', longitude='2')
    add_protocol(self.old_rse_id_1, {'scheme': 'scheme1', 'hostname': 'hostname1', 'port': 1000, 'impl': 'TODO'})
    add_protocol(self.old_rse_id_1, {'scheme': 'scheme3', 'hostname': 'hostname3', 'port': 1000, 'impl': 'TODO'})
    set_rse_limits(rse_id=self.old_rse_id_1, name='MaxBeingDeletedFiles', value='10')
    set_rse_limits(rse_id=self.old_rse_id_1, name='MinFreeSpace', value='10')
    add_rse_attribute(rse_id=self.old_rse_id_1, key='attr1', value='test10')
    add_rse_attribute(rse_id=self.old_rse_id_1, key='lfn2pfn_algorithm', value='test10')
    add_rse_attribute(rse_id=self.old_rse_id_1, key='verify_checksum', value=True)
    # RSE 2 that already exists
    self.old_rse_2 = rse_name_generator()
    self.old_rse_id_2 = add_rse(self.old_rse_2)
    # RSE 3 that already exists
    self.old_rse_3 = rse_name_generator()
    self.old_rse_id_3 = add_rse(self.old_rse_3)
    # RSE 4 that already exists
    self.old_rse_4 = rse_name_generator()
    self.old_rse_id_4 = add_rse(self.old_rse_4)
    # RSE 4 that already exists
    # NOTE(review): old_rse_4 is generated and added twice; the first RSE created
    # above becomes orphaned and its id is overwritten here — confirm whether the
    # first creation is leftover code.
    self.old_rse_4 = rse_name_generator()
    add_rse(self.old_rse_4)
    self.old_rse_id_4 = get_rse_id(self.old_rse_4)
    # Distance that already exists
    add_distance(self.old_rse_id_1, self.old_rse_id_2)
    # Account 1 that already exists
    self.old_account_1 = InternalAccount(rse_name_generator())
    add_account(self.old_account_1, AccountType.USER, email='test')
    # Account 2 that already exists
    self.old_account_2 = InternalAccount(rse_name_generator())
    add_account(self.old_account_2, AccountType.USER, email='test')
    # Identity that should be removed
    self.identity_to_be_removed = rse_name_generator()
    add_identity(self.identity_to_be_removed, IdentityType.X509, email='email')
    add_account_identity(self.identity_to_be_removed, IdentityType.X509, self.old_account_2, 'email')
    # Identity that already exsits but should be added to the account
    self.identity_to_be_added_to_account = rse_name_generator()
    add_identity(self.identity_to_be_added_to_account, IdentityType.X509, email='email')

    # Main import payload: one brand-new RSE, updates for existing RSEs,
    # distances (keyed by name, without ids) and account/identity changes.
    self.data1 = {
        'rses': {
            self.new_rse: {
                'rse_type': RSEType.TAPE,
                'availability': 3,
                'city': 'NewCity',
                'region_code': 'CH',
                'country_name': 'switzerland',
                'staging_area': False,
                'time_zone': 'Europe',
                'latitude': 1,
                'longitude': 2,
                'deterministic': True,
                'volatile': False,
                'protocols': [{'scheme': 'scheme', 'hostname': 'hostname', 'port': 1000, 'impl': 'impl'}],
                'attributes': {'attr1': 'test'},
                'MinFreeSpace': 20000,
                'lfn2pfn_algorithm': 'hash2',
                'verify_checksum': False,
                'availability_delete': True,
                'availability_read': False,
                'availability_write': True
            },
            self.old_rse_1: {
                'rse_type': RSEType.TAPE,
                'deterministic': False,
                'volatile': False,
                'region_code': 'US',
                'country_name': 'US',
                'staging_area': False,
                'time_zone': 'Asia',
                'longitude': 5,
                'city': 'City',
                'availability': 2,
                'latitude': 10,
                'protocols': [{'scheme': 'scheme1', 'hostname': 'hostname1', 'port': 1000, 'prefix': 'prefix', 'impl': 'impl1'},
                              {'scheme': 'scheme2', 'hostname': 'hostname2', 'port': 1001, 'impl': 'impl'}],
                'attributes': {'attr1': 'test1', 'attr2': 'test2'},
                'MinFreeSpace': 10000,
                'MaxBeingDeletedFiles': 1000,
                'verify_checksum': False,
                'lfn2pfn_algorithm': 'hash3',
                'availability_delete': False,
                'availability_read': False,
                'availability_write': True
            },
            self.old_rse_2: {},
            self.old_rse_3: {}
        },
        'distances': {
            self.old_rse_1: {
                self.old_rse_2: {'src_rse': self.old_rse_1, 'dest_rse': self.old_rse_2, 'ranking': 10},
                self.old_rse_3: {'src_rse': self.old_rse_1, 'dest_rse': self.old_rse_3, 'ranking': 4}
            }
        },
        'accounts': [
            {'account': InternalAccount('new_account'), 'email': 'email',
             'identities': [{'type': 'userpass', 'identity': 'username', 'password': '******'}]},
            {'account': InternalAccount('new_account2'), 'email': 'email'},
            {'account': self.old_account_2, 'email': 'new_email',
             'identities': [{'identity': self.identity_to_be_added_to_account, 'type': 'x509'},
                            {'type': 'userpass', 'identity': 'username2', 'password': '******'}]},
            {'account': InternalAccount('jdoe'), 'email': 'email'}
        ]
    }
    # Minimal payload: an RSE entry whose only content is its own name
    self.data2 = {'rses': {self.new_rse: {'rse': self.new_rse}}}
    # Empty-distances payload
    self.data3 = {'distances': {}}
def test_get_hops(rse_factory):
    """Exercise get_hops over a hand-built distance graph: direct links, multihop,
    restricted intermediate RSEs, one-way links and missing costs."""
    # Build the following topology (numbers are distance rankings; RSE0 is
    # isolated, RSE2->RSE6 has no cost, RSE5->RSE6 is one-way):
    #
    #   RSE0 (isolated)
    #
    #   RSE1 <--40--> RSE3          RSE1 <--10--> RSE2
    #   RSE2 --10--> RSE4           RSE3 --10--> RSE4
    #   RSE3 --50--> RSE5           RSE4 --10--> RSE5
    #   RSE5 --50--> RSE3           RSE5 --10--> RSE4
    #   RSE5 --20--> RSE6           RSE4 --10--> RSE2
    #   RSE2 -> RSE6 exists with <missing_cost>
    _, rse0_id = rse_factory.make_mock_rse()
    _, rse1_id = rse_factory.make_mock_rse()
    _, rse2_id = rse_factory.make_mock_rse()
    _, rse3_id = rse_factory.make_mock_rse()
    _, rse4_id = rse_factory.make_mock_rse()
    _, rse5_id = rse_factory.make_mock_rse()
    _, rse6_id = rse_factory.make_mock_rse()
    all_rses = [rse0_id, rse1_id, rse2_id, rse3_id, rse4_id, rse5_id, rse6_id]
    add_distance(rse1_id, rse3_id, ranking=40)
    add_distance(rse1_id, rse2_id, ranking=10)
    add_distance(rse2_id, rse1_id, ranking=10)
    add_distance(rse2_id, rse4_id, ranking=10)
    add_distance(rse3_id, rse1_id, ranking=40)
    add_distance(rse3_id, rse4_id, ranking=10)
    add_distance(rse3_id, rse5_id, ranking=50)
    add_distance(rse4_id, rse2_id, ranking=10)
    add_distance(rse4_id, rse5_id, ranking=10)
    add_distance(rse5_id, rse3_id, ranking=50)
    add_distance(rse5_id, rse4_id, ranking=10)
    add_distance(rse5_id, rse6_id, ranking=20)

    # There must be no paths between an isolated node and other nodes; be it with multipath enabled or disabled
    with pytest.raises(NoDistance):
        get_hops(source_rse_id=rse0_id, dest_rse_id=rse1_id)
    with pytest.raises(NoDistance):
        get_hops(source_rse_id=rse1_id, dest_rse_id=rse0_id)
    with pytest.raises(NoDistance):
        get_hops(source_rse_id=rse0_id, dest_rse_id=rse1_id, include_multihop=True, multihop_rses=all_rses)
    with pytest.raises(NoDistance):
        get_hops(source_rse_id=rse1_id, dest_rse_id=rse0_id, include_multihop=True, multihop_rses=all_rses)

    # A single hop path must be found between two directly connected RSE
    [hop] = get_hops(source_rse_id=rse1_id, dest_rse_id=rse2_id)
    assert hop['source_rse_id'] == rse1_id
    assert hop['dest_rse_id'] == rse2_id

    # No path will be found if there is no direct connection and "include_multihop" is not set
    with pytest.raises(NoDistance):
        get_hops(source_rse_id=rse3_id, dest_rse_id=rse2_id)

    # Multihop_rses argument empty (not set), no path will be computed
    with pytest.raises(NoDistance):
        get_hops(source_rse_id=rse3_id, dest_rse_id=rse2_id, include_multihop=True)

    # The shortest multihop path will be computed
    [hop1, hop2] = get_hops(source_rse_id=rse3_id, dest_rse_id=rse2_id, include_multihop=True, multihop_rses=all_rses)
    assert hop1['source_rse_id'] == rse3_id
    assert hop1['dest_rse_id'] == rse4_id
    assert hop2['source_rse_id'] == rse4_id
    assert hop2['dest_rse_id'] == rse2_id

    # multihop_rses doesn't contain the RSE needed for the shortest path. Return a longer path
    [hop1, hop2] = get_hops(source_rse_id=rse1_id, dest_rse_id=rse4_id, include_multihop=True, multihop_rses=[rse3_id])
    assert hop1['source_rse_id'] == rse1_id
    assert hop1['dest_rse_id'] == rse3_id
    assert hop2['source_rse_id'] == rse3_id
    assert hop2['dest_rse_id'] == rse4_id

    # A link with cost only in one direction will not be used in the opposite direction
    with pytest.raises(NoDistance):
        get_hops(source_rse_id=rse6_id, dest_rse_id=rse5_id, include_multihop=True, multihop_rses=all_rses)
    [hop1, hop2] = get_hops(source_rse_id=rse4_id, dest_rse_id=rse3_id, include_multihop=True, multihop_rses=all_rses)
    assert hop1['source_rse_id'] == rse4_id
    assert hop2['source_rse_id'] == rse5_id
    assert hop2['dest_rse_id'] == rse3_id

    # A longer path is preferred over a shorter one with high intermediate cost
    [hop1, hop2, hop3] = get_hops(source_rse_id=rse3_id, dest_rse_id=rse6_id, include_multihop=True, multihop_rses=all_rses)
    assert hop1['source_rse_id'] == rse3_id
    assert hop2['source_rse_id'] == rse4_id
    assert hop3['source_rse_id'] == rse5_id
    assert hop3['dest_rse_id'] == rse6_id

    # A link with no cost is ignored. Both for direct connection and multihop paths
    [hop1, hop2, hop3] = get_hops(source_rse_id=rse2_id, dest_rse_id=rse6_id, include_multihop=True, multihop_rses=all_rses)
    assert hop1['source_rse_id'] == rse2_id
    assert hop2['source_rse_id'] == rse4_id
    assert hop3['source_rse_id'] == rse5_id
    assert hop3['dest_rse_id'] == rse6_id
    [hop1, hop2, hop3, hop4] = get_hops(source_rse_id=rse1_id, dest_rse_id=rse6_id, include_multihop=True, multihop_rses=all_rses)
    assert hop1['source_rse_id'] == rse1_id
    assert hop2['source_rse_id'] == rse2_id
    assert hop3['source_rse_id'] == rse4_id
    assert hop4['source_rse_id'] == rse5_id
    assert hop4['dest_rse_id'] == rse6_id
def setup(self):
    """Create the pre-existing RSEs and distances used by the importer tests.

    Builds three importer payloads (list-based format, RSEs under an 'rses' list):
      - data1: full import — one brand-new RSE plus overrides for old_rse_1 and distances
      - data2: minimal import containing only the new RSE
      - data3: import with an empty 'distances' section
    """
    # New RSE: does not exist yet; the importer is expected to create it
    self.new_rse = rse_name_generator()
    # RSE 1 that already exists
    self.old_rse_1 = rse_name_generator()
    add_rse(self.old_rse_1, availability=1)
    add_protocol(
        self.old_rse_1, {
            'scheme': 'scheme1',
            'hostname': 'hostname1',
            'port': 1000,
            'impl': 'impl'
        })
    self.old_rse_id_1 = get_rse_id(self.old_rse_1)
    # Pre-existing limits/transfer-limits/attributes that the payload below overrides
    set_rse_limits(rse=self.old_rse_1, name='limit1', value='10')
    set_rse_transfer_limits(rse=self.old_rse_1, activity='activity1', max_transfers=10)
    add_rse_attribute(rse=self.old_rse_1, key='attr1', value='test10')
    # RSE 2 that already exists
    self.old_rse_2 = rse_name_generator()
    add_rse(self.old_rse_2)
    self.old_rse_id_2 = get_rse_id(self.old_rse_2)
    # RSE 3 that already exists
    self.old_rse_3 = rse_name_generator()
    add_rse(self.old_rse_3)
    self.old_rse_id_3 = get_rse_id(self.old_rse_3)
    # Distance that already exists (the importer should update, not re-create, it)
    add_distance(self.old_rse_id_1, self.old_rse_id_2)
    self.data1 = {
        'rses': [{
            'rse': self.new_rse,
            'rse_type': 'TAPE',
            'availability': 5,
            'city': 'NewCity',
            'protocols': {
                'protocols': [{
                    'scheme': 'scheme',
                    'hostname': 'hostname',
                    'port': 1000,
                    'impl': 'impl'
                }]
            },
            'limits': {
                'limit1': 0
            },
            # NOTE(review): deliberately keyed by a bogus rse id — presumably
            # the importer must tolerate/skip unknown ids here; confirm intent
            'transfer_limits': {
                'activity1': {
                    'unknown_rse_id': {
                        'max_transfers': 1
                    }
                }
            },
            'attributes': {
                'attr1': 'test'
            }
        }, {
            'rse': self.old_rse_1,
            'protocols': {
                'protocols': [{
                    'scheme': 'scheme1',
                    'hostname': 'hostname1',
                    'port': 1000,
                    'prefix': 'prefix',
                    'impl': 'impl1'
                }, {
                    'scheme': 'scheme2',
                    'hostname': 'hostname2',
                    'port': 1001,
                    'impl': 'impl'
                }]
            },
            'limits': {
                'limit1': 0,
                'limit2': 2
            },
            'transfer_limits': {
                'activity1': {
                    self.old_rse_id_1: {
                        'max_transfers': 1
                    }
                },
                'activity2': {
                    self.old_rse_id_1: {
                        'max_transfers': 2
                    }
                }
            },
            'attributes': {
                'attr1': 'test1',
                'attr2': 'test2'
            }
        }],
        'distances': {
            self.old_rse_1: {
                self.old_rse_2: {
                    'src_rse_id': self.old_rse_id_1,
                    'dest_rse_id': self.old_rse_id_2,
                    'ranking': 10
                },
                self.old_rse_3: {
                    'src_rse_id': self.old_rse_id_1,
                    'dest_rse_id': self.old_rse_id_3,
                    'ranking': 4
                }
            }
        }
    }
    # Minimal payload: only the new RSE, no extra properties
    self.data2 = {'rses': [{'rse': self.new_rse}]}
    # Payload with an empty distances section
    self.data3 = {'distances': {}}
def test_disk_vs_tape_priority(rse_factory, root_account, mock_scope):
    """Source selection: DISK sources must be preferred over TAPE sources.

    Disk keeps being preferred until its (faked) source ranking has been
    lowered twice; among tape sources only one may be returned, chosen by
    ranking first and by smallest distance on ties.
    """
    tape1_rse_name, tape1_rse_id = rse_factory.make_posix_rse(rse_type=RSEType.TAPE)
    tape2_rse_name, tape2_rse_id = rse_factory.make_posix_rse(rse_type=RSEType.TAPE)
    disk1_rse_name, disk1_rse_id = rse_factory.make_posix_rse(rse_type=RSEType.DISK)
    disk2_rse_name, disk2_rse_id = rse_factory.make_posix_rse(rse_type=RSEType.DISK)
    dst_rse_name, dst_rse_id = rse_factory.make_posix_rse()
    source_rses = [tape1_rse_id, tape2_rse_id, disk1_rse_id, disk2_rse_id]
    all_rses = source_rses + [dst_rse_id]
    # Symmetric setup: one disk and one tape at distance 15, one of each at 10
    add_distance(disk1_rse_id, dst_rse_id, ranking=15)
    add_distance(disk2_rse_id, dst_rse_id, ranking=10)
    add_distance(tape1_rse_id, dst_rse_id, ranking=15)
    add_distance(tape2_rse_id, dst_rse_id, ranking=10)
    # add same file to all source RSEs
    file = {
        'scope': mock_scope,
        'name': 'lfn.' + generate_uuid(),
        'type': 'FILE',
        'bytes': 1,
        'adler32': 'beefdead'
    }
    did = {'scope': file['scope'], 'name': file['name']}
    for rse_id in source_rses:
        add_replicas(rse_id=rse_id, files=[file], account=root_account)
    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=dst_rse_name,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)
    request = request_core.get_request_by_did(rse_id=dst_rse_id, **did)
    # On equal priority and distance, disk should be preferred over tape. Both disk sources will be returned
    [[_, [transfer]]] = next_transfers_to_submit(rses=all_rses).items()
    assert len(transfer[0].legacy_sources) == 2
    assert transfer[0].legacy_sources[0][0] in (disk1_rse_name, disk2_rse_name)
    # Change the rating of the disk RSEs. Disk still preferred, because it must fail twice before tape is tried
    __fake_source_ranking(request, disk1_rse_id, -1)
    __fake_source_ranking(request, disk2_rse_id, -1)
    [[_, [transfer]]] = next_transfers_to_submit(rses=all_rses).items()
    assert len(transfer[0].legacy_sources) == 2
    assert transfer[0].legacy_sources[0][0] in (disk1_rse_name, disk2_rse_name)
    # Change the rating of the disk RSEs again. Tape RSEs must now be preferred.
    # Multiple tape sources are not allowed. Only one tape RSE source must be returned.
    __fake_source_ranking(request, disk1_rse_id, -2)
    __fake_source_ranking(request, disk2_rse_id, -2)
    [[_, transfers]] = next_transfers_to_submit(rses=all_rses).items()
    assert len(transfers) == 1
    transfer = transfers[0]
    assert len(transfer[0].legacy_sources) == 1
    assert transfer[0].legacy_sources[0][0] in (tape1_rse_name, tape2_rse_name)
    # On equal source ranking, but different distance; the smaller distance is preferred
    [[_, [transfer]]] = next_transfers_to_submit(rses=all_rses).items()
    assert len(transfer[0].legacy_sources) == 1
    assert transfer[0].legacy_sources[0][0] == tape2_rse_name
    # On different source ranking, the bigger ranking is preferred
    __fake_source_ranking(request, tape2_rse_id, -1)
    [[_, [transfer]]] = next_transfers_to_submit(rses=all_rses).items()
    assert len(transfer[0].legacy_sources) == 1
    assert transfer[0].legacy_sources[0][0] == tape1_rse_name
def test_overwrite_on_tape(rse_factory, did_factory, root_account, core_config_mock, caches_mock):
    """ Ensure that overwrite is not set for transfers towards TAPE RSEs

    Submit a real transfer to FTS and rely on the gfal "mock" plugin to trigger a failure. The failure is triggered when
    gfal_stat is called on the destination URL and it returns a result. To achieve this via the mock plugin, it's enough to have
    a mock:// protocol/scheme and add size_pre=<something> url parameter.
    https://gitlab.cern.ch/dmc/gfal2/-/blob/master/src/plugins/mock/README_PLUGIN_MOCK
    """
    # +------+    +------+    +------+
    # |      |    |      |    |      |
    # | RSE1 +--->| RSE2 |--->| RSE3 |
    # |      |    |      |    |(tape)|
    # +------+    +------+    +------+
    rse1, rse1_id = rse_factory.make_rse(scheme='mock', protocol_impl='rucio.rse.protocols.posix.Default')
    rse2, rse2_id = rse_factory.make_rse(scheme='mock', protocol_impl='rucio.rse.protocols.posix.Default')
    rse3, rse3_id = rse_factory.make_rse(scheme='mock', protocol_impl='rucio.rse.protocols.posix.Default', rse_type=RSEType.TAPE)
    all_rses = [rse1_id, rse2_id, rse3_id]
    distance_core.add_distance(rse1_id, rse2_id, ranking=10)
    distance_core.add_distance(rse2_id, rse3_id, ranking=10)
    rse_core.add_rse_attribute(rse2_id, 'available_for_multihop', True)
    # Route every submission to the test FTS instance
    for rse_id in all_rses:
        rse_core.add_rse_attribute(rse_id, 'fts', 'https://fts:8446')
    # multihop transfer:
    did1 = did_factory.upload_test_file(rse1)
    # direct transfer:
    did2 = did_factory.upload_test_file(rse2)
    rule_core.add_rule(dids=[did1, did2], account=root_account, copies=1, rse_expression=rse3,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)
    # Wrap dest url generation to add size_pre=2 query parameter, so the gfal
    # mock plugin reports the destination file as already existing
    non_mocked_dest_url = transfer_core.DirectTransferDefinition._dest_url

    def mocked_dest_url(cls, *args):
        return set_query_parameters(non_mocked_dest_url(*args), {'size_pre': 2})

    with patch('rucio.core.transfer.DirectTransferDefinition._dest_url', new=mocked_dest_url):
        submitter(once=True, rses=[{'id': rse_id} for rse_id in all_rses], group_bulk=10,
                  partition_wait_time=None, transfertype='single', filter_transfertool=None)

    # Both the multihop and the direct transfer must fail because the
    # destination "exists" and overwrite is disabled towards tape
    request = __wait_for_request_state(dst_rse_id=rse3_id, state=RequestState.FAILED, **did1)
    assert request['state'] == RequestState.FAILED
    request = __wait_for_request_state(dst_rse_id=rse3_id, state=RequestState.FAILED, **did2)
    assert request['state'] == RequestState.FAILED
    assert 'Destination file exists and overwrite is not enabled' in request['err_msg']
def test_singlehop_vs_multihop_priority(rse_factory, root_account, mock_scope, core_config_mock, caches_mock):
    """
    On small distance difference, singlehop is prioritized over multihop
    due to HOP_PENALTY. On big difference, multihop is prioritized
    """
    # +------+         +------+
    # |      |    10   |      |
    # | RSE0 +-------->| RSE1 |
    # |      |         |      +-+ 10
    # +------+         +------+ |  +------+       +------+
    #                           +->|      |  200  |      |
    # +------+                     | RSE3 |<------| RSE4 |
    # |      |      30        +--->|      |       |      |
    # | RSE2 +----------------+    +------+       +------+
    # |      |
    # +------+
    _, rse0_id = rse_factory.make_posix_rse()
    _, rse1_id = rse_factory.make_posix_rse()
    _, rse2_id = rse_factory.make_posix_rse()
    rse3_name, rse3_id = rse_factory.make_posix_rse()
    _, rse4_id = rse_factory.make_posix_rse()
    add_distance(rse0_id, rse1_id, ranking=10)
    add_distance(rse1_id, rse3_id, ranking=10)
    add_distance(rse2_id, rse3_id, ranking=30)
    add_distance(rse4_id, rse3_id, ranking=200)
    # Only RSE1 may be used as an intermediate hop
    rse_core.add_rse_attribute(rse1_id, 'available_for_multihop', True)
    # add same file to two source RSEs
    file = {
        'scope': mock_scope,
        'name': 'lfn.' + generate_uuid(),
        'type': 'FILE',
        'bytes': 1,
        'adler32': 'beefdead'
    }
    did = {'scope': file['scope'], 'name': file['name']}
    for rse_id in [rse0_id, rse2_id]:
        add_replicas(rse_id=rse_id, files=[file], account=root_account)
    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=rse3_name,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)
    # The singlehop must be prioritized: RSE2->RSE3 (30) beats RSE0->RSE1->RSE3 (10+10 plus hop penalty)
    transfers, _reqs_no_source, _reqs_scheme_mismatch, _reqs_only_tape_source = get_transfer_requests_and_source_replicas(
        rses=rse_factory.created_rses)
    assert len(transfers) == 1
    transfer = next(iter(transfers.values()))
    assert transfer['file_metadata']['src_rse_id'] == rse2_id
    assert transfer['file_metadata']['dest_rse_id'] == rse3_id
    # add same file to two source RSEs
    file = {
        'scope': mock_scope,
        'name': 'lfn.' + generate_uuid(),
        'type': 'FILE',
        'bytes': 1,
        'adler32': 'beefdead'
    }
    did = {'scope': file['scope'], 'name': file['name']}
    for rse_id in [rse0_id, rse4_id]:
        add_replicas(rse_id=rse_id, files=[file], account=root_account)
    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=rse3_name,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)
    # The multihop must be prioritized over the 200-distance singlehop:
    # two transfers (one per hop) exist for this file
    transfers, _reqs_no_source, _reqs_scheme_mismatch, _reqs_only_tape_source = get_transfer_requests_and_source_replicas(
        rses=rse_factory.created_rses)
    assert len([t for t in transfers.values() if t['file_metadata']['name'] == did['name']]) == 2
def test_multihop_sources_created(rse_factory, did_factory, root_account, core_config_mock, caches_mock):
    """ Ensure that multihop transfers are handled and intermediate request correctly created """
    src_rse_name, src_rse_id = rse_factory.make_posix_rse()
    _, jump_rse1_id = rse_factory.make_posix_rse()
    _, jump_rse2_id = rse_factory.make_posix_rse()
    _, jump_rse3_id = rse_factory.make_posix_rse()
    dst_rse_name, dst_rse_id = rse_factory.make_posix_rse()
    jump_rses = [jump_rse1_id, jump_rse2_id, jump_rse3_id]
    all_rses = jump_rses + [src_rse_id, dst_rse_id]
    for rse_id in jump_rses:
        rse_core.add_rse_attribute(rse_id, 'available_for_multihop', True)
    rse_tombstone_delay = 3600
    rse_multihop_tombstone_delay = 12 * 3600
    default_multihop_tombstone_delay = 24 * 3600
    # if both attributes are set, the multihop one will take precedence
    rse_core.add_rse_attribute(jump_rse1_id, 'tombstone_delay', rse_tombstone_delay)
    rse_core.add_rse_attribute(jump_rse1_id, 'multihop_tombstone_delay', rse_multihop_tombstone_delay)
    # if the per-RSE multihop delay is not set, the global default multihop delay
    # takes precedence over the normal tombstone delay
    rse_core.add_rse_attribute(jump_rse2_id, 'tombstone_delay', rse_tombstone_delay)
    core_config.set(section='transfers', option='multihop_tombstone_delay', value=default_multihop_tombstone_delay)
    # if multihop delay is set to 0, the replica will have no tombstone
    rse_core.add_rse_attribute(jump_rse3_id, 'multihop_tombstone_delay', 0)
    # Linear chain: src -> jump1 -> jump2 -> jump3 -> dst
    distance_core.add_distance(src_rse_id, jump_rse1_id, ranking=10)
    distance_core.add_distance(jump_rse1_id, jump_rse2_id, ranking=10)
    distance_core.add_distance(jump_rse2_id, jump_rse3_id, ranking=10)
    distance_core.add_distance(jump_rse3_id, dst_rse_id, ranking=10)
    did = did_factory.upload_test_file(src_rse_name)
    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=dst_rse_name,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)
    submitter(once=True, rses=[{'id': rse_id} for rse_id in all_rses], partition_wait_time=None,
              transfertool='mock', transfertype='single', filter_transfertool=None)
    # Ensure that each intermediate request was correctly created
    for rse_id in jump_rses:
        assert request_core.get_request_by_did(rse_id=rse_id, **did)

    @read_session
    def __ensure_source_exists(rse_id, scope, name, session=None):
        # .one() raises if the source row is missing, failing the test
        return session.query(Source). \
            filter(Source.rse_id == rse_id). \
            filter(Source.scope == scope). \
            filter(Source.name == name). \
            one()

    # Ensure that sources where created for transfers
    for rse_id in jump_rses + [src_rse_id]:
        __ensure_source_exists(rse_id, **did)

    # Ensure the tombstone is correctly set on intermediate replicas
    # (5-minute window to absorb test execution time)
    expected_tombstone = datetime.utcnow() + timedelta(seconds=rse_multihop_tombstone_delay)
    replica = replica_core.get_replica(jump_rse1_id, **did)
    assert expected_tombstone - timedelta(minutes=5) < replica['tombstone'] < expected_tombstone + timedelta(minutes=5)
    expected_tombstone = datetime.utcnow() + timedelta(seconds=default_multihop_tombstone_delay)
    replica = replica_core.get_replica(jump_rse2_id, **did)
    assert expected_tombstone - timedelta(minutes=5) < replica['tombstone'] < expected_tombstone + timedelta(minutes=5)
    replica = replica_core.get_replica(jump_rse3_id, **did)
    assert replica['tombstone'] is None
def test_hop_penalty(rse_factory, did_factory, root_account, file_config_mock, core_config_mock, caches_mock):
    """Verify that the global hop_penalty and the per-RSE override are both honoured.

    Two candidate multihop paths lead to RSE3:

        RSE1 -> RSE2 -> RSE3   (RSE2 uses the global hop penalty)
        RSE4 -> RSE5 -> RSE3   (RSE5 carries an explicit hop_penalty of 20)

    With equal link rankings everywhere, the path through RSE2 is cheaper, so an
    intermediate request must be created on RSE2 and none on RSE5.
    """
    rse1, rse1_id = rse_factory.make_posix_rse()
    rse2, rse2_id = rse_factory.make_posix_rse()
    rse3, rse3_id = rse_factory.make_posix_rse()
    rse4, rse4_id = rse_factory.make_posix_rse()
    rse5, rse5_id = rse_factory.make_posix_rse()
    all_rses = [rse1_id, rse2_id, rse3_id, rse4_id, rse5_id]

    # Identical link rankings: only the hop penalties differentiate the paths
    for src_id, dst_id in ((rse1_id, rse2_id), (rse2_id, rse3_id), (rse4_id, rse5_id), (rse5_id, rse3_id)):
        distance_core.add_distance(src_id, dst_id, ranking=10)
    for intermediate_id in (rse2_id, rse5_id):
        rse_core.add_rse_attribute(intermediate_id, 'available_for_multihop', True)
    # Per-RSE override: makes the RSE5 path more expensive
    rse_core.add_rse_attribute(rse5_id, 'hop_penalty', 20)

    # The same file is available at the head of both candidate paths
    did = did_factory.random_did()
    for source_id in (rse1_id, rse4_id):
        replica_core.add_replica(rse_id=source_id, account=root_account, bytes_=1, **did)
    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=rse3, grouping='ALL',
                       weight=None, lifetime=None, locked=False, subscription_id=None)
    submitter(once=True, rses=[{'id': rse_id} for rse_id in all_rses], partition_wait_time=None,
              transfertool='mock', transfertype='single', ignore_availability=True)

    # The path must have been created through the cheap middle hop only
    request_core.get_request_by_did(rse_id=rse2_id, **did)
    with pytest.raises(RequestNotFound):
        request_core.get_request_by_did(rse_id=rse5_id, **did)
def test_globus(rse_factory, did_factory, root_account):
    """ Test bulk submissions with globus transfertool. Rely on mocks, because we don't contact a real globus server in tests """
    # +------+    +------+
    # |      |    |      |
    # | RSE1 +--->| RSE2 |
    # |      |    |      |
    # +------+    +------+
    #
    # +------+    +------+
    # |      |    |      |
    # | RSE3 +--->| RSE4 |
    # |      |    |      |
    # +------+    +------+
    rse1, rse1_id = rse_factory.make_posix_rse()
    rse2, rse2_id = rse_factory.make_posix_rse()
    rse3, rse3_id = rse_factory.make_posix_rse()
    rse4, rse4_id = rse_factory.make_posix_rse()
    all_rses = [rse1_id, rse2_id, rse3_id, rse4_id]
    distance_core.add_distance(rse1_id, rse2_id, ranking=10)
    distance_core.add_distance(rse3_id, rse4_id, ranking=10)
    # Each RSE reuses its own id as a (fake) globus endpoint id
    for rse_id in all_rses:
        rse_core.add_rse_attribute(rse_id, 'globus_endpoint_id', rse_id)
    # Single submission
    did1 = did_factory.upload_test_file(rse1)
    rule_core.add_rule(dids=[did1], account=root_account, copies=1, rse_expression=rse2,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)
    did2 = did_factory.upload_test_file(rse3)
    rule_core.add_rule(dids=[did2], account=root_account, copies=1, rse_expression=rse4,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)
    with patch('rucio.transfertool.globus.bulk_submit_xfer') as mock_bulk_submit:
        mock_bulk_submit.return_value = 0
        submitter(once=True, rses=[{'id': rse_id} for rse_id in all_rses], group_bulk=10,
                  partition_wait_time=None, transfertool='globus', transfertype='single', filter_transfertool=None)
        # Called separately for each job
        assert len(mock_bulk_submit.call_args_list) == 2
        (submitjob, ), _kwargs = mock_bulk_submit.call_args_list[0]
        assert len(submitjob) == 1
    # Bulk submission
    did1 = did_factory.upload_test_file(rse1)
    rule_core.add_rule(dids=[did1], account=root_account, copies=1, rse_expression=rse2,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)
    did2 = did_factory.upload_test_file(rse3)
    rule_core.add_rule(dids=[did2], account=root_account, copies=1, rse_expression=rse4,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)
    with patch('rucio.transfertool.globus.bulk_submit_xfer') as mock_bulk_submit:
        mock_bulk_submit.return_value = 0
        submitter(once=True, rses=[{'id': rse_id} for rse_id in all_rses], group_bulk=10,
                  partition_wait_time=None, transfertool='globus', transfertype='bulk', filter_transfertool=None)
        mock_bulk_submit.assert_called_once()
        (submitjob, ), _kwargs = mock_bulk_submit.call_args_list[0]
        # both jobs were grouped together and submitted in one call
        assert len(submitjob) == 2
        # Pick each job out of the bulk payload by the file name in its source url
        job_did1 = next(iter(filter(lambda job: did1['name'] in job['sources'][0], submitjob)))
        assert len(job_did1['sources']) == 1
        assert len(job_did1['destinations']) == 1
        assert job_did1['metadata']['src_rse'] == rse1
        assert job_did1['metadata']['dst_rse'] == rse2
        assert job_did1['metadata']['name'] == did1['name']
        assert job_did1['metadata']['source_globus_endpoint_id'] == rse1_id
        assert job_did1['metadata']['dest_globus_endpoint_id'] == rse2_id
        job_did2 = next(iter(filter(lambda job: did2['name'] in job['sources'][0], submitjob)))
        assert len(job_did2['sources']) == 1
        assert len(job_did2['destinations']) == 1
        assert job_did2['metadata']['src_rse'] == rse3
        assert job_did2['metadata']['dst_rse'] == rse4
        assert job_did2['metadata']['name'] == did2['name']
    # Requests must have been marked SUBMITTED even though submission was mocked
    request = request_core.get_request_by_did(rse_id=rse2_id, **did1)
    assert request['state'] == RequestState.SUBMITTED
    request = request_core.get_request_by_did(rse_id=rse4_id, **did2)
    assert request['state'] == RequestState.SUBMITTED
def test_source_avoid_deletion(vo, caches_mock, core_config_mock, rse_factory, did_factory, root_account, file_factory):
    """ Test that sources on a file block it from deletion """
    _, reaper_region = caches_mock
    src_rse1, src_rse1_id = rse_factory.make_mock_rse()
    src_rse2, src_rse2_id = rse_factory.make_mock_rse()
    dst_rse, dst_rse_id = rse_factory.make_mock_rse()
    all_rses = [src_rse1_id, src_rse2_id, dst_rse_id]
    any_source = f'{src_rse1}|{src_rse2}'
    # Pretend both source RSEs are out of space so the reaper wants to delete from them
    for rse_id in [src_rse1_id, src_rse2_id]:
        rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=1)
        rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=1, free=0)
    distance_core.add_distance(src_rse1_id, dst_rse_id, ranking=20)
    distance_core.add_distance(src_rse2_id, dst_rse_id, ranking=10)
    # Upload a test file to both rses without registering
    did = did_factory.random_did()
    # Register replica on one source RSE; the 1970 tombstone makes it immediately deletable
    replica_core.add_replica(rse_id=src_rse1_id, account=root_account, bytes_=1,
                             tombstone=datetime(year=1970, month=1, day=1), **did)
    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=dst_rse,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)
    # Reaper will not delete a file which only has one replica if there is any pending transfer for it
    reaper_region.invalidate()
    reaper(once=True, rses=[], include_rses=any_source, exclude_rses=None)
    replica = next(iter(replica_core.list_replicas(dids=[did], rse_expression=any_source)))
    assert len(replica['pfns']) == 1
    # Register replica on second source rse
    replica_core.add_replica(rse_id=src_rse2_id, account=root_account, bytes_=1,
                             tombstone=datetime(year=1970, month=1, day=1), **did)
    replica = next(iter(replica_core.list_replicas(dids=[did], rse_expression=any_source)))
    assert len(replica['pfns']) == 2
    # Submit the transfer. This will create the sources.
    submitter(once=True, rses=[{'id': rse_id} for rse_id in all_rses], partition_wait_time=None,
              transfertool='mock', transfertype='single', filter_transfertool=None)
    # None of the replicas will be removed. They are protected by an entry in the sources table
    reaper_region.invalidate()
    reaper(once=True, rses=[], include_rses=any_source, exclude_rses=None)
    replica = next(iter(replica_core.list_replicas(dids=[did], rse_expression=any_source)))
    assert len(replica['pfns']) == 2

    @transactional_session
    def __delete_sources(rse_id, scope, name, session=None):
        # Remove the protecting source rows directly in the database
        session.execute(delete(Source).where(Source.rse_id == rse_id, Source.scope == scope, Source.name == name))

    # Deletion succeeds for one replica (second still protected by existing request)
    __delete_sources(src_rse1_id, **did)
    __delete_sources(src_rse2_id, **did)
    reaper_region.invalidate()
    reaper(once=True, rses=[], include_rses=any_source, exclude_rses=None)
    replica = next(iter(replica_core.list_replicas(dids=[did], rse_expression=any_source)))
    assert len(replica['pfns']) == 1
def setup(self):
    """Create pre-existing RSEs/distances and build the importer payloads.

    Dict-based payload format (RSEs keyed by name under 'rses'). Handles both
    single-VO and multi-VO deployments via the 'common/multi_vo' config flag.
    """
    if config_get_bool('common', 'multi_vo', raise_exception=False, default=False):
        self.vo_header = {'X-Rucio-VO': 'tst'}
        self.vo = {'vo': 'tst'}
    else:
        self.vo_header = {}
        self.vo = {}
    # New RSE: does not exist yet; the importer is expected to create it
    self.new_rse = rse_name_generator()
    # RSE 1 that already exists
    self.old_rse_1 = rse_name_generator()
    self.old_rse_id_1 = add_rse(self.old_rse_1, availability=1, region_code='DE', country_name='DE',
                                deterministic=True, volatile=True, staging_area=True, time_zone='Europe',
                                latitude='1', longitude='2', **self.vo)
    add_protocol(
        self.old_rse_id_1, {
            'scheme': 'scheme1',
            'hostname': 'hostname1',
            'port': 1000,
            'impl': 'TODO'
        })
    add_protocol(
        self.old_rse_id_1, {
            'scheme': 'scheme3',
            'hostname': 'hostname3',
            'port': 1000,
            'impl': 'TODO'
        })
    # Pre-existing limits/attributes which the payload below overrides
    set_rse_limits(rse_id=self.old_rse_id_1, name='MaxBeingDeletedFiles', value='10')
    set_rse_limits(rse_id=self.old_rse_id_1, name='MinFreeSpace', value='10')
    add_rse_attribute(rse_id=self.old_rse_id_1, key='attr1', value='test10')
    add_rse_attribute(rse_id=self.old_rse_id_1, key='lfn2pfn_algorithm', value='test10')
    add_rse_attribute(rse_id=self.old_rse_id_1, key='verify_checksum', value=True)
    # RSE 2 that already exists
    self.old_rse_2 = rse_name_generator()
    self.old_rse_id_2 = add_rse(self.old_rse_2, **self.vo)
    # RSE 3 that already exists
    self.old_rse_3 = rse_name_generator()
    self.old_rse_id_3 = add_rse(self.old_rse_3, **self.vo)
    # RSE 4 that already exists
    self.old_rse_4 = rse_name_generator()
    self.old_rse_id_4 = add_rse(self.old_rse_4, **self.vo)
    # Distance that already exists (the importer should update, not re-create, it)
    add_distance(self.old_rse_id_1, self.old_rse_id_2)
    self.data1 = {
        'rses': {
            self.new_rse: {
                'rse_type': RSEType.TAPE,
                'availability': 3,
                'city': 'NewCity',
                'region_code': 'CH',
                'country_name': 'switzerland',
                'staging_area': False,
                'time_zone': 'Europe',
                'latitude': 1,
                'longitude': 2,
                'deterministic': True,
                'volatile': False,
                'protocols': [{
                    'scheme': 'scheme',
                    'hostname': 'hostname',
                    'port': 1000,
                    'impl': 'impl'
                }],
                'attributes': {
                    'attr1': 'test'
                },
                'MinFreeSpace': 20000,
                'lfn2pfn_algorithm': 'hash2',
                'verify_checksum': False,
                'availability_delete': True,
                'availability_read': False,
                'availability_write': True
            },
            self.old_rse_1: {
                'rse_type': RSEType.TAPE,
                'deterministic': False,
                'volatile': False,
                'region_code': 'US',
                'country_name': 'US',
                'staging_area': False,
                'time_zone': 'Asia',
                'longitude': 5,
                'city': 'City',
                'availability': 2,
                'latitude': 10,
                'protocols': [{
                    'scheme': 'scheme1',
                    'hostname': 'hostname1',
                    'port': 1000,
                    'prefix': 'prefix',
                    'impl': 'impl1'
                }, {
                    'scheme': 'scheme2',
                    'hostname': 'hostname2',
                    'port': 1001,
                    'impl': 'impl'
                }],
                'attributes': {
                    'attr1': 'test1',
                    'attr2': 'test2'
                },
                'MinFreeSpace': 10000,
                'MaxBeingDeletedFiles': 1000,
                'verify_checksum': False,
                'lfn2pfn_algorithm': 'hash3',
                'availability_delete': False,
                'availability_read': False,
                'availability_write': True
            },
            self.old_rse_2: {},
            self.old_rse_3: {}
        },
        'distances': {
            self.old_rse_1: {
                self.old_rse_2: {
                    'src_rse': self.old_rse_1,
                    'dest_rse': self.old_rse_2,
                    'ranking': 10
                },
                self.old_rse_3: {
                    'src_rse': self.old_rse_1,
                    'dest_rse': self.old_rse_3,
                    'ranking': 4
                }
            }
        }
    }
    # Minimal payload: only the new RSE
    self.data2 = {'rses': {self.new_rse: {'rse': self.new_rse}}}
    # Payload with an empty distances section
    self.data3 = {'distances': {}}
def test_disk_vs_tape_priority(rse_factory, root_account, mock_scope):
    """Source selection (legacy code path): DISK sources must be preferred over TAPE ones."""
    tape1_rse_name, tape1_rse_id = rse_factory.make_posix_rse(rse_type=RSEType.TAPE)
    tape2_rse_name, tape2_rse_id = rse_factory.make_posix_rse(rse_type=RSEType.TAPE)
    disk1_rse_name, disk1_rse_id = rse_factory.make_posix_rse(rse_type=RSEType.DISK)
    disk2_rse_name, disk2_rse_id = rse_factory.make_posix_rse(rse_type=RSEType.DISK)
    dst_rse_name, dst_rse_id = rse_factory.make_posix_rse()
    source_rses = [tape1_rse_id, tape2_rse_id, disk1_rse_id, disk2_rse_id]
    all_rses = source_rses + [dst_rse_id]
    # Symmetric setup: one disk and one tape at distance 15, one of each at 10
    add_distance(disk1_rse_id, dst_rse_id, ranking=15)
    add_distance(disk2_rse_id, dst_rse_id, ranking=10)
    add_distance(tape1_rse_id, dst_rse_id, ranking=15)
    add_distance(tape2_rse_id, dst_rse_id, ranking=10)
    # add same file to all source RSEs
    file = {
        'scope': mock_scope,
        'name': 'lfn.' + generate_uuid(),
        'type': 'FILE',
        'bytes': 1,
        'adler32': 'beefdead'
    }
    did = {'scope': file['scope'], 'name': file['name']}
    for rse_id in source_rses:
        add_replicas(rse_id=rse_id, files=[file], account=root_account)
    rule_core.add_rule(dids=[did], account=root_account, copies=1, rse_expression=dst_rse_name,
                       grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)
    request = request_core.get_request_by_did(rse_id=dst_rse_id, **did)

    @transactional_session
    def __fake_source_ranking(source_rse_id, new_ranking, session=None):
        # Update the existing source row if present; otherwise insert one
        # with the desired ranking (closes over `request` from above)
        rowcount = session.query(models.Source).filter(models.Source.rse_id == source_rse_id).update({'ranking': new_ranking})
        if not rowcount:
            models.Source(request_id=request['id'],
                          scope=request['scope'],
                          name=request['name'],
                          rse_id=source_rse_id,
                          dest_rse_id=request['dest_rse_id'],
                          ranking=new_ranking,
                          bytes=request['bytes'],
                          url=None,
                          is_using=False). \
                save(session=session, flush=False)

    # On equal priority and distance, disk should be preferred over tape. Both disk sources will be returned
    transfers, _reqs_no_source, _reqs_scheme_mismatch, _reqs_only_tape_source = get_transfer_requests_and_source_replicas(
        rses=all_rses)
    assert len(transfers) == 1
    transfer = next(iter(transfers.values()))
    assert len(transfer[0]['sources']) == 2
    assert transfer[0]['sources'][0][0] in (disk1_rse_name, disk2_rse_name)
    # Change the rating of the disk RSEs. Tape RSEs must now be preferred.
    # Multiple tape sources are not allowed. Only one tape RSE source must be returned.
    __fake_source_ranking(disk1_rse_id, -1)
    __fake_source_ranking(disk2_rse_id, -1)
    transfers, _reqs_no_source, _reqs_scheme_mismatch, _reqs_only_tape_source = get_transfer_requests_and_source_replicas(
        rses=all_rses)
    assert len(transfers) == 1
    transfer = next(iter(transfers.values()))
    assert len(transfer[0]['sources']) == 1
    assert transfer[0]['sources'][0][0] in (tape1_rse_name, tape2_rse_name)
    # On equal source ranking, but different distance; the smaller distance is preferred
    transfers, _reqs_no_source, _reqs_scheme_mismatch, _reqs_only_tape_source = get_transfer_requests_and_source_replicas(
        rses=all_rses)
    assert len(transfers) == 1
    transfer = next(iter(transfers.values()))
    assert len(transfer[0]['sources']) == 1
    assert transfer[0]['sources'][0][0] == tape2_rse_name
    # On different source ranking, the bigger ranking is preferred
    __fake_source_ranking(tape2_rse_id, -1)
    transfers, _reqs_no_source, _reqs_scheme_mismatch, _reqs_only_tape_source = get_transfer_requests_and_source_replicas(
        rses=all_rses)
    assert len(transfers) == 1
    transfer = next(iter(transfers.values()))
    assert len(transfer[0]['sources']) == 1
    assert transfer[0]['sources'][0][0] == tape1_rse_name