def test_list_replicas_all_states(self):
    """ REPLICA (CORE): list file replicas with all_states

    Registers the same 13 files on MOCK and MOCK3, switches the MOCK copies
    to COPYING, and checks that ``list_replicas(..., all_states=True)``
    yields one entry per file carrying the expected state for both RSEs.
    """
    tmp_scope = 'mock'
    nbfiles = 13

    # One descriptor per file; the name is made unique with a fresh uuid.
    files = []
    for _ in range(nbfiles):
        files.append({
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
            'meta': {'events': 10},
        })

    for rse_name in ('MOCK', 'MOCK3'):
        add_replicas(rse=rse_name, files=files, account='root', ignore_availability=True)

    # Only the MOCK replicas are moved to COPYING; MOCK3 is left untouched.
    for entry in files:
        update_replica_state('MOCK', tmp_scope, entry['name'], ReplicaState.COPYING)

    dids = [
        {'scope': entry['scope'], 'name': entry['name'], 'type': DIDType.FILE}
        for entry in files
    ]

    seen = 0
    for replica in list_replicas(dids=dids, schemes=['srm'], all_states=True):
        assert_in('states', replica)
        assert_equal(replica['states']['MOCK'], str(ReplicaState.COPYING))
        assert_equal(replica['states']['MOCK3'], str(ReplicaState.AVAILABLE))
        seen += 1

    # Every registered file must have been listed exactly once.
    assert_equal(nbfiles, seen)
def setUp(self):
    """Upload five files to two test RSEs and drive their replicas into known states.

    After the upload the local files are deleted, every replica on
    MOCK_SUSPICIOUS is declared suspicious three times, tmp_file2 is
    additionally declared bad there, and tmp_file3..5 are set UNAVAILABLE on
    MOCK_RECOVERY. The resulting replica states and bad-replica records are
    asserted before the fixture returns.
    """
    multi_vo = config_get_bool('common', 'multi_vo', raise_exception=False, default=False)
    self.vo = {'vo': get_vo()} if multi_vo else {}

    self.replica_client = ReplicaClient()

    # The two RSEs the scenario runs on
    self.rse4suspicious = 'MOCK_SUSPICIOUS'
    self.rse4suspicious_id = get_rse_id(self.rse4suspicious, **self.vo)
    self.rse4recovery = 'MOCK_RECOVERY'
    self.rse4recovery_id = get_rse_id(self.rse4recovery, **self.vo)
    self.scope = 'mock'
    self.internal_scope = InternalScope(self.scope, **self.vo)

    # For testing, we create 5 files and upload them to Rucio to two test RSEs.
    self.tmp_file1 = file_generator()
    self.tmp_file2 = file_generator()
    self.tmp_file3 = file_generator()
    self.tmp_file4 = file_generator()
    self.tmp_file5 = file_generator()
    tmp_files = [self.tmp_file1, self.tmp_file2, self.tmp_file3, self.tmp_file4, self.tmp_file5]
    names = [path.basename(tmp_file) for tmp_file in tmp_files]
    name1, name2, name3, name4, name5 = names

    self.listdids = [
        {'scope': self.internal_scope, 'name': name, 'type': DIDType.FILE}
        for name in names
    ]

    upload_template = 'rucio -v upload --rse {0} --scope {1} {2} {3} {4} {5} {6}'
    for rse_name in (self.rse4suspicious, self.rse4recovery):
        exitcode, _out, _err = execute(upload_template.format(rse_name, self.scope, *tmp_files))
        # checking if Rucio upload went OK
        assert exitcode == 0

    # Set fictional datatypes
    set_metadata(self.internal_scope, name4, 'datatype', 'testtypedeclarebad')
    set_metadata(self.internal_scope, name5, 'datatype', 'testtypenopolicy')

    # Allow for the RSEs to be affected by the suspicious file recovery daemon
    for rse_id in (self.rse4suspicious_id, self.rse4recovery_id):
        add_rse_attribute(rse_id, "enable_suspicious_file_recovery", True)

    # removing physical files from /tmp location - keeping only their DB info
    for tmp_file in tmp_files:
        remove(tmp_file)

    # Changing the replica statuses as follows:
    # ----------------------------------------------------------------------------------------------------------------------------------
    # Name         State(s) declared on MOCK_RECOVERY       State(s) declared on MOCK_SUSPICIOUS     Metadata "datatype"
    # ----------------------------------------------------------------------------------------------------------------------------------
    # tmp_file1    available                                suspicious (available)
    # tmp_file2    available                                suspicious + bad (unavailable)
    # tmp_file3    unavailable                              suspicious (available)                   RAW
    # tmp_file4    unavailable                              suspicious (available)                   testtypedeclarebad
    # tmp_file5    unavailable                              suspicious (available)                   testtypenopolicy
    # ----------------------------------------------------------------------------------------------------------------------------------
    # NOTE(review): only the datatypes of tmp_file4 and tmp_file5 are set in this method;
    # the RAW entry for tmp_file3 is not assigned here - confirm where it comes from.
    for replica in list_replicas(dids=self.listdids):
        suspicious_pfn = replica['rses'][self.rse4suspicious_id][0]

        # Three separate declarations, one second apart.
        for _ in range(3):
            print("Declaring suspicious file replica: " + suspicious_pfn)
            self.replica_client.declare_suspicious_file_replicas([suspicious_pfn], 'This is a good reason.')
            sleep(1)

        if replica['name'] == name2:
            print("Declaring bad file replica: " + suspicious_pfn)
            self.replica_client.declare_bad_file_replicas([suspicious_pfn], 'This is a good reason')

        for unavailable_name in (name3, name4, name5):
            if replica['name'] == unavailable_name:
                print("Updating replica state as unavailable: " + replica['rses'][self.rse4recovery_id][0])
                update_replica_state(self.rse4recovery_id, self.internal_scope, unavailable_name, ReplicaState.UNAVAILABLE)

    # Gather replica info after setting initial replica statuses and
    # check that the status changes were effective
    for replica in list_replicas(dids=self.listdids):
        if replica['name'] == name1:
            assert replica['states'][self.rse4suspicious_id] == 'AVAILABLE'
            assert replica['states'][self.rse4recovery_id] == 'AVAILABLE'
        if replica['name'] == name2:
            assert self.rse4suspicious_id not in replica['states']
            assert replica['states'][self.rse4recovery_id] == 'AVAILABLE'
        for unavailable_name in (name3, name4, name5):
            if replica['name'] == unavailable_name:
                assert replica['states'][self.rse4suspicious_id] == 'AVAILABLE'
                assert self.rse4recovery_id not in replica['states']

    # Checking if only self.tmp_file2 is declared as 'BAD'
    self.from_date = datetime.now() - timedelta(days=1)

    bad_records = list_bad_replicas_status(rse_id=self.rse4suspicious_id, younger_than=self.from_date, **self.vo)
    bad_checklist = [(record['name'], record['rse_id'], record['state']) for record in bad_records]
    expected_bad = (False, True, False, False, False)
    for name, should_be_bad in zip(names, expected_bad):
        listed = (name, self.rse4suspicious_id, BadFilesStatus.BAD) in bad_checklist
        assert listed is should_be_bad

    bad_records = list_bad_replicas_status(rse_id=self.rse4recovery_id, younger_than=self.from_date, **self.vo)
    bad_checklist = [(record['name'], record['rse_id'], record['state']) for record in bad_records]
    for name in names:
        assert (name, self.rse4recovery_id, BadFilesStatus.BAD) not in bad_checklist
def __create_missing_replicas_and_requests(
        transfer_path: "List[DirectTransferDefinition]",
        default_tombstone_delay: int,
        logger: "Callable",
        session: "Optional[Session]" = None
) -> "Tuple[bool, bool]":
    """
    Create replicas and requests in the database for the intermediate hops

    The path is walked from its last hop back to its first. Hops whose
    ``rws.request_id`` is already set are left alone. For every other hop a
    replica is registered on the hop's destination RSE, a request is queued
    (or an already existing one is looked up), a ``TransferHop`` row is saved
    and the hop's ``rws`` is updated with the request id and ``requested_at``.

    :param transfer_path:            Hops of the multihop transfer; the last element carries the initial request.
    :param default_tombstone_delay:  Delay used when the destination RSE has no 'multihop_tombstone_delay' attribute.
    :param logger:                   Callable invoked as ``logger(level, message, *args)``.
    :param session:                  Database session handed through to every database call.
    :returns:                        Tuple ``(creation_successful, must_skip_submission)``. The first element is
                                     False when a tombstone delay could not be parsed; the second is True when an
                                     existing request was reused for at least one hop.
    """
    initial_request_id = transfer_path[-1].rws.request_id
    creation_successful = True
    must_skip_submission = False

    # Iterate the path in reverse order. The last hop is the initial request, so
    # the request_id of the following hop will always be initialized when handling the current hop.
    for position in range(len(transfer_path) - 1, -1, -1):
        current_hop = transfer_path[position]
        req = current_hop.rws
        if req.request_id:
            # This hop is already backed by a request; nothing to create.
            continue

        delay = req.dest_rse.attributes.get('multihop_tombstone_delay', default_tombstone_delay)
        try:
            tombstone = tombstone_from_delay(delay)
        except ValueError:
            logger(logging.ERROR, "%s: Cannot parse multihop tombstone delay %s", initial_request_id, delay)
            creation_successful = False
            break

        replica_entry = {
            'scope': req.scope,
            'name': req.name,
            'bytes': req.byte_count,
            'adler32': req.adler32,
            'md5': req.md5,
            'tombstone': tombstone,
            'state': 'C',
        }
        try:
            add_replicas(rse_id=req.dest_rse.id,
                         files=[replica_entry],
                         account=req.account,
                         ignore_availability=False,
                         dataset_meta=None,
                         session=session)
            # Set replica state to Copying in case replica already existed in another state.
            # Can happen when a multihop transfer failed previously, and we are re-scheduling it now.
            update_replica_state(rse_id=req.dest_rse.id,
                                 scope=req.scope,
                                 name=req.name,
                                 state=ReplicaState.COPYING,
                                 session=session)
        except Exception as error:
            # Failure is only logged; the request for this hop is still queued below.
            logger(logging.ERROR, '%s: Problem adding replicas on %s : %s', initial_request_id, req.dest_rse, str(error))

        req.attributes['is_intermediate_hop'] = True
        # next_hop_request_id and initial_request_id are not used anymore in rucio >=1.28, but are needed
        # for running at the same time 1.27 and 1.28 on the same database.
        # TODO: remove following two rows
        next_hop_request_id = transfer_path[position + 1].rws.request_id
        req.attributes['next_hop_request_id'] = next_hop_request_id
        req.attributes['initial_request_id'] = initial_request_id
        req.attributes['source_replica_expression'] = current_hop.src.rse.name

        request_spec = {
            'dest_rse_id': req.dest_rse.id,
            'state': RequestState.QUEUED,
            'scope': req.scope,
            'name': req.name,
            # Dummy Rule ID used for multihop. TODO: Replace with actual rule_id once we can flag intermediate requests
            'rule_id': '00000000000000000000000000000000',
            'attributes': req.attributes,
            'request_type': req.request_type,
            'retry_count': req.retry_count,
            'account': req.account,
            'requested_at': datetime.datetime.now(),
        }
        if req.transfertool:
            request_spec['transfertool'] = req.transfertool

        # If a request already exists, queue_requests returns an empty list.
        queued = queue_requests(requests=[request_spec], session=session)
        if not queued:
            db_req = request_core.get_request_by_did(req.scope, req.name, req.dest_rse.id, session=session)
            # A transfer already exists for part of the path. Just construct the remaining
            # path, but don't submit the transfer. We must wait for the existing transfer to be
            # completed before continuing.
            must_skip_submission = True
            logger(logging.DEBUG, '%s: Reusing intermediate hop between %s and %s : %s',
                   initial_request_id, transfer_path[0].src, transfer_path[-1].dst, db_req['id'])
        else:
            db_req = queued[0]
            logger(logging.DEBUG, '%s: New request created for the transfer between %s and %s : %s',
                   initial_request_id, transfer_path[0].src, transfer_path[-1].dst, db_req['id'])

        models.TransferHop(
            request_id=db_req['id'],
            next_hop_request_id=next_hop_request_id,
            initial_request_id=initial_request_id,
        ).save(session=session, flush=False)

        req.request_id = db_req['id']
        req.requested_at = db_req['requested_at']

    return creation_successful, must_skip_submission
'name': 'file_%s' % generate_uuid(), 'bytes': 1L, 'adler32': '0cc737eb', 'meta': { 'events': 10 } } for i in xrange(nbfiles)] rses = ['MOCK', 'MOCK3'] for rse in rses: add_replicas(rse=rse, files=files, account='root', ignore_availability=True) for file in files: update_replica_state('MOCK', tmp_scope, file['name'], ReplicaState.COPYING) replica_cpt = 0 for replica in list_replicas(dids=[{ 'scope': f['scope'], 'name': f['name'], 'type': DIDType.FILE } for f in files], schemes=['srm'], all_states=True): assert_in('states', replica) assert_equal(replica['states']['MOCK'], str(ReplicaState.COPYING)) assert_equal(replica['states']['MOCK3'], str(ReplicaState.AVAILABLE)) replica_cpt += 1
assert_equal(None, get_replica_atime({'scope': files1[i]['scope'], 'name': files1[i]['name'], 'rse': 'MOCK'})) for i in range(0, nbfiles - 1): assert_equal(None, get_replica_atime({'scope': files2[i]['scope'], 'name': files2[i]['name'], 'rse': 'MOCK'})) def test_list_replicas_all_states(self): """ REPLICA (CORE): list file replicas with all_states""" tmp_scope = 'mock' nbfiles = 13 files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1L, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in xrange(nbfiles)] rses = ['MOCK', 'MOCK3'] for rse in rses: add_replicas(rse=rse, files=files, account='root', ignore_availability=True) for file in files: update_replica_state('MOCK', tmp_scope, file['name'], ReplicaState.COPYING) replica_cpt = 0 for replica in list_replicas(dids=[{'scope': f['scope'], 'name': f['name'], 'type': DIDType.FILE} for f in files], schemes=['srm'], all_states=True): assert_in('states', replica) assert_equal(replica['states']['MOCK'], ReplicaState.COPYING) assert_equal(replica['states']['MOCK3'], ReplicaState.AVAILABLE) replica_cpt += 1 assert_equal(nbfiles, replica_cpt) class TestReplicaClients: def setup(self): self.replica_client = ReplicaClient()
def setUp(self):
    """Upload three files to two test RSEs and drive their replicas into known states.

    After the upload the local files are deleted, every replica on
    MOCK_SUSPICIOUS is declared suspicious three times, tmp_file2 is
    additionally declared bad there, and tmp_file3 is set UNAVAILABLE on
    MOCK_RECOVERY. The resulting replica states and bad-replica records are
    asserted before the fixture returns.
    """
    self.replica_client = ReplicaClient()

    # Using two test RSEs
    self.rse4suspicious = 'MOCK_SUSPICIOUS'
    self.rse4recovery = 'MOCK_RECOVERY'
    self.scope = 'mock'

    # For testing, we create 3 files and upload them to Rucio to two test RSEs.
    self.tmp_file1 = file_generator()
    self.tmp_file2 = file_generator()
    self.tmp_file3 = file_generator()
    tmp_files = [self.tmp_file1, self.tmp_file2, self.tmp_file3]
    name1, name2, name3 = [path.basename(tmp_file) for tmp_file in tmp_files]

    self.listdids = [
        {'scope': self.scope, 'name': name, 'type': DIDType.FILE}
        for name in (name1, name2, name3)
    ]

    upload_template = 'rucio -v upload --rse {0} --scope {1} {2} {3} {4}'
    for rse_name in (self.rse4suspicious, self.rse4recovery):
        exitcode, _out, _err = execute(upload_template.format(rse_name, self.scope, *tmp_files))
        # checking if Rucio upload went OK
        assert_true(exitcode == 0)

    # removing physical files from /tmp location - keeping only their DB info
    for tmp_file in tmp_files:
        remove(tmp_file)

    # Changing the replica statuses as follows:
    # --------------------------------------------------------------------------------------------
    # Name         State(s) declared on MOCK_RECOVERY       State(s) declared on MOCK_SUSPICIOUS
    # --------------------------------------------------------------------------------------------
    # tmp_file1    available                                suspicious (available)
    # tmp_file2    available                                suspicious + bad (unavailable)
    # tmp_file3    unavailable                              suspicious (available)
    # --------------------------------------------------------------------------------------------
    for replica in list_replicas(dids=self.listdids):
        suspicious_pfn = replica['rses'][self.rse4suspicious][0]

        # Three separate declarations, one second apart.
        for _ in range(3):
            print("Declaring suspicious file replica: " + suspicious_pfn)
            self.replica_client.declare_suspicious_file_replicas([suspicious_pfn], 'This is a good reason.')
            sleep(1)

        if replica['name'] == name2:
            print("Declaring bad file replica: " + suspicious_pfn)
            self.replica_client.declare_bad_file_replicas([suspicious_pfn], 'This is a good reason')

        if replica['name'] == name3:
            print("Updating replica state as unavailable: " + replica['rses'][self.rse4recovery][0])
            update_replica_state(self.rse4recovery, self.scope, name3, ReplicaState.UNAVAILABLE)

    # Gather replica info after setting initial replica statuses and
    # check that the status changes were effective
    for replica in list_replicas(dids=self.listdids):
        if replica['name'] == name1:
            assert_true(replica['states'][self.rse4suspicious] == 'AVAILABLE')
            assert_true(replica['states'][self.rse4recovery] == 'AVAILABLE')
        if replica['name'] == name2:
            assert_true(self.rse4suspicious not in replica['states'])
            assert_true(replica['states'][self.rse4recovery] == 'AVAILABLE')
        if replica['name'] == name3:
            assert_true(replica['states'][self.rse4suspicious] == 'AVAILABLE')
            assert_true(self.rse4recovery not in replica['states'])

    # Checking if only self.tmp_file2 is declared as 'BAD'
    self.from_date = datetime.now() - timedelta(days=1)

    bad_records = list_bad_replicas_status(rse=self.rse4suspicious, younger_than=self.from_date)
    bad_checklist = [(record['name'], record['rse'], record['state']) for record in bad_records]
    for name, should_be_bad in ((name2, True), (name1, False), (name3, False)):
        listed = (name, self.rse4suspicious, BadFilesStatus.BAD) in bad_checklist
        assert_true(listed is should_be_bad)

    bad_records = list_bad_replicas_status(rse=self.rse4recovery, younger_than=self.from_date)
    bad_checklist = [(record['name'], record['rse'], record['state']) for record in bad_records]
    for name in (name1, name2, name3):
        assert_true((name, self.rse4recovery, BadFilesStatus.BAD) not in bad_checklist)