def test_abacus_collection_replica(self):
    """ ABACUS (COLLECTION REPLICA): Test update of collection replica. """
    # Two local test files; lifetime=-1 everywhere so the cleaner/reaper can
    # expire them later in this test.
    self.files = [{'did_scope': self.scope,
                   'did_name': 'file_' + generate_uuid(),
                   'path': file_generator(size=self.file_sizes),
                   'rse': self.rse,
                   'lifetime': -1} for i in range(0, 2)]
    self.did_client.add_did(self.scope, self.dataset, DIDType.DATASET, lifetime=-1)
    self.upload_client.upload(self.files)
    self.did_client.attach_dids(scope=self.scope,
                                name=self.dataset,
                                dids=[{'name': file['did_name'],
                                       'scope': file['did_scope']} for file in self.files])
    self.rule_client.add_replication_rule([{'scope': self.scope, 'name': self.dataset}], 1, self.rse, lifetime=-1)
    # Local copies are no longer needed after upload.
    [os.remove(file['path']) for file in self.files]

    # Check dataset replica after rule creation - initial data
    dataset_replica = [replica for replica in self.replica_client.list_dataset_replicas(self.scope, self.dataset)][0]
    assert_equal(dataset_replica['bytes'], 0)
    assert_equal(dataset_replica['length'], 0)
    assert_equal(dataset_replica['available_bytes'], 0)
    assert_equal(dataset_replica['available_length'], 0)
    assert_equal(str(dataset_replica['state']), 'UNAVAILABLE')

    # Run Abacus
    collection_replica.run(once=True)

    # Check dataset replica after abacus - abacus should update the collection_replica table from updated_col_rep
    dataset_replica = [replica for replica in self.replica_client.list_dataset_replicas(self.scope, self.dataset)][0]
    assert_equal(dataset_replica['bytes'], len(self.files) * self.file_sizes)
    assert_equal(dataset_replica['length'], len(self.files))
    assert_equal(dataset_replica['available_bytes'], len(self.files) * self.file_sizes)
    assert_equal(dataset_replica['available_length'], len(self.files))
    assert_equal(str(dataset_replica['state']), 'AVAILABLE')

    # Delete one file -> collection replica should be unavailable
    cleaner.run(once=True)
    delete_replicas(rse_id=self.rse_id,
                    files=[{'name': self.files[0]['did_name'],
                            'scope': InternalScope(self.files[0]['did_scope'], **self.vo)}])
    self.rule_client.add_replication_rule([{'scope': self.scope, 'name': self.dataset}], 1, self.rse, lifetime=-1)
    collection_replica.run(once=True)
    # Totals unchanged, but availability drops by one file.
    dataset_replica = [replica for replica in self.replica_client.list_dataset_replicas(self.scope, self.dataset)][0]
    assert_equal(dataset_replica['length'], len(self.files))
    assert_equal(dataset_replica['bytes'], len(self.files) * self.file_sizes)
    assert_equal(dataset_replica['available_length'], len(self.files) - 1)
    assert_equal(dataset_replica['available_bytes'], (len(self.files) - 1) * self.file_sizes)
    assert_equal(str(dataset_replica['state']), 'UNAVAILABLE')

    # Delete all files -> collection replica should be deleted
    cleaner.run(once=True)
    reaper.run(once=True, rses=[self.rse], greedy=True)
    self.rule_client.add_replication_rule([{'scope': self.scope, 'name': self.dataset}], 1, self.rse, lifetime=-1)
    collection_replica.run(once=True)
    dataset_replica = [replica for replica in self.replica_client.list_dataset_replicas(self.scope, self.dataset)]
    assert_equal(len(dataset_replica), 0)
def __update_datasets(dataset_queue, logger=logging.log):
    """
    Drain ``dataset_queue`` and propagate the most recent access times.

    Touch events are first aggregated in memory — keeping only the newest
    ``accessed_at`` per dataset and per (dataset, RSE) pair — and then flushed
    to the datasets, dataset-locks and collection-replicas tables.  Any entry
    whose flush fails is put back on the queue to be retried on a later run.

    :param dataset_queue: queue of dicts with 'scope', 'name', 'rse_id' and 'accessed_at'.
    :param logger: logging callable invoked as logger(level, message).
    """
    len_ds = dataset_queue.qsize()
    datasets = {}  # did -> newest accessed_at
    dslocks = {}   # did -> {rse_id -> newest accessed_at}
    now = time()
    for _ in range(0, len_ds):
        dataset = dataset_queue.get()
        did = '%s:%s' % (dataset['scope'].internal, dataset['name'])
        rse = dataset['rse_id']
        # Keep only the most recent access time per dataset.
        if did not in datasets:
            datasets[did] = dataset['accessed_at']
        else:
            datasets[did] = max(datasets[did], dataset['accessed_at'])
        # Entries without an RSE only update the dataset itself.
        if rse is None:
            continue
        if did not in dslocks:
            dslocks[did] = {}
        if rse not in dslocks[did]:
            dslocks[did][rse] = dataset['accessed_at']
        else:
            dslocks[did][rse] = max(dataset['accessed_at'], dslocks[did][rse])
    logger(logging.INFO, 'fetched %d datasets from queue (%ds)' % (len_ds, time() - now))

    # Flush dataset access times.
    total, failed, start = 0, 0, time()
    for did, accessed_at in datasets.items():
        scope, name = did.split(':')
        scope = InternalScope(scope, fromExternal=False)
        update_did = {'scope': scope, 'name': name, 'type': DIDType.DATASET, 'accessed_at': accessed_at}
        # if update fails, put back in queue and retry next time
        if not touch_dids((update_did, )):
            update_did['rse_id'] = None
            dataset_queue.put(update_did)
            failed += 1
        total += 1
    logger(logging.INFO, 'update done for %d datasets, %d failed (%ds)' % (total, failed, time() - start))

    # Flush per-RSE dataset lock access times.
    total, failed, start = 0, 0, time()
    for did, rses in dslocks.items():
        scope, name = did.split(':')
        scope = InternalScope(scope, fromExternal=False)
        for rse, accessed_at in rses.items():
            update_dslock = {'scope': scope, 'name': name, 'rse_id': rse, 'accessed_at': accessed_at}
            # if update fails, put back in queue and retry next time
            if not touch_dataset_locks((update_dslock, )):
                dataset_queue.put(update_dslock)
                failed += 1
            total += 1
    logger(logging.INFO, 'update done for %d locks, %d failed (%ds)' % (total, failed, time() - start))

    # Flush per-RSE collection replica access times (same aggregation as locks).
    total, failed, start = 0, 0, time()
    for did, rses in dslocks.items():
        scope, name = did.split(':')
        scope = InternalScope(scope, fromExternal=False)
        for rse, accessed_at in rses.items():
            update_dslock = {'scope': scope, 'name': name, 'rse_id': rse, 'accessed_at': accessed_at}
            # if update fails, put back in queue and retry next time
            if not touch_collection_replicas((update_dslock, )):
                dataset_queue.put(update_dslock)
                failed += 1
            total += 1
    logger(logging.INFO, 'update done for %d collection replicas, %d failed (%ds)' % (total, failed, time() - start))
def add_missing_replicas(self, missing):
    """
    Register replicas for LFNs that Rucio is missing and attach them to the block.

    :param missing: iterable of possibly missing LFNs.
    :return: number of LFNs that additionally had to be attached to the block.
    """
    with monitor.record_timer_block('cms_sync.time_add_replica'):
        # Fix: bind up front.  Previously this was only assigned inside the
        # non-dry-run branch, so an empty `missing` or a dry run made the
        # final `return len(missing_lfns)` raise UnboundLocalError.
        missing_lfns = []
        if missing and self.dry_run:
            logging.info('Dry run: Adding replicas %s to rse %s.', str(missing), self.rse)
        elif missing:
            logging.info('Adding %s replicas to rse %s.', len(missing), self.rse)
            replicas_to_add = [self.replicas[lfn] for lfn in missing]
            files = replica_file_list(replicas=replicas_to_add, scope=self.scope)
            for rucio_file in files:
                try:
                    # First try to flip an existing replica row to AVAILABLE.
                    update_file = copy.deepcopy(rucio_file)
                    update_file.update({'scope': InternalScope(self.scope), "rse_id": self.rse_id, "state": "A"})
                    update_replicas_states(replicas=[update_file], add_tombstone=False)
                except ReplicaNotFound:
                    # No replica row yet: add it, resurrecting the DID if the add fails.
                    resurrect_file = copy.deepcopy(rucio_file)
                    resurrect_file.update({'scope': 'cms', 'type': 'FILE'})
                    try:
                        add_replicas(rse=self.rse, files=[resurrect_file], issuer=self.account,
                                     ignore_availability=True)
                    except RucioException:
                        logging.critical('Could not add %s to %s. Constraint violated?', resurrect_file, self.rse)
                        resurrect_file.update({'scope': 'cms', 'type': 'FILE'})  # Reset to Internal scope by call
                        resurrect([resurrect_file], issuer=self.account)
                        resurrect_file.update({'scope': 'cms', 'type': 'FILE'})  # Reset to Internal scope by call
                        add_replicas(rse=self.rse, files=[resurrect_file], issuer=self.account,
                                     ignore_availability=True)
                        logging.critical('Resurrected %s at %s', resurrect_file, self.rse)

            # add_replicas(rse=self.rse, files=files, issuer=self.account)
            # Anything still not listed in the block needs an explicit attach.
            lfns = [item['name'] for item in list_files(scope=self.scope, name=self.block_name, long=False)]
            missing_lfns = list(set(missing) - set(lfns))
            if missing_lfns:
                dids = [{'scope': self.scope, 'name': lfn} for lfn in missing_lfns]
                try:
                    attach_dids(scope=self.scope, name=self.block_name, attachment={'dids': dids}, issuer=self.account)
                except FileAlreadyExists:
                    logging.warning('Trying to attach already existing files to %s', self.block_name)
                except DataIdentifierNotFound:
                    logging.critical('Could not attach to %s at %s. Constraint violated?', self.block_name, self.rse)
                except UnsupportedOperation:
                    # Bulk attach not supported; retry one DID at a time.
                    for did in dids:
                        did['scope'] = self.scope  # Get's converted to object
                        retry_dids = [did]
                        try:
                            attach_dids(scope=self.scope, name=self.block_name, attachment={'dids': retry_dids}, issuer=self.account)
                            logging.warning('Attaching LFNs one at a time: %s to %s at %s' % (did['name'], self.block_name, self.rse))
                        except UnsupportedOperation:
                            logging.warning('Failed to attach %s to %s at %s', did['name'], self.block_name, self.rse)
    return len(missing_lfns)
def test_abacus_collection_replica_new(vo, rse_factory, rucio_client, did_factory, core_config_mock, caches_mock):
    """ ABACUS (COLLECTION REPLICA): Test update of collection replica. """
    file_sizes = 2
    nfiles = 2
    dataset_scope = 'mock'
    # Fresh posix RSE with an uploaded two-file dataset.
    rse, rse_id = rse_factory.make_posix_rse()
    dids = did_factory.upload_test_dataset(rse_name=rse, scope=dataset_scope, size=file_sizes, nb_files=nfiles)
    files = [{'scope': did['did_scope'], 'name': did['did_name']} for did in dids]
    dataset = dids[0]['dataset_name']
    # lifetime=-1 so cleaner/reaper can expire the data later in the test.
    rucio_client.set_metadata(dataset_scope, dataset, 'lifetime', -1)
    rucio_client.add_replication_rule([{'scope': dataset_scope, 'name': dataset}], 1, rse, lifetime=-1)

    # Check dataset replica after rule creation - initial data
    dataset_replica = [replica for replica in rucio_client.list_dataset_replicas(dataset_scope, dataset)][0]
    assert dataset_replica['bytes'] == 0
    assert dataset_replica['length'] == 0
    assert dataset_replica['available_bytes'] == 0
    assert dataset_replica['available_length'] == 0
    assert str(dataset_replica['state']) == 'UNAVAILABLE'

    # Run Abacus
    collection_replica.run(once=True)

    # Check dataset replica after abacus - abacus should update the collection_replica table from updated_col_rep
    dataset_replica = [replica for replica in rucio_client.list_dataset_replicas(dataset_scope, dataset)][0]
    assert dataset_replica['bytes'] == len(files) * file_sizes
    assert dataset_replica['length'] == len(files)
    assert dataset_replica['available_bytes'] == len(files) * file_sizes
    assert dataset_replica['available_length'] == len(files)
    assert str(dataset_replica['state']) == 'AVAILABLE'

    # Delete one file -> collection replica should be unavailable
    cleaner.run(once=True)
    delete_replicas(rse_id=rse_id, files=[{'name': files[0]['name'],
                                           'scope': InternalScope(dataset_scope, vo)}])
    rucio_client.add_replication_rule([{'scope': dataset_scope, 'name': dataset}], 1, rse, lifetime=-1)
    collection_replica.run(once=True)
    # Totals unchanged; availability drops by one file.
    dataset_replica = [replica for replica in rucio_client.list_dataset_replicas(dataset_scope, dataset)][0]
    assert dataset_replica['length'] == len(files)
    assert dataset_replica['bytes'] == len(files) * file_sizes
    assert dataset_replica['available_length'] == len(files) - 1
    assert dataset_replica['available_bytes'] == (len(files) - 1) * file_sizes
    assert str(dataset_replica['state']) == 'UNAVAILABLE'

    # Delete all files -> collection replica should be deleted
    # New behaviour (dataset should be deleted)
    cleaner.run(once=True)
    delete_replicas(rse_id=rse_id, files=[{'name': files[1]['name'],
                                           'scope': InternalScope(dataset_scope, vo)}])
    with pytest.raises(DataIdentifierNotFound):
        get_did(scope=InternalScope(dataset_scope), name=dataset)
def mock_scope(vo):
    """Fixture: provide the canonical 'mock' scope as an InternalScope for *vo*."""
    from rucio.common.types import InternalScope
    scope = InternalScope('mock', vo=vo)
    return scope
def process_result_value(self, value, dialect):
    """Convert a raw scope value loaded from the DB into an InternalScope; NULLs pass through unchanged."""
    return value if value is None else InternalScope(value, fromExternal=False)
def test_undertaker(self):
    """ UNDERTAKER (CORE): Test the undertaker. """
    tmp_scope = InternalScope('mock', **self.vo)
    jdoe = InternalAccount('jdoe', **self.vo)
    root = InternalAccount('root', **self.vo)

    nbdatasets = 5
    nbfiles = 5
    rse = 'MOCK'
    rse_id = get_rse_id('MOCK', **self.vo)

    # Unlimited quota for jdoe on the RSE.
    set_account_limit(jdoe, rse_id, -1)

    # Expired (lifetime=-1) datasets without rules at creation time ...
    dsns1 = [{'name': 'dsn_%s' % generate_uuid(),
              'scope': tmp_scope,
              'type': 'DATASET',
              'lifetime': -1} for i in range(nbdatasets)]

    # ... and expired datasets created together with a replication rule.
    dsns2 = [{'name': 'dsn_%s' % generate_uuid(),
              'scope': tmp_scope,
              'type': 'DATASET',
              'lifetime': -1,
              'rules': [{'account': jdoe,
                         'copies': 1,
                         'rse_expression': rse,
                         'grouping': 'DATASET'}]} for i in range(nbdatasets)]

    add_dids(dids=dsns1 + dsns2, account=root)

    # Attach files (tombstones two weeks in the future) to every dataset.
    replicas = list()
    for dsn in dsns1 + dsns2:
        files = [{'scope': tmp_scope,
                  'name': 'file_%s' % generate_uuid(),
                  'bytes': 1,
                  'adler32': '0cc737eb',
                  'tombstone': datetime.utcnow() + timedelta(weeks=2),
                  'meta': {'events': 10}} for i in range(nbfiles)]
        attach_dids(scope=tmp_scope, name=dsn['name'], rse_id=rse_id, dids=files, account=root)
        replicas += files

    add_rules(dids=dsns1, rules=[{'account': jdoe,
                                  'copies': 1,
                                  'rse_expression': rse,
                                  'grouping': 'DATASET'}])

    undertaker(worker_number=1, total_workers=1, once=True)
    undertaker(worker_number=1, total_workers=1, once=True)

    # After the undertaker runs, every replica must carry a tombstone.
    for replica in replicas:
        assert_not_equal(get_replica(scope=replica['scope'], name=replica['name'], rse_id=rse_id)['tombstone'], None)
def test_scope(vo):
    """Fixture: provide the canonical 'test' scope as an InternalScope for *vo*."""
    from rucio.common.types import InternalScope
    scope = InternalScope('test', vo=vo)
    return scope
def test_reaper():
    """ REAPER2 (DAEMON): Test the reaper2 daemon."""
    # In multi-VO mode a second VO ('new') is provisioned so the test can
    # verify the reaper acts across VOs; otherwise a single RSE suffices.
    if config_get_bool('common', 'multi_vo', raise_exception=False, default=False):
        vo = {'vo': config_get('client', 'vo', raise_exception=False, default='tst')}
        new_vo = {'vo': 'new'}
        if not vo_core.vo_exists(**new_vo):
            vo_core.add_vo(description='Test', email='*****@*****.**', **new_vo)
        if not scope_core.check_scope(InternalScope('data13_hip', **new_vo)):
            scope_core.add_scope(InternalScope('data13_hip', **new_vo), InternalAccount('root', **new_vo))
        nb_rses = 2
    else:
        vo = {}
        new_vo = {}
        nb_rses = 1

    mock_protocol = {'scheme': 'MOCK',
                     'hostname': 'localhost',
                     'port': 123,
                     'prefix': '/test/reaper',
                     'impl': 'rucio.rse.protocols.mock.Default',
                     'domains': {
                         'lan': {'read': 1, 'write': 1, 'delete': 1},
                         'wan': {'read': 1, 'write': 1, 'delete': 1}}}

    nb_files = 250
    file_size = 200  # 2G
    rse_names = []
    all_file_names = []
    for j in range(nb_rses):
        rse_name = rse_name_generator()
        rse_names.append(rse_name)
        rse_id = rse_core.add_rse(rse_name, **vo)
        rse_core.add_protocol(rse_id=rse_id, parameter=mock_protocol)
        if new_vo:
            # Mirror the RSE under the second VO with the same name.
            rse_id_new = rse_core.add_rse(rse_name, **new_vo)
            rse_core.add_protocol(rse_id=rse_id_new, parameter=mock_protocol)

        # Already-expired replicas (tombstone one day in the past).
        file_names = []
        for i in range(nb_files):
            file_name = 'lfn' + generate_uuid()
            file_names.append(file_name)
            replica_core.add_replica(rse_id=rse_id, scope=InternalScope('data13_hip', **vo),
                                     name=file_name, bytes=file_size,
                                     tombstone=datetime.utcnow() - timedelta(days=1),
                                     account=InternalAccount('root', **vo), adler32=None, md5=None)
            if new_vo:
                replica_core.add_replica(rse_id=rse_id_new, scope=InternalScope('data13_hip', **new_vo),
                                         name=file_name, bytes=file_size,
                                         tombstone=datetime.utcnow() - timedelta(days=1),
                                         account=InternalAccount('root', **new_vo), adler32=None, md5=None)
        all_file_names.append(file_names)
        # Report the RSE as essentially full so the reaper has work to do.
        rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=1)
        rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=50 * file_size)
        # rse_core.set_rse_limits(rse_id=rse_id, name='MaxBeingDeletedFiles', value=10)
        if new_vo:
            rse_core.set_rse_usage(rse_id=rse_id_new, source='storage', used=nb_files * file_size, free=1)
            rse_core.set_rse_limits(rse_id=rse_id_new, name='MinFreeSpace', value=50 * file_size)
            # rse_core.set_rse_limits(rse_id=rse_id_new, name='MaxBeingDeletedFiles', value=10)

    # Drop the daemon's cached view of RSE state before running it.
    from rucio.daemons.reaper.reaper2 import REGION
    REGION.invalidate()

    if not vo:
        assert len(list(replica_core.list_replicas(dids=[{'scope': InternalScope('data13_hip', **vo), 'name': n} for n in all_file_names[0]],
                                                   rse_expression=rse_name))) == nb_files

        # Check first if the reaper does not delete anything if no space is needed
        rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=323000000000)
        reaper(once=True, rses=[], include_rses=rse_names[0], exclude_rses=[])
        assert len(list(replica_core.list_replicas(dids=[{'scope': InternalScope('data13_hip', **vo), 'name': n} for n in all_file_names[0]],
                                                   rse_expression=rse_name))) == nb_files

        # Now put it over threshold and delete
        rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=1)
        from rucio.daemons.reaper.reaper2 import REGION
        REGION.invalidate()
        reaper(once=True, rses=[], include_rses=rse_names[0], exclude_rses=[])
        reaper(once=True, rses=[], include_rses=rse_names[0], exclude_rses=[])
        assert len(list(replica_core.list_replicas(dids=[{'scope': InternalScope('data13_hip', **vo), 'name': n} for n in all_file_names[0]],
                                                   rse_expression=rse_name))) == 200
    else:
        # Check we reap all VOs by default
        reaper(once=True, rses=[], include_rses=rse_names[0], exclude_rses=[])
        reaper(once=True, rses=[], include_rses=rse_names[0], exclude_rses=[])
        assert len(list(replica_core.list_replicas(dids=[{'scope': InternalScope('data13_hip', **vo), 'name': n} for n in all_file_names[0]],
                                                   rse_expression=rse_names[0]))) == 200
        assert len(list(replica_core.list_replicas(dids=[{'scope': InternalScope('data13_hip', **new_vo), 'name': n} for n in all_file_names[0]],
                                                   rse_expression=rse_names[0]))) == 200
def test_judge_evaluate_detach(self):
    """ JUDGE EVALUATOR: Test if the detach is done correctly"""
    re_evaluator(once=True, did_limit=1000)

    scope = InternalScope('mock', **self.vo)
    container = 'container_' + str(uuid())
    add_did(scope, container, DIDType.CONTAINER, self.jdoe)

    # Build three datasets of three files each and attach them to the
    # container (previously the same stanza was copy-pasted three times).
    # After the loop, `files` and `dataset` refer to the LAST dataset,
    # which is the one the detach below operates on — same as before.
    for _ in range(3):
        files = create_files(3, scope, self.rse1_id, bytes_=100)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.DATASET, self.jdoe)
        attach_dids(scope, dataset, files, self.jdoe)
        attach_dids(scope, container, [{'scope': scope, 'name': dataset}], self.jdoe)

    # Add a first rule to the Container
    rule_id = add_rule(dids=[{'scope': scope, 'name': container}],
                       account=self.jdoe,
                       copies=1,
                       rse_expression=self.rse1,
                       grouping='ALL',
                       weight=None,
                       lifetime=None,
                       locked=False,
                       subscription_id=None)[0]

    # Fake judge
    re_evaluator(once=True, did_limit=1000)

    # 3 datasets x 3 files -> 9 satisfied locks.
    assert (9 == get_rule(rule_id)['locks_ok_cnt'])

    detach_dids(scope, dataset, [files[0]])

    # Fake judge
    re_evaluator(once=True, did_limit=1000)

    # One file detached -> one lock fewer.
    assert (8 == get_rule(rule_id)['locks_ok_cnt'])
def test_queue_requests_state(vo, use_preparer):
    """ REQUEST (CORE): test queuing requests """
    # Map the human-readable parametrization onto a boolean; anything else
    # is an unknown parameter and the test xfails.
    if use_preparer == 'preparer enabled':
        use_preparer = True
    elif use_preparer == 'preparer disabled':
        use_preparer = False
    else:
        return pytest.xfail(reason=f'unknown test parameter use_preparer={use_preparer}')

    db_session = session.get_session()
    dest_rse = 'MOCK'
    dest_rse2 = 'MOCK2'
    source_rse = 'MOCK4'
    source_rse2 = 'MOCK5'
    dest_rse_id = get_rse_id(dest_rse, vo=vo)
    dest_rse_id2 = get_rse_id(dest_rse2, vo=vo)
    source_rse_id = get_rse_id(source_rse, vo=vo)
    source_rse_id2 = get_rse_id(source_rse2, vo=vo)
    scope = InternalScope('mock', vo=vo)
    account = InternalAccount('root', vo=vo)
    user_activity = 'User Subscription'
    config_set('conveyor', 'use_preparer', str(use_preparer))
    # With the preparer enabled new requests must land in PREPARING,
    # otherwise they go straight to QUEUED.
    target_state = RequestState.PREPARING if use_preparer else RequestState.QUEUED

    name = generate_uuid()
    name2 = generate_uuid()
    name3 = generate_uuid()
    add_replica(source_rse_id, scope, name, 1, account, session=db_session)
    add_replica(source_rse_id2, scope, name2, 1, account, session=db_session)
    add_replica(source_rse_id, scope, name3, 1, account, session=db_session)

    set_rse_transfer_limits(dest_rse_id, user_activity, max_transfers=1, session=db_session)
    set_rse_transfer_limits(dest_rse_id2, user_activity, max_transfers=1, session=db_session)
    set_rse_transfer_limits(source_rse_id, user_activity, max_transfers=1, session=db_session)
    set_rse_transfer_limits(source_rse_id2, user_activity, max_transfers=1, session=db_session)

    # Three transfer requests covering different source/destination pairs
    # and activities (the second uses an 'unknown' activity).
    requests = [{
        'dest_rse_id': dest_rse_id,
        'src_rse_id': source_rse_id,
        'request_type': RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name,
        'scope': scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': datetime.now().replace(year=2015),
        'attributes': {
            'activity': user_activity,
            'bytes': 10,
            'md5': '',
            'adler32': ''
        }
    }, {
        'dest_rse_id': dest_rse_id,
        'src_rse_id': source_rse_id2,
        'request_type': RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name2,
        'scope': scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': datetime.now().replace(year=2015),
        'attributes': {
            'activity': 'unknown',
            'bytes': 10,
            'md5': '',
            'adler32': ''
        }
    }, {
        'dest_rse_id': dest_rse_id2,
        'src_rse_id': source_rse_id,
        'request_type': RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name3,
        'scope': scope,
        'rule_id': generate_uuid(),
        'retry_count': 1,
        'requested_at': datetime.now().replace(year=2015),
        'attributes': {
            'activity': user_activity,
            'bytes': 10,
            'md5': '',
            'adler32': ''
        }
    }]
    try:
        queue_requests(requests, session=db_session)
        request = get_request_by_did(scope, name, dest_rse_id, session=db_session)
        assert request['state'] == target_state
        request = get_request_by_did(scope, name2, dest_rse_id, session=db_session)
        assert request['state'] == target_state
        request = get_request_by_did(scope, name3, dest_rse_id2, session=db_session)
        assert request['state'] == target_state
    finally:
        # Clean up config and request/transfer tables regardless of outcome.
        config_remove_option('conveyor', 'use_preparer')
        db_session.query(models.Source).delete()
        db_session.query(models.Request).delete()
        db_session.query(models.RSETransferLimit).delete()
        db_session.query(models.Distance).delete()
        db_session.commit()
        reset_config_table()
def add_did(scope, name, type, issuer, account=None, statuses=None, meta=None, rules=None, lifetime=None, dids=None, rse=None, vo='def'):
    """
    Add data did.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param type: The data identifier type.
    :param issuer: The issuer account.
    :param account: The account owner. If None, then issuer is selected as owner.
    :param statuses: Dictionary with statuses, e.g. {'monotonic': True}.
    :param meta: Meta-data associated with the data identifier, as key/value pairs in a dictionary.
    :param rules: Replication rules associated with the data did. A list of dictionaries, e.g., [{'copies': 2, 'rse_expression': 'TIERS1'}, ].
    :param lifetime: DID's lifetime (in seconds).
    :param dids: The content.
    :param rse: The RSE name when registering replicas.
    :param vo: The VO to act on.
    """
    # Fix: the defaults used to be mutable ({} / []).  `meta` is mutated
    # below (naming-convention merge) and `rules`/`dids` entries are
    # rewritten in place, so a shared default object would leak one call's
    # changes into every later call.  None-sentinels are backward compatible.
    statuses = statuses if statuses is not None else {}
    meta = meta if meta is not None else {}
    rules = rules if rules is not None else []
    dids = dids if dids is not None else []

    v_did = {'name': name, 'type': type.upper(), 'scope': scope}
    validate_schema(name='did', obj=v_did)
    validate_schema(name='dids', obj=dids)
    validate_schema(name='rse', obj=rse)
    kwargs = {'scope': scope, 'name': name, 'type': type, 'issuer': issuer, 'account': account,
              'statuses': statuses, 'meta': meta, 'rules': rules, 'lifetime': lifetime}
    if not rucio.api.permission.has_permission(issuer=issuer, vo=vo, action='add_did', kwargs=kwargs):
        raise rucio.common.exception.AccessDenied('Account %s can not add data identifier to scope %s' % (issuer, scope))

    # Convert external identifiers into their internal representations.
    if account is not None:
        account = InternalAccount(account, vo=vo)
    issuer = InternalAccount(issuer, vo=vo)
    scope = InternalScope(scope, vo=vo)
    for d in dids:
        d['scope'] = InternalScope(d['scope'], vo=vo)
    for r in rules:
        r['account'] = InternalAccount(r['account'], vo=vo)

    rse_id = None
    if rse is not None:
        rse_id = get_rse_id(rse=rse, vo=vo)

    if type == 'DATASET':
        # naming_convention validation
        extra_meta = naming_convention.validate_name(scope=scope, name=name, did_type='D')

        # merge extra_meta with meta
        for k in extra_meta or {}:
            if k not in meta:
                meta[k] = extra_meta[k]
            elif meta[k] != extra_meta[k]:
                print("Provided metadata %s doesn't match the naming convention: %s != %s" % (k, meta[k], extra_meta[k]))
                raise rucio.common.exception.InvalidObject("Provided metadata %s doesn't match the naming convention: %s != %s" % (k, meta[k], extra_meta[k]))

    # Validate metadata
    meta_core.validate_meta(meta=meta, did_type=DIDType.from_sym(type))

    return did.add_did(scope=scope, name=name, type=DIDType.from_sym(type), account=account or issuer,
                       statuses=statuses, meta=meta, rules=rules, lifetime=lifetime, dids=dids, rse_id=rse_id)
def setUp(self):
    # Pick up the test VO only when the server runs in multi-VO mode.
    if config_get_bool('common', 'multi_vo', raise_exception=False, default=False):
        self.vo = {'vo': get_vo()}
    else:
        self.vo = {}
    self.root = InternalAccount('root', **self.vo)

    # add an S3 storage with a replica
    self.rc = client.ReplicaClient()
    self.rses3 = rse_name_generator()
    self.rses3_id = add_rse(self.rses3, **self.vo)
    add_protocol(self.rses3_id, {'scheme': 'https',
                                 'hostname': 'fake-rucio.s3-eu-south-8.amazonaws.com',
                                 'port': 443,
                                 'prefix': '/',
                                 'impl': 'rucio.rse.protocols.gfal.NoRename',
                                 'domains': {
                                     'lan': {'read': 1, 'write': 1, 'delete': 1},
                                     'wan': {'read': 1, 'write': 1, 'delete': 1, 'third_party_copy': 1}}})
    # Mark the endpoint as requiring signed URLs and register an FTS server.
    add_rse_attribute(rse_id=self.rses3_id, key='sign_url', value='s3')
    add_rse_attribute(rse_id=self.rses3_id, key='fts', value='localhost')
    self.files3 = [{'scope': InternalScope('mock', **self.vo),
                    'name': 'file-on-aws',
                    'bytes': 1234,
                    'adler32': 'deadbeef',
                    'meta': {'events': 123}}]
    add_replicas(rse_id=self.rses3_id, files=self.files3, account=self.root)

    # add a non-S3 storage with a replica
    self.rsenons3 = rse_name_generator()
    self.rsenons3_id = add_rse(self.rsenons3, **self.vo)
    add_protocol(self.rsenons3_id, {'scheme': 'https',
                                    'hostname': 'somestorage.ch',
                                    'port': 1094,
                                    'prefix': '/my/prefix',
                                    'impl': 'rucio.rse.protocols.gfal.Default',
                                    'domains': {
                                        'lan': {'read': 1, 'write': 1, 'delete': 1},
                                        'wan': {'read': 1, 'write': 1, 'delete': 1, 'third_party_copy': 1}}})
    add_rse_attribute(rse_id=self.rsenons3_id, key='fts', value='localhost')
    self.filenons3 = [{'scope': InternalScope('mock', **self.vo),
                       'name': 'file-on-storage',
                       'bytes': 1234,
                       'adler32': 'deadbeef',
                       'meta': {'events': 321}}]
    add_replicas(rse_id=self.rsenons3_id, files=self.filenons3, account=self.root)

    # set the distance both ways
    add_distance(self.rses3_id, self.rsenons3_id, ranking=1, agis_distance=1, geoip_distance=1)
    add_distance(self.rsenons3_id, self.rses3_id, ranking=1, agis_distance=1, geoip_distance=1)
def test_reaper():
    """ REAPER2 (DAEMON): Test the reaper2 daemon."""
    # In multi-VO mode the RSE expression below has to be VO-qualified.
    if config_get_bool('common', 'multi_vo', raise_exception=False, default=False):
        vo = {'vo': config_get('client', 'vo', raise_exception=False, default='tst')}
    else:
        vo = {}

    rse_name = rse_name_generator()
    rse_id = rse_core.add_rse(rse_name, **vo)

    mock_protocol = {'scheme': 'MOCK',
                     'hostname': 'localhost',
                     'port': 123,
                     'prefix': '/test/reaper',
                     'impl': 'rucio.rse.protocols.mock.Default',
                     'domains': {
                         'lan': {'read': 1, 'write': 1, 'delete': 1},
                         'wan': {'read': 1, 'write': 1, 'delete': 1}}}
    rse_core.add_protocol(rse_id=rse_id, parameter=mock_protocol)

    # Already-expired replicas (tombstone one day in the past).
    nb_files = 30
    file_size = 2147483648  # 2G
    file_names = []
    for i in range(nb_files):
        file_name = 'lfn' + generate_uuid()
        file_names.append(file_name)
        replica_core.add_replica(rse_id=rse_id, scope=InternalScope('data13_hip', **vo),
                                 name=file_name, bytes=file_size,
                                 tombstone=datetime.utcnow() - timedelta(days=1),
                                 account=InternalAccount('root', **vo), adler32=None, md5=None)

    # Report the RSE as nearly full so the reaper must free space.
    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=nb_files * file_size, free=800)
    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=10737418240)
    rse_core.set_rse_limits(rse_id=rse_id, name='MaxBeingDeletedFiles', value=10)

    if vo:
        reaper(once=True, rses=[], include_rses='vo=%s&(%s)' % (vo['vo'], rse_name), exclude_rses=[])
        reaper(once=True, rses=[], include_rses='vo=%s&(%s)' % (vo['vo'], rse_name), exclude_rses=[])
    else:
        reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=[])
        reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=[])

    # Two reaper passes are expected to have removed 5 replicas.
    assert_equal(len(list(replica_core.list_replicas(dids=[{'scope': InternalScope('data13_hip', **vo), 'name': n} for n in file_names],
                                                     rse_expression=rse_name))), nb_files - 5)
def protocols_setup(vo):
    """
    Fixture: create two site-tagged RSEs, each with one replica and three
    protocols (xrootd, gsiftp, davs) at different lan/wan priorities,
    yield their description, then tear everything down.
    """
    rse_info = copy.deepcopy(base_rse_info)
    files = [{'scope': InternalScope('mock', vo=vo), 'name': 'element_0', 'bytes': 1234, 'adler32': 'deadbeef'}]
    root = InternalAccount('root', vo=vo)

    for idx in range(len(rse_info)):
        rse_info[idx]['name'] = '%s_%s' % (rse_info[idx]['site'], rse_name_generator())
        rse_info[idx]['id'] = add_rse(rse_info[idx]['name'], vo=vo)
        add_rse_attribute(rse_id=rse_info[idx]['id'], key='site', value=base_rse_info[idx]['site'])
        add_replicas(rse_id=rse_info[idx]['id'], files=files, account=root)

    # invalidate cache for parse_expression('site=…')
    rse_expression_parser.REGION.invalidate()

    # check sites
    for idx in range(len(rse_info)):
        site_rses = rse_expression_parser.parse_expression('site=' + base_rse_info[idx]['site'])
        assert len(site_rses) > 0
        assert rse_info[idx]['id'] in [rse['id'] for rse in site_rses]

    # First RSE: xrootd preferred on both domains, davs second, gsiftp
    # wan-only (lan priorities 0 disable it on the lan domain).
    add_protocol(rse_info[0]['id'], {'scheme': schemes[0],
                                     'hostname': ('root.%s' % base_rse_info[0]['address']),
                                     'port': 1409,
                                     'prefix': '//test/chamber/',
                                     'impl': 'rucio.rse.protocols.xrootd.Default',
                                     'domains': {
                                         'lan': {'read': 1, 'write': 1, 'delete': 1},
                                         'wan': {'read': 1, 'write': 1, 'delete': 1}}})
    add_protocol(rse_info[0]['id'], {'scheme': schemes[2],
                                     'hostname': ('davs.%s' % base_rse_info[0]['address']),
                                     'port': 443,
                                     'prefix': '/test/chamber/',
                                     'impl': 'rucio.rse.protocols.gfal.Default',
                                     'domains': {
                                         'lan': {'read': 2, 'write': 2, 'delete': 2},
                                         'wan': {'read': 2, 'write': 2, 'delete': 2}}})
    add_protocol(rse_info[0]['id'], {'scheme': schemes[1],
                                     'hostname': ('gsiftp.%s' % base_rse_info[0]['address']),
                                     'port': 8446,
                                     'prefix': '/test/chamber/',
                                     'impl': 'rucio.rse.protocols.gfal.Default',
                                     'domains': {
                                         'lan': {'read': 0, 'write': 0, 'delete': 0},
                                         'wan': {'read': 3, 'write': 3, 'delete': 3}}})

    # Second RSE: reversed preferences — gsiftp first on wan, davs wan-only,
    # xrootd preferred on lan.
    add_protocol(rse_info[1]['id'], {'scheme': schemes[1],
                                     'hostname': ('gsiftp.%s' % base_rse_info[1]['address']),
                                     'port': 8446,
                                     'prefix': '/lambda/complex/',
                                     'impl': 'rucio.rse.protocols.gfal.Default',
                                     'domains': {
                                         'lan': {'read': 2, 'write': 2, 'delete': 2},
                                         'wan': {'read': 1, 'write': 1, 'delete': 1}}})
    add_protocol(rse_info[1]['id'], {'scheme': schemes[2],
                                     'hostname': ('davs.%s' % base_rse_info[1]['address']),
                                     'port': 443,
                                     'prefix': '/lambda/complex/',
                                     'impl': 'rucio.rse.protocols.gfal.Default',
                                     'domains': {
                                         'lan': {'read': 0, 'write': 0, 'delete': 0},
                                         'wan': {'read': 2, 'write': 2, 'delete': 2}}})
    add_protocol(rse_info[1]['id'], {'scheme': schemes[0],
                                     'hostname': ('root.%s' % base_rse_info[1]['address']),
                                     'port': 1409,
                                     'prefix': '//lambda/complex/',
                                     'impl': 'rucio.rse.protocols.xrootd.Default',
                                     'domains': {
                                         'lan': {'read': 1, 'write': 1, 'delete': 1},
                                         'wan': {'read': 3, 'write': 3, 'delete': 3}}})

    yield {'files': files, 'rse_info': rse_info}

    # Teardown: remove replicas, attributes and the RSEs themselves.
    for info in rse_info:
        delete_replicas(rse_id=info['id'], files=files)
        del_rse_attribute(rse_id=info['id'], key='site')
        del_rse(info['id'])
def setup(self):
    # Two GCS-backed RSEs sharing one host, distinguished only by prefix.
    self.rc = client.ReplicaClient()
    self.rse1 = rse_name_generator()
    self.rse2 = rse_name_generator()
    self.rse1_id = add_rse(self.rse1)
    self.rse2_id = add_rse(self.rse2)
    add_protocol(self.rse1_id, {'scheme': 'https',
                                'hostname': 'storage.googleapis.com',
                                'port': 443,
                                'prefix': '/atlas-europe-west1/',
                                'impl': 'rucio.rse.protocols.gfal.Default',
                                'domains': {
                                    'lan': {'read': 1, 'write': 1, 'delete': 1},
                                    'wan': {'read': 1, 'write': 1, 'delete': 1, 'third_party_copy': 1}}})
    add_protocol(self.rse2_id, {'scheme': 'https',
                                'hostname': 'storage.googleapis.com',
                                'port': 443,
                                'prefix': '/atlas-europe-east1/',
                                'impl': 'rucio.rse.protocols.gfal.Default',
                                'domains': {
                                    'lan': {'read': 1, 'write': 1, 'delete': 1},
                                    'wan': {'read': 1, 'write': 1, 'delete': 1, 'third_party_copy': 1}}})

    # register some files there
    self.files = [{'scope': InternalScope('mock'),
                   'name': 'file-on-gcs_%s' % i,
                   'bytes': 1234,
                   'adler32': 'deadbeef',
                   'meta': {'events': 666}} for i in range(0, 3)]
    root = InternalAccount('root')
    add_replicas(rse_id=self.rse1_id, files=self.files, account=root, ignore_availability=True)
    add_replicas(rse_id=self.rse2_id, files=self.files, account=root, ignore_availability=True)

def tearDown(self):
    # Remove the replicas registered in setup, then the RSEs themselves.
    delete_replicas(rse_id=self.rse1_id, files=self.files)
    delete_replicas(rse_id=self.rse2_id, files=self.files)
    del_rse(rse_id=self.rse1_id)
    del_rse(rse_id=self.rse2_id)
def _sanitize_or_set_scope(self, scope):
    """Return a usable scope: the default when empty, converted to InternalScope when given as a string, otherwise unchanged."""
    if not scope:
        return self.default_scope
    if isinstance(scope, str):
        return InternalScope(scope, vo=self.vo)
    return scope
def setUp(self):
    """Create two temporary GCS-backed RSEs (honouring multi-VO configuration) and register three test files on both."""
    if config_get_bool('common', 'multi_vo', raise_exception=False, default=False):
        self.vo = {'vo': get_vo()}
    else:
        self.vo = {}

    self.rc = client.ReplicaClient()
    self.rse1 = rse_name_generator()
    self.rse2 = rse_name_generator()
    self.rse1_id = add_rse(self.rse1, **self.vo)
    self.rse2_id = add_rse(self.rse2, **self.vo)

    # The two endpoints differ only in their bucket prefix.
    for rse_id, prefix in ((self.rse1_id, '/atlas-europe-west1/'),
                           (self.rse2_id, '/atlas-europe-east1/')):
        add_protocol(rse_id, {
            'scheme': 'https',
            'hostname': 'storage.googleapis.com',
            'port': 443,
            'prefix': prefix,
            'impl': 'rucio.rse.protocols.gfal.Default',
            'domains': {
                'lan': {'read': 1, 'write': 1, 'delete': 1},
                'wan': {'read': 1, 'write': 1, 'delete': 1,
                        'third_party_copy_read': 1, 'third_party_copy_write': 1}
            }
        })

    # Register the same three files on both RSEs.
    self.files = [{'scope': InternalScope('mock', **self.vo),
                   'name': 'file-on-gcs_%s' % i,
                   'bytes': 1234,
                   'adler32': 'deadbeef',
                   'meta': {'events': 666}} for i in range(0, 3)]
    root = InternalAccount('root', **self.vo)
    for rse_id in (self.rse1_id, self.rse2_id):
        add_replicas(rse_id=rse_id, files=self.files, account=root, ignore_availability=True)
def test_atlas_archival_policy(self):
    """ UNDERTAKER (CORE): Test the atlas archival policy. """
    scope = InternalScope('mock', **self.vo)
    owner = InternalAccount('jdoe', **self.vo)
    admin = InternalAccount('root', **self.vo)
    n_datasets = 5
    n_files = 5

    rse = 'LOCALGROUPDISK_%s' % rse_name_generator()
    rse_id = add_rse(rse, **self.vo)
    set_account_limit(owner, rse_id, -1)

    # Already-expired datasets (lifetime=-1) with one rule each, for the undertaker to pick up.
    datasets = []
    for _ in range(n_datasets):
        datasets.append({'name': 'dsn_%s' % generate_uuid(),
                         'scope': scope,
                         'type': 'DATASET',
                         'lifetime': -1,
                         'rules': [{'account': owner,
                                    'copies': 1,
                                    'rse_expression': rse,
                                    'grouping': 'DATASET'}]})
    add_dids(dids=datasets, account=admin)

    # Attach n_files tombstoned files to each dataset and remember them all.
    all_files = []
    for dataset in datasets:
        contents = [{'scope': scope,
                     'name': 'file_%s' % generate_uuid(),
                     'bytes': 1,
                     'adler32': '0cc737eb',
                     'tombstone': datetime.utcnow() + timedelta(weeks=2),
                     'meta': {'events': 10}} for _ in range(n_files)]
        attach_dids(scope=scope, name=dataset['name'], rse_id=rse_id, dids=contents, account=admin)
        all_files += contents

    undertaker(worker_number=1, total_workers=1, once=True)

    # The archival policy must clear the replica tombstones instead of deleting them.
    for f in all_files:
        assert (get_replica(scope=f['scope'], name=f['name'], rse_id=rse_id)['tombstone'] is None)

    # Each expired dataset must reappear under the 'archive' scope with exactly one rule.
    for dataset in datasets:
        archive_scope = InternalScope('archive', **self.vo)
        assert (get_did(scope=archive_scope, name=dataset['name'])['name'] == dataset['name'])
        assert (len(list(list_rules(filters={'scope': archive_scope, 'name': dataset['name']}))) == 1)
def __init__(self):
    """Prepare five randomly-named internal scopes and a 'jdoe' account for the tests."""
    scopes = []
    for _ in range(5):
        scopes.append(InternalScope(scope_name_generator()))
    self.scopes = scopes
    self.jdoe = InternalAccount('jdoe')
def test_abacus_collection_replica(self):
    """ ABACUS (COLLECTION REPLICA): Test update of collection replica. """
    # Upload two files, put them in a dataset and pin the dataset to self.rse.
    # lifetime=-1 means everything is already expired, so the cleaner/reaper can act on it later.
    self.files = [{'did_scope': self.scope, 'did_name': 'file_' + generate_uuid(), 'path': file_generator(size=self.file_sizes), 'rse': self.rse, 'lifetime': -1} for _ in range(0, 2)]
    self.did_client.add_did(self.scope, self.dataset, DIDType.DATASET, lifetime=-1)
    self.upload_client.upload(self.files)
    self.did_client.attach_dids(scope=self.scope, name=self.dataset, dids=[{'name': file['did_name'], 'scope': file['did_scope']} for file in self.files])
    self.rule_client.add_replication_rule([{'scope': self.scope, 'name': self.dataset}], 1, self.rse, lifetime=-1)
    # Local files are no longer needed once uploaded.
    [os.remove(file['path']) for file in self.files]

    # Check dataset replica after rule creation - initial data
    # Before abacus runs, the collection replica row exists but carries no counters yet.
    dataset_replica = [replica for replica in self.replica_client.list_dataset_replicas(self.scope, self.dataset)][0]
    assert dataset_replica['bytes'] == 0
    assert dataset_replica['length'] == 0
    assert dataset_replica['available_bytes'] == 0
    assert dataset_replica['available_length'] == 0
    assert str(dataset_replica['state']) == 'UNAVAILABLE'

    # Run Abacus
    collection_replica.run(once=True)

    # Check dataset replica after abacus - abacus should update the collection_replica table from updated_col_rep
    dataset_replica = [replica for replica in self.replica_client.list_dataset_replicas(self.scope, self.dataset)][0]
    assert dataset_replica['bytes'] == len(self.files) * self.file_sizes
    assert dataset_replica['length'] == len(self.files)
    assert dataset_replica['available_bytes'] == len(self.files) * self.file_sizes
    assert dataset_replica['available_length'] == len(self.files)
    assert str(dataset_replica['state']) == 'AVAILABLE'

    # Delete one file -> collection replica should be unavailable
    cleaner.run(once=True)
    delete_replicas(rse_id=self.rse_id, files=[{'name': self.files[0]['did_name'], 'scope': InternalScope(self.files[0]['did_scope'], **self.vo)}])
    # Re-pin the dataset and re-run abacus so the counters reflect the missing file.
    self.rule_client.add_replication_rule([{'scope': self.scope, 'name': self.dataset}], 1, self.rse, lifetime=-1)
    collection_replica.run(once=True)
    dataset_replica = [replica for replica in self.replica_client.list_dataset_replicas(self.scope, self.dataset)][0]
    # Totals still count both files; only the 'available_*' counters drop by one file.
    assert dataset_replica['length'] == len(self.files)
    assert dataset_replica['bytes'] == len(self.files) * self.file_sizes
    assert dataset_replica['available_length'] == len(self.files) - 1
    assert dataset_replica['available_bytes'] == (len(self.files) - 1) * self.file_sizes
    assert str(dataset_replica['state']) == 'UNAVAILABLE'

    # Delete all files -> collection replica should be deleted
    # Old behaviour (doesn't delete the DID)
    cleaner.run(once=True)
    # Drop the reaper's RSE cache so it sees the current state before running.
    reaper.REGION.invalidate()
    if self.vo:
        reaper.run(once=True, include_rses='vo=%s&(%s)' % (self.vo['vo'], self.rse), greedy=True)
    else:
        reaper.run(once=True, include_rses=self.rse, greedy=True)
    self.rule_client.add_replication_rule([{'scope': self.scope, 'name': self.dataset}], 1, self.rse, lifetime=-1)
    collection_replica.run(once=True)
    dataset_replica = [replica for replica in self.replica_client.list_dataset_replicas(self.scope, self.dataset)]
    # The replica row survives with zeroed counters (the DID itself is not deleted).
    assert dataset_replica[0]['length'] == 0
    assert dataset_replica[0]['available_length'] == 0
def test_is_scope_owner(self):
    """ SCOPE (CORE): Is scope owner """
    # Fixes: 'anwser' typo and nose-style assert_equal, replaced with a plain
    # assert to match the rest of the (pytest-migrated) test suite.
    scope = InternalScope(scope_name_generator())
    add_scope(scope=scope, account=self.jdoe)
    answer = is_scope_owner(scope=scope, account=self.jdoe)
    assert answer is True
def re_evaluator(once=False, sleep_time=30, did_limit=100):
    """
    Main loop to check the re-evaluation of dids.

    :param once:       If True, run a single iteration and exit.
    :param sleep_time: Seconds to sleep when no work was found.
    :param did_limit:  Maximum number of updated dids fetched per iteration.
    """
    hostname = socket.gethostname()
    pid = os.getpid()
    current_thread = threading.current_thread()
    paused_dids = {}  # {(scope, name): datetime}

    # Make an initial heartbeat so that all judge-evaluators have the correct worker number on the next try
    executable = 'judge-evaluator'
    live(executable=executable, hostname=hostname, pid=pid, thread=current_thread, older_than=60 * 30)
    graceful_stop.wait(1)

    while not graceful_stop.is_set():
        try:
            # heartbeat
            heartbeat = live(executable=executable, hostname=hostname, pid=pid, thread=current_thread, older_than=60 * 30)

            start = time.time()  # NOQA

            # Refresh paused dids: drop entries whose pause deadline has passed.
            paused_dids = dict((k, v) for k, v in iteritems(paused_dids) if datetime.utcnow() < v)

            # Select a bunch of dids for re evaluation for this worker.
            # Paused dids are passed as blocked so the query skips them.
            dids = get_updated_dids(total_workers=heartbeat['nr_threads'],
                                    worker_number=heartbeat['assign_thread'],
                                    limit=did_limit,
                                    blocked_dids=[(InternalScope(key[0], fromExternal=False), key[1]) for key in paused_dids])
            logging.debug('re_evaluator[%s/%s] index query time %f fetch size is %d (%d blocked)' % (heartbeat['assign_thread'], heartbeat['nr_threads'], time.time() - start, len(dids), len([(InternalScope(key[0], fromExternal=False), key[1]) for key in paused_dids])))

            # If the list is empty, sent the worker to sleep
            if not dids and not once:
                logging.debug('re_evaluator[%s/%s] did not get any work (paused_dids=%s)' % (heartbeat['assign_thread'], heartbeat['nr_threads'], str(len(paused_dids))))
                daemon_sleep(start_time=start, sleep_time=sleep_time, graceful_stop=graceful_stop)
            else:
                done_dids = {}
                for did in dids:
                    if graceful_stop.is_set():
                        break

                    # Check if this did has already been operated on
                    # (same scope:name can appear multiple times in one batch).
                    did_tag = '%s:%s' % (did.scope.internal, did.name)
                    if did_tag in done_dids:
                        if did.rule_evaluation_action in done_dids[did_tag]:
                            logging.debug('re_evaluator[%s/%s]: evaluation of %s:%s already done' % (heartbeat['assign_thread'], heartbeat['nr_threads'], did.scope, did.name))
                            delete_updated_did(id_=did.id)
                            continue
                    else:
                        done_dids[did_tag] = []

                    # Jump paused dids
                    if (did.scope.internal, did.name) in paused_dids:
                        continue

                    try:
                        start_time = time.time()
                        re_evaluate_did(scope=did.scope, name=did.name, rule_evaluation_action=did.rule_evaluation_action)
                        logging.debug('re_evaluator[%s/%s]: evaluation of %s:%s took %f' % (heartbeat['assign_thread'], heartbeat['nr_threads'], did.scope, did.name, time.time() - start_time))
                        delete_updated_did(id_=did.id)
                        done_dids[did_tag].append(did.rule_evaluation_action)
                    except DataIdentifierNotFound:
                        # DID vanished in the meantime; just drop the update record.
                        delete_updated_did(id_=did.id)
                    except (DatabaseException, DatabaseError) as e:
                        # ORA-00054: row locked by another session -> pause this did for a random 60-600s.
                        if match('.*ORA-00054.*', str(e.args[0])):
                            paused_dids[(did.scope.internal, did.name)] = datetime.utcnow() + timedelta(seconds=randint(60, 600))
                            logging.warning('re_evaluator[%s/%s]: Locks detected for %s:%s' % (heartbeat['assign_thread'], heartbeat['nr_threads'], did.scope, did.name))
                            record_counter('rule.judge.exceptions.LocksDetected')
                        elif match('.*QueuePool.*', str(e.args[0])):
                            logging.warning(traceback.format_exc())
                            record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
                        elif match('.*ORA-03135.*', str(e.args[0])):
                            # Lost DB connection; warn and retry on the next cycle.
                            logging.warning(traceback.format_exc())
                            record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
                        else:
                            logging.error(traceback.format_exc())
                            record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
                    except ReplicationRuleCreationTemporaryFailed as e:
                        record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
                        logging.warning('re_evaluator[%s/%s]: Replica Creation temporary failed, retrying later for %s:%s' % (heartbeat['assign_thread'], heartbeat['nr_threads'], did.scope, did.name))
                    except FlushError as e:
                        record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
                        logging.warning('re_evaluator[%s/%s]: Flush error for %s:%s' % (heartbeat['assign_thread'], heartbeat['nr_threads'], did.scope, did.name))
        except (DatabaseException, DatabaseError) as e:
            # Outer guard: same DB-error triage as above, but at loop level.
            if match('.*QueuePool.*', str(e.args[0])):
                logging.warning(traceback.format_exc())
                record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
            elif match('.*ORA-03135.*', str(e.args[0])):
                logging.warning(traceback.format_exc())
                record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
            else:
                logging.critical(traceback.format_exc())
                record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)
        except Exception as e:
            logging.critical(traceback.format_exc())
            record_counter('rule.judge.exceptions.%s' % e.__class__.__name__)

        if once:
            break

    die(executable=executable, hostname=hostname, pid=pid, thread=current_thread)
def setUp(self):
    """Upload three files to two test RSEs and put their replicas into known suspicious/bad/unavailable states."""
    if config_get_bool('common', 'multi_vo', raise_exception=False, default=False):
        self.vo = {'vo': config_get('client', 'vo', raise_exception=False, default='tst')}
    else:
        self.vo = {}

    self.replica_client = ReplicaClient()

    # Using two test RSEs
    self.rse4suspicious = 'MOCK_SUSPICIOUS'
    self.rse4suspicious_id = get_rse_id(self.rse4suspicious, **self.vo)
    self.rse4recovery = 'MOCK_RECOVERY'
    self.rse4recovery_id = get_rse_id(self.rse4recovery, **self.vo)
    self.scope = 'mock'
    self.internal_scope = InternalScope(self.scope, **self.vo)

    # For testing, we create 3 files and upload them to Rucio to two test RSEs.
    self.tmp_file1 = file_generator()
    self.tmp_file2 = file_generator()
    self.tmp_file3 = file_generator()

    self.listdids = [{'scope': self.internal_scope, 'name': path.basename(f), 'type': DIDType.FILE} for f in [self.tmp_file1, self.tmp_file2, self.tmp_file3]]

    # Upload via the CLI rather than the client API.
    for rse in [self.rse4suspicious, self.rse4recovery]:
        cmd = 'rucio -v upload --rse {0} --scope {1} {2} {3} {4}'.format(rse, self.scope, self.tmp_file1, self.tmp_file2, self.tmp_file3)
        exitcode, out, err = execute(cmd)
        # checking if Rucio upload went OK
        assert exitcode == 0

    # removing physical files from /tmp location - keeping only their DB info
    remove(self.tmp_file1)
    remove(self.tmp_file2)
    remove(self.tmp_file3)

    # Gather replica info
    replicalist = list_replicas(dids=self.listdids)

    # Changing the replica statuses as follows:
    # --------------------------------------------------------------------------------------------
    # Name         State(s) declared on MOCK_RECOVERY       State(s) declared on MOCK_SUSPICIOUS
    # --------------------------------------------------------------------------------------------
    # tmp_file1    available                                suspicious (available)
    # tmp_file2    available                                suspicious + bad (unavailable)
    # tmp_file3    unavailable                              suspicious (available)
    # --------------------------------------------------------------------------------------------
    for replica in replicalist:
        suspicious_pfns = replica['rses'][self.rse4suspicious_id]
        # Declared three times with a pause in between — presumably to cross the
        # suspicious-declaration threshold used by the recoverer; TODO confirm.
        for i in range(3):
            print("Declaring suspicious file replica: " + suspicious_pfns[0])
            self.replica_client.declare_suspicious_file_replicas([suspicious_pfns[0], ], 'This is a good reason.')
            sleep(1)
        if replica['name'] == path.basename(self.tmp_file2):
            print("Declaring bad file replica: " + suspicious_pfns[0])
            self.replica_client.declare_bad_file_replicas([suspicious_pfns[0], ], 'This is a good reason')
        if replica['name'] == path.basename(self.tmp_file3):
            print("Updating replica state as unavailable: " + replica['rses'][self.rse4recovery_id][0])
            update_replica_state(self.rse4recovery_id, self.internal_scope, path.basename(self.tmp_file3), ReplicaState.UNAVAILABLE)

    # Gather replica info after setting initial replica statuses
    replicalist = list_replicas(dids=self.listdids)

    # Checking if the status changes were effective
    for replica in replicalist:
        if replica['name'] == path.basename(self.tmp_file1):
            assert replica['states'][self.rse4suspicious_id] == 'AVAILABLE'
            assert replica['states'][self.rse4recovery_id] == 'AVAILABLE'
        if replica['name'] == path.basename(self.tmp_file2):
            # Bad replicas drop out of the 'states' dict entirely.
            assert (self.rse4suspicious_id in replica['states']) is False
            assert replica['states'][self.rse4recovery_id] == 'AVAILABLE'
        if replica['name'] == path.basename(self.tmp_file3):
            assert replica['states'][self.rse4suspicious_id] == 'AVAILABLE'
            assert (self.rse4recovery_id in replica['states']) is False

    # Checking if only self.tmp_file2 is declared as 'BAD'
    self.from_date = datetime.now() - timedelta(days=1)
    bad_replicas_list = list_bad_replicas_status(rse_id=self.rse4suspicious_id, younger_than=self.from_date, **self.vo)
    bad_checklist = [(badf['name'], badf['rse_id'], badf['state']) for badf in bad_replicas_list]

    assert (path.basename(self.tmp_file2), self.rse4suspicious_id, BadFilesStatus.BAD) in bad_checklist
    assert (path.basename(self.tmp_file1), self.rse4suspicious_id, BadFilesStatus.BAD) not in bad_checklist
    assert (path.basename(self.tmp_file3), self.rse4suspicious_id, BadFilesStatus.BAD) not in bad_checklist

    bad_replicas_list = list_bad_replicas_status(rse_id=self.rse4recovery_id, younger_than=self.from_date, **self.vo)
    bad_checklist = [(badf['name'], badf['rse_id'], badf['state']) for badf in bad_replicas_list]

    assert (path.basename(self.tmp_file1), self.rse4recovery_id, BadFilesStatus.BAD) not in bad_checklist
    assert (path.basename(self.tmp_file2), self.rse4recovery_id, BadFilesStatus.BAD) not in bad_checklist
    assert (path.basename(self.tmp_file3), self.rse4recovery_id, BadFilesStatus.BAD) not in bad_checklist
def __update_atime(self):
    """
    Bulk update atime.

    Walks the buffered traces in self.__reports, flags suspicious transfers,
    collects file replicas whose access time must be bumped, queues parent
    datasets for the dataset updater, then applies the replica touches
    (resubmitting any trace that hit a locked row back onto the queue).
    """
    replicas = []
    rses = []
    for report in self.__reports:
        # Traces without an explicit VO belong to the default VO.
        if 'vo' not in report:
            report['vo'] = 'def'

        try:
            # Identify suspicious files
            try:
                if self.__bad_files_patterns and report['eventType'] in ['get_sm', 'get_sm_a', 'get'] and 'clientState' in report and report['clientState'] not in ['DONE', 'FOUND_ROOT', 'ALREADY_DONE']:
                    for pattern in self.__bad_files_patterns:
                        if 'stateReason' in report and report['stateReason'] and isinstance(report['stateReason'], str) and pattern.match(report['stateReason']):
                            # Reason is truncated to fit the DB column (255 chars).
                            reason = report['stateReason'][:255]
                            if 'url' not in report or not report['url']:
                                self.__logger(logging.ERROR, 'Missing url in the following trace : ' + str(report))
                            else:
                                try:
                                    surl = report['url']
                                    declare_bad_file_replicas([surl, ], reason=reason, issuer=InternalAccount('root', vo=report['vo']), status=BadFilesStatus.SUSPICIOUS)
                                    self.__logger(logging.INFO, 'Declare suspicious file %s with reason %s' % (report['url'], reason))
                                except Exception as error:
                                    self.__logger(logging.ERROR, 'Failed to declare suspicious file' + str(error))
            except Exception as error:
                # Suspicious-file detection is best effort; never abort trace processing.
                self.__logger(logging.ERROR, 'Problem with bad trace : %s . Error %s' % (str(report), str(error)))

            # check if scope in report. if not skip this one.
            if 'scope' not in report:
                record_counter('daemons.tracer.kronos.missing_scope')
                if report['eventType'] != 'touch':
                    continue
            else:
                record_counter('daemons.tracer.kronos.with_scope')
                report['scope'] = InternalScope(report['scope'], report['vo'])

            # handle all events starting with get* and download and touch events.
            if not report['eventType'].startswith('get') and not report['eventType'].startswith('sm_get') and not report['eventType'] == 'download' and not report['eventType'] == 'touch':
                continue
            if report['eventType'].endswith('_es'):
                continue
            record_counter('daemons.tracer.kronos.total_get')
            # Per-client counters for monitoring the trace mix.
            if report['eventType'] == 'get':
                record_counter('daemons.tracer.kronos.dq2clients')
            elif report['eventType'] == 'get_sm' or report['eventType'] == 'sm_get':
                if report['eventVersion'] == 'aCT':
                    record_counter('daemons.tracer.kronos.panda_production_act')
                else:
                    record_counter('daemons.tracer.kronos.panda_production')
            elif report['eventType'] == 'get_sm_a' or report['eventType'] == 'sm_get_a':
                if report['eventVersion'] == 'aCT':
                    record_counter('daemons.tracer.kronos.panda_analysis_act')
                else:
                    record_counter('daemons.tracer.kronos.panda_analysis')
            elif report['eventType'] == 'download':
                record_counter('daemons.tracer.kronos.rucio_download')
            elif report['eventType'] == 'touch':
                record_counter('daemons.tracer.kronos.rucio_touch')
            else:
                record_counter('daemons.tracer.kronos.other_get')

            if report['eventType'] == 'download' or report['eventType'] == 'touch':
                report['usrdn'] = report['account']

            if report['usrdn'] in self.__excluded_usrdns:
                continue

            # handle touch and non-touch traces differently
            if report['eventType'] != 'touch':
                # check if the report has the right state.
                if 'eventVersion' in report:
                    if report['eventVersion'] != 'aCT':
                        if report['clientState'] in self.__excluded_states:
                            continue

                if 'remoteSite' not in report:
                    continue
                if not report['remoteSite']:
                    continue

                if 'filename' not in report:
                    if 'name' in report:
                        report['filename'] = report['name']

                # remoteSite may list several RSEs, comma-separated.
                rses = report['remoteSite'].strip().split(',')
                for rse in rses:
                    try:
                        rse_id = get_rse_id(rse=rse, vo=report['vo'])
                    except RSENotFound:
                        self.__logger(logging.WARNING, "Cannot lookup rse_id for %s. Will skip this report.", rse)
                        record_counter('daemons.tracer.kronos.rse_not_found')
                        continue
                    replicas.append({'name': report['filename'], 'scope': report['scope'], 'rse': rse, 'rse_id': rse_id, 'accessed_at': datetime.utcfromtimestamp(report['traceTimeentryUnix']), 'traceTimeentryUnix': report['traceTimeentryUnix'], 'eventVersion': report['eventVersion']})
            else:
                # if touch event and if datasetScope is in the report then it means
                # that there is no file scope/name and therefore only the dataset is
                # put in the queue to be updated and the rest is skipped.
                rse_id = None
                rse = None
                if 'remoteSite' in report:
                    rse = report['remoteSite']
                    try:
                        rse_id = get_rse_id(rse=rse, vo=report['vo'])
                    except RSENotFound:
                        self.__logger(logging.WARNING, "Cannot lookup rse_id for %s.", rse)
                        record_counter('daemons.tracer.kronos.rse_not_found')
                if 'datasetScope' in report:
                    self.__dataset_queue.put({'scope': InternalScope(report['datasetScope'], vo=report['vo']), 'name': report['dataset'], 'rse_id': rse_id, 'accessed_at': datetime.utcfromtimestamp(report['traceTimeentryUnix'])})
                    continue
                else:
                    if 'remoteSite' not in report:
                        continue
                    replicas.append({'name': report['filename'], 'scope': report['scope'], 'rse': rse, 'rse_id': rse_id, 'accessed_at': datetime.utcfromtimestamp(report['traceTimeentryUnix'])})

        except (KeyError, AttributeError):
            self.__logger(logging.ERROR, "Cannot handle report.", exc_info=True)
            record_counter('daemons.tracer.kronos.report_error')
            continue
        except Exception:
            self.__logger(logging.ERROR, "Exception", exc_info=True)
            continue

        # Queue every parent dataset of the touched file for the dataset updater.
        for did in list_parent_dids(report['scope'], report['filename']):
            if did['type'] != DIDType.DATASET:
                continue
            # do not update _dis datasets
            if did['scope'].external == 'panda' and '_dis' in did['name']:
                continue
            for rse in rses:
                try:
                    rse_id = get_rse_id(rse=rse, vo=report['vo'])
                except RSENotFound:
                    self.__logger(logging.WARNING, "Cannot lookup rse_id for %s. Will skip this report.", rse)
                    record_counter('daemons.tracer.kronos.rse_not_found')
                    continue
                self.__dataset_queue.put({'scope': did['scope'], 'name': did['name'], 'did_type': did['type'], 'rse_id': rse_id, 'accessed_at': datetime.utcfromtimestamp(report['traceTimeentryUnix'])})

    if not len(replicas):
        return

    self.__logger(logging.DEBUG, "trying to update replicas: %s", replicas)

    try:
        start_time = time()
        for replica in replicas:
            # if touch replica hits a locked row put the trace back into queue for later retry
            if not touch_replica(replica):
                resubmit = {'filename': replica['name'], 'scope': replica['scope'].external, 'remoteSite': replica['rse'], 'traceTimeentryUnix': replica['traceTimeentryUnix'], 'eventType': 'get', 'usrdn': 'someuser', 'clientState': 'DONE', 'eventVersion': replica['eventVersion']}
                if replica['scope'].vo != 'def':
                    resubmit['vo'] = replica['scope'].vo
                self.__conn.send(body=jdumps(resubmit), destination=self.__queue, headers={'appversion': 'rucio', 'resubmitted': '1'})
                record_counter('daemons.tracer.kronos.sent_resubmitted')
                self.__logger(logging.WARNING, 'hit locked row, resubmitted to queue')
        record_timer('daemons.tracer.kronos.update_atime', (time() - start_time) * 1000)
    except Exception:
        self.__logger(logging.ERROR, "Cannot update replicas.", exc_info=True)
        record_counter('daemons.tracer.kronos.update_error')

    self.__logger(logging.INFO, 'updated %d replica(s)' % len(replicas))
def test_replica_sorting(self):
    """ REPLICA (CORE): Test the correct sorting of the replicas across WAN and LAN """
    self.rc = ReplicaClient()

    # Two RSEs tagged with distinct 'site' attributes; client_location={'site': ...}
    # decides which protocols count as LAN vs WAN.
    self.rse1 = 'APERTURE_%s' % rse_name_generator()
    self.rse2 = 'BLACKMESA_%s' % rse_name_generator()
    self.rse1_id = add_rse(self.rse1, **self.vo)
    self.rse2_id = add_rse(self.rse2, **self.vo)
    add_rse_attribute(rse_id=self.rse1_id, key='site', value='APERTURE')
    add_rse_attribute(rse_id=self.rse2_id, key='site', value='BLACKMESA')

    # One file, replicated on both RSEs.
    self.files = [{'scope': InternalScope('mock', **self.vo), 'name': 'element_0', 'bytes': 1234, 'adler32': 'deadbeef'}]
    root = InternalAccount('root', **self.vo)
    add_replicas(rse_id=self.rse1_id, files=self.files, account=root)
    add_replicas(rse_id=self.rse2_id, files=self.files, account=root)

    # Three protocols per RSE with distinct LAN/WAN priorities (0 = disabled).
    add_protocol(self.rse1_id, {'scheme': 'root',
                                'hostname': 'root.aperture.com',
                                'port': 1409,
                                'prefix': '//test/chamber/',
                                'impl': 'rucio.rse.protocols.xrootd.Default',
                                'domains': {
                                    'lan': {'read': 1, 'write': 1, 'delete': 1},
                                    'wan': {'read': 1, 'write': 1, 'delete': 1}}})
    add_protocol(self.rse1_id, {'scheme': 'davs',
                                'hostname': 'davs.aperture.com',
                                'port': 443,
                                'prefix': '/test/chamber/',
                                'impl': 'rucio.rse.protocols.gfal.Default',
                                'domains': {
                                    'lan': {'read': 2, 'write': 2, 'delete': 2},
                                    'wan': {'read': 2, 'write': 2, 'delete': 2}}})
    add_protocol(self.rse1_id, {'scheme': 'gsiftp',
                                'hostname': 'gsiftp.aperture.com',
                                'port': 8446,
                                'prefix': '/test/chamber/',
                                'impl': 'rucio.rse.protocols.gfal.Default',
                                'domains': {
                                    'lan': {'read': 0, 'write': 0, 'delete': 0},
                                    'wan': {'read': 3, 'write': 3, 'delete': 3}}})
    add_protocol(self.rse2_id, {'scheme': 'gsiftp',
                                'hostname': 'gsiftp.blackmesa.com',
                                'port': 8446,
                                'prefix': '/lambda/complex/',
                                'impl': 'rucio.rse.protocols.gfal.Default',
                                'domains': {
                                    'lan': {'read': 2, 'write': 2, 'delete': 2},
                                    'wan': {'read': 1, 'write': 1, 'delete': 1}}})
    add_protocol(self.rse2_id, {'scheme': 'davs',
                                'hostname': 'davs.blackmesa.com',
                                'port': 443,
                                'prefix': '/lambda/complex/',
                                'impl': 'rucio.rse.protocols.gfal.Default',
                                'domains': {
                                    'lan': {'read': 0, 'write': 0, 'delete': 0},
                                    'wan': {'read': 2, 'write': 2, 'delete': 2}}})
    add_protocol(self.rse2_id, {'scheme': 'root',
                                'hostname': 'root.blackmesa.com',
                                'port': 1409,
                                'prefix': '//lambda/complex/',
                                'impl': 'rucio.rse.protocols.xrootd.Default',
                                'domains': {
                                    'lan': {'read': 1, 'write': 1, 'delete': 1},
                                    'wan': {'read': 3, 'write': 3, 'delete': 3}}})

    # Client at APERTURE: its own LAN protocols come first, then the remote WAN ones.
    replicas = [r for r in self.rc.list_replicas(dids=[{'scope': 'mock',
                                                        'name': f['name'],
                                                        'type': 'FILE'} for f in self.files],
                                                 schemes=['root', 'gsiftp', 'davs'],
                                                 client_location={'site': 'APERTURE'})]
    pfns = [r['pfns'] for r in replicas][0]
    # gsiftp on APERTURE has lan priority 0 (disabled) and davs on BLACKMESA serves WAN only,
    # so 5 of the 6 protocols are usable from this site.
    assert len(pfns.keys()) == 5
    assert pfns['root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0']['domain'] == 'lan'
    assert pfns['root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0']['priority'] == 1
    assert pfns['davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0']['domain'] == 'lan'
    assert pfns['davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0']['priority'] == 2
    assert pfns['gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0']['domain'] == 'wan'
    assert pfns['gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0']['priority'] == 3
    assert pfns['davs://davs.blackmesa.com:443/lambda/complex/mock/58/b5/element_0']['domain'] == 'wan'
    assert pfns['davs://davs.blackmesa.com:443/lambda/complex/mock/58/b5/element_0']['priority'] == 4
    assert pfns['root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0']['domain'] == 'wan'
    assert pfns['root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0']['priority'] == 5

    # Client at BLACKMESA: mirror image of the APERTURE ordering.
    replicas = [r for r in self.rc.list_replicas(dids=[{'scope': 'mock',
                                                        'name': f['name'],
                                                        'type': 'FILE'} for f in self.files],
                                                 schemes=['root', 'gsiftp', 'davs'],
                                                 client_location={'site': 'BLACKMESA'})]
    pfns = [r['pfns'] for r in replicas][0]
    assert len(pfns.keys()) == 5
    assert pfns['root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0']['domain'] == 'lan'
    assert pfns['root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0']['priority'] == 1
    assert pfns['gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0']['domain'] == 'lan'
    assert pfns['gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0']['priority'] == 2
    assert pfns['root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0']['domain'] == 'wan'
    assert pfns['root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0']['priority'] == 3
    assert pfns['davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0']['domain'] == 'wan'
    assert pfns['davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0']['priority'] == 4
    assert pfns['gsiftp://gsiftp.aperture.com:8446/test/chamber/mock/58/b5/element_0']['domain'] == 'wan'
    assert pfns['gsiftp://gsiftp.aperture.com:8446/test/chamber/mock/58/b5/element_0']['priority'] == 5

    # Client at an unrelated site (XEN): everything is WAN; ordering between the two
    # RSEs at equal protocol priority is not pinned down.
    replicas = [r for r in self.rc.list_replicas(dids=[{'scope': 'mock',
                                                        'name': f['name'],
                                                        'type': 'FILE'} for f in self.files],
                                                 schemes=['root', 'gsiftp', 'davs'],
                                                 client_location={'site': 'XEN'})]
    pfns = [r['pfns'] for r in replicas][0]
    assert len(pfns.keys()) == 6
    # TODO: intractable until RSE sorting is enabled
    assert pfns['gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0']['domain'] == 'wan'
    assert pfns['gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0']['priority'] in [1, 2]
    assert pfns['root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0']['domain'] == 'wan'
    assert pfns['root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0']['priority'] in [1, 2]
    assert pfns['davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0']['domain'] == 'wan'
    assert pfns['davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0']['priority'] in [3, 4]
    assert pfns['davs://davs.blackmesa.com:443/lambda/complex/mock/58/b5/element_0']['domain'] == 'wan'
    assert pfns['davs://davs.blackmesa.com:443/lambda/complex/mock/58/b5/element_0']['priority'] in [3, 4]
    assert pfns['gsiftp://gsiftp.aperture.com:8446/test/chamber/mock/58/b5/element_0']['domain'] == 'wan'
    assert pfns['gsiftp://gsiftp.aperture.com:8446/test/chamber/mock/58/b5/element_0']['priority'] in [5, 6]
    assert pfns['root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0']['domain'] == 'wan'
    assert pfns['root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0']['priority'] in [5, 6]

    # Same checks through the metalink rendering.
    ml = self.rc.list_replicas(dids=[{'scope': 'mock',
                                      'name': f['name'],
                                      'type': 'FILE'} for f in self.files],
                               schemes=['root', 'gsiftp', 'davs'],
                               metalink=True,
                               client_location={'site': 'APERTURE'})
    assert 'domain="lan" priority="1" client_extract="false">root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0' in ml
    assert 'domain="lan" priority="2" client_extract="false">davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0' in ml
    assert 'domain="wan" priority="3" client_extract="false">gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0' in ml
    assert 'domain="wan" priority="4" client_extract="false">davs://davs.blackmesa.com:443/lambda/complex/mock/58/b5/element_0' in ml
    assert 'domain="wan" priority="5" client_extract="false">root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0' in ml
    assert 'priority="6"' not in ml

    ml = self.rc.list_replicas(dids=[{'scope': 'mock',
                                      'name': f['name'],
                                      'type': 'FILE'} for f in self.files],
                               schemes=['root', 'gsiftp', 'davs'],
                               metalink=True,
                               client_location={'site': 'BLACKMESA'})
    assert 'domain="lan" priority="1" client_extract="false">root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0' in ml
    assert 'domain="lan" priority="2" client_extract="false">gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0' in ml
    assert 'domain="wan" priority="3" client_extract="false">root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0' in ml
    assert 'domain="wan" priority="4" client_extract="false">davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0' in ml
    assert 'domain="wan" priority="5" client_extract="false">gsiftp://gsiftp.aperture.com:8446/test/chamber/mock/58/b5/element_0' in ml
    assert 'priority="6"' not in ml

    # TODO: intractable until RSE sorting is enabled
    # ml = self.rc.list_replicas(dids=[{'scope': 'mock',
    #                                   'name': f['name'],
    #                                   'type': 'FILE'} for f in self.files],
    #                            schemes=['root', 'gsiftp', 'davs'],
    #                            metalink=True,
    #                            client_location={'site': 'XEN'})
    # assert 'domain="wan" priority="1">root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0' in ml
    # assert 'domain="wan" priority="2">gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0' in ml
    # assert 'domain="wan" priority="3">davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0' in ml
    # assert 'domain="wan" priority="4">davs://davs.blackmesa.com:443/lambda/complex/mock/58/b5/element_0' in ml
    # assert 'domain="wan" priority="5">gsiftp://gsiftp.aperture.com:8446/test/chamber/mock/58/b5/element_0' in ml
    # assert 'domain="wan" priority="6">root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0' in ml
    # assert 'priority="7"' not in ml

    # ensure correct handling of disabled protocols
    add_protocol(self.rse1_id, {'scheme': 'root',
                                'hostname': 'root2.aperture.com',
                                'port': 1409,
                                'prefix': '//test/chamber/',
                                'impl': 'rucio.rse.protocols.xrootd.Default',
                                'domains': {
                                    'lan': {'read': 1, 'write': 1, 'delete': 1},
                                    'wan': {'read': 0, 'write': 0, 'delete': 0}}})

    ml = self.rc.list_replicas(dids=[{'scope': 'mock',
                                      'name': f['name'],
                                      'type': 'FILE'} for f in self.files],
                               schemes=['root', 'gsiftp', 'davs'],
                               metalink=True,
                               client_location={'site': 'BLACKMESA'})
    # root2.aperture has wan priority 0, so it must not show up for a remote client.
    assert 'domain="lan" priority="1" client_extract="false">root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0' in ml
    assert 'domain="lan" priority="2" client_extract="false">gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0' in ml
    assert 'domain="wan" priority="3" client_extract="false">root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0' in ml
    assert 'domain="wan" priority="4" client_extract="false">davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0' in ml
    assert 'domain="wan" priority="5" client_extract="false">gsiftp://gsiftp.aperture.com:8446/test/chamber/mock/58/b5/element_0' in ml
    assert 'priority="6"' not in ml

    delete_replicas(rse_id=self.rse1_id, files=self.files)
    delete_replicas(rse_id=self.rse2_id, files=self.files)
    del_rse(self.rse1_id)
    del_rse(self.rse2_id)
def make_replicas_available(self):
    """
    Mark replicas for this block at self.rse AVAILABLE in Rucio if PhEDEx knows them.

    Intersects the replicas Rucio currently considers unavailable at the RSE
    with the replica names PhEDEx reports (``self.replicas``), and flips each
    replica in that intersection to state ``'A'`` via ``core_update_state``.
    Timing and the number of recovered files are reported through ``monitor``.
    """
    # Hoisted from mid-function: pprint is only used for the debug dumps below.
    import pprint

    with monitor.record_timer_block('cms_sync.time_recover_replica'):
        logging.info('Recovering unavailable replicas for %s:%s at %s',
                     self.scope, self.block_name, self.rse)

        # Every replica state for the block at this RSE (all_states=True adds
        # the per-RSE 'states' entry consumed below).
        replicas = list(list_replicas(dids=[{'scope': self.scope,
                                             'name': self.block_name}],
                                      rse_expression='rse=%s' % self.rse,
                                      all_states=True))
        logging.info('Recovery: Rucio replicas %s', len(replicas))
        rucio_replica_names = {repl['name'] for repl in replicas}
        logging.info(pprint.pformat(rucio_replica_names))

        try:
            unavailable_replicas = {repl['name'] for repl in replicas
                                    if repl['states'][self.rse] != 'AVAILABLE'}
        except TypeError:
            # Presumably repl['states'] can be None when no state is recorded
            # — TODO confirm; treated as "nothing to recover".
            # logging.warn is a deprecated alias (removed in Python 3.13).
            logging.warning('Got a type error, setting unavailable replicas to null')
            unavailable_replicas = set()
        logging.info('Recovery: Unavailable replicas %s', len(unavailable_replicas))

        phedex_replicas = set(self.replicas.keys())
        logging.info('Recovery: PhEDEx replicas %s', len(phedex_replicas))
        logging.info('Recovery: PhEDEx %s', pprint.pformat(phedex_replicas))
        logging.info('Recovery: Unavailable %s', pprint.pformat(unavailable_replicas))

        # Only recover what both sides agree on: PhEDEx has it, Rucio marks it unavailable.
        missing = list(phedex_replicas & unavailable_replicas)
        logging.info('Recovery: Missing replicas %s', len(missing))
        logging.info('Recovery for %s:%s at %s: PhEDEx has %s, Rucio unavailable %s. Missing: %s ',
                     self.scope, self.block_name, self.rse,
                     len(phedex_replicas), len(unavailable_replicas), len(missing))

        # Fix up things which are unavailable
        rse_details = get_rse(self.rse)
        rse_id = rse_details['id']
        scope = InternalScope(self.scope)
        state = 'A'  # AVAILABLE

        for name in missing:
            logging.info('Setting available %s:%s at %s', self.scope, name, self.rse)
            core_update_state(rse_id=rse_id, scope=scope, name=name, state=state)

        monitor.record_counter('cms_sync.files_made_available', delta=len(missing))
    return
def add_files(lfns, account, ignore_availability, session=None):
    """
    Bulk add files :
    - Create the file and replica.
    - If doesn't exist create the dataset containing the file as well as a rule on the dataset on ANY sites.
    - Create all the ascendants of the dataset if they do not exist

    :param lfns: List of lfn (dictionary {'lfn': <lfn>, 'rse': <rse>, 'bytes': <bytes>, 'adler32': <adler32>, 'guid': <guid>, 'pfn': <pfn>}
    :param account: The account submitting the files (used for ownership of DIDs, rules and replicas).
    :param ignore_availability: A boolean to ignore blacklisted sites.
    :param session: The database session in use.
    """
    attachments = []
    # The list of scopes is necessary for the extract_scope
    scopes = list_scopes(session=session)
    scopes = [scope.external for scope in scopes]
    exist_lfn = []  # DID names already created in this call, to avoid duplicate add_did
    for lfn in lfns:
        # First check if the file exists
        filename = lfn['lfn']
        lfn_scope, _ = extract_scope(filename, scopes)
        lfn_scope = InternalScope(lfn_scope)
        exists, did_type = _exists(lfn_scope, filename)
        if exists:
            continue

        # Get all the ascendants of the file
        lfn_split = filename.split('/')
        lpns = ["/".join(lfn_split[:idx]) for idx in range(2, len(lfn_split))]
        lpns.reverse()  # lpns[0] is now the immediate parent, deeper ancestors follow
        # NOTE(review): debug print left in — consider removing or switching to logging.
        print(lpns)

        # The parent must be a dataset. Register it as well as the rule
        dsn_name = lpns[0]
        dsn_scope, _ = extract_scope(dsn_name, scopes)
        dsn_scope = InternalScope(dsn_scope)
        exists, did_type = _exists(dsn_scope, dsn_name)
        if exists and did_type == DIDType.CONTAINER:
            raise UnsupportedOperation('Cannot create %s as dataset' % dsn_name)
        if (dsn_name not in exist_lfn) and not exists:
            # NOTE(review): debug print left in — consider removing or switching to logging.
            print('Will create %s' % dsn_name)
            # Dataset carries a single-copy replication rule on 'ANY=true'.
            add_did(scope=dsn_scope,
                    name=dsn_name,
                    type=DIDType.DATASET,
                    account=InternalAccount(account),
                    statuses=None,
                    meta=None,
                    rules=[{'copies': 1, 'rse_expression': 'ANY=true', 'weight': None,
                            'account': InternalAccount(account), 'lifetime': None,
                            'GROUPING': 'NONE'}],
                    lifetime=None,
                    dids=None,
                    rse_id=None,
                    session=session)
            exist_lfn.append(dsn_name)
            # Queue attachment of the new dataset to its parent container.
            parent_name = lpns[1]
            parent_scope, _ = extract_scope(parent_name, scopes)
            parent_scope = InternalScope(parent_scope)
            attachments.append({'scope': parent_scope, 'name': parent_name,
                                'dids': [{'scope': dsn_scope, 'name': dsn_name}]})

        # Register the file
        rse_id = lfn.get('rse_id', None)
        if not rse_id:
            raise InvalidType('Missing rse_id')
        # NOTE(review): 'bytes' shadows the builtin; harmless here but worth renaming.
        bytes = lfn.get('bytes', None)
        guid = lfn.get('guid', None)
        adler32 = lfn.get('adler32', None)
        pfn = lfn.get('pfn', None)
        files = {'scope': lfn_scope, 'name': filename, 'bytes': bytes, 'adler32': adler32}
        if pfn:
            files['pfn'] = str(pfn)
        if guid:
            files['meta'] = {'guid': guid}
        add_replicas(rse_id=rse_id, files=[files], dataset_meta=None,
                     account=InternalAccount(account),
                     ignore_availability=ignore_availability, session=session)
        # One-day single-copy rule pinning the file to the RSE it was declared on.
        add_rule(dids=[{'scope': lfn_scope, 'name': filename}],
                 account=InternalAccount(account), copies=1,
                 rse_expression=lfn['rse'], grouping=None, weight=None,
                 lifetime=86400, locked=None, subscription_id=None, session=session)
        attachments.append({'scope': dsn_scope, 'name': dsn_name,
                            'dids': [{'scope': lfn_scope, 'name': filename}]})

        # Now loop over the ascendants of the dataset and created them
        for lpn in lpns[1:]:
            child_scope, _ = extract_scope(lpn, scopes)
            child_scope = InternalScope(child_scope)
            exists, did_type = _exists(child_scope, lpn)
            if exists and did_type == DIDType.DATASET:
                raise UnsupportedOperation('Cannot create %s as container' % lpn)
            if (lpn not in exist_lfn) and not exists:
                # NOTE(review): debug print left in — consider removing or switching to logging.
                print('Will create %s' % lpn)
                add_did(scope=child_scope, name=lpn, type=DIDType.CONTAINER,
                        account=InternalAccount(account), statuses=None, meta=None,
                        rules=None, lifetime=None, dids=None, rse_id=None,
                        session=session)
                exist_lfn.append(lpn)
                # NOTE(review): if lpn is the last element of lpns, index + 1 is out of
                # range and raises IndexError — presumably the topmost ancestor always
                # exists already; verify against callers.
                parent_name = lpns[lpns.index(lpn) + 1]
                parent_scope, _ = extract_scope(parent_name, scopes)
                parent_scope = InternalScope(parent_scope)
                attachments.append({'scope': parent_scope, 'name': parent_name,
                                    'dids': [{'scope': child_scope, 'name': lpn}]})
    # Finally attach everything
    attach_dids_to_dids(attachments, account=InternalAccount(account),
                        ignore_duplicate=True, session=session)
def list_replicas(dids, schemes=None, unavailable=False, request_id=None,
                  ignore_availability=True, all_states=False, rse_expression=None,
                  client_location=None, domain=None, signature_lifetime=None,
                  resolve_archives=True, resolve_parents=False, issuer=None):
    """
    List file replicas for a list of data identifiers.

    :param dids: The list of data identifiers (DIDs).
    :param schemes: A list of schemes to filter the replicas. (e.g. file, http, ...)
    :param unavailable: Also include unavailable replicas in the list.
    :param request_id: ID associated with the request for debugging.
    :param all_states: Return all replicas whatever state they are in. Adds an extra 'states' entry in the result dictionary.
    :param rse_expression: The RSE expression to restrict replicas on a set of RSEs.
    :param client_location: Client location dictionary for PFN modification {'ip', 'fqdn', 'site'}
    :param domain: The network domain for the call, either None, 'wan' or 'lan'. Compatibility fallback: None falls back to 'wan'.
    :param signature_lifetime: If supported, in seconds, restrict the lifetime of the signed PFN.
    :param resolve_archives: When set to True, find archives which contain the replicas.
    :param resolve_parents: When set to True, find all parent datasets which contain the replicas.
    :param issuer: The issuer account.
    """
    validate_schema(name='r_dids', obj=dids)

    # Signed URLs are handed out only to accounts holding the dedicated
    # permission; everyone else receives the raw, unsigned URL.
    sign_urls = bool(permission.has_permission(issuer=issuer,
                                               action='get_signed_url',
                                               kwargs={}))

    # Convert the external scopes to internal ones before hitting the core.
    for did in dids:
        did['scope'] = InternalScope(did['scope'])

    core_replicas = replica.list_replicas(dids=dids,
                                          schemes=schemes,
                                          unavailable=unavailable,
                                          request_id=request_id,
                                          ignore_availability=ignore_availability,
                                          all_states=all_states,
                                          rse_expression=rse_expression,
                                          client_location=client_location,
                                          domain=domain,
                                          sign_urls=sign_urls,
                                          signature_lifetime=signature_lifetime,
                                          resolve_archives=resolve_archives,
                                          resolve_parents=resolve_parents)

    for rep in core_replicas:
        # 'rses' and 'states' come back keyed by rse_id; translate the keys
        # to RSE names for the client-facing representation.
        for field in ('rses', 'states'):
            id_keyed = rep.get(field, None)
            if id_keyed is not None:
                rep[field] = {
                    (get_rse_name(rse_id=rse_id) if rse_id is not None else None): value
                    for rse_id, value in id_keyed.items()
                }

        rep['scope'] = rep['scope'].external

        # Parent DIDs are 'scope:name' strings with an internal scope; rewrite
        # each one with the external scope representation.
        if 'parents' in rep:
            translated = []
            for parent in rep['parents']:
                parent_scope, parent_name = parent.split(':')
                parent_scope = InternalScope(parent_scope, fromExternal=False).external
                translated.append('{}:{}'.format(parent_scope, parent_name))
            rep['parents'] = translated

        yield rep
def place_replica(once=False,
                  thread=0,
                  did_queue=None,
                  waiting_time=100,
                  dry_run=False,
                  sampling=False,
                  algorithms='t2_free_space_only_pop_with_network',
                  datatypes='NTUP,DAOD',
                  dest_rse_expr='type=DATADISK',
                  max_bytes_hour=100000000000000,
                  max_files_hour=100000,
                  max_bytes_hour_rse=50000000000000,
                  max_files_hour_rse=10000,
                  min_popularity=8,
                  min_recent_requests=5,
                  max_replicas=5,
                  sleep_time=10):
    """
    Thread to run the placement algorithm to decide if and where to put new replicas.

    Drains DIDs from ``did_queue``, runs every configured placement algorithm on
    each DID, records the decision in ElasticSearch and — unless ``dry_run`` is
    set or sampling excludes it — creates the replication rule via ``add_rule``.

    :param did_queue: Queue of (scope, name) tuples to place.
    :param waiting_time: Seconds to wait between queue-draining passes.
    :param dry_run: Evaluate and record decisions without creating rules.
    :param sampling: Only create rules for ~half the DIDs (hash-based sampling).
    :param algorithms: Comma-separated algorithm module names (overridden by the
                       'algorithms' option of the 'c3po' config section).
    """
    try:
        c3po_options = config_get_options('c3po')
        client = None

        # Config file takes precedence over the function argument.
        if 'algorithms' in c3po_options:
            algorithms = config_get('c3po', 'algorithms')

        algorithms = algorithms.split(',')

        if not dry_run:
            if len(algorithms) != 1:
                logging.error('Multiple algorithms are only allowed in dry_run mode')
                return
            client = Client(auth_type='x509_proxy',
                            account='c3po',
                            creds={'client_proxy': '/opt/rucio/etc/ddmadmin.long.proxy'})
            # NOTE(review): 'vo' is only bound here, but it is read below even in
            # dry_run mode — presumably dry runs never receive plain-string scopes;
            # verify, otherwise this raises NameError.
            vo = client.vo

        # Import one PlacementAlgorithm per configured algorithm module.
        instances = {}
        for algorithm in algorithms:
            module_path = 'rucio.daemons.c3po.algorithms.' + algorithm
            module = __import__(module_path, globals(), locals(), ['PlacementAlgorithm'])
            instances[algorithm] = module.PlacementAlgorithm(
                datatypes, dest_rse_expr, max_bytes_hour, max_files_hour,
                max_bytes_hour_rse, max_files_hour_rse, min_popularity,
                min_recent_requests, max_replicas)

        # Recorded alongside every decision for later analysis.
        params = {
            'dry_run': dry_run,
            'sampling': sampling,
            'datatypes': datatypes,
            'dest_rse_expr': dest_rse_expr,
            'max_bytes_hour': max_bytes_hour,
            'max_files_hour': max_files_hour,
            'max_bytes_hour_rse': max_bytes_hour_rse,
            'max_files_hour_rse': max_files_hour_rse,
            'min_recent_requests': min_recent_requests,
            'min_popularity': min_popularity
        }

        instance_id = str(uuid4()).split('-')[0]

        elastic_url = config_get('c3po', 'elastic_url')
        elastic_index = config_get('c3po', 'elastic_index')

        ca_cert = False
        if 'ca_cert' in c3po_options:
            ca_cert = config_get('c3po', 'ca_cert')

        auth = False
        if ('elastic_user' in c3po_options) and ('elastic_pass' in c3po_options):
            auth = HTTPBasicAuth(config_get('c3po', 'elastic_user'),
                                 config_get('c3po', 'elastic_pass'))

        w = waiting_time
        while not GRACEFUL_STOP.is_set():
            # Throttle: sleep in sleep_time increments until waiting_time elapsed.
            if w < waiting_time:
                w += sleep_time
                sleep(sleep_time)
                continue
            len_dids = did_queue.qsize()

            if len_dids > 0:
                logging.debug('(%s) %d did(s) in queue' % (instance_id, len_dids))
            else:
                logging.debug('(%s) no dids in queue' % (instance_id))

            for _ in range(0, len_dids):
                did = did_queue.get()
                if isinstance(did[0], string_types):
                    did[0] = InternalScope(did[0], vo=vo)
                for algorithm, instance in instances.items():
                    logging.info('(%s:%s) Retrieved %s:%s from queue. Run placement algorithm'
                                 % (algorithm, instance_id, did[0], did[1]))
                    decision = instance.place(did)
                    decision['@timestamp'] = datetime.utcnow().isoformat()
                    decision['algorithm'] = algorithm
                    decision['instance_id'] = instance_id
                    decision['params'] = params

                    create_rule = True
                    if sampling and 'error_reason' not in decision:
                        # FIX: hashlib.md5 requires a bytes-like object on Python 3;
                        # hashing the raw str raised TypeError. Encoding is a no-op
                        # change for the py2 digest of ASCII DID strings.
                        digest = md5(str(decision['did']).encode('utf-8')).hexdigest()
                        # Sample ~50% of DIDs based on the digest's low bit.
                        create_rule = bool(ord(digest[-1]) & 1)
                        decision['create_rule'] = create_rule

                    # write the output to ES for further analysis
                    index_url = elastic_url + '/' + elastic_index + '-' \
                        + datetime.utcnow().strftime('%Y-%m') + '/record/'
                    try:
                        if ca_cert:
                            r = post(index_url, data=dumps(decision), verify=ca_cert, auth=auth)
                        else:
                            r = post(index_url, data=dumps(decision))
                        if r.status_code != 201:
                            logging.error(r)
                            logging.error('(%s:%s) could not write to ElasticSearch'
                                          % (algorithm, instance_id))
                    except RequestException as e:
                        logging.error('(%s:%s) could not write to ElasticSearch'
                                      % (algorithm, instance_id))
                        logging.error(e)
                        continue

                    logging.debug(decision)

                    if 'error_reason' in decision:
                        logging.error('(%s:%s) The placement algorithm ran into an error: %s'
                                      % (algorithm, instance_id, decision['error_reason']))
                        continue

                    logging.info('(%s:%s) Decided to place a new replica for %s on %s'
                                 % (algorithm, instance_id, decision['did'],
                                    decision['destination_rse']))

                    if (not dry_run) and create_rule:
                        # DO IT!
                        try:
                            add_rule(client,
                                     {'scope': did[0].external, 'name': did[1]},
                                     decision.get('source_rse'),
                                     decision.get('destination_rse'))
                        except exception.RucioException as e:
                            logging.debug(e)

            w = 0
    except Exception as e:
        logging.critical(e)