def test_get_did_with_dynamic(self):
    """ DATA IDENTIFIERS (CORE): Get did with dynamic resolve of size"""
    tmp_scope = 'mock'
    tmp_dsn1 = 'dsn_%s' % generate_uuid()
    tmp_dsn2 = 'dsn_%s' % generate_uuid()
    tmp_dsn3 = 'dsn_%s' % generate_uuid()
    tmp_dsn4 = 'dsn_%s' % generate_uuid()

    add_did(scope=tmp_scope, name=tmp_dsn1, type=DIDType.DATASET, account='root')
    add_replica(rse='MOCK', scope=tmp_scope, name=tmp_dsn2, bytes=10, account='root')
    add_replica(rse='MOCK', scope=tmp_scope, name=tmp_dsn3, bytes=10, account='root')
    attach_dids(scope=tmp_scope, name=tmp_dsn1,
                dids=[{'scope': tmp_scope, 'name': tmp_dsn2},
                      {'scope': tmp_scope, 'name': tmp_dsn3}],
                account='root')

    add_did(scope=tmp_scope, name=tmp_dsn4, type=DIDType.CONTAINER, account='root')
    attach_dids(scope=tmp_scope, name=tmp_dsn4,
                dids=[{'scope': tmp_scope, 'name': tmp_dsn1}],
                account='root')

    assert_equal(get_did(scope=tmp_scope, name=tmp_dsn1, dynamic=True)['bytes'], 20)
    assert_equal(get_did(scope=tmp_scope, name=tmp_dsn4, dynamic=True)['bytes'], 20)
def check_did(self, did):
    decision = {'did': ':'.join(did)}
    if (self._added_cache.check_dataset(':'.join(did))):
        decision['error_reason'] = 'already added replica for this did in the last 24h'
        return decision

    if (not did[0].startswith('data')) and (not did[0].startswith('mc')):
        decision['error_reason'] = 'not a data or mc dataset'
        return decision

    datatype = did[1].split('.')[4].split('_')[0]
    if datatype not in self._datatypes:
        decision['error_reason'] = 'wrong datatype'
        return decision

    try:
        meta = get_did(did[0], did[1])
    except DataIdentifierNotFound:
        decision['error_reason'] = 'did does not exist'
        return decision
    if meta['length'] is None:
        meta['length'] = 0
    if meta['bytes'] is None:
        meta['bytes'] = 0
    logging.debug('got %s:%s, num_files: %d, bytes: %d' % (did[0], did[1], meta['length'], meta['bytes']))

    decision['length'] = meta['length']
    decision['bytes'] = meta['bytes']

    total_added_bytes = sum(self._added_bytes.get_series('total'))
    total_added_files = sum(self._added_files.get_series('total'))

    logging.debug("total_added_bytes: %d" % total_added_bytes)
    logging.debug("total_added_files: %d" % total_added_files)

    if ((total_added_bytes + meta['bytes']) > self._max_bytes_hour):
        decision['error_reason'] = 'above bytes limit of %d bytes' % self._max_bytes_hour
        return decision
    if ((total_added_files + meta['length']) > self._max_files_hour):
        decision['error_reason'] = 'above files limit of %d files' % self._max_files_hour
        return decision

    last_accesses = self._dc.get_did(did)
    self._dc.add_did(did)
    decision['last_accesses'] = last_accesses

    try:
        pop = get_popularity(did)
        decision['popularity'] = pop or 0.0
    except Exception:
        decision['error_reason'] = 'problems connecting to ES'
        return decision

    if (last_accesses < self._min_recent_requests) and (pop < self._min_popularity):
        decision['error_reason'] = 'did not popular enough'
        return decision

    return decision
def test_delete_replicas_from_datasets(self):
    """ REPLICA (CORE): Delete replicas from dataset """
    tmp_scope = 'mock'
    tmp_dsn1 = 'dsn_%s' % generate_uuid()
    tmp_dsn2 = 'dsn_%s' % generate_uuid()
    nbfiles = 5
    files1 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(),
               'bytes': 1, 'adler32': '0cc737eb',
               'meta': {'events': 10}} for _ in range(nbfiles)]

    add_did(scope=tmp_scope, name=tmp_dsn1, type=DIDType.DATASET, account='root')
    add_did(scope=tmp_scope, name=tmp_dsn2, type=DIDType.DATASET, account='root')

    attach_dids(scope=tmp_scope, name=tmp_dsn1, rse='MOCK', dids=files1, account='root')
    attach_dids(scope=tmp_scope, name=tmp_dsn2, dids=files1, account='root')

    set_status(scope=tmp_scope, name=tmp_dsn1, open=False)

    delete_replicas(rse='MOCK', files=files1)

    with assert_raises(DataIdentifierNotFound):
        get_did(scope=tmp_scope, name=tmp_dsn1)

    get_did(scope=tmp_scope, name=tmp_dsn2)
    assert_equal([f for f in list_files(scope=tmp_scope, name=tmp_dsn2)], [])
def test_delete_replicas(self):
    """ REPLICA (CORE): Delete replicas """
    tmp_scope = 'mock'
    nbfiles = 5
    files1 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(),
               'bytes': 1, 'adler32': '0cc737eb',
               'meta': {'events': 10}} for _ in range(nbfiles)]
    add_replicas(rse='MOCK', files=files1, account='root', ignore_availability=True)

    files2 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(),
               'bytes': 1, 'adler32': '0cc737eb',
               'meta': {'events': 10}} for _ in range(nbfiles)]
    add_replicas(rse='MOCK', files=files2, account='root', ignore_availability=True)
    add_replicas(rse='MOCK3', files=files2, account='root', ignore_availability=True)

    delete_replicas(rse='MOCK', files=files1 + files2)

    for file in files1:
        with assert_raises(DataIdentifierNotFound):
            print(get_did(scope=file['scope'], name=file['name']))

    for file in files2:
        get_did(scope=file['scope'], name=file['name'])
def get_did(scope, name, dynamic=False):
    """
    Retrieve a single data did.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param dynamic: Dynamically resolve the bytes and length of the did.

    :return did: Dictionary containing {'name', 'scope', 'type'}, Exception otherwise
    """
    return did.get_did(scope=scope, name=name, dynamic=dynamic)
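# Usage sketch (not part of the original source): shows how the core-layer get_did above can
# be called with dynamic=True so that 'bytes' and 'length' are resolved on the fly from the
# attached content, as the tests in this collection do. Assumes a configured Rucio core
# environment; the scope/name values 'mock'/'dsn_example' are hypothetical placeholders.
def _example_dynamic_lookup():
    info = get_did(scope='mock', name='dsn_example', dynamic=True)
    # 'bytes' and 'length' here reflect the aggregated size of the attached files
    return info['bytes'], info['length']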
def get_did(scope, name):
    """
    Retrieve a single data did.

    :param scope: The scope name.
    :param name: The data identifier name.

    :return did: Dictionary containing {'name', 'scope', 'type'}, Exception otherwise
    """
    return did.get_did(scope=scope, name=name)
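# Sketch (not from the original source): the docstring above notes that an exception is raised
# when the DID cannot be found; callers elsewhere in this collection guard the lookup the same
# way with DataIdentifierNotFound. The scope/name values are hypothetical placeholders.
def _example_safe_lookup():
    try:
        return get_did(scope='mock', name='does_not_exist')
    except DataIdentifierNotFound:
        # translate a missing DID into a None result for the caller
        return None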
def test_get_did_with_dynamic(self):
    """ DATA IDENTIFIERS (CORE): Get did with dynamic resolve of size"""
    tmp_scope = InternalScope('mock', **self.vo)
    root = InternalAccount('root', **self.vo)
    tmp_dsn1 = 'dsn_%s' % generate_uuid()
    tmp_dsn2 = 'dsn_%s' % generate_uuid()
    tmp_dsn3 = 'dsn_%s' % generate_uuid()
    tmp_dsn4 = 'dsn_%s' % generate_uuid()
    rse_id = get_rse_id(rse='MOCK', **self.vo)

    add_did(scope=tmp_scope, name=tmp_dsn1, type=DIDType.DATASET, account=root)
    add_replica(rse_id=rse_id, scope=tmp_scope, name=tmp_dsn2, bytes=10, account=root)
    add_replica(rse_id=rse_id, scope=tmp_scope, name=tmp_dsn3, bytes=10, account=root)
    attach_dids(scope=tmp_scope, name=tmp_dsn1,
                dids=[{'scope': tmp_scope, 'name': tmp_dsn2},
                      {'scope': tmp_scope, 'name': tmp_dsn3}],
                account=root)

    add_did(scope=tmp_scope, name=tmp_dsn4, type=DIDType.CONTAINER, account=root)
    attach_dids(scope=tmp_scope, name=tmp_dsn4,
                dids=[{'scope': tmp_scope, 'name': tmp_dsn1}],
                account=root)

    assert get_did(scope=tmp_scope, name=tmp_dsn1, dynamic=True)['bytes'] == 20
    assert get_did(scope=tmp_scope, name=tmp_dsn4, dynamic=True)['bytes'] == 20
def get_did(scope, name, dynamic=False, vo='def'):
    """
    Retrieve a single data did.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param dynamic: Dynamically resolve the bytes and length of the did.
    :param vo: The VO to act on.

    :return did: Dictionary containing {'name', 'scope', 'type'}, Exception otherwise
    """
    scope = InternalScope(scope, vo=vo)

    d = did.get_did(scope=scope, name=name, dynamic=dynamic)
    return api_update_return_dict(d)
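# Usage sketch (not part of the original source): the API-layer get_did above takes an external
# scope string plus a VO name and wraps them into an InternalScope itself, so client-facing code
# can pass plain strings; the result is externalized again via api_update_return_dict.
# 'mock'/'dataset_x' and vo='def' are hypothetical example values.
def _example_api_lookup():
    d = get_did(scope='mock', name='dataset_x', dynamic=True, vo='def')
    return d['scope'], d['type'], d.get('bytes')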
def test_atlas_archival_policy(self):
    """ UNDERTAKER (CORE): Test the atlas archival policy. """
    tmp_scope = 'mock'

    nbdatasets = 5
    nbfiles = 5

    rse = 'LOCALGROUPDISK_%s' % rse_name_generator()
    add_rse(rse)
    set_account_limit('jdoe', get_rse_id(rse), -1)

    dsns2 = [{'name': 'dsn_%s' % generate_uuid(),
              'scope': tmp_scope,
              'type': 'DATASET',
              'lifetime': -1,
              'rules': [{'account': 'jdoe', 'copies': 1,
                         'rse_expression': rse,
                         'grouping': 'DATASET'}]} for i in range(nbdatasets)]

    add_dids(dids=dsns2, account='root')

    replicas = list()
    for dsn in dsns2:
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1,
                  'adler32': '0cc737eb', 'tombstone': datetime.utcnow() + timedelta(weeks=2),
                  'meta': {'events': 10}} for i in range(nbfiles)]
        attach_dids(scope=tmp_scope, name=dsn['name'], rse=rse, dids=files, account='root')
        replicas += files

    undertaker(worker_number=1, total_workers=1, once=True)

    for replica in replicas:
        assert(get_replica(scope=replica['scope'], name=replica['name'], rse=rse)['tombstone'] is None)

    for dsn in dsns2:
        assert(get_did(scope='archive', name=dsn['name'])['name'] == dsn['name'])
        assert(len([x for x in list_rules(filters={'scope': 'archive', 'name': dsn['name']})]) == 1)
def place(self, did):
    self.__update_penalties()
    decision = {'did': ':'.join(did)}
    if (not did[0].startswith('data')) and (not did[0].startswith('mc')):
        decision['error_reason'] = 'not a data or mc dataset'
        return decision

    try:
        meta = get_did(did[0], did[1])
    except DataIdentifierNotFound:
        decision['error_reason'] = 'did does not exist'
        return decision
    if meta['length'] is None:
        meta['length'] = 0
    if meta['bytes'] is None:
        meta['bytes'] = 0
    logging.debug('got %s:%s, num_files: %d, bytes: %d' % (did[0], did[1], meta['length'], meta['bytes']))

    decision['length'] = meta['length']
    decision['bytes'] = meta['bytes']

    last_accesses = self._dc.get_did(did)
    self._dc.add_did(did)
    decision['last_accesses'] = last_accesses

    pop = get_popularity(did)
    decision['popularity'] = pop or 0.0

    if (last_accesses < 5) and (pop < 10.0):
        decision['error_reason'] = 'did not popular enough'
        return decision

    # Collect available DATADISK replicas; RSEs that already host one are removed
    # from the pool of candidate destinations.
    free_rses = self._rses
    available_reps = []
    reps = list_dataset_replicas(did[0], did[1])
    num_reps = 0
    for rep in reps:
        rse_attr = list_rse_attributes(rep['rse'])
        if 'type' not in rse_attr:
            continue
        if rse_attr['type'] != 'DATADISK':
            continue
        if rep['state'] == ReplicaState.AVAILABLE:
            if rep['rse'] in free_rses:
                free_rses.remove(rep['rse'])
            available_reps.append(rep['rse'])
            num_reps += 1

    decision['replica_rses'] = available_reps
    decision['num_replicas'] = num_reps
    if num_reps >= 5:
        decision['error_reason'] = 'more than 4 replicas already exist'
        return decision

    # Rank the remaining RSEs by free-space fraction, scaled down by their current penalty,
    # then penalise the chosen destination for subsequent decisions.
    rse_ratios = {}
    space_info = self._fsc.get_rse_space()
    for rse in free_rses:
        rse_space = space_info[rse]
        penalty = self._penalties[rse]
        rse_ratios[rse] = float(rse_space['free']) / float(rse_space['total']) * 100.0 / penalty

    sorted_rses = sorted(rse_ratios.items(), key=itemgetter(1), reverse=True)
    decision['destination_rse'] = sorted_rses[0][0]
    decision['rse_ratios'] = sorted_rses

    self._penalties[sorted_rses[0][0]] = 10.0

    return decision
def test_atlas_archival_policy(self):
    """ UNDERTAKER (CORE): Test the atlas archival policy. """
    if get_policy() != 'atlas':
        LOG.info("Skipping atlas-specific test")
        return

    tmp_scope = InternalScope('mock', **self.vo)
    jdoe = InternalAccount('jdoe', **self.vo)
    root = InternalAccount('root', **self.vo)

    nbdatasets = 5
    nbfiles = 5

    rse = 'LOCALGROUPDISK_%s' % rse_name_generator()
    rse_id = add_rse(rse, **self.vo)
    set_local_account_limit(jdoe, rse_id, -1)

    dsns2 = [{'name': 'dsn_%s' % generate_uuid(),
              'scope': tmp_scope,
              'type': 'DATASET',
              'lifetime': -1,
              'rules': [{'account': jdoe, 'copies': 1,
                         'rse_expression': rse,
                         'grouping': 'DATASET'}]} for _ in range(nbdatasets)]

    add_dids(dids=dsns2, account=root)

    replicas = list()
    for dsn in dsns2:
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1,
                  'adler32': '0cc737eb', 'tombstone': datetime.utcnow() + timedelta(weeks=2),
                  'meta': {'events': 10}} for _ in range(nbfiles)]
        attach_dids(scope=tmp_scope, name=dsn['name'], rse_id=rse_id, dids=files, account=root)
        replicas += files

    undertaker(worker_number=1, total_workers=1, once=True)

    for replica in replicas:
        assert (get_replica(scope=replica['scope'], name=replica['name'], rse_id=rse_id)['tombstone'] is None)

    for dsn in dsns2:
        assert (get_did(scope=InternalScope('archive', **self.vo), name=dsn['name'])['name'] == dsn['name'])
        assert (len([x for x in list_rules(filters={'scope': InternalScope('archive', **self.vo), 'name': dsn['name']})]) == 1)
def test_archive_removal_impact_on_constituents(rse_factory, did_factory, mock_scope, root_account, caches_mock, file_config_mock):
    [cache_region] = caches_mock
    rse_name, rse_id = rse_factory.make_mock_rse()
    scope = mock_scope
    account = root_account

    # Create 2 archives and 4 files:
    # - One only exists in the first archive
    # - One in both, plus another replica, which is not in an archive
    # - One in both, plus another replica, which is not in an archive; and this replica has expired
    # - One in both, plus another replica, which is not in an archive; and this replica has expired;
    #   but a replication rule exists on this second replica
    # Also add these files to datasets, one of which will be removed at the end
    nb_constituents = 4
    nb_c_outside_archive = nb_constituents - 1
    constituent_size = 2000
    archive_size = 1000
    uuid = str(generate_uuid())
    constituents = [{'scope': scope, 'name': 'lfn.%s.%d' % (uuid, i)} for i in range(nb_constituents)]
    did_factory.register_dids(constituents)
    c_first_archive_only, c_with_replica, c_with_expired_replica, c_with_replica_and_rule = constituents

    replica_core.add_replica(rse_id=rse_id, account=account, bytes_=constituent_size, **c_with_replica)
    replica_core.add_replica(rse_id=rse_id, account=account, bytes_=constituent_size,
                             tombstone=datetime.utcnow() - timedelta(days=1), **c_with_expired_replica)
    replica_core.add_replica(rse_id=rse_id, account=account, bytes_=constituent_size,
                             tombstone=datetime.utcnow() - timedelta(days=1), **c_with_replica_and_rule)
    rule_core.add_rule(dids=[c_with_replica_and_rule], account=account, copies=1, rse_expression=rse_name,
                       grouping='NONE', weight=None, lifetime=None, locked=False, subscription_id=None)

    archive1, archive2 = [{'scope': scope, 'name': 'archive_%s.%d.zip' % (uuid, i)} for i in range(2)]
    replica_core.add_replica(rse_id=rse_id, bytes_=archive_size, account=account, **archive1)
    replica_core.add_replica(rse_id=rse_id, bytes_=archive_size, account=account, **archive2)
    did_core.attach_dids(dids=[{'scope': c['scope'], 'name': c['name'], 'bytes': constituent_size} for c in constituents],
                         account=account, **archive1)
    did_core.attach_dids(dids=[{'scope': c['scope'], 'name': c['name'], 'bytes': constituent_size}
                               for c in [c_with_replica, c_with_expired_replica, c_with_replica_and_rule]],
                         account=account, **archive2)

    dataset1, dataset2 = [{'scope': scope, 'name': 'dataset_%s.%i' % (uuid, i)} for i in range(2)]
    did_core.add_did(did_type='DATASET', account=account, **dataset1)
    did_core.attach_dids(dids=constituents, account=account, **dataset1)
    did_core.add_did(did_type='DATASET', account=account, **dataset2)
    did_core.attach_dids(dids=[c_first_archive_only, c_with_expired_replica], account=account, **dataset2)

    @read_session
    def __get_archive_contents_history_count(archive, session=None):
        return session.query(ConstituentAssociationHistory).filter_by(**archive).count()

    # Run reaper the first time.
    # the expired non-archive replica of c_with_expired_replica must be removed,
    # but the did must not be removed, and it must still remain in the dataset because
    # it still has the replica from inside the archive
    assert replica_core.get_replica(rse_id=rse_id, **c_with_expired_replica)
    cache_region.invalidate()
    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=2 * archive_size + nb_c_outside_archive * constituent_size)
    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=2 * archive_size + nb_c_outside_archive * constituent_size, free=1)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)

    for did in constituents + [archive1, archive2]:
        assert did_core.get_did(**did)
    for did in [archive1, archive2, c_with_replica, c_with_replica_and_rule]:
        assert replica_core.get_replica(rse_id=rse_id, **did)
    with pytest.raises(ReplicaNotFound):
        # The replica is only on the archive, not on the constituent
        replica_core.get_replica(rse_id=rse_id, **c_first_archive_only)
    with pytest.raises(ReplicaNotFound):
        # The replica outside the archive was removed by reaper
        nb_c_outside_archive -= 1
        replica_core.get_replica(rse_id=rse_id, **c_with_expired_replica)
    # Compared to get_replica, list_replicas resolves archives, must return replicas for all files
    assert len(list(replica_core.list_replicas(dids=constituents))) == 4
    assert len(list(did_core.list_content(**dataset1))) == 4
    assert len(list(did_core.list_archive_content(**archive1))) == 4
    assert len(list(did_core.list_archive_content(**archive2))) == 3
    assert __get_archive_contents_history_count(archive1) == 0
    assert __get_archive_contents_history_count(archive2) == 0

    # Expire the first archive and run reaper again
    # the archive will be removed; and c_first_archive_only must be removed from datasets
    # and from the did table.
    replica_core.set_tombstone(rse_id=rse_id, tombstone=datetime.utcnow() - timedelta(days=1), **archive1)
    cache_region.invalidate()
    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=2 * archive_size + nb_c_outside_archive * constituent_size)
    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=2 * archive_size + nb_c_outside_archive * constituent_size, free=1)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)

    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**archive1)
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**c_first_archive_only)
    assert len(list(replica_core.list_replicas(dids=constituents))) == 3
    assert len(list(did_core.list_content(**dataset1))) == 3
    assert len(list(did_core.list_archive_content(**archive1))) == 0
    assert len(list(did_core.list_archive_content(**archive2))) == 3
    assert __get_archive_contents_history_count(archive1) == 4
    assert __get_archive_contents_history_count(archive2) == 0

    # Expire the second archive replica and run reaper another time
    # c_with_expired_replica is removed because its external replica got removed at previous step
    # and it exists only inside the archive now.
    # If not open, Dataset2 will be removed because it will be empty.
    did_core.set_status(open=False, **dataset2)
    replica_core.set_tombstone(rse_id=rse_id, tombstone=datetime.utcnow() - timedelta(days=1), **archive2)
    cache_region.invalidate()
    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=archive_size + nb_c_outside_archive * constituent_size)
    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=archive_size + nb_c_outside_archive * constituent_size, free=1)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)

    # The archive must be removed
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**archive2)
    # The DIDs which only existed in the archive are also removed
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**c_first_archive_only)
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**c_with_expired_replica)
    # If the DID has a non-expired replica outside the archive without rules on it, the DID is not removed
    assert did_core.get_did(**c_with_replica)
    # If the DID has an expired replica outside the archive, but has rules on that replica, the DID is not removed
    assert did_core.get_did(**c_with_replica_and_rule)
    assert len(list(replica_core.list_replicas(dids=constituents))) == 2
    assert len(list(did_core.list_content(**dataset1))) == 2
    with pytest.raises(DataIdentifierNotFound):
        did_core.get_did(**dataset2)
    assert len(list(did_core.list_content(**dataset2))) == 0
    assert len(list(did_core.list_archive_content(**archive2))) == 0
    assert __get_archive_contents_history_count(archive1) == 4
    assert __get_archive_contents_history_count(archive2) == 3
def test_abacus_collection_replica_new(vo, rse_factory, rucio_client, did_factory, core_config_mock, caches_mock):
    """ ABACUS (COLLECTION REPLICA): Test update of collection replica. """
    file_sizes = 2
    nfiles = 2
    dataset_scope = 'mock'
    rse, rse_id = rse_factory.make_posix_rse()
    dids = did_factory.upload_test_dataset(rse_name=rse, scope=dataset_scope, size=file_sizes, nb_files=nfiles)
    files = [{'scope': did['did_scope'], 'name': did['did_name']} for did in dids]
    dataset = dids[0]['dataset_name']
    rucio_client.set_metadata(dataset_scope, dataset, 'lifetime', -1)
    rucio_client.add_replication_rule([{'scope': dataset_scope, 'name': dataset}], 1, rse, lifetime=-1)

    # Check dataset replica after rule creation - initial data
    dataset_replica = [replica for replica in rucio_client.list_dataset_replicas(dataset_scope, dataset)][0]
    assert dataset_replica['bytes'] == 0
    assert dataset_replica['length'] == 0
    assert dataset_replica['available_bytes'] == 0
    assert dataset_replica['available_length'] == 0
    assert str(dataset_replica['state']) == 'UNAVAILABLE'

    # Run Abacus
    collection_replica.run(once=True)

    # Check dataset replica after abacus - abacus should update the collection_replica table from updated_col_rep
    dataset_replica = [replica for replica in rucio_client.list_dataset_replicas(dataset_scope, dataset)][0]
    assert dataset_replica['bytes'] == len(files) * file_sizes
    assert dataset_replica['length'] == len(files)
    assert dataset_replica['available_bytes'] == len(files) * file_sizes
    assert dataset_replica['available_length'] == len(files)
    assert str(dataset_replica['state']) == 'AVAILABLE'

    # Delete one file -> collection replica should be unavailable
    cleaner.run(once=True)
    delete_replicas(rse_id=rse_id, files=[{'name': files[0]['name'], 'scope': InternalScope(dataset_scope, vo)}])
    rucio_client.add_replication_rule([{'scope': dataset_scope, 'name': dataset}], 1, rse, lifetime=-1)
    collection_replica.run(once=True)
    dataset_replica = [replica for replica in rucio_client.list_dataset_replicas(dataset_scope, dataset)][0]
    assert dataset_replica['length'] == len(files)
    assert dataset_replica['bytes'] == len(files) * file_sizes
    assert dataset_replica['available_length'] == len(files) - 1
    assert dataset_replica['available_bytes'] == (len(files) - 1) * file_sizes
    assert str(dataset_replica['state']) == 'UNAVAILABLE'

    # Delete all files -> collection replica should be deleted
    # New behaviour (dataset should be deleted)
    cleaner.run(once=True)
    delete_replicas(rse_id=rse_id, files=[{'name': files[1]['name'], 'scope': InternalScope(dataset_scope, vo)}])
    with pytest.raises(DataIdentifierNotFound):
        get_did(scope=InternalScope(dataset_scope), name=dataset)
                  'bytes': 1L, 'adler32': '0cc737eb',
                  'tombstone': datetime.utcnow() + timedelta(weeks=2),
                  'meta': {'events': 10}} for i in xrange(nbfiles)]
        attach_dids(scope=tmp_scope, name=dsn['name'], rse=rse, dids=files, account='root')
        replicas += files

    undertaker(worker_number=1, total_workers=1, once=True)

    for replica in replicas:
        assert (get_replica(scope=replica['scope'], name=replica['name'], rse=rse)['tombstone'] is None)

    for dsn in dsns2:
        assert (get_did(scope='archive', name=dsn['name'])['name'] == dsn['name'])
        assert (len([x for x in list_rules(filters={'scope': 'archive', 'name': dsn['name']})]) == 1)
def test_delete_replicas(self):
    """ REPLICA (CORE): Delete replicas """
    tmp_scope = 'mock'
    nbfiles = 5
    files1 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1L,
               'adler32': '0cc737eb', 'meta': {'events': 10}} for i in xrange(nbfiles)]
    add_replicas(rse='MOCK', files=files1, account='root', ignore_availability=True)

    files2 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1L,
               'adler32': '0cc737eb', 'meta': {'events': 10}} for i in xrange(nbfiles)]
    add_replicas(rse='MOCK', files=files2, account='root', ignore_availability=True)
    add_replicas(rse='MOCK3', files=files2, account='root', ignore_availability=True)

    delete_replicas(rse='MOCK', files=files1 + files2)

    for file in files1:
        with assert_raises(DataIdentifierNotFound):
            get_did(scope=file['scope'], name=file['name'])

    for file in files2:
        get_did(scope=file['scope'], name=file['name'])

def test_delete_replicas_from_datasets(self):
    """ REPLICA (CORE): Delete replicas from dataset """
    tmp_scope = 'mock'
    tmp_dsn1 = 'dsn_%s' % generate_uuid()
    tmp_dsn2 = 'dsn_%s' % generate_uuid()
    nbfiles = 5
    files1 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1L,
               'adler32': '0cc737eb', 'meta': {'events': 10}} for i in xrange(nbfiles)]

    add_did(scope=tmp_scope, name=tmp_dsn1, type=DIDType.DATASET, account='root')
    add_did(scope=tmp_scope, name=tmp_dsn2, type=DIDType.DATASET, account='root')
        }
    } for i in xrange(nbfiles)]
    add_replicas(rse='MOCK', files=files2, account='root', ignore_availability=True)
    add_replicas(rse='MOCK3', files=files2, account='root', ignore_availability=True)

    delete_replicas(rse='MOCK', files=files1 + files2)

    for file in files1:
        with assert_raises(DataIdentifierNotFound):
            print get_did(scope=file['scope'], name=file['name'])

    for file in files2:
        get_did(scope=file['scope'], name=file['name'])

def test_delete_replicas_from_datasets(self):
    """ REPLICA (CORE): Delete replicas from dataset """
    tmp_scope = 'mock'
    tmp_dsn1 = 'dsn_%s' % generate_uuid()
    tmp_dsn2 = 'dsn_%s' % generate_uuid()
    nbfiles = 5
    files1 = [{
        'scope': tmp_scope,
        'name': 'file_%s' % generate_uuid(),
        'bytes': 1L,
        'adler32': '0cc737eb',
def place(self, did):
    self.__update_penalties()
    decision = {'did': ':'.join(did)}
    try:
        meta = get_did(did[0], did[1])
    except DataIdentifierNotFound:
        decision['error_reason'] = 'did does not exist'
        return decision
    if meta['length'] is None:
        meta['length'] = 0
    if meta['bytes'] is None:
        meta['bytes'] = 0
    logging.debug('got %s:%s, num_files: %d, bytes: %d' % (did[0], did[1], meta['length'], meta['bytes']))

    decision['length'] = meta['length']
    decision['bytes'] = meta['bytes']

    # Collect the RSEs and sites that already hold an available replica.
    available_rses = []
    available_sites = []
    reps = list_dataset_replicas(did[0], did[1])
    num_reps = 0
    for rep in reps:
        if rep['state'] == ReplicaState.AVAILABLE:
            available_rses.append(rep['rse'])
            available_sites.append(self._mc.ddm_to_site(rep['rse']))
            num_reps += 1

    decision['replica_rses'] = available_rses
    decision['num_replicas'] = num_reps
    if num_reps >= 5:
        decision['error_reason'] = 'more than 4 replicas already exist'
        return decision

    # Rank sites by their job activity ratio, weighted by the current penalty.
    site_ratios = {}
    site_job_info = {}
    for panda_site in self._wc.get_sites():
        site = self._mc.panda_to_site(panda_site)
        job_info = self._wc.get_job_info(panda_site)
        ratio = float(job_info[0]) / (float(job_info[1]) + float(job_info[2]) / 2)
        penalty = self._penalties[site]
        site_ratios[site] = ratio * penalty
        site_job_info[site] = (job_info, penalty)

    decision['site_ratios'] = site_ratios
    decision['site_job_info'] = site_job_info

    # Pick the best-ranked site that does not already hold a replica and has a DATADISK RSE.
    picked_site = None
    picked_rse = None
    for site, _ in sorted(site_ratios.items(), key=itemgetter(1)):
        if site in available_sites:
            continue
        rses_for_site = self._mc.site_to_ddm(site)
        if rses_for_site is None:
            continue
        for rse in rses_for_site:
            if 'DATADISK' in rse:
                picked_rse = rse
                picked_site = site
                break
        if picked_rse:
            break

    if picked_rse is None:
        decision['error_reason'] = 'could not pick RSE'
        return decision

    decision['destination_rse'] = picked_rse
    if picked_site:
        self._penalties[site] = 1

    # Prefer a non-TAPE replica as the transfer source.
    picked_source = None
    shuffle(available_rses)
    for rse in available_rses:
        if 'TAPE' in rse:
            continue
        picked_source = rse
        break

    if picked_source is None:
        picked_source = available_rses[0]

    decision['source_rse'] = picked_source
    logging.debug("Picked %s as source and %s as destination RSE" % (picked_source, picked_rse))

    return decision
def place(self, did):
    self.__update_penalties()
    self._added_bytes.trim()
    self._added_files.trim()

    decision = self.check_did(did)
    if 'error_reason' in decision:
        return decision

    meta = get_did(did[0], did[1])

    # Collect candidate source replicas: available DATADISK replicas on readable RSEs
    # for which network metrics towards one of the configured destination sites exist.
    available_reps = {}
    reps = list_dataset_replicas(did[0], did[1])
    num_reps = 0
    space_info = self._fsc.get_rse_space()
    max_mbps = 0.0
    for rep in reps:
        rse_attr = list_rse_attributes(rep['rse'])
        src_rse = rep['rse']
        if 'site' not in rse_attr:
            continue
        src_site = rse_attr['site']
        src_rse_info = get_rse(src_rse)
        if 'type' not in rse_attr:
            continue
        if rse_attr['type'] != 'DATADISK':
            continue
        if src_rse_info['availability'] & 4 == 0:
            continue
        if rep['state'] == ReplicaState.AVAILABLE:
            if rep['available_length'] == 0:
                continue
            # Take the first metric source that returns data (fts, fax, perfsonar, dashb).
            net_metrics = {}
            net_metrics_type = None
            for metric_type in ('fts', 'fax', 'perfsonar', 'dashb'):
                net_metrics_type = metric_type
                net_metrics = self._nmc.getMbps(src_site, metric_type)
                if net_metrics:
                    break
            if len(net_metrics) == 0:
                continue
            available_reps[src_rse] = {}
            for dst_site, mbps in net_metrics.items():
                if src_site == dst_site:
                    continue
                if dst_site in self._sites:
                    if mbps > max_mbps:
                        max_mbps = mbps
                    dst_rse = self._sites[dst_site]['rse']
                    dst_rse_info = get_rse(dst_rse)

                    if dst_rse_info['availability'] & 2 == 0:
                        continue

                    site_added_bytes = sum(self._added_bytes.get_series(dst_rse))
                    site_added_files = sum(self._added_files.get_series(dst_rse))

                    if ((site_added_bytes + meta['bytes']) > self._max_bytes_hour_rse):
                        continue
                    if ((site_added_files + meta['length']) > self._max_files_hour_rse):
                        continue

                    queued = self._nmc.getQueuedFiles(src_site, dst_site)

                    # logging.debug('queued %s -> %s: %d' % (src_site, dst_site, queued))

                    if queued > 0:
                        continue

                    rse_space = space_info.get(dst_rse, {'free': 0, 'total': 1})

                    if src_rse not in self._src_penalties:
                        self._src_penalties[src_rse] = 100.0
                    src_penalty = self._src_penalties[src_rse]
                    if dst_rse not in self._dst_penalties:
                        self._dst_penalties[dst_rse] = 100.0
                    dst_penalty = self._dst_penalties[dst_rse]

                    free_space = float(rse_space['free']) / float(rse_space['total']) * 100.0
                    available_reps[src_rse][dst_rse] = {'free_space': free_space,
                                                        'src_penalty': src_penalty,
                                                        'dst_penalty': dst_penalty,
                                                        'mbps': float(mbps),
                                                        'metrics_type': net_metrics_type}

            num_reps += 1

    # decision['replica_rses'] = available_reps
    decision['num_replicas'] = num_reps

    if num_reps >= 5:
        decision['error_reason'] = 'more than 4 replicas already exist'
        return decision

    src_dst_ratios = []

    if max_mbps == 0.0:
        decision['error_reason'] = 'could not find enough network metrics'
        return decision

    # Score each src/dst pair by destination free space plus bandwidth normalised to the
    # best observed link, scaled by the source and destination penalties.
    for src, dsts in available_reps.items():
        for dst, metrics in dsts.items():
            if dst in available_reps:
                continue
            bdw = (metrics['mbps'] / max_mbps) * 100.0
            src_penalty = self._src_penalties[src]
            dst_penalty = self._dst_penalties[dst]

            ratio = ((metrics['free_space'] / 4.0) + bdw) * src_penalty * dst_penalty
            src_dst_ratios.append((src, dst, ratio))

    if len(src_dst_ratios) == 0:
        decision['error_reason'] = 'found no suitable src/dst for replication'
        return decision

    sorted_ratios = sorted(src_dst_ratios, key=itemgetter(2), reverse=True)
    logging.debug(sorted_ratios)
    destination_rse = sorted_ratios[0][1]
    source_rse = sorted_ratios[0][0]
    decision['destination_rse'] = destination_rse
    decision['source_rse'] = source_rse
    # decision['rse_ratios'] = src_dst_ratios

    # Penalise the chosen endpoints and book the added volume against the hourly budgets.
    self._dst_penalties[destination_rse] = 10.0
    self._src_penalties[source_rse] = 10.0

    self._added_cache.add_dataset(':'.join(did))

    self._added_bytes.add_point(destination_rse, meta['bytes'])
    self._added_files.add_point(destination_rse, meta['length'])

    self._added_bytes.add_point('total', meta['bytes'])
    self._added_files.add_point('total', meta['length'])

    return decision