def test_bb8_rebalance_rule(vo, root_account, jdoe_account, rse_factory, mock_scope, did_factory):
    """BB8: Test the rebalance rule method"""
    rse1, rse1_id = rse_factory.make_posix_rse()
    rse2, rse2_id = rse_factory.make_posix_rse()

    # Add Tags
    T1 = tag_generator()
    T2 = tag_generator()
    add_rse_attribute(rse1_id, T1, True)
    add_rse_attribute(rse2_id, T2, True)

    # Add fake weights
    add_rse_attribute(rse1_id, "fakeweight", 10)
    add_rse_attribute(rse2_id, "fakeweight", 0)

    # Add quota
    set_local_account_limit(jdoe_account, rse1_id, -1)
    set_local_account_limit(jdoe_account, rse2_id, -1)
    set_local_account_limit(root_account, rse1_id, -1)
    set_local_account_limit(root_account, rse2_id, -1)

    files = create_files(3, mock_scope, rse1_id)
    dataset = did_factory.make_dataset()
    attach_dids(mock_scope, dataset['name'], files, jdoe_account)
    set_status(mock_scope, dataset['name'], open=False)

    # Invalidate the cache because the result of parse_expression is cached
    REGION.invalidate()

    rule_id = add_rule(dids=[{'scope': mock_scope, 'name': dataset['name']}], account=jdoe_account,
                       copies=1, rse_expression=rse1, grouping='NONE', weight='fakeweight',
                       lifetime=None, locked=False, subscription_id=None)[0]
    rule = {}
    try:
        rule = get_rule(rule_id)
    except RuleNotFound:
        pytest.raises(RuleNotFound, get_rule, rule_id)

    child_rule = rebalance_rule(rule, 'Rebalance', rse2, priority=3)
    rule_cleaner(once=True)
    assert(get_rule(rule_id)['expires_at'] <= datetime.utcnow())
    assert(get_rule(rule_id)['child_rule_id'] == child_rule)

    rule_cleaner(once=True)
    assert(get_rule(rule_id)['expires_at'] <= datetime.utcnow())

    successful_transfer(scope=mock_scope, name=files[0]['name'], rse_id=rse2_id, nowait=False)
    successful_transfer(scope=mock_scope, name=files[1]['name'], rse_id=rse2_id, nowait=False)
    # The rule cannot be deleted while its child rule still depends on it
    with pytest.raises(UnsupportedOperation):
        delete_rule(rule_id)
    successful_transfer(scope=mock_scope, name=files[2]['name'], rse_id=rse2_id, nowait=False)
    rule_cleaner(once=True)
    assert(get_rule(child_rule)['state'] == RuleState.OK)

    # Teardown: expire the dataset and let the undertaker remove it
    set_metadata(mock_scope, dataset['name'], 'lifetime', -86400)
    undertaker.run(once=True)
def set_status(scope, name, issuer, vo='def', **kwargs):
    """
    Set data identifier status

    :param scope: The scope name.
    :param name: The data identifier name.
    :param issuer: The issuer account.
    :param kwargs: Keyword arguments of the form status_name=value.
    :param vo: The VO to act on.
    """
    if not rucio.api.permission.has_permission(issuer=issuer, vo=vo, action='set_status',
                                               kwargs={'scope': scope, 'name': name, 'issuer': issuer}):
        raise rucio.common.exception.AccessDenied(
            'Account %s can not set status on data identifier %s:%s' % (issuer, scope, name))
    scope = InternalScope(scope, vo=vo)
    return did.set_status(scope=scope, name=name, **kwargs)
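# A minimal usage sketch of the API-level set_status() above, assuming it is imported from
# rucio.api.did. The scope/name values and the 'root' issuer are illustrative; open=False is
# forwarded through **kwargs to the core-layer did.set_status, and AccessDenied is raised
# when the permission check fails.
from rucio.common.exception import AccessDenied


def close_dataset_example(scope='mock', name='dsn_example', issuer='root', vo='def'):
    try:
        # Close the dataset so no further files can be attached to it
        set_status(scope=scope, name=name, issuer=issuer, vo=vo, open=False)
    except AccessDenied:
        # The issuer account lacks the 'set_status' permission for this DID
        raise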
def test_delete_replicas_from_datasets(self):
    """ REPLICA (CORE): Delete replicas from dataset """
    tmp_scope = 'mock'
    tmp_dsn1 = 'dsn_%s' % generate_uuid()
    tmp_dsn2 = 'dsn_%s' % generate_uuid()
    nbfiles = 5
    files1 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1,
               'adler32': '0cc737eb', 'meta': {'events': 10}} for _ in range(nbfiles)]

    add_did(scope=tmp_scope, name=tmp_dsn1, type=DIDType.DATASET, account='root')
    add_did(scope=tmp_scope, name=tmp_dsn2, type=DIDType.DATASET, account='root')

    attach_dids(scope=tmp_scope, name=tmp_dsn1, rse='MOCK', dids=files1, account='root')
    attach_dids(scope=tmp_scope, name=tmp_dsn2, dids=files1, account='root')

    set_status(scope=tmp_scope, name=tmp_dsn1, open=False)

    delete_replicas(rse='MOCK', files=files1)

    with assert_raises(DataIdentifierNotFound):
        get_did(scope=tmp_scope, name=tmp_dsn1)
    get_did(scope=tmp_scope, name=tmp_dsn2)
    assert_equal([f for f in list_files(scope=tmp_scope, name=tmp_dsn2)], [])
def test_dataset_callback_no(self):
    """ REPLICATION RULE (CORE): Test dataset callback should not be sent"""
    scope = 'mock'
    files = create_files(3, scope, self.rse1, bytes=100)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')
    set_status(scope=scope, name=dataset, open=False)

    rule_id = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1,
                       rse_expression=self.rse3, grouping='DATASET', weight=None, lifetime=None,
                       locked=False, subscription_id=None, notify='C')[0]

    successful_transfer(scope=scope, name=files[0]['name'], rse_id=self.rse3_id, nowait=False)
    successful_transfer(scope=scope, name=files[1]['name'], rse_id=self.rse3_id, nowait=False)

    # Only 2 of the 3 files were transferred, so the dataset callback must not have been sent
    assert(False == check_dataset_ok_callback(scope, dataset, self.rse3, rule_id))
def set_status(scope, name, issuer, **kwargs):
    """
    Set data identifier status

    :param scope: The scope name.
    :param name: The data identifier name.
    :param issuer: The issuer account.
    :param kwargs: Keyword arguments of the form status_name=value.
    """
    if not rucio.api.permission.has_permission(issuer=issuer, action='set_status',
                                               kwargs={'scope': scope, 'name': name, 'issuer': issuer}):
        raise rucio.common.exception.AccessDenied(
            'Account %s can not set status on data identifier %s:%s' % (issuer, scope, name))
    return did.set_status(scope=scope, name=name, **kwargs)
def test_archive_removal_impact_on_constituents(rse_factory, did_factory, mock_scope, root_account, caches_mock, file_config_mock):
    [cache_region] = caches_mock
    rse_name, rse_id = rse_factory.make_mock_rse()
    scope = mock_scope
    account = root_account

    # Create 2 archives and 4 files:
    # - One only exists in the first archive
    # - One in both, plus another replica, which is not in an archive
    # - One in both, plus another replica, which is not in an archive; and this replica has expired
    # - One in both, plus another replica, which is not in an archive; and this replica has expired;
    #   but a replication rule exists on this second replica
    # Also add these files to datasets, one of which will be removed at the end
    nb_constituents = 4
    nb_c_outside_archive = nb_constituents - 1
    constituent_size = 2000
    archive_size = 1000
    uuid = str(generate_uuid())
    constituents = [{'scope': scope, 'name': 'lfn.%s.%d' % (uuid, i)} for i in range(nb_constituents)]
    did_factory.register_dids(constituents)
    c_first_archive_only, c_with_replica, c_with_expired_replica, c_with_replica_and_rule = constituents

    replica_core.add_replica(rse_id=rse_id, account=account, bytes_=constituent_size, **c_with_replica)
    replica_core.add_replica(rse_id=rse_id, account=account, bytes_=constituent_size,
                             tombstone=datetime.utcnow() - timedelta(days=1), **c_with_expired_replica)
    replica_core.add_replica(rse_id=rse_id, account=account, bytes_=constituent_size,
                             tombstone=datetime.utcnow() - timedelta(days=1), **c_with_replica_and_rule)
    rule_core.add_rule(dids=[c_with_replica_and_rule], account=account, copies=1, rse_expression=rse_name,
                       grouping='NONE', weight=None, lifetime=None, locked=False, subscription_id=None)

    archive1, archive2 = [{'scope': scope, 'name': 'archive_%s.%d.zip' % (uuid, i)} for i in range(2)]
    replica_core.add_replica(rse_id=rse_id, bytes_=archive_size, account=account, **archive1)
    replica_core.add_replica(rse_id=rse_id, bytes_=archive_size, account=account, **archive2)
    did_core.attach_dids(dids=[{'scope': c['scope'], 'name': c['name'], 'bytes': constituent_size} for c in constituents],
                         account=account, **archive1)
    did_core.attach_dids(dids=[{'scope': c['scope'], 'name': c['name'], 'bytes': constituent_size}
                               for c in [c_with_replica, c_with_expired_replica, c_with_replica_and_rule]],
                         account=account, **archive2)

    dataset1, dataset2 = [{'scope': scope, 'name': 'dataset_%s.%i' % (uuid, i)} for i in range(2)]
    did_core.add_did(did_type='DATASET', account=account, **dataset1)
    did_core.attach_dids(dids=constituents, account=account, **dataset1)
    did_core.add_did(did_type='DATASET', account=account, **dataset2)
    did_core.attach_dids(dids=[c_first_archive_only, c_with_expired_replica], account=account, **dataset2)

    @read_session
    def __get_archive_contents_history_count(archive, session=None):
        return session.query(ConstituentAssociationHistory).filter_by(**archive).count()

    # Run reaper the first time.
    # The expired non-archive replica of c_with_expired_replica must be removed,
    # but the did must not be removed, and it must still remain in the dataset because
    # it still has the replica from inside the archive
    assert replica_core.get_replica(rse_id=rse_id, **c_with_expired_replica)
    cache_region.invalidate()
    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=2 * archive_size + nb_c_outside_archive * constituent_size)
    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=2 * archive_size + nb_c_outside_archive * constituent_size, free=1)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)

    for did in constituents + [archive1, archive2]:
        assert did_core.get_did(**did)
    for did in [archive1, archive2, c_with_replica, c_with_replica_and_rule]:
        assert replica_core.get_replica(rse_id=rse_id, **did)
    with pytest.raises(ReplicaNotFound):
        # The replica is only on the archive, not on the constituent
        replica_core.get_replica(rse_id=rse_id, **c_first_archive_only)
    with pytest.raises(ReplicaNotFound):
        # The replica outside the archive was removed by reaper
        nb_c_outside_archive -= 1
        replica_core.get_replica(rse_id=rse_id, **c_with_expired_replica)
    # Compared to get_replica, list_replicas resolves archives, must return replicas for all files
    assert len(list(replica_core.list_replicas(dids=constituents))) == 4
    assert len(list(did_core.list_content(**dataset1))) == 4
    assert len(list(did_core.list_archive_content(**archive1))) == 4
    assert len(list(did_core.list_archive_content(**archive2))) == 3
    assert __get_archive_contents_history_count(archive1) == 0
    assert __get_archive_contents_history_count(archive2) == 0

    # Expire the first archive and run reaper again:
    # the archive will be removed; and c_first_archive_only must be removed from datasets
    # and from the did table.
    replica_core.set_tombstone(rse_id=rse_id, tombstone=datetime.utcnow() - timedelta(days=1), **archive1)
    cache_region.invalidate()
    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=2 * archive_size + nb_c_outside_archive * constituent_size)
    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=2 * archive_size + nb_c_outside_archive * constituent_size, free=1)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)

    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**archive1)
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**c_first_archive_only)
    assert len(list(replica_core.list_replicas(dids=constituents))) == 3
    assert len(list(did_core.list_content(**dataset1))) == 3
    assert len(list(did_core.list_archive_content(**archive1))) == 0
    assert len(list(did_core.list_archive_content(**archive2))) == 3
    assert __get_archive_contents_history_count(archive1) == 4
    assert __get_archive_contents_history_count(archive2) == 0

    # Expire the second archive replica and run reaper another time:
    # c_with_expired_replica is removed because its external replica got removed at the previous step
    # and it exists only inside the archive now.
    # If not open, dataset2 will be removed because it will be empty.
    did_core.set_status(open=False, **dataset2)

    replica_core.set_tombstone(rse_id=rse_id, tombstone=datetime.utcnow() - timedelta(days=1), **archive2)
    cache_region.invalidate()
    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=archive_size + nb_c_outside_archive * constituent_size)
    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=archive_size + nb_c_outside_archive * constituent_size, free=1)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)

    # The archive must be removed
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**archive2)
    # The DIDs which only existed in the archive are also removed
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**c_first_archive_only)
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**c_with_expired_replica)
    # If the DID has a non-expired replica outside the archive without rules on it, the DID is not removed
    assert did_core.get_did(**c_with_replica)
    # If the DID has an expired replica outside the archive, but has rules on that replica, the DID is not removed
    assert did_core.get_did(**c_with_replica_and_rule)

    assert len(list(replica_core.list_replicas(dids=constituents))) == 2
    assert len(list(did_core.list_content(**dataset1))) == 2
    with pytest.raises(DataIdentifierNotFound):
        did_core.get_did(**dataset2)
    assert len(list(did_core.list_content(**dataset2))) == 0
    assert len(list(did_core.list_archive_content(**archive2))) == 0
    assert __get_archive_contents_history_count(archive1) == 4
    assert __get_archive_contents_history_count(archive2) == 3
def test_bb8_full_workflow(vo, root_account, jdoe_account, rse_factory, mock_scope, did_factory):
    """BB8: Test the rebalance rule method"""
    config_core.set(section='bb8', option='allowed_accounts', value='jdoe')
    tot_rses = 4
    rses = [rse_factory.make_posix_rse() for _ in range(tot_rses)]
    rse1, rse1_id = rses[0]
    rse2, rse2_id = rses[1]
    rse3, rse3_id = rses[2]
    rse4, rse4_id = rses[3]

    # Add Tags
    # RSE 1 and 2 match expression T1=true
    # RSE 3 and 4 match expression T2=true
    T1 = tag_generator()
    T2 = tag_generator()
    add_rse_attribute(rse1_id, T1, True)
    add_rse_attribute(rse2_id, T1, True)
    add_rse_attribute(rse3_id, T2, True)
    add_rse_attribute(rse4_id, T2, True)

    # Add fake weights
    add_rse_attribute(rse1_id, "fakeweight", 10)
    add_rse_attribute(rse2_id, "fakeweight", 0)
    add_rse_attribute(rse3_id, "fakeweight", 0)
    add_rse_attribute(rse4_id, "fakeweight", 0)
    add_rse_attribute(rse1_id, "freespace", 1)
    add_rse_attribute(rse2_id, "freespace", 1)
    add_rse_attribute(rse3_id, "freespace", 1)
    add_rse_attribute(rse4_id, "freespace", 1)

    # Add quota
    set_local_account_limit(jdoe_account, rse1_id, -1)
    set_local_account_limit(jdoe_account, rse2_id, -1)
    set_local_account_limit(jdoe_account, rse3_id, -1)
    set_local_account_limit(jdoe_account, rse4_id, -1)
    set_local_account_limit(root_account, rse1_id, -1)
    set_local_account_limit(root_account, rse2_id, -1)
    set_local_account_limit(root_account, rse3_id, -1)
    set_local_account_limit(root_account, rse4_id, -1)

    # Invalidate the cache because the result of parse_expression is cached
    REGION.invalidate()

    tot_datasets = 4
    # Create a list of datasets
    datasets = [did_factory.make_dataset() for _ in range(tot_datasets)]
    dsn = [dataset['name'] for dataset in datasets]

    rules = list()
    base_unit = 100000000000
    nb_files1 = 7
    nb_files2 = 5
    nb_files3 = 3
    nb_files4 = 2
    file_size = 1 * base_unit
    rule_to_rebalance = None

    # Add one secondary file
    files = create_files(1, mock_scope, rse1_id, bytes_=1)
    add_rule(dids=[{'scope': mock_scope, 'name': files[0]['name']}], account=jdoe_account, copies=1,
             rse_expression=rse1, grouping='DATASET', weight=None, lifetime=-86400, locked=False,
             subscription_id=None)[0]
    for cnt in range(3, tot_rses):
        add_replicas(rses[cnt][1], files, jdoe_account)
        add_rule(dids=[{'scope': mock_scope, 'name': files[0]['name']}], account=jdoe_account, copies=1,
                 rse_expression=rses[cnt][0], grouping='DATASET', weight=None, lifetime=-86400,
                 locked=False, subscription_id=None)[0]
    rule_cleaner(once=True)

    # Create dataset 1 of 700 GB and create a rule on RSE 1 and RSE 3
    files = create_files(nb_files1, mock_scope, rse1_id, bytes_=file_size)
    attach_dids(mock_scope, dsn[0], files, jdoe_account)
    rule_id = add_rule(dids=[{'scope': mock_scope, 'name': dsn[0]}], account=jdoe_account, copies=1,
                       rse_expression=rse1, grouping='DATASET', weight=None, lifetime=None,
                       locked=False, subscription_id=None)[0]
    rules.append(rule_id)
    add_replicas(rse3_id, files, jdoe_account)
    rule_id = add_rule(dids=[{'scope': mock_scope, 'name': dsn[0]}], account=jdoe_account, copies=1,
                       rse_expression=rse3, grouping='DATASET', weight=None, lifetime=None,
                       locked=False, subscription_id=None)[0]
    rules.append(rule_id)

    # Create dataset 2 of 500 GB and create a rule on RSE 1 and RSE 2
    files = create_files(nb_files2, mock_scope, rse1_id, bytes_=file_size)
    attach_dids(mock_scope, dsn[1], files, jdoe_account)
    rule_id = add_rule(dids=[{'scope': mock_scope, 'name': dsn[1]}], account=jdoe_account, copies=1,
                       rse_expression=rse1, grouping='DATASET', weight=None, lifetime=None,
                       locked=False, subscription_id=None)[0]
    rules.append(rule_id)
    add_replicas(rse2_id, files, jdoe_account)
    rule_id = add_rule(dids=[{'scope': mock_scope, 'name': dsn[1]}], account=jdoe_account, copies=1,
                       rse_expression=rse2, grouping='DATASET', weight=None, lifetime=None,
                       locked=False, subscription_id=None)[0]
    rules.append(rule_id)

    # Create dataset 3 of 300 GB and create a rule on RSE 1. The copy on RSE 3 is secondary
    files = create_files(nb_files3, mock_scope, rse1_id, bytes_=file_size)
    attach_dids(mock_scope, dsn[2], files, jdoe_account)
    rule_id = add_rule(dids=[{'scope': mock_scope, 'name': dsn[2]}], account=jdoe_account, copies=1,
                       rse_expression=rse1, grouping='DATASET', weight=None, lifetime=None,
                       locked=False, subscription_id=None)[0]
    rule_to_rebalance = rule_id
    rules.append(rule_id)
    add_replicas(rse3_id, files, jdoe_account)
    rule_id = add_rule(dids=[{'scope': mock_scope, 'name': dsn[2]}], account=jdoe_account, copies=1,
                       rse_expression=rse3, grouping='DATASET', weight=None, lifetime=-86400,
                       locked=False, subscription_id=None)[0]
    rule_cleaner(once=True)
    try:
        rule = get_rule(rule_id)
    except RuleNotFound:
        pytest.raises(RuleNotFound, get_rule, rule_id)

    # Create dataset 4 of 200 GB and create a rule on RSE 3. The copy on RSE 2 is secondary
    files = create_files(nb_files4, mock_scope, rse3_id, bytes_=file_size)
    attach_dids(mock_scope, dsn[3], files, jdoe_account)
    rule_id = add_rule(dids=[{'scope': mock_scope, 'name': dsn[3]}], account=jdoe_account, copies=1,
                       rse_expression=rse3, grouping='DATASET', weight=None, lifetime=None,
                       locked=False, subscription_id=None)[0]
    rules.append(rule_id)
    add_replicas(rse2_id, files, jdoe_account)
    rule_id = add_rule(dids=[{'scope': mock_scope, 'name': dsn[3]}], account=jdoe_account, copies=1,
                       rse_expression=rse2, grouping='DATASET', weight=None, lifetime=-86400,
                       locked=False, subscription_id=None)[0]
    rule_cleaner(once=True)
    try:
        rule = get_rule(rule_id)
    except RuleNotFound:
        pytest.raises(RuleNotFound, get_rule, rule_id)

    for dataset in dsn:
        set_status(mock_scope, dataset, open=False)

    for rse in rses:
        fill_rse_expired(rse[1])
        set_rse_usage(rse_id=rse[1], source='min_free_space', used=2 * base_unit, free=2 * base_unit, session=None)
        set_rse_usage(rse_id=rse[1], source='storage', used=15 * base_unit, free=2 * base_unit, session=None)
    set_rse_usage(rse_id=rse2_id, source='min_free_space', used=1 * base_unit, free=1 * base_unit, session=None)
    set_rse_usage(rse_id=rse2_id, source='storage', used=6 * base_unit, free=5 * base_unit, session=None)

    run_abacus(once=True, threads=1, fill_history_table=False, sleep_time=10)

    # Summary:
    # RSE 1 : 1500 GB primary + 1 B secondary
    tot_space = [src for src in get_rse_usage(rse1_id) if src['source'] == 'rucio'][0]
    expired = [src for src in get_rse_usage(rse1_id) if src['source'] == 'expired'][0]
    assert tot_space['used'] == (nb_files1 + nb_files2 + nb_files3) * file_size + 1
    assert expired['used'] == 1
    # RSE 2 : 500 GB primary + 200 GB secondary
    tot_space = [src for src in get_rse_usage(rse2_id) if src['source'] == 'rucio'][0]
    expired = [src for src in get_rse_usage(rse2_id) if src['source'] == 'expired'][0]
    assert tot_space['used'] == (nb_files2 + nb_files4) * file_size
    assert expired['used'] == nb_files4 * file_size

    # Total primary on T1=true : 2000 GB
    # Total secondary on T1=true : 200 GB
    # Ratio secondary / primary = 10 %
    # Ratio on RSE 1 : 0 %
    # Ratio on RSE 2 : 40 %

    # Now run BB8
    re_evaluator(once=True, sleep_time=30, did_limit=100)
    bb8_run(once=True, rse_expression='%s=true' % str(T1), move_subscriptions=False, use_dump=False,
            sleep_time=300, threads=1, dry_run=False)

    for rule_id in rules:
        rule = get_rule(rule_id)
        if rule_id != rule_to_rebalance:
            assert(rule['child_rule_id'] is None)
        else:
            assert(rule['child_rule_id'] is not None)
            # timedelta needed to prevent failure due to rounding effects
            assert(rule['expires_at'] <= datetime.utcnow() + timedelta(seconds=1))
            child_rule_id = rule['child_rule_id']
    child_rule = get_rule(child_rule_id)
    assert(child_rule['rse_expression'] == rse2)

    # For teardown, delete the child rule
    update_rule(child_rule_id, {'lifetime': -86400})
    rule_cleaner(once=True)
    for dataset in dsn:
        set_metadata(mock_scope, dataset, 'lifetime', -86400)
    undertaker.run(once=True)
def test_delete_replicas_from_datasets(self):
    """ REPLICA (CORE): Delete replicas from dataset """
    tmp_scope = 'mock'
    tmp_dsn1 = 'dsn_%s' % generate_uuid()
    tmp_dsn2 = 'dsn_%s' % generate_uuid()
    nbfiles = 5
    files1 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1,
               'adler32': '0cc737eb', 'meta': {'events': 10}} for _ in range(nbfiles)]

    add_did(scope=tmp_scope, name=tmp_dsn1, type=DIDType.DATASET, account='root')
    add_did(scope=tmp_scope, name=tmp_dsn2, type=DIDType.DATASET, account='root')

    attach_dids(scope=tmp_scope, name=tmp_dsn1, rse='MOCK', dids=files1, account='root')
    attach_dids(scope=tmp_scope, name=tmp_dsn2, dids=files1, account='root')

    set_status(scope=tmp_scope, name=tmp_dsn1, open=False)

    delete_replicas(rse='MOCK', files=files1)

    with assert_raises(DataIdentifierNotFound):
        get_did(scope=tmp_scope, name=tmp_dsn1)
    get_did(scope=tmp_scope, name=tmp_dsn2)
    assert_equal([f for f in list_files(scope=tmp_scope, name=tmp_dsn2)], [])

def test_update_lock_counter(self):
    """ RSE (CORE): Test the update of a replica lock counter """
    rse = 'MOCK'
    tmp_scope = 'mock'
    tmp_file = 'file_%s' % generate_uuid()