def test_repair_a_rule_with_source_replica_expression(self):
    """ JUDGE REPAIRER: Test the judge with two rules, one with a source_replica_expression"""
    scope = 'mock'
    files = create_files(3, scope, self.rse4)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    # Add a first rule to the DS
    rule_id1 = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=self.rse1, grouping='DATASET', weight=None, lifetime=None, locked=False, subscription_id=None)[0]
    rule_id2 = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=self.rse3, grouping='DATASET', weight=None, lifetime=None, locked=False, subscription_id=None, source_replica_expression=self.rse1)[0]

    assert(RuleState.REPLICATING == get_rule(rule_id1)['state'])
    assert(RuleState.STUCK == get_rule(rule_id2)['state'])

    successful_transfer(scope=scope, name=files[0]['name'], rse_id=self.rse1_id, nowait=False)
    successful_transfer(scope=scope, name=files[1]['name'], rse_id=self.rse1_id, nowait=False)
    successful_transfer(scope=scope, name=files[2]['name'], rse_id=self.rse1_id, nowait=False)

    # Also make the replicas AVAILABLE
    session = get_session()
    replica = session.query(models.RSEFileAssociation).filter_by(scope=scope, name=files[0]['name'], rse_id=self.rse1_id).one()
    replica.state = ReplicaState.AVAILABLE
    replica = session.query(models.RSEFileAssociation).filter_by(scope=scope, name=files[1]['name'], rse_id=self.rse1_id).one()
    replica.state = ReplicaState.AVAILABLE
    replica = session.query(models.RSEFileAssociation).filter_by(scope=scope, name=files[2]['name'], rse_id=self.rse1_id).one()
    replica.state = ReplicaState.AVAILABLE
    session.commit()

    rule_repairer(once=True)

    assert(RuleState.OK == get_rule(rule_id1)['state'])
    assert(RuleState.REPLICATING == get_rule(rule_id2)['state'])
def test_account_counter_judge_evaluate_detach(self):
    """ JUDGE EVALUATOR: Test if the account counter is updated correctly when a file is removed from a DS"""
    re_evaluator(once=True)
    account_update(once=True)

    scope = 'mock'
    files = create_files(3, scope, self.rse1, bytes=100)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    # Add a first rule to the DS
    add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=self.rse1, grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)

    account_update(once=True)
    account_counter_before = get_counter(self.rse1_id, 'jdoe')

    detach_dids(scope, dataset, [files[0]])

    # Fake judge
    re_evaluator(once=True)
    account_update(once=True)

    account_counter_after = get_counter(self.rse1_id, 'jdoe')
    assert(account_counter_before['bytes'] - 100 == account_counter_after['bytes'])
    assert(account_counter_before['files'] - 1 == account_counter_after['files'])
def test_add_rule_container_dataset_with_weights(self):
    """ REPLICATION RULE (CORE): Add a replication rule on a container, DATASET Grouping, WEIGHTS"""
    scope = 'mock'
    container = 'container_' + str(uuid())
    add_did(scope, container, DIDType.from_sym('CONTAINER'), 'jdoe')
    all_files = []
    dataset_files = []
    for i in xrange(3):
        files = create_files(3, scope, self.rse1)
        all_files.extend(files)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
        attach_dids(scope, dataset, files, 'jdoe')
        attach_dids(scope, container, [{'scope': scope, 'name': dataset}], 'jdoe')
        dataset_files.append({'scope': scope, 'name': dataset, 'files': files})

    add_rule(dids=[{'scope': scope, 'name': container}], account='jdoe', copies=2, rse_expression=self.T1, grouping='DATASET', weight='fakeweight', lifetime=None, locked=False, subscription_id=None)

    t1 = set([self.rse1_id, self.rse3_id, self.rse5_id])
    for dataset in dataset_files:
        first_locks = None
        for file in dataset['files']:
            if first_locks is None:
                first_locks = set([lock['rse_id'] for lock in get_replica_locks(scope=file['scope'], name=file['name'])])
            rse_locks = set([lock['rse_id'] for lock in get_replica_locks(scope=file['scope'], name=file['name'])])
            assert(len(t1.intersection(rse_locks)) == 2)
            assert(len(first_locks.intersection(rse_locks)) == 2)
            assert_in(self.rse1_id, rse_locks)
def test_judge_evaluate_detach_datasetlock(self):
    """ JUDGE EVALUATOR: Test if a datasetlock is detached correctly when removing a dataset from a container"""
    re_evaluator(once=True)

    scope = 'mock'
    files = create_files(3, scope, self.rse1, bytes=100)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    container = 'container_' + str(uuid())
    add_did(scope, container, DIDType.from_sym('CONTAINER'), 'jdoe')
    attach_dids(scope, container, [{'scope': scope, 'name': dataset}], 'jdoe')

    # Add a rule to the Container
    add_rule(dids=[{'scope': scope, 'name': container}], account='jdoe', copies=1, rse_expression=self.rse1, grouping='DATASET', weight=None, lifetime=None, locked=False, subscription_id=None)

    # Check if the datasetlock is there
    locks = [ds_lock for ds_lock in get_dataset_locks(scope=scope, name=dataset)]
    assert(len(locks) > 0)

    detach_dids(scope, container, [{'scope': scope, 'name': dataset}])

    # Fake judge
    re_evaluator(once=True)

    locks = [ds_lock for ds_lock in get_dataset_locks(scope=scope, name=dataset)]
    assert(len(locks) == 0)
def test_to_repair_a_rule_with_only_1_rse_whose_transfers_failed(self):
    """ JUDGE REPAIRER: Test to repair a rule with only 1 rse whose transfers failed (lock)"""
    rule_repairer(once=True)  # Clean out the repairer

    scope = 'mock'
    files = create_files(4, scope, self.rse4, bytes=100)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    rule_id = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=self.rse1, grouping='DATASET', weight=None, lifetime=None, locked=False, subscription_id=None)[0]

    successful_transfer(scope=scope, name=files[0]['name'], rse_id=get_replica_locks(scope=files[0]['scope'], name=files[2]['name'])[0].rse_id, nowait=False)
    successful_transfer(scope=scope, name=files[1]['name'], rse_id=get_replica_locks(scope=files[1]['scope'], name=files[2]['name'])[0].rse_id, nowait=False)
    failed_transfer(scope=scope, name=files[2]['name'], rse_id=get_replica_locks(scope=files[2]['scope'], name=files[2]['name'])[0].rse_id)
    failed_transfer(scope=scope, name=files[3]['name'], rse_id=get_replica_locks(scope=files[3]['scope'], name=files[3]['name'])[0].rse_id)
    cancel_request_did(scope=scope, name=files[2]['name'], dest_rse_id=get_replica_locks(scope=files[2]['scope'], name=files[2]['name'])[0].rse_id)
    cancel_request_did(scope=scope, name=files[3]['name'], dest_rse_id=get_replica_locks(scope=files[3]['scope'], name=files[2]['name'])[0].rse_id)

    assert(rule_id == get_rule(rule_id)['id'].replace('-', '').lower())
    assert(RuleState.STUCK == get_rule(rule_id)['state'])
    rule_repairer(once=True)

    # Still STUCK because of the repair delays:
    assert(RuleState.STUCK == get_rule(rule_id)['state'])
    assert(get_replica_locks(scope=files[2]['scope'], name=files[2]['name'])[0].rse_id == get_replica_locks(scope=files[3]['scope'], name=files[3]['name'])[0].rse_id)
def test_add_rule(self):
    """ REPLICATION RULE (CLIENT): Add a replication rule """
    scope = 'mock'
    files = create_files(3, scope, self.rse1)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    ret = self.rule_client.add_replication_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=2, rse_expression=self.T1, grouping='NONE')
    assert_is_instance(ret, list)
def test_get_rule(self):
    """ REPLICATION RULE (CORE): Test to get a previously created rule"""
    scope = 'mock'
    files = create_files(3, scope, self.rse1)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    rule_id = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=2, rse_expression=self.T1, grouping='NONE', weight='fakeweight', lifetime=None, locked=False, subscription_id=None)[0]
    assert(rule_id == get_rule(rule_id)['id'].replace('-', '').lower())
    assert_raises(RuleNotFound, get_rule, uuid())
def test_dataset_lock(self):
    """ DATASETLOCK (CLIENT): Get a datasetlock for a specific dataset"""
    scope = 'mock'
    files = create_files(3, scope, self.rse1)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    rule_id_1 = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=self.rse1, grouping='DATASET', weight='fakeweight', lifetime=None, locked=True, subscription_id=None)[0]

    rule_ids = [lock['rule_id'] for lock in self.lock_client.get_dataset_locks(scope=scope, name=dataset)]
    assert_in(rule_id_1, rule_ids)
def test_delete_rule(self):
    """ REPLICATION RULE (CLIENT): Delete a replication rule """
    scope = 'mock'
    files = create_files(3, scope, self.rse1)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    rule_id = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=self.rse1, grouping='NONE', weight='fakeweight', lifetime=None, locked=False, subscription_id=None)[0]

    ret = self.rule_client.delete_replication_rule(rule_id=rule_id)
    assert(ret is True)
    assert_raises(RuleNotFound, self.rule_client.delete_replication_rule, rule_id)
def test_get_rule_by_account(self):
    """ ACCOUNT (CLIENT): Get Replication Rule by account """
    scope = 'mock'
    files = create_files(3, scope, self.rse1)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    ret = self.rule_client.add_replication_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=2, rse_expression=self.T1, grouping='NONE')
    get = self.account_client.list_account_rules('jdoe')
    rules = [rule['id'] for rule in get]
    assert_in(ret[0], rules)
def test_locked_rule(self):
    """ REPLICATION RULE (CLIENT): Delete a locked replication rule"""
    scope = 'mock'
    files = create_files(3, scope, self.rse1)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    rule_id_1 = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=self.rse1, grouping='NONE', weight='fakeweight', lifetime=None, locked=True, subscription_id=None)[0]

    assert_raises(AccessDenied, delete_rule, rule_id_1)
    self.rule_client.update_replication_rule(rule_id=rule_id_1, options={'locked': False})
    delete_rule(rule_id=rule_id_1)
def test_add_rule_duplicate(self):
    """ REPLICATION RULE (CORE): Add a replication rule duplicate"""
    scope = 'mock'
    files = create_files(3, scope, self.rse1)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    # Add a first rule to the DS
    add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=2, rse_expression=self.T1, grouping='NONE', weight=None, lifetime=None, locked=False, subscription_id=None)

    # Adding a second, identical rule must raise a DuplicateRule exception
    assert_raises(DuplicateRule, add_rule, dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=2, rse_expression=self.T1, grouping='NONE', weight=None, lifetime=None, locked=False, subscription_id=None)
def test_delete_rule(self):
    """ REPLICATION RULE (CORE): Test to delete a previously created rule"""
    scope = 'mock'
    files = create_files(3, scope, self.rse1)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    rule_id = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=2, rse_expression=self.T1, grouping='DATASET', weight='fakeweight', lifetime=None, locked=False, subscription_id=None)[0]

    delete_rule(rule_id)
    for file in files:
        rse_locks = get_replica_locks(scope=file['scope'], name=file['name'])
        assert(len(rse_locks) == 0)
    assert_raises(RuleNotFound, delete_rule, uuid())
def test_rule_add_fails_account_limit(self):
    """ REPLICATION RULE (CORE): Test if a rule fails correctly on an account limit conflict"""
    scope = 'mock'
    files = create_files(3, scope, self.rse1, bytes=100)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    set_account_limit(account='jdoe', rse_id=self.rse1_id, bytes=5)

    assert_raises(InsufficientAccountLimit, add_rule, dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=self.rse1, grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)

    delete_account_limit(account='jdoe', rse_id=self.rse1_id)
def test_add_rule_dataset_none_with_weights(self):
    """ REPLICATION RULE (CORE): Add a replication rule on a dataset, NONE Grouping, WEIGHTS"""
    scope = 'mock'
    files = create_files(3, scope, self.rse1)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=2, rse_expression=self.T1, grouping='NONE', weight="fakeweight", lifetime=None, locked=False, subscription_id=None)

    # Check if the Locks are created properly
    t1 = set([self.rse1_id, self.rse3_id, self.rse5_id])
    for file in files:
        rse_locks = set([lock['rse_id'] for lock in get_replica_locks(scope=file['scope'], name=file['name'])])
        assert(len(t1.intersection(rse_locks)) == 2)
        assert_in(self.rse1_id, rse_locks)
def test_add_rule_with_ignore_availability(self):
    """ REPLICATION RULE (CORE): Add a replication rule with ignore_availability setting"""
    rse = rse_name_generator()
    add_rse(rse)
    update_rse(rse, {'availability_write': False})

    scope = 'mock'
    files = create_files(3, scope, self.rse1)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    with assert_raises(InvalidRSEExpression):
        add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=rse, grouping='NONE', weight=None, lifetime=None, locked=False, subscription_id=None)[0]

    add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=rse, grouping='NONE', weight=None, lifetime=None, locked=False, subscription_id=None, ignore_availability=True)[0]
def test_add_rule_with_purge(self):
    """ REPLICATION RULE (CORE): Add a replication rule with purge setting"""
    scope = 'mock'
    files = create_files(3, scope, self.rse1)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    rule_id = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=self.rse4, grouping='NONE', weight=None, lifetime=None, locked=False, subscription_id=None, purge_replicas=True)[0]

    delete_rule(rule_id)

    # Check that the replicas have been marked OBSOLETE by the purge
    for file in files:
        replica = get_replica(rse=self.rse4, scope=file['scope'], name=file['name'])
        assert(replica['tombstone'] == OBSOLETE)
def test_change_rule_lifetime(self):
    """ REPLICATION RULE (CLIENT): Change rule lifetime"""
    scope = 'mock'
    files = create_files(3, scope, self.rse1)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    rule_id_1 = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=self.rse1, grouping='DATASET', weight='fakeweight', lifetime=150, locked=True, subscription_id=None)[0]

    get = self.rule_client.get_replication_rule(rule_id_1)
    self.rule_client.update_replication_rule(rule_id_1, options={'lifetime': 10000})
    get2 = self.rule_client.get_replication_rule(rule_id_1)
    assert(get['expires_at'] != get2['expires_at'])
def test_list_rules_by_did(self):
    """ DID (CLIENT): List Replication Rules per DID """
    scope = 'mock'
    files = create_files(3, scope, self.rse1)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    rule_id_1 = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=self.rse1, grouping='NONE', weight='fakeweight', lifetime=None, locked=False, subscription_id=None)[0]
    rule_id_2 = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=self.rse3, grouping='NONE', weight='fakeweight', lifetime=None, locked=False, subscription_id=None)[0]

    ret = self.did_client.list_did_rules(scope=scope, name=dataset)
    ids = [rule['id'] for rule in ret]
    assert_in(rule_id_1, ids)
    assert_in(rule_id_2, ids)
def test_dataset_callback_no(self):
    """ REPLICATION RULE (CORE): Test dataset callback should not be sent"""
    scope = 'mock'
    files = create_files(3, scope, self.rse1, bytes=100)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')
    set_status(scope=scope, name=dataset, open=False)

    rule_id = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=self.rse3, grouping='DATASET', weight=None, lifetime=None, locked=False, subscription_id=None, notify='C')[0]

    successful_transfer(scope=scope, name=files[0]['name'], rse_id=self.rse3_id, nowait=False)
    successful_transfer(scope=scope, name=files[1]['name'], rse_id=self.rse3_id, nowait=False)

    # Only two of the three transfers succeeded, so no dataset callback must have been sent
    assert(False == check_dataset_ok_callback(scope, dataset, self.rse3, rule_id))
def test_rse_counter_unavailable_replicas(self):
    """ REPLICATION RULE (CORE): Test if creating UNAVAILABLE replicas updates the RSE Counter correctly"""
    rse_update(once=True)
    rse_counter_before = get_rse_counter(self.rse3_id)

    scope = 'mock'
    files = create_files(3, scope, self.rse1, bytes=100)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=self.rse3, grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)

    # Check if the rse counter has been updated correctly
    rse_update(once=True)
    rse_counter_after = get_rse_counter(self.rse3_id)
    assert(rse_counter_before['bytes'] + 3 * 100 == rse_counter_after['bytes'])
    assert(rse_counter_before['files'] + 3 == rse_counter_after['files'])
def test_add_rule_container_none(self):
    """ REPLICATION RULE (CORE): Add a replication rule on a container, NONE Grouping"""
    scope = 'mock'
    container = 'container_' + str(uuid())
    add_did(scope, container, DIDType.from_sym('CONTAINER'), 'jdoe')
    all_files = []
    for i in xrange(3):
        files = create_files(3, scope, self.rse1)
        all_files.extend(files)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
        attach_dids(scope, dataset, files, 'jdoe')
        attach_dids(scope, container, [{'scope': scope, 'name': dataset}], 'jdoe')

    add_rule(dids=[{'scope': scope, 'name': container}], account='jdoe', copies=1, rse_expression=self.T2, grouping='NONE', weight=None, lifetime=None, locked=False, subscription_id=None)
    for file in all_files:
        rse_locks = set([lock['rse_id'] for lock in get_replica_locks(scope=file['scope'], name=file['name'])])
        assert_in(self.rse4_id, rse_locks)
        assert_not_in(self.rse5_id, rse_locks)
def test_delete_rule_and_cancel_transfers(self):
    """ REPLICATION RULE (CORE): Test to delete a previously created rule and do not cancel overlapping transfers"""
    scope = 'mock'
    files = create_files(3, scope, self.rse1)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    rule_id_1 = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=self.rse1, grouping='NONE', weight='fakeweight', lifetime=None, locked=False, subscription_id=None)[0]
    add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=2, rse_expression=self.T1, grouping='NONE', weight='fakeweight', lifetime=None, locked=False, subscription_id=None)[0]
    add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=3, rse_expression=self.T1, grouping='NONE', weight='fakeweight', lifetime=None, locked=False, subscription_id=None)[0]

    delete_rule(rule_id_1)

    for file in files:
        rse_locks = get_replica_locks(scope=file['scope'], name=file['name'])
        assert(len(rse_locks) == 5)
        # TODO Need to check transfer queue here, this is actually not the check of this test case
    assert_raises(RuleNotFound, delete_rule, uuid())
def test_account_counter_rule_create(self):
    """ REPLICATION RULE (CORE): Test if the account counter is updated correctly when a new rule is created"""
    account_update(once=True)
    account_counter_before = get_account_counter(self.rse1_id, 'jdoe')

    scope = 'mock'
    files = create_files(3, scope, self.rse1, bytes=100)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=1, rse_expression=self.rse1, grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)

    # Check if the counter has been updated correctly
    account_update(once=True)
    account_counter_after = get_account_counter(self.rse1_id, 'jdoe')
    assert(account_counter_before['bytes'] + 3 * 100 == account_counter_after['bytes'])
    assert(account_counter_before['files'] + 3 == account_counter_after['files'])
def test_add_rules_datasets_none(self):
    """ REPLICATION RULE (CORE): Add replication rules to multiple datasets, NONE Grouping"""
    scope = 'mock'
    files1 = create_files(3, scope, self.rse4)
    dataset1 = 'dataset_' + str(uuid())
    add_did(scope, dataset1, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset1, files1, 'jdoe')

    files2 = create_files(3, scope, self.rse4)
    dataset2 = 'dataset_' + str(uuid())
    add_did(scope, dataset2, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset2, files2, 'jdoe')

    # Add the rules to both DS
    add_rules(dids=[{'scope': scope, 'name': dataset1}, {'scope': scope, 'name': dataset2}],
              rules=[{'account': 'jdoe', 'copies': 1, 'rse_expression': self.T1, 'grouping': 'NONE', 'weight': None, 'lifetime': None, 'locked': False, 'subscription_id': None},
                     {'account': 'root', 'copies': 1, 'rse_expression': self.T1, 'grouping': 'NONE', 'weight': 'fakeweight', 'lifetime': None, 'locked': False, 'subscription_id': None}])

    # Check if the Locks are created properly
    for file in files1:
        rse_locks = [lock['rse_id'] for lock in get_replica_locks(scope=file['scope'], name=file['name'])]
        assert(rse_locks[0] == rse_locks[1])

    for file in files2:
        rse_locks = [lock['rse_id'] for lock in get_replica_locks(scope=file['scope'], name=file['name'])]
        assert(rse_locks[0] == rse_locks[1])
def test_judge_add_files_to_dataset(self):
    """ JUDGE EVALUATOR: Test the judge when adding files to dataset"""
    scope = 'mock'
    files = create_files(3, scope, self.rse1)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')

    # Add a first rule to the DS
    add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=2, rse_expression=self.T1, grouping='DATASET', weight=None, lifetime=None, locked=False, subscription_id=None)

    attach_dids(scope, dataset, files, 'jdoe')
    re_evaluator(once=True)

    files = create_files(3, scope, self.rse1)
    attach_dids(scope, dataset, files, 'jdoe')

    # Fake judge
    re_evaluator(once=True)

    # Check if the Locks are created properly
    for file in files:
        assert(len(get_replica_locks(scope=file['scope'], name=file['name'])) == 2)
def test_repair_a_rule_with_missing_locks(self):
    """ JUDGE REPAIRER: Test the judge when a rule gets STUCK from re_evaluating and there are missing locks"""
    scope = 'mock'
    files = create_files(3, scope, self.rse4)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')

    # Add a first rule to the DS
    rule_id = add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=2, rse_expression=self.T1, grouping='DATASET', weight=None, lifetime=None, locked=False, subscription_id=None)[0]

    attach_dids(scope, dataset, files, 'jdoe')

    # Fake judge
    re_evaluator(once=True)

    # Check if the Locks are created properly
    for file in files:
        assert(len(get_replica_locks(scope=file['scope'], name=file['name'])) == 2)

    # Add more files to the DID
    files2 = create_files(3, scope, self.rse4)
    attach_dids(scope, dataset, files2, 'jdoe')

    # Mark the rule STUCK to fake that the re-evaluation failed
    session = get_session()
    rule = session.query(models.ReplicationRule).filter_by(id=rule_id).one()
    rule.state = RuleState.STUCK
    session.commit()

    rule_repairer(once=True)

    for file in files:
        assert(len(get_replica_locks(scope=file['scope'], name=file['name'])) == 2)
    for file in files2:
        assert(len(get_replica_locks(scope=file['scope'], name=file['name'])) == 2)
        assert(len(set([lock.rse_id for lock in get_replica_locks(scope=files[0]['scope'], name=files[0]['name'])]).intersection(set([lock.rse_id for lock in get_replica_locks(scope=file['scope'], name=file['name'])]))) == 2)
    assert(12 == get_rule(rule_id)['locks_replicating_cnt'])
def test_add_rule_dataset_dataset(self):
    """ REPLICATION RULE (CORE): Add a replication rule on a dataset, DATASET Grouping"""
    scope = 'mock'
    files = create_files(3, scope, self.rse1)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
    attach_dids(scope, dataset, files, 'jdoe')

    add_rule(dids=[{'scope': scope, 'name': dataset}], account='jdoe', copies=2, rse_expression=self.T1, grouping='DATASET', weight=None, lifetime=None, locked=False, subscription_id=None)

    # Check if the Locks are created properly
    t1 = set([self.rse1_id, self.rse3_id, self.rse5_id])
    first_locks = None
    for file in files:
        if first_locks is None:
            first_locks = set([lock['rse_id'] for lock in get_replica_locks(scope=file['scope'], name=file['name'])])
        rse_locks = set([lock['rse_id'] for lock in get_replica_locks(scope=file['scope'], name=file['name'])])
        assert(len(t1.intersection(rse_locks)) == 2)
        assert(len(first_locks.intersection(rse_locks)) == 2)

    # Check if the DatasetLocks are created properly
    dataset_locks = [lock for lock in get_dataset_locks(scope=scope, name=dataset)]
    assert(len(t1.intersection(set([lock['rse_id'] for lock in dataset_locks]))) == 2)
    assert(len(first_locks.intersection(set([lock['rse_id'] for lock in dataset_locks]))) == 2)
def attach_dids(scope, name, attachment, issuer):
    """
    Append content to a data DID.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param attachment: The attachment.
    :param issuer: The issuer account.
    """
    validate_schema(name='attachment', obj=attachment)

    kwargs = {'scope': scope, 'name': name, 'attachment': attachment}
    if not rucio.api.permission.has_permission(issuer=issuer, action='attach_dids', kwargs=kwargs):
        raise rucio.common.exception.AccessDenied('Account %s can not add data identifiers to %s:%s' % (issuer, scope, name))

    return did.attach_dids(scope=scope, name=name, dids=attachment['dids'], account=attachment.get('account', issuer), rse=attachment.get('rse'))
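# For reference, a minimal sketch of the attachment payload this API wrapper
# expects. The shape is inferred from the attachment['dids'],
# attachment.get('account', issuer) and attachment.get('rse') accesses above;
# the concrete scope, name, and RSE values below are illustrative assumptions,
# not fixtures from this code base.
example_attachment = {
    'rse': 'MOCK',      # optional: RSE on which the replicas already exist (assumed name)
    'account': 'jdoe',  # optional: falls back to the issuer account
    'dids': [{'scope': 'mock', 'name': 'file_1'},   # DIDs to attach (illustrative names)
             {'scope': 'mock', 'name': 'file_2'}],
}
attach_dids(scope='mock', name='dataset_x', attachment=example_attachment, issuer='jdoe')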
def test_atlas_archival_policy(self): """ UNDERTAKER (CORE): Test the atlas archival policy. """ if get_policy() != 'atlas': LOG.info("Skipping atlas-specific test") return tmp_scope = InternalScope('mock', **self.vo) jdoe = InternalAccount('jdoe', **self.vo) root = InternalAccount('root', **self.vo) nbdatasets = 5 nbfiles = 5 rse = 'LOCALGROUPDISK_%s' % rse_name_generator() rse_id = add_rse(rse, **self.vo) set_local_account_limit(jdoe, rse_id, -1) dsns2 = [{ 'name': 'dsn_%s' % generate_uuid(), 'scope': tmp_scope, 'type': 'DATASET', 'lifetime': -1, 'rules': [{ 'account': jdoe, 'copies': 1, 'rse_expression': rse, 'grouping': 'DATASET' }] } for i in range(nbdatasets)] add_dids(dids=dsns2, account=root) replicas = list() for dsn in dsns2: files = [{ 'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'tombstone': datetime.utcnow() + timedelta(weeks=2), 'meta': { 'events': 10 } } for i in range(nbfiles)] attach_dids(scope=tmp_scope, name=dsn['name'], rse_id=rse_id, dids=files, account=root) replicas += files undertaker(worker_number=1, total_workers=1, once=True) for replica in replicas: assert (get_replica(scope=replica['scope'], name=replica['name'], rse_id=rse_id)['tombstone'] is None) for dsn in dsns2: assert (get_did(scope=InternalScope('archive', **self.vo), name=dsn['name'])['name'] == dsn['name']) assert (len([ x for x in list_rules( filters={ 'scope': InternalScope('archive', **self.vo), 'name': dsn['name'] }) ]) == 1)
def test_to_repair_a_rule_with_NONE_grouping_whose_transfer_failed(self): """ JUDGE REPAIRER: Test to repair a rule with 1 failed transfer (lock)""" rule_repairer(once=True) # Clean out the repairer scope = InternalScope('mock') files = create_files(3, scope, self.rse4_id, bytes=100) dataset = 'dataset_' + str(uuid()) add_did(scope, dataset, DIDType.from_sym('DATASET'), self.jdoe) attach_dids(scope, dataset, files, self.jdoe) rule_id = add_rule(dids=[{ 'scope': scope, 'name': dataset }], account=self.jdoe, copies=1, rse_expression=self.T1, grouping='NONE', weight=None, lifetime=None, locked=False, subscription_id=None)[0] failed_rse_id = get_replica_locks(scope=files[2]['scope'], name=files[2]['name'])[0].rse_id assert (get_replica( scope=files[2]['scope'], name=files[2]['name'], rse_id=failed_rse_id)['state'] == ReplicaState.COPYING) assert (get_replica(scope=files[2]['scope'], name=files[2]['name'], rse_id=failed_rse_id)['lock_cnt'] == 1) successful_transfer( scope=scope, name=files[0]['name'], rse_id=get_replica_locks(scope=files[0]['scope'], name=files[2]['name'])[0].rse_id, nowait=False) successful_transfer( scope=scope, name=files[1]['name'], rse_id=get_replica_locks(scope=files[1]['scope'], name=files[2]['name'])[0].rse_id, nowait=False) failed_transfer( scope=scope, name=files[2]['name'], rse_id=get_replica_locks(scope=files[2]['scope'], name=files[2]['name'])[0].rse_id) assert (rule_id == get_rule(rule_id)['id'].replace('-', '').lower()) assert (RuleState.STUCK == get_rule(rule_id)['state']) rule_repairer(once=True) assert (RuleState.REPLICATING == get_rule(rule_id)['state']) assert (get_replica( scope=files[2]['scope'], name=files[2]['name'], rse_id=failed_rse_id)['state'] == ReplicaState.UNAVAILABLE) assert (get_replica(scope=files[2]['scope'], name=files[2]['name'], rse_id=failed_rse_id)['lock_cnt'] == 0)
def test_to_repair_a_rule_with_only_1_rse_whose_transfers_failed(self):
    """ JUDGE REPAIRER: Test to repair a rule with only 1 rse whose transfers failed (lock)"""
    rule_repairer(once=True)  # Clean out the repairer

    scope = InternalScope('mock')
    files = create_files(4, scope, self.rse4_id, bytes=100)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), self.jdoe)
    attach_dids(scope, dataset, files, self.jdoe)

    rule_id = add_rule(dids=[{'scope': scope, 'name': dataset}],
                       account=self.jdoe,
                       copies=1,
                       rse_expression=self.rse1,
                       grouping='DATASET',
                       weight=None,
                       lifetime=None,
                       locked=False,
                       subscription_id=None)[0]

    successful_transfer(scope=scope,
                        name=files[0]['name'],
                        rse_id=get_replica_locks(scope=files[0]['scope'], name=files[2]['name'])[0].rse_id,
                        nowait=False)
    successful_transfer(scope=scope,
                        name=files[1]['name'],
                        rse_id=get_replica_locks(scope=files[1]['scope'], name=files[2]['name'])[0].rse_id,
                        nowait=False)
    failed_transfer(scope=scope,
                    name=files[2]['name'],
                    rse_id=get_replica_locks(scope=files[2]['scope'], name=files[2]['name'])[0].rse_id)
    failed_transfer(scope=scope,
                    name=files[3]['name'],
                    rse_id=get_replica_locks(scope=files[3]['scope'], name=files[3]['name'])[0].rse_id)
    cancel_request_did(scope=scope,
                       name=files[2]['name'],
                       dest_rse_id=get_replica_locks(scope=files[2]['scope'], name=files[2]['name'])[0].rse_id)
    cancel_request_did(scope=scope,
                       name=files[3]['name'],
                       dest_rse_id=get_replica_locks(scope=files[3]['scope'], name=files[2]['name'])[0].rse_id)

    assert (rule_id == get_rule(rule_id)['id'].replace('-', '').lower())
    assert (RuleState.STUCK == get_rule(rule_id)['state'])
    rule_repairer(once=True)

    # Still STUCK because of the repair delays:
    assert (RuleState.STUCK == get_rule(rule_id)['state'])
    assert (get_replica_locks(scope=files[2]['scope'], name=files[2]['name'])[0].rse_id == get_replica_locks(scope=files[3]['scope'], name=files[3]['name'])[0].rse_id)
def test_repair_a_rule_with_source_replica_expression(self):
    """ JUDGE REPAIRER: Test the judge with two rules, one with a source_replica_expression"""
    scope = InternalScope('mock')
    files = create_files(3, scope, self.rse4_id)
    dataset = 'dataset_' + str(uuid())
    add_did(scope, dataset, DIDType.from_sym('DATASET'), self.jdoe)
    attach_dids(scope, dataset, files, self.jdoe)

    # Add a first rule to the DS
    rule_id1 = add_rule(dids=[{'scope': scope, 'name': dataset}],
                        account=self.jdoe,
                        copies=1,
                        rse_expression=self.rse1,
                        grouping='DATASET',
                        weight=None,
                        lifetime=None,
                        locked=False,
                        subscription_id=None)[0]
    rule_id2 = add_rule(dids=[{'scope': scope, 'name': dataset}],
                        account=self.jdoe,
                        copies=1,
                        rse_expression=self.rse3,
                        grouping='DATASET',
                        weight=None,
                        lifetime=None,
                        locked=False,
                        subscription_id=None,
                        source_replica_expression=self.rse1)[0]

    assert (RuleState.REPLICATING == get_rule(rule_id1)['state'])
    assert (RuleState.STUCK == get_rule(rule_id2)['state'])

    successful_transfer(scope=scope, name=files[0]['name'], rse_id=self.rse1_id, nowait=False)
    successful_transfer(scope=scope, name=files[1]['name'], rse_id=self.rse1_id, nowait=False)
    successful_transfer(scope=scope, name=files[2]['name'], rse_id=self.rse1_id, nowait=False)

    # Also make the replicas AVAILABLE
    session = get_session()
    replica = session.query(models.RSEFileAssociation).filter_by(scope=scope, name=files[0]['name'], rse_id=self.rse1_id).one()
    replica.state = ReplicaState.AVAILABLE
    replica = session.query(models.RSEFileAssociation).filter_by(scope=scope, name=files[1]['name'], rse_id=self.rse1_id).one()
    replica.state = ReplicaState.AVAILABLE
    replica = session.query(models.RSEFileAssociation).filter_by(scope=scope, name=files[2]['name'], rse_id=self.rse1_id).one()
    replica.state = ReplicaState.AVAILABLE
    session.commit()

    rule_repairer(once=True)

    assert (RuleState.OK == get_rule(rule_id1)['state'])
    assert (RuleState.REPLICATING == get_rule(rule_id2)['state'])
def compose(scope, name, rse, bytes, sources, account, md5=None, adler32=None, pfn=None, meta={}, rules=[], parent_scope=None, parent_name=None, session=None):
    """
    Concatenates a list of existing dids into a new file replica

    :param scope: The scope name.
    :param name: The data identifier name.
    :param rse: The rse name.
    :param bytes: The size of the file.
    :param sources: The list of temporary DIDs.
    :param account: The account owner.
    :param md5: The md5 checksum.
    :param adler32: The adler32 checksum.
    :param pfn: Physical file name (for nondeterministic rse).
    :param meta: Meta-data associated with the file. Represented as key/value pairs in a dictionary.
    :param rules: Replication rules associated with the file. A list of dictionaries, e.g., [{'copies': 2, 'rse_expression': 'TIERS1'}, ].
    :param parent_scope: Possible dataset scope.
    :param parent_name: Possible dataset name.
    :param session: The database session in use.
    """
    # Create the new file did and replica
    add_replica(rse=rse, scope=scope, name=name, bytes=bytes, account=account, adler32=adler32, md5=md5, pfn=pfn, meta=meta, rules=rules, session=session)

    # Attach the file to a dataset
    if parent_scope and parent_name:
        attach_dids(scope=parent_scope, name=parent_name, dids=[{'scope': scope, 'name': name}], account=account, rse=None, session=session)

    # Mark the merged dids as obsolete
    now, expired_dids = datetime.utcnow(), []
    for source in sources:
        expired_dids.append({'scope': source['scope'], 'name': source['name'], 'expired_at': now})
    session.bulk_update_mappings(models.TemporaryDataIdentifier, expired_dids)
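# A minimal usage sketch of compose() under the signature documented above.
# All scope, name, and RSE values are made-up illustrations; `session` is
# assumed to be an open database session as elsewhere in this code, and the
# rules entry reuses the example given in the docstring.
compose(scope='mock', name='merged_file', rse='MOCK', bytes=2048,
        sources=[{'scope': 'mock', 'name': 'tmp_file_1'},   # temporary DIDs to concatenate
                 {'scope': 'mock', 'name': 'tmp_file_2'}],
        account='jdoe', adler32='0cc737eb',
        rules=[{'copies': 2, 'rse_expression': 'TIERS1'}],  # example taken from the docstring
        parent_scope='mock', parent_name='dataset_x',       # attach the merged file to this dataset
        session=session)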
def test_judge_evaluate_detach(self): """ JUDGE EVALUATOR: Test if the detach is done correctly""" re_evaluator(once=True) scope = InternalScope('mock') container = 'container_' + str(uuid()) add_did(scope, container, DIDType.from_sym('CONTAINER'), self.jdoe) scope = InternalScope('mock') files = create_files(3, scope, self.rse1_id, bytes=100) dataset = 'dataset_' + str(uuid()) add_did(scope, dataset, DIDType.from_sym('DATASET'), self.jdoe) attach_dids(scope, dataset, files, self.jdoe) attach_dids(scope, container, [{ 'scope': scope, 'name': dataset }], self.jdoe) scope = InternalScope('mock') files = create_files(3, scope, self.rse1_id, bytes=100) dataset = 'dataset_' + str(uuid()) add_did(scope, dataset, DIDType.from_sym('DATASET'), self.jdoe) attach_dids(scope, dataset, files, self.jdoe) attach_dids(scope, container, [{ 'scope': scope, 'name': dataset }], self.jdoe) scope = InternalScope('mock') files = create_files(3, scope, self.rse1_id, bytes=100) dataset = 'dataset_' + str(uuid()) add_did(scope, dataset, DIDType.from_sym('DATASET'), self.jdoe) attach_dids(scope, dataset, files, self.jdoe) attach_dids(scope, container, [{ 'scope': scope, 'name': dataset }], self.jdoe) # Add a first rule to the Container rule_id = add_rule(dids=[{ 'scope': scope, 'name': container }], account=self.jdoe, copies=1, rse_expression=self.rse1, grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None)[0] # Fake judge re_evaluator(once=True) assert (9 == get_rule(rule_id)['locks_ok_cnt']) detach_dids(scope, dataset, [files[0]]) # Fake judge re_evaluator(once=True) assert (8 == get_rule(rule_id)['locks_ok_cnt'])
    touch_dids(dids=[{'scope': tmp_scope, 'name': tmp_dsn1, 'type': DIDType.DATASET, 'accessed_at': now}])
    assert_equal(now, get_did_atime(scope=tmp_scope, name=tmp_dsn1))
    assert_equal(None, get_did_atime(scope=tmp_scope, name=tmp_dsn2))

def test_update_dids(self):
    """ DATA IDENTIFIERS (CORE): Update file size and checksum"""
    tmp_scope = 'mock'
    dsn = 'dsn_%s' % generate_uuid()
    lfn = 'lfn.%s' % str(generate_uuid())
    add_did(scope=tmp_scope, name=dsn, type=DIDType.DATASET, account='root')

    files = [{'scope': tmp_scope, 'name': lfn, 'bytes': 724963570L, 'adler32': '0cc737eb', 'meta': {'guid': str(generate_uuid()), 'events': 100}}]
    attach_dids(scope=tmp_scope, name=dsn, rse='MOCK', dids=files, account='root')

    set_metadata(scope=tmp_scope, name=lfn, key='adler32', value='0cc737ee')
    assert_equal(get_metadata(scope=tmp_scope, name=lfn)['adler32'], '0cc737ee')

    with assert_raises(UnsupportedOperation):
        set_metadata(scope=tmp_scope, name='Nimportnawak', key='adler32', value='0cc737ee')

    set_metadata(scope=tmp_scope, name=lfn, key='bytes', value=724963577L)
    assert_equal(get_metadata(scope=tmp_scope, name=lfn)['bytes'], 724963577L)


class TestDIDApi:

    def test_list_new_dids(self):
        """ DATA IDENTIFIERS (API): List new identifiers """
def test_undertaker(self): """ UNDERTAKER (CORE): Test the undertaker. """ tmp_scope = InternalScope('mock') jdoe = InternalAccount('jdoe') root = InternalAccount('root') nbdatasets = 5 nbfiles = 5 rse = 'MOCK' rse_id = get_rse_id('MOCK') set_account_limit(jdoe, rse_id, -1) dsns1 = [{ 'name': 'dsn_%s' % generate_uuid(), 'scope': tmp_scope, 'type': 'DATASET', 'lifetime': -1 } for i in range(nbdatasets)] dsns2 = [{ 'name': 'dsn_%s' % generate_uuid(), 'scope': tmp_scope, 'type': 'DATASET', 'lifetime': -1, 'rules': [{ 'account': jdoe, 'copies': 1, 'rse_expression': rse, 'grouping': 'DATASET' }] } for i in range(nbdatasets)] add_dids(dids=dsns1 + dsns2, account=root) replicas = list() for dsn in dsns1 + dsns2: files = [{ 'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'tombstone': datetime.utcnow() + timedelta(weeks=2), 'meta': { 'events': 10 } } for i in range(nbfiles)] attach_dids(scope=tmp_scope, name=dsn['name'], rse_id=rse_id, dids=files, account=root) replicas += files add_rules(dids=dsns1, rules=[{ 'account': jdoe, 'copies': 1, 'rse_expression': rse, 'grouping': 'DATASET' }]) undertaker(worker_number=1, total_workers=1, once=True) undertaker(worker_number=1, total_workers=1, once=True) for replica in replicas: assert_not_equal( get_replica(scope=replica['scope'], name=replica['name'], rse_id=rse_id)['tombstone'], None)
            'events': 10
        }
    } for i in range(nbfiles)]

    add_did(scope=tmp_scope, name=tmp_dsn1, type=DIDType.DATASET, account='root')
    add_did(scope=tmp_scope, name=tmp_dsn2, type=DIDType.DATASET, account='root')
    attach_dids(scope=tmp_scope, name=tmp_dsn1, rse='MOCK', dids=files1, account='root')
    attach_dids(scope=tmp_scope, name=tmp_dsn2, dids=files1, account='root')
    set_status(scope=tmp_scope, name=tmp_dsn1, open=False)
    delete_replicas(rse='MOCK', files=files1)

    with assert_raises(DataIdentifierNotFound):
        get_did(scope=tmp_scope, name=tmp_dsn1)
    get_did(scope=tmp_scope, name=tmp_dsn2)
def test_throttler_grouped_fifo_all(self): """ THROTTLER (CLIENTS): throttler release all waiting requests (grouped fifo). """ if self.dialect == 'mysql': return True # no threshold -> release all waiting requests name1 = generate_uuid() name2 = generate_uuid() name3 = generate_uuid() name4 = generate_uuid() dataset_1_name = generate_uuid() add_did(self.scope, dataset_1_name, constants.DIDType.DATASET, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name3, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name4, 1, self.account, session=self.db_session) attach_dids(self.scope, dataset_1_name, [{'name': name1, 'scope': self.scope}], self.account, session=self.db_session) attach_dids(self.scope, dataset_1_name, [{'name': name2, 'scope': self.scope}], self.account, session=self.db_session) requests = [{ 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name1, 'bytes': 1, 'scope': self.scope, 'retry_count': 1, 'rule_id': generate_uuid(), 'requested_at': datetime.now().replace(year=2000), 'attributes': { 'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': '' } }, { 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name2, 'bytes': 2, 'requested_at': datetime.now().replace(year=2020), 'rule_id': generate_uuid(), 'scope': self.scope, 'retry_count': 1, 'attributes': { 'activity': self.user_activity, 'bytes': 2, 'md5': '', 'adler32': '' } }, { 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name3, 'bytes': 3, 'requested_at': datetime.now().replace(year=2021), # requested after the request below but small enough for max_volume check 'rule_id': generate_uuid(), 'scope': self.scope, 'retry_count': 1, 'attributes': { 'activity': self.user_activity, 'bytes': 3, 'md5': '', 'adler32': '' } }, { 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name4, 'bytes': 3000, 'requested_at': datetime.now().replace(year=2020), 'rule_id': generate_uuid(), 'scope': self.scope, 'retry_count': 1, 'attributes': { 'activity': self.user_activity, 'bytes': 3000, 'md5': '', 'adler32': '' } }] queue_requests(requests, session=self.db_session) self.db_session.commit() throttler.run(once=True, sleep_time=1) request_1 = get_request_by_did(self.scope, name1, self.dest_rse_id) assert_equal(request_1['state'], constants.RequestState.QUEUED) request_2 = get_request_by_did(self.scope, name2, self.dest_rse_id) assert_equal(request_2['state'], constants.RequestState.QUEUED) request_3 = get_request_by_did(self.scope, name3, self.dest_rse_id) assert_equal(request_3['state'], constants.RequestState.QUEUED) request_4 = get_request_by_did(self.scope, name4, self.dest_rse_id) assert_equal(request_4['state'], constants.RequestState.QUEUED)
def test_throttler_grouped_fifo_subset(self):
    """ THROTTLER (CLIENTS): throttler release subset of waiting requests (grouped fifo). """
    if self.dialect == 'mysql':
        return True
    set_rse_transfer_limits(self.dest_rse_id, self.all_activities, volume=10, max_transfers=1, session=self.db_session)
    set('throttler', '%s,%s' % (self.all_activities, self.dest_rse), 1, session=self.db_session)  # threshold used by throttler
    name1 = generate_uuid()
    name2 = generate_uuid()
    name3 = generate_uuid()
    name4 = generate_uuid()
    dataset_1_name = generate_uuid()
    add_did(self.scope, dataset_1_name, constants.DIDType.DATASET, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name3, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name4, 1, self.account, session=self.db_session)
    attach_dids(self.scope, dataset_1_name, [{'name': name1, 'scope': self.scope}], self.account, session=self.db_session)
    attach_dids(self.scope, dataset_1_name, [{'name': name2, 'scope': self.scope}], self.account, session=self.db_session)
    requests = [{
        'source_rse_id': self.source_rse_id,
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name1,
        'bytes': 1,
        'scope': self.scope,
        'retry_count': 1,
        'rule_id': generate_uuid(),
        'requested_at': datetime.now().replace(year=2000),
        'attributes': {'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': ''}
    }, {
        'source_rse_id': self.source_rse_id,
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name2,
        'bytes': 2,
        'requested_at': datetime.now().replace(year=2020),
        'rule_id': generate_uuid(),
        'scope': self.scope,
        'retry_count': 1,
        'attributes': {'activity': self.user_activity, 'bytes': 2, 'md5': '', 'adler32': ''}
    }, {
        'source_rse_id': self.source_rse_id,
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name3,
        'bytes': 3,
        'requested_at': datetime.now().replace(year=2021),  # requested after the request below but small enough for max_volume check
        'rule_id': generate_uuid(),
        'scope': self.scope,
        'retry_count': 1,
        'attributes': {'activity': self.user_activity, 'bytes': 3, 'md5': '', 'adler32': ''}
    }, {
        'source_rse_id': self.source_rse_id,
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name4,
        'bytes': 3000,
        'requested_at': datetime.now().replace(year=2020),
        'rule_id': generate_uuid(),
        'scope': self.scope,
        'retry_count': 1,
        'attributes': {'activity': self.user_activity, 'bytes': 3000, 'md5': '', 'adler32': ''}
    }]
    queue_requests(requests, session=self.db_session)
    self.db_session.commit()
    throttler.run(once=True, sleep_time=1)
    # released because it got requested first
    request_1 = get_request_by_did(self.scope, name1, self.dest_rse_id)
    assert_equal(request_1['state'], constants.RequestState.QUEUED)
    # released because the DID is attached to the same dataset
    request_2 = get_request_by_did(self.scope, name2, self.dest_rse_id)
    assert_equal(request_2['state'], constants.RequestState.QUEUED)
    # released because of available volume
    request_3 = get_request_by_did(self.scope, name3, self.dest_rse_id)
    assert_equal(request_3['state'], constants.RequestState.QUEUED)
    # still waiting because there is no free volume
    request_4 = get_request_by_did(self.scope, name4, self.dest_rse_id)
    assert_equal(request_4['state'], constants.RequestState.WAITING)
def test_throttler_grouped_fifo_nothing(self):
    """ THROTTLER (CLIENTS): throttler release nothing (grouped fifo). """
    if self.dialect == 'mysql':
        return True
    # four waiting requests and one active request, but the threshold is 1
    # more than 80% of the transfer limit is already used -> release nothing
    set('throttler', '%s,%s' % (self.all_activities, self.dest_rse), 1, session=self.db_session)
    request = models.Request(dest_rse_id=self.dest_rse_id, bytes=2, activity=self.user_activity, state=constants.RequestState.SUBMITTED)
    request.save(session=self.db_session)
    name1 = generate_uuid()
    name2 = generate_uuid()
    name3 = generate_uuid()
    name4 = generate_uuid()
    dataset_1_name = generate_uuid()
    add_did(self.scope, dataset_1_name, constants.DIDType.DATASET, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name3, 1, self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name4, 1, self.account, session=self.db_session)
    attach_dids(self.scope, dataset_1_name, [{'name': name1, 'scope': self.scope}], self.account, session=self.db_session)
    attach_dids(self.scope, dataset_1_name, [{'name': name2, 'scope': self.scope}], self.account, session=self.db_session)
    requests = [{
        'source_rse_id': self.source_rse_id,
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name1,
        'bytes': 1,
        'scope': self.scope,
        'retry_count': 1,
        'rule_id': generate_uuid(),
        'requested_at': datetime.now().replace(year=2000),
        'attributes': {'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': ''}
    }, {
        'source_rse_id': self.source_rse_id,
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name2,
        'bytes': 2,
        'requested_at': datetime.now().replace(year=2020),
        'rule_id': generate_uuid(),
        'scope': self.scope,
        'retry_count': 1,
        'attributes': {'activity': self.user_activity, 'bytes': 2, 'md5': '', 'adler32': ''}
    }, {
        'source_rse_id': self.source_rse_id,
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name3,
        'bytes': 3,
        'requested_at': datetime.now().replace(year=2021),  # requested after the request below but small enough for max_volume check
        'rule_id': generate_uuid(),
        'scope': self.scope,
        'retry_count': 1,
        'attributes': {'activity': self.user_activity, 'bytes': 3, 'md5': '', 'adler32': ''}
    }, {
        'source_rse_id': self.source_rse_id,
        'dest_rse_id': self.dest_rse_id,
        'request_type': constants.RequestType.TRANSFER,
        'request_id': generate_uuid(),
        'name': name4,
        'bytes': 3000,
        'requested_at': datetime.now().replace(year=2020),
        'rule_id': generate_uuid(),
        'scope': self.scope,
        'retry_count': 1,
        'attributes': {'activity': self.user_activity, 'bytes': 3000, 'md5': '', 'adler32': ''}
    }]
    queue_requests(requests, session=self.db_session)
    self.db_session.commit()
    throttler.run(once=True, sleep_time=1)
    request_1 = get_request_by_did(self.scope, name1, self.dest_rse_id)
    assert_equal(request_1['state'], constants.RequestState.WAITING)
    request_2 = get_request_by_did(self.scope, name2, self.dest_rse_id)
    assert_equal(request_2['state'], constants.RequestState.WAITING)
    request_3 = get_request_by_did(self.scope, name3, self.dest_rse_id)
    assert_equal(request_3['state'], constants.RequestState.WAITING)
    request_4 = get_request_by_did(self.scope, name4, self.dest_rse_id)
    assert_equal(request_4['state'], constants.RequestState.WAITING)
def test_overlapping_containers_and_wildcards(rse_factory, did_factory, download_client, root_account):
    """
    Verify that wildcard resolution is correctly done. Overlapping containers and wildcards are handled without issues.
    """
    rse1, _ = rse_factory.make_posix_rse()
    rse2, _ = rse_factory.make_posix_rse()

    dids_on_rse1 = [did_factory.upload_test_file(rse1) for _ in range(5)]
    dids_on_rse2 = [did_factory.upload_test_file(rse2) for _ in range(5)]
    dids = dids_on_rse1 + dids_on_rse2
    datasets = [did_factory.make_dataset() for _ in range(3)]
    container = did_factory.make_container()

    dids_in_dataset1, dids_in_dataset2, dids_in_dataset3 = dids[:6], dids[3:7], dids[4:]
    did_core.attach_dids(dids=dids_in_dataset1, account=root_account, **datasets[0])
    did_core.attach_dids(dids=dids_in_dataset2, account=root_account, **datasets[1])
    did_core.attach_dids(dids=dids_in_dataset3, account=root_account, **datasets[2])
    did_core.attach_dids(dids=datasets, account=root_account, **container)

    dataset1_str, dataset2_str, dataset3_str = ['%s:%s' % (d['scope'], d['name']) for d in datasets]
    container_str = '%s:%s' % (container['scope'], container['name'])

    with TemporaryDirectory() as tmp_dir:
        # No filters: all dids will be grouped and downloaded together
        result = download_client.download_dids([{'did': dataset1_str, 'base_dir': tmp_dir},
                                                {'did': dataset2_str, 'base_dir': tmp_dir},
                                                {'did': dataset3_str, 'base_dir': tmp_dir},
                                                {'did': container_str, 'base_dir': tmp_dir}])
        assert len(result) == len(dids)

    with TemporaryDirectory() as tmp_dir:
        # Verify that wildcard resolution works correctly
        result = download_client.download_dids([{'did': '%s:dataset_%s*' % (did_factory.default_scope, did_factory.base_uuid), 'base_dir': tmp_dir},
                                                {'did': container_str, 'base_dir': tmp_dir}])
        assert len(result) == len(dids)

    with TemporaryDirectory() as tmp_dir:
        # Test with an RSE filter
        result = download_client.download_dids([{'did': dataset1_str, 'base_dir': tmp_dir, 'rse': rse1},
                                                {'did': dataset2_str, 'base_dir': tmp_dir, 'rse': rse1},
                                                {'did': dataset3_str, 'base_dir': tmp_dir, 'rse': rse1}])
        assert len(result) == len(dids_on_rse1)

    with TemporaryDirectory() as tmp_dir1, TemporaryDirectory() as tmp_dir2, TemporaryDirectory() as tmp_dir3:
        # Test with nrandom
        result = download_client.download_dids([{'did': dataset1_str, 'base_dir': tmp_dir1, 'nrandom': 3},
                                                {'did': dataset2_str, 'base_dir': tmp_dir2, 'nrandom': 3},
                                                {'did': dataset3_str, 'base_dir': tmp_dir3, 'nrandom': 3}])
        assert 3 <= len(result) <= 9

    with TemporaryDirectory() as tmp_dir1, TemporaryDirectory() as tmp_dir2:
        # Test a complex overlapping of filters and different destination directories
        download_client.download_dids([{'did': dataset1_str, 'base_dir': tmp_dir1, 'rse': rse1},
                                       {'did': dataset2_str, 'base_dir': tmp_dir1, 'rse': rse2},
                                       {'did': dataset3_str, 'base_dir': tmp_dir2, 'rse': rse2}])

        dids_on_rse1_and_dataset1 = [d for d in dids_on_rse1 if d in dids_in_dataset1]
        dids_on_rse2_and_dataset2 = [d for d in dids_on_rse2 if d in dids_in_dataset2]
        dids_on_rse2_and_dataset3 = [d for d in dids_on_rse2 if d in dids_in_dataset3]

        for dst_dir, expected_dids in (('%s/%s' % (tmp_dir1, datasets[0]['name']), dids_on_rse1_and_dataset1),
                                       ('%s/%s' % (tmp_dir1, datasets[1]['name']), dids_on_rse2_and_dataset2),
                                       ('%s/%s' % (tmp_dir2, datasets[2]['name']), dids_on_rse2_and_dataset3)):
            files_in_dir = os.listdir(dst_dir)
            for did in expected_dids:
                assert did['name'] in files_in_dir
            assert len(files_in_dir) == len(expected_dids)
def test_undertaker(self): """ UNDERTAKER (CORE): Test the undertaker. """ tmp_scope = InternalScope('mock', **self.vo) jdoe = InternalAccount('jdoe', **self.vo) root = InternalAccount('root', **self.vo) nbdatasets = 5 nbfiles = 5 rse = 'MOCK' rse_id = get_rse_id('MOCK', **self.vo) set_local_account_limit(jdoe, rse_id, -1) dsns1 = [{ 'name': 'dsn_%s' % generate_uuid(), 'scope': tmp_scope, 'type': 'DATASET', 'lifetime': -1 } for i in range(nbdatasets)] dsns2 = [{ 'name': 'dsn_%s' % generate_uuid(), 'scope': tmp_scope, 'type': 'DATASET', 'lifetime': -1, 'rules': [{ 'account': jdoe, 'copies': 1, 'rse_expression': rse, 'grouping': 'DATASET' }] } for i in range(nbdatasets)] add_dids(dids=dsns1 + dsns2, account=root) # Add generic metadata on did try: set_metadata(tmp_scope, dsns1[0]['name'], "test_key", "test_value") except NotImplementedError: # add_did_meta is not Implemented for Oracle < 12 pass replicas = list() for dsn in dsns1 + dsns2: files = [{ 'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'tombstone': datetime.utcnow() + timedelta(weeks=2), 'meta': { 'events': 10 } } for i in range(nbfiles)] attach_dids(scope=tmp_scope, name=dsn['name'], rse_id=rse_id, dids=files, account=root) replicas += files add_rules(dids=dsns1, rules=[{ 'account': jdoe, 'copies': 1, 'rse_expression': rse, 'grouping': 'DATASET' }]) undertaker(worker_number=1, total_workers=1, once=True) undertaker(worker_number=1, total_workers=1, once=True) for replica in replicas: assert get_replica(scope=replica['scope'], name=replica['name'], rse_id=rse_id)['tombstone'] is not None
def test_update_collection_replica(self): """ REPLICA (CORE): Update collection replicas from update requests. """ file_size = 2 files = [{ 'name': 'file_%s' % generate_uuid(), 'scope': self.scope, 'bytes': file_size } for i in range(0, 2)] dataset_name = 'dataset_test_%s' % generate_uuid() add_replicas(rse_id=self.rse_id, files=files, account=self.account, session=self.db_session) add_dids([{ 'scope': self.scope, 'name': dataset_name, 'type': constants.DIDType.DATASET }], account=self.account, session=self.db_session) attach_dids(scope=self.scope, name=dataset_name, dids=files, account=self.account, session=self.db_session) models.CollectionReplica(rse_id=self.rse_id, scope=self.scope, state=constants.ReplicaState.AVAILABLE, name=dataset_name, did_type=constants.DIDType.DATASET, bytes=len(files) * file_size, length=len(files), available_replicas_cnt=0)\ .save(session=self.db_session) # Update request with rse id # First update -> dataset replica should be available models.UpdatedCollectionReplica( rse_id=self.rse_id, scope=self.scope, name=dataset_name, did_type=constants.DIDType.DATASET).save(session=self.db_session) update_request = self.db_session.query( models.UpdatedCollectionReplica).filter_by( rse_id=self.rse_id, scope=self.scope, name=dataset_name).one() # pylint: disable=no-member update_collection_replica(update_request=update_request.to_dict(), session=self.db_session) update_request = self.db_session.query( models.UpdatedCollectionReplica).filter_by( id=update_request.id).first() # pylint: disable=no-member assert update_request is None dataset_replica = self.db_session.query( models.CollectionReplica).filter_by(scope=self.scope, name=dataset_name).one() # pylint: disable=no-member assert dataset_replica['bytes'] == len(files) * file_size assert dataset_replica['length'] == len(files) assert dataset_replica['available_bytes'] == len(files) * file_size assert dataset_replica['available_replicas_cnt'] == len(files) assert dataset_replica['state'] == ReplicaState.AVAILABLE # Delete one file replica -> dataset replica should be unavailable delete_replicas(rse_id=self.rse_id, files=[files[0]], session=self.db_session) update_request = self.db_session.query( models.UpdatedCollectionReplica).filter_by( rse_id=self.rse_id, scope=self.scope, name=dataset_name).one() # pylint: disable=no-member update_collection_replica(update_request=update_request.to_dict(), session=self.db_session) dataset_replica = self.db_session.query( models.CollectionReplica).filter_by(scope=self.scope, name=dataset_name).one() # pylint: disable=no-member assert dataset_replica['bytes'] == len(files) * file_size assert dataset_replica['length'] == len(files) assert dataset_replica['available_bytes'] == (len(files) - 1) * file_size assert dataset_replica['available_replicas_cnt'] == len(files) - 1 assert dataset_replica['state'] == ReplicaState.UNAVAILABLE # Add one file replica -> dataset replica should be available again add_replicas(rse_id=self.rse_id, files=[files[0]], account=self.account, session=self.db_session) attach_dids(scope=self.scope, name=dataset_name, dids=[files[0]], account=self.account, session=self.db_session) models.UpdatedCollectionReplica( rse_id=self.rse_id, scope=self.scope, name=dataset_name, did_type=constants.DIDType.DATASET).save(session=self.db_session) update_request = self.db_session.query( models.UpdatedCollectionReplica).filter_by( rse_id=self.rse_id, scope=self.scope, name=dataset_name).one() # pylint: disable=no-member 
update_collection_replica(update_request=update_request.to_dict(), session=self.db_session)
    dataset_replica = self.db_session.query(models.CollectionReplica).filter_by(scope=self.scope, name=dataset_name).one()  # pylint: disable=no-member
    assert dataset_replica['bytes'] == len(files) * file_size
    assert dataset_replica['length'] == len(files)
    assert dataset_replica['available_bytes'] == len(files) * file_size
    assert dataset_replica['available_replicas_cnt'] == len(files)
    assert dataset_replica['state'] == ReplicaState.AVAILABLE

    # Old behaviour, open empty datasets are not deleted
    # Delete all file replicas -> dataset replica should be deleted
    delete_replicas(rse_id=self.rse_id, files=files, session=self.db_session)
    with pytest.raises(NoResultFound):
        update_collection_replica(update_request=update_request.to_dict(), session=self.db_session)

    # Update request without rse_id - using two replicas per file -> total 4 replicas
    dataset_name = 'dataset_test_%s' % generate_uuid()
    add_dids([{'scope': self.scope, 'name': dataset_name, 'type': constants.DIDType.DATASET}], account=self.account, session=self.db_session)
    add_replicas(rse_id=self.rse_id, files=files, account=self.account, session=self.db_session)
    add_replicas(rse_id=self.rse2_id, files=files, account=self.account, session=self.db_session)
    attach_dids(scope=self.scope, name=dataset_name, dids=files, account=self.account, session=self.db_session)
    models.CollectionReplica(rse_id=self.rse_id, scope=self.scope, name=dataset_name,
                             state=constants.ReplicaState.UNAVAILABLE, did_type=constants.DIDType.DATASET,
                             bytes=len(files) * file_size, length=len(files)).save(session=self.db_session)
    models.CollectionReplica(rse_id=self.rse2_id, scope=self.scope, name=dataset_name,
                             state=constants.ReplicaState.UNAVAILABLE, did_type=constants.DIDType.DATASET,
                             bytes=len(files) * file_size, length=len(files)).save(session=self.db_session)

    # First update -> replicas should be available
    models.UpdatedCollectionReplica(scope=self.scope, name=dataset_name).save(session=self.db_session)
    update_request = self.db_session.query(models.UpdatedCollectionReplica).filter_by(scope=self.scope, name=dataset_name).one()  # pylint: disable=no-member
    update_collection_replica(update_request=update_request.to_dict(), session=self.db_session)
    for dataset_replica in self.db_session.query(models.CollectionReplica).filter_by(scope=self.scope, name=dataset_name).all():  # pylint: disable=no-member
        assert dataset_replica['bytes'] == len(files) * file_size
        assert dataset_replica['length'] == len(files)
        assert dataset_replica['available_bytes'] == len(files) * file_size
        assert dataset_replica['available_replicas_cnt'] == len(files)
        assert dataset_replica['state'] == ReplicaState.AVAILABLE

    # Delete the first replica on the first RSE -> the replica on the first RSE should be unavailable, the replica on the second RSE should still be available
    delete_replicas(rse_id=self.rse_id, files=[files[0]], session=self.db_session)
    models.UpdatedCollectionReplica(scope=self.scope, name=dataset_name, did_type=constants.DIDType.DATASET).save(session=self.db_session)
    # delete_replicas also creates an update object, but with rse_id set -> add an extra filter for rse_id IS NULL
    update_request = self.db_session.query(models.UpdatedCollectionReplica).filter(models.UpdatedCollectionReplica.scope == self.scope,
                                                                                   models.UpdatedCollectionReplica.name == dataset_name,  # pylint: disable=no-member
                                                                                   models.UpdatedCollectionReplica.rse_id.is_(None)).one()  # pylint: disable=no-member
    update_collection_replica(update_request=update_request.to_dict(),
                              session=self.db_session)
    dataset_replica = self.db_session.query(models.CollectionReplica).filter_by(scope=self.scope, name=dataset_name, rse_id=self.rse_id).one()  # pylint: disable=no-member
    assert dataset_replica['bytes'] == len(files) * file_size
    assert dataset_replica['length'] == len(files)
    assert dataset_replica['available_bytes'] == (len(files) - 1) * file_size
    assert dataset_replica['available_replicas_cnt'] == len(files) - 1
    assert dataset_replica['state'] == ReplicaState.UNAVAILABLE

    dataset_replica = self.db_session.query(models.CollectionReplica).filter_by(scope=self.scope, name=dataset_name, rse_id=self.rse2_id).one()  # pylint: disable=no-member
    assert dataset_replica['bytes'] == len(files) * file_size
    assert dataset_replica['length'] == len(files)
    assert dataset_replica['available_bytes'] == len(files) * file_size
    assert dataset_replica['available_replicas_cnt'] == len(files)
    assert dataset_replica['state'] == ReplicaState.AVAILABLE

    # Set the state of the first replica on the second RSE to UNAVAILABLE -> both dataset replicas should be unavailable
    file_replica = self.db_session.query(models.RSEFileAssociation).filter_by(rse_id=self.rse2_id, scope=self.scope, name=files[0]['name']).one()  # pylint: disable=no-member
    file_replica.state = constants.ReplicaState.UNAVAILABLE
    models.UpdatedCollectionReplica(scope=self.scope, name=dataset_name, did_type=constants.DIDType.DATASET).save(session=self.db_session)
    update_request = self.db_session.query(models.UpdatedCollectionReplica).filter(models.UpdatedCollectionReplica.scope == self.scope,
                                                                                   models.UpdatedCollectionReplica.name == dataset_name,  # pylint: disable=no-member
                                                                                   models.UpdatedCollectionReplica.rse_id.is_(None)).one()  # pylint: disable=no-member
    update_collection_replica(update_request=update_request.to_dict(), session=self.db_session)
    dataset_replica = self.db_session.query(models.CollectionReplica).filter_by(scope=self.scope, name=dataset_name, rse_id=self.rse_id).one()  # pylint: disable=no-member
    assert dataset_replica['bytes'] == len(files) * file_size
    assert dataset_replica['length'] == len(files)
    assert dataset_replica['available_bytes'] == (len(files) - 1) * file_size
    assert dataset_replica['available_replicas_cnt'] == len(files) - 1
    assert dataset_replica['state'] == ReplicaState.UNAVAILABLE

    dataset_replica = self.db_session.query(models.CollectionReplica).filter_by(scope=self.scope, name=dataset_name, rse_id=self.rse2_id).one()  # pylint: disable=no-member
    assert dataset_replica['bytes'] == len(files) * file_size
    assert dataset_replica['length'] == len(files)
    assert dataset_replica['available_bytes'] == (len(files) - 1) * file_size
    assert dataset_replica['available_replicas_cnt'] == len(files) - 1
    assert dataset_replica['state'] == ReplicaState.UNAVAILABLE

    # Delete the first replica on the second RSE -> the file is no longer part of the dataset -> both dataset replicas should be available
    delete_replicas(rse_id=self.rse2_id, files=[files[0]], session=self.db_session)
    models.UpdatedCollectionReplica(scope=self.scope, name=dataset_name, did_type=constants.DIDType.DATASET).save(session=self.db_session)
    update_request = self.db_session.query(models.UpdatedCollectionReplica).filter(models.UpdatedCollectionReplica.scope == self.scope,
                                                                                   models.UpdatedCollectionReplica.name == dataset_name,  # pylint: disable=no-member
                                                                                   models.UpdatedCollectionReplica.rse_id.is_(None)).one()  # pylint: disable=no-member
    update_collection_replica(update_request=update_request.to_dict(), session=self.db_session)
    dataset_replica = self.db_session.query(
        models.CollectionReplica).filter_by(scope=self.scope, name=dataset_name, rse_id=self.rse_id).one()  # pylint: disable=no-member
    assert dataset_replica['bytes'] == (len(files) - 1) * file_size
    assert dataset_replica['length'] == len(files) - 1
    assert dataset_replica['available_bytes'] == (len(files) - 1) * file_size
    assert dataset_replica['available_replicas_cnt'] == len(files) - 1
    assert dataset_replica['state'] == ReplicaState.AVAILABLE

    dataset_replica = self.db_session.query(models.CollectionReplica).filter_by(scope=self.scope, name=dataset_name, rse_id=self.rse2_id).one()  # pylint: disable=no-member
    assert dataset_replica['bytes'] == (len(files) - 1) * file_size
    assert dataset_replica['length'] == len(files) - 1
    assert dataset_replica['available_bytes'] == (len(files) - 1) * file_size
    assert dataset_replica['available_replicas_cnt'] == len(files) - 1
    assert dataset_replica['state'] == ReplicaState.AVAILABLE

    # Add the first file back on the first RSE -> the dataset replica on the first RSE should be available again
    add_replicas(rse_id=self.rse_id, files=[files[0]], account=self.account, session=self.db_session)
    attach_dids(scope=self.scope, name=dataset_name, dids=[files[0]], account=self.account, session=self.db_session)
    models.UpdatedCollectionReplica(scope=self.scope, name=dataset_name, did_type=constants.DIDType.DATASET).save(session=self.db_session)
    update_request = self.db_session.query(models.UpdatedCollectionReplica).filter(models.UpdatedCollectionReplica.scope == self.scope,
                                                                                   models.UpdatedCollectionReplica.name == dataset_name,  # pylint: disable=no-member
                                                                                   models.UpdatedCollectionReplica.rse_id.is_(None)).one()  # pylint: disable=no-member
    update_collection_replica(update_request=update_request.to_dict(), session=self.db_session)
    dataset_replica = self.db_session.query(models.CollectionReplica).filter_by(scope=self.scope, name=dataset_name, rse_id=self.rse_id).one()  # pylint: disable=no-member
    assert dataset_replica['bytes'] == len(files) * file_size
    assert dataset_replica['length'] == len(files)
    assert dataset_replica['available_bytes'] == len(files) * file_size
    assert dataset_replica['available_replicas_cnt'] == len(files)
    assert dataset_replica['state'] == ReplicaState.AVAILABLE

    dataset_replica = self.db_session.query(models.CollectionReplica).filter_by(scope=self.scope, name=dataset_name, rse_id=self.rse2_id).one()  # pylint: disable=no-member
    assert dataset_replica['bytes'] == len(files) * file_size
    assert dataset_replica['length'] == len(files)
    assert dataset_replica['available_bytes'] == (len(files) - 1) * file_size
    assert dataset_replica['available_replicas_cnt'] == len(files) - 1
    assert dataset_replica['state'] == ReplicaState.UNAVAILABLE

    # Add the first file back on the second RSE -> both dataset replicas should be available again
    add_replicas(rse_id=self.rse2_id, files=[files[0]], account=self.account, session=self.db_session)
    models.UpdatedCollectionReplica(scope=self.scope, name=dataset_name, did_type=constants.DIDType.DATASET).save(session=self.db_session)
    update_request = self.db_session.query(models.UpdatedCollectionReplica).filter(models.UpdatedCollectionReplica.scope == self.scope,
                                                                                   models.UpdatedCollectionReplica.name == dataset_name,  # pylint: disable=no-member
                                                                                   models.UpdatedCollectionReplica.rse_id.is_(None)).one()  # pylint: disable=no-member
    update_collection_replica(update_request=update_request.to_dict(), session=self.db_session)
    dataset_replica = self.db_session.query(models.CollectionReplica).filter_by(scope=self.scope, name=dataset_name, rse_id=self.rse_id).one()  # pylint: disable=no-member
assert dataset_replica['bytes'] == len(files) * file_size assert dataset_replica['length'] == len(files) assert dataset_replica['available_bytes'] == len(files) * file_size assert dataset_replica['available_replicas_cnt'] == len(files) assert dataset_replica['state'] == ReplicaState.AVAILABLE dataset_replica = self.db_session.query( models.CollectionReplica).filter_by(scope=self.scope, name=dataset_name, rse_id=self.rse2_id).one() # pylint: disable=no-member assert dataset_replica['bytes'] == len(files) * file_size assert dataset_replica['length'] == len(files) assert dataset_replica['available_bytes'] == len(files) * file_size assert dataset_replica['available_replicas_cnt'] == len(files) assert dataset_replica['state'] == ReplicaState.AVAILABLE
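# Hedged refactoring sketch: the repeated five-assert blocks above could be collapsed
# into one helper. assert_dataset_replica_state is a hypothetical name; it reuses the
# module's models and ReplicaState imports and the same query as the test.
def assert_dataset_replica_state(db_session, scope, name, rse_id, total_files, file_size, available_files):
    replica = db_session.query(models.CollectionReplica).filter_by(scope=scope, name=name, rse_id=rse_id).one()
    assert replica['bytes'] == total_files * file_size
    assert replica['length'] == total_files
    assert replica['available_bytes'] == available_files * file_size
    assert replica['available_replicas_cnt'] == available_files
    # a collection replica is AVAILABLE only while every file replica is available
    expected_state = ReplicaState.AVAILABLE if available_files == total_files else ReplicaState.UNAVAILABLE
    assert replica['state'] == expected_state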
    replicas = list()
    for dsn in dsns2:
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(),
                  'bytes': 1, 'adler32': '0cc737eb',
                  'tombstone': datetime.utcnow() + timedelta(weeks=2),
                  'meta': {'events': 10}} for i in range(nbfiles)]
        attach_dids(scope=tmp_scope, name=dsn['name'], rse=rse, dids=files, account='root')
        replicas += files

    undertaker(worker_number=1, total_workers=1, once=True)

    for replica in replicas:
        assert (get_replica(scope=replica['scope'], name=replica['name'], rse=rse)['tombstone'] is None)

    for dsn in dsns2:
        assert (get_did(scope='archive', name=dsn['name'])['name'] == dsn['name'])
        assert (len([
def test_release_waiting_requests_grouped_fifo(self): """ REQUEST (CORE): release waiting requests based on grouped FIFO. """ # set max_volume to 0 to check first without releasing extra requests set_rse_transfer_limits(self.dest_rse_id, self.all_activities, volume=0, max_transfers=1, session=self.db_session) # one request with an unattached DID -> one request should be released self.db_session.query(models.Request).delete() self.db_session.commit() name = generate_uuid() add_replica(self.source_rse_id, self.scope, name, 1, self.account, session=self.db_session) requests = [{ 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name, 'scope': self.scope, 'rule_id': generate_uuid(), 'retry_count': 1, 'attributes': { 'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': '' } }] queue_requests(requests, session=self.db_session) release_waiting_requests_grouped_fifo(self.dest_rse_id, count=1, session=self.db_session) request = get_request_by_did(self.scope, name, self.dest_rse_id, session=self.db_session) assert_equal(request['state'], constants.RequestState.QUEUED) # one request with an attached DID -> one request should be released self.db_session.query(models.Request).delete() self.db_session.commit() name = generate_uuid() dataset_name = generate_uuid() add_replica(self.source_rse_id, self.scope, name, 1, self.account, session=self.db_session) add_did(self.scope, dataset_name, constants.DIDType.DATASET, self.account, session=self.db_session) attach_dids(self.scope, dataset_name, [{'name': name, 'scope': self.scope}], self.account, session=self.db_session) requests = [{ 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name, 'rule_id': generate_uuid(), 'retry_count': 1, 'scope': self.scope, 'attributes': { 'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': '' } }] queue_requests(requests, session=self.db_session) release_waiting_requests_grouped_fifo(self.dest_rse_id, count=1, session=self.db_session) request = get_request_by_did(self.scope, name, self.dest_rse_id, session=self.db_session) assert_equal(request['state'], constants.RequestState.QUEUED) # five requests with different requested_at and multiple attachments per collection -> release only one request -> two requests of one collection should be released self.db_session.query(models.Request).delete() self.db_session.commit() name1 = generate_uuid() name2 = generate_uuid() name3 = generate_uuid() name4 = generate_uuid() name5 = generate_uuid() dataset_1_name = generate_uuid() add_did(self.scope, dataset_1_name, constants.DIDType.DATASET, self.account, session=self.db_session) dataset_2_name = generate_uuid() add_did(self.scope, dataset_2_name, constants.DIDType.DATASET, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name3, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name4, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name5, 1, self.account, session=self.db_session) attach_dids(self.scope, dataset_1_name, [{'name': name1, 'scope': self.scope}, {'name': name2, 'scope': self.scope}], self.account, 
session=self.db_session) attach_dids(self.scope, dataset_2_name, [{'name': name3, 'scope': self.scope}, {'name': name4, 'scope': self.scope}], self.account, session=self.db_session) requests = [{ 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name1, 'scope': self.scope, 'retry_count': 1, 'rule_id': generate_uuid(), 'requested_at': datetime.now().replace(year=2000), 'attributes': { 'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': '' } }, { 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name2, 'requested_at': datetime.now().replace(year=2020), 'rule_id': generate_uuid(), 'scope': self.scope, 'retry_count': 1, 'attributes': { 'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': '' } }, { 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name3, 'requested_at': datetime.now().replace(year=2015), 'retry_count': 1, 'scope': self.scope, 'rule_id': generate_uuid(), 'attributes': { 'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': '' } }, { 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name4, 'requested_at': datetime.now().replace(year=2010), 'retry_count': 1, 'scope': self.scope, 'rule_id': generate_uuid(), 'attributes': { 'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': '' } }, { 'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name5, 'retry_count': 1, 'requested_at': datetime.now().replace(year=2018), 'scope': self.scope, 'rule_id': generate_uuid(), 'attributes': { 'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': '' } }] queue_requests(requests, session=self.db_session) release_waiting_requests_grouped_fifo(self.dest_rse_id, count=1, session=self.db_session) request_1 = get_request_by_did(self.scope, name1, self.dest_rse_id, session=self.db_session) assert_equal(request_1['state'], constants.RequestState.QUEUED) request_2 = get_request_by_did(self.scope, name2, self.dest_rse_id, session=self.db_session) assert_equal(request_2['state'], constants.RequestState.QUEUED) request_3 = get_request_by_did(self.scope, name3, self.dest_rse_id, session=self.db_session) assert_equal(request_3['state'], constants.RequestState.WAITING) request_4 = get_request_by_did(self.scope, name4, self.dest_rse_id, session=self.db_session) assert_equal(request_4['state'], constants.RequestState.WAITING) request_5 = get_request_by_did(self.scope, name5, self.dest_rse_id, session=self.db_session) assert_equal(request_5['state'], constants.RequestState.WAITING) # with maximal volume check -> release one request -> three requests should be released because of attachments and free volume space self.db_session.query(models.Request).delete() self.db_session.commit() name1 = generate_uuid() name2 = generate_uuid() name3 = generate_uuid() dataset_1_name = generate_uuid() add_did(self.scope, dataset_1_name, constants.DIDType.DATASET, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name2, 1, 
                self.account, session=self.db_session)
    add_replica(self.source_rse_id, self.scope, name3, 1, self.account, session=self.db_session)
    name4 = generate_uuid()
    add_replica(self.source_rse_id, self.scope, name4, 1, self.account, session=self.db_session)
    attach_dids(self.scope, dataset_1_name, [{'name': name1, 'scope': self.scope}], self.account, session=self.db_session)
    attach_dids(self.scope, dataset_1_name, [{'name': name2, 'scope': self.scope}], self.account, session=self.db_session)
    set_rse_transfer_limits(self.dest_rse_id, self.all_activities, volume=10, max_transfers=1, session=self.db_session)
    requests = [{'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id,
                 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(),
                 'name': name1, 'bytes': 1, 'scope': self.scope, 'retry_count': 1,
                 'rule_id': generate_uuid(), 'requested_at': datetime.now().replace(year=2000),
                 'attributes': {'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': ''}},
                {'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id,
                 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(),
                 'name': name2, 'bytes': 2, 'requested_at': datetime.now().replace(year=2020),
                 'rule_id': generate_uuid(), 'scope': self.scope, 'retry_count': 1,
                 'attributes': {'activity': self.user_activity, 'bytes': 2, 'md5': '', 'adler32': ''}},
                {'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id,
                 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(),
                 'name': name3, 'bytes': 3,
                 'requested_at': datetime.now().replace(year=2021),  # requested after the request below but small enough for the max_volume check
                 'rule_id': generate_uuid(), 'scope': self.scope, 'retry_count': 1,
                 'attributes': {'activity': self.user_activity, 'bytes': 3, 'md5': '', 'adler32': ''}},
                {'source_rse_id': self.source_rse_id, 'dest_rse_id': self.dest_rse_id,
                 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(),
                 'name': name4, 'bytes': 3000, 'requested_at': datetime.now().replace(year=2020),
                 'rule_id': generate_uuid(), 'scope': self.scope, 'retry_count': 1,
                 'attributes': {'activity': self.user_activity, 'bytes': 3000, 'md5': '', 'adler32': ''}}]
    queue_requests(requests, session=self.db_session)
    amount_updated_requests = release_waiting_requests_grouped_fifo(self.dest_rse_id, count=1, session=self.db_session)
    assert_equal(amount_updated_requests, 3)
    # released because it got requested first
    request_1 = get_request_by_did(self.scope, name1, self.dest_rse_id, session=self.db_session)
    assert_equal(request_1['state'], constants.RequestState.QUEUED)
    # released because the DID is attached to the same dataset
    request_2 = get_request_by_did(self.scope, name2, self.dest_rse_id, session=self.db_session)
    assert_equal(request_2['state'], constants.RequestState.QUEUED)
    # released because of available volume
    request_3 = get_request_by_did(self.scope, name3, self.dest_rse_id, session=self.db_session)
    assert_equal(request_3['state'], constants.RequestState.QUEUED)
    # still waiting because there is no free volume
    request_4 = get_request_by_did(self.scope, name4, self.dest_rse_id, session=self.db_session)
    assert_equal(request_4['state'], constants.RequestState.WAITING)

    # with maximal volume check -> release one request -> two requests should be released because of attachments
    self.db_session.query(models.Request).delete()
    self.db_session.commit()
    name1 = generate_uuid()
    name2 = generate_uuid()
    name3 = generate_uuid()
    name4 = generate_uuid()
    dataset_1_name = generate_uuid()
add_did(self.scope, dataset_1_name, constants.DIDType.DATASET, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name3, 1, self.account, session=self.db_session) add_replica(self.source_rse_id, self.scope, name4, 1, self.account, session=self.db_session) attach_dids(self.scope, dataset_1_name, [{'name': name1, 'scope': self.scope}], self.account, session=self.db_session) attach_dids(self.scope, dataset_1_name, [{'name': name2, 'scope': self.scope}], self.account, session=self.db_session) set_rse_transfer_limits(self.dest_rse_id, self.all_activities, volume=5, max_transfers=1, session=self.db_session) request = models.Request(dest_rse_id=self.dest_rse_id, bytes=2, activity=self.all_activities, state=constants.RequestState.SUBMITTED) request.save(session=self.db_session) requests = [{ 'dest_rse_id': self.dest_rse_id, 'source_rse_id': self.source_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name1, 'bytes': 1, 'scope': self.scope, 'retry_count': 1, 'rule_id': generate_uuid(), 'requested_at': datetime.now().replace(year=2000), 'attributes': { 'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': '' } }, { 'dest_rse_id': self.dest_rse_id, 'source_rse_id': self.source_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name2, 'bytes': 2, 'requested_at': datetime.now().replace(year=2020), 'rule_id': generate_uuid(), 'scope': self.scope, 'retry_count': 1, 'attributes': { 'activity': self.user_activity, 'bytes': 2, 'md5': '', 'adler32': '' } }, { 'dest_rse_id': self.dest_rse_id, 'source_rse_id': self.source_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name3, 'bytes': 1, 'requested_at': datetime.now().replace(year=2020), 'rule_id': generate_uuid(), 'scope': self.scope, 'retry_count': 1, 'attributes': { 'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': '' } }, { 'dest_rse_id': self.dest_rse_id, 'source_rse_id': self.source_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name4, 'bytes': 1, 'requested_at': datetime.now().replace(year=2020), 'rule_id': generate_uuid(), 'scope': self.scope, 'retry_count': 1, 'attributes': { 'activity': self.user_activity, 'bytes': 1, 'md5': '', 'adler32': '' } }] queue_requests(requests, session=self.db_session) release_waiting_requests_grouped_fifo(self.dest_rse_id, count=1, session=self.db_session) # released because it got requested first request_1 = get_request_by_did(self.scope, name1, self.dest_rse_id, session=self.db_session) assert_equal(request_1['state'], constants.RequestState.QUEUED) # released because the DID is attached to the same dataset request_2 = get_request_by_did(self.scope, name2, self.dest_rse_id, session=self.db_session) assert_equal(request_2['state'], constants.RequestState.QUEUED) # still waiting because there is no free volume after releasing the two requests above request_3 = get_request_by_did(self.scope, name3, self.dest_rse_id, session=self.db_session) assert_equal(request_3['state'], constants.RequestState.WAITING) request_4 = get_request_by_did(self.scope, name4, self.dest_rse_id, session=self.db_session) assert_equal(request_4['state'], constants.RequestState.WAITING)
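# Plain-Python sketch of the grouped-FIFO semantics asserted above (an illustration,
# not the rucio implementation): waiting requests are grouped by the dataset their did
# is attached to, groups are ordered by their oldest request, and whole groups are
# released until at least 'count' requests have been freed.
def grouped_fifo_release(waiting_requests, count):
    """waiting_requests: dicts with 'dataset' and 'requested_at' keys."""
    groups = {}
    for request in waiting_requests:
        groups.setdefault(request['dataset'], []).append(request)
    # order groups by the oldest request they contain
    ordered = sorted(groups.values(), key=lambda group: min(r['requested_at'] for r in group))
    released = []
    for group in ordered:
        if len(released) >= count:
            break
        released.extend(group)  # a dataset's requests are always released together
    return released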
def test_archive_on_dataset_level(rse_factory, did_factory, root_account): rse_name, rse_id = rse_factory.make_xroot_rse() dataset1 = did_factory.make_dataset() dataset2 = did_factory.make_dataset() container = did_factory.make_container() attach_dids(dids=[dataset1, dataset2], account=root_account, **container) # Add a random file to the datasets to avoid dataset deletion when the archive is deleted a_file = did_factory.random_did() add_replicas(rse_id=rse_id, files=[{ **a_file, 'bytes': 500, 'type': 'FILE', 'adler32': 'beefbeef' }], account=root_account) attach_dids(dids=[a_file], account=root_account, **dataset1) attach_dids(dids=[a_file], account=root_account, **dataset2) # adding a non-archive file should not set is_archive=True metadata = get_metadata(**dataset1) assert not metadata['is_archive'] # Create an archive and its constituents, attach the archive to datasets archive = did_factory.random_did(name_prefix='archive', name_suffix='.zip') add_replicas(rse_id=rse_id, files=[{ **archive, 'bytes': 500, 'type': 'FILE', 'adler32': 'beefbeef' }], account=root_account) constituents = [did_factory.random_did() for _ in range(2)] # Add archive to one dataset _before_ attaching files to the archive (before is_archive is set on the archive did) attach_dids(dids=[archive], account=root_account, **dataset1) attach_dids(dids=[{ **c, 'bytes': 200, 'adler32': 'ababbaba' } for c in constituents], account=root_account, **archive) # Attach to another dataset _after_ attaching files to the archive attach_dids(dids=[archive], account=root_account, **dataset2) # Both datasets must have is_archive = True metadata = get_metadata(**dataset1) assert metadata['is_archive'] is True metadata = get_metadata(**dataset2) assert metadata['is_archive'] is True # Delete the archive, the datasets must now have is_archive == false delete_replicas(rse_id=rse_id, files=[archive]) metadata = get_metadata(**dataset1) assert not metadata['is_archive'] metadata = get_metadata(**dataset2) assert not metadata['is_archive']
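# Hedged sketch of the is_archive propagation checked above: conceptually, a dataset
# carries is_archive=True while at least one attached file is itself an archive, and
# the flag is cleared once the last attached archive is deleted. dataset_is_archive is
# purely illustrative, not the rucio implementation.
def dataset_is_archive(attached_files):
    return any(f.get('is_archive', False) for f in attached_files)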
def test_release_waiting_requests_per_free_volume(self):
    """ REQUEST (CORE): release waiting requests that fit into the available transfer volume. """
    # release unattached requests that fit in the available volume, taking already submitted transfers into account
    name1 = generate_uuid()
    add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session)
    name2 = generate_uuid()
    add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session)
    name3 = generate_uuid()
    add_replica(self.source_rse_id, self.scope, name3, 1, self.account, session=self.db_session)

    request = models.Request(dest_rse_id=self.dest_rse_id, bytes=2, activity=self.all_activities, state=constants.RequestState.SUBMITTED)
    request.save(session=self.db_session)
    volume = 10
    set_rse_transfer_limits(self.dest_rse_id, 'all_activities', volume=volume, max_transfers=1, session=self.db_session)
    requests = [{'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER,
                 'request_id': generate_uuid(), 'name': name1, 'scope': self.scope,
                 'rule_id': generate_uuid(), 'retry_count': 1, 'requested_at': datetime.now().replace(year=2015),
                 'attributes': {'activity': 'User Subscription', 'bytes': 8, 'md5': '', 'adler32': ''}},
                {'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER,
                 'request_id': generate_uuid(), 'name': name2, 'requested_at': datetime.now().replace(year=2020),
                 'scope': self.scope, 'rule_id': generate_uuid(), 'retry_count': 1,
                 'attributes': {'activity': 'User Subscription', 'bytes': 2, 'md5': '', 'adler32': ''}},
                {'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER,
                 'request_id': generate_uuid(), 'name': name3, 'scope': self.scope,
                 'rule_id': generate_uuid(), 'retry_count': 1, 'requested_at': datetime.now().replace(year=2000),
                 'attributes': {'activity': 'User Subscription', 'bytes': 10, 'md5': '', 'adler32': ''}}]
    queue_requests(requests, session=self.db_session)
    release_waiting_requests_per_free_volume(self.dest_rse_id, volume=volume, session=self.db_session)
    # released because it is small enough
    request = get_request_by_did(self.scope, name1, self.dest_rse_id, session=self.db_session)
    assert_equal(request['state'], constants.RequestState.QUEUED)
    # still waiting because it was requested later and is too big
    request = get_request_by_did(self.scope, name2, self.dest_rse_id, session=self.db_session)
    assert_equal(request['state'], constants.RequestState.WAITING)
    # still waiting because it is too big
    request = get_request_by_did(self.scope, name3, self.dest_rse_id, session=self.db_session)
    assert_equal(request['state'], constants.RequestState.WAITING)

    # release attached requests that fit, together with their dataset, in the available volume, taking already submitted transfers into account
    self.db_session.query(models.Request).delete()
    self.db_session.commit()
    name1 = generate_uuid()
    add_replica(self.source_rse_id, self.scope, name1, 1, self.account, session=self.db_session)
    name2 = generate_uuid()
    add_replica(self.source_rse_id, self.scope, name2, 1, self.account, session=self.db_session)
    name3 = generate_uuid()
    add_replica(self.source_rse_id, self.scope, name3, 1, self.account, session=self.db_session)
    name4 = generate_uuid()
    add_replica(self.source_rse_id, self.scope, name4, 1, self.account, session=self.db_session)
    dataset1_name = generate_uuid()
    add_did(self.scope, dataset1_name, constants.DIDType.DATASET, self.account, session=self.db_session)
    attach_dids(self.scope, dataset1_name, [{'name': name1, 'scope': self.scope}, {'name': name4, 'scope':
self.scope}], self.account, session=self.db_session) dataset2_name = generate_uuid() add_did(self.scope, dataset2_name, constants.DIDType.DATASET, self.account, session=self.db_session) attach_dids(self.scope, dataset2_name, [{'name': name2, 'scope': self.scope}, {'name': name3, 'scope': self.scope}], self.account, session=self.db_session) request = models.Request(dest_rse_id=self.dest_rse_id, bytes=2, activity=self.all_activities, state=constants.RequestState.SUBMITTED) request.save(session=self.db_session) volume = 10 set_rse_transfer_limits(self.dest_rse_id, 'all_activities', volume=volume, max_transfers=1, session=self.db_session) requests = [{ 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name1, 'scope': self.scope, 'rule_id': generate_uuid(), 'retry_count': 1, 'requested_at': datetime.now().replace(year=2015), 'attributes': { 'activity': 'User Subscription', 'bytes': 6, 'md5': '', 'adler32': '' } }, { 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name2, 'requested_at': datetime.now().replace(year=2020), 'scope': self.scope, 'rule_id': generate_uuid(), 'retry_count': 1, 'attributes': { 'activity': 'User Subscription', 'bytes': 2, 'md5': '', 'adler32': '' } }, { 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name3, 'scope': self.scope, 'rule_id': generate_uuid(), 'retry_count': 1, 'requested_at': datetime.now().replace(year=2000), 'attributes': { 'activity': 'User Subscription', 'bytes': 10, 'md5': '', 'adler32': '' } }, { 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name4, 'scope': self.scope, 'rule_id': generate_uuid(), 'retry_count': 1, 'requested_at': datetime.now().replace(year=2030), 'attributes': { 'activity': 'User Subscription', 'bytes': 2, 'md5': '', 'adler32': '' } }] queue_requests(requests, session=self.db_session) release_waiting_requests_per_free_volume(self.dest_rse_id, volume=volume, session=self.db_session) # released because dataset fits in volume request = get_request_by_did(self.scope, name1, self.dest_rse_id, session=self.db_session) assert_equal(request['state'], constants.RequestState.QUEUED) request = get_request_by_did(self.scope, name4, self.dest_rse_id, session=self.db_session) assert_equal(request['state'], constants.RequestState.QUEUED) # waiting because dataset is too big request = get_request_by_did(self.scope, name2, self.dest_rse_id, session=self.db_session) assert_equal(request['state'], constants.RequestState.WAITING) request = get_request_by_did(self.scope, name3, self.dest_rse_id, session=self.db_session) assert_equal(request['state'], constants.RequestState.WAITING) # release requests with no available volume -> release nothing self.db_session.query(models.Request).delete() self.db_session.commit() name1 = generate_uuid() add_replica(self.dest_rse_id, self.scope, name1, 1, self.account, session=self.db_session) volume = 0 set_rse_transfer_limits(self.dest_rse_id, 'all_activities', volume=volume, max_transfers=1, session=self.db_session) requests = [{ 'dest_rse_id': self.dest_rse_id, 'request_type': constants.RequestType.TRANSFER, 'request_id': generate_uuid(), 'name': name1, 'scope': self.scope, 'rule_id': generate_uuid(), 'retry_count': 1, 'requested_at': datetime.now().replace(year=2015), 'attributes': { 'activity': 'User Subscription', 'bytes': 8, 'md5': 
'', 'adler32': '' } }] queue_requests(requests, session=self.db_session) release_waiting_requests_per_free_volume(self.dest_rse_id, volume=volume, session=self.db_session) # waiting because no available volume request = get_request_by_did(self.scope, name1, self.dest_rse_id, session=self.db_session) assert_equal(request['state'], constants.RequestState.WAITING)
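# Plain-Python sketch of the volume bookkeeping exercised above (illustrative, not the
# rucio implementation): the releasable volume is the configured limit minus the bytes
# of already SUBMITTED transfers, and waiting requests are scanned oldest-first, each
# released only if it still fits into the remaining free volume.
def release_per_free_volume(waiting_requests, volume_limit, submitted_bytes):
    free = volume_limit - submitted_bytes
    released = []
    for request in sorted(waiting_requests, key=lambda r: r['requested_at']):
        if request['bytes'] <= free:  # a too-big request is skipped, it does not block later ones
            free -= request['bytes']
            released.append(request)
    return released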
def test_repair_a_rule_with_missing_locks(self): """ JUDGE EVALUATOR: Test the judge when a rule gets STUCK from re_evaluating and there are missing locks""" scope = InternalScope('mock') files = create_files(3, scope, self.rse4_id) dataset = 'dataset_' + str(uuid()) add_did(scope, dataset, DIDType.from_sym('DATASET'), self.jdoe) # Add a first rule to the DS rule_id = add_rule(dids=[{ 'scope': scope, 'name': dataset }], account=self.jdoe, copies=2, rse_expression=self.T1, grouping='DATASET', weight=None, lifetime=None, locked=False, subscription_id=None)[0] attach_dids(scope, dataset, files, self.jdoe) # Fake judge re_evaluator(once=True) # Check if the Locks are created properly for file in files: assert (len( get_replica_locks(scope=file['scope'], name=file['name'])) == 2) # Add more files to the DID files2 = create_files(3, scope, self.rse4_id) attach_dids(scope, dataset, files2, self.jdoe) # Mark the rule STUCK to fake that the re-evaluation failed session = get_session() rule = session.query( models.ReplicationRule).filter_by(id=rule_id).one() rule.state = RuleState.STUCK session.commit() rule_repairer(once=True) for file in files: assert (len( get_replica_locks(scope=file['scope'], name=file['name'])) == 2) for file in files2: assert (len( get_replica_locks(scope=file['scope'], name=file['name'])) == 2) assert (len( set([ lock.rse_id for lock in get_replica_locks(scope=files[0]['scope'], name=files[0]['name']) ]).intersection( set([ lock.rse_id for lock in get_replica_locks(scope=file['scope'], name=file['name']) ]))) == 2) assert (12 == get_rule(rule_id)['locks_replicating_cnt'])
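# Worked arithmetic behind the final assertion above: the rule asks for 2 copies, the
# dataset ends up with 3 + 3 files after the second attach, and the repairer re-creates
# every lock in REPLICATING state, hence (3 + 3) * 2 == 12 replicating locks.
nb_files, nb_copies = 3 + 3, 2
assert nb_files * nb_copies == 12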
def request_transfer(loop=1, src=None, dst=None, upload=False, same_src=False, same_dst=False):
    """
    Main loop to request a new transfer.
    """
    logging.info('request: starting')

    session = get_session()
    src_rse = generate_rse(src, ''.join(random.sample(string.ascii_uppercase, 8)))
    dst_rse = generate_rse(dst, ''.join(random.sample(string.ascii_uppercase, 8)))

    logging.info('request: started')

    i = 0
    while not graceful_stop.is_set():
        if i >= loop:
            return

        try:
            if not same_src:
                src_rse = generate_rse(src, ''.join(random.sample(string.ascii_uppercase, 8)))
            if not same_dst:
                dst_rse = generate_rse(dst, ''.join(random.sample(string.ascii_uppercase, 8)))

            tmp_name = generate_uuid()

            # add a new dataset
            did.add_did(scope='mock', name='dataset-%s' % tmp_name, type=DIDType.DATASET, account='root', session=session)

            # construct PFN
            pfn = rsemanager.lfns2pfns(src_rse, lfns=[{'scope': 'mock', 'name': 'file-%s' % tmp_name}])['mock:file-%s' % tmp_name]

            if upload:
                # create the directories if needed
                p = rsemanager.create_protocol(src_rse, operation='write', scheme='srm')
                p.connect()
                try:
                    p.mkdir(pfn)
                except Exception:
                    pass

                # upload the test file
                try:
                    fp = os.path.dirname(config_get('injector', 'file'))
                    fn = os.path.basename(config_get('injector', 'file'))
                    p.put(fn, pfn, source_dir=fp)
                except Exception:
                    logging.critical('Could not upload, removing temporary DID: %s' % str(sys.exc_info()))
                    did.delete_dids([{'scope': 'mock', 'name': 'dataset-%s' % tmp_name}], account='root', session=session)
                    break

            # add the replica
            replica.add_replica(rse=src_rse['rse'], scope='mock', name='file-%s' % tmp_name,
                                bytes=config_get_int('injector', 'bytes'),
                                adler32=config_get('injector', 'adler32'),
                                md5=config_get('injector', 'md5'),
                                account='root', session=session)
            logging.info('added replica on %s for DID mock:%s' % (src_rse['rse'], tmp_name))

            # attach the file to the dataset
            did.attach_dids(scope='mock', name='dataset-%s' % tmp_name,
                            dids=[{'scope': 'mock', 'name': 'file-%s' % tmp_name,
                                   'bytes': config_get_int('injector', 'bytes')}],
                            account='root', session=session)

            # add a rule for the dataset
            rule.add_rule(dids=[{'scope': 'mock', 'name': 'dataset-%s' % tmp_name}],
                          account='root', copies=1, rse_expression=dst_rse['rse'],
                          grouping='ALL', weight=None, lifetime=None, locked=False,
                          subscription_id=None, activity='mock-injector', session=session)
            logging.info('added rule for %s for DID mock:%s' % (dst_rse['rse'], tmp_name))

            session.commit()
        except Exception:
            session.rollback()
            logging.critical(traceback.format_exc())

        i += 1

    logging.info('request: graceful stop requested')
    logging.info('request: graceful stop done')
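# Hedged usage sketch for request_transfer(): a short-lived injector run in which
# SIGTERM sets graceful_stop so the loop exits cleanly. run_injector and the RSE
# names are assumptions for illustration only.
import signal

def run_injector():
    signal.signal(signal.SIGTERM, lambda signum, frame: graceful_stop.set())
    request_transfer(loop=10, src='MOCK', dst='MOCK2', upload=False, same_src=True, same_dst=True)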
def test_to_repair_a_rule_with_only_1_rse_whose_site_is_blocklisted(self): """ JUDGE REPAIRER: Test to repair a rule with only 1 rse whose site is blocklisted""" rse = rse_name_generator() rse_id = add_rse(rse, **self.vo) set_local_account_limit(self.jdoe, rse_id, -1) rule_repairer(once=True) # Clean out the repairer region = make_region().configure('dogpile.cache.pymemcache', expiration_time=900, arguments={ 'url': config_get( 'cache', 'url', False, '127.0.0.1:11211'), 'distributed_lock': True }) def change_availability(new_value): update_rse(rse_id, {'availability_write': new_value}) # clear cache region.delete(sha256(rse.encode()).hexdigest()) for grouping, ignore_availability in itertools.product( ["NONE", "DATASET", "ALL"], [True, False]): scope = InternalScope('mock', **self.vo) files = create_files(1, scope, self.rse4_id, bytes_=100) dataset = 'dataset_' + str(uuid()) add_did(scope, dataset, DIDType.DATASET, self.jdoe) attach_dids(scope, dataset, files, self.jdoe) if ignore_availability: change_availability(False) rule_id = add_rule(dids=[{ 'scope': scope, 'name': dataset }], account=self.jdoe, copies=1, rse_expression=rse, grouping=grouping, weight=None, lifetime=None, locked=False, subscription_id=None, ignore_availability=ignore_availability, activity='DebugJudge')[0] assert (RuleState.STUCK == get_rule(rule_id)['state']) rule_repairer(once=True) assert (RuleState.REPLICATING == get_rule(rule_id)['state']) change_availability(True) else: rule_id = add_rule(dids=[{ 'scope': scope, 'name': dataset }], account=self.jdoe, copies=1, rse_expression=rse, grouping=grouping, weight=None, lifetime=None, locked=False, subscription_id=None, ignore_availability=ignore_availability, activity='DebugJudge')[0] failed_transfer(scope=scope, name=files[0]['name'], rse_id=get_replica_locks( scope=files[0]['scope'], name=files[0]['name'])[0].rse_id) change_availability(False) assert (RuleState.STUCK == get_rule(rule_id)['state']) rule_repairer(once=True) assert (RuleState.STUCK == get_rule(rule_id)['state']) change_availability(True) rule_repairer(once=True) assert (RuleState.REPLICATING == get_rule(rule_id)['state'])
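# Hedged sketch of the cache bust used in change_availability() above: the test caches
# RSE data under the sha256 hex digest of the RSE name, so flipping availability_write
# only becomes visible after that key is deleted from the dogpile region.
from hashlib import sha256

def bust_rse_cache(region, rse_name):
    region.delete(sha256(rse_name.encode()).hexdigest())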
    add_did(scope=tmp_scope, name=dsn, type=DIDType.DATASET, account='root')
    files = [{'scope': tmp_scope, 'name': lfn,
              'bytes': 724963570, 'adler32': '0cc737eb',
              'meta': {'guid': str(generate_uuid()), 'events': 100}}]
    attach_dids(scope=tmp_scope, name=dsn, rse='MOCK', dids=files, account='root')

    set_metadata(scope=tmp_scope, name=lfn, key='adler32', value='0cc737ee')
    assert_equal(get_metadata(scope=tmp_scope, name=lfn)['adler32'], '0cc737ee')

    with assert_raises(UnsupportedOperation):
        set_metadata(scope=tmp_scope, name='Nimportnawak', key='adler32', value='0cc737ee')
def test_archive_removal_impact_on_constituents(rse_factory, did_factory, mock_scope, root_account):
    rse_name, rse_id = rse_factory.make_mock_rse()
    scope = mock_scope
    account = root_account

    # Create 2 archives and 4 files:
    # - One only exists in the first archive
    # - One in both, plus another replica, which is not in an archive
    # - One in both, plus another replica, which is not in an archive; and this replica has expired
    # - One in both, plus another replica, which is not in an archive; and this replica has expired; but a replication rule exists on this second replica
    # Also add these files to datasets, one of which will be removed at the end
    nb_constituents = 4
    nb_c_outside_archive = nb_constituents - 1
    constituent_size = 2000
    archive_size = 1000
    uuid = str(generate_uuid())
    constituents = [{'scope': scope, 'name': 'lfn.%s.%d' % (uuid, i)} for i in range(nb_constituents)]
    did_factory.register_dids(constituents)
    c_first_archive_only, c_with_replica, c_with_expired_replica, c_with_replica_and_rule = constituents

    replica_core.add_replica(rse_id=rse_id, account=account, bytes=constituent_size, **c_with_replica)
    replica_core.add_replica(rse_id=rse_id, account=account, bytes=constituent_size,
                             tombstone=datetime.utcnow() - timedelta(days=1), **c_with_expired_replica)
    replica_core.add_replica(rse_id=rse_id, account=account, bytes=constituent_size,
                             tombstone=datetime.utcnow() - timedelta(days=1), **c_with_replica_and_rule)
    rule_core.add_rule(dids=[c_with_replica_and_rule], account=account, copies=1, rse_expression=rse_name,
                       grouping='NONE', weight=None, lifetime=None, locked=False, subscription_id=None)

    archive1, archive2 = [{'scope': scope, 'name': 'archive_%s.%d.zip' % (uuid, i)} for i in range(2)]
    replica_core.add_replica(rse_id=rse_id, bytes=archive_size, account=account, **archive1)
    replica_core.add_replica(rse_id=rse_id, bytes=archive_size, account=account, **archive2)
    did_core.attach_dids(dids=[{'scope': c['scope'], 'name': c['name'], 'bytes': constituent_size} for c in constituents],
                         account=account, **archive1)
    did_core.attach_dids(dids=[{'scope': c['scope'], 'name': c['name'], 'bytes': constituent_size}
                               for c in [c_with_replica, c_with_expired_replica, c_with_replica_and_rule]],
                         account=account, **archive2)

    dataset1, dataset2 = [{'scope': scope, 'name': 'dataset_%s.%i' % (uuid, i)} for i in range(2)]
    did_core.add_did(type='DATASET', account=account, **dataset1)
    did_core.attach_dids(dids=constituents, account=account, **dataset1)
    did_core.add_did(type='DATASET', account=account, **dataset2)
    did_core.attach_dids(dids=[c_first_archive_only, c_with_expired_replica], account=account, **dataset2)

    @read_session
    def __get_archive_contents_history_count(archive, session=None):
        return session.query(ConstituentAssociationHistory).filter_by(**archive).count()

    # Run reaper the first time.
    # the expired non-archive replica of c_with_expired_replica must be removed,
    # but the did must not be removed, and it must remain in the dataset, because
    # it still has the replica from inside the archive
    assert replica_core.get_replica(rse_id=rse_id, **c_with_expired_replica)
    REGION.invalidate()
    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=2 * archive_size + nb_c_outside_archive * constituent_size)
    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=2 * archive_size + nb_c_outside_archive * constituent_size, free=1)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)

    for did in constituents + [archive1, archive2]:
        assert did_core.get_did(**did)
    for did in [archive1, archive2, c_with_replica, c_with_replica_and_rule]:
        assert replica_core.get_replica(rse_id=rse_id, **did)
    with pytest.raises(ReplicaNotFound):
        # The replica is only on the archive, not on the constituent
        replica_core.get_replica(rse_id=rse_id, **c_first_archive_only)
    with pytest.raises(ReplicaNotFound):
        # The replica outside the archive was removed by reaper
        nb_c_outside_archive -= 1
        replica_core.get_replica(rse_id=rse_id, **c_with_expired_replica)
    # Compared to get_replica, list_replicas resolves archives, so it must return replicas for all files
    assert len(list(replica_core.list_replicas(dids=constituents))) == 4
    assert len(list(did_core.list_content(**dataset1))) == 4
    assert len(list(did_core.list_archive_content(**archive1))) == 4
    assert len(list(did_core.list_archive_content(**archive2))) == 3
    assert __get_archive_contents_history_count(archive1) == 0
    assert __get_archive_contents_history_count(archive2) == 0

    # Expire the first archive and run reaper again
    # the archive will be removed; and c_first_archive_only must be removed from the datasets
    # and from the did table.
    replica_core.set_tombstone(rse_id=rse_id, tombstone=datetime.utcnow() - timedelta(days=1), **archive1)
    REGION.invalidate()
    rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=2 * archive_size + nb_c_outside_archive * constituent_size)
    rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=2 * archive_size + nb_c_outside_archive * constituent_size, free=1)
    reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None)

    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**archive1)
    with pytest.raises(DataIdentifierNotFound):
        assert did_core.get_did(**c_first_archive_only)
    assert len(list(replica_core.list_replicas(dids=constituents))) == 3
    assert len(list(did_core.list_content(**dataset1))) == 3
    assert len(list(did_core.list_archive_content(**archive1))) == 0
    assert len(list(did_core.list_archive_content(**archive2))) == 3
    assert __get_archive_contents_history_count(archive1) == 4
    assert __get_archive_contents_history_count(archive2) == 0

    # Expire the second archive replica and run reaper another time
    # c_with_expired_replica is removed because its external replica was removed in the previous step
    # and it now exists only inside the archive.
    # If not open, dataset2 will be removed because it will be empty.
did_core.set_status(open=False, **dataset2) replica_core.set_tombstone(rse_id=rse_id, tombstone=datetime.utcnow() - timedelta(days=1), **archive2) REGION.invalidate() rse_core.set_rse_limits(rse_id=rse_id, name='MinFreeSpace', value=archive_size + nb_c_outside_archive * constituent_size) rse_core.set_rse_usage(rse_id=rse_id, source='storage', used=archive_size + nb_c_outside_archive * constituent_size, free=1) reaper(once=True, rses=[], include_rses=rse_name, exclude_rses=None) # The archive must be removed with pytest.raises(DataIdentifierNotFound): assert did_core.get_did(**archive2) # The DIDs which only existed in the archive are also removed with pytest.raises(DataIdentifierNotFound): assert did_core.get_did(**c_first_archive_only) with pytest.raises(DataIdentifierNotFound): assert did_core.get_did(**c_with_expired_replica) # If the DID has a non-expired replica outside the archive without rules on it, the DID is not removed assert did_core.get_did(**c_with_replica) # If the DID has an expired replica outside the archive, but has rules on that replica, the DID is not removed assert did_core.get_did(**c_with_replica_and_rule) assert len(list(replica_core.list_replicas(dids=constituents))) == 2 assert len(list(did_core.list_content(**dataset1))) == 2 with pytest.raises(DataIdentifierNotFound): did_core.get_did(**dataset2) assert len(list(did_core.list_content(**dataset2))) == 0 assert len(list(did_core.list_archive_content(**archive2))) == 0 assert __get_archive_contents_history_count(archive1) == 4 assert __get_archive_contents_history_count(archive2) == 3
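# Plain-Python decision sketch summarising the constituent cleanup asserted above
# (illustrative only): once its archives are gone, a constituent did survives only if it
# still has a replica outside any archive that is either not expired or pinned by a rule.
def constituent_survives(has_outside_replica, outside_replica_expired, has_rule):
    if not has_outside_replica:
        return False
    if outside_replica_expired and not has_rule:
        return False
    return True

assert constituent_survives(True, False, False)       # c_with_replica
assert constituent_survives(True, True, True)         # c_with_replica_and_rule
assert not constituent_survives(True, True, False)    # c_with_expired_replica
assert not constituent_survives(False, False, False)  # c_first_archive_only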