import hashlib
import sys

from sqlalchemy import create_engine

from rucio.client.didclient import DIDClient
from rucio.client.ruleclient import RuleClient
from rucio.common.exception import SubscriptionDuplicate


def main(argv):
    try:
        did_client = DIDClient()
        rule_client = RuleClient()
        openfile_name = sys.argv[1]
        scope_name = 'twgrid-user-wchang'
        mysql_engine = create_engine("mysql://*****:*****@rucio-db01.grid.sinica.edu.tw/rucio")
        with open(openfile_name) as file:
            for line in file:
                connection = mysql_engine.connect()
                # Each input line: "<dataset> <file_name> <file_size> <account>"
                data = line.strip('\n')
                dataset = data.split(" ")[0]
                file_name = data.split(" ")[1]
                file_size = int(data.split(" ")[2])
                account = data.split(" ")[3]  # only used by the commented-out replication rule
                pre_md5 = 'twgrid-user-wchang:' + file_name
                md5_sum = hashlib.md5(pre_md5.encode('utf-8')).hexdigest()
                files = [{'scope': scope_name, 'name': file_name, 'md5': md5_sum,
                          'bytes': file_size, 'adler32': '0cc737eb'}]
                # Check whether the DID is already known before registering it.
                contact_db = connection.execute("select * from dids where scope='%s' and name='%s';"
                                                % (scope_name, file_name))
                num_rows = contact_db.rowcount
                if num_rows == 0:
                    print("Register File : %s " % file_name)
                    did_client.add_files_to_dataset(scope=scope_name, name=dataset, files=files,
                                                    rse='TW-DPM01_TWGRIDSCRATCHDISK')
                    # rule_client.add_replication_rule(dids=[{'scope': scope_name, 'name': file_name}],
                    #                                  account=account, copies=1,
                    #                                  rse_expression='TW-DPM01_TWGRIDSCRATCHDISK',
                    #                                  grouping='DATASET')
                else:
                    print("Attach File : %s To %s " % (file_name, dataset))
                    did_client.attach_dids(scope=scope_name, name=dataset, dids=files)
                connection.close()
    except SubscriptionDuplicate as e:
        print(e)
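The script above parses each input line into four whitespace-separated fields: dataset name, file name, file size in bytes, and owning account. A minimal sketch of preparing such an input list and invoking the script; the file names and record values shown are hypothetical.

# Hypothetical input file: one "<dataset> <file_name> <bytes> <account>" record per line.
with open('register_list.txt', 'w') as out:
    out.write('user.wchang.test.dataset file_0001.root 1048576 wchang\n')
    out.write('user.wchang.test.dataset file_0002.root 2097152 wchang\n')

# The script is then run with the list as its first command-line argument, e.g.:
#   python register_files.py register_list.txt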
import os

from rucio.client.didclient import DIDClient
from rucio.client.replicaclient import ReplicaClient
from rucio.common.utils import adler32  # assumed source of the adler32(path) helper used below


def rucio_register(self, filenames):
    files = []
    dids = []
    for filename in filenames:
        size = os.stat(str(filename)).st_size
        adler = adler32(str(filename))
        files.append({'scope': self.scope,
                      'name': str(filename.parts[-1]),
                      'bytes': size,
                      'adler32': adler,
                      'pfn': self.pfn + str(filename.parts[-1])})

    # Register the physical replicas first, then attach the new files to the dataset.
    replica_client = ReplicaClient()
    replica_client.add_replicas(rse=self.rse, files=files)
    didclient = DIDClient()
    didclient.add_files_to_dataset(self.scope, self.dataset, files)
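rucio_register expects pathlib.Path objects (it reads filename.parts[-1]) and an object providing scope, rse, dataset and pfn attributes. A minimal usage sketch under those assumptions; the SimpleNamespace holder and all attribute values are hypothetical stand-ins for whatever the surrounding class defines, and the target dataset is assumed to exist already.

from pathlib import Path
from types import SimpleNamespace

# Hypothetical attribute holder: rucio_register only needs scope, rse, dataset and pfn.
uploader = SimpleNamespace(scope='user.jdoe',
                           rse='MOCK',
                           dataset='user.jdoe.test.dataset',
                           pfn='srm://storage.example.org/rucio/user.jdoe/')

# Call the function directly, passing the holder as the explicit 'self' argument.
rucio_register(uploader, [Path('/data/file_0001.root'), Path('/data/file_0002.root')])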
class TestDIDClients: def setup(self): self.account_client = AccountClient() self.scope_client = ScopeClient() self.meta_client = MetaClient() self.did_client = DIDClient() self.replica_client = ReplicaClient() self.rse_client = RSEClient() def test_list_dids(self): """ DATA IDENTIFIERS (CLIENT): List dids by pattern.""" tmp_scope = scope_name_generator() tmp_files = [] tmp_files.append('file_a_1%s' % generate_uuid()) tmp_files.append('file_a_2%s' % generate_uuid()) tmp_files.append('file_b_1%s' % generate_uuid()) tmp_rse = 'MOCK' self.scope_client.add_scope('jdoe', tmp_scope) for tmp_file in tmp_files: self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb') results = [] for result in self.did_client.list_dids(tmp_scope, {'name': 'file_a_*'}, type='file'): results.append(result) assert_equal(len(results), 2) results = [] for result in self.did_client.list_dids(tmp_scope, {'name': 'file_a_1*'}, type='file'): results.append(result) assert_equal(len(results), 1) results = [] for result in self.did_client.list_dids(tmp_scope, {'name': 'file_*_1*'}, type='file'): results.append(result) assert_equal(len(results), 2) results = [] for result in self.did_client.list_dids(tmp_scope, {'name': 'file*'}, type='file'): results.append(result) assert_equal(len(results), 3) results = [] filters = {'name': 'file*', 'created_after': datetime.utcnow() - timedelta(hours=1)} for result in self.did_client.list_dids(tmp_scope, filters): results.append(result) assert_equal(len(results), 0) with assert_raises(UnsupportedOperation): self.did_client.list_dids(tmp_scope, {'name': 'file*'}, type='whateverytype') def test_list_recursive(self): """ DATA IDENTIFIERS (CLIENT): List did recursive """ # Create nested containers and datast tmp_scope_1 = 'list-did-recursive' tmp_scope_2 = 'list-did-recursive-2' self.scope_client.add_scope('root', tmp_scope_1) self.scope_client.add_scope('root', tmp_scope_2) tmp_container_1 = 'container_%s' % generate_uuid() self.did_client.add_container(scope=tmp_scope_1, name=tmp_container_1) tmp_container_2 = 'container_%s' % generate_uuid() self.did_client.add_container(scope=tmp_scope_1, name=tmp_container_2) tmp_dataset_1 = 'dataset_%s' % generate_uuid() self.did_client.add_dataset(scope=tmp_scope_2, name=tmp_dataset_1) tmp_dataset_2 = 'dataset_%s' % generate_uuid() self.did_client.add_dataset(scope=tmp_scope_1, name=tmp_dataset_2) self.did_client.attach_dids(scope=tmp_scope_1, name=tmp_container_1, dids=[{'scope': tmp_scope_2, 'name': tmp_dataset_1}]) self.did_client.attach_dids(scope=tmp_scope_1, name=tmp_container_2, dids=[{'scope': tmp_scope_1, 'name': tmp_dataset_2}]) self.did_client.attach_dids(scope=tmp_scope_1, name=tmp_container_1, dids=[{'scope': tmp_scope_1, 'name': tmp_container_2}]) # List DIDs not recursive - only the first container is expected dids = [str(did) for did in self.did_client.list_dids(scope=tmp_scope_1, recursive=False, type='all', filters={'name': tmp_container_1})] assert_equal(dids, [tmp_container_1]) # List DIDs recursive - first container and all attached collections are expected dids = [str(did) for did in self.did_client.list_dids(scope=tmp_scope_1, recursive=True, type='all', filters={'name': tmp_container_1})] assert_true(tmp_container_1 in dids) assert_true(tmp_container_2 in dids) assert_true(tmp_dataset_1 in dids) assert_true(tmp_dataset_2 in dids) assert_equal(len(dids), 4) # List DIDs recursive - only containers are expected dids = [str(did) for did in self.did_client.list_dids(scope=tmp_scope_1, recursive=True, 
type='container', filters={'name': tmp_container_1})] assert_true(tmp_container_1 in dids) assert_true(tmp_container_2 in dids) assert_true(tmp_dataset_1 not in dids) assert_true(tmp_dataset_2 not in dids) assert_equal(len(dids), 2) def test_list_by_length(self): """ DATA IDENTIFIERS (CLIENT): List did with length """ tmp_scope = 'mock' tmp_dsn = 'dsn_%s' % generate_uuid() self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn) dids = self.did_client.list_dids(tmp_scope, {'length.gt': 0}) results = [] for d in dids: results.append(d) assert_not_equal(len(results), 0) dids = self.did_client.list_dids(tmp_scope, {'length.gt': -1, 'length.lt': 1}) results = [] for d in dids: results.append(d) assert_equal(len(results), 0) dids = self.did_client.list_dids(tmp_scope, {'length': 0}) results = [] for d in dids: results.append(d) assert_equal(len(results), 0) def test_list_by_metadata(self): """ DATA IDENTIFIERS (CLIENT): List did with metadata""" dsns = [] tmp_scope = 'mock' tmp_dsn1 = 'dsn_%s' % generate_uuid() dsns.append(tmp_dsn1) dataset_meta = {'project': 'data12_8TeV', 'run_number': 400000, 'stream_name': 'physics_CosmicCalo', 'prod_step': 'merge', 'datatype': 'NTUP_TRIG', 'version': 'f392_m920', } self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn1, meta=dataset_meta) tmp_dsn2 = 'dsn_%s' % generate_uuid() dsns.append(tmp_dsn2) dataset_meta['run_number'] = 400001 self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn2, meta=dataset_meta) tmp_dsn3 = 'dsn_%s' % generate_uuid() dsns.append(tmp_dsn3) dataset_meta['stream_name'] = 'physics_Egamma' dataset_meta['datatype'] = 'NTUP_SMWZ' self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn3, meta=dataset_meta) dids = self.did_client.list_dids(tmp_scope, {'project': 'data12_8TeV', 'version': 'f392_m920'}) results = [] for d in dids: results.append(d) for dsn in dsns: assert_in(dsn, results) dsns.remove(tmp_dsn1) dids = self.did_client.list_dids(tmp_scope, {'project': 'data12_8TeV', 'run_number': 400001}) results = [] for d in dids: results.append(d) for dsn in dsns: assert_in(dsn, results) dsns.remove(tmp_dsn2) dids = self.did_client.list_dids(tmp_scope, {'project': 'data12_8TeV', 'stream_name': 'physics_Egamma', 'datatype': 'NTUP_SMWZ'}) results = [] for d in dids: results.append(d) for dsn in dsns: assert_in(dsn, results) with assert_raises(KeyNotFound): self.did_client.list_dids(tmp_scope, {'NotReallyAKey': 'NotReallyAValue'}) def test_add_did(self): """ DATA IDENTIFIERS (CLIENT): Add, populate, list did content and create a sample""" tmp_scope = 'mock' tmp_rse = 'MOCK' tmp_dsn = 'dsn_%s' % generate_uuid() root = InternalAccount('root') set_local_account_limit(root, get_rse_id('MOCK'), -1) set_local_account_limit(root, get_rse_id('CERN-PROD_TZERO'), -1) # PFN example: rfio://castoratlas.cern.ch/castor/cern.ch/grid/atlas/tzero/xx/xx/xx/filename dataset_meta = {'project': 'data13_hip', 'run_number': 300000, 'stream_name': 'physics_CosmicCalo', 'prod_step': 'merge', 'datatype': 'NTUP_TRIG', 'version': 'f392_m927', } rules = [{'copies': 1, 'rse_expression': 'MOCK', 'account': 'root'}] with assert_raises(ScopeNotFound): self.did_client.add_dataset(scope='Nimportnawak', name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules) files = [{'scope': tmp_scope, 'name': 'lfn.%(tmp_dsn)s.' 
% locals() + str(generate_uuid()), 'bytes': 724963570, 'adler32': '0cc737eb'}, ] with assert_raises(DataIdentifierNotFound): self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules, files=files) with assert_raises(DataIdentifierNotFound): self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dsn, files=files) files = [] for i in range(5): lfn = 'lfn.%(tmp_dsn)s.' % locals() + str(generate_uuid()) pfn = 'mock://localhost/tmp/rucio_rse/%(project)s/%(version)s/%(prod_step)s' % dataset_meta # it doesn't work with mock: TBF # pfn = 'srm://mock2.com:2880/pnfs/rucio/disk-only/scratchdisk/rucio_tests/%(project)s/%(version)s/%(prod_step)s' % dataset_meta pfn += '%(tmp_dsn)s/%(lfn)s' % locals() file_meta = {'guid': str(generate_uuid()), 'events': 10} files.append({'scope': tmp_scope, 'name': lfn, 'bytes': 724963570, 'adler32': '0cc737eb', 'pfn': pfn, 'meta': file_meta}) rules = [{'copies': 1, 'rse_expression': 'CERN-PROD_TZERO', 'lifetime': timedelta(days=2), 'account': 'root'}] with assert_raises(InvalidPath): self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules, files=files, rse=tmp_rse) files_without_pfn = [{'scope': i['scope'], 'name': i['name'], 'bytes': i['bytes'], 'adler32': i['adler32'], 'meta': i['meta']} for i in files] self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules, files=files_without_pfn, rse=tmp_rse) with assert_raises(DataIdentifierAlreadyExists): self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, files=files, rse=tmp_rse) files = [] for i in range(5): lfn = '%(tmp_dsn)s.' % locals() + str(generate_uuid()) pfn = 'mock://localhost/tmp/rucio_rse/%(project)s/%(version)s/%(prod_step)s' % dataset_meta # it doesn't work with mock: TBF # pfn = 'srm://mock2.com:2880/pnfs/rucio/disk-only/scratchdisk/rucio_tests/%(project)s/%(version)s/%(prod_step)s' % dataset_meta pfn += '%(tmp_dsn)s/%(lfn)s' % locals() file_meta = {'guid': str(generate_uuid()), 'events': 100} files.append({'scope': tmp_scope, 'name': lfn, 'bytes': 724963570, 'adler32': '0cc737eb', 'pfn': pfn, 'meta': file_meta}) rules = [{'copies': 1, 'rse_expression': 'CERN-PROD_TZERO', 'lifetime': timedelta(days=2)}] with assert_raises(InvalidPath): self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dsn, files=files, rse=tmp_rse) files_without_pfn = [{'scope': i['scope'], 'name': i['name'], 'bytes': i['bytes'], 'adler32': i['adler32'], 'meta': i['meta']} for i in files] self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dsn, files=files_without_pfn, rse=tmp_rse) self.did_client.close(scope=tmp_scope, name=tmp_dsn) tmp_dsn_output = 'dsn_%s' % generate_uuid() self.did_client.create_did_sample(input_scope=tmp_scope, input_name=tmp_dsn, output_scope=tmp_scope, output_name=tmp_dsn_output, nbfiles=2) files = [f for f in self.did_client.list_files(scope=tmp_scope, name=tmp_dsn_output)] assert_equal(len(files), 2) def test_attach_dids_to_dids(self): """ DATA IDENTIFIERS (CLIENT): Attach dids to dids""" tmp_scope = 'mock' tmp_rse = 'MOCK' nb_datasets = 5 nb_files = 5 attachments, dsns = list(), list() guid_to_query = None dsn = {} for i in range(nb_datasets): attachment = {} attachment['scope'] = tmp_scope attachment['name'] = 'dsn.%s' % str(generate_uuid()) attachment['rse'] = tmp_rse files = [] for i in range(nb_files): files.append({'scope': tmp_scope, 'name': 'lfn.%s' % str(generate_uuid()), 'bytes': 
724963570, 'adler32': '0cc737eb', 'meta': {'guid': str(generate_uuid()), 'events': 100}}) attachment['dids'] = files guid_to_query = files[0]['meta']['guid'] dsn = {'scope': tmp_scope, 'name': attachment['name']} dsns.append(dsn) attachments.append(attachment) self.did_client.add_datasets(dsns=dsns) self.did_client.attach_dids_to_dids(attachments=attachments) dsns_l = [i for i in self.did_client.get_dataset_by_guid(guid_to_query)] assert_equal([dsn], dsns_l) cnt_name = 'cnt_%s' % generate_uuid() self.did_client.add_container(scope='mock', name=cnt_name) with assert_raises(UnsupportedOperation): self.did_client.attach_dids_to_dids([{'scope': 'mock', 'name': cnt_name, 'rse': tmp_rse, 'dids': attachment['dids']}]) def test_add_files_to_datasets(self): """ DATA IDENTIFIERS (CLIENT): Add files to Datasets""" tmp_scope = 'mock' tmp_rse = 'MOCK' dsn1 = 'dsn.%s' % str(generate_uuid()) dsn2 = 'dsn.%s' % str(generate_uuid()) meta = {'transient': True} files1, files2, nb_files = [], [], 5 for i in range(nb_files): files1.append({'scope': tmp_scope, 'name': 'lfn.%s' % str(generate_uuid()), 'bytes': 724963570, 'adler32': '0cc737eb', 'meta': {'guid': str(generate_uuid()), 'events': 100}}) files2.append({'scope': tmp_scope, 'name': 'lfn.%s' % str(generate_uuid()), 'bytes': 724963570, 'adler32': '0cc737eb', 'meta': {'guid': str(generate_uuid()), 'events': 100}}) self.did_client.add_dataset(scope=tmp_scope, name=dsn1, files=files1, rse=tmp_rse, meta=meta) self.did_client.add_dataset(scope=tmp_scope, name=dsn2, files=files2, rse=tmp_rse, meta=meta) attachments = [{'scope': tmp_scope, 'name': dsn1, 'dids': files2, 'rse': tmp_rse}, {'scope': tmp_scope, 'name': dsn2, 'dids': files1, 'rse': tmp_rse}] self.did_client.add_files_to_datasets(attachments) files = [f for f in self.did_client.list_files(scope=tmp_scope, name=dsn1)] assert_equal(len(files), 10) with assert_raises(FileAlreadyExists): self.did_client.add_files_to_datasets(attachments) for attachment in attachments: for i in range(nb_files): attachment['dids'].append({'scope': tmp_scope, 'name': 'lfn.%s' % str(generate_uuid()), 'bytes': 724963570, 'adler32': '0cc737eb', 'meta': {'guid': str(generate_uuid()), 'events': 100}}) self.did_client.add_files_to_datasets(attachments, ignore_duplicate=True) files = [f for f in self.did_client.list_files(scope=tmp_scope, name=dsn1)] assert_equal(len(files), 15) # Corrupt meta-data files = [] for attachment in attachments: for file in attachment['dids']: file['bytes'] = 1000 break with assert_raises(FileConsistencyMismatch): self.did_client.add_files_to_datasets(attachments, ignore_duplicate=True) def test_add_dataset(self): """ DATA IDENTIFIERS (CLIENT): Add dataset """ tmp_scope = 'mock' tmp_dsn = 'dsn_%s' % generate_uuid() self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, meta={'project': 'data13_hip'}) did = self.did_client.get_did(tmp_scope, tmp_dsn) assert_equal(did['scope'], tmp_scope) assert_equal(did['name'], tmp_dsn) with assert_raises(DataIdentifierNotFound): self.did_client.get_did('i_dont_exist', 'neither_do_i') def test_add_datasets(self): """ DATA IDENTIFIERS (CLIENT): Bulk add datasets """ tmp_scope = 'mock' dsns = list() for i in range(500): tmp_dsn = {'name': 'dsn_%s' % generate_uuid(), 'scope': tmp_scope, 'meta': {'project': 'data13_hip'}} dsns.append(tmp_dsn) self.did_client.add_datasets(dsns) def test_exists(self): """ DATA IDENTIFIERS (CLIENT): Check if data identifier exists """ tmp_scope = 'mock' tmp_file = 'file_%s' % generate_uuid() tmp_rse = 'MOCK' 
self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb') did = self.did_client.get_did(tmp_scope, tmp_file) assert_equal(did['scope'], tmp_scope) assert_equal(did['name'], tmp_file) with assert_raises(DataIdentifierNotFound): self.did_client.get_did('i_dont_exist', 'neither_do_i') def test_did_hierarchy(self): """ DATA IDENTIFIERS (CLIENT): Check did hierarchy rule """ account = 'jdoe' rse = 'MOCK' scope = scope_name_generator() file = ['file_%s' % generate_uuid() for i in range(10)] dst = ['dst_%s' % generate_uuid() for i in range(4)] cnt = ['cnt_%s' % generate_uuid() for i in range(4)] self.scope_client.add_scope(account, scope) for i in range(10): self.replica_client.add_replica(rse, scope, file[i], 1, '0cc737eb') for i in range(4): self.did_client.add_did(scope, dst[i], 'DATASET', statuses=None, meta=None, rules=None) for i in range(4): self.did_client.add_did(scope, cnt[i], 'CONTAINER', statuses=None, meta=None, rules=None) for i in range(4): self.did_client.add_files_to_dataset(scope, dst[i], [{'scope': scope, 'name': file[2 * i], 'bytes': 1, 'adler32': '0cc737eb'}, {'scope': scope, 'name': file[2 * i + 1], 'bytes': 1, 'adler32': '0cc737eb'}]) self.did_client.add_containers_to_container(scope, cnt[1], [{'scope': scope, 'name': cnt[2]}, {'scope': scope, 'name': cnt[3]}]) self.did_client.add_datasets_to_container(scope, cnt[0], [{'scope': scope, 'name': dst[1]}, {'scope': scope, 'name': dst[2]}]) result = self.did_client.scope_list(scope, recursive=True) for r in result: pass # TODO: fix, fix, fix # if r['name'] == cnt[1]: # assert_equal(r['type'], 'container') # assert_equal(r['level'], 0) # if (r['name'] == cnt[0]) or (r['name'] == dst[0]) or (r['name'] == file[8]) or (r['name'] == file[9]): # assert_equal(r['level'], 0) # else: # assert_equal(r['level'], 1) def test_detach_did(self): """ DATA IDENTIFIERS (CLIENT): Detach dids from a did""" account = 'jdoe' rse = 'MOCK' scope = scope_name_generator() file = ['file_%s' % generate_uuid() for i in range(10)] dst = ['dst_%s' % generate_uuid() for i in range(5)] cnt = ['cnt_%s' % generate_uuid() for i in range(2)] self.scope_client.add_scope(account, scope) for i in range(10): self.replica_client.add_replica(rse, scope, file[i], 1, '0cc737eb') for i in range(5): self.did_client.add_dataset(scope, dst[i], statuses=None, meta=None, rules=None) for i in range(2): self.did_client.add_container(scope, cnt[i], statuses=None, meta=None, rules=None) for i in range(5): self.did_client.add_files_to_dataset(scope, dst[i], [{'scope': scope, 'name': file[2 * i], 'bytes': 1, 'adler32': '0cc737eb'}, {'scope': scope, 'name': file[2 * i + 1], 'bytes': 1, 'adler32': '0cc737eb'}]) self.did_client.add_containers_to_container(scope, cnt[1], [{'scope': scope, 'name': dst[2]}, {'scope': scope, 'name': dst[3]}]) with assert_raises(UnsupportedOperation): self.did_client.add_datasets_to_container(scope, cnt[0], [{'scope': scope, 'name': dst[1]}, {'scope': scope, 'name': cnt[1]}]) self.did_client.add_datasets_to_container(scope, cnt[0], [{'scope': scope, 'name': dst[1]}, {'scope': scope, 'name': dst[2]}]) self.did_client.detach_dids(scope, cnt[0], [{'scope': scope, 'name': dst[1]}]) self.did_client.detach_dids(scope, dst[3], [{'scope': scope, 'name': file[6]}, {'scope': scope, 'name': file[7]}]) result = self.did_client.scope_list(scope, recursive=True) for r in result: if r['name'] == dst[1]: assert_equal(r['level'], 0) if r['type'] == 'file': if (r['name'] in file[6:9]): assert_equal(r['level'], 0) else: assert_not_equal(r['level'], 0) with 
assert_raises(UnsupportedOperation): self.did_client.detach_dids(scope=scope, name=cnt[0], dids=[{'scope': scope, 'name': cnt[0]}]) self.did_client.close(scope, dst[4]) metadata = self.did_client.get_metadata(scope, dst[4]) i_bytes, i_length = metadata['bytes'], metadata['length'] metadata = self.did_client.get_metadata(scope, file[8]) file1_bytes = metadata['bytes'] metadata = self.did_client.get_metadata(scope, file[9]) file2_bytes = metadata['bytes'] self.did_client.detach_dids(scope, dst[4], [{'scope': scope, 'name': file[8]}, {'scope': scope, 'name': file[9]}]) metadata = self.did_client.get_metadata(scope, dst[4]) f_bytes, f_length = metadata['bytes'], metadata['length'] assert_equal(i_bytes, f_bytes + file1_bytes + file2_bytes) assert_equal(i_length, f_length + 1 + 1) def test_scope_list(self): """ DATA IDENTIFIERS (CLIENT): Add, aggregate, and list data identifiers in a scope """ # create some dummy data self.tmp_accounts = ['jdoe' for i in range(3)] self.tmp_scopes = [scope_name_generator() for i in range(3)] self.tmp_rses = [rse_name_generator() for i in range(3)] self.tmp_files = ['file_%s' % generate_uuid() for i in range(3)] self.tmp_datasets = ['dataset_%s' % generate_uuid() for i in range(3)] self.tmp_containers = ['container_%s' % generate_uuid() for i in range(3)] # add dummy data to the catalogue for i in range(3): self.scope_client.add_scope(self.tmp_accounts[i], self.tmp_scopes[i]) self.rse_client.add_rse(self.tmp_rses[i]) self.replica_client.add_replica(self.tmp_rses[i], self.tmp_scopes[i], self.tmp_files[i], 1, '0cc737eb') # put files in datasets for i in range(3): for j in range(3): files = [{'scope': self.tmp_scopes[j], 'name': self.tmp_files[j], 'bytes': 1, 'adler32': '0cc737eb'}] self.did_client.add_dataset(self.tmp_scopes[i], self.tmp_datasets[j]) self.did_client.add_files_to_dataset(self.tmp_scopes[i], self.tmp_datasets[j], files) # put datasets in containers for i in range(3): for j in range(3): datasets = [{'scope': self.tmp_scopes[j], 'name': self.tmp_datasets[j]}] self.did_client.add_container(self.tmp_scopes[i], self.tmp_containers[j]) self.did_client.add_datasets_to_container(self.tmp_scopes[i], self.tmp_containers[j], datasets) # reverse check if everything is in order for i in range(3): result = self.did_client.scope_list(self.tmp_scopes[i], recursive=True) r_topdids = [] r_otherscopedids = [] r_scope = [] for r in result: if r['level'] == 0: r_topdids.append(r['scope'] + ':' + r['name']) r_scope.append(r['scope']) if r['scope'] != self.tmp_scopes[i]: r_otherscopedids.append(r['scope'] + ':' + r['name']) assert_in(r['level'], [1, 2]) for j in range(3): assert_equal(self.tmp_scopes[i], r_scope[j]) if j != i: assert_in(self.tmp_scopes[j] + ':' + self.tmp_files[j], r_otherscopedids) assert_not_in(self.tmp_scopes[i] + ':' + self.tmp_files[i], r_topdids) def test_get_did(self): """ DATA IDENTIFIERS (CLIENT): add a new data identifier and try to retrieve it back""" rse = 'MOCK' scope = 'mock' file = generate_uuid() dsn = generate_uuid() self.replica_client.add_replica(rse, scope, file, 1, '0cc737eb') did = self.did_client.get_did(scope, file) assert_equal(did['scope'], scope) assert_equal(did['name'], file) self.did_client.add_dataset(scope=scope, name=dsn, lifetime=10000000) did2 = self.did_client.get_did(scope, dsn) assert_equal(type(did2['expired_at']), datetime) def test_get_meta(self): """ DATA IDENTIFIERS (CLIENT): add a new meta data for an identifier and try to retrieve it back""" rse = 'MOCK' scope = 'mock' file = generate_uuid() keys = ['project', 
'run_number'] values = ['data13_hip', 12345678] self.replica_client.add_replica(rse, scope, file, 1, '0cc737eb') for i in range(2): self.did_client.set_metadata(scope, file, keys[i], values[i]) meta = self.did_client.get_metadata(scope, file) for i in range(2): assert_equal(meta[keys[i]], values[i]) def test_list_content(self): """ DATA IDENTIFIERS (CLIENT): test to list contents for an identifier""" rse = 'MOCK' scope = 'mock' nbfiles = 5 dataset1 = generate_uuid() dataset2 = generate_uuid() container = generate_uuid() files1 = [{'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'} for i in range(nbfiles)] files2 = [{'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'} for i in range(nbfiles)] self.did_client.add_dataset(scope, dataset1) with assert_raises(DataIdentifierAlreadyExists): self.did_client.add_dataset(scope, dataset1) self.did_client.add_files_to_dataset(scope, dataset1, files1, rse=rse) self.did_client.add_dataset(scope, dataset2) self.did_client.add_files_to_dataset(scope, dataset2, files2, rse=rse) self.did_client.add_container(scope, container) datasets = [{'scope': scope, 'name': dataset1}, {'scope': scope, 'name': dataset2}] self.did_client.add_datasets_to_container(scope, container, datasets) contents = self.did_client.list_content(scope, container) datasets_s = [d['name'] for d in contents] assert_in(dataset1, datasets_s) assert_in(dataset2, datasets_s) def test_list_files(self): """ DATA IDENTIFIERS (CLIENT): List files for a container""" rse = 'MOCK' scope = 'mock' dataset1 = generate_uuid() dataset2 = generate_uuid() container = generate_uuid() files1 = [] files2 = [] for i in range(10): files1.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'}) files2.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'}) for i in range(10): self.replica_client.add_replica(rse, scope, files1[i]['name'], 1, '0cc737eb') self.replica_client.add_replica(rse, scope, files2[i]['name'], 1, '0cc737eb') self.did_client.add_dataset(scope, dataset1) self.did_client.add_files_to_dataset(scope, dataset1, files1) self.did_client.add_dataset(scope, dataset2) self.did_client.add_files_to_dataset(scope, dataset2, files2) datasets = [{'scope': scope, 'name': dataset1}, {'scope': scope, 'name': dataset2}] self.did_client.add_container(scope, container) self.did_client.add_datasets_to_container(scope, container, datasets) # List file content content = self.did_client.list_files(scope, files1[i]['name']) assert_true(content is not None) for d in content: assert_true(d['name'] == files1[i]['name']) # List container content for d in [{'name': x['name'], 'scope': x['scope'], 'bytes': x['bytes'], 'adler32': x['adler32']} for x in self.did_client.list_files(scope, container)]: assert_in(d, files1 + files2) # List non-existing data identifier content with assert_raises(DataIdentifierNotFound): self.did_client.list_files(scope, 'Nimportnawak') def test_list_replicas(self): """ DATA IDENTIFIERS (CLIENT): List replicas for a container""" rse = 'MOCK' scope = 'mock' dsn1 = generate_uuid() dsn2 = generate_uuid() cnt = generate_uuid() files1 = [] files2 = [] for i in range(10): files1.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'}) files2.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'}) self.did_client.add_dataset(scope, dsn1) self.did_client.add_files_to_dataset(scope, dsn1, files1, rse=rse) self.did_client.add_dataset(scope, dsn2) 
self.did_client.add_files_to_dataset(scope, dsn2, files2, rse=rse) self.did_client.add_container(scope, cnt) self.did_client.add_datasets_to_container(scope, cnt, [{'scope': scope, 'name': dsn1}, {'scope': scope, 'name': dsn2}]) replicas = self.replica_client.list_replicas(dids=[{'scope': scope, 'name': dsn1}]) assert_true(replicas is not None) replicas = self.replica_client.list_replicas(dids=[{'scope': scope, 'name': cnt}]) assert_true(replicas is not None) @raises(UnsupportedOperation) def test_close(self): """ DATA IDENTIFIERS (CLIENT): test to close data identifiers""" tmp_rse = 'MOCK' tmp_scope = 'mock' # Add dataset tmp_dataset = 'dsn_%s' % generate_uuid() # Add file replica tmp_file = 'file_%s' % generate_uuid() self.replica_client.add_replica(rse=tmp_rse, scope=tmp_scope, name=tmp_file, bytes=1, adler32='0cc737eb') # Add dataset self.did_client.add_dataset(scope=tmp_scope, name=tmp_dataset) # Add files to dataset files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ] self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files) # Add a second file replica tmp_file = 'file_%s' % generate_uuid() self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb') # Add files to dataset files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ] self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files) # Close dataset with assert_raises(UnsupportedStatus): self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, close=False) self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, open=False) # Add a third file replica tmp_file = 'file_%s' % generate_uuid() self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb') # Add files to dataset files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ] self.did_client.attach_dids(scope=tmp_scope, name=tmp_dataset, dids=files) @raises def test_open(self): """ DATA IDENTIFIERS (CLIENT): test to re-open data identifiers for priv account""" tmp_rse = 'MOCK' tmp_scope = 'mock' # Add dataset tmp_dataset = 'dsn_%s' % generate_uuid() # Add file replica tmp_file = 'file_%s' % generate_uuid() self.replica_client.add_replica(rse=tmp_rse, scope=tmp_scope, name=tmp_file, bytes=1, adler32='0cc737eb') # Add dataset self.did_client.add_dataset(scope=tmp_scope, name=tmp_dataset) # Add files to dataset files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ] self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files) # Add a second file replica tmp_file = 'file_%s' % generate_uuid() self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb') # Add files to dataset files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ] self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files) # Close dataset with assert_raises(UnsupportedStatus): self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, close=False) self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, open=False) # Add a third file replica self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, open=True) def test_bulk_get_meta(self): """ DATA IDENTIFIERS (CLIENT): Add a new meta data for a list of DIDs and try to retrieve them back""" key = 'project' rse = 'MOCK' scope = 'mock' files = ['file_%s' % generate_uuid() for _ in range(4)] dst = ['dst_%s' % generate_uuid() for _ in range(4)] cnt = 
['cnt_%s' % generate_uuid() for _ in range(4)] meta_mapping = {} list_dids = [] for idx in range(4): self.replica_client.add_replica(rse, scope, files[idx], 1, '0cc737eb') self.did_client.set_metadata(scope, files[idx], key, 'file_%s' % idx) list_dids.append({'scope': scope, 'name': files[idx]}) meta_mapping['%s:%s' % (scope, files[idx])] = (key, 'file_%s' % idx) for idx in range(4): self.did_client.add_did(scope, dst[idx], 'DATASET', statuses=None, meta={key: 'dsn_%s' % idx}, rules=None) list_dids.append({'scope': scope, 'name': dst[idx]}) meta_mapping['%s:%s' % (scope, dst[idx])] = (key, 'dsn_%s' % idx) for idx in range(4): self.did_client.add_did(scope, cnt[idx], 'CONTAINER', statuses=None, meta={key: 'cnt_%s' % idx}, rules=None) list_dids.append({'scope': scope, 'name': cnt[idx]}) meta_mapping['%s:%s' % (scope, cnt[idx])] = (key, 'cnt_%s' % idx) list_meta = [_ for _ in self.did_client.get_metadata_bulk(list_dids)] res_list_dids = [{'scope': entry['scope'], 'name': entry['name']} for entry in list_meta] res_list_dids.sort() list_dids.sort() assert_equal(list_dids, res_list_dids) for meta in list_meta: did = '%s:%s' % (meta['scope'], meta['name']) met = meta_mapping[did] assert_equal((key, meta[key]), met) cnt = ['cnt_%s' % generate_uuid() for _ in range(4)] for idx in range(4): list_dids.append({'scope': scope, 'name': cnt[idx]}) list_meta = [_ for _ in self.did_client.get_metadata_bulk(list_dids)] assert_equal(len(list_meta), 12) list_dids = [] for idx in range(4): list_dids.append({'scope': scope, 'name': cnt[idx]}) list_meta = [_ for _ in self.did_client.get_metadata_bulk(list_dids)] assert_equal(len(list_meta), 0)
class TestReplicaClients: def setup(self): self.replica_client = ReplicaClient() self.did_client = DIDClient() def test_add_list_bad_replicas(self): """ REPLICA (CLIENT): Add bad replicas""" tmp_scope = 'mock' nbfiles = 5 # Adding replicas to deterministic RSE files = [{ 'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': { 'events': 10 } } for _ in range(nbfiles)] rse_info = rsemgr.get_rse_info('MOCK') rse_id1 = rse_info['id'] self.replica_client.add_replicas(rse='MOCK', files=files) # Listing replicas on deterministic RSE replicas, list_rep = [], [] for replica in self.replica_client.list_replicas(dids=[{ 'scope': f['scope'], 'name': f['name'] } for f in files], schemes=['srm'], unavailable=True): replicas.extend(replica['rses']['MOCK']) list_rep.append(replica) r = self.replica_client.declare_bad_file_replicas( replicas, 'This is a good reason') assert_equal(r, {}) bad_replicas = list_bad_replicas() nbbadrep = 0 for rep in list_rep: for badrep in bad_replicas: if badrep['rse_id'] == rse_id1: if badrep['scope'] == rep['scope'] and badrep[ 'name'] == rep['name']: nbbadrep += 1 assert_equal(len(replicas), nbbadrep) # Run necromancer once necromancer_run(threads=1, bulk=10000, once=True) # Try to attach a lost file tmp_dsn = 'dataset_%s' % generate_uuid() self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn) with assert_raises(UnsupportedOperation): self.did_client.add_files_to_dataset(tmp_scope, name=tmp_dsn, files=files, rse='MOCK') # Adding replicas to non-deterministic RSE files = [{ 'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'pfn': 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), 'meta': { 'events': 10 } } for _ in range(nbfiles)] rse_info = rsemgr.get_rse_info('MOCK2') rse_id2 = rse_info['id'] self.replica_client.add_replicas(rse='MOCK2', files=files) # Listing replicas on non-deterministic RSE replicas, list_rep = [], [] for replica in self.replica_client.list_replicas(dids=[{ 'scope': f['scope'], 'name': f['name'] } for f in files], schemes=['srm'], unavailable=True): replicas.extend(replica['rses']['MOCK2']) list_rep.append(replica) print(replicas, list_rep) r = self.replica_client.declare_bad_file_replicas( replicas, 'This is a good reason') print(r) assert_equal(r, {}) bad_replicas = list_bad_replicas() nbbadrep = 0 for rep in list_rep: for badrep in bad_replicas: if badrep['rse_id'] == rse_id2: if badrep['scope'] == rep['scope'] and badrep[ 'name'] == rep['name']: nbbadrep += 1 assert_equal(len(replicas), nbbadrep) # Now adding non-existing bad replicas files = [ 'srm://mock2.com/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), ] r = self.replica_client.declare_bad_file_replicas( files, 'This is a good reason') output = ['%s Unknown replica' % rep for rep in files] assert_equal(r, {'MOCK2': output}) def test_add_suspicious_replicas(self): """ REPLICA (CLIENT): Add suspicious replicas""" tmp_scope = 'mock' nbfiles = 5 # Adding replicas to deterministic RSE files = [{ 'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': { 'events': 10 } } for _ in range(nbfiles)] self.replica_client.add_replicas(rse='MOCK', files=files) # Listing replicas on deterministic RSE replicas = [] list_rep = [] for replica in self.replica_client.list_replicas(dids=[{ 'scope': f['scope'], 'name': f['name'] } for f in files], schemes=['srm'], unavailable=True): 
replicas.extend(replica['rses']['MOCK']) list_rep.append(replica) r = self.replica_client.declare_suspicious_file_replicas( replicas, 'This is a good reason') assert_equal(r, {}) # Adding replicas to non-deterministic RSE files = [{ 'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'pfn': 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), 'meta': { 'events': 10 } } for _ in range(nbfiles)] self.replica_client.add_replicas(rse='MOCK2', files=files) # Listing replicas on non-deterministic RSE replicas = [] list_rep = [] for replica in self.replica_client.list_replicas(dids=[{ 'scope': f['scope'], 'name': f['name'] } for f in files], schemes=['srm'], unavailable=True): replicas.extend(replica['rses']['MOCK2']) list_rep.append(replica) r = self.replica_client.declare_suspicious_file_replicas( replicas, 'This is a good reason') assert_equal(r, {}) # Now adding non-existing bad replicas files = [ 'srm://mock2.com/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), ] r = self.replica_client.declare_suspicious_file_replicas( files, 'This is a good reason') output = ['%s Unknown replica' % rep for rep in files] assert_equal(r, {'MOCK2': output}) def test_bad_replica_methods_for_UI(self): """ REPLICA (REST): Test the listing of bad and suspicious replicas """ mw = [] headers1 = { 'X-Rucio-Account': 'root', 'X-Rucio-Username': '******', 'X-Rucio-Password': '******' } result = TestApp(auth_app.wsgifunc(*mw)).get('/userpass', headers=headers1, expect_errors=True) assert_equal(result.status, 200) token = str(result.header('X-Rucio-Auth-Token')) headers2 = {'X-Rucio-Auth-Token': str(token)} data = dumps({}) result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True) assert_equal(result.status, 200) tot_files = [] for line in result.body.split('\n'): if line != '': tot_files.append(dumps(line)) nb_tot_files = len(tot_files) data = dumps({'state': 'B'}) result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True) assert_equal(result.status, 200) tot_bad_files = [] for line in result.body.split('\n'): if line != '': tot_bad_files.append(dumps(line)) nb_tot_bad_files1 = len(tot_bad_files) data = dumps({'state': 'S', 'list_pfns': 'True'}) result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True) assert_equal(result.status, 200) tot_suspicious_files = [] for line in result.body.split('\n'): if line != '': tot_suspicious_files.append(dumps(line)) nb_tot_suspicious_files = len(tot_suspicious_files) data = dumps({'state': 'T', 'list_pfns': 'True'}) result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True) assert_equal(result.status, 200) tot_temporary_unavailable_files = [] for line in result.body.split('\n'): if line != '': tot_temporary_unavailable_files.append(dumps(line)) nb_tot_temporary_unavailable_files = len( tot_temporary_unavailable_files) assert_equal( nb_tot_files, nb_tot_bad_files1 + nb_tot_suspicious_files + nb_tot_temporary_unavailable_files) tomorrow = datetime.utcnow() + timedelta(days=1) data = dumps({'state': 'B', 'younger_than': tomorrow.isoformat()}) result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True) assert_equal(result.status, 200) tot_bad_files = [] for line in result.body.split('\n'): if line != '': 
tot_bad_files.append(dumps(line)) nb_tot_bad_files = len(tot_bad_files) assert_equal(nb_tot_bad_files, 0) data = dumps({}) result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/summary', headers=headers2, params=data, expect_errors=True) assert_equal(result.status, 200) nb_tot_bad_files2 = 0 for line in result.body.split('\n'): if line != '': line = loads(line) nb_tot_bad_files2 += int(line.get('BAD', 0)) assert_equal(nb_tot_bad_files1, nb_tot_bad_files2) def test_add_list_replicas(self): """ REPLICA (CLIENT): Add, change state and list file replicas """ tmp_scope = 'mock' nbfiles = 5 files1 = [{ 'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': { 'events': 10 } } for _ in range(nbfiles)] self.replica_client.add_replicas(rse='MOCK', files=files1) files2 = [{ 'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': { 'events': 10 } } for _ in range(nbfiles)] self.replica_client.add_replicas(rse='MOCK3', files=files2) replicas = [ r for r in self.replica_client.list_replicas( dids=[{ 'scope': i['scope'], 'name': i['name'] } for i in files1]) ] assert_equal(len(replicas), len(files1)) replicas = [ r for r in self.replica_client.list_replicas( dids=[{ 'scope': i['scope'], 'name': i['name'] } for i in files2], schemes=['file']) ] assert_equal(len(replicas), 5) replicas = [ r for r in self.replica_client.list_replicas( dids=[{ 'scope': i['scope'], 'name': i['name'] } for i in files2], schemes=['srm']) ] assert_equal(len(replicas), 5) files3 = [{ 'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'state': 'U', 'meta': { 'events': 10 } } for _ in range(nbfiles)] self.replica_client.add_replicas(rse='MOCK3', files=files3) replicas = [ r for r in self.replica_client.list_replicas( dids=[{ 'scope': i['scope'], 'name': i['name'] } for i in files3], schemes=['file']) ] for i in range(nbfiles): assert_equal(replicas[i]['rses'], {}) files4 = [] for file in files3: file['state'] = 'A' files4.append(file) self.replica_client.update_replicas_states('MOCK3', files=files4) replicas = [ r for r in self.replica_client.list_replicas( dids=[{ 'scope': i['scope'], 'name': i['name'] } for i in files3], schemes=['file'], unavailable=True) ] assert_equal(len(replicas), 5) for i in range(nbfiles): assert_in('MOCK3', replicas[i]['rses']) def test_delete_replicas(self): """ REPLICA (CLIENT): Add and delete file replicas """ tmp_scope = 'mock' nbfiles = 5 files = [{ 'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': { 'events': 10 } } for _ in range(nbfiles)] self.replica_client.add_replicas(rse='MOCK', files=files) with assert_raises(AccessDenied): self.replica_client.delete_replicas(rse='MOCK', files=files) # replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files])] # assert_equal(len(replicas), 0) def test_add_temporary_unavailable_pfns(self): """ REPLICA (CLIENT): Add temporary unavailable PFNs""" tmp_scope = 'mock' nbfiles = 5 # Adding replicas to deterministic RSE files = [{ 'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': { 'events': 10 } } for _ in range(nbfiles)] self.replica_client.add_replicas(rse='MOCK', files=files) # Listing replicas on deterministic RSE list_rep = [] for replica in self.replica_client.list_replicas(dids=[{ 'scope': f['scope'], 'name': f['name'] } for f in files], schemes=['srm'], unavailable=True): 
pfn = replica['pfns'].keys()[0] list_rep.append(pfn) # Submit bad PFNs now = datetime.utcnow() reason_str = generate_uuid() self.replica_client.add_bad_pfns(pfns=list_rep, reason=str(reason_str), state='TEMPORARY_UNAVAILABLE', expires_at=now.isoformat()) result = get_bad_pfns(limit=10000, thread=None, total_threads=None, session=None) bad_pfns = {} for res in result: bad_pfns[res['pfn']] = (res['state'], res['reason'], res['expires_at']) for pfn in list_rep: pfn = str(clean_surls([pfn])[0]) assert_in(pfn, bad_pfns) assert_equal(str(bad_pfns[pfn][0]), 'TEMPORARY_UNAVAILABLE') assert_equal(bad_pfns[pfn][1], reason_str) # Submit with wrong state with assert_raises(RucioException): self.replica_client.add_bad_pfns(pfns=list_rep, reason=str(reason_str), state='BADSTATE', expires_at=now.isoformat()) # Run minos once minos_run(threads=1, bulk=10000, once=True) result = get_bad_pfns(limit=10000, thread=None, total_threads=None, session=None) pfns = [res['pfn'] for res in result] res_pfns = [] for replica in list_rep: if replica in pfns: res_pfns.append(replica) assert_equal(res_pfns, []) # Check the state in the replica table for did in files: rep = get_replicas_state(scope=did['scope'], name=did['name']) assert_equal(str(rep.keys()[0]), 'TEMPORARY_UNAVAILABLE') rep = [] for did in files: did['state'] = ReplicaState.from_sym('TEMPORARY_UNAVAILABLE') rep.append(did) # Run the minos expiration minos_temp_run(threads=1, once=True) # Check the state in the replica table for did in files: rep = get_replicas_state(scope=did['scope'], name=did['name']) assert_equal(str(rep.keys()[0]), 'AVAILABLE') def test_set_tombstone(self): """ REPLICA (CLIENT): set tombstone on replica """ # Set tombstone on one replica rse = 'MOCK4' scope = 'mock' user = '******' name = generate_uuid() add_replica(rse, scope, name, 4, user) assert_equal(get_replica(rse, scope, name)['tombstone'], None) self.replica_client.set_tombstone([{ 'rse': rse, 'scope': scope, 'name': name }]) assert_equal(get_replica(rse, scope, name)['tombstone'], OBSOLETE) # Set tombstone on locked replica name = generate_uuid() add_replica(rse, scope, name, 4, user) RuleClient().add_replication_rule([{ 'name': name, 'scope': scope }], 1, rse, locked=True) with assert_raises(ReplicaIsLocked): self.replica_client.set_tombstone([{ 'rse': rse, 'scope': scope, 'name': name }]) # Set tombstone on not found replica name = generate_uuid() with assert_raises(ReplicaNotFound): self.replica_client.set_tombstone([{ 'rse': rse, 'scope': scope, 'name': name }])
class TestReplicaMetalink: def setup(self): self.did_client = DIDClient() self.replica_client = ReplicaClient() self.base_client = BaseClient(account='root', ca_cert=config_get('client', 'ca_cert'), auth_type='x509') self.token = self.base_client.headers['X-Rucio-Auth-Token'] self.fname = generate_uuid() rses = ['MOCK', 'MOCK3', 'MOCK4'] dsn = generate_uuid() self.files = [{ 'scope': 'mock', 'name': self.fname, 'bytes': 1, 'adler32': '0cc737eb' }] self.did_client.add_dataset(scope='mock', name=dsn) self.did_client.add_files_to_dataset('mock', name=dsn, files=self.files, rse='MOCK') for r in rses: self.replica_client.add_replicas(r, self.files) def test_list_replicas_metalink_4(self): """ REPLICA (METALINK): List replicas as metalink version 4 """ ml = xmltodict.parse(self.replica_client.list_replicas( self.files, metalink=4, unavailable=True, schemes=['https', 'sftp', 'file']), xml_attribs=False) assert_equal(3, len(ml['metalink']['file']['url'])) def test_get_did_from_pfns_nondeterministic(self): """ REPLICA (CLIENT): Get list of DIDs associated to PFNs for non-deterministic sites""" rse = 'MOCK2' tmp_scope = 'mock' nbfiles = 3 pfns = [] input = {} rse_info = rsemgr.get_rse_info(rse) assert_equal(rse_info['deterministic'], False) files = [{ 'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'pfn': 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), 'meta': { 'events': 10 } } for _ in range(nbfiles)] for f in files: input[f['pfn']] = {'scope': f['scope'], 'name': f['name']} add_replicas(rse=rse, files=files, account='root', ignore_availability=True) for replica in list_replicas(dids=[{ 'scope': f['scope'], 'name': f['name'], 'type': DIDType.FILE } for f in files], schemes=['srm'], ignore_availability=True): for rse in replica['rses']: pfns.extend(replica['rses'][rse]) for result in self.replica_client.get_did_from_pfns(pfns, rse): pfn = result.keys()[0] assert_equal(input[pfn], result.values()[0]) def test_get_did_from_pfns_deterministic(self): """ REPLICA (CLIENT): Get list of DIDs associated to PFNs for deterministic sites""" tmp_scope = 'mock' rse = 'MOCK3' nbfiles = 3 pfns = [] input = {} rse_info = rsemgr.get_rse_info(rse) assert_equal(rse_info['deterministic'], True) files = [{ 'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': { 'events': 10 } } for _ in range(nbfiles)] p = rsemgr.create_protocol(rse_info, 'read', scheme='srm') for f in files: pfn = p.lfns2pfns(lfns={ 'scope': f['scope'], 'name': f['name'] }).values()[0] pfns.append(pfn) input[pfn] = {'scope': f['scope'], 'name': f['name']} add_replicas(rse=rse, files=files, account='root', ignore_availability=True) for result in self.replica_client.get_did_from_pfns(pfns, rse): pfn = result.keys()[0] assert_equal(input[pfn], result.values()[0])
class TestReplicaClients: def setup(self): self.replica_client = ReplicaClient() self.did_client = DIDClient() def test_add_list_bad_replicas(self): """ REPLICA (CLIENT): Add bad replicas""" tmp_scope = 'mock' nbfiles = 5 # Adding replicas to deterministic RSE files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in range(nbfiles)] rse_info = rsemgr.get_rse_info('MOCK') rse_id1 = rse_info['id'] self.replica_client.add_replicas(rse='MOCK', files=files) # Listing replicas on deterministic RSE replicas, list_rep = [], [] for replica in self.replica_client.list_replicas(dids=[{'scope': f['scope'], 'name': f['name']} for f in files], schemes=['srm'], unavailable=True): replicas.extend(replica['rses']['MOCK']) list_rep.append(replica) r = self.replica_client.declare_bad_file_replicas(replicas, 'This is a good reason') assert_equal(r, {}) bad_replicas = list_bad_replicas() nbbadrep = 0 for rep in list_rep: for badrep in bad_replicas: if badrep['rse_id'] == rse_id1: if badrep['scope'] == rep['scope'] and badrep['name'] == rep['name']: nbbadrep += 1 assert_equal(len(replicas), nbbadrep) # Run necromancer once run(threads=1, bulk=10000, once=True) # Try to attach a lost file tmp_dsn = 'dataset_%s' % generate_uuid() self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn) with assert_raises(UnsupportedOperation): self.did_client.add_files_to_dataset(tmp_scope, name=tmp_dsn, files=files, rse='MOCK') # Adding replicas to non-deterministic RSE files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'pfn': 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), 'meta': {'events': 10}} for i in range(nbfiles)] rse_info = rsemgr.get_rse_info('MOCK2') rse_id2 = rse_info['id'] self.replica_client.add_replicas(rse='MOCK2', files=files) # Listing replicas on non-deterministic RSE replicas, list_rep = [], [] for replica in self.replica_client.list_replicas(dids=[{'scope': f['scope'], 'name': f['name']} for f in files], schemes=['srm'], unavailable=True): replicas.extend(replica['rses']['MOCK2']) list_rep.append(replica) print(replicas, list_rep) r = self.replica_client.declare_bad_file_replicas(replicas, 'This is a good reason') print(r) assert_equal(r, {}) bad_replicas = list_bad_replicas() nbbadrep = 0 for rep in list_rep: for badrep in bad_replicas: if badrep['rse_id'] == rse_id2: if badrep['scope'] == rep['scope'] and badrep['name'] == rep['name']: nbbadrep += 1 assert_equal(len(replicas), nbbadrep) # Now adding non-existing bad replicas files = ['srm://mock2.com/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), ] r = self.replica_client.declare_bad_file_replicas(files, 'This is a good reason') output = ['%s Unknown replica' % rep for rep in files] assert_equal(r, {'MOCK2': output}) def test_add_suspicious_replicas(self): """ REPLICA (CLIENT): Add suspicious replicas""" tmp_scope = 'mock' nbfiles = 5 # Adding replicas to deterministic RSE files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in range(nbfiles)] self.replica_client.add_replicas(rse='MOCK', files=files) # Listing replicas on deterministic RSE replicas = [] list_rep = [] for replica in self.replica_client.list_replicas(dids=[{'scope': f['scope'], 'name': f['name']} for f in files], schemes=['srm'], unavailable=True): replicas.extend(replica['rses']['MOCK']) 
list_rep.append(replica) r = self.replica_client.declare_suspicious_file_replicas(replicas, 'This is a good reason') assert_equal(r, {}) # Adding replicas to non-deterministic RSE files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'pfn': 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), 'meta': {'events': 10}} for i in range(nbfiles)] self.replica_client.add_replicas(rse='MOCK2', files=files) # Listing replicas on non-deterministic RSE replicas = [] list_rep = [] for replica in self.replica_client.list_replicas(dids=[{'scope': f['scope'], 'name': f['name']} for f in files], schemes=['srm'], unavailable=True): replicas.extend(replica['rses']['MOCK2']) list_rep.append(replica) r = self.replica_client.declare_suspicious_file_replicas(replicas, 'This is a good reason') assert_equal(r, {}) # Now adding non-existing bad replicas files = ['srm://mock2.com/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), ] r = self.replica_client.declare_suspicious_file_replicas(files, 'This is a good reason') output = ['%s Unknown replica' % rep for rep in files] assert_equal(r, {'MOCK2': output}) def test_bad_replica_methods_for_UI(self): """ REPLICA (REST): Test the listing of bad and suspicious replicas """ mw = [] headers1 = {'X-Rucio-Account': 'root', 'X-Rucio-Username': '******', 'X-Rucio-Password': '******'} r1 = TestApp(auth_app.wsgifunc(*mw)).get('/userpass', headers=headers1, expect_errors=True) assert_equal(r1.status, 200) token = str(r1.header('X-Rucio-Auth-Token')) headers2 = {'X-Rucio-Auth-Token': str(token)} data = dumps({}) r2 = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True) assert_equal(r2.status, 200) tot_files = [] for line in r2.body.split('\n'): if line != '': tot_files.append(dumps(line)) nb_tot_files = len(tot_files) data = dumps({'state': 'B'}) r2 = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True) assert_equal(r2.status, 200) tot_bad_files = [] for line in r2.body.split('\n'): if line != '': tot_bad_files.append(dumps(line)) nb_tot_bad_files1 = len(tot_bad_files) data = dumps({'state': 'S', 'list_pfns': 'True'}) r2 = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True) assert_equal(r2.status, 200) tot_suspicious_files = [] for line in r2.body.split('\n'): if line != '': tot_suspicious_files.append(dumps(line)) nb_tot_suspicious_files = len(tot_suspicious_files) assert_equal(nb_tot_files, nb_tot_bad_files1 + nb_tot_suspicious_files) tomorrow = datetime.utcnow() + timedelta(days=1) data = dumps({'state': 'B', 'younger_than': tomorrow.isoformat()}) r2 = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True) assert_equal(r2.status, 200) tot_bad_files = [] for line in r2.body.split('\n'): if line != '': tot_bad_files.append(dumps(line)) nb_tot_bad_files = len(tot_bad_files) assert_equal(nb_tot_bad_files, 0) data = dumps({}) r2 = TestApp(rep_app.wsgifunc(*mw)).get('/bad/summary', headers=headers2, params=data, expect_errors=True) assert_equal(r2.status, 200) nb_tot_bad_files2 = 0 for line in r2.body.split('\n'): if line != '': line = loads(line) nb_tot_bad_files2 += int(line['BAD']) assert_equal(nb_tot_bad_files1, nb_tot_bad_files2) def test_add_list_replicas(self): """ REPLICA (CLIENT): Add, change state and list file replicas """ tmp_scope = 'mock' nbfiles = 5 files1 = 
[{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in range(nbfiles)] self.replica_client.add_replicas(rse='MOCK', files=files1) files2 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in range(nbfiles)] self.replica_client.add_replicas(rse='MOCK3', files=files2) replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files1])] assert_equal(len(replicas), len(files1)) replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files2], schemes=['file'])] assert_equal(len(replicas), 5) replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files2], schemes=['srm'])] assert_equal(len(replicas), 5) files3 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'state': 'U', 'meta': {'events': 10}} for i in range(nbfiles)] self.replica_client.add_replicas(rse='MOCK3', files=files3) replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files3], schemes=['file'])] for i in range(nbfiles): assert_equal(replicas[i]['rses'], {}) files4 = [] for file in files3: file['state'] = 'A' files4.append(file) self.replica_client.update_replicas_states('MOCK3', files=files4) replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files3], schemes=['file'], unavailable=True)] assert_equal(len(replicas), 5) for i in range(nbfiles): assert_in('MOCK3', replicas[i]['rses']) def test_delete_replicas(self): """ REPLICA (CLIENT): Add and delete file replicas """ tmp_scope = 'mock' nbfiles = 5 files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in range(nbfiles)] self.replica_client.add_replicas(rse='MOCK', files=files) with assert_raises(AccessDenied): self.replica_client.delete_replicas(rse='MOCK', files=files)
class TestDIDClients:

    def setup(self):
        self.account_client = AccountClient()
        self.scope_client = ScopeClient()
        self.meta_client = MetaClient()
        self.did_client = DIDClient()
        self.replica_client = ReplicaClient()
        self.rse_client = RSEClient()

    def test_list_dids(self):
        """ DATA IDENTIFIERS (CLIENT): List dids by pattern."""
        tmp_scope = scope_name_generator()
        tmp_files = []
        tmp_files.append('file_a_1%s' % generate_uuid())
        tmp_files.append('file_a_2%s' % generate_uuid())
        tmp_files.append('file_b_1%s' % generate_uuid())
        tmp_rse = 'MOCK'
        self.scope_client.add_scope('jdoe', tmp_scope)
        for tmp_file in tmp_files:
            self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1L, '0cc737eb')

        results = []
        for result in self.did_client.list_dids(tmp_scope, {'name': 'file_a_*'}, type='file'):
            results.append(result)
        assert_equal(len(results), 2)
        results = []
        for result in self.did_client.list_dids(tmp_scope, {'name': 'file_a_1*'}, type='file'):
            results.append(result)
        assert_equal(len(results), 1)
        results = []
        for result in self.did_client.list_dids(tmp_scope, {'name': 'file_*_1*'}, type='file'):
            results.append(result)
        assert_equal(len(results), 2)
        results = []
        for result in self.did_client.list_dids(tmp_scope, {'name': 'file*'}, type='file'):
            results.append(result)
        assert_equal(len(results), 3)
        results = []
        for result in self.did_client.list_dids(tmp_scope, {'name': 'file*'}):
            results.append(result)
        assert_equal(len(results), 0)

        with assert_raises(UnsupportedOperation):
            self.did_client.list_dids(tmp_scope, {'name': 'file*'}, type='whateverytype')

    def test_list_by_metadata(self):
        """ DATA IDENTIFIERS (CLIENT): List did with metadata"""
        dsns = []
        tmp_scope = 'mock'
        tmp_dsn1 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn1)

        dataset_meta = {'project': 'data12_8TeV',
                        'run_number': 400000,
                        'stream_name': 'physics_CosmicCalo',
                        'prod_step': 'merge',
                        'datatype': 'NTUP_TRIG',
                        'version': 'f392_m920',
                        }
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn1, meta=dataset_meta)

        tmp_dsn2 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn2)
        dataset_meta['run_number'] = 400001
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn2, meta=dataset_meta)

        tmp_dsn3 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn3)
        dataset_meta['stream_name'] = 'physics_Egamma'
        dataset_meta['datatype'] = 'NTUP_SMWZ'
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn3, meta=dataset_meta)

        dids = self.did_client.list_dids(tmp_scope, {'project': 'data12_8TeV', 'version': 'f392_m920'})
        results = []
        for d in dids:
            results.append(d)
        for dsn in dsns:
            assert_in(dsn, results)
        dsns.remove(tmp_dsn1)

        dids = self.did_client.list_dids(tmp_scope, {'project': 'data12_8TeV', 'run_number': 400001})
        results = []
        for d in dids:
            results.append(d)
        for dsn in dsns:
            assert_in(dsn, results)
        dsns.remove(tmp_dsn2)

        dids = self.did_client.list_dids(tmp_scope, {'project': 'data12_8TeV', 'stream_name': 'physics_Egamma', 'datatype': 'NTUP_SMWZ'})
        results = []
        for d in dids:
            results.append(d)
        for dsn in dsns:
            assert_in(dsn, results)

        with assert_raises(KeyNotFound):
            self.did_client.list_dids(tmp_scope, {'NotReallyAKey': 'NotReallyAValue'})

    def test_add_did(self):
        """ DATA IDENTIFIERS (CLIENT): Add, populate and list did content"""
        tmp_scope = 'mock'
        tmp_rse = 'MOCK'
        tmp_dsn = 'dsn_%s' % generate_uuid()

        # PFN example: rfio://castoratlas.cern.ch/castor/cern.ch/grid/atlas/tzero/xx/xx/xx/filename
        dataset_meta = {'project': 'data13_hip',
                        'run_number': 300000,
                        'stream_name': 'physics_CosmicCalo',
                        'prod_step': 'merge',
                        'datatype': 'NTUP_TRIG',
                        'version': 'f392_m927',
                        }
        rules = [{'copies': 1, 'rse_expression': 'MOCK', 'account': 'root'}]

        with assert_raises(ScopeNotFound):
            self.did_client.add_dataset(scope='Nimportnawak', name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules)

        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules)

        with assert_raises(DataIdentifierNotFound):
            self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dsn,
                                                 files=[{'scope': tmp_scope, 'name': 'lfn.%(tmp_dsn)s.' % locals() + str(generate_uuid()),
                                                         'bytes': 724963570L, 'adler32': '0cc737eb'}, ])

        files = []
        for i in xrange(5):
            lfn = 'lfn.%(tmp_dsn)s.' % locals() + str(generate_uuid())
            pfn = 'mock://localhost/tmp/rucio_rse/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            # it doesn't work with mock: TBF
            # pfn = 'srm://mock2.com:2880/pnfs/rucio/disk-only/scratchdisk/rucio_tests/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            pfn += '%(tmp_dsn)s/%(lfn)s' % locals()
            file_meta = {'guid': str(generate_uuid()), 'events': 10}
            files.append({'scope': tmp_scope, 'name': lfn, 'bytes': 724963570L, 'adler32': '0cc737eb', 'pfn': pfn, 'meta': file_meta})
        rules = [{'copies': 1, 'rse_expression': 'CERN-PROD_TZERO', 'lifetime': timedelta(days=2)}]

        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dsn, files=files, rse=tmp_rse)

        files = []
        for i in xrange(5):
            lfn = '%(tmp_dsn)s.' % locals() + str(generate_uuid())
            pfn = 'mock://localhost/tmp/rucio_rse/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            # it doesn't work with mock: TBF
            # pfn = 'srm://mock2.com:2880/pnfs/rucio/disk-only/scratchdisk/rucio_tests/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            pfn += '%(tmp_dsn)s/%(lfn)s' % locals()
            file_meta = {'guid': str(generate_uuid()), 'events': 100}
            files.append({'scope': tmp_scope, 'name': lfn, 'bytes': 724963570L, 'adler32': '0cc737eb', 'pfn': pfn, 'meta': file_meta})
        rules = [{'copies': 1, 'rse_expression': 'CERN-PROD_TZERO', 'lifetime': timedelta(days=2)}]
        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dsn, files=files, rse=tmp_rse)

        self.did_client.close(scope=tmp_scope, name=tmp_dsn)

    def test_attach_dids_to_dids(self):
        """ DATA IDENTIFIERS (CLIENT): Attach dids to dids"""
        tmp_scope = 'mock'
        tmp_rse = 'MOCK'
        nb_datasets = 5
        nb_files = 5
        attachments, dsns = list(), list()
        guid_to_query = None
        dsn = {}
        for i in xrange(nb_datasets):
            attachment = {}
            attachment['scope'] = tmp_scope
            attachment['name'] = 'dsn.%s' % str(generate_uuid())
            attachment['rse'] = tmp_rse
            files = []
            for i in xrange(nb_files):
                files.append({'scope': tmp_scope, 'name': 'lfn.%s' % str(generate_uuid()), 'bytes': 724963570L, 'adler32': '0cc737eb',
                              'meta': {'guid': str(generate_uuid()), 'events': 100}})
            attachment['dids'] = files
            guid_to_query = files[0]['meta']['guid']
            dsn = {'scope': tmp_scope, 'name': attachment['name']}
            dsns.append(dsn)
            attachments.append(attachment)
        self.did_client.add_datasets(dsns=dsns)
        self.did_client.attach_dids_to_dids(attachments=attachments)
        l = [i for i in self.did_client.get_dataset_by_guid(guid_to_query)]
        assert_equal([dsn], l)

        cnt_name = 'cnt_%s' % generate_uuid()
        self.did_client.add_container(scope='mock', name=cnt_name)
        with assert_raises(UnsupportedOperation):
            self.did_client.attach_dids_to_dids([{'scope': 'mock', 'name': cnt_name, 'rse': tmp_rse, 'dids': attachment['dids']}])

    def test_add_dataset(self):
        """ DATA IDENTIFIERS (CLIENT): Add dataset """
        tmp_scope = 'mock'
        tmp_dsn = 'dsn_%s' % generate_uuid()

        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, meta={'project': 'data13_hip'})

        did = self.did_client.get_did(tmp_scope, tmp_dsn)
        assert_equal(did['scope'], tmp_scope)
        assert_equal(did['name'], tmp_dsn)

        with assert_raises(DataIdentifierNotFound):
            self.did_client.get_did('i_dont_exist', 'neither_do_i')

    def test_add_datasets(self):
        """ DATA IDENTIFIERS (CLIENT): Bulk add datasets """
        tmp_scope = 'mock'
        dsns = list()
        for i in xrange(500):
            tmp_dsn = {'name': 'dsn_%s' % generate_uuid(), 'scope': tmp_scope, 'meta': {'project': 'data13_hip'}}
            dsns.append(tmp_dsn)
        self.did_client.add_datasets(dsns)

    def test_exists(self):
        """ DATA IDENTIFIERS (CLIENT): Check if data identifier exists """
        tmp_scope = 'mock'
        tmp_file = 'file_%s' % generate_uuid()
        tmp_rse = 'MOCK'

        self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1L, '0cc737eb')

        did = self.did_client.get_did(tmp_scope, tmp_file)
        assert_equal(did['scope'], tmp_scope)
        assert_equal(did['name'], tmp_file)

        with assert_raises(DataIdentifierNotFound):
            self.did_client.get_did('i_dont_exist', 'neither_do_i')

    def test_did_hierarchy(self):
        """ DATA IDENTIFIERS (CLIENT): Check did hierarchy rule """
        account = 'jdoe'
        rse = 'MOCK'
        scope = scope_name_generator()
        file = ['file_%s' % generate_uuid() for i in range(10)]
        dst = ['dst_%s' % generate_uuid() for i in range(4)]
        cnt = ['cnt_%s' % generate_uuid() for i in range(4)]

        self.scope_client.add_scope(account, scope)
        for i in range(10):
            self.replica_client.add_replica(rse, scope, file[i], 1, '0cc737eb')
        for i in range(4):
            self.did_client.add_did(scope, dst[i], 'DATASET', statuses=None, meta=None, rules=None)
        for i in range(4):
            self.did_client.add_did(scope, cnt[i], 'CONTAINER', statuses=None, meta=None, rules=None)

        for i in range(4):
            self.did_client.add_files_to_dataset(scope, dst[i],
                                                 [{'scope': scope, 'name': file[2 * i], 'bytes': 1L, 'adler32': '0cc737eb'},
                                                  {'scope': scope, 'name': file[2 * i + 1], 'bytes': 1L, 'adler32': '0cc737eb'}])

        self.did_client.add_containers_to_container(scope, cnt[1], [{'scope': scope, 'name': cnt[2]}, {'scope': scope, 'name': cnt[3]}])
        self.did_client.add_datasets_to_container(scope, cnt[0], [{'scope': scope, 'name': dst[1]}, {'scope': scope, 'name': dst[2]}])

        result = self.did_client.scope_list(scope, recursive=True)
        for r in result:
            pass
            # TODO: fix, fix, fix
            # if r['name'] == cnt[1]:
            #     assert_equal(r['type'], 'container')
            #     assert_equal(r['level'], 0)
            # if (r['name'] == cnt[0]) or (r['name'] == dst[0]) or (r['name'] == file[8]) or (r['name'] == file[9]):
            #     assert_equal(r['level'], 0)
            # else:
            #     assert_equal(r['level'], 1)

    def test_detach_did(self):
        """ DATA IDENTIFIERS (CLIENT): Detach dids from a did"""
        account = 'jdoe'
        rse = 'MOCK'
        scope = scope_name_generator()
        file = ['file_%s' % generate_uuid() for i in range(10)]
        dst = ['dst_%s' % generate_uuid() for i in range(4)]
        cnt = ['cnt_%s' % generate_uuid() for i in range(2)]

        self.scope_client.add_scope(account, scope)
        for i in range(10):
            self.replica_client.add_replica(rse, scope, file[i], 1L, '0cc737eb')
        for i in range(4):
            self.did_client.add_dataset(scope, dst[i], statuses=None, meta=None, rules=None)
        for i in range(2):
            self.did_client.add_container(scope, cnt[i], statuses=None, meta=None, rules=None)

        for i in range(4):
            self.did_client.add_files_to_dataset(scope, dst[i],
                                                 [{'scope': scope, 'name': file[2 * i], 'bytes': 1L, 'adler32': '0cc737eb'},
                                                  {'scope': scope, 'name': file[2 * i + 1], 'bytes': 1L, 'adler32': '0cc737eb'}])

        self.did_client.add_containers_to_container(scope, cnt[1], [{'scope': scope, 'name': dst[2]}, {'scope': scope, 'name': dst[3]}])

        with assert_raises(UnsupportedOperation):
            self.did_client.add_datasets_to_container(scope, cnt[0], [{'scope': scope, 'name': dst[1]}, {'scope': scope, 'name': cnt[1]}])

        self.did_client.add_datasets_to_container(scope, cnt[0], [{'scope': scope, 'name': dst[1]}, {'scope': scope, 'name': dst[2]}])

        self.did_client.detach_dids(scope, cnt[0], [{'scope': scope, 'name': dst[1]}])
        self.did_client.detach_dids(scope, dst[3], [{'scope': scope, 'name': file[6]}, {'scope': scope, 'name': file[7]}])

        result = self.did_client.scope_list(scope, recursive=True)
        for r in result:
            if r['name'] == dst[1]:
                assert_equal(r['level'], 0)
            if r['type'] is 'file':
                if (r['name'] in file[6:9]):
                    assert_equal(r['level'], 0)
                else:
                    assert_not_equal(r['level'], 0)

        with assert_raises(UnsupportedOperation):
            self.did_client.detach_dids(scope=scope, name=cnt[0], dids=[{'scope': scope, 'name': cnt[0]}])

    def test_scope_list(self):
        """ DATA IDENTIFIERS (CLIENT): Add, aggregate, and list data identifiers in a scope """
        # create some dummy data
        self.tmp_accounts = ['jdoe' for i in xrange(3)]
        self.tmp_scopes = [scope_name_generator() for i in xrange(3)]
        self.tmp_rses = [rse_name_generator() for i in xrange(3)]
        self.tmp_files = ['file_%s' % generate_uuid() for i in xrange(3)]
        self.tmp_datasets = ['dataset_%s' % generate_uuid() for i in xrange(3)]
        self.tmp_containers = ['container_%s' % generate_uuid() for i in xrange(3)]

        # add dummy data to the catalogue
        for i in xrange(3):
            self.scope_client.add_scope(self.tmp_accounts[i], self.tmp_scopes[i])
            self.rse_client.add_rse(self.tmp_rses[i])
            self.replica_client.add_replica(self.tmp_rses[i], self.tmp_scopes[i], self.tmp_files[i], 1L, '0cc737eb')

        # put files in datasets
        for i in xrange(3):
            for j in xrange(3):
                files = [{'scope': self.tmp_scopes[j], 'name': self.tmp_files[j], 'bytes': 1L, 'adler32': '0cc737eb'}]
                self.did_client.add_dataset(self.tmp_scopes[i], self.tmp_datasets[j])
                self.did_client.add_files_to_dataset(self.tmp_scopes[i], self.tmp_datasets[j], files)
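# The calls used by the tests above compose into the usual container -> dataset -> file
# hierarchy. Minimal standalone sketch, assuming a reachable Rucio server with the 'mock'
# scope and 'MOCK' RSE already defined (as in the test fixtures); the generated names are
# purely illustrative.
from rucio.client.didclient import DIDClient
from rucio.client.replicaclient import ReplicaClient
from rucio.common.utils import generate_uuid

did_client = DIDClient()
replica_client = ReplicaClient()

scope = 'mock'
file_name = 'file_%s' % generate_uuid()
dataset_name = 'dataset_%s' % generate_uuid()
container_name = 'container_%s' % generate_uuid()

# Register a file replica, then build container -> dataset -> file.
replica_client.add_replica('MOCK', scope, file_name, 1, '0cc737eb')
did_client.add_dataset(scope, dataset_name)
did_client.add_files_to_dataset(scope, dataset_name,
                                [{'scope': scope, 'name': file_name, 'bytes': 1, 'adler32': '0cc737eb'}])
did_client.add_container(scope, container_name)
did_client.add_datasets_to_container(scope, container_name, [{'scope': scope, 'name': dataset_name}])

# Walk the scope; attached DIDs appear at level >= 1, unattached ones at level 0.
for did in did_client.scope_list(scope, recursive=True):
    print(did['name'], did['type'], did['level'])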
def _rucio_register(beamline, uid, filenames):
    """ Register the file in rucio for replication to SDCC. """
    scope = beamline
    container = uid

    replica_client = ReplicaClient()
    didclient = DIDClient()
    scopeclient = ScopeClient()
    ruleclient = RuleClient()

    for root, ending, filename in filenames:
        # size = os.stat(str(filename)).st_size
        # adler = adler32(str(filename))
        files = [{'scope': scope,
                  'name': filename.split('/')[-1],
                  'bytes': 1000,
                  # 'adler32': "unknown",
                  'pfn': pfn + filename}]  # 'pfn' and 'rse' are expected to be defined at module level

        dataset = os.path.join(root, ending)
        dataset = '.'.join(dataset.split('/')[1:-1])
        print("DATASET", dataset)
        # breakpoint()

        try:
            scopeclient.add_scope(account='nsls2data', scope=scope)
        except rucio.common.exception.Duplicate:
            pass

        replica_client.add_replicas(rse=rse, files=files)

        # Create a new container if it doesn't exist.
        try:
            didclient.add_did(scope=scope, name=uid, type='container')
        except rucio.common.exception.DataIdentifierAlreadyExists:
            pass

        # Create a replication rule.
        try:
            dids = [{'scope': scope, 'name': container}]
            ruleclient.add_replication_rule(dids=dids,
                                            copies=1,
                                            rse_expression='SDCC',
                                            lifetime=86400,  # Seconds
                                            account='nsls2data',
                                            source_replica_expression='NSLS2',
                                            purge_replicas=True,
                                            comment='purge_replicas in 24 hours')
        except rucio.common.exception.DuplicateRule:
            pass

        # Create a new dataset if it doesn't exist.
        try:
            didclient.add_did(scope=scope, name=dataset, type='dataset')
        except rucio.common.exception.DataIdentifierAlreadyExists:
            pass

        attachment = {'scope': scope,
                      'name': uid,
                      'dids': [{'scope': scope, 'name': dataset}]}

        try:
            didclient.add_files_to_dataset(scope, dataset, files)
        except rucio.common.exception.FileAlreadyExists:
            pass

        try:
            didclient.add_datasets_to_containers([attachment])
        except rucio.common.exception.DuplicateContent:
            pass
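# Hypothetical invocation of _rucio_register(), for illustration only. It assumes the
# module defines 'rse' and 'pfn' (referenced inside the function) and that the account
# 'nsls2data' and the RSEs named in the rule exist on the server; every value below is
# made up.
rse = 'NSLS2'                                  # assumed module-level RSE name
pfn = 'https://globus-dtn.example.org/data'    # assumed module-level PFN prefix

_rucio_register(
    beamline='csx',                            # becomes the Rucio scope
    uid='scan-0001',                           # becomes the container name
    filenames=[('/nsls2/data/csx', 'assets/2023',
                '/nsls2/data/csx/assets/2023/image_000.h5')])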