class TestMetaDIDClient(unittest.TestCase):
    """ Test the metadata DID client """

    def setUp(self):
        """ Setup the Test Case """
        # Clients exercised by the tests below.
        self.scope_client = ScopeClient()
        self.rse_client = RSEClient()
        self.meta_client = MetaClient()
        self.did_client = DIDClient()

    def test_add_list_meta(self):
        """ META DID (CLIENTS): Add metadata to a data identifier"""
        # Scope and dataset the metadata will be attached to.
        scope = 'mock'
        dataset = 'dsn_%s' % uuid()
        self.did_client.add_dataset(scope=scope, name=dataset)
        # Attach a single key/value pair, then read the metadata back.
        meta_key = 'project'
        meta_value = 'data13_hip'
        self.did_client.set_metadata(scope=scope, name=dataset, key=meta_key, value=meta_value)
        fetched = self.did_client.get_metadata(scope=scope, name=dataset)
        assert meta_key in fetched
        assert fetched[meta_key] == meta_value
class TestMetaDIDClient():
    """ Test the metadata DID client (pytest-style variant).

    NOTE(review): this redefines (and shadows) the unittest-based
    TestMetaDIDClient declared earlier in this file — only one of the
    two variants should survive; confirm which is intended.
    """

    def setup(self):
        """ Instantiate the clients used by the tests. """
        self.did_client = DIDClient()
        self.meta_client = MetaClient()
        self.rse_client = RSEClient()
        self.scope_client = ScopeClient()

    def test_add_list_meta(self):
        """ META DID (CLIENTS): Add metadata to a data identifier"""
        # Add a scope
        tmp_scope = 'mock'
        # Add a dataset
        tmp_dataset = 'dsn_%s' % uuid()
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dataset)
        # Add a key and verify the round-trip.
        key = 'project'
        value = 'data13_hip'
        self.did_client.set_metadata(scope=tmp_scope, name=tmp_dataset, key=key, value=value)
        meta = self.did_client.get_metadata(scope=tmp_scope, name=tmp_dataset)
        # Plain asserts replace nose's assert_in/assert_equal (nose is
        # unmaintained and incompatible with current Python versions).
        assert key in meta
        assert meta[key] == value
class TestNamingConventionCore(unittest.TestCase):
    ''' Class to test naming convention enforcement. '''

    def setUp(self):
        """ Constructor."""
        # Resolve the VO kwargs once: multi-VO deployments need an explicit vo.
        multi_vo = config_get_bool('common', 'multi_vo', raise_exception=False, default=False)
        self.vo = {'vo': get_vo()} if multi_vo else {}
        self.did_client = DIDClient()

    def test_naming_convention(self):
        """ NAMING_CONVENTION(CORE): Add and validate naming convention."""
        # Already-registered conventions, keyed by scope.
        conventions = {conv['scope']: conv['regexp'] for conv in list_naming_conventions()}

        scope = InternalScope('mock', **self.vo)
        if scope not in conventions:
            add_naming_convention(scope=scope,
                                  regexp=r'^(?P<project>mock)\.(?P<datatype>\w+)\.\w+$',
                                  convention_type=KeyType.DATASET)

        # A scope with no convention yields no extracted metadata.
        extracted = validate_name(scope=InternalScope('mck', **self.vo), name='mock.DESD.yipeeee', did_type='D')
        assert extracted is None

        # A matching name is decomposed into its metadata fields.
        extracted = validate_name(scope=scope, name='mock.DESD.yipeeee', did_type='D')
        assert extracted == {u'project': 'mock', u'datatype': 'DESD'}

        # A non-matching name is rejected outright.
        with pytest.raises(InvalidObject):
            validate_name(scope=scope, name='mockyipeeee', did_type='D')

        # Register a dataset
        bad_dataset = 'mock.AD.' + str(generate_uuid())
        with pytest.raises(InvalidObject):
            self.did_client.add_dataset(scope='mock', name=bad_dataset, meta={'datatype': 'DESD'})
        with pytest.raises(InvalidObject):
            self.did_client.add_dataset(scope='mock', name=bad_dataset)

        good_dataset = 'mock.AOD.' + str(generate_uuid())
        self.did_client.add_dataset(scope='mock', name=good_dataset)
        observed_datatype = self.did_client.get_metadata(scope='mock', name=good_dataset)['datatype']
        assert observed_datatype == 'AOD'

        delete_naming_convention(scope=scope, convention_type=KeyType.DATASET)
class TestNamingConventionCore:
    ''' Class to test naming convention enforcement.

    NOTE(review): duplicates the unittest-based TestNamingConventionCore
    earlier in this file and shadows it — confirm which variant should stay.
    '''

    def setup(self):
        """ Set up the DID client.

        Was an ``__init__`` constructor before: pytest refuses to collect
        test classes that define ``__init__``, so this is a setup method.
        """
        self.did_client = DIDClient()

    def test_naming_convention(self):
        """ NAMING_CONVENTION(CORE): Add and validate naming convention."""
        conventions = {}
        for convention in list_naming_conventions():
            conventions[convention['scope']] = convention['regexp']

        scope = InternalScope('mock')
        if scope not in conventions:
            # Raw string: '\.' and '\w' are invalid escapes in a plain literal
            # (SyntaxWarning on modern CPython).
            add_naming_convention(scope=scope,
                                  regexp=r'^(?P<project>mock)\.(?P<datatype>\w+)\.\w+$',
                                  convention_type=KeyType.DATASET)

        # Scope without a convention: nothing extracted.
        meta = validate_name(scope=InternalScope('mck'), name='mock.DESD.yipeeee', did_type='D')
        assert meta is None

        # Matching name decomposes into metadata.
        meta = validate_name(scope=scope, name='mock.DESD.yipeeee', did_type='D')
        assert meta == {u'project': 'mock', u'datatype': 'DESD'}

        # Non-matching name is rejected.
        with pytest.raises(InvalidObject):
            validate_name(scope=scope, name='mockyipeeee', did_type='D')

        # Register a dataset
        tmp_dataset = 'mock.AD.' + str(generate_uuid())
        with pytest.raises(InvalidObject):
            self.did_client.add_dataset(scope='mock', name=tmp_dataset, meta={'datatype': 'DESD'})

        with pytest.raises(InvalidObject):
            self.did_client.add_dataset(scope='mock', name=tmp_dataset)

        tmp_dataset = 'mock.AOD.' + str(generate_uuid())
        self.did_client.add_dataset(scope='mock', name=tmp_dataset)
        observed_datatype = self.did_client.get_metadata(scope='mock', name=tmp_dataset)['datatype']
        assert observed_datatype == 'AOD'

        delete_naming_convention(scope=scope,
                                 regexp=r'(?P<project>mock)\.(\w+)$',
                                 convention_type=KeyType.DATASET)
class TestDIDClients:
    """ Integration tests for the DID, replica, scope and RSE client APIs.

    All nose helpers (assert_equal, assert_raises, @raises, ...) have been
    replaced with plain asserts and pytest.raises: nose is unmaintained and
    does not run on current Python.
    """

    def setup(self):
        """ Instantiate the clients used by every test. """
        self.account_client = AccountClient()
        self.scope_client = ScopeClient()
        self.meta_client = MetaClient()
        self.did_client = DIDClient()
        self.replica_client = ReplicaClient()
        self.rse_client = RSEClient()

    def test_list_dids(self):
        """ DATA IDENTIFIERS (CLIENT): List dids by pattern."""
        tmp_scope = scope_name_generator()
        tmp_files = []
        tmp_files.append('file_a_1%s' % generate_uuid())
        tmp_files.append('file_a_2%s' % generate_uuid())
        tmp_files.append('file_b_1%s' % generate_uuid())
        tmp_rse = 'MOCK'
        self.scope_client.add_scope('jdoe', tmp_scope)
        for tmp_file in tmp_files:
            self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb')

        # Wildcard matching on the file name.
        results = [r for r in self.did_client.list_dids(tmp_scope, {'name': 'file_a_*'}, type='file')]
        assert len(results) == 2
        results = [r for r in self.did_client.list_dids(tmp_scope, {'name': 'file_a_1*'}, type='file')]
        assert len(results) == 1
        results = [r for r in self.did_client.list_dids(tmp_scope, {'name': 'file_*_1*'}, type='file')]
        assert len(results) == 2
        results = [r for r in self.did_client.list_dids(tmp_scope, {'name': 'file*'}, type='file')]
        assert len(results) == 3

        # Without an explicit type, only collections are listed, so nothing matches.
        filters = {'name': 'file*', 'created_after': datetime.utcnow() - timedelta(hours=1)}
        results = [r for r in self.did_client.list_dids(tmp_scope, filters)]
        assert len(results) == 0

        with pytest.raises(UnsupportedOperation):
            self.did_client.list_dids(tmp_scope, {'name': 'file*'}, type='whateverytype')

    def test_list_recursive(self):
        """ DATA IDENTIFIERS (CLIENT): List did recursive """
        # Create nested containers and datasets
        tmp_scope_1 = 'list-did-recursive'
        tmp_scope_2 = 'list-did-recursive-2'
        self.scope_client.add_scope('root', tmp_scope_1)
        self.scope_client.add_scope('root', tmp_scope_2)

        tmp_container_1 = 'container_%s' % generate_uuid()
        self.did_client.add_container(scope=tmp_scope_1, name=tmp_container_1)
        tmp_container_2 = 'container_%s' % generate_uuid()
        self.did_client.add_container(scope=tmp_scope_1, name=tmp_container_2)

        tmp_dataset_1 = 'dataset_%s' % generate_uuid()
        self.did_client.add_dataset(scope=tmp_scope_2, name=tmp_dataset_1)
        tmp_dataset_2 = 'dataset_%s' % generate_uuid()
        self.did_client.add_dataset(scope=tmp_scope_1, name=tmp_dataset_2)

        # Hierarchy: container_1 -> {dataset_1, container_2 -> dataset_2}
        self.did_client.attach_dids(scope=tmp_scope_1, name=tmp_container_1, dids=[{'scope': tmp_scope_2, 'name': tmp_dataset_1}])
        self.did_client.attach_dids(scope=tmp_scope_1, name=tmp_container_2, dids=[{'scope': tmp_scope_1, 'name': tmp_dataset_2}])
        self.did_client.attach_dids(scope=tmp_scope_1, name=tmp_container_1, dids=[{'scope': tmp_scope_1, 'name': tmp_container_2}])

        # List DIDs not recursive - only the first container is expected
        dids = [str(did) for did in self.did_client.list_dids(scope=tmp_scope_1, recursive=False, type='all', filters={'name': tmp_container_1})]
        assert dids == [tmp_container_1]

        # List DIDs recursive - first container and all attached collections are expected
        dids = [str(did) for did in self.did_client.list_dids(scope=tmp_scope_1, recursive=True, type='all', filters={'name': tmp_container_1})]
        assert tmp_container_1 in dids
        assert tmp_container_2 in dids
        assert tmp_dataset_1 in dids
        assert tmp_dataset_2 in dids
        assert len(dids) == 4

        # List DIDs recursive - only containers are expected
        dids = [str(did) for did in self.did_client.list_dids(scope=tmp_scope_1, recursive=True, type='container', filters={'name': tmp_container_1})]
        assert tmp_container_1 in dids
        assert tmp_container_2 in dids
        assert tmp_dataset_1 not in dids
        assert tmp_dataset_2 not in dids
        assert len(dids) == 2

    def test_list_by_length(self):
        """ DATA IDENTIFIERS (CLIENT): List did with length """
        tmp_scope = 'mock'
        tmp_dsn = 'dsn_%s' % generate_uuid()
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn)

        # At least one dataset in the scope has content.
        results = [d for d in self.did_client.list_dids(tmp_scope, {'length.gt': 0})]
        assert len(results) != 0

        # Contradictory range: nothing can match.
        results = [d for d in self.did_client.list_dids(tmp_scope, {'length.gt': -1, 'length.lt': 1})]
        assert len(results) == 0

        results = [d for d in self.did_client.list_dids(tmp_scope, {'length': 0})]
        assert len(results) == 0

    def test_list_by_metadata(self):
        """ DATA IDENTIFIERS (CLIENT): List did with metadata"""
        dsns = []
        tmp_scope = 'mock'
        tmp_dsn1 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn1)

        dataset_meta = {'project': 'data12_8TeV',
                        'run_number': 400000,
                        'stream_name': 'physics_CosmicCalo',
                        'prod_step': 'merge',
                        'datatype': 'NTUP_TRIG',
                        'version': 'f392_m920',
                        }
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn1, meta=dataset_meta)

        tmp_dsn2 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn2)
        dataset_meta['run_number'] = 400001
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn2, meta=dataset_meta)

        tmp_dsn3 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn3)
        dataset_meta['stream_name'] = 'physics_Egamma'
        dataset_meta['datatype'] = 'NTUP_SMWZ'
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn3, meta=dataset_meta)

        # Narrow the filter step by step; the remaining dsns must always match.
        results = [d for d in self.did_client.list_dids(tmp_scope, {'project': 'data12_8TeV', 'version': 'f392_m920'})]
        for dsn in dsns:
            assert dsn in results
        dsns.remove(tmp_dsn1)

        results = [d for d in self.did_client.list_dids(tmp_scope, {'project': 'data12_8TeV', 'run_number': 400001})]
        for dsn in dsns:
            assert dsn in results
        dsns.remove(tmp_dsn2)

        results = [d for d in self.did_client.list_dids(tmp_scope, {'project': 'data12_8TeV', 'stream_name': 'physics_Egamma', 'datatype': 'NTUP_SMWZ'})]
        for dsn in dsns:
            assert dsn in results

        with pytest.raises(KeyNotFound):
            self.did_client.list_dids(tmp_scope, {'NotReallyAKey': 'NotReallyAValue'})

    def test_add_did(self):
        """ DATA IDENTIFIERS (CLIENT): Add, populate, list did content and create a sample"""
        tmp_scope = 'mock'
        tmp_rse = 'MOCK'
        tmp_dsn = 'dsn_%s' % generate_uuid()
        root = InternalAccount('root')
        set_local_account_limit(root, get_rse_id('MOCK'), -1)
        set_local_account_limit(root, get_rse_id('CERN-PROD_TZERO'), -1)

        # PFN example: rfio://castoratlas.cern.ch/castor/cern.ch/grid/atlas/tzero/xx/xx/xx/filename
        dataset_meta = {'project': 'data13_hip',
                        'run_number': 300000,
                        'stream_name': 'physics_CosmicCalo',
                        'prod_step': 'merge',
                        'datatype': 'NTUP_TRIG',
                        'version': 'f392_m927',
                        }
        rules = [{'copies': 1, 'rse_expression': 'MOCK', 'account': 'root'}]

        # Unknown scope must be rejected.
        with pytest.raises(ScopeNotFound):
            self.did_client.add_dataset(scope='Nimportnawak', name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules)

        # Files that were never registered cannot be attached.
        files = [{'scope': tmp_scope, 'name': 'lfn.%(tmp_dsn)s.' % locals() + str(generate_uuid()), 'bytes': 724963570, 'adler32': '0cc737eb'}, ]
        with pytest.raises(DataIdentifierNotFound):
            self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules, files=files)

        with pytest.raises(DataIdentifierNotFound):
            self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dsn, files=files)

        files = []
        for _ in range(5):
            lfn = 'lfn.%(tmp_dsn)s.' % locals() + str(generate_uuid())
            pfn = 'mock://localhost/tmp/rucio_rse/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            # it doesn't work with mock: TBF
            # pfn = 'srm://mock2.com:2880/pnfs/rucio/disk-only/scratchdisk/rucio_tests/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            pfn += '%(tmp_dsn)s/%(lfn)s' % locals()
            file_meta = {'guid': str(generate_uuid()), 'events': 10}
            files.append({'scope': tmp_scope, 'name': lfn, 'bytes': 724963570, 'adler32': '0cc737eb',
                          'pfn': pfn, 'meta': file_meta})

        rules = [{'copies': 1, 'rse_expression': 'CERN-PROD_TZERO', 'lifetime': timedelta(days=2), 'account': 'root'}]

        # mock:// PFNs are not resolvable paths -> InvalidPath.
        with pytest.raises(InvalidPath):
            self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules, files=files, rse=tmp_rse)

        files_without_pfn = [{'scope': i['scope'], 'name': i['name'], 'bytes': i['bytes'], 'adler32': i['adler32'], 'meta': i['meta']} for i in files]
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules, files=files_without_pfn, rse=tmp_rse)

        with pytest.raises(DataIdentifierAlreadyExists):
            self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, files=files, rse=tmp_rse)

        files = []
        for _ in range(5):
            lfn = '%(tmp_dsn)s.' % locals() + str(generate_uuid())
            pfn = 'mock://localhost/tmp/rucio_rse/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            # it doesn't work with mock: TBF
            # pfn = 'srm://mock2.com:2880/pnfs/rucio/disk-only/scratchdisk/rucio_tests/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            pfn += '%(tmp_dsn)s/%(lfn)s' % locals()
            file_meta = {'guid': str(generate_uuid()), 'events': 100}
            files.append({'scope': tmp_scope, 'name': lfn, 'bytes': 724963570, 'adler32': '0cc737eb',
                          'pfn': pfn, 'meta': file_meta})
        rules = [{'copies': 1, 'rse_expression': 'CERN-PROD_TZERO', 'lifetime': timedelta(days=2)}]

        with pytest.raises(InvalidPath):
            self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dsn, files=files, rse=tmp_rse)

        files_without_pfn = [{'scope': i['scope'], 'name': i['name'], 'bytes': i['bytes'], 'adler32': i['adler32'], 'meta': i['meta']} for i in files]
        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dsn, files=files_without_pfn, rse=tmp_rse)

        self.did_client.close(scope=tmp_scope, name=tmp_dsn)

        tmp_dsn_output = 'dsn_%s' % generate_uuid()
        self.did_client.create_did_sample(input_scope=tmp_scope, input_name=tmp_dsn, output_scope=tmp_scope, output_name=tmp_dsn_output, nbfiles=2)
        files = [f for f in self.did_client.list_files(scope=tmp_scope, name=tmp_dsn_output)]
        assert len(files) == 2

    def test_attach_dids_to_dids(self):
        """ DATA IDENTIFIERS (CLIENT): Attach dids to dids"""
        tmp_scope = 'mock'
        tmp_rse = 'MOCK'
        nb_datasets = 5
        nb_files = 5
        attachments, dsns = list(), list()
        guid_to_query = None
        dsn = {}
        for _ in range(nb_datasets):
            attachment = {}
            attachment['scope'] = tmp_scope
            attachment['name'] = 'dsn.%s' % str(generate_uuid())
            attachment['rse'] = tmp_rse
            files = []
            # Inner counter renamed: it previously shadowed the outer loop variable.
            for _ in range(nb_files):
                files.append({'scope': tmp_scope, 'name': 'lfn.%s' % str(generate_uuid()),
                              'bytes': 724963570, 'adler32': '0cc737eb',
                              'meta': {'guid': str(generate_uuid()), 'events': 100}})
            attachment['dids'] = files
            guid_to_query = files[0]['meta']['guid']
            dsn = {'scope': tmp_scope, 'name': attachment['name']}
            dsns.append(dsn)
            attachments.append(attachment)

        self.did_client.add_datasets(dsns=dsns)
        self.did_client.attach_dids_to_dids(attachments=attachments)
        # The last queried guid belongs to the last dataset only.
        dsns_l = [i for i in self.did_client.get_dataset_by_guid(guid_to_query)]
        assert dsns_l == [dsn]

        cnt_name = 'cnt_%s' % generate_uuid()
        self.did_client.add_container(scope='mock', name=cnt_name)
        # Attaching files straight to a container is not allowed.
        with pytest.raises(UnsupportedOperation):
            self.did_client.attach_dids_to_dids([{'scope': 'mock', 'name': cnt_name, 'rse': tmp_rse, 'dids': attachment['dids']}])

    def test_add_files_to_datasets(self):
        """ DATA IDENTIFIERS (CLIENT): Add files to Datasets"""
        tmp_scope = 'mock'
        tmp_rse = 'MOCK'
        dsn1 = 'dsn.%s' % str(generate_uuid())
        dsn2 = 'dsn.%s' % str(generate_uuid())
        meta = {'transient': True}
        files1, files2, nb_files = [], [], 5
        for _ in range(nb_files):
            files1.append({'scope': tmp_scope, 'name': 'lfn.%s' % str(generate_uuid()),
                           'bytes': 724963570, 'adler32': '0cc737eb',
                           'meta': {'guid': str(generate_uuid()), 'events': 100}})
            files2.append({'scope': tmp_scope, 'name': 'lfn.%s' % str(generate_uuid()),
                           'bytes': 724963570, 'adler32': '0cc737eb',
                           'meta': {'guid': str(generate_uuid()), 'events': 100}})

        self.did_client.add_dataset(scope=tmp_scope, name=dsn1, files=files1, rse=tmp_rse, meta=meta)
        self.did_client.add_dataset(scope=tmp_scope, name=dsn2, files=files2, rse=tmp_rse, meta=meta)

        # Cross-attach: files2 into dsn1 and files1 into dsn2.
        attachments = [{'scope': tmp_scope, 'name': dsn1, 'dids': files2, 'rse': tmp_rse},
                       {'scope': tmp_scope, 'name': dsn2, 'dids': files1, 'rse': tmp_rse}]

        self.did_client.add_files_to_datasets(attachments)
        files = [f for f in self.did_client.list_files(scope=tmp_scope, name=dsn1)]
        assert len(files) == 10

        # Re-attaching the same files without ignore_duplicate must fail.
        with pytest.raises(FileAlreadyExists):
            self.did_client.add_files_to_datasets(attachments)

        for attachment in attachments:
            for _ in range(nb_files):
                attachment['dids'].append({'scope': tmp_scope,
                                           'name': 'lfn.%s' % str(generate_uuid()),
                                           'bytes': 724963570,
                                           'adler32': '0cc737eb',
                                           'meta': {'guid': str(generate_uuid()),
                                                    'events': 100}})

        self.did_client.add_files_to_datasets(attachments, ignore_duplicate=True)
        files = [f for f in self.did_client.list_files(scope=tmp_scope, name=dsn1)]
        assert len(files) == 15

        # Corrupt meta-data of the first file of each attachment; the bulk call
        # must then detect the size mismatch. ('did_file' avoids shadowing the
        # 'file' builtin.)
        for attachment in attachments:
            for did_file in attachment['dids']:
                did_file['bytes'] = 1000
                break

        with pytest.raises(FileConsistencyMismatch):
            self.did_client.add_files_to_datasets(attachments, ignore_duplicate=True)

    def test_add_dataset(self):
        """ DATA IDENTIFIERS (CLIENT): Add dataset """
        tmp_scope = 'mock'
        tmp_dsn = 'dsn_%s' % generate_uuid()

        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, meta={'project': 'data13_hip'})

        did = self.did_client.get_did(tmp_scope, tmp_dsn)
        assert did['scope'] == tmp_scope
        assert did['name'] == tmp_dsn

        with pytest.raises(DataIdentifierNotFound):
            self.did_client.get_did('i_dont_exist', 'neither_do_i')

    def test_add_datasets(self):
        """ DATA IDENTIFIERS (CLIENT): Bulk add datasets """
        tmp_scope = 'mock'
        dsns = [{'name': 'dsn_%s' % generate_uuid(),
                 'scope': tmp_scope,
                 'meta': {'project': 'data13_hip'}} for _ in range(500)]
        self.did_client.add_datasets(dsns)

    def test_exists(self):
        """ DATA IDENTIFIERS (CLIENT): Check if data identifier exists """
        tmp_scope = 'mock'
        tmp_file = 'file_%s' % generate_uuid()
        tmp_rse = 'MOCK'

        self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb')

        did = self.did_client.get_did(tmp_scope, tmp_file)
        assert did['scope'] == tmp_scope
        assert did['name'] == tmp_file

        with pytest.raises(DataIdentifierNotFound):
            self.did_client.get_did('i_dont_exist', 'neither_do_i')

    def test_did_hierarchy(self):
        """ DATA IDENTIFIERS (CLIENT): Check did hierarchy rule """
        account = 'jdoe'
        rse = 'MOCK'
        scope = scope_name_generator()
        # 'files' avoids shadowing the 'file' builtin.
        files = ['file_%s' % generate_uuid() for _ in range(10)]
        dst = ['dst_%s' % generate_uuid() for _ in range(4)]
        cnt = ['cnt_%s' % generate_uuid() for _ in range(4)]

        self.scope_client.add_scope(account, scope)
        for i in range(10):
            self.replica_client.add_replica(rse, scope, files[i], 1, '0cc737eb')
        for i in range(4):
            self.did_client.add_did(scope, dst[i], 'DATASET', statuses=None, meta=None, rules=None)
        for i in range(4):
            self.did_client.add_did(scope, cnt[i], 'CONTAINER', statuses=None, meta=None, rules=None)

        # Two files per dataset.
        for i in range(4):
            self.did_client.add_files_to_dataset(scope, dst[i], [{'scope': scope, 'name': files[2 * i], 'bytes': 1, 'adler32': '0cc737eb'},
                                                                 {'scope': scope, 'name': files[2 * i + 1], 'bytes': 1, 'adler32': '0cc737eb'}])

        self.did_client.add_containers_to_container(scope, cnt[1], [{'scope': scope, 'name': cnt[2]}, {'scope': scope, 'name': cnt[3]}])
        self.did_client.add_datasets_to_container(scope, cnt[0], [{'scope': scope, 'name': dst[1]}, {'scope': scope, 'name': dst[2]}])

        result = self.did_client.scope_list(scope, recursive=True)
        for r in result:
            pass
            # TODO: fix, fix, fix
            # if r['name'] == cnt[1]:
            #     assert r['type'] == 'container'
            #     assert r['level'] == 0
            # if (r['name'] == cnt[0]) or (r['name'] == dst[0]) or (r['name'] == files[8]) or (r['name'] == files[9]):
            #     assert r['level'] == 0
            # else:
            #     assert r['level'] == 1

    def test_detach_did(self):
        """ DATA IDENTIFIERS (CLIENT): Detach dids from a did"""
        account = 'jdoe'
        rse = 'MOCK'
        scope = scope_name_generator()
        files = ['file_%s' % generate_uuid() for _ in range(10)]
        dst = ['dst_%s' % generate_uuid() for _ in range(5)]
        cnt = ['cnt_%s' % generate_uuid() for _ in range(2)]

        self.scope_client.add_scope(account, scope)
        for i in range(10):
            self.replica_client.add_replica(rse, scope, files[i], 1, '0cc737eb')
        for i in range(5):
            self.did_client.add_dataset(scope, dst[i], statuses=None, meta=None, rules=None)
        for i in range(2):
            self.did_client.add_container(scope, cnt[i], statuses=None, meta=None, rules=None)

        for i in range(5):
            self.did_client.add_files_to_dataset(scope, dst[i], [{'scope': scope, 'name': files[2 * i], 'bytes': 1, 'adler32': '0cc737eb'},
                                                                 {'scope': scope, 'name': files[2 * i + 1], 'bytes': 1, 'adler32': '0cc737eb'}])

        self.did_client.add_containers_to_container(scope, cnt[1], [{'scope': scope, 'name': dst[2]}, {'scope': scope, 'name': dst[3]}])

        # Mixing datasets and containers in one attachment is not allowed.
        with pytest.raises(UnsupportedOperation):
            self.did_client.add_datasets_to_container(scope, cnt[0], [{'scope': scope, 'name': dst[1]}, {'scope': scope, 'name': cnt[1]}])

        self.did_client.add_datasets_to_container(scope, cnt[0], [{'scope': scope, 'name': dst[1]}, {'scope': scope, 'name': dst[2]}])

        self.did_client.detach_dids(scope, cnt[0], [{'scope': scope, 'name': dst[1]}])
        self.did_client.detach_dids(scope, dst[3], [{'scope': scope, 'name': files[6]}, {'scope': scope, 'name': files[7]}])

        result = self.did_client.scope_list(scope, recursive=True)
        for r in result:
            if r['name'] == dst[1]:
                assert r['level'] == 0
            if r['type'] == 'file':
                if (r['name'] in files[6:9]):
                    assert r['level'] == 0
                else:
                    assert r['level'] != 0

        # A did cannot be detached from itself.
        with pytest.raises(UnsupportedOperation):
            self.did_client.detach_dids(scope=scope, name=cnt[0], dids=[{'scope': scope, 'name': cnt[0]}])

        # Close the dataset, then verify that detaching updates bytes/length.
        self.did_client.close(scope, dst[4])
        metadata = self.did_client.get_metadata(scope, dst[4])
        i_bytes, i_length = metadata['bytes'], metadata['length']
        metadata = self.did_client.get_metadata(scope, files[8])
        file1_bytes = metadata['bytes']
        metadata = self.did_client.get_metadata(scope, files[9])
        file2_bytes = metadata['bytes']
        self.did_client.detach_dids(scope, dst[4], [{'scope': scope, 'name': files[8]}, {'scope': scope, 'name': files[9]}])
        metadata = self.did_client.get_metadata(scope, dst[4])
        f_bytes, f_length = metadata['bytes'], metadata['length']
        assert i_bytes == f_bytes + file1_bytes + file2_bytes
        assert i_length == f_length + 1 + 1

    def test_scope_list(self):
        """ DATA IDENTIFIERS (CLIENT): Add, aggregate, and list data identifiers in a scope """
        # create some dummy data
        self.tmp_accounts = ['jdoe' for _ in range(3)]
        self.tmp_scopes = [scope_name_generator() for _ in range(3)]
        self.tmp_rses = [rse_name_generator() for _ in range(3)]
        self.tmp_files = ['file_%s' % generate_uuid() for _ in range(3)]
        self.tmp_datasets = ['dataset_%s' % generate_uuid() for _ in range(3)]
        self.tmp_containers = ['container_%s' % generate_uuid() for _ in range(3)]

        # add dummy data to the catalogue
        for i in range(3):
            self.scope_client.add_scope(self.tmp_accounts[i], self.tmp_scopes[i])
            self.rse_client.add_rse(self.tmp_rses[i])
            self.replica_client.add_replica(self.tmp_rses[i], self.tmp_scopes[i], self.tmp_files[i], 1, '0cc737eb')

        # put files in datasets
        for i in range(3):
            for j in range(3):
                files = [{'scope': self.tmp_scopes[j], 'name': self.tmp_files[j], 'bytes': 1, 'adler32': '0cc737eb'}]
                self.did_client.add_dataset(self.tmp_scopes[i], self.tmp_datasets[j])
                self.did_client.add_files_to_dataset(self.tmp_scopes[i], self.tmp_datasets[j], files)

        # put datasets in containers
        for i in range(3):
            for j in range(3):
                datasets = [{'scope': self.tmp_scopes[j], 'name': self.tmp_datasets[j]}]
                self.did_client.add_container(self.tmp_scopes[i], self.tmp_containers[j])
                self.did_client.add_datasets_to_container(self.tmp_scopes[i], self.tmp_containers[j], datasets)

        # reverse check if everything is in order
        for i in range(3):
            result = self.did_client.scope_list(self.tmp_scopes[i], recursive=True)

            r_topdids = []
            r_otherscopedids = []
            r_scope = []
            for r in result:
                if r['level'] == 0:
                    r_topdids.append(r['scope'] + ':' + r['name'])
                    r_scope.append(r['scope'])
                if r['scope'] != self.tmp_scopes[i]:
                    r_otherscopedids.append(r['scope'] + ':' + r['name'])
                    assert r['level'] in [1, 2]

            for j in range(3):
                assert self.tmp_scopes[i] == r_scope[j]
                if j != i:
                    assert self.tmp_scopes[j] + ':' + self.tmp_files[j] in r_otherscopedids
            assert self.tmp_scopes[i] + ':' + self.tmp_files[i] not in r_topdids

    def test_get_did(self):
        """ DATA IDENTIFIERS (CLIENT): add a new data identifier and try to retrieve it back"""
        rse = 'MOCK'
        scope = 'mock'
        tmp_file = generate_uuid()
        dsn = generate_uuid()

        self.replica_client.add_replica(rse, scope, tmp_file, 1, '0cc737eb')

        did = self.did_client.get_did(scope, tmp_file)
        assert did['scope'] == scope
        assert did['name'] == tmp_file

        # A dataset created with a lifetime must carry an expiration datetime.
        self.did_client.add_dataset(scope=scope, name=dsn, lifetime=10000000)
        did2 = self.did_client.get_did(scope, dsn)
        assert isinstance(did2['expired_at'], datetime)

    def test_get_meta(self):
        """ DATA IDENTIFIERS (CLIENT): add a new meta data for an identifier and try to retrieve it back"""
        rse = 'MOCK'
        scope = 'mock'
        tmp_file = generate_uuid()
        keys = ['project', 'run_number']
        values = ['data13_hip', 12345678]

        self.replica_client.add_replica(rse, scope, tmp_file, 1, '0cc737eb')
        for key, value in zip(keys, values):
            self.did_client.set_metadata(scope, tmp_file, key, value)

        meta = self.did_client.get_metadata(scope, tmp_file)
        for key, value in zip(keys, values):
            assert meta[key] == value

    def test_list_content(self):
        """ DATA IDENTIFIERS (CLIENT): test to list contents for an identifier"""
        rse = 'MOCK'
        scope = 'mock'
        nbfiles = 5
        dataset1 = generate_uuid()
        dataset2 = generate_uuid()
        container = generate_uuid()
        files1 = [{'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'} for _ in range(nbfiles)]
        files2 = [{'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'} for _ in range(nbfiles)]

        self.did_client.add_dataset(scope, dataset1)

        with pytest.raises(DataIdentifierAlreadyExists):
            self.did_client.add_dataset(scope, dataset1)

        self.did_client.add_files_to_dataset(scope, dataset1, files1, rse=rse)

        self.did_client.add_dataset(scope, dataset2)
        self.did_client.add_files_to_dataset(scope, dataset2, files2, rse=rse)

        self.did_client.add_container(scope, container)
        datasets = [{'scope': scope, 'name': dataset1}, {'scope': scope, 'name': dataset2}]
        self.did_client.add_datasets_to_container(scope, container, datasets)

        contents = self.did_client.list_content(scope, container)
        datasets_s = [d['name'] for d in contents]
        assert dataset1 in datasets_s
        assert dataset2 in datasets_s

    def test_list_files(self):
        """ DATA IDENTIFIERS (CLIENT): List files for a container"""
        rse = 'MOCK'
        scope = 'mock'
        dataset1 = generate_uuid()
        dataset2 = generate_uuid()
        container = generate_uuid()
        files1 = []
        files2 = []
        for _ in range(10):
            files1.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'})
            files2.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'})

        for i in range(10):
            self.replica_client.add_replica(rse, scope, files1[i]['name'], 1, '0cc737eb')
            self.replica_client.add_replica(rse, scope, files2[i]['name'], 1, '0cc737eb')

        self.did_client.add_dataset(scope, dataset1)
        self.did_client.add_files_to_dataset(scope, dataset1, files1)

        self.did_client.add_dataset(scope, dataset2)
        self.did_client.add_files_to_dataset(scope, dataset2, files2)
        datasets = [{'scope': scope, 'name': dataset1}, {'scope': scope, 'name': dataset2}]
        self.did_client.add_container(scope, container)
        self.did_client.add_datasets_to_container(scope, container, datasets)

        # List file content (uses the last file registered above).
        content = self.did_client.list_files(scope, files1[-1]['name'])
        assert content is not None
        for d in content:
            assert d['name'] == files1[-1]['name']

        # List container content
        for d in [{'name': x['name'], 'scope': x['scope'], 'bytes': x['bytes'], 'adler32': x['adler32']} for x in self.did_client.list_files(scope, container)]:
            assert d in files1 + files2

        # List non-existing data identifier content
        with pytest.raises(DataIdentifierNotFound):
            self.did_client.list_files(scope, 'Nimportnawak')

    def test_list_replicas(self):
        """ DATA IDENTIFIERS (CLIENT): List replicas for a container"""
        rse = 'MOCK'
        scope = 'mock'
        dsn1 = generate_uuid()
        dsn2 = generate_uuid()
        cnt = generate_uuid()
        files1 = []
        files2 = []
        for _ in range(10):
            files1.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'})
            files2.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'})

        self.did_client.add_dataset(scope, dsn1)
        self.did_client.add_files_to_dataset(scope, dsn1, files1, rse=rse)
        self.did_client.add_dataset(scope, dsn2)
        self.did_client.add_files_to_dataset(scope, dsn2, files2, rse=rse)
        self.did_client.add_container(scope, cnt)
        self.did_client.add_datasets_to_container(scope, cnt, [{'scope': scope, 'name': dsn1}, {'scope': scope, 'name': dsn2}])

        replicas = self.replica_client.list_replicas(dids=[{'scope': scope, 'name': dsn1}])
        assert replicas is not None

        replicas = self.replica_client.list_replicas(dids=[{'scope': scope, 'name': cnt}])
        assert replicas is not None

    def test_close(self):
        """ DATA IDENTIFIERS (CLIENT): test to close data identifiers"""
        tmp_rse = 'MOCK'
        tmp_scope = 'mock'

        # Add dataset
        tmp_dataset = 'dsn_%s' % generate_uuid()

        # Add file replica
        tmp_file = 'file_%s' % generate_uuid()
        self.replica_client.add_replica(rse=tmp_rse, scope=tmp_scope, name=tmp_file, bytes=1, adler32='0cc737eb')

        # Add dataset
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dataset)

        # Add files to dataset
        files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ]
        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files)

        # Add a second file replica
        tmp_file = 'file_%s' % generate_uuid()
        self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb')
        # Add files to dataset
        files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ]
        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files)

        # Close dataset
        with pytest.raises(UnsupportedStatus):
            self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, close=False)
        self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, open=False)

        # Add a third file replica
        tmp_file = 'file_%s' % generate_uuid()
        self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb')
        # Attaching to a closed dataset must fail (this replaces the
        # @raises(UnsupportedOperation) nose decorator with an explicit check).
        files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ]
        with pytest.raises(UnsupportedOperation):
            self.did_client.attach_dids(scope=tmp_scope, name=tmp_dataset, dids=files)

    def test_open(self):
        """ DATA IDENTIFIERS (CLIENT): test to re-open data identifiers for priv account"""
        # NOTE(review): the original carried a bare '@raises' decorator, which
        # is broken usage (it decorated the function as the exception class).
        # A privileged account may re-open a dataset, so success is expected.
        tmp_rse = 'MOCK'
        tmp_scope = 'mock'

        # Add dataset
        tmp_dataset = 'dsn_%s' % generate_uuid()

        # Add file replica
        tmp_file = 'file_%s' % generate_uuid()
        self.replica_client.add_replica(rse=tmp_rse, scope=tmp_scope, name=tmp_file, bytes=1, adler32='0cc737eb')

        # Add dataset
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dataset)

        # Add files to dataset
        files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ]
        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files)

        # Add a second file replica
        tmp_file = 'file_%s' % generate_uuid()
        self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb')
        # Add files to dataset
        files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ]
        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files)

        # Close dataset
        with pytest.raises(UnsupportedStatus):
            self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, close=False)
        self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, open=False)

        # Re-open the dataset (allowed for a privileged account).
        self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, open=True)

    def test_bulk_get_meta(self):
        """ DATA IDENTIFIERS (CLIENT): Add a new meta data for a list of DIDs and try to retrieve them back"""
        key = 'project'
        rse = 'MOCK'
        scope = 'mock'
        files = ['file_%s' % generate_uuid() for _ in range(4)]
        dst = ['dst_%s' % generate_uuid() for _ in range(4)]
        cnt = ['cnt_%s' % generate_uuid() for _ in range(4)]
        meta_mapping = {}
        list_dids = []
        # Register 4 files, 4 datasets and 4 containers, each with a
        # distinctive value for 'project'.
        for idx in range(4):
            self.replica_client.add_replica(rse, scope, files[idx], 1, '0cc737eb')
            self.did_client.set_metadata(scope, files[idx], key, 'file_%s' % idx)
            list_dids.append({'scope': scope, 'name': files[idx]})
            meta_mapping['%s:%s' % (scope, files[idx])] = (key, 'file_%s' % idx)
        for idx in range(4):
            self.did_client.add_did(scope, dst[idx], 'DATASET', statuses=None, meta={key: 'dsn_%s' % idx}, rules=None)
            list_dids.append({'scope': scope, 'name': dst[idx]})
            meta_mapping['%s:%s' % (scope, dst[idx])] = (key, 'dsn_%s' % idx)
        for idx in range(4):
            self.did_client.add_did(scope, cnt[idx], 'CONTAINER', statuses=None, meta={key: 'cnt_%s' % idx}, rules=None)
            list_dids.append({'scope': scope, 'name': cnt[idx]})
            meta_mapping['%s:%s' % (scope, cnt[idx])] = (key, 'cnt_%s' % idx)

        list_meta = [_ for _ in self.did_client.get_metadata_bulk(list_dids)]
        res_list_dids = [{'scope': entry['scope'], 'name': entry['name']} for entry in list_meta]
        # Dicts are not orderable in Python 3; plain .sort() raised TypeError.
        sort_key = lambda did: (str(did['scope']), did['name'])  # noqa: E731
        res_list_dids.sort(key=sort_key)
        list_dids.sort(key=sort_key)
        assert list_dids == res_list_dids
        for meta in list_meta:
            did = '%s:%s' % (meta['scope'], meta['name'])
            met = meta_mapping[did]
            assert (key, meta[key]) == met

        # Unknown DIDs are silently skipped by the bulk call.
        cnt = ['cnt_%s' % generate_uuid() for _ in range(4)]
        for idx in range(4):
            list_dids.append({'scope': scope, 'name': cnt[idx]})
        list_meta = [_ for _ in self.did_client.get_metadata_bulk(list_dids)]
        assert len(list_meta) == 12

        list_dids = []
        for idx in range(4):
            list_dids.append({'scope': scope, 'name': cnt[idx]})
        list_meta = [_ for _ in self.did_client.get_metadata_bulk(list_dids)]
        assert len(list_meta) == 0
class TestDidMetaClient(unittest.TestCase):
    """Exercise DID metadata through the client, covering both the DID-table
    columns and the JSON metadata plugin (when the backend supports it)."""

    def setUp(self):
        """Build the client, pick the shared 'mock' scope, probe JSON support."""
        self.did_client = DIDClient()
        self.tmp_scope = 'mock'
        self.session = get_session()
        # True only when the backend database can store JSON metadata.
        self.json_implemented = JSONDidMeta().json_implemented(self.session)

    def tearDown(self):
        """Flush any pending database work."""
        self.session.commit()  # pylint: disable=no-member

    def test_set_metadata(self):
        """ META (CLIENTS) : Adds a fully set json column to a did, updates if some keys present """
        tmp_name = 'name_%s' % generate_uuid()
        self.did_client.add_did(scope=self.tmp_scope, name=tmp_name, type="DATASET")

        # JSON plugin: three independently-set keys land in one JSON document.
        if self.json_implemented:
            value1 = "value_" + str(generate_uuid())
            value2 = "value_" + str(generate_uuid())
            value3 = "value_" + str(generate_uuid())
            for meta_key, meta_value in (("key1", value1), ("key2", value2), ("key3", value3)):
                self.did_client.set_metadata(scope=self.tmp_scope, name=tmp_name, key=meta_key, value=meta_value)
            metadata = self.did_client.get_metadata(scope=self.tmp_scope, name=tmp_name, plugin="JSON")
            assert len(metadata) == 3
            assert metadata['key1'] == value1
            assert metadata['key2'] == value2
            assert metadata['key3'] == value3

        # DID-table column: 'project' lives on the DID row itself.
        self.did_client.set_metadata(scope=self.tmp_scope, name=tmp_name, key='project', value='data12_12TeV')
        assert self.did_client.get_metadata(scope=self.tmp_scope, name=tmp_name)['project'] == 'data12_12TeV'

    def test_delete_metadata(self):
        """ META (CLIENTS) : Deletes metadata key """
        tmp_name = 'name_%s' % generate_uuid()
        self.did_client.add_did(scope=self.tmp_scope, name=tmp_name, type="DATASET")

        # Key deletion only applies to the JSON plugin.
        if self.json_implemented:
            value1 = "value_" + str(generate_uuid())
            value2 = "value_" + str(generate_uuid())
            value3 = "value_" + str(generate_uuid())
            for meta_key, meta_value in (("key1", value1), ("key2", value2), ("key3", value3)):
                self.did_client.set_metadata(scope=self.tmp_scope, name=tmp_name, key=meta_key, value=meta_value)

            self.did_client.delete_metadata(scope=self.tmp_scope, name=tmp_name, key='key2')

            metadata = self.did_client.get_metadata(scope=self.tmp_scope, name=tmp_name, plugin="JSON")
            assert len(metadata) == 2
            assert metadata['key1'] == value1
            assert metadata['key3'] == value3
            # Removing a key that was never set is an error.
            with pytest.raises(KeyNotFound):
                self.did_client.delete_metadata(scope=self.tmp_scope, name=tmp_name, key="key9")

    def test_get_metadata(self):
        """ META (CLIENTS) : Gets all metadata for the given did """
        tmp_name = 'name_%s' % generate_uuid()
        self.did_client.add_did(scope=self.tmp_scope, name=tmp_name, type="DATASET")

        # JSON plugin keys are returned by the "JSON" plugin view.
        if self.json_implemented:
            value1 = "value_" + str(generate_uuid())
            value2 = "value_" + str(generate_uuid())
            self.did_client.set_metadata(scope=self.tmp_scope, name=tmp_name, key="key1", value=value1)
            self.did_client.set_metadata(scope=self.tmp_scope, name=tmp_name, key="key2", value=value2)
            metadata = self.did_client.get_metadata(scope=self.tmp_scope, name=tmp_name, plugin="JSON")
            assert len(metadata) == 2
            assert metadata['key1'] == value1
            assert metadata['key2'] == value2

        # DID-table column is returned by the default view.
        self.did_client.set_metadata(scope=self.tmp_scope, name=tmp_name, key='project', value='data12_14TeV')
        assert self.did_client.get_metadata(scope=self.tmp_scope, name=tmp_name)['project'] == 'data12_14TeV'

        # plugin="ALL" merges both sources into one mapping.
        if self.json_implemented:
            all_metadata = self.did_client.get_metadata(scope=self.tmp_scope, name=tmp_name, plugin="ALL")
            assert all_metadata['key1'] == value1
            assert all_metadata['key2'] == value2
            assert all_metadata['project'] == "data12_14TeV"

    def test_list_dids_extended(self):
        """ META (CLIENTS) : Get all dids matching the values of the provided metadata keys """

        # --- DID-table column filters ---------------------------------
        dsns = []
        tmp_scope = 'mock'
        tmp_dsn1 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn1)

        dataset_meta = {'project': 'data12_8TeV',
                        'run_number': 400000,
                        'stream_name': 'physics_CosmicCalo',
                        'prod_step': 'merge',
                        'datatype': 'NTUP_TRIG',
                        'version': 'f392_m920'}
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn1, meta=dataset_meta)

        tmp_dsn2 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn2)
        dataset_meta['run_number'] = 400001
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn2, meta=dataset_meta)

        tmp_dsn3 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn3)
        dataset_meta['stream_name'] = 'physics_Egamma'
        dataset_meta['datatype'] = 'NTUP_SMWZ'
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn3, meta=dataset_meta)

        # All three datasets share project and version.
        dids = self.did_client.list_dids_extended(tmp_scope, {'project': 'data12_8TeV', 'version': 'f392_m920'})
        results = list(dids)
        for dsn in dsns:
            assert dsn in results

        # Only the last two datasets carry run_number 400001.
        dsns.remove(tmp_dsn1)
        dids = self.did_client.list_dids_extended(tmp_scope, {'project': 'data12_8TeV', 'run_number': 400001})
        results = list(dids)
        for dsn in dsns:
            assert dsn in results

        # Only the last dataset matches the Egamma / NTUP_SMWZ combination.
        dsns.remove(tmp_dsn2)
        dids = self.did_client.list_dids_extended(tmp_scope, {'project': 'data12_8TeV', 'stream_name': 'physics_Egamma', 'datatype': 'NTUP_SMWZ'})
        results = list(dids)
        for dsn in dsns:
            assert dsn in results

        # --- JSON plugin filters ---------------------------------------
        if self.json_implemented:
            did1 = 'name_%s' % generate_uuid()
            did2 = 'name_%s' % generate_uuid()
            did3 = 'name_%s' % generate_uuid()
            did4 = 'name_%s' % generate_uuid()

            key1 = 'key_1_%s' % generate_uuid()
            key2 = 'key_2_%s' % generate_uuid()
            key3 = 'key_3_%s' % generate_uuid()

            value1 = 'value_1_%s' % generate_uuid()
            value2 = 'value_2_%s' % generate_uuid()
            value3 = 'value_3_%s' % generate_uuid()
            value_not_1 = 'value_not_1_%s' % generate_uuid()
            value_not_2 = 'value_not_1_%s' % generate_uuid()
            value_unique = 'value_unique_%s' % generate_uuid()

            for did_name in (did1, did2, did3, did4):
                self.did_client.add_did(scope=tmp_scope, name=did_name, type="DATASET")

            self.did_client.set_metadata(scope=tmp_scope, name=did1, key=key1, value=value1)
            self.did_client.set_metadata(scope=tmp_scope, name=did1, key=key2, value=value2)

            self.did_client.set_metadata(scope=tmp_scope, name=did2, key=key1, value=value1)
            self.did_client.set_metadata(scope=tmp_scope, name=did2, key=key2, value=value_not_2)
            self.did_client.set_metadata(scope=tmp_scope, name=did2, key=key3, value=value3)

            self.did_client.set_metadata(scope=tmp_scope, name=did3, key=key1, value=value_not_1)
            self.did_client.set_metadata(scope=tmp_scope, name=did3, key=key2, value=value2)
            self.did_client.set_metadata(scope=tmp_scope, name=did3, key=key3, value=value3)

            self.did_client.set_metadata(scope=tmp_scope, name=did4, key=key1, value=value1)
            self.did_client.set_metadata(scope=tmp_scope, name=did4, key=key2, value=value2)
            self.did_client.set_metadata(scope=tmp_scope, name=did4, key=key3, value=value_unique)

            # An unknown key matches nothing.
            results = list(self.did_client.list_dids_extended(tmp_scope, {'key45': 'value'}))
            assert len(results) == 0

            # A known key with an unknown value matches nothing.
            results = list(self.did_client.list_dids_extended(tmp_scope, {key1: 'value_not_there'}))
            assert len(results) == 0

            # key1 == value1 -> did1, did2 and did4.
            results = list(self.did_client.list_dids_extended(tmp_scope, {key1: value1}))
            assert len(results) == 3
            assert did1 in results
            assert did2 in results
            assert did4 in results

            # key1 and key2 together -> did1 and did4.
            results = list(self.did_client.list_dids_extended(tmp_scope, {key1: value1, key2: value2}))
            assert len(results) == 2
            assert did1 in results
            assert did4 in results

            # All three keys together match no DID.
            results = list(self.did_client.list_dids_extended(tmp_scope, {key1: value1, key2: value2, key3: value3}))
            assert len(results) == 0

            # key3 == value_unique -> did4 only.
            results = list(self.did_client.list_dids_extended(tmp_scope, {key3: value_unique}))
            assert len(results) == 1
            assert did4 in results
class TestBinRucio():
    """Integration tests driving the `rucio` / `rucio-admin` command-line
    tools against a running server via `execute()` shell calls."""

    def setup(self):
        """Drop any cached root token and build the clients the tests use."""
        try:
            remove('/tmp/.rucio_root/auth_token_root')
        except OSError as e:
            if e.args[0] != 2:  # errno 2 == ENOENT: a missing token is fine
                raise
        self.marker = '$> '
        self.host = config_get('client', 'rucio_host')
        self.auth_host = config_get('client', 'auth_host')
        self.user = '******'
        self.def_rse = 'MOCK4'
        self.did_client = DIDClient()
        self.replica_client = ReplicaClient()
        self.account_client = AccountLimitClient()
        # Unlimited quota for root on the default RSE.
        self.account_client.set_account_limit('root', self.def_rse, -1)

    def test_rucio_version(self):
        """CLIENT(USER): Rucio version"""
        cmd = 'bin/rucio --version'
        exitcode, out, err = execute(cmd)
        # --version is printed on stderr.
        nose.tools.assert_true('rucio' in err)

    def test_rucio_ping(self):
        """CLIENT(USER): Rucio ping"""
        cmd = 'rucio --host %s ping' % self.host
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)

    def test_add_account(self):
        """CLIENT(ADMIN): Add account"""
        tmp_val = account_name_generator()
        cmd = 'rucio-admin account add %s' % tmp_val
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        nose.tools.assert_equal('Added new account: %s\n' % tmp_val, out)

    def test_whoami(self):
        """CLIENT(USER): Rucio whoami"""
        cmd = 'rucio whoami'
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        nose.tools.assert_regexp_matches(out, re.compile('.*account.*'))

    def test_add_identity(self):
        """CLIENT(ADMIN): Add identity"""
        tmp_val = account_name_generator()
        cmd = 'rucio-admin account add %s' % tmp_val
        exitcode, out, err = execute(cmd)
        nose.tools.assert_equal('Added new account: %s\n' % tmp_val, out)
        cmd = 'rucio-admin identity add --account %s --type GSS --id [email protected] --email [email protected]' % tmp_val
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        nose.tools.assert_equal(
            'Added new identity to account: [email protected]%s\n' % tmp_val, out)

    def test_del_identity(self):
        """CLIENT(ADMIN): Test del identity"""
        tmp_acc = account_name_generator()
        # create account
        cmd = 'rucio-admin account add %s' % tmp_acc
        exitcode, out, err = execute(cmd)
        # add identity to account
        cmd = 'rucio-admin identity add --account %s --type GSS --id [email protected] --email [email protected]' % tmp_acc
        exitcode, out, err = execute(cmd)
        # delete identity from account
        cmd = 'rucio-admin identity delete --account %s --type GSS --id [email protected]' % tmp_acc
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_equal('Deleted identity: [email protected]\n', out)
        # list identities for account: must be empty again
        cmd = 'rucio-admin account list-identities %s' % (tmp_acc)
        print(self.marker + cmd)
        print(cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_equal('', out)

    def test_attributes(self):
        """CLIENT(ADMIN): Add/List/Delete attributes"""
        tmp_acc = account_name_generator()
        # create account
        cmd = 'rucio-admin account add %s' % tmp_acc
        exitcode, out, err = execute(cmd)
        # add attribute to the account
        cmd = 'rucio-admin account add-attribute {0} --key test_attribute_key --value true'.format(tmp_acc)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        nose.tools.assert_equal(0, exitcode)
        # list attributes
        cmd = 'rucio-admin account list-attributes {0}'.format(tmp_acc)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        nose.tools.assert_equal(0, exitcode)
        # delete attribute from the account
        cmd = 'rucio-admin account delete-attribute {0} --key test_attribute_key'.format(tmp_acc)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        nose.tools.assert_equal(0, exitcode)

    def test_add_scope(self):
        """CLIENT(ADMIN): Add scope"""
        tmp_scp = scope_name_generator()
        tmp_acc = account_name_generator()
        cmd = 'rucio-admin account add %s' % tmp_acc
        exitcode, out, err = execute(cmd)
        cmd = 'rucio-admin scope add --account %s --scope %s' % (tmp_acc, tmp_scp)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_equal(
            'Added new scope to account: %s-%s\n' % (tmp_scp, tmp_acc), out)

    def test_add_rse(self):
        """CLIENT(ADMIN): Add RSE"""
        tmp_val = rse_name_generator()
        cmd = 'rucio-admin rse add %s' % tmp_val
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        nose.tools.assert_equal('Added new RSE: %s\n' % tmp_val, out)

    def test_list_rses(self):
        """CLIENT(ADMIN): List RSEs"""
        tmp_val = rse_name_generator()
        cmd = 'rucio-admin rse add %s' % tmp_val
        exitcode, out, err = execute(cmd)
        cmd = 'rucio-admin rse list'
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        nose.tools.assert_regexp_matches(out, re.compile('.*%s.*' % tmp_val))

    def test_upload(self):
        """CLIENT(USER): Upload"""
        tmp_val = rse_name_generator()
        cmd = 'rucio-admin rse add %s' % tmp_val
        exitcode, out, err = execute(cmd)
        cmd = 'rucio upload'
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)

    def test_download(self):
        """CLIENT(USER): Download"""
        cmd = 'rucio download'
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)

    def test_upload_file(self):
        """CLIENT(USER): Rucio upload files"""
        tmp_file1 = file_generator()
        tmp_file2 = file_generator()
        tmp_file3 = file_generator()
        cmd = 'rucio upload --rse {0} --scope {1} {2} {3} {4}'.format(
            self.def_rse, self.user, tmp_file1, tmp_file2, tmp_file3)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        remove(tmp_file1)
        remove(tmp_file2)
        remove(tmp_file3)
        nose.tools.assert_true(
            "File %s:%s successfully uploaded on the storage" % (self.user, tmp_file1[5:]) in out)

    def test_upload_file_guid(self):
        """CLIENT(USER): Rucio upload file with guid"""
        tmp_file1 = file_generator()
        tmp_guid = generate_uuid()
        cmd = 'rucio upload --rse {0} --guid {1} --scope {2} {3}'.format(
            self.def_rse, tmp_guid, self.user, tmp_file1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        remove(tmp_file1)
        nose.tools.assert_true(
            "File %s:%s successfully uploaded on the storage" % (self.user, tmp_file1[5:]) in out)

    def test_upload_repeated_file(self):
        """CLIENT(USER): Rucio upload repeated files"""
        # One of the files to upload is already catalogued but was removed
        tmp_file1 = file_generator()
        tmp_file2 = file_generator()
        tmp_file3 = file_generator()
        cmd = 'rucio upload --rse {0} --scope {1} {2}'.format(
            self.def_rse, self.user, tmp_file1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        # get the rule for the file
        cmd = "rucio list-rules {0}:{1} | grep {0}:{1} | cut -f1 -d\ ".format(
            self.user, tmp_file1[5:])
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        rule = out
        # delete the file from the catalog
        cmd = "rucio delete-rule {0}".format(rule)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # delete the physical file
        cmd = "find /tmp/rucio_rse/ -name {0} |xargs rm".format(tmp_file1[5:])
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        cmd = 'rucio upload --rse {0} --scope {1} {2} {3} {4}'.format(
            self.def_rse, self.user, tmp_file1, tmp_file2, tmp_file3)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        remove(tmp_file1)
        remove(tmp_file2)
        remove(tmp_file3)
        nose.tools.assert_not_equal(
            "File %s:%s successfully uploaded on the storage" % (self.user, tmp_file1[5:]) in out, None)

    def test_upload_repeated_file_dataset(self):
        """CLIENT(USER): Rucio upload repeated files to dataset"""
        # One of the files to upload is already in the dataset
        tmp_file1 = file_generator()
        tmp_file2 = file_generator()
        tmp_file3 = file_generator()
        tmp_dsn = self.user + ':DSet' + rse_name_generator()  # something like mock:DSetMOCK_S0M37HING
        # Adding files to a new dataset
        cmd = 'rucio upload --rse {0} --scope {1} {2} {3}'.format(
            self.def_rse, self.user, tmp_file1, tmp_dsn)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        # upload the files to the dataset
        cmd = 'rucio upload --rse {0} --scope {1} {2} {3} {4} {5}'.format(
            self.def_rse, self.user, tmp_file1, tmp_file2, tmp_file3, tmp_dsn)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        remove(tmp_file1)
        remove(tmp_file2)
        remove(tmp_file3)
        # searching for the file in the new dataset
        cmd = 'rucio list-files {0}'.format(tmp_dsn)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        # tmp_file1 must be in the dataset
        nose.tools.assert_not_equal(
            re.search("{0}:{1}".format(self.user, tmp_file1[5:]), out), None)
        # tmp_file3 must be in the dataset
        nose.tools.assert_not_equal(
            re.search("{0}:{1}".format(self.user, tmp_file3[5:]), out), None)

    def test_upload_file_dataset(self):
        """CLIENT(USER): Rucio upload files to dataset"""
        tmp_file1 = file_generator()
        tmp_file2 = file_generator()
        tmp_file3 = file_generator()
        tmp_dsn = self.user + ':DSet' + rse_name_generator()  # something like mock:DSetMOCK_S0M37HING
        # Adding files to a new dataset
        cmd = 'rucio upload --rse {0} --scope {1} {2} {3} {4} {5}'.format(
            self.def_rse, self.user, tmp_file1, tmp_file2, tmp_file3, tmp_dsn)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        remove(tmp_file1)
        remove(tmp_file2)
        remove(tmp_file3)
        # searching for the file in the new dataset
        cmd = 'rucio list-files {0}'.format(tmp_dsn)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        nose.tools.assert_not_equal(
            re.search("{0}:{1}".format(self.user, tmp_file1[5:]), out), None)

    def test_upload_adds_md5digest(self):
        """CLIENT(USER): Upload Checksums"""
        # user has a file to upload
        filename = file_generator()
        file_md5 = md5(filename)
        # user uploads file
        cmd = 'rucio upload --rse {0} --scope {1} {2}'.format(
            self.def_rse, self.user, filename)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        # When inspecting the metadata of the new file the user finds the md5 checksum
        meta = self.did_client.get_metadata(scope=self.user, name=filename[5:])
        nose.tools.assert_in('md5', meta)
        nose.tools.assert_equal(meta['md5'], file_md5)
        remove(filename)

    def test_create_dataset(self):
        """CLIENT(USER): Rucio add dataset"""
        tmp_name = self.user + ':DSet' + rse_name_generator()  # something like mock:DSetMOCK_S0M37HING
        cmd = 'rucio add-dataset ' + tmp_name
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_not_equal(re.search('Added ' + tmp_name, out), None)

    def test_add_files_to_dataset(self):
        """CLIENT(USER): Rucio add files to dataset"""
        tmp_file1 = file_generator()
        tmp_file2 = file_generator()
        tmp_dataset = self.user + ':DSet' + rse_name_generator()  # something like mock:DSetMOCK_S0M37HING
        # add files
        cmd = 'rucio upload --rse {0} --scope {1} {2} {3}'.format(
            self.def_rse, self.user, tmp_file1, tmp_file2)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # create dataset
        cmd = 'rucio add-dataset ' + tmp_dataset
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # add files to dataset
        cmd = 'rucio attach {0} {3}:{1} {3}:{2}'.format(
            tmp_dataset, tmp_file1[5:], tmp_file2[5:], self.user)  # trimming '/tmp/' from filename
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # find the added files
        cmd = 'rucio list-files ' + tmp_dataset
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_not_equal(re.search(tmp_file1[5:], out), None)

    def test_download_file(self):
        """CLIENT(USER): Rucio download files"""
        tmp_file1 = file_generator()
        # add files
        cmd = 'rucio upload --rse {0} --scope {1} {2}'.format(
            self.def_rse, self.user, tmp_file1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # download files
        cmd = 'rucio download --dir /tmp {0}:{1}'.format(
            self.user, tmp_file1[5:])  # trimming '/tmp/' from filename
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # search for the files with ls
        cmd = 'ls /tmp/'  # search in /tmp/
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_not_equal(re.search(tmp_file1[5:], out), None)
        # best-effort cleanup of the download directory
        try:
            for i in listdir('data13_hip'):
                unlink('data13_hip/%s' % i)
            rmdir('data13_hip')
        except Exception:
            pass

    def test_download_succeeds_md5only(self):
        """CLIENT(USER): Rucio download succeeds MD5 only"""
        # user has a file to upload
        filename = file_generator()
        file_md5 = md5(filename)
        filesize = stat(filename).st_size
        lfn = {'name': filename[5:],
               'scope': self.user,
               'bytes': filesize,
               'md5': file_md5}
        # user uploads file
        self.replica_client.add_replicas(files=[lfn], rse=self.def_rse)
        rse_settings = rsemgr.get_rse_info(self.def_rse)
        protocol = rsemgr.create_protocol(rse_settings, 'write')
        protocol.connect()
        # NOTE(review): list(...) is required on Python 3, where dict views
        # are not subscriptable.
        pfn = list(protocol.lfns2pfns(lfn).values())[0]
        protocol.put(filename[5:], pfn, filename[:5])
        protocol.close()
        remove(filename)
        # download files
        cmd = 'rucio download --dir /tmp {0}:{1}'.format(
            self.user, filename[5:])
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # search for the files with ls
        cmd = 'ls /tmp/{0}'.format(self.user)  # search in /tmp/
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_not_equal(re.search(filename[5:], out), None)
        # best-effort cleanup of the download directory
        try:
            for i in listdir('data13_hip'):
                unlink('data13_hip/%s' % i)
            rmdir('data13_hip')
        except Exception:
            pass

    def test_download_fails_badmd5(self):
        """CLIENT(USER): Rucio download fails on MD5 mismatch"""
        # user has a file to upload
        filename = file_generator()
        file_md5 = md5(filename)
        filesize = stat(filename).st_size
        # register the replica with a deliberately wrong checksum
        lfn = {'name': filename[5:],
               'scope': self.user,
               'bytes': filesize,
               'md5': '0123456789abcdef0123456789abcdef'}
        # user uploads file
        self.replica_client.add_replicas(files=[lfn], rse=self.def_rse)
        rse_settings = rsemgr.get_rse_info(self.def_rse)
        protocol = rsemgr.create_protocol(rse_settings, 'write')
        protocol.connect()
        # NOTE(review): list(...) is required on Python 3, where dict views
        # are not subscriptable.
        pfn = list(protocol.lfns2pfns(lfn).values())[0]
        protocol.put(filename[5:], pfn, filename[:5])
        protocol.close()
        remove(filename)
        # download file
        cmd = 'rucio download --dir /tmp {0}:{1}'.format(
            self.user, filename[5:])
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # a failure message 'Checksum mismatch : local _____ vs storage _____' appears
        # (raw string: the pattern relies on literal backslash-escaped spaces)
        report = r'Checksum\ mismatch\ \:\ local\ {0}\ vs\ recorded\ 0123456789abcdef0123456789abcdef'.format(
            file_md5)
        print('searching', report, 'in', err)
        nose.tools.assert_not_equal(re.search(report, err), None)
        # The file should not exist
        cmd = 'ls /tmp/'  # search in /tmp/
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_equal(re.search(filename[5:], out), None)
        # best-effort cleanup of the download directory
        try:
            for i in listdir('data13_hip'):
                unlink('data13_hip/%s' % i)
            rmdir('data13_hip')
        except Exception:
            pass

    def test_download_dataset(self):
        """CLIENT(USER): Rucio download dataset"""
        tmp_file1 = file_generator()
        tmp_dataset = self.user + ':DSet' + rse_name_generator()  # something like mock:DSetMOCK_S0M37HING
        # add files
        cmd = 'rucio upload --rse {0} --scope {1} {2}'.format(
            self.def_rse, self.user, tmp_file1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # create dataset
        cmd = 'rucio add-dataset ' + tmp_dataset
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # add files to dataset
        cmd = 'rucio attach {0} {1}:{2}'.format(
            tmp_dataset, self.user, tmp_file1[5:])  # trimming '/tmp/' from filename
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # download dataset
        cmd = 'rucio download --dir /tmp {0}'.format(tmp_dataset)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        search = '{0} successfully downloaded'.format(tmp_file1[5:])  # trimming '/tmp/' from filename
        nose.tools.assert_not_equal(re.search(search, err), None)

    def test_create_rule(self):
        """CLIENT(USER): Rucio add rule"""
        tmp_file1 = file_generator()
        # add files
        cmd = 'rucio upload --rse {0} --scope {1} {2}'.format(
            self.def_rse, self.user, tmp_file1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # three RSEs with the same spacetoken so a 3-copy rule can be satisfied
        # add rse
        tmp_rse = rse_name_generator()
        cmd = 'rucio-admin rse add {0}'.format(tmp_rse)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        # add quota
        self.account_client.set_account_limit('root', tmp_rse, -1)
        # add rse attributes
        cmd = 'rucio-admin rse set-attribute --rse {0} --key spacetoken --value ATLASSCRATCHDISK'.format(tmp_rse)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # add rse
        tmp_rse = rse_name_generator()
        cmd = 'rucio-admin rse add {0}'.format(tmp_rse)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # add quota
        self.account_client.set_account_limit('root', tmp_rse, -1)
        # add rse attributes
        cmd = 'rucio-admin rse set-attribute --rse {0} --key spacetoken --value ATLASSCRATCHDISK'.format(tmp_rse)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # add rse
        tmp_rse = rse_name_generator()
        cmd = 'rucio-admin rse add {0}'.format(tmp_rse)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # add quota
        self.account_client.set_account_limit('root', tmp_rse, -1)
        # add rse attributes
        cmd = 'rucio-admin rse set-attribute --rse {0} --key spacetoken --value ATLASSCRATCHDISK'.format(tmp_rse)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # add rules
        cmd = "rucio add-rule {0}:{1} 3 'spacetoken=ATLASSCRATCHDISK'".format(
            self.user, tmp_file1[5:])
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        rule = out[:-1]  # trimming new line character
        # check if rule exist for the file
        cmd = "rucio list-rules {0}:{1}".format(self.user, tmp_file1[5:])
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_not_equal(re.search(rule, out), None)

    def test_delete_rule(self):
        """CLIENT(USER): rule deletion"""
        self.account_client.set_account_limit('root', self.def_rse, -1)
        tmp_file1 = file_generator()
        # add files
        cmd = 'rucio upload --rse {0} --scope {1} {2}'.format(
            self.def_rse, self.user, tmp_file1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # add rse
        tmp_rse = rse_name_generator()
        cmd = 'rucio-admin rse add {0}'.format(tmp_rse)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        self.account_client.set_account_limit('root', tmp_rse, -1)
        # add rse attributes
        cmd = 'rucio-admin rse set-attribute --rse {0} --key spacetoken --value ATLASSCRATCHDISK'.format(tmp_rse)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # add rules
        cmd = "rucio add-rule {0}:{1} 1 'spacetoken=ATLASSCRATCHDISK'".format(
            self.user, tmp_file1[5:])
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(err)
        print(out)
        # get the rules for the file
        cmd = "rucio list-rules {0}:{1} | grep {0}:{1} | cut -f1 -d\ ".format(
            self.user, tmp_file1[5:])
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        (rule1, rule2) = out.split()
        # delete the rules for the file
        cmd = "rucio delete-rule {0}".format(rule1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        cmd = "rucio delete-rule {0}".format(rule2)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # search for the file
        cmd = "rucio list-dids {0}:{1}".format(self.user, tmp_file1[5:])
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_equal(5, len(out.splitlines()))

    def test_add_file_twice(self):
        """CLIENT(USER): Add file twice"""
        tmp_file1 = file_generator()
        # add file twice
        cmd = 'rucio upload --rse {0} --scope {1} {2}'.format(
            self.def_rse, self.user, tmp_file1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        cmd = 'rucio upload --rse {0} --scope {1} {2}'.format(
            self.def_rse, self.user, tmp_file1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        # the second upload must NOT report a successful upload
        nose.tools.assert_equal(
            re.search(
                "File {0}:{1} successfully uploaded on the storage".format(
                    self.user, tmp_file1[5:]), out), None)

    def test_add_delete_add_file(self):
        """CLIENT(USER): Add/Delete/Add"""
        tmp_file1 = file_generator()
        # add file
        cmd = 'rucio upload --rse {0} --scope {1} {2}'.format(
            self.def_rse, self.user, tmp_file1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # get the rule for the file
        cmd = "rucio list-rules {0}:{1} | grep {0}:{1} | cut -f1 -d\ ".format(
            self.user, tmp_file1[5:])
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        rule = out
        # delete the file from the catalog
        cmd = "rucio delete-rule {0}".format(rule)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # delete the physical file
        cmd = "find /tmp/rucio_rse/ -name {0} |xargs rm".format(tmp_file1[5:])
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # modify the file to avoid same checksum
        cmd = "echo 'delta' >> {0}".format(tmp_file1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # add the same file
        cmd = 'rucio upload --rse {0} --scope {1} {2}'.format(
            self.def_rse, self.user, tmp_file1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_equal(
            re.search(
                "File {0}:{1} successfully uploaded on the storage".format(
                    self.user, tmp_file1[5:]), out), None)

    def test_attach_files_dataset(self):
        """CLIENT(USER): Rucio attach files to dataset"""
        # Attach files to a dataset using the attach method
        tmp_file1 = file_generator()
        tmp_file2 = file_generator()
        tmp_file3 = file_generator()
        tmp_dsn = self.user + ':DSet' + rse_name_generator()  # something like mock:DSetMOCK_S0M37HING
        # Adding files to a new dataset
        cmd = 'rucio upload --rse {0} --scope {1} {2} {3}'.format(
            self.def_rse, self.user, tmp_file1, tmp_dsn)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        # upload the files
        cmd = 'rucio upload --rse {0} --scope {1} {2} {3}'.format(
            self.def_rse, self.user, tmp_file2, tmp_file3)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        remove(tmp_file1)
        remove(tmp_file2)
        remove(tmp_file3)
        # attach the files to the dataset
        cmd = 'rucio attach {0} {1}:{2} {1}:{3}'.format(
            tmp_dsn, self.user, tmp_file2[5:], tmp_file3[5:])  # trimming '/tmp/' from filenames
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        # searching for the file in the new dataset
        cmd = 'rucio list-files {0}'.format(tmp_dsn)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        # tmp_file2 must be in the dataset
        nose.tools.assert_not_equal(
            re.search("{0}:{1}".format(self.user, tmp_file2[5:]), out), None)
        # tmp_file3 must be in the dataset
        nose.tools.assert_not_equal(
            re.search("{0}:{1}".format(self.user, tmp_file3[5:]), out), None)

    def test_detach_files_dataset(self):
        """CLIENT(USER): Rucio detach files to dataset"""
        # Attach files to a dataset using the attach method
        tmp_file1 = file_generator()
        tmp_file2 = file_generator()
        tmp_file3 = file_generator()
        tmp_dsn = self.user + ':DSet' + rse_name_generator()  # something like mock:DSetMOCK_S0M37HING
        # Adding files to a new dataset
        cmd = 'rucio upload --rse {0} --scope {1} {2} {3} {4} {5}'.format(
            self.def_rse, self.user, tmp_file1, tmp_file2, tmp_file3, tmp_dsn)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        remove(tmp_file1)
        remove(tmp_file2)
        remove(tmp_file3)
        # detach the files from the dataset
        cmd = 'rucio detach {0} {1}:{2} {1}:{3}'.format(
            tmp_dsn, self.user, tmp_file2[5:], tmp_file3[5:])  # trimming '/tmp/' from filenames
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        # searching for the file in the new dataset
        cmd = 'rucio list-files {0}'.format(tmp_dsn)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        # tmp_file1 must be in the dataset
        nose.tools.assert_not_equal(
            re.search("{0}:{1}".format(self.user, tmp_file1[5:]), out), None)
        # tmp_file3 must NOT be in the dataset any more
        nose.tools.assert_equal(
            re.search("{0}:{1}".format(self.user, tmp_file3[5:]), out), None)

    def test_attach_file_twice(self):
        """CLIENT(USER): Rucio attach a file twice"""
        # Attach files to a dataset using the attach method
        tmp_file1 = file_generator()
        tmp_dsn = self.user + ':DSet' + rse_name_generator()  # something like mock:DSetMOCK_S0M37HING
        # Adding files to a new dataset
        cmd = 'rucio upload --rse {0} --scope {1} {2} {3}'.format(
            self.def_rse, self.user, tmp_file1, tmp_dsn)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        remove(tmp_file1)
        # attach the file to the dataset a second time: must be rejected
        cmd = 'rucio attach {0} {1}:{2}'.format(
            tmp_dsn, self.user, tmp_file1[5:])  # trimming '/tmp/' from filenames
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        nose.tools.assert_not_equal(re.search("The file already exists", err), None)

    def test_detach_non_existing_file(self):
        """CLIENT(USER): Rucio detach a non existing file"""
        tmp_file1 = file_generator()
        tmp_dsn = self.user + ':DSet' + rse_name_generator()  # something like mock:DSetMOCK_S0M37HING
        # Adding files to a new dataset
        cmd = 'rucio upload --rse {0} --scope {1} {2} {3}'.format(
            self.def_rse, self.user, tmp_file1, tmp_dsn)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        remove(tmp_file1)
        # detaching a name that was never attached must be rejected
        cmd = 'rucio detach {0} {1}:{2}'.format(
            tmp_dsn, self.user, 'file_ghost')  # trimming '/tmp/' from filenames
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        nose.tools.assert_not_equal(
            re.search("Data identifier not found.", err), None)
class TestBinRucio():
    """End-to-end tests of the `rucio` / `rucio-admin` command line tools.

    Each test shells out via `execute()` and asserts on the captured
    stdout/stderr, so these tests require a running Rucio server and the
    MOCK4 RSE configured in the test environment.

    NOTE(review): this is a nose-style class (no `unittest.TestCase` base,
    lowercase `setup` hook, `nose.tools.*` assertions) while sibling classes
    in this file use `unittest.TestCase` with `setUp` — presumably it
    predates that migration; confirm before converting.
    """

    def setup(self):
        """Per-test fixture: drop any cached auth token, build the clients,
        and make sure the default RSE (MOCK4) has quota and attributes set."""
        try:
            remove('/tmp/.rucio_root/auth_token_root')
        except OSError as e:
            # errno 2 == ENOENT: a missing token file is fine; anything else
            # is a real error and must propagate.
            if e.args[0] != 2:
                raise e
        self.marker = '$> '
        self.host = config_get('client', 'rucio_host')
        self.auth_host = config_get('client', 'auth_host')
        self.user = '******'
        self.def_rse = 'MOCK4'
        self.did_client = DIDClient()
        self.replica_client = ReplicaClient()
        self.account_client = AccountLimitClient()
        # unlimited quota for root on the default RSE
        self.account_client.set_account_limit('root', self.def_rse, -1)
        add_rse_attribute(self.def_rse, 'istape', 'False')
        self.upload_success_str = 'Successfully uploaded file %s'

    def test_rucio_version(self):
        """CLIENT(USER): Rucio version"""
        cmd = 'bin/rucio --version'
        exitcode, out, err = execute(cmd)
        # the version banner is printed on stderr
        nose.tools.assert_true('rucio' in err)

    def test_rucio_ping(self):
        """CLIENT(USER): Rucio ping"""
        cmd = 'rucio --host %s ping' % self.host
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)

    def test_add_account(self):
        """CLIENT(ADMIN): Add account"""
        tmp_val = account_name_generator()
        cmd = 'rucio-admin account add %s' % tmp_val
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, )
        nose.tools.assert_equal('Added new account: %s\n' % tmp_val, out)

    def test_whoami(self):
        """CLIENT(USER): Rucio whoami"""
        cmd = 'rucio whoami'
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, )
        nose.tools.assert_regexp_matches(out, re.compile('.*account.*'))

    def test_add_identity(self):
        """CLIENT(ADMIN): Add identity"""
        tmp_val = account_name_generator()
        cmd = 'rucio-admin account add %s' % tmp_val
        exitcode, out, err = execute(cmd)
        nose.tools.assert_equal('Added new account: %s\n' % tmp_val, out)
        cmd = 'rucio-admin identity add --account %s --type GSS --id [email protected] --email [email protected]' % tmp_val
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, )
        nose.tools.assert_equal(
            'Added new identity to account: [email protected]%s\n' % tmp_val, out)

    def test_del_identity(self):
        """CLIENT(ADMIN): Test del identity"""
        tmp_acc = account_name_generator()
        # create account
        cmd = 'rucio-admin account add %s' % tmp_acc
        exitcode, out, err = execute(cmd)
        # add identity to account
        cmd = 'rucio-admin identity add --account %s --type GSS --id [email protected] --email [email protected]' % tmp_acc
        exitcode, out, err = execute(cmd)
        # delete identity from account
        cmd = 'rucio-admin identity delete --account %s --type GSS --id [email protected]' % tmp_acc
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_equal('Deleted identity: [email protected]\n', out)
        # list identities for account — must now be empty
        cmd = 'rucio-admin account list-identities %s' % (tmp_acc)
        print(self.marker + cmd)
        print(cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_equal('', out)

    def test_attributes(self):
        """CLIENT(ADMIN): Add/List/Delete attributes"""
        tmp_acc = account_name_generator()
        # create account
        cmd = 'rucio-admin account add %s' % tmp_acc
        exitcode, out, err = execute(cmd)
        # add attribute to the account
        cmd = 'rucio-admin account add-attribute {0} --key test_attribute_key --value true'.format(
            tmp_acc)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        nose.tools.assert_equal(0, exitcode)
        # list attributes
        cmd = 'rucio-admin account list-attributes {0}'.format(tmp_acc)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        nose.tools.assert_equal(0, exitcode)
        # delete attribute to the account
        cmd = 'rucio-admin account delete-attribute {0} --key test_attribute_key'.format(
            tmp_acc)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        nose.tools.assert_equal(0, exitcode)

    def test_add_scope(self):
        """CLIENT(ADMIN): Add scope"""
        tmp_scp = scope_name_generator()
        tmp_acc = account_name_generator()
        cmd = 'rucio-admin account add %s' % tmp_acc
        exitcode, out, err = execute(cmd)
        cmd = 'rucio-admin scope add --account %s --scope %s' % (tmp_acc,
                                                                 tmp_scp)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_equal(
            'Added new scope to account: %s-%s\n' % (tmp_scp, tmp_acc), out)

    def test_add_rse(self):
        """CLIENT(ADMIN): Add RSE"""
        tmp_val = rse_name_generator()
        cmd = 'rucio-admin rse add %s' % tmp_val
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, )
        nose.tools.assert_equal('Added new deterministic RSE: %s\n' % tmp_val,
                                out)

    def test_add_rse_nondet(self):
        """CLIENT(ADMIN): Add non-deterministic RSE"""
        tmp_val = rse_name_generator()
        cmd = 'rucio-admin rse add --non-deterministic %s' % tmp_val
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, )
        nose.tools.assert_equal(
            'Added new non-deterministic RSE: %s\n' % tmp_val, out)

    def test_list_rses(self):
        """CLIENT(ADMIN): List RSEs"""
        tmp_val = rse_name_generator()
        cmd = 'rucio-admin rse add %s' % tmp_val
        exitcode, out, err = execute(cmd)
        cmd = 'rucio-admin rse list'
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, )
        nose.tools.assert_regexp_matches(out, re.compile('.*%s.*' % tmp_val))

    def test_upload(self):
        """CLIENT(USER): Upload"""
        tmp_val = rse_name_generator()
        cmd = 'rucio-admin rse add %s' % tmp_val
        exitcode, out, err = execute(cmd)
        # invoked without arguments — only checks the command runs
        cmd = 'rucio upload'
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, )

    def test_download(self):
        """CLIENT(USER): Download"""
        # invoked without arguments — only checks the command runs
        cmd = 'rucio download'
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, )

    def test_upload_file(self):
        """CLIENT(USER): Rucio upload files"""
        tmp_file1 = file_generator()
        tmp_file2 = file_generator()
        tmp_file3 = file_generator()
        cmd = 'rucio -v upload --rse {0} --scope {1} {2} {3} {4}'.format(
            self.def_rse, self.user, tmp_file1, tmp_file2, tmp_file3)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        remove(tmp_file1)
        remove(tmp_file2)
        remove(tmp_file3)
        nose.tools.assert_true((self.upload_success_str %
                                path.basename(tmp_file1)) in out)
        nose.tools.assert_true((self.upload_success_str %
                                path.basename(tmp_file2)) in out)
        nose.tools.assert_true((self.upload_success_str %
                                path.basename(tmp_file3)) in out)

    def test_upload_file_guid(self):
        """CLIENT(USER): Rucio upload file with guid"""
        tmp_file1 = file_generator()
        tmp_guid = generate_uuid()
        cmd = 'rucio -v upload --rse {0} --guid {1} --scope {2} {3}'.format(
            self.def_rse, tmp_guid, self.user, tmp_file1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        remove(tmp_file1)
        nose.tools.assert_true((self.upload_success_str %
                                path.basename(tmp_file1)) in out)

    def test_upload_repeated_file(self):
        """CLIENT(USER): Rucio upload repeated files"""
        # One of the files to upload is already catalogued but was removed
        tmp_file1 = file_generator()
        tmp_file2 = file_generator()
        tmp_file3 = file_generator()
        tmp_file1_name = path.basename(tmp_file1)
        cmd = 'rucio -v upload --rse {0} --scope {1} {2}'.format(
            self.def_rse, self.user, tmp_file1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        # get the rule for the file
        cmd = "rucio list-rules {0}:{1} | grep {0}:{1} | cut -f1 -d\ ".format(
            self.user, tmp_file1_name)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        rule = out
        # delete the file from the catalog
        cmd = "rucio delete-rule {0}".format(rule)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # delete the physical file
        cmd = "find /tmp/rucio_rse/ -name {0} |xargs rm".format(tmp_file1_name)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # re-upload: tmp_file1 was catalogued before, so this is the repeat
        cmd = 'rucio -v upload --rse {0} --scope {1} {2} {3} {4}'.format(
            self.def_rse, self.user, tmp_file1, tmp_file2, tmp_file3)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        remove(tmp_file1)
        remove(tmp_file2)
        remove(tmp_file3)
        nose.tools.assert_true((self.upload_success_str % tmp_file1_name) in
                               out)

    def test_upload_repeated_file_dataset(self):
        """CLIENT(USER): Rucio upload repeated files to dataset"""
        # One of the files to upload is already in the dataset
        tmp_file1 = file_generator()
        tmp_file2 = file_generator()
        tmp_file3 = file_generator()
        tmp_file1_name = path.basename(tmp_file1)
        tmp_file3_name = path.basename(tmp_file3)
        tmp_dsn = self.user + ':DSet' + rse_name_generator(
        )  # something like mock:DSetMOCK_S0M37HING
        # Adding files to a new dataset
        cmd = 'rucio -v upload --rse {0} --scope {1} {2} {3}'.format(
            self.def_rse, self.user, tmp_file1, tmp_dsn)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        # upload the files to the dataset
        cmd = 'rucio -v upload --rse {0} --scope {1} {2} {3} {4} {5}'.format(
            self.def_rse, self.user, tmp_file1, tmp_file2, tmp_file3, tmp_dsn)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        remove(tmp_file1)
        remove(tmp_file2)
        remove(tmp_file3)
        # searching for the file in the new dataset
        cmd = 'rucio list-files {0}'.format(tmp_dsn)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        # tmp_file1 must be in the dataset
        nose.tools.assert_not_equal(
            re.search("{0}:{1}".format(self.user, tmp_file1_name), out), None)
        # tmp_file3 must be in the dataset
        nose.tools.assert_not_equal(
            re.search("{0}:{1}".format(self.user, tmp_file3_name), out), None)

    def test_upload_file_dataset(self):
        """CLIENT(USER): Rucio upload files to dataset"""
        tmp_file1 = file_generator()
        tmp_file2 = file_generator()
        tmp_file3 = file_generator()
        tmp_file1_name = path.basename(tmp_file1)
        tmp_dsn = self.user + ':DSet' + rse_name_generator(
        )  # something like mock:DSetMOCK_S0M37HING
        # Adding files to a new dataset
        cmd = 'rucio -v upload --rse {0} --scope {1} {2} {3} {4} {5}'.format(
            self.def_rse, self.user, tmp_file1, tmp_file2, tmp_file3, tmp_dsn)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        remove(tmp_file1)
        remove(tmp_file2)
        remove(tmp_file3)
        # searching for the file in the new dataset
        cmd = 'rucio list-files {0}'.format(tmp_dsn)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        nose.tools.assert_not_equal(
            re.search("{0}:{1}".format(self.user, tmp_file1_name), out), None)

    def test_upload_adds_md5digest(self):
        """CLIENT(USER): Upload Checksums"""
        # user has a file to upload
        filename = file_generator()
        tmp_file1_name = path.basename(filename)
        file_md5 = md5(filename)
        # user uploads file
        cmd = 'rucio -v upload --rse {0} --scope {1} {2}'.format(
            self.def_rse, self.user, filename)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        # When inspecting the metadata of the new file the user finds the md5 checksum
        meta = self.did_client.get_metadata(scope=self.user,
                                            name=tmp_file1_name)
        nose.tools.assert_in('md5', meta)
        nose.tools.assert_equal(meta['md5'], file_md5)
        remove(filename)

    def test_create_dataset(self):
        """CLIENT(USER): Rucio add dataset"""
        tmp_name = self.user + ':DSet' + rse_name_generator(
        )  # something like mock:DSetMOCK_S0M37HING
        cmd = 'rucio add-dataset ' + tmp_name
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_not_equal(re.search('Added ' + tmp_name, out), None)

    def test_add_files_to_dataset(self):
        """CLIENT(USER): Rucio add files to dataset"""
        tmp_file1 = file_generator()
        tmp_file2 = file_generator()
        tmp_dataset = self.user + ':DSet' + rse_name_generator(
        )  # something like mock:DSetMOCK_S0M37HING
        # add files
        cmd = 'rucio upload --rse {0} --scope {1} {2} {3}'.format(
            self.def_rse, self.user, tmp_file1, tmp_file2)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # create dataset
        cmd = 'rucio add-dataset ' + tmp_dataset
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # add files to dataset
        cmd = 'rucio attach {0} {3}:{1} {3}:{2}'.format(
            tmp_dataset, tmp_file1[5:], tmp_file2[5:],
            self.user)  # triming '/tmp/' from filename
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # find the added files
        cmd = 'rucio list-files ' + tmp_dataset
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_not_equal(re.search(tmp_file1[5:], out), None)

    def test_download_file(self):
        """CLIENT(USER): Rucio download files"""
        tmp_file1 = file_generator()
        # add files
        cmd = 'rucio upload --rse {0} --scope {1} {2}'.format(
            self.def_rse, self.user, tmp_file1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # download files
        cmd = 'rucio -v download --dir /tmp {0}:{1}'.format(
            self.user, tmp_file1[5:])  # triming '/tmp/' from filename
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # search for the files with ls
        cmd = 'ls /tmp/'  # search in /tmp/
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_not_equal(re.search(tmp_file1[5:], out), None)
        # second round: download using a trailing wildcard in the DID name
        tmp_file1 = file_generator()
        # add files
        cmd = 'rucio upload --rse {0} --scope {1} {2}'.format(
            self.def_rse, self.user, tmp_file1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # download files
        cmd = 'rucio -v download --dir /tmp {0}:{1}'.format(
            self.user, tmp_file1[5:-2] + '*')  # triming '/tmp/' from filename
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # search for the files with ls
        cmd = 'ls /tmp/'  # search in /tmp/
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_not_equal(re.search(tmp_file1[5:], out), None)
        # best-effort cleanup of a leftover download directory
        try:
            for i in listdir('data13_hip'):
                unlink('data13_hip/%s' % i)
            rmdir('data13_hip')
        except Exception:
            pass

    def test_download_filter(self):
        """CLIENT(USER): Rucio download with filter options"""
        # Use filter option to download file with wildcarded name
        tmp_file1 = file_generator()
        uuid = generate_uuid()
        cmd = 'rucio upload --rse {0} --scope {1} --guid {2} {3}'.format(
            self.def_rse, self.user, uuid, tmp_file1)
        exitcode, out, err = execute(cmd)
        print(out, err)
        remove(tmp_file1)
        wrong_guid = generate_uuid()
        # a non-matching guid filter must not download the file
        cmd = 'rucio -v download --dir /tmp {0}:{1} --filter guid={2}'.format(
            self.user, '*', wrong_guid)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        cmd = 'ls /tmp/{0}'.format(self.user)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_equal(re.search(tmp_file1[5:], out), None)
        # the matching guid filter must download it
        cmd = 'rucio -v download --dir /tmp {0}:{1} --filter guid={2}'.format(
            self.user, '*', uuid)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        cmd = 'ls /tmp/{0}'.format(self.user)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_not_equal(re.search(tmp_file1[5:], out), None)
        # Only use filter option to download file
        tmp_file1 = file_generator()
        uuid = generate_uuid()
        cmd = 'rucio upload --rse {0} --scope {1} --guid {2} {3}'.format(
            self.def_rse, self.user, uuid, tmp_file1)
        exitcode, out, err = execute(cmd)
        print(out, err)
        remove(tmp_file1)
        wrong_guid = generate_uuid()
        cmd = 'rucio -v download --dir /tmp --scope {0} --filter guid={1}'.format(
            self.user, wrong_guid)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        cmd = 'ls /tmp/{0}'.format(self.user)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_equal(re.search(tmp_file1[5:], out), None)
        cmd = 'rucio -v download --dir /tmp --scope {0} --filter guid={1}'.format(
            self.user, uuid)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        cmd = 'ls /tmp/{0}'.format(self.user)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_not_equal(re.search(tmp_file1[5:], out), None)
        # Only use filter option to download dataset
        tmp_file1 = file_generator()
        dataset_name = 'dataset_%s' % generate_uuid()
        cmd = 'rucio upload --rse {0} --scope {1} {2} {1}:{3}'.format(
            self.def_rse, self.user, tmp_file1, dataset_name)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        remove(tmp_file1)
        # poke the dataset length directly in the DB so the length filter
        # has something deterministic to match against
        db_session = session.get_session()
        db_session.query(models.DataIdentifier).filter_by(
            scope=self.user, name=dataset_name).one().length = 15
        db_session.commit()
        cmd = 'rucio download --dir /tmp --scope {0} --filter length=100'.format(
            self.user)
        exitcode, out, err = execute(cmd)
        cmd = 'ls /tmp/{0}'.format(dataset_name)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_equal(re.search(tmp_file1[5:], out), None)
        cmd = 'rucio download --dir /tmp --scope {0} --filter length=15'.format(
            self.user)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        cmd = 'ls /tmp/{0}'.format(dataset_name)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_not_equal(re.search(tmp_file1[5:], out), None)
        # Use filter option to download dataset with wildcarded name
        tmp_file1 = file_generator()
        dataset_name = 'dataset_%s' % generate_uuid()
        cmd = 'rucio upload --rse {0} --scope {1} {2} {1}:{3}'.format(
            self.def_rse, self.user, tmp_file1, dataset_name)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        remove(tmp_file1)
        db_session = session.get_session()
        db_session.query(models.DataIdentifier).filter_by(
            scope=self.user, name=dataset_name).one().length = 1
        db_session.commit()
        cmd = 'rucio download --dir /tmp {0}:{1} --filter length=10'.format(
            self.user, dataset_name[0:-1] + '*')
        exitcode, out, err = execute(cmd)
        cmd = 'ls /tmp/{0}'.format(dataset_name)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_equal(re.search(tmp_file1[5:], out), None)
        cmd = 'rucio download --dir /tmp {0}:{1} --filter length=1'.format(
            self.user, dataset_name[0:-1] + '*')
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        cmd = 'ls /tmp/{0}'.format(dataset_name)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_not_equal(re.search(tmp_file1[5:], out), None)

    def test_download_succeeds_md5only(self):
        """CLIENT(USER): Rucio download succeeds MD5 only"""
        # user has a file to upload
        filename = file_generator()
        file_md5 = md5(filename)
        filesize = stat(filename).st_size
        lfn = {
            'name': filename[5:],
            'scope': self.user,
            'bytes': filesize,
            'md5': file_md5
        }
        # user uploads file
        self.replica_client.add_replicas(files=[lfn], rse=self.def_rse)
        rse_settings = rsemgr.get_rse_info(self.def_rse)
        protocol = rsemgr.create_protocol(rse_settings, 'write')
        protocol.connect()
        # NOTE(review): `.values()[0]` only works on Python 2 — dict views
        # are not subscriptable on Python 3; confirm target interpreter.
        pfn = protocol.lfns2pfns(lfn).values()[0]
        protocol.put(filename[5:], pfn, filename[:5])
        protocol.close()
        remove(filename)
        # download files
        cmd = 'rucio -v download --dir /tmp {0}:{1}'.format(
            self.user, filename[5:])
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # search for the files with ls
        cmd = 'ls /tmp/{0}'.format(self.user)  # search in /tmp/
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_not_equal(re.search(filename[5:], out), None)
        # best-effort cleanup of a leftover download directory
        try:
            for i in listdir('data13_hip'):
                unlink('data13_hip/%s' % i)
            rmdir('data13_hip')
        except Exception:
            pass

    def test_download_fails_badmd5(self):
        """CLIENT(USER): Rucio download fails on MD5 mismatch"""
        # user has a file to upload
        filename = file_generator()
        file_md5 = md5(filename)
        filesize = stat(filename).st_size
        # register the replica with a deliberately wrong md5
        lfn = {
            'name': filename[5:],
            'scope': self.user,
            'bytes': filesize,
            'md5': '0123456789abcdef0123456789abcdef'
        }
        # user uploads file
        self.replica_client.add_replicas(files=[lfn], rse=self.def_rse)
        rse_settings = rsemgr.get_rse_info(self.def_rse)
        protocol = rsemgr.create_protocol(rse_settings, 'write')
        protocol.connect()
        # NOTE(review): `.values()[0]` only works on Python 2 — dict views
        # are not subscriptable on Python 3; confirm target interpreter.
        pfn = protocol.lfns2pfns(lfn).values()[0]
        protocol.put(filename[5:], pfn, filename[:5])
        protocol.close()
        remove(filename)
        # download file
        cmd = 'rucio -v download --dir /tmp {0}:{1}'.format(
            self.user, filename[5:])
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        report = 'Local\ checksum\:\ {0},\ Rucio\ checksum\:\ 0123456789abcdef0123456789abcdef'.format(
            file_md5)
        print('searching', report, 'in', err)
        nose.tools.assert_not_equal(re.search(report, err), None)
        # The file should not exist
        cmd = 'ls /tmp/'  # search in /tmp/
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_equal(re.search(filename[5:], out), None)
        # best-effort cleanup of a leftover download directory
        try:
            for i in listdir('data13_hip'):
                unlink('data13_hip/%s' % i)
            rmdir('data13_hip')
        except Exception:
            pass

    def test_download_dataset(self):
        """CLIENT(USER): Rucio download dataset"""
        tmp_file1 = file_generator()
        tmp_dataset = self.user + ':DSet' + rse_name_generator(
        )  # something like mock:DSetMOCK_S0M37HING
        # add files
        cmd = 'rucio upload --rse {0} --scope {1} {2}'.format(
            self.def_rse, self.user, tmp_file1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # create dataset
        cmd = 'rucio add-dataset ' + tmp_dataset
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # add files to dataset
        cmd = 'rucio attach {0} {1}:{2}'.format(
            tmp_dataset, self.user,
            tmp_file1[5:])  # triming '/tmp/' from filename
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # download dataset
        cmd = 'rucio -v download --dir /tmp {0}'.format(
            tmp_dataset)  # triming '/tmp/' from filename
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # the success message is reported on stderr
        search = '{0} successfully downloaded'.format(
            tmp_file1[5:])  # triming '/tmp/' from filename
        nose.tools.assert_not_equal(re.search(search, err), None)

    def test_create_rule(self):
        """CLIENT(USER): Rucio add rule"""
        tmp_file1 = file_generator()
        # add files
        cmd = 'rucio upload --rse {0} --scope {1} {2}'.format(
            self.def_rse, self.user, tmp_file1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # three RSEs are created so a rule with 3 copies can be satisfied
        # add rse
        tmp_rse = rse_name_generator()
        cmd = 'rucio-admin rse add {0}'.format(tmp_rse)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        # add quota
        self.account_client.set_account_limit('root', tmp_rse, -1)
        # add rse atributes
        cmd = 'rucio-admin rse set-attribute --rse {0} --key spacetoken --value ATLASSCRATCHDISK'.format(
            tmp_rse)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # add rse
        tmp_rse = rse_name_generator()
        cmd = 'rucio-admin rse add {0}'.format(tmp_rse)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # add quota
        self.account_client.set_account_limit('root', tmp_rse, -1)
        # add rse atributes
        cmd = 'rucio-admin rse set-attribute --rse {0} --key spacetoken --value ATLASSCRATCHDISK'.format(
            tmp_rse)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # add rse
        tmp_rse = rse_name_generator()
        cmd = 'rucio-admin rse add {0}'.format(tmp_rse)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # add quota
        self.account_client.set_account_limit('root', tmp_rse, -1)
        # add rse atributes
        cmd = 'rucio-admin rse set-attribute --rse {0} --key spacetoken --value ATLASSCRATCHDISK'.format(
            tmp_rse)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # add rules
        cmd = "rucio add-rule {0}:{1} 3 'spacetoken=ATLASSCRATCHDISK'".format(
            self.user, tmp_file1[5:])
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        rule = out[:-1]  # triming new line character
        # check if rule exist for the file
        cmd = "rucio list-rules {0}:{1}".format(self.user, tmp_file1[5:])
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        nose.tools.assert_not_equal(re.search(rule, out), None)

    def test_delete_rule(self):
        """CLIENT(USER): rule deletion"""
        self.account_client.set_account_limit('root', self.def_rse, -1)
        tmp_file1 = file_generator()
        # add files
        cmd = 'rucio upload --rse {0} --scope {1} {2}'.format(
            self.def_rse, self.user, tmp_file1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # add rse
        tmp_rse = rse_name_generator()
        cmd = 'rucio-admin rse add {0}'.format(tmp_rse)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        self.account_client.set_account_limit('root', tmp_rse, -1)
        # add rse atributes
        cmd = 'rucio-admin rse set-attribute --rse {0} --key spacetoken --value ATLASSCRATCHDISK'.format(
            tmp_rse)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # add rules
        cmd = "rucio add-rule {0}:{1} 1 'spacetoken=ATLASSCRATCHDISK'".format(
            self.user, tmp_file1[5:])
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(err)
        print(out)
        # get the rules for the file (upload rule + the explicit rule)
        cmd = "rucio list-rules {0}:{1} | grep {0}:{1} | cut -f1 -d\ ".format(
            self.user, tmp_file1[5:])
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        (rule1, rule2) = out.split()
        # delete the rules for the file
        cmd = "rucio delete-rule {0}".format(rule1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        cmd = "rucio delete-rule {0}".format(rule2)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # search for the file
        cmd = "rucio list-dids --filter type=all {0}:{1}".format(
            self.user, tmp_file1[5:])
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # only the table decoration remains once the DID is gone
        nose.tools.assert_equal(5, len(out.splitlines()))

    def test_add_file_twice(self):
        """CLIENT(USER): Add file twice"""
        tmp_file1 = file_generator()
        # add file twice
        cmd = 'rucio upload --rse {0} --scope {1} {2}'.format(
            self.def_rse, self.user, tmp_file1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        cmd = 'rucio upload --rse {0} --scope {1} {2}'.format(
            self.def_rse, self.user, tmp_file1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        # the second upload must NOT report success
        nose.tools.assert_equal(
            re.search(
                "File {0}:{1} successfully uploaded on the storage".format(
                    self.user, tmp_file1[5:]), out), None)

    def test_add_delete_add_file(self):
        """CLIENT(USER): Add/Delete/Add"""
        tmp_file1 = file_generator()
        # add file
        cmd = 'rucio upload --rse {0} --scope {1} {2}'.format(
            self.def_rse, self.user, tmp_file1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # get the rule for the file
        cmd = "rucio list-rules {0}:{1} | grep {0}:{1} | cut -f1 -d\ ".format(
            self.user, tmp_file1[5:])
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        rule = out
        # delete the file from the catalog
        cmd = "rucio delete-rule {0}".format(rule)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # delete the fisical file
        cmd = "find /tmp/rucio_rse/ -name {0} |xargs rm".format(tmp_file1[5:])
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # modify the file to avoid same checksum
        cmd = "echo 'delta' >> {0}".format(tmp_file1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # add the same file
        cmd = 'rucio upload --rse {0} --scope {1} {2}'.format(
            self.def_rse, self.user, tmp_file1)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out, err)
        # re-adding the same DID must NOT report success
        nose.tools.assert_equal(
            re.search(
                "File {0}:{1} successfully uploaded on the storage".format(
                    self.user, tmp_file1[5:]), out), None)

    def test_attach_files_dataset(self):
        """CLIENT(USER): Rucio attach files to dataset"""
        # Attach files to a dataset using the attach method
        tmp_file1 = file_generator()
        tmp_file2 = file_generator()
        tmp_file3 = file_generator()
        tmp_dsn = self.user + ':DSet' + rse_name_generator(
        )  # something like mock:DSetMOCK_S0M37HING
        # Adding files to a new dataset
        cmd = 'rucio upload --rse {0} --scope {1} {2} {3}'.format(
            self.def_rse, self.user, tmp_file1, tmp_dsn)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        # upload the files
        cmd = 'rucio upload --rse {0} --scope {1} {2} {3}'.format(
            self.def_rse, self.user, tmp_file2, tmp_file3)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        remove(tmp_file1)
        remove(tmp_file2)
        remove(tmp_file3)
        # attach the files to the dataset
        cmd = 'rucio attach {0} {1}:{2} {1}:{3}'.format(
            tmp_dsn, self.user, tmp_file2[5:],
            tmp_file3[5:])  # triming '/tmp/' from filenames
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        # searching for the file in the new dataset
        cmd = 'rucio list-files {0}'.format(tmp_dsn)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        # tmp_file2 must be in the dataset
        nose.tools.assert_not_equal(
            re.search("{0}:{1}".format(self.user, tmp_file2[5:]), out), None)
        # tmp_file3 must be in the dataset
        nose.tools.assert_not_equal(
            re.search("{0}:{1}".format(self.user, tmp_file3[5:]), out), None)

    def test_detach_files_dataset(self):
        """CLIENT(USER): Rucio detach files to dataset"""
        # Attach files to a dataset using the attach method
        tmp_file1 = file_generator()
        tmp_file2 = file_generator()
        tmp_file3 = file_generator()
        tmp_dsn = self.user + ':DSet' + rse_name_generator(
        )  # something like mock:DSetMOCK_S0M37HING
        # Adding files to a new dataset
        cmd = 'rucio upload --rse {0} --scope {1} {2} {3} {4} {5}'.format(
            self.def_rse, self.user, tmp_file1, tmp_file2, tmp_file3, tmp_dsn)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        remove(tmp_file1)
        remove(tmp_file2)
        remove(tmp_file3)
        # detach the files to the dataset
        cmd = 'rucio detach {0} {1}:{2} {1}:{3}'.format(
            tmp_dsn, self.user, tmp_file2[5:],
            tmp_file3[5:])  # triming '/tmp/' from filenames
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        # searching for the file in the new dataset
        cmd = 'rucio list-files {0}'.format(tmp_dsn)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        # tmp_file1 must be in the dataset
        nose.tools.assert_not_equal(
            re.search("{0}:{1}".format(self.user, tmp_file1[5:]), out), None)
        # tmp_file3 must NOT be in the dataset anymore (it was detached)
        nose.tools.assert_equal(
            re.search("{0}:{1}".format(self.user, tmp_file3[5:]), out), None)

    def test_attach_file_twice(self):
        """CLIENT(USER): Rucio attach a file twice"""
        # Attach files to a dataset using the attach method
        tmp_file1 = file_generator()
        tmp_dsn = self.user + ':DSet' + rse_name_generator(
        )  # something like mock:DSetMOCK_S0M37HING
        # Adding files to a new dataset
        cmd = 'rucio upload --rse {0} --scope {1} {2} {3}'.format(
            self.def_rse, self.user, tmp_file1, tmp_dsn)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        remove(tmp_file1)
        # attach the files to the dataset
        cmd = 'rucio attach {0} {1}:{2}'.format(
            tmp_dsn, self.user,
            tmp_file1[5:])  # triming '/tmp/' from filenames
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        nose.tools.assert_not_equal(re.search("The file already exists", err),
                                    None)

    def test_attach_dataset_twice(self):
        """ CLIENT(USER): Rucio attach a dataset twice """
        container = 'container_%s' % generate_uuid()
        dataset = 'dataset_%s' % generate_uuid()
        self.did_client.add_container(scope=self.user, name=container)
        self.did_client.add_dataset(scope=self.user, name=dataset)
        # Attach dataset to container
        cmd = 'rucio attach {0}:{1} {0}:{2}'.format(self.user, container,
                                                    dataset)
        exitcode, out, err = execute(cmd)
        # Attach again
        cmd = 'rucio attach {0}:{1} {0}:{2}'.format(self.user, container,
                                                    dataset)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        nose.tools.assert_not_equal(
            re.search(
                "Data identifier already added to the destination content",
                err), None)

    def test_detach_non_existing_file(self):
        """CLIENT(USER): Rucio detach a non existing file"""
        tmp_file1 = file_generator()
        tmp_dsn = self.user + ':DSet' + rse_name_generator(
        )  # something like mock:DSetMOCK_S0M37HING
        # Adding files to a new dataset
        cmd = 'rucio upload --rse {0} --scope {1} {2} {3}'.format(
            self.def_rse, self.user, tmp_file1, tmp_dsn)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        remove(tmp_file1)
        # try to detach a DID that was never attached
        cmd = 'rucio detach {0} {1}:{2}'.format(
            tmp_dsn, self.user,
            'file_ghost')  # triming '/tmp/' from filenames
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        nose.tools.assert_not_equal(
            re.search("Data identifier not found.", err), None)

    def test_list_did_recursive(self):
        """ CLIENT(USER): List did recursive """
        # Setup nested collections: container_1 > container_2 > container_3
        tmp_scope = 'mock'
        tmp_container_1 = 'container_%s' % generate_uuid()
        cmd = 'rucio add-container {0}:{1}'.format(tmp_scope, tmp_container_1)
        exitcode, out, err = execute(cmd)
        tmp_container_2 = 'container_%s' % generate_uuid()
        cmd = 'rucio add-container {0}:{1}'.format(tmp_scope, tmp_container_2)
        exitcode, out, err = execute(cmd)
        tmp_container_3 = 'container_%s' % generate_uuid()
        cmd = 'rucio add-container {0}:{1}'.format(tmp_scope, tmp_container_3)
        exitcode, out, err = execute(cmd)
        cmd = 'rucio attach {0}:{1} {0}:{2}'.format(tmp_scope, tmp_container_1,
                                                    tmp_container_2)
        exitcode, out, err = execute(cmd)
        cmd = 'rucio attach {0}:{1} {0}:{2}'.format(tmp_scope, tmp_container_2,
                                                    tmp_container_3)
        exitcode, out, err = execute(cmd)
        # All attached DIDs are expected
        cmd = 'rucio list-dids {0}:{1} --recursive'.format(
            tmp_scope, tmp_container_1)
        exitcode, out, err = execute(cmd)
        nose.tools.assert_not_equal(re.search(tmp_container_1, out), None)
        nose.tools.assert_not_equal(re.search(tmp_container_2, out), None)
        nose.tools.assert_not_equal(re.search(tmp_container_3, out), None)
        # Wildcards are not allowed to use with --recursive
        cmd = 'rucio list-dids {0}:* --recursive'.format(tmp_scope)
        exitcode, out, err = execute(cmd)
        nose.tools.assert_not_equal(
            re.search("Option recursive cannot be used with wildcards", err),
            None)

    def test_attach_many_dids(self):
        """ CLIENT(USER): Rucio attach many (>1000) DIDs """
        # Setup data for CLI check
        tmp_dsn_name = 'Container' + rse_name_generator()
        tmp_dsn_did = self.user + ':' + tmp_dsn_name
        self.did_client.add_did(scope=self.user, name=tmp_dsn_name,
                                type='CONTAINER')
        files = [{
            'name': 'dsn_%s' % generate_uuid(),
            'scope': self.user,
            'type': 'DATASET'
        } for i in range(0, 1500)]
        # the server-side bulk add is limited, hence two batches
        self.did_client.add_dids(files[:1000])
        self.did_client.add_dids(files[1000:])
        # Attaching over 1000 DIDs with CLI
        cmd = 'rucio attach {0}'.format(tmp_dsn_did)
        for tmp_file in files:
            cmd += ' {0}:{1}'.format(tmp_file['scope'], tmp_file['name'])
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        # Checking if the execution was successfull and if the DIDs belong together
        nose.tools.assert_not_equal(
            re.search('DIDs successfully attached', out), None)
        cmd = 'rucio list-content {0}'.format(tmp_dsn_did)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        # first dataset must be in the container
        nose.tools.assert_not_equal(
            re.search("{0}:{1}".format(self.user, files[0]['name']), out),
            None)
        # last dataset must be in the container
        nose.tools.assert_not_equal(
            re.search("{0}:{1}".format(self.user, files[-1]['name']), out),
            None)
        # Setup data with file
        did_file_path = 'list_dids.txt'
        files = [{
            'name': 'dsn_%s' % generate_uuid(),
            'scope': self.user,
            'type': 'DATASET'
        } for i in range(0, 1500)]
        self.did_client.add_dids(files[:1000])
        self.did_client.add_dids(files[1000:])
        with open(did_file_path, 'w') as did_file:
            for file in files:
                did_file.write(file['scope'] + ':' + file['name'] + '\n')
            # redundant close — the with block already closes the file
            did_file.close()
        # Attaching over 1000 files per file
        cmd = 'rucio attach {0} -f {1}'.format(tmp_dsn_did, did_file_path)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        print(out)
        print(err)
        remove(did_file_path)
        # Checking if the execution was successfull and if the DIDs belong together
        nose.tools.assert_not_equal(
            re.search('DIDs successfully attached', out), None)
        cmd = 'rucio list-content {0}'.format(tmp_dsn_did)
        print(self.marker + cmd)
        exitcode, out, err = execute(cmd)
        # first file must be in the dataset
        nose.tools.assert_not_equal(
            re.search("{0}:{1}".format(self.user, files[0]['name']), out),
            None)
        # last file must be in the dataset
        nose.tools.assert_not_equal(
            re.search("{0}:{1}".format(self.user, files[-1]['name']), out),
            None)

    def test_attach_many_dids_twice(self):
        """ CLIENT(USER) Attach many (>1000) DIDs twice """
        # Setup data for CLI check
        container_name = 'container' + generate_uuid()
        container = self.user + ':' + container_name
        self.did_client.add_did(scope=self.user, name=container_name,
                                type='CONTAINER')
        datasets = [{
            'name': 'dsn_%s' % generate_uuid(),
            'scope': self.user,
            'type': 'DATASET'
        } for i in range(0, 1500)]
        self.did_client.add_dids(datasets[:1000])
        self.did_client.add_dids(datasets[1000:])
        # Attaching over 1000 DIDs with CLI
        cmd = 'rucio attach {0}'.format(container)
        for dataset in datasets:
            cmd += ' {0}:{1}'.format(dataset['scope'], dataset['name'])
        exitcode, out, err = execute(cmd)
        # Attaching twice
        cmd = 'rucio attach {0}'.format(container)
        for dataset in datasets:
            cmd += ' {0}:{1}'.format(dataset['scope'], dataset['name'])
        exitcode, out, err = execute(cmd)
        nose.tools.assert_not_equal(
            re.search("DIDs successfully attached", out), None)
        # Attaching twice plus one DID that is not already attached
        new_dataset = {
            'name': 'dsn_%s' % generate_uuid(),
            'scope': self.user,
            'type': 'DATASET'
        }
        datasets.append(new_dataset)
        self.did_client.add_did(scope=self.user, name=new_dataset['name'],
                                type='DATASET')
        cmd = 'rucio attach {0}'.format(container)
        for dataset in datasets:
            cmd += ' {0}:{1}'.format(dataset['scope'], dataset['name'])
        exitcode, out, err = execute(cmd)
        nose.tools.assert_not_equal(
            re.search("DIDs successfully attached", out), None)
        cmd = 'rucio list-content {0}'.format(container)
        exitcode, out, err = execute(cmd)
        # the genuinely new dataset must now appear in the container
        nose.tools.assert_not_equal(
            re.search("{0}:{1}".format(self.user, new_dataset['name']), out),
            None)
def main(argv=None):
    """Print per-campaign event counts of mc16_13TeV merge.AOD containers for one DSID.

    Reads the DSID, campaign list and flags from commandline(), queries Rucio
    for matching containers, sums the 'events' counts of their files, and
    prints one line per container whose campaign matches.

    NOTE(review): Python 2 code (print statements, bare except clauses).
    NOTE(review): the bare except clauses swallow all errors, including
    KeyboardInterrupt — narrowing to ImportError/Exception would be safer.
    """
    import pprint
    pp = pprint.PrettyPrinter(indent=1)
    try:
        from rucio.client.didclient import DIDClient
        try:
            client = DIDClient()
        except:
            print "Failed to get DIDClient"
            return
    except:
        print "Please first setup rucio"
        return
    if argv is None:
        argv = sys.argv
    # commandline() is defined elsewhere in this file; returns CLI options.
    dsid, campaigns, debug, verbose = commandline()
    if verbose:
        print(__doc__)
    scope = "mc16_13TeV"
    name = scope + "." + dsid + ".*.merge.AOD.*"
    #name = scope + "." + dsid + ".*.recon.AOD.*"
    if verbose:
        print "Inputs:", scope, dsid, name
        print
    dids = {'name': name}
    count = 0
    # NOTE(review): unbound-method call style; equivalent to
    # client.list_dids(scope, dids, 'container').
    datasetList = DIDClient.list_dids(client, scope, dids, 'container')
    for dsn in datasetList:
        count += 1
        container = dsn
        contevents = 0       # events summed over all tasks in this container
        contcampaigns = []   # distinct campaigns seen in this container
        matchcampaign = False
        # Split an explicit 'scope:name' DID, else derive scope from the name.
        if ':' in container:
            scope = container.split(':')[0]
            container = container.split(':')[1]
        else:
            scope = container.split('.')[0]
        contents = DIDClient.list_content(client, scope, container)
        for task in contents:
            name = task['name']
            scope = name.split('.')[0]
            if debug:
                print name
            meta = DIDClient.get_metadata(client, scope, name)
            if debug:
                pp.pprint(meta)
            # Campaign metadata is of the form 'prefix:CAMPAIGN' here.
            campaign = meta['campaign'].split(':')[1]
            if debug:
                print campaign
            if campaign in campaigns:
                matchcampaign = True
            dsnInfo = DIDClient.list_files(client, scope, name)
            events = 0
            for data in dsnInfo:
                events += int(data['events'])
            if debug:
                print events
            contevents += events
            if campaign not in contcampaigns:
                contcampaigns.append(campaign)
        # Only report containers whose campaign matched the requested list.
        if matchcampaign:
            print '%10s %10s %s' % (",".join(contcampaigns), str(contevents), container)
    if count == 0:
        print "No merge.AOD containers for", dsid, "found [", name, "]"
def run(dsid, campaigns, summary, output, ratio, aod, evnt, verbose, argv=None, run_deriv=False):
    """Collect and print EVNT / AOD (and optionally DAOD_EXOT5) event counts for one DSID.

    Queries Rucio for containers matching the DSID in the mc15_13TeV (EVNT)
    and mc16_13TeV (AOD, derivation) scopes, sums per-campaign event counts,
    prints TID/summary/ratio tables depending on the flags, and returns the
    tuple (aod_mc_map, evnt_mc_map, deriv_mc_map).

    NOTE(review): Python 2 code (print statements, dict.iteritems(), bare
    except clauses).
    """
    import pprint
    import re
    pp = pprint.PrettyPrinter(indent=1)
    try:
        from rucio.client.didclient import DIDClient
        try:
            client = DIDClient()
        except:
            print "Failed to get DIDClient"
            return
    except:
        print "Please first setup rucio"
        return
    if argv is None:
        argv = sys.argv
    # If no output-selection flag was given, behave as if all were requested.
    allflags = not (output or ratio or aod or evnt or verbose)
    if verbose:
        print(__doc__)
    if not (output or summary):
        print "=================================== TIDs ==================================="
    ############################################################
    # EVNT
    ############################################################
    if not (output or summary):
        print "---- EVNT ----"
    scope = "mc15_13TeV"
    count = 0
    # Per-campaign accumulators for evgen event counts.
    evnt_mc_map = {"Name": "mySAMPLE", "shortName": "mySAMPLE", "TOT": 0, "MC15c": 0, "MC16a": 0, "MC16c": 0, "MC16d": 0, "MC16e": 0}
    name = scope + "." + dsid + ".*.EVNT.*"
    dids = {'name': name}
    List = DIDClient.list_dids(client, scope, dids, 'container')
    for dsn in List:
        short_name = dsn.split(".")[1] + "." + dsn.split(".")[2]
        count += 1
        container = dsn
        contevents = 0
        contcampaigns = []
        matchcampaign = False
        if ':' in container:
            scope = container.split(':')[0]
            container = container.split(':')[1]
        else:
            scope = container.split('.')[0]
        contents = DIDClient.list_content(client, scope, container)
        for task in contents:
            name = task['name']
            if verbose:
                print name
            scope = name.split('.')[0]
            meta = DIDClient.get_metadata(client, scope, name)
            if verbose:
                pp.pprint(meta)
            # Campaign metadata may or may not carry a 'prefix:' part.
            if ':' in meta['campaign']:
                campaign = meta['campaign'].split(':')[1]
            else:
                campaign = meta['campaign']
            if verbose:
                print campaign
            if campaign in campaigns:
                matchcampaign = True
            #campaign = meta['campaign'].split(':')[1]
            dsnInfo = DIDClient.list_files(client, scope, name)
            events = 0
            for data in dsnInfo:
                if data['events'] is not None:
                    events += int(data['events'])
            contevents += events
            if verbose:
                print events
            # print campaign, events, tid
            if not (output or summary):
                print '*EVNT %10s %10s %s' % (campaign, str(events), name)
            # Store events in each campaign
            evnt_mc_map["Name"] = dsn
            evnt_mc_map["shortName"] = short_name
            if campaign not in contcampaigns:
                contcampaigns.append(campaign)
            if ",".join(contcampaigns) not in evnt_mc_map:
                evnt_mc_map[",".join(contcampaigns)] = 0
            evnt_mc_map[campaign] += events
        if matchcampaign:
            evnt_mc_map["TOT"] += contevents
    if count == 0:
        print "No evgen.EVNT containers for", dsid, "found [", name, "]"
    ############################################################
    # AOD
    ############################################################
    if not (output or summary):
        print "---- AOD ----"
    count = 0
    aod_mc_map = {"Name": "mySAMPLE", "shortName": "mySAMPLE", "TOT": 0, "MC16a": 0, "MC16c": 0, "MC16d": 0, "MC16e": 0}
    aod_name_map = {"MC16a": "", "MC16c": "", "MC16d": "", "MC16e": ""}
    scope = "mc16_13TeV"
    name = scope + "." + dsid + ".*.recon.AOD.*"
    dids = {'name': name}
    List = DIDClient.list_dids(client, scope, dids, 'container')
    for dsn in List:
        # Only consider containers whose tags look like AOD.eXXXX_[sa]YYYY_rZZZZ.
        pattern = re.compile(r'AOD.e\d\d\d\d_[s|a]\d{3,4}_r*\d')
        if verbose:
            print dsn, "\n > pattern AOD.eXXXX_a|sYYYY_rZZZZ is found: ", (pattern.search(dsn) is not None)
        if not pattern.search(dsn):
            continue
        short_name = dsn.split(".")[1] + "." + dsn.split(".")[2]
        count += 1
        container = dsn
        contevents = 0
        contcampaigns = []
        matchcampaign = False
        if ':' in container:
            scope = container.split(':')[0]
            container = container.split(':')[1]
        else:
            scope = container.split('.')[0]
        contents = DIDClient.list_content(client, scope, container)
        for task in contents:
            name = task['name']
            if verbose:
                print name
            scope = name.split('.')[0]
            meta = DIDClient.get_metadata(client, scope, name)
            if verbose:
                pp.pprint(meta)
            if ':' in meta['campaign']:
                campaign = meta['campaign'].split(':')[1]
            else:
                campaign = meta['campaign']
            if verbose:
                print campaign
            if campaign in campaigns:
                matchcampaign = True
            dsnInfo = DIDClient.list_files(client, scope, name)
            events = 0
            if campaign not in contcampaigns:
                contcampaigns.append(campaign)
            # Skip tasks whose reconstruction r-tag does not match the campaign.
            if (campaign == "MC16a" and "r9364" not in name):
                if verbose:
                    print "Found MC16a Sample but not r-tag!"
                continue
            if (campaign == "MC16c" and "r9781" not in name):
                if verbose:
                    print "Found MC16c Sample but not r-tag!"
                continue
            if (campaign == "MC16d" and "r10201" not in name):
                if verbose:
                    print "Found MC16d Sample but not r-tag!"
                continue
            if (campaign == "MC16e" and "r10724" not in name):  #10724
                if verbose:
                    print "Found MC16e Sample but not r-tag!"
                continue
            for data in dsnInfo:
                events += int(data['events'])
            contevents += events
            if verbose:
                print events
            # print campaign, events, tid
            if not (output or summary):
                print '*AOD %10s %10s %s' % (campaign, str(events), name)
            # Store events in each campaign
            aod_mc_map["Name"] = dsn
            aod_mc_map["shortName"] = short_name
        if matchcampaign:
            aod_mc_map[",".join(contcampaigns)] += contevents
            aod_name_map[",".join(contcampaigns)] = container
            aod_mc_map["TOT"] += contevents
    if count == 0:
        print "No recon.AOD containers for", dsid, "found [", name, "]"
    # print EXOT5
    if not (output or summary) and run_deriv:
        print '---Running derivation of EXOT5----'
    deriv_mc_map = {"Name": "mySAMPLE", "shortName": "mySAMPLE", "TOT": 0, "MC16a": 0, "MC16c": 0, "MC16d": 0, "MC16e": 0, "mc16_13TeV": 0}
    name = scope + "." + dsid + ".*.deriv.DAOD_EXOT5.*"
    dids = {'name': name}
    List = []
    if run_deriv:
        List = DIDClient.list_dids(client, scope, dids, 'container')
    campaign = ""
    # NOTE(review): this loop reuses contcampaigns/matchcampaign/contevents/
    # short_name left over from the AOD section without re-initialising them
    # per container — looks unintentional; verify before relying on deriv counts.
    for dsn in List:
        container = dsn
        if ':' in container:
            scope = container.split(':')[0]
            container = container.split(':')[1]
        else:
            scope = container.split('.')[0]
        contents = DIDClient.list_content(client, scope, container)
        # Campaign is derived from the r-tag embedded in the container name.
        if "_r9364" in container:
            campaign = 'MC16a'
        elif "_r9781" in container:
            campaign = 'MC16c'
        elif "_r10201" in container:
            campaign = 'MC16d'
        elif "_r10724" in container:
            campaign = 'MC16e'
        else:
            campaign = 'mc16_13TeV'
        # Only consider the p3575/p3627 derivation p-tags.
        if ('p3575' not in container) and ('p3627' not in container):
            continue
        if campaign == 'MC16a' and 'r9364_p3575' not in container:
            continue
        for task in contents:
            name = task['name']
            if verbose:
                print name
            scope = name.split('.')[0]
            meta = DIDClient.get_metadata(client, scope, name)
            if verbose:
                pp.pprint(meta)
            #if ':' in meta['campaign']:
            #    campaign = meta['campaign'].split(':')[1]
            #else:
            #    campaign = meta['campaign']
            if verbose:
                print campaign
            if campaign in campaigns:
                matchcampaign = True
            dsnInfo = DIDClient.list_files(client, scope, name)
            events = 0
            if campaign not in contcampaigns:
                contcampaigns.append(campaign)
            if (campaign == "MC16a" and ("r9364" not in name)):
                if verbose:
                    print "Found MC16a Sample but not r-tag!"
                continue
            if (campaign == "MC16c" and "r9781" not in name):
                if verbose:
                    print "Found MC16c Sample but not r-tag!"
                continue
            if (campaign == "MC16d" and "r10201" not in name):
                if verbose:
                    print "Found MC16d Sample but not r-tag!"
                continue
            if (campaign == "MC16e" and "r10724" not in name):  #10724
                if verbose:
                    print "Found MC16e Sample but not r-tag!"
                continue
            for data in dsnInfo:
                events += int(data['events'])
            contevents += events
            if verbose:
                print events
            # print campaign, events, tid
            if not (output or summary):
                print '*EXOT5 %10s %10s %s' % (campaign, str(events), name)
            # Store events in each campaign
            deriv_mc_map["Name"] = dsn
            deriv_mc_map["shortName"] = short_name
        if matchcampaign and campaigns != '':
            #deriv_mc_map[",".join(contcampaigns)] += contevenst
            deriv_mc_map[campaign] += contevents
            #aod_name_map[",".join(contcampaigns)] = container
            deriv_mc_map["TOT"] += contevents
    ############################################################
    # Print Summary
    ############################################################
    if not (output):
        print "\n=================================== Summary ==================================="
    # EVNT
    if not (output):
        print "---- EVNT ----"
        print '#EVNT: %10s' % (str(evnt_mc_map["Name"]))
    if (output and evnt and evnt_mc_map["Name"]):
        print '%10s' % (str(evnt_mc_map["Name"]))
    for key, value in evnt_mc_map.iteritems():
        if key in campaigns:
            if not (output):
                print '+EVNT %10s %10s ' % (key, str(value))
    if not (output):
        print '=EVNT %10s %10s ' % ("TOT", str(evnt_mc_map["TOT"]))
    # AOD
    if not (output):
        print "---- AOD ----"
        print '#AOD: %10s' % (str(aod_mc_map["shortName"]))
    for key, value in aod_mc_map.iteritems():
        if key in campaigns:
            if not (output):
                print '+AOD %10s %10s %10s' % (key, str(value), str(aod_name_map[key]))
            if (output and aod_name_map[key] and aod):
                print '%s %s' % (key, str(aod_name_map[key]))
    if not (output):
        print '=AOD %10s %10s' % ("TOT", str(aod_mc_map["TOT"]))
    ############################################################
    # Print Output
    ############################################################
    if not (output):
        print "\n=================================== Final ==================================="
    if not (output):
        print "Short Name - Total EVNT - AOD/EVNT(MC16a) - AOD/EVNT(MC16c) - AOD/EVNT(MC16d) - AOD/EVNT(MC16e)"
    '''
    print '%10s %10s %10s/%s=%.2f %10s/%s=%.2f %10s/%s=%.2f %10s/%s=%.2f' % (evnt_mc_map["shortName"],
    '''
    # AOD/EVNT efficiency line; MC16a/MC16d denominators fold in the
    # predecessor campaigns (MC15c, MC16c). safe_div is defined elsewhere
    # in this file and presumably guards against division by zero.
    if (ratio or allflags):
        print '%s %s %s %s %.2f %s %s %.2f %s %s %.2f %s %s %.2f' % (
            evnt_mc_map["shortName"],
            str(evnt_mc_map["TOT"] / 1.e6),
            str(aod_mc_map["MC16a"] / 1e6),
            str((evnt_mc_map["MC16a"] + evnt_mc_map["MC15c"]) / 1e6),
            safe_div(float(aod_mc_map["MC16a"]), (evnt_mc_map["MC16a"] + evnt_mc_map["MC15c"])),
            str(aod_mc_map["MC16c"] / 1e6),
            str(evnt_mc_map["MC16c"] / 1e6),
            safe_div(float(aod_mc_map["MC16c"]), evnt_mc_map["MC16c"]),
            str(aod_mc_map["MC16d"] / 1e6),
            str((evnt_mc_map["MC16d"] + evnt_mc_map["MC16c"]) / 1e6),
            safe_div(float(aod_mc_map["MC16d"]), (evnt_mc_map["MC16d"] + evnt_mc_map["MC16c"])),
            str(aod_mc_map["MC16e"] / 1e6),
            str(evnt_mc_map["MC16e"] / 1e6),
            safe_div(float(aod_mc_map["MC16e"]), evnt_mc_map["MC16e"]))
    # return the event count information
    return aod_mc_map, evnt_mc_map, deriv_mc_map
} # perform upload uploadClient.upload([file]) # In[4]: did = list(client.list_replicas([{'scope': Default_Scope, 'name': name_file}])) print(json.dumps(did[0], indent=4, sort_keys=True)) # In[5]: try: get_meta = didc.get_metadata(scope=Default_Scope, name=name_file, plugin='ALL') for x in get_meta: print(x, ':', get_meta[str(x)]) except: print('no metadata associated to ', name_file) # In[6]: today = str(time.strftime('%Y%m%d')) set_meta = didc.set_metadata(scope=Default_Scope, name=name_file, key='night', value=today, recursive=False)
scope = did_list[0] filename = did_list[1] else: filename = did scope = 'ams-user-testuser1' rse_name = 'TW-EOS00_AMS02DATADISK' adler32 = '' md5 = '' bytes = 0 #print 'before:' for x in repCli.list_replicas([{'scope': scope, 'name': filename}]): adler32 = x['adler32'] md5 = x['md5'] bytes = x['bytes'] # print x file_meta = didCli.get_metadata(scope, filename) ##repCli.delete_replicas(rse_name, [{'scope': scope, 'name': filename}]) ##print 'after deletion:' ##for x in rep.list_replicas([{'scope': scope, 'name': filename}]): ## print x try: repCli.add_replica(rse_name, scope, filename, bytes, adler32, md5, file_meta) except exception.Duplicate: print 'already replicated, but adding rules' ruleCli.add_replication_rule(dids=[{'scope': scope, 'name': filename}], copies=1, \ rse_expression=rse_name, grouping='DATASET') continue ruleCli.add_replication_rule(dids=[{'scope': scope, 'name': filename}], copies=1, \ rse_expression=rse_name, grouping='DATASET') #print 'after add:'
def lfn2pfn_DUNE(scope, name, rse, rse_attrs, protocol_attrs):
    """Construct (and cache) the tape-backed PFN for a DUNE file.

    Looks up the file's metadata in MetaCat and builds a deterministic path:
    ``pnfs/dune/tape_backed/dunepro/<run_type>/<data_tier>/<year>/<file_type>/
    <data_stream>/<campaign>/<h1>/<h2>/<h3>/<h4>/<name>``.
    The computed PFN is memoised in Rucio's DID metadata under the key
    ``PFN_<rse>`` so subsequent calls skip the MetaCat round trip.

    :param scope: plain-string Rucio scope of the DID.
    :param name: file name within the scope.
    :param rse: RSE name; used only to form the per-RSE metadata cache key.
    :param rse_attrs: RSE attributes (unused; part of the lfn2pfn API).
    :param protocol_attrs: protocol attributes (unused; part of the lfn2pfn API).
    :returns: the PFN string.
    :raises ValueError: if no MetaCat URL is configured.
    """
    from datetime import datetime, timezone
    from rucio.common import config
    from rucio.common.types import InternalScope
    from rucio.rse import rsemanager
    from metacat.webapi import MetaCatClient

    # current URL: https://metacat.fnal.gov:9443/dune_meta_demo/app
    metacat_url = config.config_get(
        'policy', 'metacat_base_url') or os.environ.get("METACAT_SERVER_URL")
    if metacat_url is None:
        raise ValueError("MetaCat client URL is not configured")
    metacat_client = MetaCatClient(metacat_url)

    def get_metadata_field(metadata, field):
        # Missing fields become the literal string 'None' so they still
        # yield a path component.
        if field in metadata:
            return metadata[field]
        return 'None'

    # Check whether the PFN is already cached in Rucio's metadata system.
    # CLIENT_MODE / SERVER_MODE determine which metadata API is available.
    didclient = None
    didmd = {}
    internal_scope = InternalScope(scope)
    if getattr(rsemanager, 'CLIENT_MODE', None):
        from rucio.client.didclient import DIDClient
        didclient = DIDClient()
        didmd = didclient.get_metadata(internal_scope, name)
    if getattr(rsemanager, 'SERVER_MODE', None):
        from rucio.core.did import get_metadata
        didmd = get_metadata(internal_scope, name)

    # Cache hit: return the previously computed PFN.
    md_key = 'PFN_' + rse
    if md_key in didmd:
        return didmd[md_key]

    # Cache miss: fetch the file's metadata from MetaCat.
    lfn = scope + ':' + name
    jsondata = metacat_client.get_file(name=lfn)
    metadata = jsondata["metadata"]

    # Determine the year from the first available timestamp
    # (start time, then end time, then record-creation time).
    timestamp = None
    if 'core.start_time' in metadata:
        timestamp = metadata['core.start_time']
    elif 'core.end_time' in metadata:
        timestamp = metadata['core.end_time']
    elif 'created_timestamp' in jsondata:
        timestamp = jsondata['created_timestamp']
    if timestamp is None:
        year = 'None'
    else:
        # Timestamps are taken as UTC epoch seconds -- TODO confirm with
        # MetaCat schema. fromtimestamp(tz=utc) replaces the deprecated
        # datetime.utcfromtimestamp; the .year value is identical.
        year = str(datetime.fromtimestamp(timestamp, tz=timezone.utc).year)

    # Two-digit hash components derived from the run number spread files
    # across subdirectories.
    run_number = 0
    if 'core.runs' in metadata:
        run_number = int(metadata['core.runs'][0])
    hash1 = "%02d" % ((run_number // 1000000) % 100)
    hash2 = "%02d" % ((run_number // 10000) % 100)
    hash3 = "%02d" % ((run_number // 100) % 100)
    hash4 = "%02d" % (run_number % 100)

    run_type = get_metadata_field(metadata, 'core.run_type')
    data_tier = get_metadata_field(metadata, 'core.data_tier')
    file_type = get_metadata_field(metadata, 'core.file_type')
    data_stream = get_metadata_field(metadata, 'core.data_stream')
    data_campaign = get_metadata_field(metadata, 'DUNE.campaign')

    pfn = '/'.join(['pnfs/dune/tape_backed/dunepro', run_type, data_tier,
                    year, file_type, data_stream, data_campaign,
                    hash1, hash2, hash3, hash4, name])

    # Store the PFN in Rucio metadata so the next call is a cache hit.
    if getattr(rsemanager, 'CLIENT_MODE', None):
        didclient.set_metadata(internal_scope, name, md_key, pfn)
    if getattr(rsemanager, 'SERVER_MODE', None):
        from rucio.core.did import set_metadata
        set_metadata(internal_scope, name, md_key, pfn)
    return pfn