コード例 #1
0
ファイル: weijen_register.py プロジェクト: ChengHsi/rucyio
def main(argv):
    try:
        did_client=DIDClient()
        rule_client=RuleClient()
        openfile_name = sys.argv[1]
        scope_name = 'twgrid-user-wchang'
        mysql_engine = create_engine("mysql://*****:*****@rucio-db01.grid.sinica.edu.tw/rucio")
        with open(openfile_name) as file:
            for line in file:
                connection = mysql_engine.connect()
                data=line.strip('\n')
                dataset=data.split(" ")[0]
                file_name=data.split(" ")[1]
                file_size=int(data.split(" ")[2])
                account=data.split(" ")[3]
                pre_md5='twgrid-user-wchang:'+file_name
                md5_sum = hashlib.md5(pre_md5).hexdigest()
                files = []
                files.append({'scope': scope_name, 'name': file_name, 'md5':md5_sum, 'bytes':file_size, 'adler32':'0cc737eb'})
                contact_db = connection.execute("select * from dids where scope='%s' and name='%s';"%(scope_name, file_name)) 
            
                num_rows = contact_db.rowcount
                if num_rows == 0:
                    print "Register File : %s "%file_name 
                    did_client.add_files_to_dataset(scope=scope_name, name=dataset, files=files, rse='TW-DPM01_TWGRIDSCRATCHDISK')
                    #rule_client.add_replication_rule(dids=[{'scope': scope_name, 'name': file_name}], account=account, copies=1, \
                    #                                 rse_expression='TW-DPM01_TWGRIDSCRATCHDISK', grouping='DATASET')
                else:
                    print "Attach File : %s To %s "%(file_name, dataset)
                    did_client.attach_dids(scope=scope_name, name=dataset, dids=files)

                connection.close()                
 
    except SubscriptionDuplicate, e:
        print e
コード例 #2
0
    def rucio_register(self, filenames):
        files = []
        dids = []

        for filename in filenames:
            size = os.stat(str(filename)).st_size
            adler = adler32(str(filename))
            files.append({
                'scope': self.scope,
                'name': str(filename.parts[-1]),
                'bytes': size,
                'adler32': adler,
                'pfn': self.pfn + str(filename.parts[-1])
            })

        replica_client = ReplicaClient()
        replica_client.add_replicas(rse=self.rse, files=files)
        didclient = DIDClient()
        didclient.add_files_to_dataset(self.scope, self.dataset, files)
コード例 #3
0
class TestDIDClients:

    def setup(self):
        self.account_client = AccountClient()
        self.scope_client = ScopeClient()
        self.meta_client = MetaClient()
        self.did_client = DIDClient()
        self.replica_client = ReplicaClient()
        self.rse_client = RSEClient()

    def test_list_dids(self):
        """ DATA IDENTIFIERS (CLIENT): List dids by pattern."""
        tmp_scope = scope_name_generator()
        tmp_files = []
        tmp_files.append('file_a_1%s' % generate_uuid())
        tmp_files.append('file_a_2%s' % generate_uuid())
        tmp_files.append('file_b_1%s' % generate_uuid())
        tmp_rse = 'MOCK'

        self.scope_client.add_scope('jdoe', tmp_scope)
        for tmp_file in tmp_files:
            self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb')

        results = []
        for result in self.did_client.list_dids(tmp_scope, {'name': 'file_a_*'}, type='file'):
            results.append(result)
        assert_equal(len(results), 2)
        results = []
        for result in self.did_client.list_dids(tmp_scope, {'name': 'file_a_1*'}, type='file'):
            results.append(result)
        assert_equal(len(results), 1)
        results = []
        for result in self.did_client.list_dids(tmp_scope, {'name': 'file_*_1*'}, type='file'):
            results.append(result)
        assert_equal(len(results), 2)
        results = []
        for result in self.did_client.list_dids(tmp_scope, {'name': 'file*'}, type='file'):
            results.append(result)
        assert_equal(len(results), 3)
        results = []

        filters = {'name': 'file*', 'created_after': datetime.utcnow() - timedelta(hours=1)}
        for result in self.did_client.list_dids(tmp_scope, filters):
            results.append(result)
        assert_equal(len(results), 0)
        with assert_raises(UnsupportedOperation):
            self.did_client.list_dids(tmp_scope, {'name': 'file*'}, type='whateverytype')

    def test_list_recursive(self):
        """ DATA IDENTIFIERS (CLIENT): List did recursive """
        # Create nested containers and datast
        tmp_scope_1 = 'list-did-recursive'
        tmp_scope_2 = 'list-did-recursive-2'
        self.scope_client.add_scope('root', tmp_scope_1)
        self.scope_client.add_scope('root', tmp_scope_2)

        tmp_container_1 = 'container_%s' % generate_uuid()
        self.did_client.add_container(scope=tmp_scope_1, name=tmp_container_1)

        tmp_container_2 = 'container_%s' % generate_uuid()
        self.did_client.add_container(scope=tmp_scope_1, name=tmp_container_2)

        tmp_dataset_1 = 'dataset_%s' % generate_uuid()
        self.did_client.add_dataset(scope=tmp_scope_2, name=tmp_dataset_1)

        tmp_dataset_2 = 'dataset_%s' % generate_uuid()
        self.did_client.add_dataset(scope=tmp_scope_1, name=tmp_dataset_2)

        self.did_client.attach_dids(scope=tmp_scope_1, name=tmp_container_1, dids=[{'scope': tmp_scope_2, 'name': tmp_dataset_1}])
        self.did_client.attach_dids(scope=tmp_scope_1, name=tmp_container_2, dids=[{'scope': tmp_scope_1, 'name': tmp_dataset_2}])
        self.did_client.attach_dids(scope=tmp_scope_1, name=tmp_container_1, dids=[{'scope': tmp_scope_1, 'name': tmp_container_2}])

        # List DIDs not recursive - only the first container is expected
        dids = [str(did) for did in self.did_client.list_dids(scope=tmp_scope_1, recursive=False, type='all', filters={'name': tmp_container_1})]
        assert_equal(dids, [tmp_container_1])

        # List DIDs recursive - first container and all attached collections are expected
        dids = [str(did) for did in self.did_client.list_dids(scope=tmp_scope_1, recursive=True, type='all', filters={'name': tmp_container_1})]
        assert_true(tmp_container_1 in dids)
        assert_true(tmp_container_2 in dids)
        assert_true(tmp_dataset_1 in dids)
        assert_true(tmp_dataset_2 in dids)
        assert_equal(len(dids), 4)

        # List DIDs recursive - only containers are expected
        dids = [str(did) for did in self.did_client.list_dids(scope=tmp_scope_1, recursive=True, type='container', filters={'name': tmp_container_1})]
        assert_true(tmp_container_1 in dids)
        assert_true(tmp_container_2 in dids)
        assert_true(tmp_dataset_1 not in dids)
        assert_true(tmp_dataset_2 not in dids)
        assert_equal(len(dids), 2)

    def test_list_by_length(self):
        """ DATA IDENTIFIERS (CLIENT): List did with length """
        tmp_scope = 'mock'

        tmp_dsn = 'dsn_%s' % generate_uuid()
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn)

        dids = self.did_client.list_dids(tmp_scope, {'length.gt': 0})
        results = []
        for d in dids:
            results.append(d)
        assert_not_equal(len(results), 0)

        dids = self.did_client.list_dids(tmp_scope, {'length.gt': -1, 'length.lt': 1})
        results = []
        for d in dids:
            results.append(d)
        assert_equal(len(results), 0)

        dids = self.did_client.list_dids(tmp_scope, {'length': 0})
        results = []
        for d in dids:
            results.append(d)
        assert_equal(len(results), 0)

    def test_list_by_metadata(self):
        """ DATA IDENTIFIERS (CLIENT): List did with metadata"""
        dsns = []
        tmp_scope = 'mock'
        tmp_dsn1 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn1)

        dataset_meta = {'project': 'data12_8TeV',
                        'run_number': 400000,
                        'stream_name': 'physics_CosmicCalo',
                        'prod_step': 'merge',
                        'datatype': 'NTUP_TRIG',
                        'version': 'f392_m920',
                        }
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn1, meta=dataset_meta)
        tmp_dsn2 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn2)
        dataset_meta['run_number'] = 400001
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn2, meta=dataset_meta)

        tmp_dsn3 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn3)
        dataset_meta['stream_name'] = 'physics_Egamma'
        dataset_meta['datatype'] = 'NTUP_SMWZ'
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn3, meta=dataset_meta)

        dids = self.did_client.list_dids(tmp_scope, {'project': 'data12_8TeV', 'version': 'f392_m920'})
        results = []
        for d in dids:
            results.append(d)
        for dsn in dsns:
            assert_in(dsn, results)
        dsns.remove(tmp_dsn1)

        dids = self.did_client.list_dids(tmp_scope, {'project': 'data12_8TeV', 'run_number': 400001})
        results = []
        for d in dids:
            results.append(d)
        for dsn in dsns:
            assert_in(dsn, results)
        dsns.remove(tmp_dsn2)

        dids = self.did_client.list_dids(tmp_scope, {'project': 'data12_8TeV', 'stream_name': 'physics_Egamma', 'datatype': 'NTUP_SMWZ'})
        results = []
        for d in dids:
            results.append(d)
        for dsn in dsns:
            assert_in(dsn, results)

        with assert_raises(KeyNotFound):
            self.did_client.list_dids(tmp_scope, {'NotReallyAKey': 'NotReallyAValue'})

    def test_add_did(self):
        """ DATA IDENTIFIERS (CLIENT): Add, populate, list did content and create a sample"""
        tmp_scope = 'mock'
        tmp_rse = 'MOCK'
        tmp_dsn = 'dsn_%s' % generate_uuid()
        root = InternalAccount('root')
        set_local_account_limit(root, get_rse_id('MOCK'), -1)
        set_local_account_limit(root, get_rse_id('CERN-PROD_TZERO'), -1)

        # PFN example: rfio://castoratlas.cern.ch/castor/cern.ch/grid/atlas/tzero/xx/xx/xx/filename
        dataset_meta = {'project': 'data13_hip',
                        'run_number': 300000,
                        'stream_name': 'physics_CosmicCalo',
                        'prod_step': 'merge',
                        'datatype': 'NTUP_TRIG',
                        'version': 'f392_m927',
                        }
        rules = [{'copies': 1, 'rse_expression': 'MOCK', 'account': 'root'}]

        with assert_raises(ScopeNotFound):
            self.did_client.add_dataset(scope='Nimportnawak', name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules)

        files = [{'scope': tmp_scope, 'name': 'lfn.%(tmp_dsn)s.' % locals() + str(generate_uuid()), 'bytes': 724963570, 'adler32': '0cc737eb'}, ]
        with assert_raises(DataIdentifierNotFound):
            self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules, files=files)

        with assert_raises(DataIdentifierNotFound):
            self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dsn, files=files)

        files = []
        for i in range(5):
            lfn = 'lfn.%(tmp_dsn)s.' % locals() + str(generate_uuid())
            pfn = 'mock://localhost/tmp/rucio_rse/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            # it doesn't work with mock: TBF
            # pfn = 'srm://mock2.com:2880/pnfs/rucio/disk-only/scratchdisk/rucio_tests/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            pfn += '%(tmp_dsn)s/%(lfn)s' % locals()
            file_meta = {'guid': str(generate_uuid()), 'events': 10}
            files.append({'scope': tmp_scope, 'name': lfn,
                          'bytes': 724963570, 'adler32': '0cc737eb',
                          'pfn': pfn, 'meta': file_meta})

        rules = [{'copies': 1, 'rse_expression': 'CERN-PROD_TZERO', 'lifetime': timedelta(days=2), 'account': 'root'}]

        with assert_raises(InvalidPath):
            self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules, files=files, rse=tmp_rse)

        files_without_pfn = [{'scope': i['scope'], 'name': i['name'], 'bytes': i['bytes'], 'adler32': i['adler32'], 'meta': i['meta']} for i in files]
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules, files=files_without_pfn, rse=tmp_rse)

        with assert_raises(DataIdentifierAlreadyExists):
            self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, files=files, rse=tmp_rse)

        files = []
        for i in range(5):
            lfn = '%(tmp_dsn)s.' % locals() + str(generate_uuid())
            pfn = 'mock://localhost/tmp/rucio_rse/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            # it doesn't work with mock: TBF
            # pfn = 'srm://mock2.com:2880/pnfs/rucio/disk-only/scratchdisk/rucio_tests/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            pfn += '%(tmp_dsn)s/%(lfn)s' % locals()
            file_meta = {'guid': str(generate_uuid()), 'events': 100}
            files.append({'scope': tmp_scope, 'name': lfn,
                          'bytes': 724963570, 'adler32': '0cc737eb',
                          'pfn': pfn, 'meta': file_meta})
        rules = [{'copies': 1, 'rse_expression': 'CERN-PROD_TZERO', 'lifetime': timedelta(days=2)}]

        with assert_raises(InvalidPath):
            self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dsn, files=files, rse=tmp_rse)
        files_without_pfn = [{'scope': i['scope'], 'name': i['name'], 'bytes': i['bytes'], 'adler32': i['adler32'], 'meta': i['meta']} for i in files]
        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dsn, files=files_without_pfn, rse=tmp_rse)

        self.did_client.close(scope=tmp_scope, name=tmp_dsn)

        tmp_dsn_output = 'dsn_%s' % generate_uuid()
        self.did_client.create_did_sample(input_scope=tmp_scope, input_name=tmp_dsn, output_scope=tmp_scope, output_name=tmp_dsn_output, nbfiles=2)
        files = [f for f in self.did_client.list_files(scope=tmp_scope, name=tmp_dsn_output)]
        assert_equal(len(files), 2)

    def test_attach_dids_to_dids(self):
        """ DATA IDENTIFIERS (CLIENT): Attach dids to dids"""
        tmp_scope = 'mock'
        tmp_rse = 'MOCK'
        nb_datasets = 5
        nb_files = 5
        attachments, dsns = list(), list()
        guid_to_query = None
        dsn = {}
        for i in range(nb_datasets):
            attachment = {}
            attachment['scope'] = tmp_scope
            attachment['name'] = 'dsn.%s' % str(generate_uuid())
            attachment['rse'] = tmp_rse
            files = []
            for i in range(nb_files):
                files.append({'scope': tmp_scope, 'name': 'lfn.%s' % str(generate_uuid()),
                              'bytes': 724963570, 'adler32': '0cc737eb',
                              'meta': {'guid': str(generate_uuid()), 'events': 100}})
            attachment['dids'] = files
            guid_to_query = files[0]['meta']['guid']
            dsn = {'scope': tmp_scope, 'name': attachment['name']}
            dsns.append(dsn)
            attachments.append(attachment)

        self.did_client.add_datasets(dsns=dsns)
        self.did_client.attach_dids_to_dids(attachments=attachments)
        dsns_l = [i for i in self.did_client.get_dataset_by_guid(guid_to_query)]

        assert_equal([dsn], dsns_l)

        cnt_name = 'cnt_%s' % generate_uuid()
        self.did_client.add_container(scope='mock', name=cnt_name)
        with assert_raises(UnsupportedOperation):
            self.did_client.attach_dids_to_dids([{'scope': 'mock', 'name': cnt_name, 'rse': tmp_rse, 'dids': attachment['dids']}])

    def test_add_files_to_datasets(self):
        """ DATA IDENTIFIERS (CLIENT): Add files to Datasets"""
        tmp_scope = 'mock'
        tmp_rse = 'MOCK'
        dsn1 = 'dsn.%s' % str(generate_uuid())
        dsn2 = 'dsn.%s' % str(generate_uuid())
        meta = {'transient': True}
        files1, files2, nb_files = [], [], 5
        for i in range(nb_files):
            files1.append({'scope': tmp_scope, 'name': 'lfn.%s' % str(generate_uuid()),
                           'bytes': 724963570, 'adler32': '0cc737eb',
                           'meta': {'guid': str(generate_uuid()), 'events': 100}})
            files2.append({'scope': tmp_scope, 'name': 'lfn.%s' % str(generate_uuid()),
                           'bytes': 724963570, 'adler32': '0cc737eb',
                           'meta': {'guid': str(generate_uuid()), 'events': 100}})

        self.did_client.add_dataset(scope=tmp_scope, name=dsn1, files=files1,
                                    rse=tmp_rse, meta=meta)
        self.did_client.add_dataset(scope=tmp_scope, name=dsn2, files=files2,
                                    rse=tmp_rse, meta=meta)

        attachments = [{'scope': tmp_scope, 'name': dsn1, 'dids': files2, 'rse': tmp_rse},
                       {'scope': tmp_scope, 'name': dsn2, 'dids': files1, 'rse': tmp_rse}]

        self.did_client.add_files_to_datasets(attachments)

        files = [f for f in self.did_client.list_files(scope=tmp_scope, name=dsn1)]
        assert_equal(len(files), 10)

        with assert_raises(FileAlreadyExists):
            self.did_client.add_files_to_datasets(attachments)

        for attachment in attachments:
            for i in range(nb_files):
                attachment['dids'].append({'scope': tmp_scope,
                                           'name': 'lfn.%s' % str(generate_uuid()),
                                           'bytes': 724963570,
                                           'adler32': '0cc737eb',
                                           'meta': {'guid': str(generate_uuid()),
                                                    'events': 100}})

        self.did_client.add_files_to_datasets(attachments, ignore_duplicate=True)

        files = [f for f in self.did_client.list_files(scope=tmp_scope, name=dsn1)]
        assert_equal(len(files), 15)

        # Corrupt meta-data
        files = []
        for attachment in attachments:
            for file in attachment['dids']:
                file['bytes'] = 1000
                break

        with assert_raises(FileConsistencyMismatch):
            self.did_client.add_files_to_datasets(attachments, ignore_duplicate=True)

    def test_add_dataset(self):
        """ DATA IDENTIFIERS (CLIENT): Add dataset """
        tmp_scope = 'mock'
        tmp_dsn = 'dsn_%s' % generate_uuid()

        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, meta={'project': 'data13_hip'})

        did = self.did_client.get_did(tmp_scope, tmp_dsn)

        assert_equal(did['scope'], tmp_scope)
        assert_equal(did['name'], tmp_dsn)

        with assert_raises(DataIdentifierNotFound):
            self.did_client.get_did('i_dont_exist', 'neither_do_i')

    def test_add_datasets(self):
        """ DATA IDENTIFIERS (CLIENT): Bulk add datasets """
        tmp_scope = 'mock'
        dsns = list()
        for i in range(500):
            tmp_dsn = {'name': 'dsn_%s' % generate_uuid(), 'scope': tmp_scope, 'meta': {'project': 'data13_hip'}}
            dsns.append(tmp_dsn)
        self.did_client.add_datasets(dsns)

    def test_exists(self):
        """ DATA IDENTIFIERS (CLIENT): Check if data identifier exists """
        tmp_scope = 'mock'
        tmp_file = 'file_%s' % generate_uuid()
        tmp_rse = 'MOCK'

        self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb')

        did = self.did_client.get_did(tmp_scope, tmp_file)

        assert_equal(did['scope'], tmp_scope)
        assert_equal(did['name'], tmp_file)

        with assert_raises(DataIdentifierNotFound):
            self.did_client.get_did('i_dont_exist', 'neither_do_i')

    def test_did_hierarchy(self):
        """ DATA IDENTIFIERS (CLIENT): Check did hierarchy rule """

        account = 'jdoe'
        rse = 'MOCK'
        scope = scope_name_generator()
        file = ['file_%s' % generate_uuid() for i in range(10)]
        dst = ['dst_%s' % generate_uuid() for i in range(4)]
        cnt = ['cnt_%s' % generate_uuid() for i in range(4)]

        self.scope_client.add_scope(account, scope)

        for i in range(10):
            self.replica_client.add_replica(rse, scope, file[i], 1, '0cc737eb')
        for i in range(4):
            self.did_client.add_did(scope, dst[i], 'DATASET', statuses=None, meta=None, rules=None)
        for i in range(4):
            self.did_client.add_did(scope, cnt[i], 'CONTAINER', statuses=None, meta=None, rules=None)

        for i in range(4):
            self.did_client.add_files_to_dataset(scope, dst[i], [{'scope': scope, 'name': file[2 * i], 'bytes': 1, 'adler32': '0cc737eb'},
                                                                 {'scope': scope, 'name': file[2 * i + 1], 'bytes': 1, 'adler32': '0cc737eb'}])

        self.did_client.add_containers_to_container(scope, cnt[1], [{'scope': scope, 'name': cnt[2]}, {'scope': scope, 'name': cnt[3]}])
        self.did_client.add_datasets_to_container(scope, cnt[0], [{'scope': scope, 'name': dst[1]}, {'scope': scope, 'name': dst[2]}])

        result = self.did_client.scope_list(scope, recursive=True)
        for r in result:
            pass
            # TODO: fix, fix, fix
            # if r['name'] == cnt[1]:
            #    assert_equal(r['type'], 'container')
            #    assert_equal(r['level'], 0)
            # if (r['name'] == cnt[0]) or (r['name'] == dst[0]) or (r['name'] == file[8]) or (r['name'] == file[9]):
            #    assert_equal(r['level'], 0)
            # else:
            #     assert_equal(r['level'], 1)

    def test_detach_did(self):
        """ DATA IDENTIFIERS (CLIENT): Detach dids from a did"""

        account = 'jdoe'
        rse = 'MOCK'
        scope = scope_name_generator()
        file = ['file_%s' % generate_uuid() for i in range(10)]
        dst = ['dst_%s' % generate_uuid() for i in range(5)]
        cnt = ['cnt_%s' % generate_uuid() for i in range(2)]

        self.scope_client.add_scope(account, scope)

        for i in range(10):
            self.replica_client.add_replica(rse, scope, file[i], 1, '0cc737eb')
        for i in range(5):
            self.did_client.add_dataset(scope, dst[i], statuses=None, meta=None, rules=None)
        for i in range(2):
            self.did_client.add_container(scope, cnt[i], statuses=None, meta=None, rules=None)

        for i in range(5):
            self.did_client.add_files_to_dataset(scope, dst[i], [{'scope': scope, 'name': file[2 * i], 'bytes': 1, 'adler32': '0cc737eb'},
                                                                 {'scope': scope, 'name': file[2 * i + 1], 'bytes': 1, 'adler32': '0cc737eb'}])

        self.did_client.add_containers_to_container(scope, cnt[1], [{'scope': scope, 'name': dst[2]}, {'scope': scope, 'name': dst[3]}])

        with assert_raises(UnsupportedOperation):
            self.did_client.add_datasets_to_container(scope, cnt[0], [{'scope': scope, 'name': dst[1]}, {'scope': scope, 'name': cnt[1]}])

        self.did_client.add_datasets_to_container(scope, cnt[0], [{'scope': scope, 'name': dst[1]}, {'scope': scope, 'name': dst[2]}])

        self.did_client.detach_dids(scope, cnt[0], [{'scope': scope, 'name': dst[1]}])
        self.did_client.detach_dids(scope, dst[3], [{'scope': scope, 'name': file[6]}, {'scope': scope, 'name': file[7]}])
        result = self.did_client.scope_list(scope, recursive=True)
        for r in result:
            if r['name'] == dst[1]:
                assert_equal(r['level'], 0)
            if r['type'] == 'file':
                if (r['name'] in file[6:9]):
                    assert_equal(r['level'], 0)
                else:
                    assert_not_equal(r['level'], 0)

        with assert_raises(UnsupportedOperation):
            self.did_client.detach_dids(scope=scope, name=cnt[0], dids=[{'scope': scope, 'name': cnt[0]}])

        self.did_client.close(scope, dst[4])
        metadata = self.did_client.get_metadata(scope, dst[4])
        i_bytes, i_length = metadata['bytes'], metadata['length']
        metadata = self.did_client.get_metadata(scope, file[8])
        file1_bytes = metadata['bytes']
        metadata = self.did_client.get_metadata(scope, file[9])
        file2_bytes = metadata['bytes']
        self.did_client.detach_dids(scope, dst[4], [{'scope': scope, 'name': file[8]}, {'scope': scope, 'name': file[9]}])
        metadata = self.did_client.get_metadata(scope, dst[4])
        f_bytes, f_length = metadata['bytes'], metadata['length']
        assert_equal(i_bytes, f_bytes + file1_bytes + file2_bytes)
        assert_equal(i_length, f_length + 1 + 1)

    def test_scope_list(self):
        """ DATA IDENTIFIERS (CLIENT): Add, aggregate, and list data identifiers in a scope """

        # create some dummy data
        self.tmp_accounts = ['jdoe' for i in range(3)]
        self.tmp_scopes = [scope_name_generator() for i in range(3)]
        self.tmp_rses = [rse_name_generator() for i in range(3)]
        self.tmp_files = ['file_%s' % generate_uuid() for i in range(3)]
        self.tmp_datasets = ['dataset_%s' % generate_uuid() for i in range(3)]
        self.tmp_containers = ['container_%s' % generate_uuid() for i in range(3)]

        # add dummy data to the catalogue
        for i in range(3):
            self.scope_client.add_scope(self.tmp_accounts[i], self.tmp_scopes[i])
            self.rse_client.add_rse(self.tmp_rses[i])
            self.replica_client.add_replica(self.tmp_rses[i], self.tmp_scopes[i], self.tmp_files[i], 1, '0cc737eb')

        # put files in datasets
        for i in range(3):
            for j in range(3):
                files = [{'scope': self.tmp_scopes[j], 'name': self.tmp_files[j], 'bytes': 1, 'adler32': '0cc737eb'}]
                self.did_client.add_dataset(self.tmp_scopes[i], self.tmp_datasets[j])
                self.did_client.add_files_to_dataset(self.tmp_scopes[i], self.tmp_datasets[j], files)

        # put datasets in containers
        for i in range(3):
            for j in range(3):
                datasets = [{'scope': self.tmp_scopes[j], 'name': self.tmp_datasets[j]}]
                self.did_client.add_container(self.tmp_scopes[i], self.tmp_containers[j])
                self.did_client.add_datasets_to_container(self.tmp_scopes[i], self.tmp_containers[j], datasets)

        # reverse check if everything is in order
        for i in range(3):
            result = self.did_client.scope_list(self.tmp_scopes[i], recursive=True)

            r_topdids = []
            r_otherscopedids = []
            r_scope = []
            for r in result:
                if r['level'] == 0:
                    r_topdids.append(r['scope'] + ':' + r['name'])
                    r_scope.append(r['scope'])
                if r['scope'] != self.tmp_scopes[i]:
                    r_otherscopedids.append(r['scope'] + ':' + r['name'])
                    assert_in(r['level'], [1, 2])

            for j in range(3):
                assert_equal(self.tmp_scopes[i], r_scope[j])
                if j != i:
                    assert_in(self.tmp_scopes[j] + ':' + self.tmp_files[j], r_otherscopedids)
            assert_not_in(self.tmp_scopes[i] + ':' + self.tmp_files[i], r_topdids)

    def test_get_did(self):
        """ DATA IDENTIFIERS (CLIENT): add a new data identifier and try to retrieve it back"""
        rse = 'MOCK'
        scope = 'mock'
        file = generate_uuid()
        dsn = generate_uuid()

        self.replica_client.add_replica(rse, scope, file, 1, '0cc737eb')

        did = self.did_client.get_did(scope, file)

        assert_equal(did['scope'], scope)
        assert_equal(did['name'], file)

        self.did_client.add_dataset(scope=scope, name=dsn, lifetime=10000000)
        did2 = self.did_client.get_did(scope, dsn)
        assert_equal(type(did2['expired_at']), datetime)

    def test_get_meta(self):
        """ DATA IDENTIFIERS (CLIENT): add a new meta data for an identifier and try to retrieve it back"""
        rse = 'MOCK'
        scope = 'mock'
        file = generate_uuid()
        keys = ['project', 'run_number']
        values = ['data13_hip', 12345678]

        self.replica_client.add_replica(rse, scope, file, 1, '0cc737eb')
        for i in range(2):
            self.did_client.set_metadata(scope, file, keys[i], values[i])

        meta = self.did_client.get_metadata(scope, file)

        for i in range(2):
            assert_equal(meta[keys[i]], values[i])

    def test_list_content(self):
        """ DATA IDENTIFIERS (CLIENT): test to list contents for an identifier"""
        rse = 'MOCK'
        scope = 'mock'
        nbfiles = 5
        dataset1 = generate_uuid()
        dataset2 = generate_uuid()
        container = generate_uuid()
        files1 = [{'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'} for i in range(nbfiles)]
        files2 = [{'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'} for i in range(nbfiles)]

        self.did_client.add_dataset(scope, dataset1)

        with assert_raises(DataIdentifierAlreadyExists):
            self.did_client.add_dataset(scope, dataset1)

        self.did_client.add_files_to_dataset(scope, dataset1, files1, rse=rse)

        self.did_client.add_dataset(scope, dataset2)
        self.did_client.add_files_to_dataset(scope, dataset2, files2, rse=rse)

        self.did_client.add_container(scope, container)
        datasets = [{'scope': scope, 'name': dataset1}, {'scope': scope, 'name': dataset2}]
        self.did_client.add_datasets_to_container(scope, container, datasets)

        contents = self.did_client.list_content(scope, container)

        datasets_s = [d['name'] for d in contents]
        assert_in(dataset1, datasets_s)
        assert_in(dataset2, datasets_s)

    def test_list_files(self):
        """ DATA IDENTIFIERS (CLIENT): List files for a container"""
        rse = 'MOCK'
        scope = 'mock'
        dataset1 = generate_uuid()
        dataset2 = generate_uuid()
        container = generate_uuid()
        files1 = []
        files2 = []
        for i in range(10):
            files1.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'})
            files2.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'})

        for i in range(10):
            self.replica_client.add_replica(rse, scope, files1[i]['name'], 1, '0cc737eb')
            self.replica_client.add_replica(rse, scope, files2[i]['name'], 1, '0cc737eb')

        self.did_client.add_dataset(scope, dataset1)
        self.did_client.add_files_to_dataset(scope, dataset1, files1)

        self.did_client.add_dataset(scope, dataset2)
        self.did_client.add_files_to_dataset(scope, dataset2, files2)
        datasets = [{'scope': scope, 'name': dataset1}, {'scope': scope, 'name': dataset2}]
        self.did_client.add_container(scope, container)
        self.did_client.add_datasets_to_container(scope, container, datasets)

        # List file content
        content = self.did_client.list_files(scope, files1[i]['name'])
        assert_true(content is not None)
        for d in content:
            assert_true(d['name'] == files1[i]['name'])

        # List container content
        for d in [{'name': x['name'], 'scope': x['scope'], 'bytes': x['bytes'], 'adler32': x['adler32']} for x in self.did_client.list_files(scope, container)]:
            assert_in(d, files1 + files2)

        # List non-existing data identifier content
        with assert_raises(DataIdentifierNotFound):
            self.did_client.list_files(scope, 'Nimportnawak')

    def test_list_replicas(self):
        """ DATA IDENTIFIERS (CLIENT): List replicas for a container"""
        rse = 'MOCK'
        scope = 'mock'
        dsn1 = generate_uuid()
        dsn2 = generate_uuid()
        cnt = generate_uuid()
        files1 = []
        files2 = []
        for i in range(10):
            files1.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'})
            files2.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'})

        self.did_client.add_dataset(scope, dsn1)
        self.did_client.add_files_to_dataset(scope, dsn1, files1, rse=rse)

        self.did_client.add_dataset(scope, dsn2)
        self.did_client.add_files_to_dataset(scope, dsn2, files2, rse=rse)

        self.did_client.add_container(scope, cnt)
        self.did_client.add_datasets_to_container(scope, cnt, [{'scope': scope, 'name': dsn1}, {'scope': scope, 'name': dsn2}])

        replicas = self.replica_client.list_replicas(dids=[{'scope': scope, 'name': dsn1}])
        assert_true(replicas is not None)

        replicas = self.replica_client.list_replicas(dids=[{'scope': scope, 'name': cnt}])
        assert_true(replicas is not None)

    @raises(UnsupportedOperation)
    def test_close(self):
        """ DATA IDENTIFIERS (CLIENT): test to close data identifiers"""

        tmp_rse = 'MOCK'
        tmp_scope = 'mock'

        # Add dataset
        tmp_dataset = 'dsn_%s' % generate_uuid()

        # Add file replica
        tmp_file = 'file_%s' % generate_uuid()
        self.replica_client.add_replica(rse=tmp_rse, scope=tmp_scope, name=tmp_file, bytes=1, adler32='0cc737eb')

        # Add dataset
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dataset)

        # Add files to dataset
        files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ]
        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files)

        # Add a second file replica
        tmp_file = 'file_%s' % generate_uuid()
        self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb')
        # Add files to dataset
        files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ]
        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files)

        # Close dataset
        with assert_raises(UnsupportedStatus):
            self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, close=False)
        self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, open=False)

        # Add a third file replica
        tmp_file = 'file_%s' % generate_uuid()
        self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb')
        # Add files to dataset
        files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ]
        self.did_client.attach_dids(scope=tmp_scope, name=tmp_dataset, dids=files)

    @raises
    def test_open(self):
        """ DATA IDENTIFIERS (CLIENT): test to re-open data identifiers for priv account"""

        tmp_rse = 'MOCK'
        tmp_scope = 'mock'

        # Add dataset
        tmp_dataset = 'dsn_%s' % generate_uuid()

        # Add file replica
        tmp_file = 'file_%s' % generate_uuid()
        self.replica_client.add_replica(rse=tmp_rse, scope=tmp_scope, name=tmp_file, bytes=1, adler32='0cc737eb')

        # Add dataset
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dataset)

        # Add files to dataset
        files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ]
        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files)

        # Add a second file replica
        tmp_file = 'file_%s' % generate_uuid()
        self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb')
        # Add files to dataset
        files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ]
        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files)

        # Close dataset
        with assert_raises(UnsupportedStatus):
            self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, close=False)
        self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, open=False)

        # Add a third file replica
        self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, open=True)

    def test_bulk_get_meta(self):
        """ DATA IDENTIFIERS (CLIENT): Add a new meta data for a list of DIDs and try to retrieve them back"""
        key = 'project'
        rse = 'MOCK'
        scope = 'mock'
        files = ['file_%s' % generate_uuid() for _ in range(4)]
        dst = ['dst_%s' % generate_uuid() for _ in range(4)]
        cnt = ['cnt_%s' % generate_uuid() for _ in range(4)]
        meta_mapping = {}
        list_dids = []
        for idx in range(4):
            self.replica_client.add_replica(rse, scope, files[idx], 1, '0cc737eb')
            self.did_client.set_metadata(scope, files[idx], key, 'file_%s' % idx)
            list_dids.append({'scope': scope, 'name': files[idx]})
            meta_mapping['%s:%s' % (scope, files[idx])] = (key, 'file_%s' % idx)
        for idx in range(4):
            self.did_client.add_did(scope, dst[idx], 'DATASET', statuses=None, meta={key: 'dsn_%s' % idx}, rules=None)
            list_dids.append({'scope': scope, 'name': dst[idx]})
            meta_mapping['%s:%s' % (scope, dst[idx])] = (key, 'dsn_%s' % idx)
        for idx in range(4):
            self.did_client.add_did(scope, cnt[idx], 'CONTAINER', statuses=None, meta={key: 'cnt_%s' % idx}, rules=None)
            list_dids.append({'scope': scope, 'name': cnt[idx]})
            meta_mapping['%s:%s' % (scope, cnt[idx])] = (key, 'cnt_%s' % idx)
        list_meta = [_ for _ in self.did_client.get_metadata_bulk(list_dids)]
        res_list_dids = [{'scope': entry['scope'], 'name': entry['name']} for entry in list_meta]
        res_list_dids.sort()
        list_dids.sort()
        assert_equal(list_dids, res_list_dids)
        for meta in list_meta:
            did = '%s:%s' % (meta['scope'], meta['name'])
            met = meta_mapping[did]
            assert_equal((key, meta[key]), met)
        cnt = ['cnt_%s' % generate_uuid() for _ in range(4)]
        for idx in range(4):
            list_dids.append({'scope': scope, 'name': cnt[idx]})
        list_meta = [_ for _ in self.did_client.get_metadata_bulk(list_dids)]
        assert_equal(len(list_meta), 12)
        list_dids = []
        for idx in range(4):
            list_dids.append({'scope': scope, 'name': cnt[idx]})
        list_meta = [_ for _ in self.did_client.get_metadata_bulk(list_dids)]
        assert_equal(len(list_meta), 0)
コード例 #4
0
ファイル: test_replica.py プロジェクト: sahiljajodia01/rucio
class TestReplicaClients:
    def setup(self):
        self.replica_client = ReplicaClient()
        self.did_client = DIDClient()

    def test_add_list_bad_replicas(self):
        """ REPLICA (CLIENT): Add bad replicas"""
        tmp_scope = 'mock'
        nbfiles = 5
        # Adding replicas to deterministic RSE
        files = [{
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        rse_info = rsemgr.get_rse_info('MOCK')
        rse_id1 = rse_info['id']
        self.replica_client.add_replicas(rse='MOCK', files=files)

        # Listing replicas on deterministic RSE
        replicas, list_rep = [], []
        for replica in self.replica_client.list_replicas(dids=[{
                'scope':
                f['scope'],
                'name':
                f['name']
        } for f in files],
                                                         schemes=['srm'],
                                                         unavailable=True):
            replicas.extend(replica['rses']['MOCK'])
            list_rep.append(replica)
        r = self.replica_client.declare_bad_file_replicas(
            replicas, 'This is a good reason')
        assert_equal(r, {})
        bad_replicas = list_bad_replicas()
        nbbadrep = 0
        for rep in list_rep:
            for badrep in bad_replicas:
                if badrep['rse_id'] == rse_id1:
                    if badrep['scope'] == rep['scope'] and badrep[
                            'name'] == rep['name']:
                        nbbadrep += 1
        assert_equal(len(replicas), nbbadrep)

        # Run necromancer once
        necromancer_run(threads=1, bulk=10000, once=True)

        # Try to attach a lost file
        tmp_dsn = 'dataset_%s' % generate_uuid()
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn)
        with assert_raises(UnsupportedOperation):
            self.did_client.add_files_to_dataset(tmp_scope,
                                                 name=tmp_dsn,
                                                 files=files,
                                                 rse='MOCK')

        # Adding replicas to non-deterministic RSE
        files = [{
            'scope':
            tmp_scope,
            'name':
            'file_%s' % generate_uuid(),
            'bytes':
            1,
            'adler32':
            '0cc737eb',
            'pfn':
            'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s'
            % (tmp_scope, generate_uuid()),
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        rse_info = rsemgr.get_rse_info('MOCK2')
        rse_id2 = rse_info['id']
        self.replica_client.add_replicas(rse='MOCK2', files=files)

        # Listing replicas on non-deterministic RSE
        replicas, list_rep = [], []
        for replica in self.replica_client.list_replicas(dids=[{
                'scope':
                f['scope'],
                'name':
                f['name']
        } for f in files],
                                                         schemes=['srm'],
                                                         unavailable=True):
            replicas.extend(replica['rses']['MOCK2'])
            list_rep.append(replica)
        print(replicas, list_rep)
        r = self.replica_client.declare_bad_file_replicas(
            replicas, 'This is a good reason')
        print(r)
        assert_equal(r, {})
        bad_replicas = list_bad_replicas()
        nbbadrep = 0
        for rep in list_rep:
            for badrep in bad_replicas:
                if badrep['rse_id'] == rse_id2:
                    if badrep['scope'] == rep['scope'] and badrep[
                            'name'] == rep['name']:
                        nbbadrep += 1
        assert_equal(len(replicas), nbbadrep)

        # Now adding non-existing bad replicas
        files = [
            'srm://mock2.com/rucio/tmpdisk/rucio_tests/%s/%s' %
            (tmp_scope, generate_uuid()),
        ]
        r = self.replica_client.declare_bad_file_replicas(
            files, 'This is a good reason')
        output = ['%s Unknown replica' % rep for rep in files]
        assert_equal(r, {'MOCK2': output})

    def test_add_suspicious_replicas(self):
        """ REPLICA (CLIENT): Add suspicious replicas"""
        tmp_scope = 'mock'
        nbfiles = 5
        # Adding replicas to deterministic RSE
        files = [{
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK', files=files)

        # Listing replicas on deterministic RSE
        replicas = []
        list_rep = []
        for replica in self.replica_client.list_replicas(dids=[{
                'scope':
                f['scope'],
                'name':
                f['name']
        } for f in files],
                                                         schemes=['srm'],
                                                         unavailable=True):
            replicas.extend(replica['rses']['MOCK'])
            list_rep.append(replica)
        r = self.replica_client.declare_suspicious_file_replicas(
            replicas, 'This is a good reason')
        assert_equal(r, {})

        # Adding replicas to non-deterministic RSE
        files = [{
            'scope':
            tmp_scope,
            'name':
            'file_%s' % generate_uuid(),
            'bytes':
            1,
            'adler32':
            '0cc737eb',
            'pfn':
            'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s'
            % (tmp_scope, generate_uuid()),
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK2', files=files)

        # Listing replicas on non-deterministic RSE
        replicas = []
        list_rep = []
        for replica in self.replica_client.list_replicas(dids=[{
                'scope':
                f['scope'],
                'name':
                f['name']
        } for f in files],
                                                         schemes=['srm'],
                                                         unavailable=True):
            replicas.extend(replica['rses']['MOCK2'])
            list_rep.append(replica)
        r = self.replica_client.declare_suspicious_file_replicas(
            replicas, 'This is a good reason')
        assert_equal(r, {})

        # Now adding non-existing bad replicas
        files = [
            'srm://mock2.com/rucio/tmpdisk/rucio_tests/%s/%s' %
            (tmp_scope, generate_uuid()),
        ]
        r = self.replica_client.declare_suspicious_file_replicas(
            files, 'This is a good reason')
        output = ['%s Unknown replica' % rep for rep in files]
        assert_equal(r, {'MOCK2': output})

    def test_bad_replica_methods_for_UI(self):
        """ REPLICA (REST): Test the listing of bad and suspicious replicas """
        mw = []
        headers1 = {
            'X-Rucio-Account': 'root',
            'X-Rucio-Username': '******',
            'X-Rucio-Password': '******'
        }
        result = TestApp(auth_app.wsgifunc(*mw)).get('/userpass',
                                                     headers=headers1,
                                                     expect_errors=True)
        assert_equal(result.status, 200)
        token = str(result.header('X-Rucio-Auth-Token'))
        headers2 = {'X-Rucio-Auth-Token': str(token)}

        data = dumps({})
        result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states',
                                                    headers=headers2,
                                                    params=data,
                                                    expect_errors=True)
        assert_equal(result.status, 200)
        tot_files = []
        for line in result.body.split('\n'):
            if line != '':
                tot_files.append(dumps(line))
        nb_tot_files = len(tot_files)

        data = dumps({'state': 'B'})
        result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states',
                                                    headers=headers2,
                                                    params=data,
                                                    expect_errors=True)
        assert_equal(result.status, 200)
        tot_bad_files = []
        for line in result.body.split('\n'):
            if line != '':
                tot_bad_files.append(dumps(line))
        nb_tot_bad_files1 = len(tot_bad_files)

        data = dumps({'state': 'S', 'list_pfns': 'True'})
        result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states',
                                                    headers=headers2,
                                                    params=data,
                                                    expect_errors=True)
        assert_equal(result.status, 200)
        tot_suspicious_files = []
        for line in result.body.split('\n'):
            if line != '':
                tot_suspicious_files.append(dumps(line))
        nb_tot_suspicious_files = len(tot_suspicious_files)

        data = dumps({'state': 'T', 'list_pfns': 'True'})
        result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states',
                                                    headers=headers2,
                                                    params=data,
                                                    expect_errors=True)
        assert_equal(result.status, 200)
        tot_temporary_unavailable_files = []
        for line in result.body.split('\n'):
            if line != '':
                tot_temporary_unavailable_files.append(dumps(line))
        nb_tot_temporary_unavailable_files = len(
            tot_temporary_unavailable_files)

        assert_equal(
            nb_tot_files, nb_tot_bad_files1 + nb_tot_suspicious_files +
            nb_tot_temporary_unavailable_files)

        tomorrow = datetime.utcnow() + timedelta(days=1)
        data = dumps({'state': 'B', 'younger_than': tomorrow.isoformat()})
        result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states',
                                                    headers=headers2,
                                                    params=data,
                                                    expect_errors=True)
        assert_equal(result.status, 200)
        tot_bad_files = []
        for line in result.body.split('\n'):
            if line != '':
                tot_bad_files.append(dumps(line))
        nb_tot_bad_files = len(tot_bad_files)
        assert_equal(nb_tot_bad_files, 0)

        data = dumps({})
        result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/summary',
                                                    headers=headers2,
                                                    params=data,
                                                    expect_errors=True)
        assert_equal(result.status, 200)
        nb_tot_bad_files2 = 0
        for line in result.body.split('\n'):
            if line != '':
                line = loads(line)
                nb_tot_bad_files2 += int(line.get('BAD', 0))
        assert_equal(nb_tot_bad_files1, nb_tot_bad_files2)

    def test_add_list_replicas(self):
        """ REPLICA (CLIENT): Add, change state and list file replicas """
        tmp_scope = 'mock'
        nbfiles = 5

        files1 = [{
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK', files=files1)

        files2 = [{
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK3', files=files2)

        replicas = [
            r for r in self.replica_client.list_replicas(
                dids=[{
                    'scope': i['scope'],
                    'name': i['name']
                } for i in files1])
        ]
        assert_equal(len(replicas), len(files1))

        replicas = [
            r for r in self.replica_client.list_replicas(
                dids=[{
                    'scope': i['scope'],
                    'name': i['name']
                } for i in files2],
                schemes=['file'])
        ]
        assert_equal(len(replicas), 5)

        replicas = [
            r for r in self.replica_client.list_replicas(
                dids=[{
                    'scope': i['scope'],
                    'name': i['name']
                } for i in files2],
                schemes=['srm'])
        ]
        assert_equal(len(replicas), 5)

        files3 = [{
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
            'state': 'U',
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK3', files=files3)
        replicas = [
            r for r in self.replica_client.list_replicas(
                dids=[{
                    'scope': i['scope'],
                    'name': i['name']
                } for i in files3],
                schemes=['file'])
        ]
        for i in range(nbfiles):
            assert_equal(replicas[i]['rses'], {})
        files4 = []
        for file in files3:
            file['state'] = 'A'
            files4.append(file)
        self.replica_client.update_replicas_states('MOCK3', files=files4)
        replicas = [
            r for r in self.replica_client.list_replicas(
                dids=[{
                    'scope': i['scope'],
                    'name': i['name']
                } for i in files3],
                schemes=['file'],
                unavailable=True)
        ]
        assert_equal(len(replicas), 5)
        for i in range(nbfiles):
            assert_in('MOCK3', replicas[i]['rses'])

    def test_delete_replicas(self):
        """ REPLICA (CLIENT): Add and delete file replicas """
        tmp_scope = 'mock'
        nbfiles = 5
        files = [{
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK', files=files)
        with assert_raises(AccessDenied):
            self.replica_client.delete_replicas(rse='MOCK', files=files)

        # replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files])]
        # assert_equal(len(replicas), 0)

    def test_add_temporary_unavailable_pfns(self):
        """ REPLICA (CLIENT): Add temporary unavailable PFNs"""
        tmp_scope = 'mock'
        nbfiles = 5
        # Adding replicas to deterministic RSE
        files = [{
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK', files=files)

        # Listing replicas on deterministic RSE
        list_rep = []
        for replica in self.replica_client.list_replicas(dids=[{
                'scope':
                f['scope'],
                'name':
                f['name']
        } for f in files],
                                                         schemes=['srm'],
                                                         unavailable=True):
            pfn = replica['pfns'].keys()[0]
            list_rep.append(pfn)

        # Submit bad PFNs
        now = datetime.utcnow()
        reason_str = generate_uuid()
        self.replica_client.add_bad_pfns(pfns=list_rep,
                                         reason=str(reason_str),
                                         state='TEMPORARY_UNAVAILABLE',
                                         expires_at=now.isoformat())
        result = get_bad_pfns(limit=10000,
                              thread=None,
                              total_threads=None,
                              session=None)
        bad_pfns = {}
        for res in result:
            bad_pfns[res['pfn']] = (res['state'], res['reason'],
                                    res['expires_at'])

        for pfn in list_rep:
            pfn = str(clean_surls([pfn])[0])
            assert_in(pfn, bad_pfns)
            assert_equal(str(bad_pfns[pfn][0]), 'TEMPORARY_UNAVAILABLE')
            assert_equal(bad_pfns[pfn][1], reason_str)

        # Submit with wrong state
        with assert_raises(RucioException):
            self.replica_client.add_bad_pfns(pfns=list_rep,
                                             reason=str(reason_str),
                                             state='BADSTATE',
                                             expires_at=now.isoformat())

        # Run minos once
        minos_run(threads=1, bulk=10000, once=True)
        result = get_bad_pfns(limit=10000,
                              thread=None,
                              total_threads=None,
                              session=None)
        pfns = [res['pfn'] for res in result]
        res_pfns = []
        for replica in list_rep:
            if replica in pfns:
                res_pfns.append(replica)
        assert_equal(res_pfns, [])

        # Check the state in the replica table
        for did in files:
            rep = get_replicas_state(scope=did['scope'], name=did['name'])
            assert_equal(str(rep.keys()[0]), 'TEMPORARY_UNAVAILABLE')

        rep = []
        for did in files:
            did['state'] = ReplicaState.from_sym('TEMPORARY_UNAVAILABLE')
            rep.append(did)

        # Run the minos expiration
        minos_temp_run(threads=1, once=True)
        # Check the state in the replica table
        for did in files:
            rep = get_replicas_state(scope=did['scope'], name=did['name'])
            assert_equal(str(rep.keys()[0]), 'AVAILABLE')

    def test_set_tombstone(self):
        """ REPLICA (CLIENT): set tombstone on replica """
        # Set tombstone on one replica
        rse = 'MOCK4'
        scope = 'mock'
        user = '******'
        name = generate_uuid()
        add_replica(rse, scope, name, 4, user)
        assert_equal(get_replica(rse, scope, name)['tombstone'], None)
        self.replica_client.set_tombstone([{
            'rse': rse,
            'scope': scope,
            'name': name
        }])
        assert_equal(get_replica(rse, scope, name)['tombstone'], OBSOLETE)

        # Set tombstone on locked replica
        name = generate_uuid()
        add_replica(rse, scope, name, 4, user)
        RuleClient().add_replication_rule([{
            'name': name,
            'scope': scope
        }],
                                          1,
                                          rse,
                                          locked=True)
        with assert_raises(ReplicaIsLocked):
            self.replica_client.set_tombstone([{
                'rse': rse,
                'scope': scope,
                'name': name
            }])

        # Set tombstone on not found replica
        name = generate_uuid()
        with assert_raises(ReplicaNotFound):
            self.replica_client.set_tombstone([{
                'rse': rse,
                'scope': scope,
                'name': name
            }])
コード例 #5
0
ファイル: test_replica.py プロジェクト: sahiljajodia01/rucio
class TestReplicaMetalink:
    def setup(self):
        self.did_client = DIDClient()
        self.replica_client = ReplicaClient()
        self.base_client = BaseClient(account='root',
                                      ca_cert=config_get('client', 'ca_cert'),
                                      auth_type='x509')
        self.token = self.base_client.headers['X-Rucio-Auth-Token']

        self.fname = generate_uuid()

        rses = ['MOCK', 'MOCK3', 'MOCK4']
        dsn = generate_uuid()
        self.files = [{
            'scope': 'mock',
            'name': self.fname,
            'bytes': 1,
            'adler32': '0cc737eb'
        }]

        self.did_client.add_dataset(scope='mock', name=dsn)
        self.did_client.add_files_to_dataset('mock',
                                             name=dsn,
                                             files=self.files,
                                             rse='MOCK')
        for r in rses:
            self.replica_client.add_replicas(r, self.files)

    def test_list_replicas_metalink_4(self):
        """ REPLICA (METALINK): List replicas as metalink version 4 """
        ml = xmltodict.parse(self.replica_client.list_replicas(
            self.files,
            metalink=4,
            unavailable=True,
            schemes=['https', 'sftp', 'file']),
                             xml_attribs=False)
        assert_equal(3, len(ml['metalink']['file']['url']))

    def test_get_did_from_pfns_nondeterministic(self):
        """ REPLICA (CLIENT): Get list of DIDs associated to PFNs for non-deterministic sites"""
        rse = 'MOCK2'
        tmp_scope = 'mock'
        nbfiles = 3
        pfns = []
        input = {}
        rse_info = rsemgr.get_rse_info(rse)
        assert_equal(rse_info['deterministic'], False)
        files = [{
            'scope':
            tmp_scope,
            'name':
            'file_%s' % generate_uuid(),
            'bytes':
            1,
            'adler32':
            '0cc737eb',
            'pfn':
            'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s'
            % (tmp_scope, generate_uuid()),
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        for f in files:
            input[f['pfn']] = {'scope': f['scope'], 'name': f['name']}
        add_replicas(rse=rse,
                     files=files,
                     account='root',
                     ignore_availability=True)
        for replica in list_replicas(dids=[{
                'scope': f['scope'],
                'name': f['name'],
                'type': DIDType.FILE
        } for f in files],
                                     schemes=['srm'],
                                     ignore_availability=True):
            for rse in replica['rses']:
                pfns.extend(replica['rses'][rse])
        for result in self.replica_client.get_did_from_pfns(pfns, rse):
            pfn = result.keys()[0]
            assert_equal(input[pfn], result.values()[0])

    def test_get_did_from_pfns_deterministic(self):
        """ REPLICA (CLIENT): Get list of DIDs associated to PFNs for deterministic sites"""
        tmp_scope = 'mock'
        rse = 'MOCK3'
        nbfiles = 3
        pfns = []
        input = {}
        rse_info = rsemgr.get_rse_info(rse)
        assert_equal(rse_info['deterministic'], True)
        files = [{
            'scope': tmp_scope,
            'name': 'file_%s' % generate_uuid(),
            'bytes': 1,
            'adler32': '0cc737eb',
            'meta': {
                'events': 10
            }
        } for _ in range(nbfiles)]
        p = rsemgr.create_protocol(rse_info, 'read', scheme='srm')
        for f in files:
            pfn = p.lfns2pfns(lfns={
                'scope': f['scope'],
                'name': f['name']
            }).values()[0]
            pfns.append(pfn)
            input[pfn] = {'scope': f['scope'], 'name': f['name']}
        add_replicas(rse=rse,
                     files=files,
                     account='root',
                     ignore_availability=True)
        for result in self.replica_client.get_did_from_pfns(pfns, rse):
            pfn = result.keys()[0]
            assert_equal(input[pfn], result.values()[0])
コード例 #6
0
class TestReplicaClients:

    def setup(self):
        self.replica_client = ReplicaClient()
        self.did_client = DIDClient()

    def test_add_list_bad_replicas(self):
        """ REPLICA (CLIENT): Add bad replicas"""
        tmp_scope = 'mock'
        nbfiles = 5
        # Adding replicas to deterministic RSE
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in range(nbfiles)]
        rse_info = rsemgr.get_rse_info('MOCK')
        rse_id1 = rse_info['id']
        self.replica_client.add_replicas(rse='MOCK', files=files)

        # Listing replicas on deterministic RSE
        replicas, list_rep = [], []
        for replica in self.replica_client.list_replicas(dids=[{'scope': f['scope'], 'name': f['name']} for f in files], schemes=['srm'], unavailable=True):
            replicas.extend(replica['rses']['MOCK'])
            list_rep.append(replica)
        r = self.replica_client.declare_bad_file_replicas(replicas, 'This is a good reason')
        assert_equal(r, {})
        bad_replicas = list_bad_replicas()
        nbbadrep = 0
        for rep in list_rep:
            for badrep in bad_replicas:
                if badrep['rse_id'] == rse_id1:
                    if badrep['scope'] == rep['scope'] and badrep['name'] == rep['name']:
                        nbbadrep += 1
        assert_equal(len(replicas), nbbadrep)

        # Run necromancer once
        run(threads=1, bulk=10000, once=True)

        # Try to attach a lost file
        tmp_dsn = 'dataset_%s' % generate_uuid()
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn)
        with assert_raises(UnsupportedOperation):
            self.did_client.add_files_to_dataset(tmp_scope, name=tmp_dsn, files=files, rse='MOCK')

        # Adding replicas to non-deterministic RSE
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb',
                  'pfn': 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), 'meta': {'events': 10}} for i in range(nbfiles)]
        rse_info = rsemgr.get_rse_info('MOCK2')
        rse_id2 = rse_info['id']
        self.replica_client.add_replicas(rse='MOCK2', files=files)

        # Listing replicas on non-deterministic RSE
        replicas, list_rep = [], []
        for replica in self.replica_client.list_replicas(dids=[{'scope': f['scope'], 'name': f['name']} for f in files], schemes=['srm'], unavailable=True):
            replicas.extend(replica['rses']['MOCK2'])
            list_rep.append(replica)
        print(replicas, list_rep)
        r = self.replica_client.declare_bad_file_replicas(replicas, 'This is a good reason')
        print(r)
        assert_equal(r, {})
        bad_replicas = list_bad_replicas()
        nbbadrep = 0
        for rep in list_rep:
            for badrep in bad_replicas:
                if badrep['rse_id'] == rse_id2:
                    if badrep['scope'] == rep['scope'] and badrep['name'] == rep['name']:
                        nbbadrep += 1
        assert_equal(len(replicas), nbbadrep)

        # Now adding non-existing bad replicas
        files = ['srm://mock2.com/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), ]
        r = self.replica_client.declare_bad_file_replicas(files, 'This is a good reason')
        output = ['%s Unknown replica' % rep for rep in files]
        assert_equal(r, {'MOCK2': output})

    def test_add_suspicious_replicas(self):
        """ REPLICA (CLIENT): Add suspicious replicas"""
        tmp_scope = 'mock'
        nbfiles = 5
        # Adding replicas to deterministic RSE
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK', files=files)

        # Listing replicas on deterministic RSE
        replicas = []
        list_rep = []
        for replica in self.replica_client.list_replicas(dids=[{'scope': f['scope'], 'name': f['name']} for f in files], schemes=['srm'], unavailable=True):
            replicas.extend(replica['rses']['MOCK'])
            list_rep.append(replica)
        r = self.replica_client.declare_suspicious_file_replicas(replicas, 'This is a good reason')
        assert_equal(r, {})

        # Adding replicas to non-deterministic RSE
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb',
                  'pfn': 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), 'meta': {'events': 10}} for i in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK2', files=files)

        # Listing replicas on non-deterministic RSE
        replicas = []
        list_rep = []
        for replica in self.replica_client.list_replicas(dids=[{'scope': f['scope'], 'name': f['name']} for f in files], schemes=['srm'], unavailable=True):
            replicas.extend(replica['rses']['MOCK2'])
            list_rep.append(replica)
        r = self.replica_client.declare_suspicious_file_replicas(replicas, 'This is a good reason')
        assert_equal(r, {})

        # Now adding non-existing bad replicas
        files = ['srm://mock2.com/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), ]
        r = self.replica_client.declare_suspicious_file_replicas(files, 'This is a good reason')
        output = ['%s Unknown replica' % rep for rep in files]
        assert_equal(r, {'MOCK2': output})

    def test_bad_replica_methods_for_UI(self):
        """ REPLICA (REST): Test the listing of bad and suspicious replicas """
        mw = []
        headers1 = {'X-Rucio-Account': 'root', 'X-Rucio-Username': '******', 'X-Rucio-Password': '******'}
        r1 = TestApp(auth_app.wsgifunc(*mw)).get('/userpass', headers=headers1, expect_errors=True)
        assert_equal(r1.status, 200)
        token = str(r1.header('X-Rucio-Auth-Token'))
        headers2 = {'X-Rucio-Auth-Token': str(token)}

        data = dumps({})
        r2 = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True)
        assert_equal(r2.status, 200)
        tot_files = []
        for line in r2.body.split('\n'):
            if line != '':
                tot_files.append(dumps(line))
        nb_tot_files = len(tot_files)

        data = dumps({'state': 'B'})
        r2 = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True)
        assert_equal(r2.status, 200)
        tot_bad_files = []
        for line in r2.body.split('\n'):
            if line != '':
                tot_bad_files.append(dumps(line))
        nb_tot_bad_files1 = len(tot_bad_files)

        data = dumps({'state': 'S', 'list_pfns': 'True'})
        r2 = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True)
        assert_equal(r2.status, 200)
        tot_suspicious_files = []
        for line in r2.body.split('\n'):
            if line != '':
                tot_suspicious_files.append(dumps(line))
        nb_tot_suspicious_files = len(tot_suspicious_files)

        assert_equal(nb_tot_files, nb_tot_bad_files1 + nb_tot_suspicious_files)

        tomorrow = datetime.utcnow() + timedelta(days=1)
        data = dumps({'state': 'B', 'younger_than': tomorrow.isoformat()})
        r2 = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True)
        assert_equal(r2.status, 200)
        tot_bad_files = []
        for line in r2.body.split('\n'):
            if line != '':
                tot_bad_files.append(dumps(line))
        nb_tot_bad_files = len(tot_bad_files)
        assert_equal(nb_tot_bad_files, 0)

        data = dumps({})
        r2 = TestApp(rep_app.wsgifunc(*mw)).get('/bad/summary', headers=headers2, params=data, expect_errors=True)
        assert_equal(r2.status, 200)
        nb_tot_bad_files2 = 0
        for line in r2.body.split('\n'):
            if line != '':
                line = loads(line)
                nb_tot_bad_files2 += int(line['BAD'])
        assert_equal(nb_tot_bad_files1, nb_tot_bad_files2)

    def test_add_list_replicas(self):
        """ REPLICA (CLIENT): Add, change state and list file replicas """
        tmp_scope = 'mock'
        nbfiles = 5

        files1 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK', files=files1)

        files2 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK3', files=files2)

        replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files1])]
        assert_equal(len(replicas), len(files1))

        replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files2], schemes=['file'])]
        assert_equal(len(replicas), 5)

        replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files2], schemes=['srm'])]
        assert_equal(len(replicas), 5)

        files3 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'state': 'U', 'meta': {'events': 10}} for i in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK3', files=files3)
        replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files3], schemes=['file'])]
        for i in range(nbfiles):
            assert_equal(replicas[i]['rses'], {})
        files4 = []
        for file in files3:
            file['state'] = 'A'
            files4.append(file)
        self.replica_client.update_replicas_states('MOCK3', files=files4)
        replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files3], schemes=['file'], unavailable=True)]
        assert_equal(len(replicas), 5)
        for i in range(nbfiles):
            assert_in('MOCK3', replicas[i]['rses'])

    def test_delete_replicas(self):
        """ REPLICA (CLIENT): Add and delete file replicas """
        tmp_scope = 'mock'
        nbfiles = 5
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK', files=files)
        with assert_raises(AccessDenied):
            self.replica_client.delete_replicas(rse='MOCK', files=files)
コード例 #7
0
ファイル: test_did.py プロジェクト: pombredanne/rucio
class TestDIDClients:

    def setup(self):
        self.account_client = AccountClient()
        self.scope_client = ScopeClient()
        self.meta_client = MetaClient()
        self.did_client = DIDClient()
        self.replica_client = ReplicaClient()
        self.rse_client = RSEClient()

    def test_list_dids(self):
        """ DATA IDENTIFIERS (CLIENT): List dids by pattern."""
        tmp_scope = scope_name_generator()
        tmp_files = []
        tmp_files.append('file_a_1%s' % generate_uuid())
        tmp_files.append('file_a_2%s' % generate_uuid())
        tmp_files.append('file_b_1%s' % generate_uuid())
        tmp_rse = 'MOCK'

        self.scope_client.add_scope('jdoe', tmp_scope)
        for tmp_file in tmp_files:
            self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1L, '0cc737eb')

        results = []
        for result in self.did_client.list_dids(tmp_scope, {'name': 'file\_a\_*'}, type='file'):
            results.append(result)
        assert_equal(len(results), 2)
        results = []
        for result in self.did_client.list_dids(tmp_scope, {'name': 'file\_a\_1*'}, type='file'):
            results.append(result)
        assert_equal(len(results), 1)
        results = []
        for result in self.did_client.list_dids(tmp_scope, {'name': 'file\__\_1*'}, type='file'):
            results.append(result)
        assert_equal(len(results), 2)
        results = []
        for result in self.did_client.list_dids(tmp_scope, {'name': 'file*'}, type='file'):
            results.append(result)
        assert_equal(len(results), 3)
        results = []
        for result in self.did_client.list_dids(tmp_scope, {'name': 'file*'}):
            results.append(result)
        assert_equal(len(results), 0)
        with assert_raises(UnsupportedOperation):
            self.did_client.list_dids(tmp_scope, {'name': 'file*'}, type='whateverytype')

    def test_list_by_metadata(self):
        """ DATA IDENTIFIERS (CLIENT): List did with metadata"""
        dsns = []
        tmp_scope = 'mock'
        tmp_dsn1 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn1)

        dataset_meta = {'project': 'data12_8TeV',
                        'run_number': 400000,
                        'stream_name': 'physics_CosmicCalo',
                        'prod_step': 'merge',
                        'datatype': 'NTUP_TRIG',
                        'version': 'f392_m920',
                        }
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn1, meta=dataset_meta)
        tmp_dsn2 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn2)
        dataset_meta['run_number'] = 400001
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn2, meta=dataset_meta)

        tmp_dsn3 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn3)
        dataset_meta['stream_name'] = 'physics_Egamma'
        dataset_meta['datatype'] = 'NTUP_SMWZ'
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn3, meta=dataset_meta)

        dids = self.did_client.list_dids(tmp_scope, {'project': 'data12_8TeV', 'version': 'f392_m920'})
        results = []
        for d in dids:
            results.append(d)
        for dsn in dsns:
            assert_in(dsn, results)
        dsns.remove(tmp_dsn1)

        dids = self.did_client.list_dids(tmp_scope, {'project': 'data12_8TeV', 'run_number': 400001})
        results = []
        for d in dids:
            results.append(d)
        for dsn in dsns:
            assert_in(dsn, results)
        dsns.remove(tmp_dsn2)

        dids = self.did_client.list_dids(tmp_scope, {'project': 'data12_8TeV', 'stream_name': 'physics_Egamma', 'datatype': 'NTUP_SMWZ'})
        results = []
        for d in dids:
            results.append(d)
        for dsn in dsns:
            assert_in(dsn, results)

        with assert_raises(KeyNotFound):
            self.did_client.list_dids(tmp_scope, {'NotReallyAKey': 'NotReallyAValue'})

    def test_add_did(self):
        """ DATA IDENTIFIERS (CLIENT): Add, populate and list did content"""
        tmp_scope = 'mock'
        tmp_rse = 'MOCK'
        tmp_dsn = 'dsn_%s' % generate_uuid()

        # PFN example: rfio://castoratlas.cern.ch/castor/cern.ch/grid/atlas/tzero/xx/xx/xx/filename
        dataset_meta = {'project': 'data13_hip',
                        'run_number': 300000,
                        'stream_name': 'physics_CosmicCalo',
                        'prod_step': 'merge',
                        'datatype': 'NTUP_TRIG',
                        'version': 'f392_m927',
                        }
        rules = [{'copies': 1, 'rse_expression': 'MOCK', 'account': 'root'}]

        with assert_raises(ScopeNotFound):
            self.did_client.add_dataset(scope='Nimportnawak', name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules)

        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, statuses={'monotonic': True}, meta=dataset_meta, rules=rules)

        with assert_raises(DataIdentifierNotFound):
            self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dsn, files=[{'scope': tmp_scope, 'name': 'lfn.%(tmp_dsn)s.' % locals() + str(generate_uuid()),
                                                                                        'bytes': 724963570L, 'adler32': '0cc737eb'}, ])
        files = []
        for i in xrange(5):
            lfn = 'lfn.%(tmp_dsn)s.' % locals() + str(generate_uuid())
            pfn = 'mock://localhost/tmp/rucio_rse/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            # it doesn't work with mock: TBF
            # pfn = 'srm://mock2.com:2880/pnfs/rucio/disk-only/scratchdisk/rucio_tests/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            pfn += '%(tmp_dsn)s/%(lfn)s' % locals()
            file_meta = {'guid': str(generate_uuid()), 'events': 10}
            files.append({'scope': tmp_scope, 'name': lfn,
                          'bytes': 724963570L, 'adler32': '0cc737eb',
                          'pfn': pfn, 'meta': file_meta})

        rules = [{'copies': 1, 'rse_expression': 'CERN-PROD_TZERO', 'lifetime': timedelta(days=2)}]

        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dsn, files=files, rse=tmp_rse)

        files = []
        for i in xrange(5):
            lfn = '%(tmp_dsn)s.' % locals() + str(generate_uuid())
            pfn = 'mock://localhost/tmp/rucio_rse/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            # it doesn't work with mock: TBF
            # pfn = 'srm://mock2.com:2880/pnfs/rucio/disk-only/scratchdisk/rucio_tests/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            pfn += '%(tmp_dsn)s/%(lfn)s' % locals()
            file_meta = {'guid': str(generate_uuid()), 'events': 100}
            files.append({'scope': tmp_scope, 'name': lfn,
                          'bytes': 724963570L, 'adler32': '0cc737eb',
                          'pfn': pfn, 'meta': file_meta})
        rules = [{'copies': 1, 'rse_expression': 'CERN-PROD_TZERO', 'lifetime': timedelta(days=2)}]
        self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dsn, files=files, rse=tmp_rse)

        self.did_client.close(scope=tmp_scope, name=tmp_dsn)

    def test_attach_dids_to_dids(self):
        """ DATA IDENTIFIERS (CLIENT): Attach dids to dids"""
        tmp_scope = 'mock'
        tmp_rse = 'MOCK'
        nb_datasets = 5
        nb_files = 5
        attachments, dsns = list(), list()
        guid_to_query = None
        dsn = {}
        for i in xrange(nb_datasets):
            attachment = {}
            attachment['scope'] = tmp_scope
            attachment['name'] = 'dsn.%s' % str(generate_uuid())
            attachment['rse'] = tmp_rse
            files = []
            for i in xrange(nb_files):
                files.append({'scope': tmp_scope, 'name': 'lfn.%s' % str(generate_uuid()),
                              'bytes': 724963570L, 'adler32': '0cc737eb',
                              'meta': {'guid': str(generate_uuid()), 'events': 100}})
            attachment['dids'] = files
            guid_to_query = files[0]['meta']['guid']
            dsn = {'scope': tmp_scope, 'name': attachment['name']}
            dsns.append(dsn)
            attachments.append(attachment)

        self.did_client.add_datasets(dsns=dsns)
        self.did_client.attach_dids_to_dids(attachments=attachments)
        l = [i for i in self.did_client.get_dataset_by_guid(guid_to_query)]

        assert_equal([dsn], l)

        cnt_name = 'cnt_%s' % generate_uuid()
        self.did_client.add_container(scope='mock', name=cnt_name)
        with assert_raises(UnsupportedOperation):
            self.did_client.attach_dids_to_dids([{'scope': 'mock', 'name': cnt_name, 'rse': tmp_rse, 'dids': attachment['dids']}])

    def test_add_dataset(self):
        """ DATA IDENTIFIERS (CLIENT): Add dataset """
        tmp_scope = 'mock'
        tmp_dsn = 'dsn_%s' % generate_uuid()

        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn, meta={'project': 'data13_hip'})

        did = self.did_client.get_did(tmp_scope, tmp_dsn)

        assert_equal(did['scope'], tmp_scope)
        assert_equal(did['name'], tmp_dsn)

        with assert_raises(DataIdentifierNotFound):
            self.did_client.get_did('i_dont_exist', 'neither_do_i')

    def test_add_datasets(self):
        """ DATA IDENTIFIERS (CLIENT): Bulk add datasets """
        tmp_scope = 'mock'
        dsns = list()
        for i in xrange(500):
            tmp_dsn = {'name': 'dsn_%s' % generate_uuid(), 'scope': tmp_scope, 'meta': {'project': 'data13_hip'}}
            dsns.append(tmp_dsn)
        self.did_client.add_datasets(dsns)

    def test_exists(self):
        """ DATA IDENTIFIERS (CLIENT): Check if data identifier exists """
        tmp_scope = 'mock'
        tmp_file = 'file_%s' % generate_uuid()
        tmp_rse = 'MOCK'

        self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1L, '0cc737eb')

        did = self.did_client.get_did(tmp_scope, tmp_file)

        assert_equal(did['scope'], tmp_scope)
        assert_equal(did['name'], tmp_file)

        with assert_raises(DataIdentifierNotFound):
            self.did_client.get_did('i_dont_exist', 'neither_do_i')

    def test_did_hierarchy(self):
        """ DATA IDENTIFIERS (CLIENT): Check did hierarchy rule """

        account = 'jdoe'
        rse = 'MOCK'
        scope = scope_name_generator()
        file = ['file_%s' % generate_uuid() for i in range(10)]
        dst = ['dst_%s' % generate_uuid() for i in range(4)]
        cnt = ['cnt_%s' % generate_uuid() for i in range(4)]

        self.scope_client.add_scope(account, scope)

        for i in range(10):
            self.replica_client.add_replica(rse, scope, file[i], 1, '0cc737eb')
        for i in range(4):
            self.did_client.add_did(scope, dst[i], 'DATASET', statuses=None, meta=None, rules=None)
        for i in range(4):
            self.did_client.add_did(scope, cnt[i], 'CONTAINER', statuses=None, meta=None, rules=None)

        for i in range(4):
            self.did_client.add_files_to_dataset(scope, dst[i], [{'scope': scope, 'name': file[2 * i], 'bytes': 1L, 'adler32': '0cc737eb'},
                                                                 {'scope': scope, 'name': file[2 * i + 1], 'bytes': 1L, 'adler32': '0cc737eb'}])

        self.did_client.add_containers_to_container(scope, cnt[1], [{'scope': scope, 'name': cnt[2]}, {'scope': scope, 'name': cnt[3]}])
        self.did_client.add_datasets_to_container(scope, cnt[0], [{'scope': scope, 'name': dst[1]}, {'scope': scope, 'name': dst[2]}])

        result = self.did_client.scope_list(scope, recursive=True)
        for r in result:
            pass
            # TODO: fix, fix, fix
            # if r['name'] == cnt[1]:
            #    assert_equal(r['type'], 'container')
            #    assert_equal(r['level'], 0)
            # if (r['name'] == cnt[0]) or (r['name'] == dst[0]) or (r['name'] == file[8]) or (r['name'] == file[9]):
            #    assert_equal(r['level'], 0)
            # else:
            #     assert_equal(r['level'], 1)

    def test_detach_did(self):
        """ DATA IDENTIFIERS (CLIENT): Detach dids from a did"""

        account = 'jdoe'
        rse = 'MOCK'
        scope = scope_name_generator()
        file = ['file_%s' % generate_uuid() for i in range(10)]
        dst = ['dst_%s' % generate_uuid() for i in range(4)]
        cnt = ['cnt_%s' % generate_uuid() for i in range(2)]

        self.scope_client.add_scope(account, scope)

        for i in range(10):
            self.replica_client.add_replica(rse, scope, file[i], 1L, '0cc737eb')
        for i in range(4):
            self.did_client.add_dataset(scope, dst[i], statuses=None, meta=None, rules=None)
        for i in range(2):
            self.did_client.add_container(scope, cnt[i], statuses=None, meta=None, rules=None)

        for i in range(4):
            self.did_client.add_files_to_dataset(scope, dst[i], [{'scope': scope, 'name': file[2 * i], 'bytes': 1L, 'adler32': '0cc737eb'},
                                                                 {'scope': scope, 'name': file[2 * i + 1], 'bytes': 1L, 'adler32': '0cc737eb'}])

        self.did_client.add_containers_to_container(scope, cnt[1], [{'scope': scope, 'name': dst[2]}, {'scope': scope, 'name': dst[3]}])

        with assert_raises(UnsupportedOperation):
            self.did_client.add_datasets_to_container(scope, cnt[0], [{'scope': scope, 'name': dst[1]}, {'scope': scope, 'name': cnt[1]}])

        self.did_client.add_datasets_to_container(scope, cnt[0], [{'scope': scope, 'name': dst[1]}, {'scope': scope, 'name': dst[2]}])

        self.did_client.detach_dids(scope, cnt[0], [{'scope': scope, 'name': dst[1]}])
        self.did_client.detach_dids(scope, dst[3], [{'scope': scope, 'name': file[6]}, {'scope': scope, 'name': file[7]}])
        result = self.did_client.scope_list(scope, recursive=True)
        for r in result:
            if r['name'] == dst[1]:
                assert_equal(r['level'], 0)
            if r['type'] is 'file':
                if (r['name'] in file[6:9]):
                    assert_equal(r['level'], 0)
                else:
                    assert_not_equal(r['level'], 0)

        with assert_raises(UnsupportedOperation):
            self.did_client.detach_dids(scope=scope, name=cnt[0], dids=[{'scope': scope, 'name': cnt[0]}])

    def test_scope_list(self):
        """ DATA IDENTIFIERS (CLIENT): Add, aggregate, and list data identifiers in a scope """

        # create some dummy data
        self.tmp_accounts = ['jdoe' for i in xrange(3)]
        self.tmp_scopes = [scope_name_generator() for i in xrange(3)]
        self.tmp_rses = [rse_name_generator() for i in xrange(3)]
        self.tmp_files = ['file_%s' % generate_uuid() for i in xrange(3)]
        self.tmp_datasets = ['dataset_%s' % generate_uuid() for i in xrange(3)]
        self.tmp_containers = ['container_%s' % generate_uuid() for i in xrange(3)]

        # add dummy data to the catalogue
        for i in xrange(3):
            self.scope_client.add_scope(self.tmp_accounts[i], self.tmp_scopes[i])
            self.rse_client.add_rse(self.tmp_rses[i])
            self.replica_client.add_replica(self.tmp_rses[i], self.tmp_scopes[i], self.tmp_files[i], 1L, '0cc737eb')

        # put files in datasets
        for i in xrange(3):
            for j in xrange(3):
                files = [{'scope': self.tmp_scopes[j], 'name': self.tmp_files[j], 'bytes': 1L, 'adler32': '0cc737eb'}]
                self.did_client.add_dataset(self.tmp_scopes[i], self.tmp_datasets[j])
                self.did_client.add_files_to_dataset(self.tmp_scopes[i], self.tmp_datasets[j], files)
コード例 #8
0
def _rucio_register(beamline, uid, filenames):
    """
    Register the file in rucio for replication to SDCC.
    """
    scope = beamline
    container = uid

    replica_client = ReplicaClient()
    didclient = DIDClient()
    scopeclient = ScopeClient()
    ruleclient = RuleClient()

    for root, ending, filename in filenames:
        #size = os.stat(str(filename)).st_size
        #adler = adler32(str(filename))
        files = [{
            'scope': scope,
            'name': filename.split('/')[-1],
            'bytes': 1000,
            #'adler32': "unknown",
            'pfn': pfn + filename
        }]

        dataset = os.path.join(root, ending)
        dataset = '.'.join(dataset.split('/')[1:-1])
        print("DATASET", dataset)
        breakpoint()
        try:
            scopeclient.add_scope(account='nsls2data', scope=scope)
        except rucio.common.exception.Duplicate:
            pass

        replica_client.add_replicas(rse=rse, files=files)

        # Create a new container if it doesn't exist.
        try:
            didclient.add_did(scope=scope, name=uid, type='container')
        except rucio.common.exception.DataIdentifierAlreadyExists:
            pass

        # Create a replication rule.
        try:
            dids = [{'scope': scope, 'name': container}]
            ruleclient.add_replication_rule(
                dids=dids,
                copies=1,
                rse_expression='SDCC',
                lifetime=86400,  # Seconds
                account='nsls2data',
                source_replica_expression='NSLS2',
                purge_replicas=True,
                comment='purge_replicas in 24 hours')
        except rucio.common.exception.DuplicateRule:
            pass

        # Create a new dataset if it doesn't exist.
        try:
            didclient.add_did(scope=scope, name=dataset, type='dataset')
        except rucio.common.exception.DataIdentifierAlreadyExists:
            pass

        attachment = {
            'scope': scope,
            'name': uid,
            'dids': [{
                'scope': scope,
                'name': dataset
            }]
        }

        try:
            didclient.add_files_to_dataset(scope, dataset, files)
        except rucio.common.exception.FileAlreadyExists:
            pass

        try:
            didclient.add_datasets_to_containers([attachment])
        except rucio.common.exception.DuplicateContent:
            pass